上一篇我们分析了hmap,hmap可以说是Open vSwitch中的基石结构,Open vSwitch中很多数据结构都依赖hmap。本篇我们来分析一下ofpbuf。从名字上就可以得知,这个数据结构是用于存储数据的,比如收发的OpenFlow报文。
我们首先来看一下它的数据结构定义。(有些内容我是直接写在代码注释中的)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
/* Buffer for holding arbitrary data. An ofpbuf is automatically reallocated * as necessary if it grows too large for the available memory. * * 'frame' and offset conventions: * * Network frames (aka "packets"): 'frame' MUST be set to the start of the * packet, layer offsets MAY be set as appropriate for the packet. * Additionally, we assume in many places that the 'frame' and 'data' are * the same for packets. * * OpenFlow messages: 'frame' points to the start of the OpenFlow * header, while 'l3_ofs' is the length of the OpenFlow header. * When parsing, the 'data' will move past these, as data is being * pulled from the OpenFlow message. * * Actions: When encoding OVS action lists, the 'frame' is used * as a pointer to the beginning of the current action (see ofpact_put()). * * rconn: Reuses 'frame' as a private pointer while queuing. */ |
struct ofpbuf {//这个有一个预编译,为了简单起见,我们认为DPDK_NETDEV宏无效(关于DPDK网上有很多资料)。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
#ifdef DPDK_NETDEV struct rte_mbuf mbuf; /* DPDK mbuf */ #else void *base_; /* First byte of allocated space. 指向内存申请的起始位置。释放内存时候此变量传给free */ void *data_; /* First byte actually in use. 指向当前可用内存起始位置。最开始base_和data_ 是一样的 */ uint32_t size_; /* Number of bytes in use. 表示内存已经使用的字节数 当size_ = allocated时候表示内存用完。 */ #endif uint32_t allocated; /* Number of bytes allocated. 表示从系统中申请的内存块大小*/ void *frame; /* Packet frame start, or NULL. 这个字段可参考上面注释*/ uint16_t l2_5_ofs; /* MPLS label stack offset from 'frame', or * UINT16_MAX 2.5层 偏移量 */ uint16_t l3_ofs; /* Network-level header offset from 'frame', or UINT16_MAX. 3层网络层 偏移量*/ uint16_t l4_ofs; /* Transport-level header offset from 'frame', or UINT16_MAX. 4层传输层 偏移量*/ enum ofpbuf_source source; /* Source of memory allocated as 'base'. 表示该内存来自堆、栈,主要用于内存释放。取值为ofpbuf_source枚举*/ struct list list_node; /* Private list element for use by owner. 链表节点。 用于将多个ofpbuf关联在一起 */ }; //枚举类型 enum OVS_PACKED_ENUM ofpbuf_source { OFPBUF_MALLOC, /* Obtained via malloc(). */ OFPBUF_STACK, /* Un-movable stack space or static buffer. */ OFPBUF_STUB, /* Starts on stack, may expand into heap. */ OFPBUF_DPDK, /* buffer data is from DPDK allocated memory. ref to build_ofpbuf() in netdev-dpdk. */ }; |
下面是可能的存储结构图:
上图表示,分配16个字节空间,灰色部分为预留空间(4字节),蓝色为占用空间(5个字节),白色为剩余可用空间(7个字节)。
数据结构相对简单,我们看一下主要函数。由代码中的注释可知,数据结构ofpbuf支持内存空间自动扩充,可以理解为简单内存池。为了深入理解ofpbuf,我们选择一个从堆中申请内存的例子(test-sflow.c)进行分析(因为其他内存类型是不需要释放空间的),如下所示:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
/* Excerpt of the sFlow test program's main loop ("...." marks code that the
 * article left out).
 *
 * Shows the typical life cycle of a heap-backed ofpbuf: ofpbuf_init()
 * allocates MAX_RECV bytes once, and each loop iteration reuses that storage
 * for one read() from 'sock'. */
static void
test_sflow_main(int argc, char *argv[])
{
    ....
    struct ofpbuf buf;
    ....
    ofpbuf_init(&buf, MAX_RECV);
    for (;;) {
        int retval;

        unixctl_server_run(server);

        /* NOTE(review): ofpbuf_clear() is not shown in the article;
         * presumably it empties the buffer so the read below starts at the
         * beginning of the storage -- confirm. */
        ofpbuf_clear(&buf);
        do {
            /* read() writes straight into the buffer's data area; the
             * ofpbuf's "bytes in use" count is not updated by this. */
            retval = read(sock, ofpbuf_data(&buf), buf.allocated);
        } while (retval < 0 && errno == EINTR); /* Retry if interrupted. */
        if (retval > 0) {
            /* Account for the bytes read() already stored by growing the
             * in-use size by 'retval'. */
            ofpbuf_put_uninit(&buf, retval);
            print_sflow(&buf);
            fflush(stdout);
        }

        if (exiting) {
            break;
        }

        poll_fd_wait(sock, POLLIN);
        unixctl_server_wait(server);
        poll_block();
    } /* End of for (;;). */
}
1、初始化ofpbuf结构
我们可以先申请一个局部变量,然后将该变量地址和要申请的大小传给函数ofpbuf_init(OpenvSwitch代码好处是每个函数都是很小,耐心钻研一定可以看懂)。我们来看一下函数调用关系:
ofpbuf初始化流程,经过的函数依次是,ofpbuf_init,ofpbuf_use,ofpbuf_use__,ofpbuf_init__。(函数命名中最后是两个下划线代表是静态函数)现在我们来看一下各个函数实现。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
static void ofpbuf_init__(struct ofpbuf *b, size_t allocated, enum ofpbuf_source source) { b->allocated = allocated;//设置申请的内存大小 即内存块大小 b->source = source;//内存的类型,当前实例是malloc类型 b->frame = NULL; b->l2_5_ofs = b->l3_ofs = b->l4_ofs = UINT16_MAX; list_poison(&b->list_node); } static void ofpbuf_use__(struct ofpbuf *b, void *base, size_t allocated, enum ofpbuf_source source) { ofpbuf_set_base(b, base);//设置base ofpbuf_set_data(b, base);//设置data 此时base和data保存的都是内存起始位置,只不过是data会变化,base不变 ofpbuf_set_size(b, 0);//设置已经使用的内存大小 起初为0 ofpbuf_init__(b, allocated, source); } /* Initializes 'b' as an empty ofpbuf that contains the 'allocated' bytes of * memory starting at 'base'. 'base' should be the first byte of a region * obtained from malloc(). It will be freed (with free()) if 'b' is resized or * freed. */ void ofpbuf_use(struct ofpbuf *b, void *base, size_t allocated) { ofpbuf_use__(b, base, allocated, OFPBUF_MALLOC);//内存类型为malloc类型 } /* Initializes 'b' as an empty ofpbuf with an initial capacity of 'size' * bytes. */ void ofpbuf_init(struct ofpbuf *b, size_t size) { ofpbuf_use(b, size ? xmalloc(size) : NULL, size); } |
上面是初始化操作流程,逻辑和内容十分简单。我们现在来看一下put操作,即增加内存空间。在介绍put操作之前,我们先来看四个工具函数,也是非常小的函数:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
/* Returns the byte following the last byte of data in use in 'b'. * 返回第一个可存储数据地址 针对上图返回值是 (0x832200C + 5) */ static inline void *ofpbuf_tail(const struct ofpbuf *b) { return (char *) ofpbuf_data(b) + ofpbuf_size(b); /* data_ 指向数据报文起始位置,即上面蓝色开始位置 */ } /* Returns the byte following the last byte allocated for use (but not * necessarily in use) by 'b'. * <span style="font-family: Arial, Helvetica, sans-serif;">返回内存区最后一个字节地址 针对上图返回值为 (0x8322008 + 16)</span> */ static inline void *ofpbuf_end(const struct ofpbuf *b) { return (char *) ofpbuf_base(b) + b->allocated; /* base_ 指向内存区起始位置 */ } /* Returns the number of bytes of headroom in 'b', that is, the number of bytes * of unused space in ofpbuf 'b' before the data that is in use. (Most * commonly, the data in a ofpbuf is at its beginning, and thus the ofpbuf's * headroom is 0.) * 头部剩余空间大小。 直接用data_ - base_ 就可以得到。 */ static inline size_t ofpbuf_headroom(const struct ofpbuf *b) { return (char*)ofpbuf_data(b) - (char*)ofpbuf_base(b); } /* Returns the number of bytes that may be appended to the tail end of ofpbuf * 'b' before the ofpbuf must be reallocated. * 尾部剩余空间大小。 */ static inline size_t ofpbuf_tailroom(const struct ofpbuf *b) { return (char*)ofpbuf_end(b) - (char*)ofpbuf_tail(b); } |
下面就是扩大内存的具体函数:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
/* Appends 'size' bytes of data to the tail end of 'b', reallocating and
 * copying its data if necessary.  Returns a pointer to the first byte of the
 * new data, which is left uninitialized. */
void *
ofpbuf_put_uninit(struct ofpbuf *b, size_t size)
{
    void *new_bytes;

    /* Make sure there is room first.  This may move the storage, so the tail
     * address must be taken only afterwards. */
    ofpbuf_prealloc_tailroom(b, size);
    new_bytes = ofpbuf_tail(b);

    /* The appended bytes now count as in use. */
    ofpbuf_set_size(b, ofpbuf_size(b) + size);

    return new_bytes;
}

/* Appends 'size' zeroed bytes to the tail end of 'b'.  Data in 'b' is
 * reallocated and copied if necessary.  Returns a pointer to the first byte of
 * the data's location in the ofpbuf. */
void *
ofpbuf_put_zeros(struct ofpbuf *b, size_t size)
{
    /* memset() returns its destination, which is the start of the new
     * region. */
    return memset(ofpbuf_put_uninit(b, size), 0, size);
}

/* Appends the 'size' bytes of data in 'p' to the tail end of 'b'.  Data in 'b'
 * is reallocated and copied if necessary.  Returns a pointer to the first
 * byte of the data's location in the ofpbuf. */
void *
ofpbuf_put(struct ofpbuf *b, const void *p, size_t size)
{
    /* memcpy() returns its destination, which is the start of the new
     * region. */
    return memcpy(ofpbuf_put_uninit(b, size), p, size);
}
这三个函数功能都是类似的,在原有ofpbuf结构b中增大size大小的内存空间。 函数ofpbuf_put_uninit会被其他两个函数调用。我来分析一下这个函数。
ofpbuf_prealloc_tailroom 在尾部扩展内存,这个函数逻辑也是很简单
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
/* Returns the number of bytes that may be appended to the tail end of ofpbuf * 'b' before the ofpbuf must be reallocated. * 返回可用内存空间,即上图中白色空间大小 */ static inline size_t ofpbuf_tailroom(const struct ofpbuf *b) { return (char*)ofpbuf_end(b) - (char*)ofpbuf_tail(b); } /* Reallocates 'b' so that it has exactly 'new_headroom' and 'new_tailroom' * bytes of headroom and tailroom, respectively. * 内存扩充函数 我们只关注malloc的内存 即红色部分 */ static void ofpbuf_resize__(struct ofpbuf *b, size_t new_headroom, size_t new_tailroom) { void *new_base, *new_data; size_t new_allocated; new_allocated = new_headroom + ofpbuf_size(b) + new_tailroom; switch (b->source) { case OFPBUF_DPDK: OVS_NOT_REACHED(); case OFPBUF_MALLOC: if (new_headroom == ofpbuf_headroom(b)) {//调用realloc申请内存 new_base = xrealloc(ofpbuf_base(b), new_allocated); } else { new_base = xmalloc(new_allocated);//调用malloc申请内存并且修改ofpbuf中相关数据 ofpbuf_copy__(b, new_base, new_headroom, new_tailroom); /* 将数据复制到新的内存空间中 需要注意头部剩余空间和使用空间。*/ free(ofpbuf_base(b)); } break;</span> case OFPBUF_STACK: OVS_NOT_REACHED(); case OFPBUF_STUB: b->source = OFPBUF_MALLOC; new_base = xmalloc(new_allocated); ofpbuf_copy__(b, new_base, new_headroom, new_tailroom); break; default: OVS_NOT_REACHED(); } // 重新设置allocated和base_ 指针 b->allocated = new_allocated; ofpbuf_set_base(b, new_base); // 重新设置data_ 指针 new_data = (char *) new_base + new_headroom; if (ofpbuf_data(b) != new_data) { if (b->frame) { uintptr_t data_delta = (char *) new_data - (char *) ofpbuf_data(b); b->frame = (char *) b->frame + data_delta; } ofpbuf_set_data(b, new_data); } } /* Ensures that 'b' has room for at least 'size' bytes at its tail end, * reallocating and copying its data if necessary. Its headroom, if any, is * preserved. * 尾部扩充内存 首先需要判断剩余内存是否满足需求,如果size大于剩余可用空间则需要重新申请内存 * 为了避免内存碎片和快速申请,每次至少申请64字节 */ void ofpbuf_prealloc_tailroom(struct ofpbuf *b, size_t size) { if (size > ofpbuf_tailroom(b)) { ofpbuf_resize__(b, ofpbuf_headroom(b), MAX(size, 64)); } } |
与pull相对应的函数是push,push类函数主要是在头部扩充内存,这里我们不再进行讨论。函数ofpbuf_pull从数据头部移除size个字节:它将data_指针向后移动size字节,同时将size_减少size,效果是灰色空间增大,蓝色区域的起始位置向后移动size字节。
1 2 3 4 5 6 7 8 9 10 |
/* Removes 'size' bytes from the head end of 'b', which must contain at least
 * 'size' bytes of data.  Returns the first byte of data removed.
 *
 * No memory is moved or released: the data pointer advances by 'size' and the
 * in-use count shrinks by the same amount, so the removed bytes become
 * headroom. */
static inline void *ofpbuf_pull(struct ofpbuf *b, size_t size)
{
    char *removed = (char *) ofpbuf_data(b);

    ovs_assert(ofpbuf_size(b) >= size);

    ofpbuf_set_data(b, removed + size);
    ofpbuf_set_size(b, ofpbuf_size(b) - size);

    return removed;
}
最后我们来看一下释放函数,这个函数也是非常简单的。
1 2 3 4 5 6 7 8 9 10 11 |
/* Frees memory that 'b' points to.
 *
 * Only storage tagged OFPBUF_MALLOC is passed to free().  A null 'b' is
 * accepted and ignored.  A buffer tagged OFPBUF_DPDK must never reach this
 * function; that is enforced with an assertion. */
void
ofpbuf_uninit(struct ofpbuf *b)
{
    if (!b) {
        return;
    }

    if (b->source == OFPBUF_MALLOC) {
        free(ofpbuf_base(b));
    }
    ovs_assert(b->source != OFPBUF_DPDK);
}
上面就是本博客主要介绍的内容,ofpbuf相对简单,下面我们会分析Open vSwitch会话相关的数据结构struct connmgr,struct ofconn,struct ofproto等,这部分数据结构属于Open vSwitch管理层,对于学习Open vSwitch是非常重要的。
作者简介:
徐小冰:毕业于河北大学,主要从事嵌入式软件开发,虚拟化,SDN。目前基于ODL和Open vSwitch进行二次开发,希望与广大网友一起探讨学习。作者系OpenDaylight群(194240432)资深活跃用户,@IT难人。