GRUB configuration

The kernel parameters are changed via /etc/grub2.cfg.
Add GRUB_CMDLINE_LINUX_DEFAULT="default_hugepagesz=1GB hugepagesz=1G hugepages=4 iommu=pt intel_iommu=on" to /etc/default/grub; if /etc/default/grub does not exist, create it and add these kernel parameters.

Then run update-grub. In /boot/grub2/grub.cfg these parameters do not go at the end of the file; they sit in the middle of the line, as arguments to vmlinuz, for example: linux /vmlinuz-4.4.0-66-generic root=/dev/mapper/m4--u1604--vg-root ro iommu=pt intel_iommu=on default_hugepagesz=1GB hugepagesz=1G hugepages=8

Experiment:

I.

1. Before iommu=pt intel_iommu=on default_hugepagesz=1GB hugepagesz=1G hugepages=8 was added to /boot/grub2/grub.cfg, cat /proc/cmdline showed:

(screenshot)

2. The hugepage information showed:

(screenshot)

3. The hugepage directory contained only the 2048K entry:

(screenshot)

II.

1. The relevant entry in /boot/grub2/grub.cfg was changed to the following setting, and the machine was rebooted:

(screenshot)

2. cat /proc/cmdline showed:

(screenshot)

3. The hugepage information now showed only four 1G pages:

(screenshot)

4. The hugepage directory contained only the 1048576K entry:

(screenshot)

1G and 2M hugepages behave differently depending on the physical environment and the workload; with 2M pages we saw roughly a 10-20% performance drop. Also, once 1G hugepages are configured on the kernel command line, there is no need to set the sysctl parameter vm.nr_hugepages.
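As a quick sanity check that the reserved 1 GB pages are actually usable, a standalone test program can map one of them with MAP_HUGETLB. This is an illustrative sketch, not part of any project discussed here; it assumes a Linux kernel recent enough to support MAP_HUGE_1GB:

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#ifndef MAP_HUGE_SHIFT
#define MAP_HUGE_SHIFT 26
#endif
#ifndef MAP_HUGE_1GB
#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT)
#endif

int main(void)
{
    size_t len = 1UL << 30;   /* one 1 GB page */

    /* anonymous mapping backed by one of the reserved 1 GB hugepages */
    void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_1GB,
                   -1, 0);
    if (p == MAP_FAILED)
    {
        perror("mmap");       /* fails if no 1 GB page is reserved or available */
        return 1;
    }

    memset(p, 0, len);        /* touch the memory so the page is really backed */
    printf("mapped a 1 GB hugepage at %p\n", p);
    munmap(p, len);
    return 0;
}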

100G NIC traffic test

1. Using two 100G NICs and running iperf directly on the physical host OS, the result was about 46 Gbit/s, roughly 50% of line rate.
2. Then mininet was installed on the two machines connected by the 100G NICs, and iperf was run between virtual machines distributed across the two physical hosts, with the traffic passing through Open vSwitch; the result was about 43 Gbit/s.

Background

The NOVA service in OpenStack shows UP on both compute2 and compute3, but all virtual machines are started on compute2. I uninstalled all the services on compute3, checked nova.conf and the configuration files in the neutron directory and found nothing wrong; I also deleted the libvirt files and reinstalled KVM.

(screenshot)

(screenshot)

The problem was still there.

Debugging

The following entry was found in the nova log:
vi /var/log/nova/nova-conductor.log

2017-03-16 06:11:55.355 1511 ERROR nova.scheduler.utils [req-9779d696-34bd-442a-9b71-5e3a0ec8893f 5be92d6fb50e4c188f4991b1e7988c1b 1cac0a39dbb4473e85c836595a0e7673 - - -] [instance: 8c818ee0-36d8-450b-a61f-3744305d4244] 
Error from last host: compute3 (node compute3): [u'Traceback (most recent call last):\n', u'  File "/usr/lib/python2.7/site-packages/nova/compute/manager.py", line 1905, in _do_build_and_run_instance\n    filter_properties)\n', u'  
File "/usr/lib/python2.7/site-packages/nova/compute/manager.py", line 2058, in _build_and_run_instance\n    
instance_uuid=instance.uuid, reason=six.text_type(e))\n', u"RescheduledException: Build of instance 8c818ee0-36d8-450b-a61f-3744305d4244 was re-scheduled: Secret not found: no secret with matching uuid '5b059071-1aff-4b72-bc5f-0122a7d6c1df'\n"]

Cause

When I deleted the libvirt files, the secret files inside them were deleted as well. I copied the files from compute2 into the corresponding libvirt directory on compute3 and restarted the service, and the problem was resolved.

(screenshot)

(screenshot)

Background

Originally every switch (SW) had its own PROC thread and RECV thread, and freeing a switch did not check whether other threads still held its data. So when a message returned an error, the switch was freed immediately while other threads still held pointers into it, which produced dangling pointers.

Rewrite approach

For message handling we serialize the messages. There are three threads in total, PROC, RECV, and SEND, decoupled from the number of switches. The benefit is that the controller can support more switches; if threads were bound to switches, the number of switches that could be added would be very limited.

Each message carries a state while it is being processed. A message in the wait-close state is walked through every thread once, and only after no thread holds the switch any longer is it freed. On the epoll-thread side this forms a queue, which prevents dangling pointers.

(diagram)
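For reference, here is a minimal sketch of the queued message element and the message states that the three threads pass around, reconstructed from the fields and constants used in the code below. The names match the code, but the enum values and exact field layout are assumptions; the real definitions live in the project's headers:

//hypothetical reconstruction: names match the code below, values and layout are assumptions
typedef enum
{
    NEWACCEPT = 0,   //a new switch connection was accepted
    CONNECTED,       //data is available on an established connection
    WAITCLOSE,       //the connection is being torn down; drain it through every thread
    CLOSE_ACT        //every thread has released the switch; it can now be freed
} msg_state_e;

typedef struct gst_msgsock
{
    INT4  iSockFd;      //socket fd the event belongs to
    UINT4 uiMsgType;    //one of the states above
    UINT4 uiSw_Index;   //index into g_server.switches
} gst_msgsock_t;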

	pthread_create(&swPktRecv_thread, NULL, msg_recv_thread, NULL);
	pthread_create(&swPktProc_thread, NULL, msg_proc_thread, NULL);
	pthread_create(&swPktSend_thread, NULL, msg_send_thread, NULL);
//RECV thread
void *msg_recv_thread(void *index)
{
    gn_switch_t *sw = NULL;

	prctl(PR_SET_NAME, (unsigned long) "Msg_Recv_Thread" ) ;  

	msg_recv(sw);
    return NULL;
}

//receive messages from the switch socket and store them in the receive buffer
void msg_recv(gn_switch_t *sw)
{
    INT4 iRet =0;
    UINT4 head = 0;
    UINT4 tail = 0;
    INT4 len = 0;
	INT4  iSockFd = 0;
	UINT4 uiMsgType = 0;
	gst_msgsock_t newsockmsgRecv = {0};
	gst_msgsock_t newsockmsgProc = {0};
	p_Queue_node pNode  = NULL;
 
	int nErr= 0;
	gn_switch_t * switch_gw = NULL;

	char writebuf[20] = {0};
	
	while(1)
	{
		iRet = Read_Event(EVENT_RECV);
		if(GN_OK != iRet) 
		{
			LOG_PROC("ERROR", "Error: %s! %d",FN, LN);
			return ;
		}
		pop_MsgSock_from_queue( &g_tRecv_queue, &newsockmsgRecv);
		iSockFd = newsockmsgRecv.iSockFd;
		uiMsgType = newsockmsgRecv.uiMsgType;
		
		//writebuf[0]= (char)iSockFd;
		//iRet = write(g_iConnPipeFd[1],writebuf, 1);
		//LOG_PROC("INFO", "sock fd: %d ! TYPE=%d ret=%d", iSockFd,newsockmsgRecv.uiMsgType,ret);
		//If the message is WAITCLOSE (not CONNECTED), just forward it to the PROC thread
		if(WAITCLOSE == uiMsgType)
		{
			//switch_gw has not been looked up yet at this point, so use the switch index carried in the queued message
			push_MsgAndEvent(EVENT_PROC, pNode, &g_tProc_queue, newsockmsgProc, WAITCLOSE, iSockFd, newsockmsgRecv.uiSw_Index );
		}
		//If the message is CONNECTED, read the data from the socket into the receive buffer
		if(CONNECTED == uiMsgType)
		{
			// find sw index by sock fd
			switch_gw = find_sw_by_sockfd(iSockFd);
			if (NULL == switch_gw) 
			{
				LOG_PROC("ERROR", "sock fd: %d switch is NULL!",iSockFd);
				continue;
			}
			while(switch_gw->state)
			{
		        head = switch_gw->recv_buffer.head;
		        tail = switch_gw->recv_buffer.tail;
						
		        if( (tail+1)%g_server.buff_num != head )
	            {//by:yhy check whether the ring of receive buffers is full
	                len = recv(switch_gw->sock_fd, switch_gw->recv_buffer.buff_arr[tail].buff, g_server.buff_len, 0);
	                if(len <= 0)
	                {//by:yhy recv returns 0 when the connection is closed, and a negative value on error
	                	nErr= errno;
						//by:yhy added 20170214
						//LOG_PROC("DEBUG", "%s : len=%d nErr=%d",FN,len, nErr);

						if(( 0 == len) || (EAGAIN == nErr)) // the socket buffer has been fully drained
						{
							break;
						}
							
						if(EINTR == nErr)
						{
							continue;
						}
						//push_RecvEvent_queue(WAITCLOSE,switch_gw->index,iSockFd,1);
	                    free_switch(switch_gw);
	                    break;
	                }
	                else
	                {//by:yhy recv > 0 is the number of bytes received
	                    switch_gw->recv_buffer.buff_arr[tail].len = len;
	                    switch_gw->recv_buffer.tail = (tail+1)%g_server.buff_num;
						//LOG_PROC("DEBUG", "%s : len=%d ",FN,len);
												
						push_MsgAndEvent(EVENT_PROC, pNode, &g_tProc_queue, newsockmsgProc,CONNECTED, iSockFd, switch_gw->index );
	                }
	            }
				else
				{
					break;
				}

		    }	

		}
	}
}

//switch message processing thread
void *msg_proc_thread(void *index)
{
    gn_switch_t *sw = NULL;
	INT4 iRet = 0;
    UINT4 head = 0;
    UINT4 tail = 0;
	UINT4 uiMsgType = 0;
	UINT4 uiswIndex = 0;
	INT4  iSockFd = 0;
	p_Queue_node pNode  = NULL;
	
	gst_msgsock_t newsockmsgProc= {0};	
	gst_msgsock_t newsockmsgSend= {0};
	
	prctl(PR_SET_NAME, (unsigned long) "Msg_Proc_Thread" ) ;  
	
    while(1)
    {
		iRet = Read_Event(EVENT_PROC);
		if(GN_OK != iRet) 
		{
			LOG_PROC("ERROR", "Error: %s! %d",FN, LN);
			return NULL;
		}
		pop_MsgSock_from_queue( &g_tProc_queue, &newsockmsgProc);
		uiMsgType =  newsockmsgProc.uiMsgType;
		uiswIndex = newsockmsgProc.uiSw_Index ;
		iSockFd = newsockmsgProc.iSockFd;
		sw = &g_server.switches[uiswIndex];
		if(CONNECTED ==  uiMsgType)
		{
	        head = sw->recv_buffer.head;
	        tail = sw->recv_buffer.tail;
			//check whether the receive buffer is empty
	        if(head != tail )
	        {
	            msg_process(sw);
	            sw->recv_buffer.head =(head + 1) % g_server.buff_num;
				
	        }

		}
		if(WAITCLOSE ==  uiMsgType)
		{
			push_MsgAndEvent(EVENT_SEND, pNode, &g_tSend_queue, newsockmsgSend,WAITCLOSE, iSockFd, sw->index );
		}
		if(NEWACCEPT == uiMsgType)
		{
			//new_switch
		}
		if(CLOSE_ACT == uiMsgType)
		{
			free_switch(sw);
		}

    }
    return NULL;
}

void *msg_send_thread(void *index)
{
	UINT4 uiMsgType = 0;
	UINT4 uiswIndex = 0;
    INT4  iSockFd = 0;
	INT4  iErrno =0;
	INT4  iWriteLen = 0;
	INT4  iRet = 0;
	gn_switch_t *sw = NULL;
	gst_msgsock_t newsockmsgSend = {0};
	gst_msgsock_t newsockmsgProc = {0};
	p_Queue_node pNode  = NULL;
	
	prctl(PR_SET_NAME, (unsigned long) "Msg_Send_Thread" ) ; 
	
	while(1)
	{
		iRet = Read_Event(EVENT_SEND);
		if(GN_OK != iRet) 
		{
			LOG_PROC("ERROR", "Error: %s! %d",FN,LN);
			return NULL;
		}
		pop_MsgSock_from_queue( &g_tSend_queue, &newsockmsgSend);
		uiMsgType =  newsockmsgSend.uiMsgType;
		uiswIndex = newsockmsgSend.uiSw_Index ;
		if(CONNECTED ==  uiMsgType)
		{
			//send_len tells whether there is data to send
			sw = &g_server.switches[uiswIndex];
		    if(sw->send_len)
		    {
		    	iWriteLen = 0;
		        pthread_mutex_lock(&sw->send_buffer_mutex);
				while(iWriteLen < sw->send_len)				//modify by ycy
				{
		        	iRet = write(sw->sock_fd, sw->send_buffer+iWriteLen, sw->send_len-iWriteLen);
			
					if(iRet < 0)
					{
						iErrno = errno;
						LOG_PROC("DEBUG", "%s : len=%d nErr=%d",FN,iRet, iErrno);
						if((EINTR== iErrno)||(EAGAIN== iErrno))
						{
							//usleep(1);
							continue;
						}
						break;
					}
					else
					{
						iWriteLen += iRet;
					}
				}
				//clear the whole send buffer in one go
		        memset(sw->send_buffer, 0, g_sendbuf_len);
		        sw->send_len = 0;
		        pthread_mutex_unlock(&sw->send_buffer_mutex);
		    }
		}
		if(WAITCLOSE ==  uiMsgType)
		{
			//sw may not have been set by an earlier CONNECTED message; look it up from the index in the queued message
			sw = &g_server.switches[uiswIndex];
			iSockFd =  sw->sock_fd;
			push_MsgAndEvent(EVENT_PROC, pNode, &g_tProc_queue, newsockmsgProc, CLOSE_ACT, iSockFd, sw->index );
		}
	
	}
	return NULL;
}

Background

The original memory_pool architecture was:

//memory pool structure
typedef struct _Queue_List
{
    UINT4       total_len;    //total number of units in the pool
    UINT4       usable_len;   //number of currently available units
    void        *data;        //pool data area
    void        **arr;        //array of head pointers for each block in the data area (the pool grows upward)
    UINT4       head;         //index into arr of the next unused block
    UINT4       tail;
    void        *mutex;
    char        *states;      //guards against double free; 1: not in use, 0: in use
    UINT2       block;
}Queue_List;

Every switch has its own memory pool, and the configuration file contains:

#switch receive buffer number
buff_num=2000
#every switch receive buffer length
buff_len=20480

Sometimes a single byte ends up occupying a whole 20K buffer, which wastes memory: with buff_num=2000 and buff_len=20480, each switch pre-allocates roughly 2000 × 20 KB ≈ 40 MB regardless of how much of it is actually used. The implementation is as follows:


//insert data into _q_list
//rather than inserting data into q_list, this really returns the space occupied by data to the memory pool q_list
static int Queue_In(void *_q_list, void *data)
{
    Queue_List *q_list;
    UINT4   tail;
    int     pos;
    q_list = (Queue_List *)(_q_list);

    if(data == NULL)
    {
        return -1;
    }

    //check that data lies on a block boundary
    if( (data - q_list->data) %  q_list->block )
    {
        printf("free add error,%p\n",data);
        return -1;
    }
	//compute the block's position and validate it
    pos = (data - q_list->data) /  q_list->block ;      
    if(pos <0 || pos >= q_list->total_len )
    {
        printf("free add error over %d\n",pos);
        return -1;
    }
	
	//thread-safe access
    pthread_mutex_lock(q_list->mutex );
    {
		//the state says the block is already free: double free
		if( *(q_list->states + pos ) == 1 )                 
		{
			pthread_mutex_unlock(q_list->mutex );
			LOG_PROC("ERROR", "%s : already free",FN);
			Debug_PrintTrace();
			return -1;
		}
		
        tail = q_list->tail;
        q_list->arr[tail] = data;
        *(q_list->states + pos ) = 1;
        q_list->tail = ( tail + 1 ) % q_list->total_len;
        q_list->usable_len++;
    }
    pthread_mutex_unlock(q_list->mutex );

    return 1;
}

//take one element out of the queue (really: hand out one block's space, zero-initialized)
static void *Queue_Out(void *_q_list)     
{
    Queue_List *q_list;
    int head;
    int pos;
    void *unit=NULL;
    q_list = (Queue_List *)(_q_list);

    pthread_mutex_lock(q_list->mutex );
    {
        if( q_list->usable_len >0 )
        {//by:yhy the number of available blocks is greater than 0
            head = q_list->head;
            unit = q_list->arr[head];
            pos = (unit - q_list->data)  /  q_list->block;    //determine the block index
            *(q_list->states + pos) = 0;                      //mark the block as in use
            q_list->usable_len--;
            q_list->head = (head+1)%q_list->total_len;
			//by:yhy zero-initialize the block
            memset(unit, 0, q_list->block);
        }
    }
    pthread_mutex_unlock(q_list->mutex );
    return unit;
}
//create a queue of block*len bytes
static void *Queue_Init(UINT4 block ,UINT4 len)
{
	UINT8 i;
    char    *data;
    Queue_List *q_list;

    q_list = (Queue_List *)malloc(sizeof(Queue_List));
    if(q_list == NULL ) return q_list;
    memset(q_list, 0, sizeof(Queue_List));

    //lock
    q_list->mutex = (pthread_mutex_t *)malloc( sizeof(pthread_mutex_t) );
    if(q_list->mutex == NULL)   return NULL;
    pthread_mutex_init(q_list->mutex , NULL);

    q_list->data = malloc(    block * len );  //base address
    if( q_list->data == NULL )  return NULL;
    memset(q_list->data, 0, block * len);

    q_list->arr = (void **)malloc( len * sizeof(void *) );
    if( q_list->arr == NULL )   return NULL;
    memset(q_list->arr, 0, len * sizeof(void *));

    q_list->states = (char *)malloc( len * sizeof(char) );
    if( q_list->states == NULL )    return NULL;
    memset(q_list->states, 0, len * sizeof(char));


    q_list->head = 0;
    q_list->tail = 0;               //head and tail both start at position 0
    q_list->usable_len = 0;         //number of usable units (the Queue_In loop below fills the pool)
    q_list->block = block;
    q_list->total_len = len;


    for(i=0; i < len ; i++)
    {
        data = q_list->data + i *block ;
        Queue_In(q_list, data);
    }
    return (void *)q_list;

}

//create a memory pool of block*len bytes
void *mem_create(UINT4 block , UINT4 len)   //block size, block count
{
    void *pool;
    pool = Queue_Init(block,len);
    return  (void *)pool;
}
//mem_get returns the address of one element taken from the head of the queue (the element has already been zeroed)
//        see the comment on mem_free for why it is zeroed
//!!!NOTE: mem_get may fail to obtain a block, i.e. it can return a NULL pointer
void *mem_get(void *pool)
{
    void *data;
    data = Queue_Out(pool);
    return data;
}
//the "free" here simply returns a finished block to the memory pool so a later mem_get can reuse it; this is also why mem_get hands out zeroed blocks
int mem_free(void *pool ,void *data)    //the same block must not be freed twice
{
    return Queue_In(pool , data);
}
//destroy the memory pool
void mem_destroy(void *pool)
{
    Queue_List *q_list = (Queue_List *)pool;
    free(q_list->arr);
    free(q_list->data);
    free(q_list->states);
    if(q_list->mutex)
    {
        pthread_mutex_destroy(q_list->mutex);
        free(q_list->mutex);
    }

    free(q_list);
}
//returns the number of units currently in use (total minus available)
UINT4 mem_num(void *_pool)              //number of units in use
{
    Queue_List *pool;
    pool = (Queue_List *)(_pool);
    return (pool->total_len - pool->usable_len);
}
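
For orientation, here is a minimal usage sketch of the pool API above. The sizes mirror the buff_num/buff_len configuration; the surrounding function is purely illustrative and assumes the declarations above are available through the project's headers:

//illustrative only: allocate a pool, take one block, give it back, tear the pool down
void pool_example(void)
{
    //2000 blocks of 20480 bytes each, matching buff_num/buff_len above
    void *pool = mem_create(20480, 2000);
    if (NULL == pool)
        return;

    char *buf = (char *)mem_get(pool);   //may return NULL when the pool is exhausted
    if (buf != NULL)
    {
        //the block has been zeroed by Queue_Out; use it e.g. as a receive buffer
        mem_free(pool, buf);             //return the block; freeing it twice is rejected
    }

    mem_destroy(pool);
}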

Rewrite

Dynamic allocation

A separate buffer is allocated, with its own init, write, and read functions, and the different situations are handled case by case: a fixed-size buffer is pre-allocated, and when a packet arrives that is too large, the buffer is expanded.

WRITE

The write path is split into cases based on the initial state of the buffer, e.g. whether the used region sits in the middle or wraps around both ends. The incoming packet also needs case analysis: when the packet would run past the end of the buffer, it is split and the part that does not fit is written at the start of the buffer. Throughout, the head pointer must be kept correct.

(diagram)

(diagram)

READ

The read function also needs case analysis for the different initial states, e.g. whether the used region sits in the middle or wraps around both ends. The outgoing data needs case analysis too: when the read length runs past the end of the buffer, the read is split, and after the data at the end has been read out, the data at the start of the buffer is read as well. Throughout, the head pointer must be kept correct.

PEEK

Peek only looks at the data: when reading we do not know the packet length in advance, so the length has to be read from the header first. At that point the buffer head is not touched; once the length is known, the whole packet is read and only then is the head moved. Another reason is that we use a queue: an element taken out cannot be put back in place, it can only be appended at the tail (FIFO). With a linked list, where an element can be put back where it was, the logic could be written differently.

Locking

A lock is needed whenever the values change, because reads and writes can happen at the same time.

typedef struct loop_buffer
{
	char  *buffer;
	UINT4 total_len;
	UINT4 cur_len;
	char  *head;
	pthread_mutex_t buffer_mutex;
}loop_buffer_t;

/***********************************************************************/
//Circle Queue Related start
loop_buffer_t *init_loop_buffer(INT4 len)
{	
	loop_buffer_t *p_loop_buffer = NULL;
	p_loop_buffer = (loop_buffer_t *)gn_malloc(sizeof(loop_buffer_t));
	if(NULL== p_loop_buffer)
	{
		LOG_PROC("ERROR", "init_loop_buffer -- p_loop_buffer gn_malloc  Final return GN_ERR");
		return NULL;
	}

	p_loop_buffer->buffer = (char *)gn_malloc(len * sizeof(char));
	if(NULL== p_loop_buffer->buffer)
	{
		LOG_PROC("ERROR", "init_loop_buffer -- p_loop_buffer.buffer gn_malloc  Finall return GN_ERR");
		return NULL;
	}
	p_loop_buffer->head = p_loop_buffer->buffer;
	p_loop_buffer->total_len = len;
	p_loop_buffer->cur_len = 0;     //make sure the fill level starts at zero
	pthread_mutex_init(&(p_loop_buffer->buffer_mutex) , NULL);
	
	return p_loop_buffer;
}

void reset_loop_buffer(loop_buffer_t *p_loop_buffer)
{
	pthread_mutex_lock(&(p_loop_buffer->buffer_mutex));
	p_loop_buffer->cur_len = 0;
	p_loop_buffer->head = p_loop_buffer->buffer;
	pthread_mutex_unlock(&(p_loop_buffer->buffer_mutex));
}

BOOL buffer_write(loop_buffer_t *p_loop_buffer,char* p_recv,INT4 len)
{

	UINT4 n_pos = 0;
	UINT4 first_len = 0;
	UINT4 second_len = 0;
	
	pthread_mutex_lock(&(p_loop_buffer->buffer_mutex));
	if(p_loop_buffer->cur_len + len > p_loop_buffer->total_len)
	{
		UINT4 need_space = 0;
	    char *p_temp = NULL;
		p_temp = p_loop_buffer->buffer;
        need_space = p_loop_buffer->cur_len + len - p_loop_buffer->total_len;
		need_space = need_space > 1024? need_space : 1024;

	    p_loop_buffer->buffer = (char *)gn_malloc(p_loop_buffer->total_len + need_space);

		if(NULL == p_loop_buffer->buffer)
		{
			LOG_PROC("ERROR", "%s -- p_loop_buffer.buffer gn_malloc  Finall return GN_ERR",FN);
			pthread_mutex_unlock(&(p_loop_buffer->buffer_mutex));
			return FALSE;
		}

		//copy the existing data (possibly wrapped) linearly into the new buffer
		if(p_loop_buffer->head + p_loop_buffer->cur_len >= p_temp + p_loop_buffer->total_len)
		{
		    second_len = p_loop_buffer->head + p_loop_buffer->cur_len - (p_temp + p_loop_buffer->total_len);
			memcpy(p_loop_buffer->buffer,p_loop_buffer->head,(p_loop_buffer->cur_len - second_len));
		    memcpy(p_loop_buffer->buffer + p_loop_buffer->cur_len - second_len,p_temp,second_len);
	    }
		else
		{
		    memcpy(p_loop_buffer->buffer,p_loop_buffer->head,p_loop_buffer->cur_len);
		}
		p_loop_buffer->head = p_loop_buffer->buffer;   //the data is now linear, so head must point at the new buffer
		p_loop_buffer->total_len = p_loop_buffer->total_len + need_space;
		free(p_temp);
	}
	if(p_loop_buffer->head + p_loop_buffer->cur_len + len <= p_loop_buffer->buffer + p_loop_buffer->total_len)
    {
        memcpy(p_loop_buffer->head + p_loop_buffer->cur_len,p_recv,len);
    }
    else
	{
		if(p_loop_buffer->head + p_loop_buffer->cur_len >= p_loop_buffer->buffer + p_loop_buffer->total_len)
		{
			n_pos = (p_loop_buffer->head + p_loop_buffer->cur_len) - (p_loop_buffer->buffer + p_loop_buffer->total_len);
			memcpy(p_loop_buffer->buffer + n_pos,p_recv,len);
		}
		else
		{
			first_len = p_loop_buffer->buffer + p_loop_buffer->total_len - (p_loop_buffer->head + p_loop_buffer->cur_len);
			if(first_len >= len)
            {
                memcpy(p_loop_buffer->head + p_loop_buffer->cur_len,p_recv,len);
            }
            else
            {
                memcpy(p_loop_buffer->head + p_loop_buffer->cur_len,p_recv,first_len);
                memcpy(p_loop_buffer->buffer,p_recv + first_len,len - first_len);
            }
		}
	}
	p_loop_buffer->cur_len += len;
	
	pthread_mutex_unlock(&(p_loop_buffer->buffer_mutex));
	
	return TRUE;
}

BOOL buffer_read(loop_buffer_t *p_loop_buffer,char* p_dest,INT4 len,BOOL peek)
{
	UINT4 first_len = 0;
	UINT4 second_len = 0;
	if(NULL == p_loop_buffer || NULL == p_dest)
	{
		LOG_PROC("ERROR", "%s -- NULL == p_loop_buffer",FN);
		return FALSE;
	}
	
	
	if(p_loop_buffer->cur_len < len)
	{
		LOG_PROC("ERROR", "%s -- p_loop_buffer->cur_len < len READ memory out of range",FN);
		return FALSE;
	}
	
	pthread_mutex_lock(&(p_loop_buffer->buffer_mutex));	
	if(p_loop_buffer->head + p_loop_buffer->cur_len > p_loop_buffer->buffer + p_loop_buffer->total_len)
	{
		//the stored data wraps around the end of the buffer
		second_len = p_loop_buffer->head + p_loop_buffer->cur_len - (p_loop_buffer->buffer + p_loop_buffer->total_len);
		first_len = p_loop_buffer->cur_len - second_len;

		if(len >= first_len)
		{
			//the read itself wraps: copy the tail part first, then the part at the start of the buffer
			memcpy(p_dest,p_loop_buffer->head,first_len);
			memcpy(p_dest + first_len,p_loop_buffer->buffer,len - first_len);
			if(FALSE == peek)
			{
				p_loop_buffer->head = p_loop_buffer->buffer + (len - first_len);
				p_loop_buffer->cur_len = p_loop_buffer->cur_len - len;
			}
		}
		else
		{
			//the read fits entirely before the end of the buffer
			memcpy(p_dest,p_loop_buffer->head,len);
			if(FALSE == peek)
			{
				p_loop_buffer->head = p_loop_buffer->head + len;
				p_loop_buffer->cur_len = p_loop_buffer->cur_len - len;
			}
		}
	}
	}
	else
	{
		memcpy(p_dest,p_loop_buffer->head,len);
		if(FALSE == peek)
		{
			p_loop_buffer->head += len;
			
			if(p_loop_buffer->head == p_loop_buffer->buffer + p_loop_buffer->total_len)
			{
				p_loop_buffer->head = p_loop_buffer->buffer;
			}
		}	
	}
	
	pthread_mutex_unlock(&(p_loop_buffer->buffer_mutex));
	
	return TRUE;
}
//Circle Queue Related stop
/***********************************************************************/
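
To make the peek-then-read flow concrete, here is a minimal usage sketch of the API above. It assumes the standard OpenFlow framing (total message length at byte offset 2 of the 8-byte header, in network byte order) and that the types and functions above are declared in the project's headers; the buffer sizes and the surrounding function are illustrative only:

#include <arpa/inet.h>   //ntohs

//illustrative only: write received bytes in, peek the header, then consume the whole message
void loop_buffer_example(char *recv_data, INT4 recv_len)
{
	char header[8] = {0};
	char msg[2048] = {0};                          //large enough for this sketch
	loop_buffer_t *lb = init_loop_buffer(4096);    //grown on demand by buffer_write
	if(NULL == lb)
		return;

	buffer_write(lb, recv_data, recv_len);

	//peek: inspect the header without moving lb->head
	if(TRUE == buffer_read(lb, header, sizeof(header), TRUE))
	{
		UINT2 msg_len = ntohs(*(UINT2 *)(header + 2));
		if(msg_len <= sizeof(msg))
		{
			//real read: consumes the message and advances lb->head
			buffer_read(lb, msg, msg_len, FALSE);
			//... hand msg to the message handler ...
		}
	}
}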

Background

There are two functions, new_switch and free_switch, and the way they manipulate g_server.cur_switch is problematic: g_server.cur_switch is defined as a global variable, so any thread can access it. The two functions are shown below.


//when a new switch connects, initialize it into g_server.switches and send OFPT_HELLO
static INT4 new_switch(UINT4 switch_sock, struct sockaddr_in addr)
{
    if(g_server.cur_switch < g_server.max_switch)
    {
        UINT idx = 0;
        for(; idx < g_server.max_switch; idx++)
        {
            if(0 == g_server.switches[idx].state)
            {//by:yhy find the first unused slot in the g_server switch array
                g_server.cur_switch++;
                g_server.switches[idx].sock_fd = switch_sock;
                g_server.switches[idx].msg_driver.msg_handler = of_message_handler;
                g_server.switches[idx].sw_ip = *(UINT4 *)&addr.sin_addr;
                g_server.switches[idx].sw_port = *(UINT2 *)&addr.sin_port;
                g_server.switches[idx].recv_buffer.head = 0;
                g_server.switches[idx].recv_buffer.tail = 0;
                g_server.switches[idx].send_len = 0;
                memset(g_server.switches[idx].send_buffer, 0, g_sendbuf_len);
                g_server.switches[idx].state = 1;
                g_server.switches[idx].sock_state = 0;

                // printf("version:%d, ip:%d\n", g_server.switches[idx].ofp_version, g_server.switches[idx].sw_ip);
                of13_msg_hello(&g_server.switches[idx], NULL);

                return idx;
            }
        }
    }
    return -1;
}


//release the switch structure
void free_switch(gn_switch_t *sw)
{
    UINT4 port_idx;
    UINT4 hash_idx;
    mac_user_t *p_macuser;
    UINT1 dpid[8];
    ulli64_to_uc8(sw->dpid, dpid);
    LOG_PROC("WARNING", "Switch [%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x] disconnected", dpid[0],
            dpid[1], dpid[2], dpid[3], dpid[4], dpid[5], dpid[6], dpid[7]);
    g_server.cur_switch--;
    
    sw->sock_fd = 0;
    sw->sock_state = 0;
    //reset driver
    sw->sw_ip = 0;
    sw->sw_port = 0;
    sw->recv_buffer.head = 0;
    sw->recv_buffer.tail = 0;
    sw->state = 0;
    
    if((sw->msg_driver.msg_handler != of10_message_handler)
       && (sw->msg_driver.msg_handler != of13_message_handler)
       && (sw->msg_driver.msg_handler != of_message_handler))
    {
        gn_free((void **)&(sw->msg_driver.msg_handler));
    }
    sw->msg_driver.msg_handler = of_message_handler;
    sw->send_len = 0;

    //trigger the operations associated with deleting a switch
    event_delete_switch_on(sw);
   

    clean_flow_entries(sw);

    //reset neighbor
    for (port_idx = 0; port_idx < MAX_PORTS; port_idx++)
    {
        if (sw->neighbor[port_idx])
        {
        	of13_delete_line2(sw,port_idx);
        }
    }
    memset(sw->ports, 0, sizeof(gn_port_t) * MAX_PORTS);

    //reset user
    for (hash_idx = 0; hash_idx < g_macuser_table.macuser_hsize; hash_idx++)
    {
        p_macuser = sw->users[hash_idx];
        del_mac_user(p_macuser);
    }
    memset(sw->users, 0, g_macuser_table.macuser_hsize * sizeof(mac_user_t *));
}

Problem

In the original code, when the switch table was full, free_switch decremented g_server.cur_switch as soon as it was entered, so there was immediately room for a new switch again. Because of multithreading, before the free_switch thread finished, a new switch could come in, increment g_server.cur_switch and initialize the slot; the CPU would then schedule the free_switch thread again, which wiped out the fields new_switch had just initialized. That is the bug.

Rewrite approach

Move the g_server.cur_switch-- to the very end, after all the fields have been cleared; only then can new_switch claim the slot, which avoids the problem above. In addition, protect accesses to g_server.cur_switch with a lock.


//when a new switch connects, initialize it into g_server.switches and send OFPT_HELLO
static INT4 new_switch(UINT4 switch_sock, struct sockaddr_in addr)
{
    if(g_server.cur_switch < g_server.max_switch)
    {
        UINT idx = 0;
        for(; idx < g_server.max_switch; idx++)
        {
            if(0 == g_server.switches[idx].state)
            {//by:yhy find the first unused slot in the g_server switch array
                g_server.switches[idx].sock_fd = switch_sock;
                g_server.switches[idx].msg_driver.msg_handler = of_message_handler;
                g_server.switches[idx].sw_ip = *(UINT4 *)&addr.sin_addr;
                g_server.switches[idx].sw_port = *(UINT2 *)&addr.sin_port;
                g_server.switches[idx].recv_buffer.head = 0;
                g_server.switches[idx].recv_buffer.tail = 0;
                g_server.switches[idx].send_len = 0;
                memset(g_server.switches[idx].send_buffer, 0, g_sendbuf_len);
                g_server.switches[idx].state = 1;
                g_server.switches[idx].sock_state = 0;
                pthread_mutex_lock(&g_server.cur_switch_mutex);
				g_server.cur_switch++;
				pthread_mutex_unlock(&g_server.cur_switch_mutex);
                // printf("version:%d, ip:%d\n", g_server.switches[idx].ofp_version, g_server.switches[idx].sw_ip);
                of13_msg_hello(&g_server.switches[idx], NULL);
                
                return idx;
            }
        }
    }
    return -1;
}

//release the switch structure
void free_switch(gn_switch_t *sw)
{
    UINT4 port_idx;
    UINT4 hash_idx;
    mac_user_t *p_macuser;
    UINT1 dpid[8];
    ulli64_to_uc8(sw->dpid, dpid);
    LOG_PROC("WARNING", "Switch [%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x] disconnected", dpid[0],
            dpid[1], dpid[2], dpid[3], dpid[4], dpid[5], dpid[6], dpid[7]);    
    sw->sock_fd = 0;
    sw->sock_state = 0;
    //reset driver
    sw->sw_ip = 0;
    sw->sw_port = 0;
    sw->recv_buffer.head = 0;
    sw->recv_buffer.tail = 0;
    sw->state = 0;
    
    if((sw->msg_driver.msg_handler != of10_message_handler)
       && (sw->msg_driver.msg_handler != of13_message_handler)
       && (sw->msg_driver.msg_handler != of_message_handler))
    {
        gn_free((void **)&(sw->msg_driver.msg_handler));
    }
    sw->msg_driver.msg_handler = of_message_handler;
    sw->send_len = 0;
    
    //trigger the operations associated with deleting a switch
    event_delete_switch_on(sw);
   

    clean_flow_entries(sw);

    //reset neighbor
    for (port_idx = 0; port_idx < MAX_PORTS; port_idx++)
    {
        if (sw->neighbor[port_idx])
        {
        	of13_delete_line2(sw,port_idx);
        }
    }
    memset(sw->ports, 0, sizeof(gn_port_t) * MAX_PORTS);

    //reset user
    for (hash_idx = 0; hash_idx < g_macuser_table.macuser_hsize; hash_idx++)
    {
        p_macuser = sw->users[hash_idx];
        del_mac_user(p_macuser);
    }
    memset(sw->users, 0, g_macuser_table.macuser_hsize * sizeof(mac_user_t *));
	
	pthread_mutex_lock(&g_server.cur_switch_mutex);
	g_server.cur_switch--;
	pthread_mutex_unlock(&g_server.cur_switch_mutex);
}

Add the following to inc/gnflush0type.h:


//configuration of the controller's own server side
typedef struct gn_server
{
    UINT4 sock_fd;
    UINT4 ip;
    UINT4 port;
    UINT4 buff_num;
    UINT4 buff_len;
    UINT4 max_switch;
    UINT4 cur_switch;						//number of currently connected switches
	pthread_mutex_t cur_switch_mutex;
    UINT4 cpu_num;
    struct gn_switch *switches;
    UINT1 state;
    UINT1 pad[3];
}gn_server_t;
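
Note that the new cur_switch_mutex field must be initialized before the worker threads start accepting switches. A minimal sketch, assuming a hypothetical helper called from the server's startup path (the real project initializes g_server elsewhere):

#include <pthread.h>

//hypothetical helper: call once during server startup, before any switch can connect
void init_cur_switch_mutex(gn_server_t *server)
{
    pthread_mutex_init(&server->cur_switch_mutex, NULL);
}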