ktcpvs is a subproject of LVS that performs layer-7 (application-level) load balancing; see www.linuxvirtualserver.org for background. This article is based on the ktcpvs release for the Linux 2.6 kernel (2.6.9). What follows is a walkthrough of the ktcpvs source code as I understand it.
1. The entry point of the whole module is the file tcp_vs.c:
module_init(ktcpvs_init);
This is the module's initialization hook; it hands control to ktcpvs_init.
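For orientation, the usual 2.6 module boilerplate around this hook looks like the sketch below. Only module_init(ktcpvs_init) is quoted from the source; the exit-handler name ktcpvs_cleanup is my assumption.

/* Sketch of the standard init/exit pairing in a 2.6 module.
 * ktcpvs_cleanup is an assumed name for the exit handler. */
module_init(ktcpvs_init);
module_exit(ktcpvs_cleanup);
MODULE_LICENSE("GPL");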
2. The ktcpvs_init function
static int __init ktcpvs_init(void)
{
	tcp_vs_control_start();		/* set up the control/configuration interface */
	tcp_vs_slowtimer_init();	/* initialize the slow housekeeping timer */
	tcp_vs_srvconn_init();		/* initialize server-connection management */
	(void) kernel_thread(master_daemon, NULL, 0);
	return 0;
}
After these initialization steps, the module spawns the master_daemon kernel thread (kernel_thread(fn, arg, flags) runs fn(arg) as a new kernel thread; the flags are 0 here).
3. The master_daemon thread
static int master_daemon(void *unused)
{
	struct list_head *l;
	struct tcp_vs_service *svc;

	/* main loop */
	while (sysctl_ktcpvs_unload == 0) {
		read_lock(&__tcp_vs_svc_lock);
		list_for_each(l, &tcp_vs_svc_list) {
			svc = list_entry(l, struct tcp_vs_service, list);
			/* spawn a daemon for every configured service not yet running */
			if (!atomic_read(&svc->running) && svc->start)
				kernel_thread(tcp_vs_daemon, svc, 0);
		}
		read_unlock(&__tcp_vs_svc_lock);
	}
	return 0;
}
This is the sole master worker thread. It walks tcp_vs_svc_list (the in-kernel mirror of the config file) and starts every service that should run: one ktcpvs service corresponds to one virtual service, i.e., one "virtual web" block in the config. For each such svc it spawns a tcp_vs_daemon thread. The fields of struct tcp_vs_service that matter for this walkthrough are sketched below.
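A minimal sketch of struct tcp_vs_service, limited to the fields the excerpts in this article touch; the real definition has more fields, and the layout here is my assumption.

/* Hedged sketch, not the literal ktcpvs definition. */
struct tcp_vs_service {
	struct list_head list;		/* links this svc into tcp_vs_svc_list */
	atomic_t running;		/* nonzero once its tcp_vs_daemon is up */
	int start;			/* config says this service should run */
	int stop;			/* tells the daemon and its children to quit */
	struct socket *mainsock;	/* listening socket, set by StartListening */
	struct tcp_vs_scheduler *scheduler;	/* e.g. the "http" scheduler */
	struct {			/* parsed config for one "virtual web" block */
		__u32 addr;		/* virtual IP, network byte order */
		__u16 port;		/* virtual port, network byte order */
		int startservers;	/* initial size of the child-thread pool */
		__u32 redirect_addr;	/* fallback target for failed scheduling */
		__u16 redirect_port;
	} conf;
};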
4. The tcp_vs_daemon thread
static int tcp_vs_daemon(void *__svc)
{
	struct tcp_vs_service *svc = (struct tcp_vs_service *) __svc;
	int i;

	/* allocation of child_table and other setup elided in this excerpt */
	/* Then start listening and spawn the daemons */
	if (StartListening(svc) < 0)
		goto out;
	for (i = 0; i < svc->conf.startservers; i++)
		make_child(child_table, i, svc);
	while (!svc->stop && sysctl_ktcpvs_unload == 0) {
		/* dynamically keep enough threads to handle the load */
		child_pool_maintenance(child_table, svc);
	}
	/* stop listening */
	StopListening(svc);
  out:
	return 0;
}
Each virtual service svc gets one such thread. It calls StartListening to create a socket and start listening, then, per the startservers parameter in the config, calls make_child to spawn the initial pool of children tied to this svc and records them in child_table. child_table is essentially an array children of struct tcp_vs_child, each element holding a pointer back to svc (both types are sketched below). Finally the thread stops listening and exits; I will not trace that path further. The interesting parts are StartListening and make_child.
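A minimal sketch of the child bookkeeping types as this walkthrough uses them; the field layout and the array-size macro name are my assumptions, and only the fields referenced by the excerpts are shown.

/* Hedged sketch, not the literal ktcpvs definitions. */
struct tcp_vs_child {
	struct tcp_vs_service *svc;	/* back-pointer, set by make_child */
	pid_t pid;			/* filled in by the child thread itself */
};

struct tcp_vs_child_table {
	struct tcp_vs_child children[KTCPVS_CHILD_MAX];	/* macro name assumed */
};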
5. The StartListening function, in the file Misc.c
int StartListening(struct tcp_vs_service *svc)
{
	struct socket *sock;
	struct sockaddr_in sin;
	int error;

	/* First create a socket (error handling elided in this excerpt) */
	error = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
	/* Now bind the socket */
	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = svc->conf.addr;	/* already in network byte order */
	sin.sin_port = svc->conf.port;
	error = sock->ops->bind(sock, (struct sockaddr *) &sin, sizeof(sin));
	/* Now, start listening on the socket */
	error = sock->ops->listen(sock, sysctl_ktcpvs_max_backlog);
	svc->mainsock = sock;
	return 0;
}
It simply creates a kernel socket, binds it to the configured address and port, and starts listening; the listening socket is saved in svc->mainsock. The user-space equivalent of this sequence is sketched below for comparison.
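A self-contained user-space analogue of the same create/bind/listen sequence (illustration only; start_listening is my name, not ktcpvs code):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

/* User-space counterpart of StartListening: returns a listening fd or -1. */
int start_listening(const char *addr, unsigned short port, int backlog)
{
	struct sockaddr_in sin;
	int fd = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);

	if (fd < 0)
		return -1;
	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = inet_addr(addr);	/* network byte order, like svc->conf.addr */
	sin.sin_port = htons(port);
	if (bind(fd, (struct sockaddr *) &sin, sizeof(sin)) < 0 ||
	    listen(fd, backlog) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}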
6. The make_child function
static inline void make_child(struct tcp_vs_child_table *tbl, int slot,
			      struct tcp_vs_service *svc)
{
	tbl->children[slot].svc = svc;
	if (kernel_thread(tcp_vs_child, &tbl->children[slot],
			  CLONE_VM | CLONE_FS | CLONE_FILES) < 0)
		TCP_VS_ERR("spawn child failed\n");
}
This links children[slot] of the tcp_vs_child_table to svc and spawns a tcp_vs_child thread for it. The CLONE_VM | CLONE_FS | CLONE_FILES flags make the new thread share the caller's address space, filesystem context, and file table, as kernel threads normally do.
7. The tcp_vs_child thread
static int tcp_vs_child(void *__child)
{
	struct tcp_vs_child *chd = (struct tcp_vs_child *) __child;
	struct tcp_vs_service *svc = chd->svc;
	struct tcp_vs_conn *conn;
	struct socket *sock;
	int ret;
	/* Buffer and BufLen (a per-child scratch buffer) are set up earlier;
	 * their allocation is elided in this excerpt */

	chd->pid = current->pid;
	sock = svc->mainsock;
	while (svc->stop == 0 && sysctl_ktcpvs_unload == 0) {
		/* create a tcp_vs_conn object */
		conn = tcp_vs_conn_create(sock, Buffer, BufLen);
		/* Do the actual accept */
		ret = sock->ops->accept(sock, conn->csock, O_NONBLOCK);
		/* Do the work */
		ret = tcp_vs_conn_handle(conn, svc);
	}
	return 0;
}
Each virtual service starts a pool of tcp_vs_child threads according to its configuration. A child points sock at the mainsock that is already listening, creates a tcp_vs_conn object conn for the client connection, accepts a connection from the listening socket into conn->csock (the socket to the client), and then calls tcp_vs_conn_handle for the actual data handling. A sketch of the conn object is given below.
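A minimal sketch of struct tcp_vs_conn, limited to the fields these excerpts use; the layout is my assumption, not the literal definition.

/* Hedged sketch, not the literal ktcpvs definition. */
struct tcp_vs_conn {
	struct socket *csock;	/* client socket, filled by the accept above */
	struct socket *dsock;	/* destination (real server) socket, set by the scheduler */
	char *buffer;		/* scratch buffer handed to tcp_vs_conn_create */
	int buflen;
};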
8. The tcp_vs_conn_handle function
int tcp_vs_conn_handle(struct tcp_vs_conn *conn, struct tcp_vs_service *svc)
{
	struct socket *csock, *dsock;
	unsigned long lastupdated;
	DECLARE_WAITQUEUE(wait1, current);
	DECLARE_WAITQUEUE(wait2, current);

	csock = conn->csock;
	if (csock->sk->sk_state != TCP_ESTABLISHED) {
		if (csock->sk->sk_state == TCP_CLOSE_WAIT)
			return 0;
		return -1;
	}
	switch (svc->scheduler->schedule(conn, svc)) {
	case 1:		/* scheduler has done all the work */
		return 0;
	case 0:		/* further processing needed */
		break;
	case -1:	/* try to redirect the connection to other sockets */
		/* fault tolerance function */
		if (!fault_redirect(conn, svc))
			break;
		if (svc->conf.redirect_port) {
			redirect_to_local(conn, svc->conf.redirect_addr,
					  svc->conf.redirect_port);
			return 0;
		}
		return -1;
	default:
		return 0;
	}
	dsock = conn->dsock;
	lastupdated = jiffies;
	while ((jiffies - lastupdated) < sysctl_ktcpvs_read_timeout * HZ) {
		/* if the connection is closed, go out of this loop */
		if (dsock->sk->sk_state != TCP_ESTABLISHED
		    && dsock->sk->sk_state != TCP_CLOSE_WAIT)
			break;
		if (csock->sk->sk_state != TCP_ESTABLISHED
		    && csock->sk->sk_state != TCP_CLOSE_WAIT)
			break;
		/* Do we have data from the server? */
		if (!skb_queue_empty(&(dsock->sk->sk_receive_queue))) {
			if (tcp_vs_relay_socket(dsock, csock) == 0)
				break;
			lastupdated = jiffies;
		}
		/* Do we have data from the client? */
		if (!skb_queue_empty(&(csock->sk->sk_receive_queue))) {
			if (tcp_vs_relay_socket(csock, dsock) == 0)
				break;
			lastupdated = jiffies;
		}
		if (skb_queue_empty(&(dsock->sk->sk_receive_queue))
		    && skb_queue_empty(&(csock->sk->sk_receive_queue))) {
			if (dsock->sk->sk_state == TCP_CLOSE_WAIT
			    || csock->sk->sk_state == TCP_CLOSE_WAIT)
				break;
			/*
			 * Put the current task on the sleep wait queue
			 * of both the sockets, wake up the task if one
			 * socket has some data ready.
			 */
			add_wait_queue(csock->sk->sk_sleep, &wait1);
			add_wait_queue(dsock->sk->sk_sleep, &wait2);
			__set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(HZ);	/* sleep until woken by data or
						 * timeout (exact value assumed) */
			__set_current_state(TASK_RUNNING);
			remove_wait_queue(csock->sk->sk_sleep, &wait1);
			remove_wait_queue(dsock->sk->sk_sleep, &wait2);
		}
	}
	return 0;
}
The client's request is handled by whichever scheduler the config assigns to the service. If the scheduler is "http", tcp_vs_http.c defines:

static struct tcp_vs_scheduler tcp_vs_http_scheduler = {
	{0},			/* n_list */
	"http",			/* name */
	THIS_MODULE,		/* this module */
	tcp_vs_http_init_svc,	/* initializer */
	tcp_vs_http_done_svc,	/* done */
	tcp_vs_http_update_svc,	/* update */
	tcp_vs_http_schedule,	/* select a server by http request */
};

The scheduling function is tcp_vs_http_schedule: it selects a real server by inspecting the HTTP request, establishes a connection to it, and forwards the client's data. After scheduling, tcp_vs_relay_socket relays the real server's response back to the client.
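tcp_vs_relay_socket itself is not quoted above. As a rough user-space analogue (my sketch, not ktcpvs code), relaying amounts to draining whatever sits in one socket's receive queue and writing it to the other, with a return value of 0 signalling that the connection should be torn down, which is exactly how the loop in tcp_vs_conn_handle interprets it:

#include <errno.h>
#include <sys/socket.h>
#include <sys/types.h>

/* Illustration only: drain whatever is queued on `from` and write it to
 * `to`. Returns 0 when the peer has closed, 1 when all currently available
 * data has been relayed, -1 on error. */
static int relay_socket(int from, int to)
{
	char buf[4096];
	ssize_t n;

	while ((n = recv(from, buf, sizeof(buf), MSG_DONTWAIT)) > 0) {
		const char *p = buf;

		while (n > 0) {			/* cope with short writes */
			ssize_t w = send(to, p, n, 0);

			if (w < 0)
				return -1;
			p += w;
			n -= w;
		}
	}
	if (n == 0)
		return 0;			/* peer closed the connection */
	if (errno == EAGAIN || errno == EWOULDBLOCK)
		return 1;			/* receive queue drained */
	return -1;				/* real error */
}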