]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * IP Virtual Server | |
3 | * data structure and functionality definitions | |
4 | */ | |
5 | ||
6 | #ifndef _NET_IP_VS_H | |
7 | #define _NET_IP_VS_H | |
8 | ||
9 | #include <linux/ip_vs.h> /* definitions shared with userland */ | |
10 | ||
11 | /* old ipvsadm versions still include this file directly */ | |
12 | #ifdef __KERNEL__ | |
13 | ||
14 | #include <asm/types.h> /* for __uXX types */ | |
15 | ||
16 | #include <linux/sysctl.h> /* for ctl_path */ | |
17 | #include <linux/list.h> /* for struct list_head */ | |
18 | #include <linux/spinlock.h> /* for struct rwlock_t */ | |
19 | #include <asm/atomic.h> /* for struct atomic_t */ | |
20 | #include <linux/compiler.h> | |
21 | #include <linux/timer.h> | |
22 | ||
23 | #include <net/checksum.h> | |
24 | #include <linux/netfilter.h> /* for union nf_inet_addr */ | |
25 | #include <linux/ip.h> | |
26 | #include <linux/ipv6.h> /* for struct ipv6hdr */ | |
27 | #include <net/ipv6.h> /* for ipv6_addr_copy */ | |
28 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | |
29 | #include <net/netfilter/nf_conntrack.h> | |
30 | #endif | |
31 | #include <net/net_namespace.h> /* Netw namespace */ | |
32 | ||
33 | /* | |
34 | * Generic access of ipvs struct | |
35 | */ | |
36 | static inline struct netns_ipvs *net_ipvs(struct net* net) | |
37 | { | |
38 | return net->ipvs; | |
39 | } | |
40 | /* | |
41 | * Get net ptr from skb in traffic cases | |
42 | * use skb_sknet when call is from userland (ioctl or netlink) | |
43 | */ | |
44 | static inline struct net *skb_net(const struct sk_buff *skb) | |
45 | { | |
46 | #ifdef CONFIG_NET_NS | |
47 | #ifdef CONFIG_IP_VS_DEBUG | |
48 | /* | |
49 | * This is used for debug only. | |
50 | * Start with the most likely hit | |
51 | * End with BUG | |
52 | */ | |
53 | if (likely(skb->dev && skb->dev->nd_net)) | |
54 | return dev_net(skb->dev); | |
55 | if (skb_dst(skb) && skb_dst(skb)->dev) | |
56 | return dev_net(skb_dst(skb)->dev); | |
57 | WARN(skb->sk, "Maybe skb_sknet should be used in %s() at line:%d\n", | |
58 | __func__, __LINE__); | |
59 | if (likely(skb->sk && skb->sk->sk_net)) | |
60 | return sock_net(skb->sk); | |
61 | pr_err("There is no net ptr to find in the skb in %s() line:%d\n", | |
62 | __func__, __LINE__); | |
63 | BUG(); | |
64 | #else | |
65 | return dev_net(skb->dev ? : skb_dst(skb)->dev); | |
66 | #endif | |
67 | #else | |
68 | return &init_net; | |
69 | #endif | |
70 | } | |
71 | ||
72 | static inline struct net *skb_sknet(const struct sk_buff *skb) | |
73 | { | |
74 | #ifdef CONFIG_NET_NS | |
75 | #ifdef CONFIG_IP_VS_DEBUG | |
76 | /* Start with the most likely hit */ | |
77 | if (likely(skb->sk && skb->sk->sk_net)) | |
78 | return sock_net(skb->sk); | |
79 | WARN(skb->dev, "Maybe skb_net should be used instead in %s() line:%d\n", | |
80 | __func__, __LINE__); | |
81 | if (likely(skb->dev && skb->dev->nd_net)) | |
82 | return dev_net(skb->dev); | |
83 | pr_err("There is no net ptr to find in the skb in %s() line:%d\n", | |
84 | __func__, __LINE__); | |
85 | BUG(); | |
86 | #else | |
87 | return sock_net(skb->sk); | |
88 | #endif | |
89 | #else | |
90 | return &init_net; | |
91 | #endif | |
92 | } | |
93 | /* | |
94 | * This one needed for single_open_net since net is stored directly in | |
95 | * private not as a struct i.e. seq_file_net can't be used. | |
96 | */ | |
97 | static inline struct net *seq_file_single_net(struct seq_file *seq) | |
98 | { | |
99 | #ifdef CONFIG_NET_NS | |
100 | return (struct net *)seq->private; | |
101 | #else | |
102 | return &init_net; | |
103 | #endif | |
104 | } | |
105 | ||
/* Size value of the connection table; needed by ip_vs_ctl.c */
extern int ip_vs_conn_tab_size;
108 | ||
109 | ||
110 | struct ip_vs_iphdr { | |
111 | int len; | |
112 | __u8 protocol; | |
113 | union nf_inet_addr saddr; | |
114 | union nf_inet_addr daddr; | |
115 | }; | |
116 | ||
117 | static inline void | |
118 | ip_vs_fill_iphdr(int af, const void *nh, struct ip_vs_iphdr *iphdr) | |
119 | { | |
120 | #ifdef CONFIG_IP_VS_IPV6 | |
121 | if (af == AF_INET6) { | |
122 | const struct ipv6hdr *iph = nh; | |
123 | iphdr->len = sizeof(struct ipv6hdr); | |
124 | iphdr->protocol = iph->nexthdr; | |
125 | ipv6_addr_copy(&iphdr->saddr.in6, &iph->saddr); | |
126 | ipv6_addr_copy(&iphdr->daddr.in6, &iph->daddr); | |
127 | } else | |
128 | #endif | |
129 | { | |
130 | const struct iphdr *iph = nh; | |
131 | iphdr->len = iph->ihl * 4; | |
132 | iphdr->protocol = iph->protocol; | |
133 | iphdr->saddr.ip = iph->saddr; | |
134 | iphdr->daddr.ip = iph->daddr; | |
135 | } | |
136 | } | |
137 | ||
138 | static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst, | |
139 | const union nf_inet_addr *src) | |
140 | { | |
141 | #ifdef CONFIG_IP_VS_IPV6 | |
142 | if (af == AF_INET6) | |
143 | ipv6_addr_copy(&dst->in6, &src->in6); | |
144 | else | |
145 | #endif | |
146 | dst->ip = src->ip; | |
147 | } | |
148 | ||
149 | static inline int ip_vs_addr_equal(int af, const union nf_inet_addr *a, | |
150 | const union nf_inet_addr *b) | |
151 | { | |
152 | #ifdef CONFIG_IP_VS_IPV6 | |
153 | if (af == AF_INET6) | |
154 | return ipv6_addr_equal(&a->in6, &b->in6); | |
155 | #endif | |
156 | return a->ip == b->ip; | |
157 | } | |
158 | ||
#ifdef CONFIG_IP_VS_DEBUG
#include <linux/net.h>

extern int ip_vs_get_debug_level(void);

/*
 * ip_vs_dbg_addr - format one address into a shared debug buffer.
 * @af:      address family; AF_INET6 prints "[%pI6]" when CONFIG_IP_VS_IPV6
 *           is set, everything else prints "%pI4".
 * @buf:     scratch buffer shared by all addresses of one message.
 * @buf_len: total size of @buf.
 * @addr:    address to print.
 * @idx:     in/out write offset into @buf; advanced past the string and
 *           its terminating NUL.
 *
 * Returns a pointer to the start of the string just written.
 *
 * The string plus its NUL occupies [old *idx, new *idx), so it fits only
 * when the new *idx is <= @buf_len.  The check used to allow
 * buf_len + 1, which accepted an output truncated by one character and
 * left *idx past the end of the buffer, so the next call computed a
 * wrapped-around size from "buf_len - *idx".
 */
static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
					 const union nf_inet_addr *addr,
					 int *idx)
{
	int len;
#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6)
		len = snprintf(&buf[*idx], buf_len - *idx, "[%pI6]",
			       &addr->in6) + 1;
	else
#endif
		len = snprintf(&buf[*idx], buf_len - *idx, "%pI4",
			       &addr->ip) + 1;

	*idx += len;
	BUG_ON(*idx > buf_len);
	return &buf[*idx - len];
}

/*
 * Debug print that provides the scratch buffer and index expected by
 * IP_VS_DBG_ADDR(); emitted only when @level is within the current
 * debug level.
 */
#define IP_VS_DBG_BUF(level, msg, ...)					\
	do {								\
		char ip_vs_dbg_buf[160];				\
		int ip_vs_dbg_idx = 0;					\
		if ((level) <= ip_vs_get_debug_level())			\
			printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__);	\
	} while (0)
/* Error print that provides the same scratch buffer and index. */
#define IP_VS_ERR_BUF(msg...)						\
	do {								\
		char ip_vs_dbg_buf[160];				\
		int ip_vs_dbg_idx = 0;					\
		pr_err(msg);						\
	} while (0)

/* Only use from within IP_VS_DBG_BUF() or IP_VS_ERR_BUF macros */
#define IP_VS_DBG_ADDR(af, addr)					\
	ip_vs_dbg_addr(af, ip_vs_dbg_buf,				\
		       sizeof(ip_vs_dbg_buf), addr,			\
		       &ip_vs_dbg_idx)

/* Plain debug print, gated on the current debug level. */
#define IP_VS_DBG(level, msg, ...)					\
	do {								\
		if ((level) <= ip_vs_get_debug_level())			\
			printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__);	\
	} while (0)
/* Debug print gated on net_ratelimit() only (no level check). */
#define IP_VS_DBG_RL(msg, ...)						\
	do {								\
		if (net_ratelimit())					\
			printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__);	\
	} while (0)
/* Calls the protocol's debug_packet() hook, gated on the debug level. */
#define IP_VS_DBG_PKT(level, af, pp, skb, ofs, msg)			\
	do {								\
		if ((level) <= ip_vs_get_debug_level())			\
			(pp)->debug_packet(af, pp, skb, ofs, msg);	\
	} while (0)
/* As IP_VS_DBG_PKT(), additionally gated on net_ratelimit(). */
#define IP_VS_DBG_RL_PKT(level, af, pp, skb, ofs, msg)			\
	do {								\
		if ((level) <= ip_vs_get_debug_level() &&		\
		    net_ratelimit())					\
			(pp)->debug_packet(af, pp, skb, ofs, msg);	\
	} while (0)
#else	/* NO DEBUGGING at ALL */
#define IP_VS_DBG_BUF(level, msg...)  do {} while (0)
#define IP_VS_ERR_BUF(msg...)  do {} while (0)
#define IP_VS_DBG(level, msg...)  do {} while (0)
#define IP_VS_DBG_RL(msg...)  do {} while (0)
#define IP_VS_DBG_PKT(level, af, pp, skb, ofs, msg)	do {} while (0)
#define IP_VS_DBG_RL_PKT(level, af, pp, skb, ofs, msg)	do {} while (0)
#endif
232 | ||
/* IPVS spelling of BUG(). */
#define IP_VS_BUG() BUG()

/* pr_err() that is only issued when net_ratelimit() allows it. */
#define IP_VS_ERR_RL(msg, ...)				\
	do {						\
		if (net_ratelimit())			\
			pr_err(msg, ##__VA_ARGS__);	\
	} while (0)
239 | ||
/*
 * Function entry/exit tracing.
 *
 * With CONFIG_IP_VS_DEBUG each macro prints the function name, file and
 * line at KERN_DEBUG when @level is within the current debug level;
 * otherwise both compile to nothing.  @level is parenthesized so that an
 * expression argument cannot rebind against the "<=" comparison.
 */
#ifdef CONFIG_IP_VS_DEBUG
#define EnterFunction(level)						\
	do {								\
		if ((level) <= ip_vs_get_debug_level())			\
			printk(KERN_DEBUG				\
			       pr_fmt("Enter: %s, %s line %i\n"),	\
			       __func__, __FILE__, __LINE__);		\
	} while (0)
#define LeaveFunction(level)						\
	do {								\
		if ((level) <= ip_vs_get_debug_level())			\
			printk(KERN_DEBUG				\
			       pr_fmt("Leave: %s, %s line %i\n"),	\
			       __func__, __FILE__, __LINE__);		\
	} while (0)
#else
#define EnterFunction(level)   do {} while (0)
#define LeaveFunction(level)   do {} while (0)
#endif
259 | ||
/*
 * IP_VS_WAIT_WHILE - spin, calling cpu_relax() on every pass, for as long
 * as @expr evaluates true.
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one
 * statement: "if (c) IP_VS_WAIT_WHILE(e); else ..." would not compile
 * with a bare "while (e) { ... }" followed by the caller's semicolon.
 */
#define IP_VS_WAIT_WHILE(expr)			\
	do {					\
		while (expr)			\
			cpu_relax();		\
	} while (0)
261 | ||
262 | ||
/*
 * FTP service port numbers, converted with cpu_to_be16()
 * (i.e. in network order).
 */
#define FTPPORT  cpu_to_be16(21)	/* port 21 */
#define FTPDATA  cpu_to_be16(20)	/* port 20 */
268 | ||
/*
 *	TCP State Values
 *
 *	Values are consecutive from 0; IP_VS_TCP_S_LAST is the count of
 *	real states.
 */
enum {
	IP_VS_TCP_S_NONE	= 0,
	IP_VS_TCP_S_ESTABLISHED	= 1,
	IP_VS_TCP_S_SYN_SENT	= 2,
	IP_VS_TCP_S_SYN_RECV	= 3,
	IP_VS_TCP_S_FIN_WAIT	= 4,
	IP_VS_TCP_S_TIME_WAIT	= 5,
	IP_VS_TCP_S_CLOSE	= 6,
	IP_VS_TCP_S_CLOSE_WAIT	= 7,
	IP_VS_TCP_S_LAST_ACK	= 8,
	IP_VS_TCP_S_LISTEN	= 9,
	IP_VS_TCP_S_SYNACK	= 10,
	IP_VS_TCP_S_LAST	= 11
};
286 | ||
/*
 *	UDP State Values
 */
enum {
	IP_VS_UDP_S_NORMAL	= 0,
	IP_VS_UDP_S_LAST	= 1,
};
294 | ||
/*
 *	ICMP State Values
 */
enum {
	IP_VS_ICMP_S_NORMAL	= 0,
	IP_VS_ICMP_S_LAST	= 1,
};
302 | ||
/*
 *	SCTP State Values
 *
 *	Values are consecutive from 0; IP_VS_SCTP_S_LAST is the count of
 *	real states.
 */
enum ip_vs_sctp_states {
	IP_VS_SCTP_S_NONE		= 0,
	IP_VS_SCTP_S_INIT_CLI		= 1,
	IP_VS_SCTP_S_INIT_SER		= 2,
	IP_VS_SCTP_S_INIT_ACK_CLI	= 3,
	IP_VS_SCTP_S_INIT_ACK_SER	= 4,
	IP_VS_SCTP_S_ECHO_CLI		= 5,
	IP_VS_SCTP_S_ECHO_SER		= 6,
	IP_VS_SCTP_S_ESTABLISHED	= 7,
	IP_VS_SCTP_S_SHUT_CLI		= 8,
	IP_VS_SCTP_S_SHUT_SER		= 9,
	IP_VS_SCTP_S_SHUT_ACK_CLI	= 10,
	IP_VS_SCTP_S_SHUT_ACK_SER	= 11,
	IP_VS_SCTP_S_CLOSED		= 12,
	IP_VS_SCTP_S_LAST		= 13
};
322 | ||
323 | /* | |
324 | * Delta sequence info structure | |
325 | * Each ip_vs_conn has 2 (output AND input seq. changes). | |
326 | * Only used in the VS/NAT. | |
327 | */ | |
328 | struct ip_vs_seq { | |
329 | __u32 init_seq; /* Add delta from this seq */ | |
330 | __u32 delta; /* Delta in sequence numbers */ | |
331 | __u32 previous_delta; /* Delta in sequence numbers | |
332 | before last resized pkt */ | |
333 | }; | |
334 | ||
335 | /* | |
336 | * counters per cpu | |
337 | */ | |
338 | struct ip_vs_counters { | |
339 | __u32 conns; /* connections scheduled */ | |
340 | __u32 inpkts; /* incoming packets */ | |
341 | __u32 outpkts; /* outgoing packets */ | |
342 | __u64 inbytes; /* incoming bytes */ | |
343 | __u64 outbytes; /* outgoing bytes */ | |
344 | }; | |
345 | /* | |
346 | * Stats per cpu | |
347 | */ | |
348 | struct ip_vs_cpu_stats { | |
349 | struct ip_vs_counters ustats; | |
350 | struct u64_stats_sync syncp; | |
351 | }; | |
352 | ||
353 | /* | |
354 | * IPVS statistics objects | |
355 | */ | |
356 | struct ip_vs_estimator { | |
357 | struct list_head list; | |
358 | ||
359 | u64 last_inbytes; | |
360 | u64 last_outbytes; | |
361 | u32 last_conns; | |
362 | u32 last_inpkts; | |
363 | u32 last_outpkts; | |
364 | ||
365 | u32 cps; | |
366 | u32 inpps; | |
367 | u32 outpps; | |
368 | u32 inbps; | |
369 | u32 outbps; | |
370 | }; | |
371 | ||
372 | struct ip_vs_stats { | |
373 | struct ip_vs_stats_user ustats; /* statistics */ | |
374 | struct ip_vs_estimator est; /* estimator */ | |
375 | struct ip_vs_cpu_stats *cpustats; /* per cpu counters */ | |
376 | spinlock_t lock; /* spin lock */ | |
377 | struct ip_vs_stats_user ustats0; /* reset values */ | |
378 | }; | |
379 | ||
/* Forward declarations of types used only through pointers below. */
struct dst_entry;
struct iphdr;
struct ip_vs_app;
struct ip_vs_conn;
struct ip_vs_proto_data;
struct sk_buff;
386 | ||
387 | struct ip_vs_protocol { | |
388 | struct ip_vs_protocol *next; | |
389 | char *name; | |
390 | u16 protocol; | |
391 | u16 num_states; | |
392 | int dont_defrag; | |
393 | ||
394 | void (*init)(struct ip_vs_protocol *pp); | |
395 | ||
396 | void (*exit)(struct ip_vs_protocol *pp); | |
397 | ||
398 | void (*init_netns)(struct net *net, struct ip_vs_proto_data *pd); | |
399 | ||
400 | void (*exit_netns)(struct net *net, struct ip_vs_proto_data *pd); | |
401 | ||
402 | int (*conn_schedule)(int af, struct sk_buff *skb, | |
403 | struct ip_vs_proto_data *pd, | |
404 | int *verdict, struct ip_vs_conn **cpp); | |
405 | ||
406 | struct ip_vs_conn * | |
407 | (*conn_in_get)(int af, | |
408 | const struct sk_buff *skb, | |
409 | const struct ip_vs_iphdr *iph, | |
410 | unsigned int proto_off, | |
411 | int inverse); | |
412 | ||
413 | struct ip_vs_conn * | |
414 | (*conn_out_get)(int af, | |
415 | const struct sk_buff *skb, | |
416 | const struct ip_vs_iphdr *iph, | |
417 | unsigned int proto_off, | |
418 | int inverse); | |
419 | ||
420 | int (*snat_handler)(struct sk_buff *skb, | |
421 | struct ip_vs_protocol *pp, struct ip_vs_conn *cp); | |
422 | ||
423 | int (*dnat_handler)(struct sk_buff *skb, | |
424 | struct ip_vs_protocol *pp, struct ip_vs_conn *cp); | |
425 | ||
426 | int (*csum_check)(int af, struct sk_buff *skb, | |
427 | struct ip_vs_protocol *pp); | |
428 | ||
429 | const char *(*state_name)(int state); | |
430 | ||
431 | int (*state_transition)(struct ip_vs_conn *cp, int direction, | |
432 | const struct sk_buff *skb, | |
433 | struct ip_vs_proto_data *pd); | |
434 | ||
435 | int (*register_app)(struct net *net, struct ip_vs_app *inc); | |
436 | ||
437 | void (*unregister_app)(struct net *net, struct ip_vs_app *inc); | |
438 | ||
439 | int (*app_conn_bind)(struct ip_vs_conn *cp); | |
440 | ||
441 | void (*debug_packet)(int af, struct ip_vs_protocol *pp, | |
442 | const struct sk_buff *skb, | |
443 | int offset, | |
444 | const char *msg); | |
445 | ||
446 | void (*timeout_change)(struct ip_vs_proto_data *pd, int flags); | |
447 | }; | |
448 | ||
449 | /* | |
450 | * protocol data per netns | |
451 | */ | |
452 | struct ip_vs_proto_data { | |
453 | struct ip_vs_proto_data *next; | |
454 | struct ip_vs_protocol *pp; | |
455 | int *timeout_table; /* protocol timeout table */ | |
456 | atomic_t appcnt; /* counter of proto app incs. */ | |
457 | struct tcp_states_t *tcp_state_table; | |
458 | }; | |
459 | ||
/* Protocol lookup: global handler, and per-netns protocol data. */
extern struct ip_vs_protocol *ip_vs_proto_get(unsigned short proto);
extern struct ip_vs_proto_data *
ip_vs_proto_data_get(struct net *net, unsigned short proto);
463 | ||
464 | struct ip_vs_conn_param { | |
465 | struct net *net; | |
466 | const union nf_inet_addr *caddr; | |
467 | const union nf_inet_addr *vaddr; | |
468 | __be16 cport; | |
469 | __be16 vport; | |
470 | __u16 protocol; | |
471 | u16 af; | |
472 | ||
473 | const struct ip_vs_pe *pe; | |
474 | char *pe_data; | |
475 | __u8 pe_data_len; | |
476 | }; | |
477 | ||
478 | /* | |
479 | * IP_VS structure allocated for each dynamically scheduled connection | |
480 | */ | |
481 | struct ip_vs_conn { | |
482 | struct hlist_node c_list; /* hashed list heads */ | |
483 | #ifdef CONFIG_NET_NS | |
484 | struct net *net; /* Name space */ | |
485 | #endif | |
486 | /* Protocol, addresses and port numbers */ | |
487 | u16 af; /* address family */ | |
488 | __be16 cport; | |
489 | __be16 vport; | |
490 | __be16 dport; | |
491 | __u32 fwmark; /* Fire wall mark from skb */ | |
492 | union nf_inet_addr caddr; /* client address */ | |
493 | union nf_inet_addr vaddr; /* virtual address */ | |
494 | union nf_inet_addr daddr; /* destination address */ | |
495 | volatile __u32 flags; /* status flags */ | |
496 | __u16 protocol; /* Which protocol (TCP/UDP) */ | |
497 | ||
498 | /* counter and timer */ | |
499 | atomic_t refcnt; /* reference count */ | |
500 | struct timer_list timer; /* Expiration timer */ | |
501 | volatile unsigned long timeout; /* timeout */ | |
502 | ||
503 | /* Flags and state transition */ | |
504 | spinlock_t lock; /* lock for state transition */ | |
505 | volatile __u16 state; /* state info */ | |
506 | volatile __u16 old_state; /* old state, to be used for | |
507 | * state transition triggerd | |
508 | * synchronization | |
509 | */ | |
510 | ||
511 | /* Control members */ | |
512 | struct ip_vs_conn *control; /* Master control connection */ | |
513 | atomic_t n_control; /* Number of controlled ones */ | |
514 | struct ip_vs_dest *dest; /* real server */ | |
515 | atomic_t in_pkts; /* incoming packet counter */ | |
516 | ||
517 | /* packet transmitter for different forwarding methods. If it | |
518 | mangles the packet, it must return NF_DROP or better NF_STOLEN, | |
519 | otherwise this must be changed to a sk_buff **. | |
520 | NF_ACCEPT can be returned when destination is local. | |
521 | */ | |
522 | int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp, | |
523 | struct ip_vs_protocol *pp); | |
524 | ||
525 | /* Note: we can group the following members into a structure, | |
526 | in order to save more space, and the following members are | |
527 | only used in VS/NAT anyway */ | |
528 | struct ip_vs_app *app; /* bound ip_vs_app object */ | |
529 | void *app_data; /* Application private data */ | |
530 | struct ip_vs_seq in_seq; /* incoming seq. struct */ | |
531 | struct ip_vs_seq out_seq; /* outgoing seq. struct */ | |
532 | ||
533 | const struct ip_vs_pe *pe; | |
534 | char *pe_data; | |
535 | __u8 pe_data_len; | |
536 | }; | |
537 | ||
538 | /* | |
539 | * To save some memory in conn table when name space is disabled. | |
540 | */ | |
541 | static inline struct net *ip_vs_conn_net(const struct ip_vs_conn *cp) | |
542 | { | |
543 | #ifdef CONFIG_NET_NS | |
544 | return cp->net; | |
545 | #else | |
546 | return &init_net; | |
547 | #endif | |
548 | } | |
/*
 * ip_vs_conn_net_set - record the namespace of a connection.
 * Stores @net in cp->net under CONFIG_NET_NS; does nothing otherwise.
 */
static inline void ip_vs_conn_net_set(struct ip_vs_conn *cp, struct net *net)
{
#if defined(CONFIG_NET_NS)
	cp->net = net;
#endif
}
555 | ||
/*
 * ip_vs_conn_net_eq - does a connection belong to the given namespace?
 * Compares cp->net with @net under CONFIG_NET_NS; without it every
 * connection matches and 1 is returned.
 */
static inline int ip_vs_conn_net_eq(const struct ip_vs_conn *cp,
				    struct net *net)
{
#ifndef CONFIG_NET_NS
	return 1;
#else
	return cp->net == net;
#endif
}
565 | ||
566 | /* | |
567 | * Extended internal versions of struct ip_vs_service_user and | |
568 | * ip_vs_dest_user for IPv6 support. | |
569 | * | |
570 | * We need these to conveniently pass around service and destination | |
571 | * options, but unfortunately, we also need to keep the old definitions to | |
572 | * maintain userspace backwards compatibility for the setsockopt interface. | |
573 | */ | |
574 | struct ip_vs_service_user_kern { | |
575 | /* virtual service addresses */ | |
576 | u16 af; | |
577 | u16 protocol; | |
578 | union nf_inet_addr addr; /* virtual ip address */ | |
579 | u16 port; | |
580 | u32 fwmark; /* firwall mark of service */ | |
581 | ||
582 | /* virtual service options */ | |
583 | char *sched_name; | |
584 | char *pe_name; | |
585 | unsigned flags; /* virtual service flags */ | |
586 | unsigned timeout; /* persistent timeout in sec */ | |
587 | u32 netmask; /* persistent netmask */ | |
588 | }; | |
589 | ||
590 | ||
591 | struct ip_vs_dest_user_kern { | |
592 | /* destination server address */ | |
593 | union nf_inet_addr addr; | |
594 | u16 port; | |
595 | ||
596 | /* real server options */ | |
597 | unsigned conn_flags; /* connection flags */ | |
598 | int weight; /* destination weight */ | |
599 | ||
600 | /* thresholds for active connections */ | |
601 | u32 u_threshold; /* upper threshold */ | |
602 | u32 l_threshold; /* lower threshold */ | |
603 | }; | |
604 | ||
605 | ||
606 | /* | |
607 | * The information about the virtual service offered to the net | |
608 | * and the forwarding entries | |
609 | */ | |
610 | struct ip_vs_service { | |
611 | struct list_head s_list; /* for normal service table */ | |
612 | struct list_head f_list; /* for fwmark-based service table */ | |
613 | atomic_t refcnt; /* reference counter */ | |
614 | atomic_t usecnt; /* use counter */ | |
615 | ||
616 | u16 af; /* address family */ | |
617 | __u16 protocol; /* which protocol (TCP/UDP) */ | |
618 | union nf_inet_addr addr; /* IP address for virtual service */ | |
619 | __be16 port; /* port number for the service */ | |
620 | __u32 fwmark; /* firewall mark of the service */ | |
621 | unsigned flags; /* service status flags */ | |
622 | unsigned timeout; /* persistent timeout in ticks */ | |
623 | __be32 netmask; /* grouping granularity */ | |
624 | struct net *net; | |
625 | ||
626 | struct list_head destinations; /* real server d-linked list */ | |
627 | __u32 num_dests; /* number of servers */ | |
628 | struct ip_vs_stats stats; /* statistics for the service */ | |
629 | struct ip_vs_app *inc; /* bind conns to this app inc */ | |
630 | ||
631 | /* for scheduling */ | |
632 | struct ip_vs_scheduler *scheduler; /* bound scheduler object */ | |
633 | rwlock_t sched_lock; /* lock sched_data */ | |
634 | void *sched_data; /* scheduler application data */ | |
635 | ||
636 | /* alternate persistence engine */ | |
637 | struct ip_vs_pe *pe; | |
638 | }; | |
639 | ||
640 | ||
641 | /* | |
642 | * The real server destination forwarding entry | |
643 | * with ip address, port number, and so on. | |
644 | */ | |
645 | struct ip_vs_dest { | |
646 | struct list_head n_list; /* for the dests in the service */ | |
647 | struct list_head d_list; /* for table with all the dests */ | |
648 | ||
649 | u16 af; /* address family */ | |
650 | __be16 port; /* port number of the server */ | |
651 | union nf_inet_addr addr; /* IP address of the server */ | |
652 | volatile unsigned flags; /* dest status flags */ | |
653 | atomic_t conn_flags; /* flags to copy to conn */ | |
654 | atomic_t weight; /* server weight */ | |
655 | ||
656 | atomic_t refcnt; /* reference counter */ | |
657 | struct ip_vs_stats stats; /* statistics */ | |
658 | ||
659 | /* connection counters and thresholds */ | |
660 | atomic_t activeconns; /* active connections */ | |
661 | atomic_t inactconns; /* inactive connections */ | |
662 | atomic_t persistconns; /* persistent connections */ | |
663 | __u32 u_threshold; /* upper threshold */ | |
664 | __u32 l_threshold; /* lower threshold */ | |
665 | ||
666 | /* for destination cache */ | |
667 | spinlock_t dst_lock; /* lock of dst_cache */ | |
668 | struct dst_entry *dst_cache; /* destination cache entry */ | |
669 | u32 dst_rtos; /* RT_TOS(tos) for dst */ | |
670 | u32 dst_cookie; | |
671 | #ifdef CONFIG_IP_VS_IPV6 | |
672 | struct in6_addr dst_saddr; | |
673 | #endif | |
674 | ||
675 | /* for virtual service */ | |
676 | struct ip_vs_service *svc; /* service it belongs to */ | |
677 | __u16 protocol; /* which protocol (TCP/UDP) */ | |
678 | __be16 vport; /* virtual port number */ | |
679 | union nf_inet_addr vaddr; /* virtual IP address */ | |
680 | __u32 vfwmark; /* firewall mark of service */ | |
681 | }; | |
682 | ||
683 | ||
684 | /* | |
685 | * The scheduler object | |
686 | */ | |
687 | struct ip_vs_scheduler { | |
688 | struct list_head n_list; /* d-linked list head */ | |
689 | char *name; /* scheduler name */ | |
690 | atomic_t refcnt; /* reference counter */ | |
691 | struct module *module; /* THIS_MODULE/NULL */ | |
692 | ||
693 | /* scheduler initializing service */ | |
694 | int (*init_service)(struct ip_vs_service *svc); | |
695 | /* scheduling service finish */ | |
696 | int (*done_service)(struct ip_vs_service *svc); | |
697 | /* scheduler updating service */ | |
698 | int (*update_service)(struct ip_vs_service *svc); | |
699 | ||
700 | /* selecting a server from the given service */ | |
701 | struct ip_vs_dest* (*schedule)(struct ip_vs_service *svc, | |
702 | const struct sk_buff *skb); | |
703 | }; | |
704 | ||
705 | /* The persistence engine object */ | |
706 | struct ip_vs_pe { | |
707 | struct list_head n_list; /* d-linked list head */ | |
708 | char *name; /* scheduler name */ | |
709 | atomic_t refcnt; /* reference counter */ | |
710 | struct module *module; /* THIS_MODULE/NULL */ | |
711 | ||
712 | /* get the connection template, if any */ | |
713 | int (*fill_param)(struct ip_vs_conn_param *p, struct sk_buff *skb); | |
714 | bool (*ct_match)(const struct ip_vs_conn_param *p, | |
715 | struct ip_vs_conn *ct); | |
716 | u32 (*hashkey_raw)(const struct ip_vs_conn_param *p, u32 initval, | |
717 | bool inverse); | |
718 | int (*show_pe_data)(const struct ip_vs_conn *cp, char *buf); | |
719 | }; | |
720 | ||
721 | /* | |
722 | * The application module object (a.k.a. app incarnation) | |
723 | */ | |
724 | struct ip_vs_app { | |
725 | struct list_head a_list; /* member in app list */ | |
726 | int type; /* IP_VS_APP_TYPE_xxx */ | |
727 | char *name; /* application module name */ | |
728 | __u16 protocol; | |
729 | struct module *module; /* THIS_MODULE/NULL */ | |
730 | struct list_head incs_list; /* list of incarnations */ | |
731 | ||
732 | /* members for application incarnations */ | |
733 | struct list_head p_list; /* member in proto app list */ | |
734 | struct ip_vs_app *app; /* its real application */ | |
735 | __be16 port; /* port number in net order */ | |
736 | atomic_t usecnt; /* usage counter */ | |
737 | ||
738 | /* | |
739 | * output hook: Process packet in inout direction, diff set for TCP. | |
740 | * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok, | |
741 | * 2=Mangled but checksum was not updated | |
742 | */ | |
743 | int (*pkt_out)(struct ip_vs_app *, struct ip_vs_conn *, | |
744 | struct sk_buff *, int *diff); | |
745 | ||
746 | /* | |
747 | * input hook: Process packet in outin direction, diff set for TCP. | |
748 | * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok, | |
749 | * 2=Mangled but checksum was not updated | |
750 | */ | |
751 | int (*pkt_in)(struct ip_vs_app *, struct ip_vs_conn *, | |
752 | struct sk_buff *, int *diff); | |
753 | ||
754 | /* ip_vs_app initializer */ | |
755 | int (*init_conn)(struct ip_vs_app *, struct ip_vs_conn *); | |
756 | ||
757 | /* ip_vs_app finish */ | |
758 | int (*done_conn)(struct ip_vs_app *, struct ip_vs_conn *); | |
759 | ||
760 | ||
761 | /* not used now */ | |
762 | int (*bind_conn)(struct ip_vs_app *, struct ip_vs_conn *, | |
763 | struct ip_vs_protocol *); | |
764 | ||
765 | void (*unbind_conn)(struct ip_vs_app *, struct ip_vs_conn *); | |
766 | ||
767 | int * timeout_table; | |
768 | int * timeouts; | |
769 | int timeouts_size; | |
770 | ||
771 | int (*conn_schedule)(struct sk_buff *skb, struct ip_vs_app *app, | |
772 | int *verdict, struct ip_vs_conn **cpp); | |
773 | ||
774 | struct ip_vs_conn * | |
775 | (*conn_in_get)(const struct sk_buff *skb, struct ip_vs_app *app, | |
776 | const struct iphdr *iph, unsigned int proto_off, | |
777 | int inverse); | |
778 | ||
779 | struct ip_vs_conn * | |
780 | (*conn_out_get)(const struct sk_buff *skb, struct ip_vs_app *app, | |
781 | const struct iphdr *iph, unsigned int proto_off, | |
782 | int inverse); | |
783 | ||
784 | int (*state_transition)(struct ip_vs_conn *cp, int direction, | |
785 | const struct sk_buff *skb, | |
786 | struct ip_vs_app *app); | |
787 | ||
788 | void (*timeout_change)(struct ip_vs_app *app, int flags); | |
789 | }; | |
790 | ||
791 | /* IPVS in network namespace */ | |
792 | struct netns_ipvs { | |
793 | int gen; /* Generation */ | |
794 | /* | |
795 | * Hash table: for real service lookups | |
796 | */ | |
797 | #define IP_VS_RTAB_BITS 4 | |
798 | #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS) | |
799 | #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1) | |
800 | ||
801 | struct list_head rs_table[IP_VS_RTAB_SIZE]; | |
802 | /* ip_vs_app */ | |
803 | struct list_head app_list; | |
804 | ||
805 | /* ip_vs_proto */ | |
806 | #define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */ | |
807 | struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE]; | |
808 | /* ip_vs_proto_tcp */ | |
809 | #ifdef CONFIG_IP_VS_PROTO_TCP | |
810 | #define TCP_APP_TAB_BITS 4 | |
811 | #define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS) | |
812 | #define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1) | |
813 | struct list_head tcp_apps[TCP_APP_TAB_SIZE]; | |
814 | spinlock_t tcp_app_lock; | |
815 | #endif | |
816 | /* ip_vs_proto_udp */ | |
817 | #ifdef CONFIG_IP_VS_PROTO_UDP | |
818 | #define UDP_APP_TAB_BITS 4 | |
819 | #define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS) | |
820 | #define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1) | |
821 | struct list_head udp_apps[UDP_APP_TAB_SIZE]; | |
822 | spinlock_t udp_app_lock; | |
823 | #endif | |
824 | /* ip_vs_proto_sctp */ | |
825 | #ifdef CONFIG_IP_VS_PROTO_SCTP | |
826 | #define SCTP_APP_TAB_BITS 4 | |
827 | #define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS) | |
828 | #define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1) | |
829 | /* Hash table for SCTP application incarnations */ | |
830 | struct list_head sctp_apps[SCTP_APP_TAB_SIZE]; | |
831 | spinlock_t sctp_app_lock; | |
832 | #endif | |
833 | /* ip_vs_conn */ | |
834 | atomic_t conn_count; /* connection counter */ | |
835 | ||
836 | /* ip_vs_ctl */ | |
837 | struct ip_vs_stats tot_stats; /* Statistics & est. */ | |
838 | ||
839 | int num_services; /* no of virtual services */ | |
840 | ||
841 | rwlock_t rs_lock; /* real services table */ | |
842 | /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */ | |
843 | struct lock_class_key ctl_key; /* ctl_mutex debuging */ | |
844 | /* Trash for destinations */ | |
845 | struct list_head dest_trash; | |
846 | /* Service counters */ | |
847 | atomic_t ftpsvc_counter; | |
848 | atomic_t nullsvc_counter; | |
849 | ||
850 | #ifdef CONFIG_SYSCTL | |
851 | /* 1/rate drop and drop-entry variables */ | |
852 | struct delayed_work defense_work; /* Work handler */ | |
853 | int drop_rate; | |
854 | int drop_counter; | |
855 | atomic_t dropentry; | |
856 | /* locks in ctl.c */ | |
857 | spinlock_t dropentry_lock; /* drop entry handling */ | |
858 | spinlock_t droppacket_lock; /* drop packet handling */ | |
859 | spinlock_t securetcp_lock; /* state and timeout tables */ | |
860 | ||
861 | /* sys-ctl struct */ | |
862 | struct ctl_table_header *sysctl_hdr; | |
863 | struct ctl_table *sysctl_tbl; | |
864 | #endif | |
865 | ||
866 | /* sysctl variables */ | |
867 | int sysctl_amemthresh; | |
868 | int sysctl_am_droprate; | |
869 | int sysctl_drop_entry; | |
870 | int sysctl_drop_packet; | |
871 | int sysctl_secure_tcp; | |
872 | #ifdef CONFIG_IP_VS_NFCT | |
873 | int sysctl_conntrack; | |
874 | #endif | |
875 | int sysctl_snat_reroute; | |
876 | int sysctl_sync_ver; | |
877 | int sysctl_cache_bypass; | |
878 | int sysctl_expire_nodest_conn; | |
879 | int sysctl_expire_quiescent_template; | |
880 | int sysctl_sync_threshold[2]; | |
881 | int sysctl_nat_icmp_send; | |
882 | ||
883 | /* ip_vs_lblc */ | |
884 | int sysctl_lblc_expiration; | |
885 | struct ctl_table_header *lblc_ctl_header; | |
886 | struct ctl_table *lblc_ctl_table; | |
887 | /* ip_vs_lblcr */ | |
888 | int sysctl_lblcr_expiration; | |
889 | struct ctl_table_header *lblcr_ctl_header; | |
890 | struct ctl_table *lblcr_ctl_table; | |
891 | /* ip_vs_est */ | |
892 | struct list_head est_list; /* estimator list */ | |
893 | spinlock_t est_lock; | |
894 | struct timer_list est_timer; /* Estimation timer */ | |
895 | /* ip_vs_sync */ | |
896 | struct list_head sync_queue; | |
897 | spinlock_t sync_lock; | |
898 | struct ip_vs_sync_buff *sync_buff; | |
899 | spinlock_t sync_buff_lock; | |
900 | struct sockaddr_in sync_mcast_addr; | |
901 | struct task_struct *master_thread; | |
902 | struct task_struct *backup_thread; | |
903 | int send_mesg_maxlen; | |
904 | int recv_mesg_maxlen; | |
905 | volatile int sync_state; | |
906 | volatile int master_syncid; | |
907 | volatile int backup_syncid; | |
908 | /* multicast interface name */ | |
909 | char master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; | |
910 | char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; | |
911 | /* net name space ptr */ | |
912 | struct net *net; /* Needed by timer routines */ | |
913 | }; | |
914 | ||
/* Compile-time defaults for the connection synchronisation tunables. */
#define DEFAULT_SYNC_THRESHOLD	3
#define DEFAULT_SYNC_PERIOD	50
#define DEFAULT_SYNC_VER	1

/*
 * Accessors for the sync tunables.
 *
 * With CONFIG_SYSCTL the values are read from the netns_ipvs instance
 * passed in; without it @ipvs is not touched and the DEFAULT_SYNC_*
 * constants above are returned instead.
 */

/* First element of ipvs->sysctl_sync_threshold[], or the default. */
static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
{
#ifdef CONFIG_SYSCTL
	return ipvs->sysctl_sync_threshold[0];
#else
	return DEFAULT_SYNC_THRESHOLD;
#endif
}

/* Second element of ipvs->sysctl_sync_threshold[], or the default. */
static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
{
#ifdef CONFIG_SYSCTL
	return ipvs->sysctl_sync_threshold[1];
#else
	return DEFAULT_SYNC_PERIOD;
#endif
}

/* ipvs->sysctl_sync_ver, or the default. */
static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
{
#ifdef CONFIG_SYSCTL
	return ipvs->sysctl_sync_ver;
#else
	return DEFAULT_SYNC_VER;
#endif
}
954 | ||
/*
 *	IPVS core functions
 *	(from ip_vs_core.c)
 */
const char *ip_vs_proto_name(unsigned int proto);
void ip_vs_init_hash_table(struct list_head *table, int rows);

/*
 * Call ip_vs_init_hash_table() on @t, passing the element count of @t
 * as the number of rows.  @t must be a true array (not a pointer) for
 * ARRAY_SIZE() to give the right count.
 */
#define IP_VS_INIT_HASH_TABLE(t) \
	ip_vs_init_hash_table((t), ARRAY_SIZE((t)))

#define IP_VS_APP_TYPE_FTP	1
964 | ||
965 | /* | |
966 | * ip_vs_conn handling functions | |
967 | * (from ip_vs_conn.c) | |
968 | */ | |
969 | ||
/*
 * Direction constants.  IP_VS_DIR_LAST is a sentinel whose value equals
 * the number of direction constants defined before it.
 */
enum {
	IP_VS_DIR_INPUT		= 0,
	IP_VS_DIR_OUTPUT	= 1,
	IP_VS_DIR_INPUT_ONLY	= 2,
	IP_VS_DIR_LAST		= 3,
};
976 | ||
977 | static inline void ip_vs_conn_fill_param(struct net *net, int af, int protocol, | |
978 | const union nf_inet_addr *caddr, | |
979 | __be16 cport, | |
980 | const union nf_inet_addr *vaddr, | |
981 | __be16 vport, | |
982 | struct ip_vs_conn_param *p) | |
983 | { | |
984 | p->net = net; | |
985 | p->af = af; | |
986 | p->protocol = protocol; | |
987 | p->caddr = caddr; | |
988 | p->cport = cport; | |
989 | p->vaddr = vaddr; | |
990 | p->vport = vport; | |
991 | p->pe = NULL; | |
992 | p->pe_data = NULL; | |
993 | } | |
994 | ||
/*
 * Connection lookups.  The plain variants take a prefilled
 * ip_vs_conn_param; the *_proto variants take the skb, the parsed IP
 * header, the protocol header offset and an inverse flag instead.
 */
struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p);
struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p);

struct ip_vs_conn *
ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
			const struct ip_vs_iphdr *iph,
			unsigned int proto_off, int inverse);

struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p);

struct ip_vs_conn *
ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
			 const struct ip_vs_iphdr *iph,
			 unsigned int proto_off, int inverse);
1009 | ||
1010 | /* put back the conn without restarting its timer */ | |
1011 | static inline void __ip_vs_conn_put(struct ip_vs_conn *cp) | |
1012 | { | |
1013 | atomic_dec(&cp->refcnt); | |
1014 | } | |
1015 | extern void ip_vs_conn_put(struct ip_vs_conn *cp); | |
1016 | extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport); | |
1017 | ||
1018 | struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, | |
1019 | const union nf_inet_addr *daddr, | |
1020 | __be16 dport, unsigned flags, | |
1021 | struct ip_vs_dest *dest, __u32 fwmark); | |
1022 | extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp); | |
1023 | ||
1024 | extern const char * ip_vs_state_name(__u16 proto, int state); | |
1025 | ||
1026 | extern void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp); | |
1027 | extern int ip_vs_check_template(struct ip_vs_conn *ct); | |
1028 | extern void ip_vs_random_dropentry(struct net *net); | |
1029 | extern int ip_vs_conn_init(void); | |
1030 | extern void ip_vs_conn_cleanup(void); | |
1031 | ||
1032 | static inline void ip_vs_control_del(struct ip_vs_conn *cp) | |
1033 | { | |
1034 | struct ip_vs_conn *ctl_cp = cp->control; | |
1035 | if (!ctl_cp) { | |
1036 | IP_VS_ERR_BUF("request control DEL for uncontrolled: " | |
1037 | "%s:%d to %s:%d\n", | |
1038 | IP_VS_DBG_ADDR(cp->af, &cp->caddr), | |
1039 | ntohs(cp->cport), | |
1040 | IP_VS_DBG_ADDR(cp->af, &cp->vaddr), | |
1041 | ntohs(cp->vport)); | |
1042 | ||
1043 | return; | |
1044 | } | |
1045 | ||
1046 | IP_VS_DBG_BUF(7, "DELeting control for: " | |
1047 | "cp.dst=%s:%d ctl_cp.dst=%s:%d\n", | |
1048 | IP_VS_DBG_ADDR(cp->af, &cp->caddr), | |
1049 | ntohs(cp->cport), | |
1050 | IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr), | |
1051 | ntohs(ctl_cp->cport)); | |
1052 | ||
1053 | cp->control = NULL; | |
1054 | if (atomic_read(&ctl_cp->n_control) == 0) { | |
1055 | IP_VS_ERR_BUF("BUG control DEL with n=0 : " | |
1056 | "%s:%d to %s:%d\n", | |
1057 | IP_VS_DBG_ADDR(cp->af, &cp->caddr), | |
1058 | ntohs(cp->cport), | |
1059 | IP_VS_DBG_ADDR(cp->af, &cp->vaddr), | |
1060 | ntohs(cp->vport)); | |
1061 | ||
1062 | return; | |
1063 | } | |
1064 | atomic_dec(&ctl_cp->n_control); | |
1065 | } | |
1066 | ||
1067 | static inline void | |
1068 | ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp) | |
1069 | { | |
1070 | if (cp->control) { | |
1071 | IP_VS_ERR_BUF("request control ADD for already controlled: " | |
1072 | "%s:%d to %s:%d\n", | |
1073 | IP_VS_DBG_ADDR(cp->af, &cp->caddr), | |
1074 | ntohs(cp->cport), | |
1075 | IP_VS_DBG_ADDR(cp->af, &cp->vaddr), | |
1076 | ntohs(cp->vport)); | |
1077 | ||
1078 | ip_vs_control_del(cp); | |
1079 | } | |
1080 | ||
1081 | IP_VS_DBG_BUF(7, "ADDing control for: " | |
1082 | "cp.dst=%s:%d ctl_cp.dst=%s:%d\n", | |
1083 | IP_VS_DBG_ADDR(cp->af, &cp->caddr), | |
1084 | ntohs(cp->cport), | |
1085 | IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr), | |
1086 | ntohs(ctl_cp->cport)); | |
1087 | ||
1088 | cp->control = ctl_cp; | |
1089 | atomic_inc(&ctl_cp->n_control); | |
1090 | } | |
1091 | ||
1092 | ||
1093 | /* | |
1094 | * IPVS application functions | |
1095 | * (from ip_vs_app.c) | |
1096 | */ | |
1097 | #define IP_VS_APP_MAX_PORTS 8 | |
1098 | extern int register_ip_vs_app(struct net *net, struct ip_vs_app *app); | |
1099 | extern void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app); | |
1100 | extern int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp); | |
1101 | extern void ip_vs_unbind_app(struct ip_vs_conn *cp); | |
1102 | extern int register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, | |
1103 | __u16 proto, __u16 port); | |
1104 | extern int ip_vs_app_inc_get(struct ip_vs_app *inc); | |
1105 | extern void ip_vs_app_inc_put(struct ip_vs_app *inc); | |
1106 | ||
1107 | extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb); | |
1108 | extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb); | |
1109 | extern int ip_vs_app_init(void); | |
1110 | extern void ip_vs_app_cleanup(void); | |
1111 | ||
/* ip_vs_pe binding, registration and lookup by name */
void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe);
void ip_vs_unbind_pe(struct ip_vs_service *svc);

int register_ip_vs_pe(struct ip_vs_pe *pe);
int unregister_ip_vs_pe(struct ip_vs_pe *pe);

struct ip_vs_pe *ip_vs_pe_getbyname(const char *name);
struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name);
1118 | ||
1119 | static inline void ip_vs_pe_get(const struct ip_vs_pe *pe) | |
1120 | { | |
1121 | if (pe && pe->module) | |
1122 | __module_get(pe->module); | |
1123 | } | |
1124 | ||
1125 | static inline void ip_vs_pe_put(const struct ip_vs_pe *pe) | |
1126 | { | |
1127 | if (pe && pe->module) | |
1128 | module_put(pe->module); | |
1129 | } | |
1130 | ||
/*
 *	IPVS protocol functions (from ip_vs_proto.c)
 */
int ip_vs_protocol_init(void);
void ip_vs_protocol_cleanup(void);
void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags);

int *ip_vs_create_timeout_table(int *table, int size);
int ip_vs_set_state_timeout(int *table, int num, const char *const *names,
			    const char *name, int to);

void ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
			       const struct sk_buff *skb,
			       int offset, const char *msg);

/* protocol descriptor objects, defined elsewhere */
extern struct ip_vs_protocol ip_vs_protocol_tcp;
extern struct ip_vs_protocol ip_vs_protocol_udp;
extern struct ip_vs_protocol ip_vs_protocol_icmp;
extern struct ip_vs_protocol ip_vs_protocol_esp;
extern struct ip_vs_protocol ip_vs_protocol_ah;
extern struct ip_vs_protocol ip_vs_protocol_sctp;
1152 | ||
/*
 *	Registering/unregistering scheduler functions
 *	(from ip_vs_sched.c)
 */
int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);

int ip_vs_bind_scheduler(struct ip_vs_service *svc,
			 struct ip_vs_scheduler *scheduler);
int ip_vs_unbind_scheduler(struct ip_vs_service *svc);

struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name);
void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler);

struct ip_vs_conn *
ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
	       struct ip_vs_proto_data *pd, int *ignored);
int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
		struct ip_vs_proto_data *pd);

void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg);
1171 | ||
1172 | ||
1173 | /* | |
1174 | * IPVS control data and functions (from ip_vs_ctl.c) | |
1175 | */ | |
1176 | extern struct ip_vs_stats ip_vs_stats; | |
1177 | extern const struct ctl_path net_vs_ctl_path[]; | |
1178 | extern int sysctl_ip_vs_sync_ver; | |
1179 | ||
1180 | extern void ip_vs_sync_switch_mode(struct net *net, int mode); | |
1181 | extern struct ip_vs_service * | |
1182 | ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, | |
1183 | const union nf_inet_addr *vaddr, __be16 vport); | |
1184 | ||
1185 | static inline void ip_vs_service_put(struct ip_vs_service *svc) | |
1186 | { | |
1187 | atomic_dec(&svc->usecnt); | |
1188 | } | |
1189 | ||
1190 | extern struct ip_vs_dest * | |
1191 | ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, | |
1192 | const union nf_inet_addr *daddr, __be16 dport); | |
1193 | ||
1194 | extern int ip_vs_use_count_inc(void); | |
1195 | extern void ip_vs_use_count_dec(void); | |
1196 | extern int ip_vs_control_init(void); | |
1197 | extern void ip_vs_control_cleanup(void); | |
1198 | extern struct ip_vs_dest * | |
1199 | ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr, | |
1200 | __be16 dport, const union nf_inet_addr *vaddr, __be16 vport, | |
1201 | __u16 protocol, __u32 fwmark); | |
1202 | extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp); | |
1203 | ||
1204 | ||
1205 | /* | |
1206 | * IPVS sync daemon data and function prototypes | |
1207 | * (from ip_vs_sync.c) | |
1208 | */ | |
1209 | extern int start_sync_thread(struct net *net, int state, char *mcast_ifn, | |
1210 | __u8 syncid); | |
1211 | extern int stop_sync_thread(struct net *net, int state); | |
1212 | extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp); | |
1213 | extern int ip_vs_sync_init(void); | |
1214 | extern void ip_vs_sync_cleanup(void); | |
1215 | ||
1216 | ||
/*
 *	IPVS rate estimator prototypes (from ip_vs_est.c)
 */
int ip_vs_estimator_init(void);
void ip_vs_estimator_cleanup(void);

void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats);
void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats);

void ip_vs_zero_estimator(struct ip_vs_stats *stats);
void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
			  struct ip_vs_stats *stats);
1227 | ||
/*
 *	Various IPVS packet transmitters (from ip_vs_xmit.c)
 *
 *	All transmitters share the (skb, cp, pp) parameter list; the
 *	ICMP variant additionally takes an offset.
 */
int ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		    struct ip_vs_protocol *pp);
int ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		      struct ip_vs_protocol *pp);
int ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		   struct ip_vs_protocol *pp);
int ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		      struct ip_vs_protocol *pp);
int ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp);
int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		    struct ip_vs_protocol *pp, int offset);

void ip_vs_dst_reset(struct ip_vs_dest *dest);
1244 | ||
#ifdef CONFIG_IP_VS_IPV6
/* _v6 counterparts of the transmitters, declared only for IPv6 builds */
int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
			 struct ip_vs_protocol *pp);
int ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		      struct ip_vs_protocol *pp);
int ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
			 struct ip_vs_protocol *pp);
int ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		     struct ip_vs_protocol *pp);
int ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		       struct ip_vs_protocol *pp, int offset);
#endif
1258 | ||
/*
 *	Simple mechanism to ignore a share of the packets under load.
 *
 *	With CONFIG_SYSCTL: returns 0 while ipvs->drop_rate is zero.
 *	Otherwise each call decrements ipvs->drop_counter; when it is no
 *	longer positive the counter is reloaded from drop_rate and 1 is
 *	returned, else 0.
 *
 *	Without CONFIG_SYSCTL: always returns 0 and @ipvs is not touched.
 */
static inline int ip_vs_todrop(struct netns_ipvs *ipvs)
{
#ifdef CONFIG_SYSCTL
	if (ipvs->drop_rate == 0)
		return 0;

	ipvs->drop_counter--;
	if (ipvs->drop_counter > 0)
		return 0;

	/* countdown expired: rearm it and report a drop */
	ipvs->drop_counter = ipvs->drop_rate;
	return 1;
#else
	return 0;
#endif
}
1278 | ||
1279 | /* | |
1280 | * ip_vs_fwd_tag returns the forwarding tag of the connection | |
1281 | */ | |
1282 | #define IP_VS_FWD_METHOD(cp) (cp->flags & IP_VS_CONN_F_FWD_MASK) | |
1283 | ||
1284 | static inline char ip_vs_fwd_tag(struct ip_vs_conn *cp) | |
1285 | { | |
1286 | char fwd; | |
1287 | ||
1288 | switch (IP_VS_FWD_METHOD(cp)) { | |
1289 | case IP_VS_CONN_F_MASQ: | |
1290 | fwd = 'M'; break; | |
1291 | case IP_VS_CONN_F_LOCALNODE: | |
1292 | fwd = 'L'; break; | |
1293 | case IP_VS_CONN_F_TUNNEL: | |
1294 | fwd = 'T'; break; | |
1295 | case IP_VS_CONN_F_DROUTE: | |
1296 | fwd = 'R'; break; | |
1297 | case IP_VS_CONN_F_BYPASS: | |
1298 | fwd = 'B'; break; | |
1299 | default: | |
1300 | fwd = '?'; break; | |
1301 | } | |
1302 | return fwd; | |
1303 | } | |
1304 | ||
1305 | extern void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, | |
1306 | struct ip_vs_conn *cp, int dir); | |
1307 | ||
1308 | #ifdef CONFIG_IP_VS_IPV6 | |
1309 | extern void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp, | |
1310 | struct ip_vs_conn *cp, int dir); | |
1311 | #endif | |
1312 | ||
1313 | extern __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset); | |
1314 | ||
1315 | static inline __wsum ip_vs_check_diff4(__be32 old, __be32 new, __wsum oldsum) | |
1316 | { | |
1317 | __be32 diff[2] = { ~old, new }; | |
1318 | ||
1319 | return csum_partial(diff, sizeof(diff), oldsum); | |
1320 | } | |
1321 | ||
#ifdef CONFIG_IP_VS_IPV6
/*
 * Checksum update for a value made of four 32-bit words changing from
 * @old to @new: runs csum_partial() over the complemented old words
 * followed by the new words (each group taken from index 3 down to 0),
 * with @oldsum as the starting sum.
 */
static inline __wsum ip_vs_check_diff16(const __be32 *old, const __be32 *new,
					__wsum oldsum)
{
	__be32 delta[8];
	int i;

	for (i = 0; i < 4; i++) {
		delta[i] = ~old[3 - i];
		delta[4 + i] = new[3 - i];
	}

	return csum_partial(delta, sizeof(delta), oldsum);
}
#endif
1332 | ||
1333 | static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum) | |
1334 | { | |
1335 | __be16 diff[2] = { ~old, new }; | |
1336 | ||
1337 | return csum_partial(diff, sizeof(diff), oldsum); | |
1338 | } | |
1339 | ||
/*
 *      Forget current conntrack (unconfirmed) and attach notrack entry
 *
 *      Compiles to an empty function when conntrack is neither built in
 *      nor built as a module.
 */
static inline void ip_vs_notrack(struct sk_buff *skb)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

	/* skb already carries an untracked conntrack: nothing to do */
	if (ct && nf_ct_is_untracked(ct))
		return;

	nf_reset(skb);
	skb->nfct = &nf_ct_untracked_get()->ct_general;
	skb->nfctinfo = IP_CT_NEW;
	nf_conntrack_get(skb->nfct);
#endif
}
1357 | ||
1358 | #ifdef CONFIG_IP_VS_NFCT | |
1359 | /* | |
1360 | * Netfilter connection tracking | |
1361 | * (from ip_vs_nfct.c) | |
1362 | */ | |
/*
 * Value of the per-netns sysctl_conntrack setting when CONFIG_SYSCTL
 * is enabled; 0 otherwise (in which case @ipvs is not dereferenced).
 */
static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
{
	int enabled = 0;

#ifdef CONFIG_SYSCTL
	enabled = ipvs->sysctl_conntrack;
#endif
	return enabled;
}
1371 | ||
1372 | extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, | |
1373 | int outin); | |
1374 | extern int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp); | |
1375 | extern void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct, | |
1376 | struct ip_vs_conn *cp, u_int8_t proto, | |
1377 | const __be16 port, int from_rs); | |
1378 | extern void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp); | |
1379 | ||
1380 | #else | |
1381 | ||
1382 | static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs) | |
1383 | { | |
1384 | return 0; | |
1385 | } | |
1386 | ||
1387 | static inline void ip_vs_update_conntrack(struct sk_buff *skb, | |
1388 | struct ip_vs_conn *cp, int outin) | |
1389 | { | |
1390 | } | |
1391 | ||
1392 | static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, | |
1393 | struct ip_vs_conn *cp) | |
1394 | { | |
1395 | return NF_ACCEPT; | |
1396 | } | |
1397 | ||
1398 | static inline void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) | |
1399 | { | |
1400 | } | |
1401 | /* CONFIG_IP_VS_NFCT */ | |
1402 | #endif | |
1403 | ||
1404 | static inline unsigned int | |
1405 | ip_vs_dest_conn_overhead(struct ip_vs_dest *dest) | |
1406 | { | |
1407 | /* | |
1408 | * We think the overhead of processing active connections is 256 | |
1409 | * times higher than that of inactive connections in average. (This | |
1410 | * 256 times might not be accurate, we will change it later) We | |
1411 | * use the following formula to estimate the overhead now: | |
1412 | * dest->activeconns*256 + dest->inactconns | |
1413 | */ | |
1414 | return (atomic_read(&dest->activeconns) << 8) + | |
1415 | atomic_read(&dest->inactconns); | |
1416 | } | |
1417 | ||
1418 | #endif /* __KERNEL__ */ | |
1419 | ||
1420 | #endif /* _NET_IP_VS_H */ |