Commit 2906f66a authored by Venkata Mohan Reddy's avatar Venkata Mohan Reddy Committed by Patrick McHardy

ipvs: SCTP Trasport Loadbalancing Support

Enhance IPVS to load balance SCTP transport protocol packets. This is done
based on the SCTP rfc 4960. All possible control chunks have been taken
care. The state machine used in this code looks some what lengthy. I tried
to make the state machine easy to understand.
Signed-off-by: default avatarVenkata Mohan Reddy Koppula <mohanreddykv@gmail.com>
Signed-off-by: default avatarSimon Horman <horms@verge.net.au>
Signed-off-by: default avatarPatrick McHardy <kaber@trash.net>
parent 477c6086
...@@ -224,6 +224,26 @@ enum { ...@@ -224,6 +224,26 @@ enum {
IP_VS_ICMP_S_LAST, IP_VS_ICMP_S_LAST,
}; };
/*
* SCTP State Values
*/
enum ip_vs_sctp_states {
IP_VS_SCTP_S_NONE,
IP_VS_SCTP_S_INIT_CLI,
IP_VS_SCTP_S_INIT_SER,
IP_VS_SCTP_S_INIT_ACK_CLI,
IP_VS_SCTP_S_INIT_ACK_SER,
IP_VS_SCTP_S_ECHO_CLI,
IP_VS_SCTP_S_ECHO_SER,
IP_VS_SCTP_S_ESTABLISHED,
IP_VS_SCTP_S_SHUT_CLI,
IP_VS_SCTP_S_SHUT_SER,
IP_VS_SCTP_S_SHUT_ACK_CLI,
IP_VS_SCTP_S_SHUT_ACK_SER,
IP_VS_SCTP_S_CLOSED,
IP_VS_SCTP_S_LAST
};
/* /*
* Delta sequence info structure * Delta sequence info structure
* Each ip_vs_conn has 2 (output AND input seq. changes). * Each ip_vs_conn has 2 (output AND input seq. changes).
...@@ -741,7 +761,7 @@ extern struct ip_vs_protocol ip_vs_protocol_udp; ...@@ -741,7 +761,7 @@ extern struct ip_vs_protocol ip_vs_protocol_udp;
extern struct ip_vs_protocol ip_vs_protocol_icmp; extern struct ip_vs_protocol ip_vs_protocol_icmp;
extern struct ip_vs_protocol ip_vs_protocol_esp; extern struct ip_vs_protocol ip_vs_protocol_esp;
extern struct ip_vs_protocol ip_vs_protocol_ah; extern struct ip_vs_protocol ip_vs_protocol_ah;
extern struct ip_vs_protocol ip_vs_protocol_sctp;
/* /*
* Registering/unregistering scheduler functions * Registering/unregistering scheduler functions
......
...@@ -104,6 +104,13 @@ config IP_VS_PROTO_AH ...@@ -104,6 +104,13 @@ config IP_VS_PROTO_AH
This option enables support for load balancing AH (Authentication This option enables support for load balancing AH (Authentication
Header) transport protocol. Say Y if unsure. Header) transport protocol. Say Y if unsure.
config IP_VS_PROTO_SCTP
bool "SCTP load balancing support"
select LIBCRC32C
---help---
This option enables support for load balancing SCTP transport
protocol. Say Y if unsure.
comment "IPVS scheduler" comment "IPVS scheduler"
config IP_VS_RR config IP_VS_RR
......
...@@ -7,6 +7,7 @@ ip_vs_proto-objs-y := ...@@ -7,6 +7,7 @@ ip_vs_proto-objs-y :=
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_TCP) += ip_vs_proto_tcp.o ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_TCP) += ip_vs_proto_tcp.o
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_SCTP) += ip_vs_proto_sctp.o
ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \ ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \
ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \ ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \
......
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/ip.h> #include <linux/ip.h>
#include <linux/tcp.h> #include <linux/tcp.h>
#include <linux/sctp.h>
#include <linux/icmp.h> #include <linux/icmp.h>
#include <net/ip.h> #include <net/ip.h>
...@@ -81,6 +82,8 @@ const char *ip_vs_proto_name(unsigned proto) ...@@ -81,6 +82,8 @@ const char *ip_vs_proto_name(unsigned proto)
return "UDP"; return "UDP";
case IPPROTO_TCP: case IPPROTO_TCP:
return "TCP"; return "TCP";
case IPPROTO_SCTP:
return "SCTP";
case IPPROTO_ICMP: case IPPROTO_ICMP:
return "ICMP"; return "ICMP";
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
...@@ -589,8 +592,9 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, ...@@ -589,8 +592,9 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
ip_send_check(ciph); ip_send_check(ciph);
} }
/* the TCP/UDP port */ /* the TCP/UDP/SCTP port */
if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol) { if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol ||
IPPROTO_SCTP == ciph->protocol) {
__be16 *ports = (void *)ciph + ciph->ihl*4; __be16 *ports = (void *)ciph + ciph->ihl*4;
if (inout) if (inout)
...@@ -630,8 +634,9 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp, ...@@ -630,8 +634,9 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
ciph->saddr = cp->daddr.in6; ciph->saddr = cp->daddr.in6;
} }
/* the TCP/UDP port */ /* the TCP/UDP/SCTP port */
if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr) { if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr ||
IPPROTO_SCTP == ciph->nexthdr) {
__be16 *ports = (void *)ciph + sizeof(struct ipv6hdr); __be16 *ports = (void *)ciph + sizeof(struct ipv6hdr);
if (inout) if (inout)
...@@ -679,7 +684,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, ...@@ -679,7 +684,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
goto out; goto out;
} }
if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol) if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol ||
IPPROTO_SCTP == protocol)
offset += 2 * sizeof(__u16); offset += 2 * sizeof(__u16);
if (!skb_make_writable(skb, offset)) if (!skb_make_writable(skb, offset))
goto out; goto out;
...@@ -857,6 +863,21 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related) ...@@ -857,6 +863,21 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
} }
#endif #endif
/*
* Check if sctp chunc is ABORT chunk
*/
static inline int is_sctp_abort(const struct sk_buff *skb, int nh_len)
{
sctp_chunkhdr_t *sch, schunk;
sch = skb_header_pointer(skb, nh_len + sizeof(sctp_sctphdr_t),
sizeof(schunk), &schunk);
if (sch == NULL)
return 0;
if (sch->type == SCTP_CID_ABORT)
return 1;
return 0;
}
static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len) static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
{ {
struct tcphdr _tcph, *th; struct tcphdr _tcph, *th;
...@@ -999,7 +1020,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, ...@@ -999,7 +1020,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
if (unlikely(!cp)) { if (unlikely(!cp)) {
if (sysctl_ip_vs_nat_icmp_send && if (sysctl_ip_vs_nat_icmp_send &&
(pp->protocol == IPPROTO_TCP || (pp->protocol == IPPROTO_TCP ||
pp->protocol == IPPROTO_UDP)) { pp->protocol == IPPROTO_UDP ||
pp->protocol == IPPROTO_SCTP)) {
__be16 _ports[2], *pptr; __be16 _ports[2], *pptr;
pptr = skb_header_pointer(skb, iph.len, pptr = skb_header_pointer(skb, iph.len,
...@@ -1014,8 +1036,13 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, ...@@ -1014,8 +1036,13 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
* existing entry if it is not RST * existing entry if it is not RST
* packet or not TCP packet. * packet or not TCP packet.
*/ */
if (iph.protocol != IPPROTO_TCP if ((iph.protocol != IPPROTO_TCP &&
|| !is_tcp_reset(skb, iph.len)) { iph.protocol != IPPROTO_SCTP)
|| ((iph.protocol == IPPROTO_TCP
&& !is_tcp_reset(skb, iph.len))
|| (iph.protocol == IPPROTO_SCTP
&& !is_sctp_abort(skb,
iph.len)))) {
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) if (af == AF_INET6)
icmpv6_send(skb, icmpv6_send(skb,
...@@ -1235,7 +1262,8 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) ...@@ -1235,7 +1262,8 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
/* do the statistics and put it back */ /* do the statistics and put it back */
ip_vs_in_stats(cp, skb); ip_vs_in_stats(cp, skb);
if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr) if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr ||
IPPROTO_SCTP == cih->nexthdr)
offset += 2 * sizeof(__u16); offset += 2 * sizeof(__u16);
verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset); verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset);
/* do not touch skb anymore */ /* do not touch skb anymore */
...@@ -1358,6 +1386,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, ...@@ -1358,6 +1386,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
* encorage the standby servers to update the connections timeout * encorage the standby servers to update the connections timeout
*/ */
pkts = atomic_add_return(1, &cp->in_pkts); pkts = atomic_add_return(1, &cp->in_pkts);
if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
cp->protocol == IPPROTO_SCTP) {
if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
(atomic_read(&cp->in_pkts) %
sysctl_ip_vs_sync_threshold[1]
== sysctl_ip_vs_sync_threshold[0])) ||
(cp->old_state != cp->state &&
((cp->state == IP_VS_SCTP_S_CLOSED) ||
(cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
(cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {
ip_vs_sync_conn(cp);
goto out;
}
}
if (af == AF_INET && if (af == AF_INET &&
(ip_vs_sync_state & IP_VS_STATE_MASTER) && (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
(((cp->protocol != IPPROTO_TCP || (((cp->protocol != IPPROTO_TCP ||
...@@ -1370,6 +1413,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, ...@@ -1370,6 +1413,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
(cp->state == IP_VS_TCP_S_CLOSE_WAIT) || (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
(cp->state == IP_VS_TCP_S_TIME_WAIT))))) (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
ip_vs_sync_conn(cp); ip_vs_sync_conn(cp);
out:
cp->old_state = cp->state; cp->old_state = cp->state;
ip_vs_conn_put(cp); ip_vs_conn_put(cp);
......
...@@ -2132,8 +2132,9 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) ...@@ -2132,8 +2132,9 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
} }
} }
/* Check for valid protocol: TCP or UDP, even for fwmark!=0 */ /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) { if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
usvc.protocol != IPPROTO_SCTP) {
pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n", pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
usvc.protocol, &usvc.addr.ip, usvc.protocol, &usvc.addr.ip,
ntohs(usvc.port), usvc.sched_name); ntohs(usvc.port), usvc.sched_name);
......
...@@ -257,6 +257,9 @@ int __init ip_vs_protocol_init(void) ...@@ -257,6 +257,9 @@ int __init ip_vs_protocol_init(void)
#ifdef CONFIG_IP_VS_PROTO_UDP #ifdef CONFIG_IP_VS_PROTO_UDP
REGISTER_PROTOCOL(&ip_vs_protocol_udp); REGISTER_PROTOCOL(&ip_vs_protocol_udp);
#endif #endif
#ifdef CONFIG_IP_VS_PROTO_SCTP
REGISTER_PROTOCOL(&ip_vs_protocol_sctp);
#endif
#ifdef CONFIG_IP_VS_PROTO_AH #ifdef CONFIG_IP_VS_PROTO_AH
REGISTER_PROTOCOL(&ip_vs_protocol_ah); REGISTER_PROTOCOL(&ip_vs_protocol_ah);
#endif #endif
......
This diff is collapsed.
...@@ -400,6 +400,11 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) ...@@ -400,6 +400,11 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
flags |= IP_VS_CONN_F_INACTIVE; flags |= IP_VS_CONN_F_INACTIVE;
else else
flags &= ~IP_VS_CONN_F_INACTIVE; flags &= ~IP_VS_CONN_F_INACTIVE;
} else if (s->protocol == IPPROTO_SCTP) {
if (state != IP_VS_SCTP_S_ESTABLISHED)
flags |= IP_VS_CONN_F_INACTIVE;
else
flags &= ~IP_VS_CONN_F_INACTIVE;
} }
cp = ip_vs_conn_new(AF_INET, s->protocol, cp = ip_vs_conn_new(AF_INET, s->protocol,
(union nf_inet_addr *)&s->caddr, (union nf_inet_addr *)&s->caddr,
...@@ -434,6 +439,15 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) ...@@ -434,6 +439,15 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
atomic_dec(&dest->inactconns); atomic_dec(&dest->inactconns);
cp->flags &= ~IP_VS_CONN_F_INACTIVE; cp->flags &= ~IP_VS_CONN_F_INACTIVE;
} }
} else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&
(cp->state != state)) {
dest = cp->dest;
if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
(state != IP_VS_SCTP_S_ESTABLISHED)) {
atomic_dec(&dest->activeconns);
atomic_inc(&dest->inactconns);
cp->flags &= ~IP_VS_CONN_F_INACTIVE;
}
} }
if (opt) if (opt)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment