Commit 4d405552 authored by David S. Miller's avatar David S. Miller
parents eff253c4 7fd10678
......@@ -242,4 +242,164 @@ struct ip_vs_daemon_user {
int syncid;
};
/*
*
* IPVS Generic Netlink interface definitions
*
*/
/* Generic Netlink family info */
#define IPVS_GENL_NAME "IPVS"
#define IPVS_GENL_VERSION 0x1
struct ip_vs_flags {
__be32 flags;
__be32 mask;
};
/* Generic Netlink command attributes */
enum {
IPVS_CMD_UNSPEC = 0,
IPVS_CMD_NEW_SERVICE, /* add service */
IPVS_CMD_SET_SERVICE, /* modify service */
IPVS_CMD_DEL_SERVICE, /* delete service */
IPVS_CMD_GET_SERVICE, /* get service info */
IPVS_CMD_NEW_DEST, /* add destination */
IPVS_CMD_SET_DEST, /* modify destination */
IPVS_CMD_DEL_DEST, /* delete destination */
IPVS_CMD_GET_DEST, /* get destination info */
IPVS_CMD_NEW_DAEMON, /* start sync daemon */
IPVS_CMD_DEL_DAEMON, /* stop sync daemon */
IPVS_CMD_GET_DAEMON, /* get sync daemon status */
IPVS_CMD_SET_CONFIG, /* set config settings */
IPVS_CMD_GET_CONFIG, /* get config settings */
IPVS_CMD_SET_INFO, /* only used in GET_INFO reply */
IPVS_CMD_GET_INFO, /* get general IPVS info */
IPVS_CMD_ZERO, /* zero all counters and stats */
IPVS_CMD_FLUSH, /* flush services and dests */
__IPVS_CMD_MAX,
};
#define IPVS_CMD_MAX (__IPVS_CMD_MAX - 1)
/* Attributes used in the first level of commands */
enum {
IPVS_CMD_ATTR_UNSPEC = 0,
IPVS_CMD_ATTR_SERVICE, /* nested service attribute */
IPVS_CMD_ATTR_DEST, /* nested destination attribute */
IPVS_CMD_ATTR_DAEMON, /* nested sync daemon attribute */
IPVS_CMD_ATTR_TIMEOUT_TCP, /* TCP connection timeout */
IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, /* TCP FIN wait timeout */
IPVS_CMD_ATTR_TIMEOUT_UDP, /* UDP timeout */
__IPVS_CMD_ATTR_MAX,
};
#define IPVS_CMD_ATTR_MAX (__IPVS_SVC_ATTR_MAX - 1)
/*
* Attributes used to describe a service
*
* Used inside nested attribute IPVS_CMD_ATTR_SERVICE
*/
enum {
IPVS_SVC_ATTR_UNSPEC = 0,
IPVS_SVC_ATTR_AF, /* address family */
IPVS_SVC_ATTR_PROTOCOL, /* virtual service protocol */
IPVS_SVC_ATTR_ADDR, /* virtual service address */
IPVS_SVC_ATTR_PORT, /* virtual service port */
IPVS_SVC_ATTR_FWMARK, /* firewall mark of service */
IPVS_SVC_ATTR_SCHED_NAME, /* name of scheduler */
IPVS_SVC_ATTR_FLAGS, /* virtual service flags */
IPVS_SVC_ATTR_TIMEOUT, /* persistent timeout */
IPVS_SVC_ATTR_NETMASK, /* persistent netmask */
IPVS_SVC_ATTR_STATS, /* nested attribute for service stats */
__IPVS_SVC_ATTR_MAX,
};
#define IPVS_SVC_ATTR_MAX (__IPVS_SVC_ATTR_MAX - 1)
/*
* Attributes used to describe a destination (real server)
*
* Used inside nested attribute IPVS_CMD_ATTR_DEST
*/
enum {
IPVS_DEST_ATTR_UNSPEC = 0,
IPVS_DEST_ATTR_ADDR, /* real server address */
IPVS_DEST_ATTR_PORT, /* real server port */
IPVS_DEST_ATTR_FWD_METHOD, /* forwarding method */
IPVS_DEST_ATTR_WEIGHT, /* destination weight */
IPVS_DEST_ATTR_U_THRESH, /* upper threshold */
IPVS_DEST_ATTR_L_THRESH, /* lower threshold */
IPVS_DEST_ATTR_ACTIVE_CONNS, /* active connections */
IPVS_DEST_ATTR_INACT_CONNS, /* inactive connections */
IPVS_DEST_ATTR_PERSIST_CONNS, /* persistent connections */
IPVS_DEST_ATTR_STATS, /* nested attribute for dest stats */
__IPVS_DEST_ATTR_MAX,
};
#define IPVS_DEST_ATTR_MAX (__IPVS_DEST_ATTR_MAX - 1)
/*
* Attributes describing a sync daemon
*
* Used inside nested attribute IPVS_CMD_ATTR_DAEMON
*/
enum {
IPVS_DAEMON_ATTR_UNSPEC = 0,
IPVS_DAEMON_ATTR_STATE, /* sync daemon state (master/backup) */
IPVS_DAEMON_ATTR_MCAST_IFN, /* multicast interface name */
IPVS_DAEMON_ATTR_SYNC_ID, /* SyncID we belong to */
__IPVS_DAEMON_ATTR_MAX,
};
#define IPVS_DAEMON_ATTR_MAX (__IPVS_DAEMON_ATTR_MAX - 1)
/*
* Attributes used to describe service or destination entry statistics
*
* Used inside nested attributes IPVS_SVC_ATTR_STATS and IPVS_DEST_ATTR_STATS
*/
enum {
IPVS_STATS_ATTR_UNSPEC = 0,
IPVS_STATS_ATTR_CONNS, /* connections scheduled */
IPVS_STATS_ATTR_INPKTS, /* incoming packets */
IPVS_STATS_ATTR_OUTPKTS, /* outgoing packets */
IPVS_STATS_ATTR_INBYTES, /* incoming bytes */
IPVS_STATS_ATTR_OUTBYTES, /* outgoing bytes */
IPVS_STATS_ATTR_CPS, /* current connection rate */
IPVS_STATS_ATTR_INPPS, /* current in packet rate */
IPVS_STATS_ATTR_OUTPPS, /* current out packet rate */
IPVS_STATS_ATTR_INBPS, /* current in byte rate */
IPVS_STATS_ATTR_OUTBPS, /* current out byte rate */
__IPVS_STATS_ATTR_MAX,
};
#define IPVS_STATS_ATTR_MAX (__IPVS_STATS_ATTR_MAX - 1)
/* Attributes used in response to IPVS_CMD_GET_INFO command */
enum {
IPVS_INFO_ATTR_UNSPEC = 0,
IPVS_INFO_ATTR_VERSION, /* IPVS version number */
IPVS_INFO_ATTR_CONN_TAB_SIZE, /* size of connection hash table */
__IPVS_INFO_ATTR_MAX,
};
#define IPVS_INFO_ATTR_MAX (__IPVS_INFO_ATTR_MAX - 1)
#endif /* _IP_VS_H */
......@@ -683,6 +683,8 @@ extern void ip_vs_sync_conn(struct ip_vs_conn *cp);
/*
* IPVS rate estimator prototypes (from ip_vs_est.c)
*/
extern int ip_vs_estimator_init(void);
extern void ip_vs_estimator_cleanup(void);
extern void ip_vs_new_estimator(struct ip_vs_stats *stats);
extern void ip_vs_kill_estimator(struct ip_vs_stats *stats);
extern void ip_vs_zero_estimator(struct ip_vs_stats *stats);
......
......@@ -71,14 +71,20 @@ config IP_VS_PROTO_UDP
This option enables support for load balancing UDP transport
protocol. Say Y if unsure.
config IP_VS_PROTO_AH_ESP
bool
depends on UNDEFINED
config IP_VS_PROTO_ESP
bool "ESP load balancing support"
select IP_VS_PROTO_AH_ESP
---help---
This option enables support for load balancing ESP (Encapsulation
Security Payload) transport protocol. Say Y if unsure.
config IP_VS_PROTO_AH
bool "AH load balancing support"
select IP_VS_PROTO_AH_ESP
---help---
This option enables support for load balancing AH (Authentication
Header) transport protocol. Say Y if unsure.
......
......@@ -6,8 +6,7 @@
ip_vs_proto-objs-y :=
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_TCP) += ip_vs_proto_tcp.o
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_ESP) += ip_vs_proto_esp.o
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH) += ip_vs_proto_ah.o
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o
ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \
ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \
......
......@@ -1070,10 +1070,12 @@ static int __init ip_vs_init(void)
{
int ret;
ip_vs_estimator_init();
ret = ip_vs_control_init();
if (ret < 0) {
IP_VS_ERR("can't setup control.\n");
goto cleanup_nothing;
goto cleanup_estimator;
}
ip_vs_protocol_init();
......@@ -1106,7 +1108,8 @@ static int __init ip_vs_init(void)
cleanup_protocol:
ip_vs_protocol_cleanup();
ip_vs_control_cleanup();
cleanup_nothing:
cleanup_estimator:
ip_vs_estimator_cleanup();
return ret;
}
......@@ -1117,6 +1120,7 @@ static void __exit ip_vs_cleanup(void)
ip_vs_app_cleanup();
ip_vs_protocol_cleanup();
ip_vs_control_cleanup();
ip_vs_estimator_cleanup();
IP_VS_INFO("ipvs unloaded.\n");
}
......
......@@ -37,6 +37,7 @@
#include <net/ip.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/genetlink.h>
#include <asm/uaccess.h>
......@@ -868,6 +869,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
svc->num_dests++;
/* call the update_service function of its scheduler */
if (svc->scheduler->update_service)
svc->scheduler->update_service(svc);
write_unlock_bh(&__ip_vs_svc_lock);
......@@ -898,6 +900,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
svc->num_dests++;
/* call the update_service function of its scheduler */
if (svc->scheduler->update_service)
svc->scheduler->update_service(svc);
write_unlock_bh(&__ip_vs_svc_lock);
......@@ -948,6 +951,7 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
/* call the update_service, because server weight may be changed */
if (svc->scheduler->update_service)
svc->scheduler->update_service(svc);
write_unlock_bh(&__ip_vs_svc_lock);
......@@ -1011,12 +1015,12 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
*/
list_del(&dest->n_list);
svc->num_dests--;
if (svcupd) {
/*
* Call the update_service function of its scheduler
*/
if (svcupd && svc->scheduler->update_service)
svc->scheduler->update_service(svc);
}
}
......@@ -2320,6 +2324,872 @@ static struct nf_sockopt_ops ip_vs_sockopts = {
.owner = THIS_MODULE,
};
/*
* Generic Netlink interface
*/
/* IPVS genetlink family */
static struct genl_family ip_vs_genl_family = {
.id = GENL_ID_GENERATE,
.hdrsize = 0,
.name = IPVS_GENL_NAME,
.version = IPVS_GENL_VERSION,
.maxattr = IPVS_CMD_MAX,
};
/* Policy used for first-level command attributes */
static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
[IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
[IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
[IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
[IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
[IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
};
/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
[IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
[IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
.len = IP_VS_IFNAME_MAXLEN },
[IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
};
/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
[IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
[IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
[IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
.len = sizeof(union nf_inet_addr) },
[IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
[IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
[IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
.len = IP_VS_SCHEDNAME_MAXLEN },
[IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
.len = sizeof(struct ip_vs_flags) },
[IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
[IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
[IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
};
/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
[IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
.len = sizeof(union nf_inet_addr) },
[IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
[IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
[IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
[IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
[IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
[IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
[IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
[IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
[IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
};
static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
struct ip_vs_stats *stats)
{
struct nlattr *nl_stats = nla_nest_start(skb, container_type);
if (!nl_stats)
return -EMSGSIZE;
spin_lock_bh(&stats->lock);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
spin_unlock_bh(&stats->lock);
nla_nest_end(skb, nl_stats);
return 0;
nla_put_failure:
spin_unlock_bh(&stats->lock);
nla_nest_cancel(skb, nl_stats);
return -EMSGSIZE;
}
static int ip_vs_genl_fill_service(struct sk_buff *skb,
struct ip_vs_service *svc)
{
struct nlattr *nl_service;
struct ip_vs_flags flags = { .flags = svc->flags,
.mask = ~0 };
nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
if (!nl_service)
return -EMSGSIZE;
NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
if (svc->fwmark) {
NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
} else {
NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
}
NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
goto nla_put_failure;
nla_nest_end(skb, nl_service);
return 0;
nla_put_failure:
nla_nest_cancel(skb, nl_service);
return -EMSGSIZE;
}
static int ip_vs_genl_dump_service(struct sk_buff *skb,
struct ip_vs_service *svc,
struct netlink_callback *cb)
{
void *hdr;
hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
&ip_vs_genl_family, NLM_F_MULTI,
IPVS_CMD_NEW_SERVICE);
if (!hdr)
return -EMSGSIZE;
if (ip_vs_genl_fill_service(skb, svc) < 0)
goto nla_put_failure;
return genlmsg_end(skb, hdr);
nla_put_failure:
genlmsg_cancel(skb, hdr);
return -EMSGSIZE;
}
static int ip_vs_genl_dump_services(struct sk_buff *skb,
struct netlink_callback *cb)
{
int idx = 0, i;
int start = cb->args[0];
struct ip_vs_service *svc;
mutex_lock(&__ip_vs_mutex);
for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
if (++idx <= start)
continue;
if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
idx--;
goto nla_put_failure;
}
}
}
for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
if (++idx <= start)
continue;
if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
idx--;
goto nla_put_failure;
}
}
}
nla_put_failure:
mutex_unlock(&__ip_vs_mutex);
cb->args[0] = idx;
return skb->len;
}
static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
struct nlattr *nla, int full_entry)
{
struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
/* Parse mandatory identifying service fields first */
if (nla == NULL ||
nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
return -EINVAL;
nla_af = attrs[IPVS_SVC_ATTR_AF];
nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
nla_port = attrs[IPVS_SVC_ATTR_PORT];
nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
return -EINVAL;
/* For now, only support IPv4 */
if (nla_get_u16(nla_af) != AF_INET)
return -EAFNOSUPPORT;
if (nla_fwmark) {
usvc->protocol = IPPROTO_TCP;
usvc->fwmark = nla_get_u32(nla_fwmark);
} else {
usvc->protocol = nla_get_u16(nla_protocol);
nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
usvc->port = nla_get_u16(nla_port);
usvc->fwmark = 0;
}
/* If a full entry was requested, check for the additional fields */
if (full_entry) {
struct nlattr *nla_sched, *nla_flags, *nla_timeout,
*nla_netmask;
struct ip_vs_flags flags;
struct ip_vs_service *svc;
nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
return -EINVAL;
nla_memcpy(&flags, nla_flags, sizeof(flags));
/* prefill flags from service if it already exists */
if (usvc->fwmark)
svc = __ip_vs_svc_fwm_get(usvc->fwmark);
else
svc = __ip_vs_service_get(usvc->protocol, usvc->addr,
usvc->port);
if (svc) {
usvc->flags = svc->flags;
ip_vs_service_put(svc);
} else
usvc->flags = 0;
/* set new flags from userland */
usvc->flags = (usvc->flags & ~flags.mask) |
(flags.flags & flags.mask);
strlcpy(usvc->sched_name, nla_data(nla_sched),
sizeof(usvc->sched_name));
usvc->timeout = nla_get_u32(nla_timeout);
usvc->netmask = nla_get_u32(nla_netmask);
}
return 0;
}
static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
{
struct ip_vs_service_user usvc;
int ret;
ret = ip_vs_genl_parse_service(&usvc, nla, 0);
if (ret)
return ERR_PTR(ret);
if (usvc.fwmark)
return __ip_vs_svc_fwm_get(usvc.fwmark);
else
return __ip_vs_service_get(usvc.protocol, usvc.addr,
usvc.port);
}
static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
{
struct nlattr *nl_dest;
nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
if (!nl_dest)
return -EMSGSIZE;
NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
atomic_read(&dest->activeconns));
NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
atomic_read(&dest->inactconns));
NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
atomic_read(&dest->persistconns));
if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
goto nla_put_failure;
nla_nest_end(skb, nl_dest);
return 0;
nla_put_failure:
nla_nest_cancel(skb, nl_dest);
return -EMSGSIZE;
}
static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
struct netlink_callback *cb)
{
void *hdr;
hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
&ip_vs_genl_family, NLM_F_MULTI,
IPVS_CMD_NEW_DEST);
if (!hdr)
return -EMSGSIZE;
if (ip_vs_genl_fill_dest(skb, dest) < 0)
goto nla_put_failure;
return genlmsg_end(skb, hdr);
nla_put_failure:
genlmsg_cancel(skb, hdr);
return -EMSGSIZE;
}
static int ip_vs_genl_dump_dests(struct sk_buff *skb,
struct netlink_callback *cb)
{
int idx = 0;
int start = cb->args[0];
struct ip_vs_service *svc;
struct ip_vs_dest *dest;
struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
mutex_lock(&__ip_vs_mutex);
/* Try to find the service for which to dump destinations */
if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
goto out_err;
svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
if (IS_ERR(svc) || svc == NULL)
goto out_err;
/* Dump the destinations */
list_for_each_entry(dest, &svc->destinations, n_list) {
if (++idx <= start)
continue;
if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
idx--;
goto nla_put_failure;
}
}
nla_put_failure:
cb->args[0] = idx;
ip_vs_service_put(svc);
out_err:
mutex_unlock(&__ip_vs_mutex);
return skb->len;
}
static int ip_vs_genl_parse_dest(struct ip_vs_dest_user *udest,
struct nlattr *nla, int full_entry)
{
struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
struct nlattr *nla_addr, *nla_port;
/* Parse mandatory identifying destination fields first */
if (nla == NULL ||
nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
return -EINVAL;
nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
nla_port = attrs[IPVS_DEST_ATTR_PORT];
if (!(nla_addr && nla_port))
return -EINVAL;
nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
udest->port = nla_get_u16(nla_port);
/* If a full entry was requested, check for the additional fields */
if (full_entry) {
struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
*nla_l_thresh;
nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
return -EINVAL;
udest->conn_flags = nla_get_u32(nla_fwd)
& IP_VS_CONN_F_FWD_MASK;
udest->weight = nla_get_u32(nla_weight);
udest->u_threshold = nla_get_u32(nla_u_thresh);
udest->l_threshold = nla_get_u32(nla_l_thresh);
}
return 0;
}
static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
const char *mcast_ifn, __be32 syncid)
{
struct nlattr *nl_daemon;
nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
if (!nl_daemon)
return -EMSGSIZE;
NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
nla_nest_end(skb, nl_daemon);
return 0;
nla_put_failure:
nla_nest_cancel(skb, nl_daemon);
return -EMSGSIZE;
}
static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
const char *mcast_ifn, __be32 syncid,
struct netlink_callback *cb)
{
void *hdr;
hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
&ip_vs_genl_family, NLM_F_MULTI,
IPVS_CMD_NEW_DAEMON);
if (!hdr)
return -EMSGSIZE;
if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
goto nla_put_failure;
return genlmsg_end(skb, hdr);
nla_put_failure:
genlmsg_cancel(skb, hdr);
return -EMSGSIZE;
}
static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
struct netlink_callback *cb)
{
mutex_lock(&__ip_vs_mutex);
if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
ip_vs_master_mcast_ifn,
ip_vs_master_syncid, cb) < 0)
goto nla_put_failure;
cb->args[0] = 1;
}
if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
ip_vs_backup_mcast_ifn,
ip_vs_backup_syncid, cb) < 0)
goto nla_put_failure;
cb->args[1] = 1;
}
nla_put_failure:
mutex_unlock(&__ip_vs_mutex);
return skb->len;
}
static int ip_vs_genl_new_daemon(struct nlattr **attrs)
{
if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
return -EINVAL;
return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
}
static int ip_vs_genl_del_daemon(struct nlattr **attrs)
{
if (!attrs[IPVS_DAEMON_ATTR_STATE])
return -EINVAL;
return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
}
static int ip_vs_genl_set_config(struct nlattr **attrs)
{
struct ip_vs_timeout_user t;
__ip_vs_get_timeouts(&t);
if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
t.tcp_fin_timeout =
nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
return ip_vs_set_timeout(&t);
}
static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
{
struct ip_vs_service *svc = NULL;
struct ip_vs_service_user usvc;
struct ip_vs_dest_user udest;
int ret = 0, cmd;
int need_full_svc = 0, need_full_dest = 0;
cmd = info->genlhdr->cmd;
mutex_lock(&__ip_vs_mutex);
if (cmd == IPVS_CMD_FLUSH) {
ret = ip_vs_flush();
goto out;
} else if (cmd == IPVS_CMD_SET_CONFIG) {
ret = ip_vs_genl_set_config(info->attrs);
goto out;
} else if (cmd == IPVS_CMD_NEW_DAEMON ||
cmd == IPVS_CMD_DEL_DAEMON) {
struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
info->attrs[IPVS_CMD_ATTR_DAEMON],
ip_vs_daemon_policy)) {
ret = -EINVAL;
goto out;
}
if (cmd == IPVS_CMD_NEW_DAEMON)
ret = ip_vs_genl_new_daemon(daemon_attrs);
else
ret = ip_vs_genl_del_daemon(daemon_attrs);
goto out;
} else if (cmd == IPVS_CMD_ZERO &&
!info->attrs[IPVS_CMD_ATTR_SERVICE]) {
ret = ip_vs_zero_all();
goto out;
}
/* All following commands require a service argument, so check if we
* received a valid one. We need a full service specification when
* adding / editing a service. Only identifying members otherwise. */
if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
need_full_svc = 1;
ret = ip_vs_genl_parse_service(&usvc,
info->attrs[IPVS_CMD_ATTR_SERVICE],
need_full_svc);
if (ret)
goto out;
/* Lookup the exact service by <protocol, addr, port> or fwmark */
if (usvc.fwmark == 0)
svc = __ip_vs_service_get(usvc.protocol, usvc.addr, usvc.port);
else
svc = __ip_vs_svc_fwm_get(usvc.fwmark);
/* Unless we're adding a new service, the service must already exist */
if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
ret = -ESRCH;
goto out;
}
/* Destination commands require a valid destination argument. For
* adding / editing a destination, we need a full destination
* specification. */
if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
cmd == IPVS_CMD_DEL_DEST) {
if (cmd != IPVS_CMD_DEL_DEST)
need_full_dest = 1;
ret = ip_vs_genl_parse_dest(&udest,
info->attrs[IPVS_CMD_ATTR_DEST],
need_full_dest);
if (ret)
goto out;
}
switch (cmd) {
case IPVS_CMD_NEW_SERVICE:
if (svc == NULL)
ret = ip_vs_add_service(&usvc, &svc);
else
ret = -EEXIST;
break;
case IPVS_CMD_SET_SERVICE:
ret = ip_vs_edit_service(svc, &usvc);
break;
case IPVS_CMD_DEL_SERVICE:
ret = ip_vs_del_service(svc);
break;
case IPVS_CMD_NEW_DEST:
ret = ip_vs_add_dest(svc, &udest);
break;
case IPVS_CMD_SET_DEST:
ret = ip_vs_edit_dest(svc, &udest);
break;
case IPVS_CMD_DEL_DEST:
ret = ip_vs_del_dest(svc, &udest);
break;
case IPVS_CMD_ZERO:
ret = ip_vs_zero_service(svc);
break;
default:
ret = -EINVAL;
}
out:
if (svc)
ip_vs_service_put(svc);
mutex_unlock(&__ip_vs_mutex);
return ret;
}
static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
{
struct sk_buff *msg;
void *reply;
int ret, cmd, reply_cmd;
cmd = info->genlhdr->cmd;
if (cmd == IPVS_CMD_GET_SERVICE)
reply_cmd = IPVS_CMD_NEW_SERVICE;
else if (cmd == IPVS_CMD_GET_INFO)
reply_cmd = IPVS_CMD_SET_INFO;
else if (cmd == IPVS_CMD_GET_CONFIG)
reply_cmd = IPVS_CMD_SET_CONFIG;
else {
IP_VS_ERR("unknown Generic Netlink command\n");
return -EINVAL;
}
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return -ENOMEM;
mutex_lock(&__ip_vs_mutex);
reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
if (reply == NULL)
goto nla_put_failure;
switch (cmd) {
case IPVS_CMD_GET_SERVICE:
{
struct ip_vs_service *svc;
svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
if (IS_ERR(svc)) {
ret = PTR_ERR(svc);
goto out_err;
} else if (svc) {
ret = ip_vs_genl_fill_service(msg, svc);
ip_vs_service_put(svc);
if (ret)
goto nla_put_failure;
} else {
ret = -ESRCH;
goto out_err;
}
break;
}
case IPVS_CMD_GET_CONFIG:
{
struct ip_vs_timeout_user t;
__ip_vs_get_timeouts(&t);
#ifdef CONFIG_IP_VS_PROTO_TCP
NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
t.tcp_fin_timeout);
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
#endif
break;
}
case IPVS_CMD_GET_INFO:
NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
IP_VS_CONN_TAB_SIZE);
break;
}
genlmsg_end(msg, reply);
ret = genlmsg_unicast(msg, info->snd_pid);
goto out;
nla_put_failure:
IP_VS_ERR("not enough space in Netlink message\n");
ret = -EMSGSIZE;
out_err:
nlmsg_free(msg);
out:
mutex_unlock(&__ip_vs_mutex);
return ret;
}
static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
{
.cmd = IPVS_CMD_NEW_SERVICE,
.flags = GENL_ADMIN_PERM,
.policy = ip_vs_cmd_policy,
.doit = ip_vs_genl_set_cmd,
},
{
.cmd = IPVS_CMD_SET_SERVICE,
.flags = GENL_ADMIN_PERM,
.policy = ip_vs_cmd_policy,
.doit = ip_vs_genl_set_cmd,
},
{
.cmd = IPVS_CMD_DEL_SERVICE,
.flags = GENL_ADMIN_PERM,
.policy = ip_vs_cmd_policy,
.doit = ip_vs_genl_set_cmd,
},
{
.cmd = IPVS_CMD_GET_SERVICE,
.flags = GENL_ADMIN_PERM,
.doit = ip_vs_genl_get_cmd,
.dumpit = ip_vs_genl_dump_services,
.policy = ip_vs_cmd_policy,
},
{
.cmd = IPVS_CMD_NEW_DEST,
.flags = GENL_ADMIN_PERM,
.policy = ip_vs_cmd_policy,
.doit = ip_vs_genl_set_cmd,
},
{
.cmd = IPVS_CMD_SET_DEST,
.flags = GENL_ADMIN_PERM,
.policy = ip_vs_cmd_policy,
.doit = ip_vs_genl_set_cmd,
},
{
.cmd = IPVS_CMD_DEL_DEST,
.flags = GENL_ADMIN_PERM,
.policy = ip_vs_cmd_policy,
.doit = ip_vs_genl_set_cmd,
},
{
.cmd = IPVS_CMD_GET_DEST,
.flags = GENL_ADMIN_PERM,
.policy = ip_vs_cmd_policy,
.dumpit = ip_vs_genl_dump_dests,
},
{
.cmd = IPVS_CMD_NEW_DAEMON,
.flags = GENL_ADMIN_PERM,
.policy = ip_vs_cmd_policy,
.doit = ip_vs_genl_set_cmd,
},
{
.cmd = IPVS_CMD_DEL_DAEMON,
.flags = GENL_ADMIN_PERM,
.policy = ip_vs_cmd_policy,
.doit = ip_vs_genl_set_cmd,
},
{
.cmd = IPVS_CMD_GET_DAEMON,
.flags = GENL_ADMIN_PERM,
.dumpit = ip_vs_genl_dump_daemons,
},
{
.cmd = IPVS_CMD_SET_CONFIG,
.flags = GENL_ADMIN_PERM,
.policy = ip_vs_cmd_policy,
.doit = ip_vs_genl_set_cmd,
},
{
.cmd = IPVS_CMD_GET_CONFIG,
.flags = GENL_ADMIN_PERM,
.doit = ip_vs_genl_get_cmd,
},
{
.cmd = IPVS_CMD_GET_INFO,
.flags = GENL_ADMIN_PERM,
.doit = ip_vs_genl_get_cmd,
},
{
.cmd = IPVS_CMD_ZERO,
.flags = GENL_ADMIN_PERM,
.policy = ip_vs_cmd_policy,
.doit = ip_vs_genl_set_cmd,
},
{
.cmd = IPVS_CMD_FLUSH,
.flags = GENL_ADMIN_PERM,
.doit = ip_vs_genl_set_cmd,
},
};
static int __init ip_vs_genl_register(void)
{
int ret, i;
ret = genl_register_family(&ip_vs_genl_family);
if (ret)
return ret;
for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
if (ret)
goto err_out;
}
return 0;
err_out:
genl_unregister_family(&ip_vs_genl_family);
return ret;
}
static void ip_vs_genl_unregister(void)
{
genl_unregister_family(&ip_vs_genl_family);
}
/* End of Generic Netlink interface definitions */
int __init ip_vs_control_init(void)
{
......@@ -2334,6 +3204,13 @@ int __init ip_vs_control_init(void)
return ret;
}
ret = ip_vs_genl_register();
if (ret) {
IP_VS_ERR("cannot register Generic Netlink interface.\n");
nf_unregister_sockopt(&ip_vs_sockopts);
return ret;
}
proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
......@@ -2368,6 +3245,7 @@ void ip_vs_control_cleanup(void)
unregister_sysctl_table(sysctl_header);
proc_net_remove(&init_net, "ip_vs_stats");
proc_net_remove(&init_net, "ip_vs");
ip_vs_genl_unregister();
nf_unregister_sockopt(&ip_vs_sockopts);
LeaveFunction(2);
}
......@@ -124,8 +124,6 @@ void ip_vs_new_estimator(struct ip_vs_stats *stats)
est->outbps = stats->outbps<<5;
spin_lock_bh(&est_lock);
if (list_empty(&est_list))
mod_timer(&est_timer, jiffies + 2 * HZ);
list_add(&est->list, &est_list);
spin_unlock_bh(&est_lock);
}
......@@ -136,11 +134,6 @@ void ip_vs_kill_estimator(struct ip_vs_stats *stats)
spin_lock_bh(&est_lock);
list_del(&est->list);
while (list_empty(&est_list) && try_to_del_timer_sync(&est_timer) < 0) {
spin_unlock_bh(&est_lock);
cpu_relax();
spin_lock_bh(&est_lock);
}
spin_unlock_bh(&est_lock);
}
......@@ -160,3 +153,14 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats)
est->inbps = 0;
est->outbps = 0;
}
int __init ip_vs_estimator_init(void)
{
mod_timer(&est_timer, jiffies + 2 * HZ);
return 0;
}
void ip_vs_estimator_cleanup(void)
{
del_timer_sync(&est_timer);
}
......@@ -96,7 +96,6 @@ struct ip_vs_lblc_entry {
* IPVS lblc hash table
*/
struct ip_vs_lblc_table {
rwlock_t lock; /* lock for this table */
struct list_head bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */
atomic_t entries; /* number of entries */
int max_size; /* maximum size of entries */
......@@ -123,31 +122,6 @@ static ctl_table vs_vars_table[] = {
static struct ctl_table_header * sysctl_header;
/*
* new/free a ip_vs_lblc_entry, which is a mapping of a destionation
* IP address to a server.
*/
static inline struct ip_vs_lblc_entry *
ip_vs_lblc_new(__be32 daddr, struct ip_vs_dest *dest)
{
struct ip_vs_lblc_entry *en;
en = kmalloc(sizeof(struct ip_vs_lblc_entry), GFP_ATOMIC);
if (en == NULL) {
IP_VS_ERR("ip_vs_lblc_new(): no memory\n");
return NULL;
}
INIT_LIST_HEAD(&en->list);
en->addr = daddr;
atomic_inc(&dest->refcnt);
en->dest = dest;
return en;
}
static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
{
list_del(&en->list);
......@@ -173,55 +147,66 @@ static inline unsigned ip_vs_lblc_hashkey(__be32 addr)
* Hash an entry in the ip_vs_lblc_table.
* returns bool success.
*/
static int
static void
ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)
{
unsigned hash;
if (!list_empty(&en->list)) {
IP_VS_ERR("ip_vs_lblc_hash(): request for already hashed, "
"called from %p\n", __builtin_return_address(0));
return 0;
}
unsigned hash = ip_vs_lblc_hashkey(en->addr);
/*
* Hash by destination IP address
*/
hash = ip_vs_lblc_hashkey(en->addr);
write_lock(&tbl->lock);
list_add(&en->list, &tbl->bucket[hash]);
atomic_inc(&tbl->entries);
write_unlock(&tbl->lock);
return 1;
}
/*
* Get ip_vs_lblc_entry associated with supplied parameters.
* Get ip_vs_lblc_entry associated with supplied parameters. Called under read
* lock
*/
static inline struct ip_vs_lblc_entry *
ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr)
{
unsigned hash;
unsigned hash = ip_vs_lblc_hashkey(addr);
struct ip_vs_lblc_entry *en;
hash = ip_vs_lblc_hashkey(addr);
list_for_each_entry(en, &tbl->bucket[hash], list)
if (en->addr == addr)
return en;
read_lock(&tbl->lock);
return NULL;
}
list_for_each_entry(en, &tbl->bucket[hash], list) {
if (en->addr == addr) {
/* HIT */
read_unlock(&tbl->lock);
return en;
}
}
read_unlock(&tbl->lock);
/*
* Create or update an ip_vs_lblc_entry, which is a mapping of a destination IP
* address to a server. Called under write lock.
*/
static inline struct ip_vs_lblc_entry *
ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, __be32 daddr,
struct ip_vs_dest *dest)
{
struct ip_vs_lblc_entry *en;
en = ip_vs_lblc_get(tbl, daddr);
if (!en) {
en = kmalloc(sizeof(*en), GFP_ATOMIC);
if (!en) {
IP_VS_ERR("ip_vs_lblc_new(): no memory\n");
return NULL;
}
en->addr = daddr;
en->lastuse = jiffies;
atomic_inc(&dest->refcnt);
en->dest = dest;
ip_vs_lblc_hash(tbl, en);
} else if (en->dest != dest) {
atomic_dec(&en->dest->refcnt);
atomic_inc(&dest->refcnt);
en->dest = dest;
}
return en;
}
......@@ -230,30 +215,29 @@ ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr)
*/
static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl)
{
int i;
struct ip_vs_lblc_entry *en, *nxt;
int i;
for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
write_lock(&tbl->lock);
list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) {
ip_vs_lblc_free(en);
atomic_dec(&tbl->entries);
}
write_unlock(&tbl->lock);
}
}
static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl)
static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
{
struct ip_vs_lblc_table *tbl = svc->sched_data;
struct ip_vs_lblc_entry *en, *nxt;
unsigned long now = jiffies;
int i, j;
struct ip_vs_lblc_entry *en, *nxt;
for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLC_TAB_MASK;
write_lock(&tbl->lock);
write_lock(&svc->sched_lock);
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
if (time_before(now,
en->lastuse + sysctl_ip_vs_lblc_expiration))
......@@ -262,7 +246,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl)
ip_vs_lblc_free(en);
atomic_dec(&tbl->entries);
}
write_unlock(&tbl->lock);
write_unlock(&svc->sched_lock);
}
tbl->rover = j;
}
......@@ -281,17 +265,16 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl)
*/
static void ip_vs_lblc_check_expire(unsigned long data)
{
struct ip_vs_lblc_table *tbl;
struct ip_vs_service *svc = (struct ip_vs_service *) data;
struct ip_vs_lblc_table *tbl = svc->sched_data;
unsigned long now = jiffies;
int goal;
int i, j;
struct ip_vs_lblc_entry *en, *nxt;
tbl = (struct ip_vs_lblc_table *)data;
if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
/* do full expiration check */
ip_vs_lblc_full_check(tbl);
ip_vs_lblc_full_check(svc);
tbl->counter = 1;
goto out;
}
......@@ -308,7 +291,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)
for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLC_TAB_MASK;
write_lock(&tbl->lock);
write_lock(&svc->sched_lock);
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
if (time_before(now, en->lastuse + ENTRY_TIMEOUT))
continue;
......@@ -317,7 +300,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)
atomic_dec(&tbl->entries);
goal--;
}
write_unlock(&tbl->lock);
write_unlock(&svc->sched_lock);
if (goal <= 0)
break;
}
......@@ -336,15 +319,14 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
/*
* Allocate the ip_vs_lblc_table for this service
*/
tbl = kmalloc(sizeof(struct ip_vs_lblc_table), GFP_ATOMIC);
tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
if (tbl == NULL) {
IP_VS_ERR("ip_vs_lblc_init_svc(): no memory\n");
return -ENOMEM;
}
svc->sched_data = tbl;
IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) allocated for "
"current service\n",
sizeof(struct ip_vs_lblc_table));
"current service\n", sizeof(*tbl));
/*
* Initialize the hash buckets
......@@ -352,7 +334,6 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
INIT_LIST_HEAD(&tbl->bucket[i]);
}
rwlock_init(&tbl->lock);
tbl->max_size = IP_VS_LBLC_TAB_SIZE*16;
tbl->rover = 0;
tbl->counter = 1;
......@@ -361,9 +342,8 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
* Hook periodic timer for garbage collection
*/
setup_timer(&tbl->periodic_timer, ip_vs_lblc_check_expire,
(unsigned long)tbl);
tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL;
add_timer(&tbl->periodic_timer);
(unsigned long)svc);
mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
return 0;
}
......@@ -380,22 +360,16 @@ static int ip_vs_lblc_done_svc(struct ip_vs_service *svc)
ip_vs_lblc_flush(tbl);
/* release the table itself */
kfree(svc->sched_data);
kfree(tbl);
IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n",
sizeof(struct ip_vs_lblc_table));
sizeof(*tbl));
return 0;
}
static int ip_vs_lblc_update_svc(struct ip_vs_service *svc)
{
return 0;
}
static inline struct ip_vs_dest *
__ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
__ip_vs_lblc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
{
struct ip_vs_dest *dest, *least;
int loh, doh;
......@@ -484,46 +458,54 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
static struct ip_vs_dest *
ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
struct ip_vs_dest *dest;
struct ip_vs_lblc_table *tbl;
struct ip_vs_lblc_entry *en;
struct ip_vs_lblc_table *tbl = svc->sched_data;
struct iphdr *iph = ip_hdr(skb);
struct ip_vs_dest *dest = NULL;
struct ip_vs_lblc_entry *en;
IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n");
tbl = (struct ip_vs_lblc_table *)svc->sched_data;
/* First look in our cache */
read_lock(&svc->sched_lock);
en = ip_vs_lblc_get(tbl, iph->daddr);
if (en == NULL) {
dest = __ip_vs_wlc_schedule(svc, iph);
if (dest == NULL) {
IP_VS_DBG(1, "no destination available\n");
return NULL;
}
en = ip_vs_lblc_new(iph->daddr, dest);
if (en == NULL) {
return NULL;
}
ip_vs_lblc_hash(tbl, en);
} else {
if (en) {
/* We only hold a read lock, but this is atomic */
en->lastuse = jiffies;
/*
* If the destination is not available, i.e. it's in the trash,
* we must ignore it, as it may be removed from under our feet,
* if someone drops our reference count. Our caller only makes
* sure that destinations, that are not in the trash, are not
* moved to the trash, while we are scheduling. But anyone can
* free up entries from the trash at any time.
*/
if (en->dest->flags & IP_VS_DEST_F_AVAILABLE)
dest = en->dest;
if (!(dest->flags & IP_VS_DEST_F_AVAILABLE)
|| atomic_read(&dest->weight) <= 0
|| is_overloaded(dest, svc)) {
dest = __ip_vs_wlc_schedule(svc, iph);
if (dest == NULL) {
}
read_unlock(&svc->sched_lock);
/* If the destination has a weight and is not overloaded, use it */
if (dest && atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
goto out;
/* No cache entry or it is invalid, time to schedule */
dest = __ip_vs_lblc_schedule(svc, iph);
if (!dest) {
IP_VS_DBG(1, "no destination available\n");
return NULL;
}
atomic_dec(&en->dest->refcnt);
atomic_inc(&dest->refcnt);
en->dest = dest;
}
}
en->lastuse = jiffies;
/* If we fail to create a cache entry, we'll just use the valid dest */
write_lock(&svc->sched_lock);
ip_vs_lblc_new(tbl, iph->daddr, dest);
write_unlock(&svc->sched_lock);
out:
IP_VS_DBG(6, "LBLC: destination IP address %u.%u.%u.%u "
"--> server %u.%u.%u.%u:%d\n",
NIPQUAD(en->addr),
NIPQUAD(iph->daddr),
NIPQUAD(dest->addr),
ntohs(dest->port));
......@@ -542,7 +524,6 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler =
.n_list = LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list),
.init_service = ip_vs_lblc_init_svc,
.done_service = ip_vs_lblc_done_svc,
.update_service = ip_vs_lblc_update_svc,
.schedule = ip_vs_lblc_schedule,
};
......
......@@ -106,7 +106,7 @@ ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
return NULL;
}
e = kmalloc(sizeof(struct ip_vs_dest_list), GFP_ATOMIC);
e = kmalloc(sizeof(*e), GFP_ATOMIC);
if (e == NULL) {
IP_VS_ERR("ip_vs_dest_set_insert(): no memory\n");
return NULL;
......@@ -116,11 +116,9 @@ ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
e->dest = dest;
/* link it to the list */
write_lock(&set->lock);
e->next = set->list;
set->list = e;
atomic_inc(&set->size);
write_unlock(&set->lock);
set->lastmod = jiffies;
return e;
......@@ -131,7 +129,6 @@ ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
{
struct ip_vs_dest_list *e, **ep;
write_lock(&set->lock);
for (ep=&set->list, e=*ep; e!=NULL; e=*ep) {
if (e->dest == dest) {
/* HIT */
......@@ -144,7 +141,6 @@ ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
}
ep = &e->next;
}
write_unlock(&set->lock);
}
static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
......@@ -174,7 +170,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
if (set == NULL)
return NULL;
read_lock(&set->lock);
/* select the first destination server, whose weight > 0 */
for (e=set->list; e!=NULL; e=e->next) {
least = e->dest;
......@@ -188,7 +183,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
goto nextstage;
}
}
read_unlock(&set->lock);
return NULL;
/* find the destination with the weighted least load */
......@@ -207,7 +201,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
loh = doh;
}
}
read_unlock(&set->lock);
IP_VS_DBG(6, "ip_vs_dest_set_min: server %d.%d.%d.%d:%d "
"activeconns %d refcnt %d weight %d overhead %d\n",
......@@ -229,7 +222,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
if (set == NULL)
return NULL;
read_lock(&set->lock);
/* select the first destination server, whose weight > 0 */
for (e=set->list; e!=NULL; e=e->next) {
most = e->dest;
......@@ -239,7 +231,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
goto nextstage;
}
}
read_unlock(&set->lock);
return NULL;
/* find the destination with the weighted most load */
......@@ -256,7 +247,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
moh = doh;
}
}
read_unlock(&set->lock);
IP_VS_DBG(6, "ip_vs_dest_set_max: server %d.%d.%d.%d:%d "
"activeconns %d refcnt %d weight %d overhead %d\n",
......@@ -284,7 +274,6 @@ struct ip_vs_lblcr_entry {
* IPVS lblcr hash table
*/
struct ip_vs_lblcr_table {
rwlock_t lock; /* lock for this table */
struct list_head bucket[IP_VS_LBLCR_TAB_SIZE]; /* hash bucket */
atomic_t entries; /* number of entries */
int max_size; /* maximum size of entries */
......@@ -311,32 +300,6 @@ static ctl_table vs_vars_table[] = {
static struct ctl_table_header * sysctl_header;
/*
* new/free a ip_vs_lblcr_entry, which is a mapping of a destination
* IP address to a server.
*/
static inline struct ip_vs_lblcr_entry *ip_vs_lblcr_new(__be32 daddr)
{
struct ip_vs_lblcr_entry *en;
en = kmalloc(sizeof(struct ip_vs_lblcr_entry), GFP_ATOMIC);
if (en == NULL) {
IP_VS_ERR("ip_vs_lblcr_new(): no memory\n");
return NULL;
}
INIT_LIST_HEAD(&en->list);
en->addr = daddr;
/* initilize its dest set */
atomic_set(&(en->set.size), 0);
en->set.list = NULL;
rwlock_init(&en->set.lock);
return en;
}
static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
{
list_del(&en->list);
......@@ -358,55 +321,68 @@ static inline unsigned ip_vs_lblcr_hashkey(__be32 addr)
* Hash an entry in the ip_vs_lblcr_table.
* returns bool success.
*/
static int
static void
ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
{
unsigned hash;
if (!list_empty(&en->list)) {
IP_VS_ERR("ip_vs_lblcr_hash(): request for already hashed, "
"called from %p\n", __builtin_return_address(0));
return 0;
}
/*
* Hash by destination IP address
*/
hash = ip_vs_lblcr_hashkey(en->addr);
unsigned hash = ip_vs_lblcr_hashkey(en->addr);
write_lock(&tbl->lock);
list_add(&en->list, &tbl->bucket[hash]);
atomic_inc(&tbl->entries);
write_unlock(&tbl->lock);
return 1;
}
/*
* Get ip_vs_lblcr_entry associated with supplied parameters.
* Get ip_vs_lblcr_entry associated with supplied parameters. Called under
* read lock.
*/
static inline struct ip_vs_lblcr_entry *
ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __be32 addr)
{
unsigned hash;
unsigned hash = ip_vs_lblcr_hashkey(addr);
struct ip_vs_lblcr_entry *en;
hash = ip_vs_lblcr_hashkey(addr);
list_for_each_entry(en, &tbl->bucket[hash], list)
if (en->addr == addr)
return en;
return NULL;
}
read_lock(&tbl->lock);
list_for_each_entry(en, &tbl->bucket[hash], list) {
if (en->addr == addr) {
/* HIT */
read_unlock(&tbl->lock);
return en;
/*
* Create or update an ip_vs_lblcr_entry, which is a mapping of a destination
* IP address to a server. Called under write lock.
*/
static inline struct ip_vs_lblcr_entry *
ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, __be32 daddr,
struct ip_vs_dest *dest)
{
struct ip_vs_lblcr_entry *en;
en = ip_vs_lblcr_get(tbl, daddr);
if (!en) {
en = kmalloc(sizeof(*en), GFP_ATOMIC);
if (!en) {
IP_VS_ERR("ip_vs_lblcr_new(): no memory\n");
return NULL;
}
en->addr = daddr;
en->lastuse = jiffies;
/* initilize its dest set */
atomic_set(&(en->set.size), 0);
en->set.list = NULL;
rwlock_init(&en->set.lock);
ip_vs_lblcr_hash(tbl, en);
}
read_unlock(&tbl->lock);
write_lock(&en->set.lock);
ip_vs_dest_set_insert(&en->set, dest);
write_unlock(&en->set.lock);
return NULL;
return en;
}
......@@ -418,19 +394,18 @@ static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl)
int i;
struct ip_vs_lblcr_entry *en, *nxt;
/* No locking required, only called during cleanup. */
for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
write_lock(&tbl->lock);
list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) {
ip_vs_lblcr_free(en);
atomic_dec(&tbl->entries);
}
write_unlock(&tbl->lock);
}
}
static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl)
static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
{
struct ip_vs_lblcr_table *tbl = svc->sched_data;
unsigned long now = jiffies;
int i, j;
struct ip_vs_lblcr_entry *en, *nxt;
......@@ -438,7 +413,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl)
for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
write_lock(&tbl->lock);
write_lock(&svc->sched_lock);
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration,
now))
......@@ -447,7 +422,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl)
ip_vs_lblcr_free(en);
atomic_dec(&tbl->entries);
}
write_unlock(&tbl->lock);
write_unlock(&svc->sched_lock);
}
tbl->rover = j;
}
......@@ -466,17 +441,16 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl)
*/
static void ip_vs_lblcr_check_expire(unsigned long data)
{
struct ip_vs_lblcr_table *tbl;
struct ip_vs_service *svc = (struct ip_vs_service *) data;
struct ip_vs_lblcr_table *tbl = svc->sched_data;
unsigned long now = jiffies;
int goal;
int i, j;
struct ip_vs_lblcr_entry *en, *nxt;
tbl = (struct ip_vs_lblcr_table *)data;
if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
/* do full expiration check */
ip_vs_lblcr_full_check(tbl);
ip_vs_lblcr_full_check(svc);
tbl->counter = 1;
goto out;
}
......@@ -493,7 +467,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data)
for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
write_lock(&tbl->lock);
write_lock(&svc->sched_lock);
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
if (time_before(now, en->lastuse+ENTRY_TIMEOUT))
continue;
......@@ -502,7 +476,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data)
atomic_dec(&tbl->entries);
goal--;
}
write_unlock(&tbl->lock);
write_unlock(&svc->sched_lock);
if (goal <= 0)
break;
}
......@@ -520,15 +494,14 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
/*
* Allocate the ip_vs_lblcr_table for this service
*/
tbl = kmalloc(sizeof(struct ip_vs_lblcr_table), GFP_ATOMIC);
tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
if (tbl == NULL) {
IP_VS_ERR("ip_vs_lblcr_init_svc(): no memory\n");
return -ENOMEM;
}
svc->sched_data = tbl;
IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) allocated for "
"current service\n",
sizeof(struct ip_vs_lblcr_table));
"current service\n", sizeof(*tbl));
/*
* Initialize the hash buckets
......@@ -536,7 +509,6 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
INIT_LIST_HEAD(&tbl->bucket[i]);
}
rwlock_init(&tbl->lock);
tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
tbl->rover = 0;
tbl->counter = 1;
......@@ -545,9 +517,8 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
* Hook periodic timer for garbage collection
*/
setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire,
(unsigned long)tbl);
tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL;
add_timer(&tbl->periodic_timer);
(unsigned long)svc);
mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
return 0;
}
......@@ -564,22 +535,16 @@ static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
ip_vs_lblcr_flush(tbl);
/* release the table itself */
kfree(svc->sched_data);
kfree(tbl);
IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n",
sizeof(struct ip_vs_lblcr_table));
sizeof(*tbl));
return 0;
}
static int ip_vs_lblcr_update_svc(struct ip_vs_service *svc)
{
return 0;
}
static inline struct ip_vs_dest *
__ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
__ip_vs_lblcr_schedule(struct ip_vs_service *svc, struct iphdr *iph)
{
struct ip_vs_dest *dest, *least;
int loh, doh;
......@@ -669,50 +634,78 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
static struct ip_vs_dest *
ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
struct ip_vs_dest *dest;
struct ip_vs_lblcr_table *tbl;
struct ip_vs_lblcr_entry *en;
struct ip_vs_lblcr_table *tbl = svc->sched_data;
struct iphdr *iph = ip_hdr(skb);
struct ip_vs_dest *dest = NULL;
struct ip_vs_lblcr_entry *en;
IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n");
tbl = (struct ip_vs_lblcr_table *)svc->sched_data;
/* First look in our cache */
read_lock(&svc->sched_lock);
en = ip_vs_lblcr_get(tbl, iph->daddr);
if (en == NULL) {
dest = __ip_vs_wlc_schedule(svc, iph);
if (dest == NULL) {
IP_VS_DBG(1, "no destination available\n");
return NULL;
}
en = ip_vs_lblcr_new(iph->daddr);
if (en == NULL) {
return NULL;
}
ip_vs_dest_set_insert(&en->set, dest);
ip_vs_lblcr_hash(tbl, en);
} else {
if (en) {
/* We only hold a read lock, but this is atomic */
en->lastuse = jiffies;
/* Get the least loaded destination */
read_lock(&en->set.lock);
dest = ip_vs_dest_set_min(&en->set);
if (!dest || is_overloaded(dest, svc)) {
dest = __ip_vs_wlc_schedule(svc, iph);
if (dest == NULL) {
IP_VS_DBG(1, "no destination available\n");
return NULL;
}
ip_vs_dest_set_insert(&en->set, dest);
}
read_unlock(&en->set.lock);
/* More than one destination + enough time passed by, cleanup */
if (atomic_read(&en->set.size) > 1 &&
jiffies-en->set.lastmod > sysctl_ip_vs_lblcr_expiration) {
time_after(jiffies, en->set.lastmod +
sysctl_ip_vs_lblcr_expiration)) {
struct ip_vs_dest *m;
write_lock(&en->set.lock);
m = ip_vs_dest_set_max(&en->set);
if (m)
ip_vs_dest_set_erase(&en->set, m);
write_unlock(&en->set.lock);
}
/* If the destination is not overloaded, use it */
if (dest && !is_overloaded(dest, svc)) {
read_unlock(&svc->sched_lock);
goto out;
}
en->lastuse = jiffies;
/* The cache entry is invalid, time to schedule */
dest = __ip_vs_lblcr_schedule(svc, iph);
if (!dest) {
IP_VS_DBG(1, "no destination available\n");
read_unlock(&svc->sched_lock);
return NULL;
}
/* Update our cache entry */
write_lock(&en->set.lock);
ip_vs_dest_set_insert(&en->set, dest);
write_unlock(&en->set.lock);
}
read_unlock(&svc->sched_lock);
if (dest)
goto out;
/* No cache entry, time to schedule */
dest = __ip_vs_lblcr_schedule(svc, iph);
if (!dest) {
IP_VS_DBG(1, "no destination available\n");
return NULL;
}
/* If we fail to create a cache entry, we'll just use the valid dest */
write_lock(&svc->sched_lock);
ip_vs_lblcr_new(tbl, iph->daddr, dest);
write_unlock(&svc->sched_lock);
out:
IP_VS_DBG(6, "LBLCR: destination IP address %u.%u.%u.%u "
"--> server %u.%u.%u.%u:%d\n",
NIPQUAD(en->addr),
NIPQUAD(iph->daddr),
NIPQUAD(dest->addr),
ntohs(dest->port));
......@@ -731,7 +724,6 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
.n_list = LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list),
.init_service = ip_vs_lblcr_init_svc,
.done_service = ip_vs_lblcr_done_svc,
.update_service = ip_vs_lblcr_update_svc,
.schedule = ip_vs_lblcr_schedule,
};
......
......@@ -20,24 +20,6 @@
#include <net/ip_vs.h>
static int ip_vs_lc_init_svc(struct ip_vs_service *svc)
{
return 0;
}
static int ip_vs_lc_done_svc(struct ip_vs_service *svc)
{
return 0;
}
static int ip_vs_lc_update_svc(struct ip_vs_service *svc)
{
return 0;
}
static inline unsigned int
ip_vs_lc_dest_overhead(struct ip_vs_dest *dest)
{
......@@ -99,9 +81,6 @@ static struct ip_vs_scheduler ip_vs_lc_scheduler = {
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list),
.init_service = ip_vs_lc_init_svc,
.done_service = ip_vs_lc_done_svc,
.update_service = ip_vs_lc_update_svc,
.schedule = ip_vs_lc_schedule,
};
......
......@@ -37,27 +37,6 @@
#include <net/ip_vs.h>
static int
ip_vs_nq_init_svc(struct ip_vs_service *svc)
{
return 0;
}
static int
ip_vs_nq_done_svc(struct ip_vs_service *svc)
{
return 0;
}
static int
ip_vs_nq_update_svc(struct ip_vs_service *svc)
{
return 0;
}
static inline unsigned int
ip_vs_nq_dest_overhead(struct ip_vs_dest *dest)
{
......@@ -137,9 +116,6 @@ static struct ip_vs_scheduler ip_vs_nq_scheduler =
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list),
.init_service = ip_vs_nq_init_svc,
.done_service = ip_vs_nq_done_svc,
.update_service = ip_vs_nq_update_svc,
.schedule = ip_vs_nq_schedule,
};
......
/*
* ip_vs_proto_ah.c: AH IPSec load balancing support for IPVS
* ip_vs_proto_ah_esp.c: AH/ESP IPSec load balancing support for IPVS
*
* Authors: Julian Anastasov <ja@ssi.bg>, February 2002
* Wensong Zhang <wensong@linuxvirtualserver.org>
......@@ -39,7 +39,7 @@ struct isakmp_hdr {
static struct ip_vs_conn *
ah_conn_in_get(const struct sk_buff *skb,
ah_esp_conn_in_get(const struct sk_buff *skb,
struct ip_vs_protocol *pp,
const struct iphdr *iph,
unsigned int proto_off,
......@@ -79,7 +79,7 @@ ah_conn_in_get(const struct sk_buff *skb,
static struct ip_vs_conn *
ah_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
ah_esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
const struct iphdr *iph, unsigned int proto_off, int inverse)
{
struct ip_vs_conn *cp;
......@@ -112,12 +112,12 @@ ah_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
static int
ah_conn_schedule(struct sk_buff *skb,
ah_esp_conn_schedule(struct sk_buff *skb,
struct ip_vs_protocol *pp,
int *verdict, struct ip_vs_conn **cpp)
{
/*
* AH is only related traffic. Pass the packet to IP stack.
* AH/ESP is only related traffic. Pass the packet to IP stack.
*/
*verdict = NF_ACCEPT;
return 0;
......@@ -125,7 +125,7 @@ ah_conn_schedule(struct sk_buff *skb,
static void
ah_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
int offset, const char *msg)
{
char buf[256];
......@@ -143,28 +143,29 @@ ah_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
}
static void ah_init(struct ip_vs_protocol *pp)
static void ah_esp_init(struct ip_vs_protocol *pp)
{
/* nothing to do now */
}
static void ah_exit(struct ip_vs_protocol *pp)
static void ah_esp_exit(struct ip_vs_protocol *pp)
{
/* nothing to do now */
}
#ifdef CONFIG_IP_VS_PROTO_AH
struct ip_vs_protocol ip_vs_protocol_ah = {
.name = "AH",
.protocol = IPPROTO_AH,
.num_states = 1,
.dont_defrag = 1,
.init = ah_init,
.exit = ah_exit,
.conn_schedule = ah_conn_schedule,
.conn_in_get = ah_conn_in_get,
.conn_out_get = ah_conn_out_get,
.init = ah_esp_init,
.exit = ah_esp_exit,
.conn_schedule = ah_esp_conn_schedule,
.conn_in_get = ah_esp_conn_in_get,
.conn_out_get = ah_esp_conn_out_get,
.snat_handler = NULL,
.dnat_handler = NULL,
.csum_check = NULL,
......@@ -172,7 +173,31 @@ struct ip_vs_protocol ip_vs_protocol_ah = {
.register_app = NULL,
.unregister_app = NULL,
.app_conn_bind = NULL,
.debug_packet = ah_debug_packet,
.debug_packet = ah_esp_debug_packet,
.timeout_change = NULL, /* ISAKMP */
.set_state_timeout = NULL,
};
#endif
#ifdef CONFIG_IP_VS_PROTO_ESP
struct ip_vs_protocol ip_vs_protocol_esp = {
.name = "ESP",
.protocol = IPPROTO_ESP,
.num_states = 1,
.dont_defrag = 1,
.init = ah_esp_init,
.exit = ah_esp_exit,
.conn_schedule = ah_esp_conn_schedule,
.conn_in_get = ah_esp_conn_in_get,
.conn_out_get = ah_esp_conn_out_get,
.snat_handler = NULL,
.dnat_handler = NULL,
.csum_check = NULL,
.state_transition = NULL,
.register_app = NULL,
.unregister_app = NULL,
.app_conn_bind = NULL,
.debug_packet = ah_esp_debug_packet,
.timeout_change = NULL, /* ISAKMP */
};
#endif
/*
* ip_vs_proto_esp.c: ESP IPSec load balancing support for IPVS
*
* Authors: Julian Anastasov <ja@ssi.bg>, February 2002
* Wensong Zhang <wensong@linuxvirtualserver.org>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 2 as published by the Free Software Foundation;
*
*/
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <net/ip_vs.h>
/* TODO:
struct isakmp_hdr {
__u8 icookie[8];
__u8 rcookie[8];
__u8 np;
__u8 version;
__u8 xchgtype;
__u8 flags;
__u32 msgid;
__u32 length;
};
*/
#define PORT_ISAKMP 500
static struct ip_vs_conn *
esp_conn_in_get(const struct sk_buff *skb,
struct ip_vs_protocol *pp,
const struct iphdr *iph,
unsigned int proto_off,
int inverse)
{
struct ip_vs_conn *cp;
if (likely(!inverse)) {
cp = ip_vs_conn_in_get(IPPROTO_UDP,
iph->saddr,
htons(PORT_ISAKMP),
iph->daddr,
htons(PORT_ISAKMP));
} else {
cp = ip_vs_conn_in_get(IPPROTO_UDP,
iph->daddr,
htons(PORT_ISAKMP),
iph->saddr,
htons(PORT_ISAKMP));
}
if (!cp) {
/*
* We are not sure if the packet is from our
* service, so our conn_schedule hook should return NF_ACCEPT
*/
IP_VS_DBG(12, "Unknown ISAKMP entry for outin packet "
"%s%s %u.%u.%u.%u->%u.%u.%u.%u\n",
inverse ? "ICMP+" : "",
pp->name,
NIPQUAD(iph->saddr),
NIPQUAD(iph->daddr));
}
return cp;
}
static struct ip_vs_conn *
esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
const struct iphdr *iph, unsigned int proto_off, int inverse)
{
struct ip_vs_conn *cp;
if (likely(!inverse)) {
cp = ip_vs_conn_out_get(IPPROTO_UDP,
iph->saddr,
htons(PORT_ISAKMP),
iph->daddr,
htons(PORT_ISAKMP));
} else {
cp = ip_vs_conn_out_get(IPPROTO_UDP,
iph->daddr,
htons(PORT_ISAKMP),
iph->saddr,
htons(PORT_ISAKMP));
}
if (!cp) {
IP_VS_DBG(12, "Unknown ISAKMP entry for inout packet "
"%s%s %u.%u.%u.%u->%u.%u.%u.%u\n",
inverse ? "ICMP+" : "",
pp->name,
NIPQUAD(iph->saddr),
NIPQUAD(iph->daddr));
}
return cp;
}
static int
esp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
int *verdict, struct ip_vs_conn **cpp)
{
/*
* ESP is only related traffic. Pass the packet to IP stack.
*/
*verdict = NF_ACCEPT;
return 0;
}
static void
esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
int offset, const char *msg)
{
char buf[256];
struct iphdr _iph, *ih;
ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
if (ih == NULL)
sprintf(buf, "%s TRUNCATED", pp->name);
else
sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u",
pp->name, NIPQUAD(ih->saddr),
NIPQUAD(ih->daddr));
printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
}
static void esp_init(struct ip_vs_protocol *pp)
{
/* nothing to do now */
}
static void esp_exit(struct ip_vs_protocol *pp)
{
/* nothing to do now */
}
struct ip_vs_protocol ip_vs_protocol_esp = {
.name = "ESP",
.protocol = IPPROTO_ESP,
.num_states = 1,
.dont_defrag = 1,
.init = esp_init,
.exit = esp_exit,
.conn_schedule = esp_conn_schedule,
.conn_in_get = esp_conn_in_get,
.conn_out_get = esp_conn_out_get,
.snat_handler = NULL,
.dnat_handler = NULL,
.csum_check = NULL,
.state_transition = NULL,
.register_app = NULL,
.unregister_app = NULL,
.app_conn_bind = NULL,
.debug_packet = esp_debug_packet,
.timeout_change = NULL, /* ISAKMP */
};
......@@ -32,12 +32,6 @@ static int ip_vs_rr_init_svc(struct ip_vs_service *svc)
}
static int ip_vs_rr_done_svc(struct ip_vs_service *svc)
{
return 0;
}
static int ip_vs_rr_update_svc(struct ip_vs_service *svc)
{
svc->sched_data = &svc->destinations;
......@@ -96,7 +90,6 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = {
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list),
.init_service = ip_vs_rr_init_svc,
.done_service = ip_vs_rr_done_svc,
.update_service = ip_vs_rr_update_svc,
.schedule = ip_vs_rr_schedule,
};
......
......@@ -41,27 +41,6 @@
#include <net/ip_vs.h>
static int
ip_vs_sed_init_svc(struct ip_vs_service *svc)
{
return 0;
}
static int
ip_vs_sed_done_svc(struct ip_vs_service *svc)
{
return 0;
}
static int
ip_vs_sed_update_svc(struct ip_vs_service *svc)
{
return 0;
}
static inline unsigned int
ip_vs_sed_dest_overhead(struct ip_vs_dest *dest)
{
......@@ -139,9 +118,6 @@ static struct ip_vs_scheduler ip_vs_sed_scheduler =
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list),
.init_service = ip_vs_sed_init_svc,
.done_service = ip_vs_sed_done_svc,
.update_service = ip_vs_sed_update_svc,
.schedule = ip_vs_sed_schedule,
};
......
......@@ -25,27 +25,6 @@
#include <net/ip_vs.h>
static int
ip_vs_wlc_init_svc(struct ip_vs_service *svc)
{
return 0;
}
static int
ip_vs_wlc_done_svc(struct ip_vs_service *svc)
{
return 0;
}
static int
ip_vs_wlc_update_svc(struct ip_vs_service *svc)
{
return 0;
}
static inline unsigned int
ip_vs_wlc_dest_overhead(struct ip_vs_dest *dest)
{
......@@ -127,9 +106,6 @@ static struct ip_vs_scheduler ip_vs_wlc_scheduler =
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list),
.init_service = ip_vs_wlc_init_svc,
.done_service = ip_vs_wlc_done_svc,
.update_service = ip_vs_wlc_update_svc,
.schedule = ip_vs_wlc_schedule,
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment