Commit 9872bec7 authored by Patrick McHardy, committed by David S. Miller

[NETFILTER]: nfnetlink: use RCU for queue instances hash

Use RCU for queue instances hash. Avoids multiple atomic operations
for each packet.
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent a3c8e7fd
...@@ -47,7 +47,7 @@ ...@@ -47,7 +47,7 @@
struct nfqnl_instance { struct nfqnl_instance {
struct hlist_node hlist; /* global list of queues */ struct hlist_node hlist; /* global list of queues */
atomic_t use; struct rcu_head rcu;
int peer_pid; int peer_pid;
unsigned int queue_maxlen; unsigned int queue_maxlen;
...@@ -68,7 +68,7 @@ struct nfqnl_instance { ...@@ -68,7 +68,7 @@ struct nfqnl_instance {
typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long); typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
static DEFINE_RWLOCK(instances_lock); static DEFINE_SPINLOCK(instances_lock);
#define INSTANCE_BUCKETS 16 #define INSTANCE_BUCKETS 16
static struct hlist_head instance_table[INSTANCE_BUCKETS]; static struct hlist_head instance_table[INSTANCE_BUCKETS];
...@@ -79,52 +79,30 @@ static inline u_int8_t instance_hashfn(u_int16_t queue_num) ...@@ -79,52 +79,30 @@ static inline u_int8_t instance_hashfn(u_int16_t queue_num)
} }
static struct nfqnl_instance * static struct nfqnl_instance *
__instance_lookup(u_int16_t queue_num) instance_lookup(u_int16_t queue_num)
{ {
struct hlist_head *head; struct hlist_head *head;
struct hlist_node *pos; struct hlist_node *pos;
struct nfqnl_instance *inst; struct nfqnl_instance *inst;
head = &instance_table[instance_hashfn(queue_num)]; head = &instance_table[instance_hashfn(queue_num)];
hlist_for_each_entry(inst, pos, head, hlist) { hlist_for_each_entry_rcu(inst, pos, head, hlist) {
if (inst->queue_num == queue_num) if (inst->queue_num == queue_num)
return inst; return inst;
} }
return NULL; return NULL;
} }
static struct nfqnl_instance *
instance_lookup_get(u_int16_t queue_num)
{
struct nfqnl_instance *inst;
read_lock_bh(&instances_lock);
inst = __instance_lookup(queue_num);
if (inst)
atomic_inc(&inst->use);
read_unlock_bh(&instances_lock);
return inst;
}
static void
instance_put(struct nfqnl_instance *inst)
{
if (inst && atomic_dec_and_test(&inst->use)) {
QDEBUG("kfree(inst=%p)\n", inst);
kfree(inst);
}
}
static struct nfqnl_instance * static struct nfqnl_instance *
instance_create(u_int16_t queue_num, int pid) instance_create(u_int16_t queue_num, int pid)
{ {
struct nfqnl_instance *inst; struct nfqnl_instance *inst;
unsigned int h;
QDEBUG("entering for queue_num=%u, pid=%d\n", queue_num, pid); QDEBUG("entering for queue_num=%u, pid=%d\n", queue_num, pid);
write_lock_bh(&instances_lock); spin_lock(&instances_lock);
if (__instance_lookup(queue_num)) { if (instance_lookup(queue_num)) {
inst = NULL; inst = NULL;
QDEBUG("aborting, instance already exists\n"); QDEBUG("aborting, instance already exists\n");
goto out_unlock; goto out_unlock;
...@@ -139,18 +117,17 @@ instance_create(u_int16_t queue_num, int pid) ...@@ -139,18 +117,17 @@ instance_create(u_int16_t queue_num, int pid)
inst->queue_maxlen = NFQNL_QMAX_DEFAULT; inst->queue_maxlen = NFQNL_QMAX_DEFAULT;
inst->copy_range = 0xfffff; inst->copy_range = 0xfffff;
inst->copy_mode = NFQNL_COPY_NONE; inst->copy_mode = NFQNL_COPY_NONE;
/* needs to be two, since we _put() after creation */
atomic_set(&inst->use, 2);
spin_lock_init(&inst->lock); spin_lock_init(&inst->lock);
INIT_LIST_HEAD(&inst->queue_list); INIT_LIST_HEAD(&inst->queue_list);
INIT_RCU_HEAD(&inst->rcu);
if (!try_module_get(THIS_MODULE)) if (!try_module_get(THIS_MODULE))
goto out_free; goto out_free;
hlist_add_head(&inst->hlist, h = instance_hashfn(queue_num);
&instance_table[instance_hashfn(queue_num)]); hlist_add_head_rcu(&inst->hlist, &instance_table[h]);
write_unlock_bh(&instances_lock); spin_unlock(&instances_lock);
QDEBUG("successfully created new instance\n"); QDEBUG("successfully created new instance\n");
...@@ -159,7 +136,7 @@ instance_create(u_int16_t queue_num, int pid) ...@@ -159,7 +136,7 @@ instance_create(u_int16_t queue_num, int pid)
out_free: out_free:
kfree(inst); kfree(inst);
out_unlock: out_unlock:
write_unlock_bh(&instances_lock); spin_unlock(&instances_lock);
return NULL; return NULL;
} }
...@@ -167,38 +144,29 @@ static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, ...@@ -167,38 +144,29 @@ static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn,
unsigned long data); unsigned long data);
static void static void
_instance_destroy2(struct nfqnl_instance *inst, int lock) instance_destroy_rcu(struct rcu_head *head)
{ {
/* first pull it out of the global list */ struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance,
if (lock) rcu);
write_lock_bh(&instances_lock);
QDEBUG("removing instance %p (queuenum=%u) from hash\n",
inst, inst->queue_num);
hlist_del(&inst->hlist);
if (lock)
write_unlock_bh(&instances_lock);
/* then flush all pending skbs from the queue */
nfqnl_flush(inst, NULL, 0); nfqnl_flush(inst, NULL, 0);
kfree(inst);
/* and finally put the refcount */
instance_put(inst);
module_put(THIS_MODULE); module_put(THIS_MODULE);
} }
static inline void static void
__instance_destroy(struct nfqnl_instance *inst) __instance_destroy(struct nfqnl_instance *inst)
{ {
_instance_destroy2(inst, 0); hlist_del_rcu(&inst->hlist);
call_rcu(&inst->rcu, instance_destroy_rcu);
} }
static inline void static void
instance_destroy(struct nfqnl_instance *inst) instance_destroy(struct nfqnl_instance *inst)
{ {
_instance_destroy2(inst, 1); spin_lock(&instances_lock);
__instance_destroy(inst);
spin_unlock(&instances_lock);
} }
static inline void static inline void
...@@ -485,7 +453,8 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) ...@@ -485,7 +453,8 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
QDEBUG("entered\n"); QDEBUG("entered\n");
queue = instance_lookup_get(queuenum); /* rcu_read_lock()ed by nf_hook_slow() */
queue = instance_lookup(queuenum);
if (!queue) { if (!queue) {
QDEBUG("no queue instance matching\n"); QDEBUG("no queue instance matching\n");
return -EINVAL; return -EINVAL;
...@@ -493,13 +462,12 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) ...@@ -493,13 +462,12 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
if (queue->copy_mode == NFQNL_COPY_NONE) { if (queue->copy_mode == NFQNL_COPY_NONE) {
QDEBUG("mode COPY_NONE, aborting\n"); QDEBUG("mode COPY_NONE, aborting\n");
status = -EAGAIN; return -EAGAIN;
goto err_out_put;
} }
nskb = nfqnl_build_packet_message(queue, entry, &status); nskb = nfqnl_build_packet_message(queue, entry, &status);
if (nskb == NULL) if (nskb == NULL)
goto err_out_put; return status;
spin_lock_bh(&queue->lock); spin_lock_bh(&queue->lock);
...@@ -526,7 +494,6 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) ...@@ -526,7 +494,6 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
__enqueue_entry(queue, entry); __enqueue_entry(queue, entry);
spin_unlock_bh(&queue->lock); spin_unlock_bh(&queue->lock);
instance_put(queue);
return status; return status;
err_out_free_nskb: err_out_free_nskb:
...@@ -534,9 +501,6 @@ err_out_free_nskb: ...@@ -534,9 +501,6 @@ err_out_free_nskb:
err_out_unlock: err_out_unlock:
spin_unlock_bh(&queue->lock); spin_unlock_bh(&queue->lock);
err_out_put:
instance_put(queue);
return status; return status;
} }
...@@ -616,21 +580,18 @@ nfqnl_dev_drop(int ifindex) ...@@ -616,21 +580,18 @@ nfqnl_dev_drop(int ifindex)
QDEBUG("entering for ifindex %u\n", ifindex); QDEBUG("entering for ifindex %u\n", ifindex);
/* this only looks like we have to hold the readlock for a way too long rcu_read_lock();
* time, issue_verdict(), nf_reinject(), ... - but we always only
* issue NF_DROP, which is processed directly in nf_reinject() */
read_lock_bh(&instances_lock);
for (i = 0; i < INSTANCE_BUCKETS; i++) { for (i = 0; i < INSTANCE_BUCKETS; i++) {
struct hlist_node *tmp; struct hlist_node *tmp;
struct nfqnl_instance *inst; struct nfqnl_instance *inst;
struct hlist_head *head = &instance_table[i]; struct hlist_head *head = &instance_table[i];
hlist_for_each_entry(inst, tmp, head, hlist) hlist_for_each_entry_rcu(inst, tmp, head, hlist)
nfqnl_flush(inst, dev_cmp, ifindex); nfqnl_flush(inst, dev_cmp, ifindex);
} }
read_unlock_bh(&instances_lock); rcu_read_unlock();
} }
#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) #define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
...@@ -665,7 +626,7 @@ nfqnl_rcv_nl_event(struct notifier_block *this, ...@@ -665,7 +626,7 @@ nfqnl_rcv_nl_event(struct notifier_block *this,
int i; int i;
/* destroy all instances for this pid */ /* destroy all instances for this pid */
write_lock_bh(&instances_lock); spin_lock(&instances_lock);
for (i = 0; i < INSTANCE_BUCKETS; i++) { for (i = 0; i < INSTANCE_BUCKETS; i++) {
struct hlist_node *tmp, *t2; struct hlist_node *tmp, *t2;
struct nfqnl_instance *inst; struct nfqnl_instance *inst;
...@@ -677,7 +638,7 @@ nfqnl_rcv_nl_event(struct notifier_block *this, ...@@ -677,7 +638,7 @@ nfqnl_rcv_nl_event(struct notifier_block *this,
__instance_destroy(inst); __instance_destroy(inst);
} }
} }
write_unlock_bh(&instances_lock); spin_unlock(&instances_lock);
} }
return NOTIFY_DONE; return NOTIFY_DONE;
} }
...@@ -705,18 +666,21 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, ...@@ -705,18 +666,21 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
struct nf_queue_entry *entry; struct nf_queue_entry *entry;
int err; int err;
queue = instance_lookup_get(queue_num); rcu_read_lock();
if (!queue) queue = instance_lookup(queue_num);
return -ENODEV; if (!queue) {
err = -ENODEV;
goto err_out_unlock;
}
if (queue->peer_pid != NETLINK_CB(skb).pid) { if (queue->peer_pid != NETLINK_CB(skb).pid) {
err = -EPERM; err = -EPERM;
goto err_out_put; goto err_out_unlock;
} }
if (!nfqa[NFQA_VERDICT_HDR]) { if (!nfqa[NFQA_VERDICT_HDR]) {
err = -EINVAL; err = -EINVAL;
goto err_out_put; goto err_out_unlock;
} }
vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]); vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]);
...@@ -724,14 +688,15 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, ...@@ -724,14 +688,15 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) { if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) {
err = -EINVAL; err = -EINVAL;
goto err_out_put; goto err_out_unlock;
} }
entry = find_dequeue_entry(queue, ntohl(vhdr->id)); entry = find_dequeue_entry(queue, ntohl(vhdr->id));
if (entry == NULL) { if (entry == NULL) {
err = -ENOENT; err = -ENOENT;
goto err_out_put; goto err_out_unlock;
} }
rcu_read_unlock();
if (nfqa[NFQA_PAYLOAD]) { if (nfqa[NFQA_PAYLOAD]) {
if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]), if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]),
...@@ -744,11 +709,10 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, ...@@ -744,11 +709,10 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
nla_data(nfqa[NFQA_MARK])); nla_data(nfqa[NFQA_MARK]));
nf_reinject(entry, verdict); nf_reinject(entry, verdict);
instance_put(queue);
return 0; return 0;
err_out_put: err_out_unlock:
instance_put(queue); rcu_read_unlock();
return err; return err;
} }
...@@ -776,45 +740,61 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, ...@@ -776,45 +740,61 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
u_int16_t queue_num = ntohs(nfmsg->res_id); u_int16_t queue_num = ntohs(nfmsg->res_id);
struct nfqnl_instance *queue; struct nfqnl_instance *queue;
struct nfqnl_msg_config_cmd *cmd = NULL;
int ret = 0; int ret = 0;
QDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type)); QDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type));
queue = instance_lookup_get(queue_num); if (nfqa[NFQA_CFG_CMD]) {
if (queue && queue->peer_pid != NETLINK_CB(skb).pid) { cmd = nla_data(nfqa[NFQA_CFG_CMD]);
ret = -EPERM;
goto out_put; /* Commands without queue context - might sleep */
switch (cmd->command) {
case NFQNL_CFG_CMD_PF_BIND:
ret = nf_register_queue_handler(ntohs(cmd->pf),
&nfqh);
break;
case NFQNL_CFG_CMD_PF_UNBIND:
ret = nf_unregister_queue_handler(ntohs(cmd->pf),
&nfqh);
break;
default:
break;
} }
if (nfqa[NFQA_CFG_CMD]) { if (ret < 0)
struct nfqnl_msg_config_cmd *cmd; return ret;
}
cmd = nla_data(nfqa[NFQA_CFG_CMD]); rcu_read_lock();
QDEBUG("found CFG_CMD\n"); queue = instance_lookup(queue_num);
if (queue && queue->peer_pid != NETLINK_CB(skb).pid) {
ret = -EPERM;
goto err_out_unlock;
}
if (cmd != NULL) {
switch (cmd->command) { switch (cmd->command) {
case NFQNL_CFG_CMD_BIND: case NFQNL_CFG_CMD_BIND:
if (queue) if (queue) {
return -EBUSY; ret = -EBUSY;
goto err_out_unlock;
}
queue = instance_create(queue_num, NETLINK_CB(skb).pid); queue = instance_create(queue_num, NETLINK_CB(skb).pid);
if (!queue) if (!queue) {
return -EINVAL; ret = -EINVAL;
goto err_out_unlock;
}
break; break;
case NFQNL_CFG_CMD_UNBIND: case NFQNL_CFG_CMD_UNBIND:
if (!queue) if (!queue) {
return -ENODEV; ret = -ENODEV;
goto err_out_unlock;
}
instance_destroy(queue); instance_destroy(queue);
break; break;
case NFQNL_CFG_CMD_PF_BIND: case NFQNL_CFG_CMD_PF_BIND:
QDEBUG("registering queue handler for pf=%u\n",
ntohs(cmd->pf));
ret = nf_register_queue_handler(ntohs(cmd->pf), &nfqh);
break;
case NFQNL_CFG_CMD_PF_UNBIND: case NFQNL_CFG_CMD_PF_UNBIND:
QDEBUG("unregistering queue handler for pf=%u\n",
ntohs(cmd->pf));
ret = nf_unregister_queue_handler(ntohs(cmd->pf), &nfqh);
break; break;
default: default:
ret = -EINVAL; ret = -EINVAL;
...@@ -827,7 +807,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, ...@@ -827,7 +807,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
if (!queue) { if (!queue) {
ret = -ENODEV; ret = -ENODEV;
goto out_put; goto err_out_unlock;
} }
params = nla_data(nfqa[NFQA_CFG_PARAMS]); params = nla_data(nfqa[NFQA_CFG_PARAMS]);
nfqnl_set_mode(queue, params->copy_mode, nfqnl_set_mode(queue, params->copy_mode,
...@@ -839,7 +819,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, ...@@ -839,7 +819,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
if (!queue) { if (!queue) {
ret = -ENODEV; ret = -ENODEV;
goto out_put; goto err_out_unlock;
} }
queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]); queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]);
spin_lock_bh(&queue->lock); spin_lock_bh(&queue->lock);
...@@ -847,8 +827,8 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, ...@@ -847,8 +827,8 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
spin_unlock_bh(&queue->lock); spin_unlock_bh(&queue->lock);
} }
out_put: err_out_unlock:
instance_put(queue); rcu_read_unlock();
return ret; return ret;
} }
...@@ -916,7 +896,7 @@ static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos) ...@@ -916,7 +896,7 @@ static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
static void *seq_start(struct seq_file *seq, loff_t *pos) static void *seq_start(struct seq_file *seq, loff_t *pos)
{ {
read_lock_bh(&instances_lock); spin_lock(&instances_lock);
return get_idx(seq, *pos); return get_idx(seq, *pos);
} }
...@@ -928,7 +908,7 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos) ...@@ -928,7 +908,7 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
static void seq_stop(struct seq_file *s, void *v) static void seq_stop(struct seq_file *s, void *v)
{ {
read_unlock_bh(&instances_lock); spin_unlock(&instances_lock);
} }
static int seq_show(struct seq_file *s, void *v) static int seq_show(struct seq_file *s, void *v)
...@@ -940,8 +920,7 @@ static int seq_show(struct seq_file *s, void *v) ...@@ -940,8 +920,7 @@ static int seq_show(struct seq_file *s, void *v)
inst->peer_pid, inst->queue_total, inst->peer_pid, inst->queue_total,
inst->copy_mode, inst->copy_range, inst->copy_mode, inst->copy_range,
inst->queue_dropped, inst->queue_user_dropped, inst->queue_dropped, inst->queue_user_dropped,
inst->id_sequence, inst->id_sequence, 1);
atomic_read(&inst->use));
} }
static const struct seq_operations nfqnl_seq_ops = { static const struct seq_operations nfqnl_seq_ops = {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment