Commit 9872bec7 authored by Patrick McHardy's avatar Patrick McHardy Committed by David S. Miller

[NETFILTER]: nfnetlink: use RCU for queue instances hash

Use RCU for queue instances hash. Avoids multiple atomic operations
for each packet.
Signed-off-by: default avatarPatrick McHardy <kaber@trash.net>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent a3c8e7fd
......@@ -47,7 +47,7 @@
struct nfqnl_instance {
struct hlist_node hlist; /* global list of queues */
atomic_t use;
struct rcu_head rcu;
int peer_pid;
unsigned int queue_maxlen;
......@@ -68,7 +68,7 @@ struct nfqnl_instance {
typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
static DEFINE_RWLOCK(instances_lock);
static DEFINE_SPINLOCK(instances_lock);
#define INSTANCE_BUCKETS 16
static struct hlist_head instance_table[INSTANCE_BUCKETS];
......@@ -79,52 +79,30 @@ static inline u_int8_t instance_hashfn(u_int16_t queue_num)
}
static struct nfqnl_instance *
__instance_lookup(u_int16_t queue_num)
instance_lookup(u_int16_t queue_num)
{
struct hlist_head *head;
struct hlist_node *pos;
struct nfqnl_instance *inst;
head = &instance_table[instance_hashfn(queue_num)];
hlist_for_each_entry(inst, pos, head, hlist) {
hlist_for_each_entry_rcu(inst, pos, head, hlist) {
if (inst->queue_num == queue_num)
return inst;
}
return NULL;
}
static struct nfqnl_instance *
instance_lookup_get(u_int16_t queue_num)
{
struct nfqnl_instance *inst;
read_lock_bh(&instances_lock);
inst = __instance_lookup(queue_num);
if (inst)
atomic_inc(&inst->use);
read_unlock_bh(&instances_lock);
return inst;
}
static void
instance_put(struct nfqnl_instance *inst)
{
if (inst && atomic_dec_and_test(&inst->use)) {
QDEBUG("kfree(inst=%p)\n", inst);
kfree(inst);
}
}
static struct nfqnl_instance *
instance_create(u_int16_t queue_num, int pid)
{
struct nfqnl_instance *inst;
unsigned int h;
QDEBUG("entering for queue_num=%u, pid=%d\n", queue_num, pid);
write_lock_bh(&instances_lock);
if (__instance_lookup(queue_num)) {
spin_lock(&instances_lock);
if (instance_lookup(queue_num)) {
inst = NULL;
QDEBUG("aborting, instance already exists\n");
goto out_unlock;
......@@ -139,18 +117,17 @@ instance_create(u_int16_t queue_num, int pid)
inst->queue_maxlen = NFQNL_QMAX_DEFAULT;
inst->copy_range = 0xfffff;
inst->copy_mode = NFQNL_COPY_NONE;
/* needs to be two, since we _put() after creation */
atomic_set(&inst->use, 2);
spin_lock_init(&inst->lock);
INIT_LIST_HEAD(&inst->queue_list);
INIT_RCU_HEAD(&inst->rcu);
if (!try_module_get(THIS_MODULE))
goto out_free;
hlist_add_head(&inst->hlist,
&instance_table[instance_hashfn(queue_num)]);
h = instance_hashfn(queue_num);
hlist_add_head_rcu(&inst->hlist, &instance_table[h]);
write_unlock_bh(&instances_lock);
spin_unlock(&instances_lock);
QDEBUG("successfully created new instance\n");
......@@ -159,7 +136,7 @@ instance_create(u_int16_t queue_num, int pid)
out_free:
kfree(inst);
out_unlock:
write_unlock_bh(&instances_lock);
spin_unlock(&instances_lock);
return NULL;
}
......@@ -167,38 +144,29 @@ static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn,
unsigned long data);
static void
_instance_destroy2(struct nfqnl_instance *inst, int lock)
instance_destroy_rcu(struct rcu_head *head)
{
/* first pull it out of the global list */
if (lock)
write_lock_bh(&instances_lock);
struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance,
rcu);
QDEBUG("removing instance %p (queuenum=%u) from hash\n",
inst, inst->queue_num);
hlist_del(&inst->hlist);
if (lock)
write_unlock_bh(&instances_lock);
/* then flush all pending skbs from the queue */
nfqnl_flush(inst, NULL, 0);
/* and finally put the refcount */
instance_put(inst);
kfree(inst);
module_put(THIS_MODULE);
}
static inline void
static void
__instance_destroy(struct nfqnl_instance *inst)
{
_instance_destroy2(inst, 0);
hlist_del_rcu(&inst->hlist);
call_rcu(&inst->rcu, instance_destroy_rcu);
}
static inline void
static void
instance_destroy(struct nfqnl_instance *inst)
{
_instance_destroy2(inst, 1);
spin_lock(&instances_lock);
__instance_destroy(inst);
spin_unlock(&instances_lock);
}
static inline void
......@@ -485,7 +453,8 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
QDEBUG("entered\n");
queue = instance_lookup_get(queuenum);
/* rcu_read_lock()ed by nf_hook_slow() */
queue = instance_lookup(queuenum);
if (!queue) {
QDEBUG("no queue instance matching\n");
return -EINVAL;
......@@ -493,13 +462,12 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
if (queue->copy_mode == NFQNL_COPY_NONE) {
QDEBUG("mode COPY_NONE, aborting\n");
status = -EAGAIN;
goto err_out_put;
return -EAGAIN;
}
nskb = nfqnl_build_packet_message(queue, entry, &status);
if (nskb == NULL)
goto err_out_put;
return status;
spin_lock_bh(&queue->lock);
......@@ -526,7 +494,6 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
__enqueue_entry(queue, entry);
spin_unlock_bh(&queue->lock);
instance_put(queue);
return status;
err_out_free_nskb:
......@@ -534,9 +501,6 @@ err_out_free_nskb:
err_out_unlock:
spin_unlock_bh(&queue->lock);
err_out_put:
instance_put(queue);
return status;
}
......@@ -616,21 +580,18 @@ nfqnl_dev_drop(int ifindex)
QDEBUG("entering for ifindex %u\n", ifindex);
/* this only looks like we have to hold the readlock for a way too long
* time, issue_verdict(), nf_reinject(), ... - but we always only
* issue NF_DROP, which is processed directly in nf_reinject() */
read_lock_bh(&instances_lock);
rcu_read_lock();
for (i = 0; i < INSTANCE_BUCKETS; i++) {
struct hlist_node *tmp;
struct nfqnl_instance *inst;
struct hlist_head *head = &instance_table[i];
hlist_for_each_entry(inst, tmp, head, hlist)
hlist_for_each_entry_rcu(inst, tmp, head, hlist)
nfqnl_flush(inst, dev_cmp, ifindex);
}
read_unlock_bh(&instances_lock);
rcu_read_unlock();
}
#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
......@@ -665,7 +626,7 @@ nfqnl_rcv_nl_event(struct notifier_block *this,
int i;
/* destroy all instances for this pid */
write_lock_bh(&instances_lock);
spin_lock(&instances_lock);
for (i = 0; i < INSTANCE_BUCKETS; i++) {
struct hlist_node *tmp, *t2;
struct nfqnl_instance *inst;
......@@ -677,7 +638,7 @@ nfqnl_rcv_nl_event(struct notifier_block *this,
__instance_destroy(inst);
}
}
write_unlock_bh(&instances_lock);
spin_unlock(&instances_lock);
}
return NOTIFY_DONE;
}
......@@ -705,18 +666,21 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
struct nf_queue_entry *entry;
int err;
queue = instance_lookup_get(queue_num);
if (!queue)
return -ENODEV;
rcu_read_lock();
queue = instance_lookup(queue_num);
if (!queue) {
err = -ENODEV;
goto err_out_unlock;
}
if (queue->peer_pid != NETLINK_CB(skb).pid) {
err = -EPERM;
goto err_out_put;
goto err_out_unlock;
}
if (!nfqa[NFQA_VERDICT_HDR]) {
err = -EINVAL;
goto err_out_put;
goto err_out_unlock;
}
vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]);
......@@ -724,14 +688,15 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) {
err = -EINVAL;
goto err_out_put;
goto err_out_unlock;
}
entry = find_dequeue_entry(queue, ntohl(vhdr->id));
if (entry == NULL) {
err = -ENOENT;
goto err_out_put;
goto err_out_unlock;
}
rcu_read_unlock();
if (nfqa[NFQA_PAYLOAD]) {
if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]),
......@@ -744,11 +709,10 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
nla_data(nfqa[NFQA_MARK]));
nf_reinject(entry, verdict);
instance_put(queue);
return 0;
err_out_put:
instance_put(queue);
err_out_unlock:
rcu_read_unlock();
return err;
}
......@@ -776,45 +740,61 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
u_int16_t queue_num = ntohs(nfmsg->res_id);
struct nfqnl_instance *queue;
struct nfqnl_msg_config_cmd *cmd = NULL;
int ret = 0;
QDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type));
queue = instance_lookup_get(queue_num);
if (queue && queue->peer_pid != NETLINK_CB(skb).pid) {
ret = -EPERM;
goto out_put;
if (nfqa[NFQA_CFG_CMD]) {
cmd = nla_data(nfqa[NFQA_CFG_CMD]);
/* Commands without queue context - might sleep */
switch (cmd->command) {
case NFQNL_CFG_CMD_PF_BIND:
ret = nf_register_queue_handler(ntohs(cmd->pf),
&nfqh);
break;
case NFQNL_CFG_CMD_PF_UNBIND:
ret = nf_unregister_queue_handler(ntohs(cmd->pf),
&nfqh);
break;
default:
break;
}
if (nfqa[NFQA_CFG_CMD]) {
struct nfqnl_msg_config_cmd *cmd;
if (ret < 0)
return ret;
}
cmd = nla_data(nfqa[NFQA_CFG_CMD]);
QDEBUG("found CFG_CMD\n");
rcu_read_lock();
queue = instance_lookup(queue_num);
if (queue && queue->peer_pid != NETLINK_CB(skb).pid) {
ret = -EPERM;
goto err_out_unlock;
}
if (cmd != NULL) {
switch (cmd->command) {
case NFQNL_CFG_CMD_BIND:
if (queue)
return -EBUSY;
if (queue) {
ret = -EBUSY;
goto err_out_unlock;
}
queue = instance_create(queue_num, NETLINK_CB(skb).pid);
if (!queue)
return -EINVAL;
if (!queue) {
ret = -EINVAL;
goto err_out_unlock;
}
break;
case NFQNL_CFG_CMD_UNBIND:
if (!queue)
return -ENODEV;
if (!queue) {
ret = -ENODEV;
goto err_out_unlock;
}
instance_destroy(queue);
break;
case NFQNL_CFG_CMD_PF_BIND:
QDEBUG("registering queue handler for pf=%u\n",
ntohs(cmd->pf));
ret = nf_register_queue_handler(ntohs(cmd->pf), &nfqh);
break;
case NFQNL_CFG_CMD_PF_UNBIND:
QDEBUG("unregistering queue handler for pf=%u\n",
ntohs(cmd->pf));
ret = nf_unregister_queue_handler(ntohs(cmd->pf), &nfqh);
break;
default:
ret = -EINVAL;
......@@ -827,7 +807,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
if (!queue) {
ret = -ENODEV;
goto out_put;
goto err_out_unlock;
}
params = nla_data(nfqa[NFQA_CFG_PARAMS]);
nfqnl_set_mode(queue, params->copy_mode,
......@@ -839,7 +819,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
if (!queue) {
ret = -ENODEV;
goto out_put;
goto err_out_unlock;
}
queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]);
spin_lock_bh(&queue->lock);
......@@ -847,8 +827,8 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
spin_unlock_bh(&queue->lock);
}
out_put:
instance_put(queue);
err_out_unlock:
rcu_read_unlock();
return ret;
}
......@@ -916,7 +896,7 @@ static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
static void *seq_start(struct seq_file *seq, loff_t *pos)
{
read_lock_bh(&instances_lock);
spin_lock(&instances_lock);
return get_idx(seq, *pos);
}
......@@ -928,7 +908,7 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
static void seq_stop(struct seq_file *s, void *v)
{
read_unlock_bh(&instances_lock);
spin_unlock(&instances_lock);
}
static int seq_show(struct seq_file *s, void *v)
......@@ -940,8 +920,7 @@ static int seq_show(struct seq_file *s, void *v)
inst->peer_pid, inst->queue_total,
inst->copy_mode, inst->copy_range,
inst->queue_dropped, inst->queue_user_dropped,
inst->id_sequence,
atomic_read(&inst->use));
inst->id_sequence, 1);
}
static const struct seq_operations nfqnl_seq_ops = {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment