Commit 512615b6, authored by Eric Dumazet, committed by David S. Miller

udp: secondary hash on (local port, local address)

Extends udp_table to contain a secondary hash table.

The socket anchor for this second hash comes for free, because UDP
doesn't use skc_bind_node: we define a union holding both
skc_bind_node and a new hlist_nulls_node, skc_portaddr_node
(accessed from UDP code as udp_portaddr_node).

udp_lib_get_port() inserts sockets into second hash chain
(additional cost of one atomic op)

udp_lib_unhash() deletes socket from second hash chain
(additional cost of one atomic op)

Note: No spinlock lockdep annotation is needed, because the
lock for the secondary hash chain is always taken after the
lock for the primary hash chain.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent d4cada4a
...@@ -57,6 +57,7 @@ struct udp_sock { ...@@ -57,6 +57,7 @@ struct udp_sock {
struct inet_sock inet; struct inet_sock inet;
#define udp_port_hash inet.sk.__sk_common.skc_u16hashes[0] #define udp_port_hash inet.sk.__sk_common.skc_u16hashes[0]
#define udp_portaddr_hash inet.sk.__sk_common.skc_u16hashes[1] #define udp_portaddr_hash inet.sk.__sk_common.skc_u16hashes[1]
#define udp_portaddr_node inet.sk.__sk_common.skc_portaddr_node
int pending; /* Any pending frames ? */ int pending; /* Any pending frames ? */
unsigned int corkflag; /* Cork is required */ unsigned int corkflag; /* Cork is required */
__u16 encap_type; /* Is this an Encapsulation socket? */ __u16 encap_type; /* Is this an Encapsulation socket? */
......
...@@ -105,7 +105,7 @@ struct net; ...@@ -105,7 +105,7 @@ struct net;
/** /**
* struct sock_common - minimal network layer representation of sockets * struct sock_common - minimal network layer representation of sockets
* @skc_node: main hash linkage for various protocol lookup tables * @skc_node: main hash linkage for various protocol lookup tables
* @skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol * @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol
* @skc_refcnt: reference count * @skc_refcnt: reference count
* @skc_tx_queue_mapping: tx queue number for this connection * @skc_tx_queue_mapping: tx queue number for this connection
* @skc_hash: hash value used with various protocol lookup tables * @skc_hash: hash value used with various protocol lookup tables
...@@ -115,6 +115,7 @@ struct net; ...@@ -115,6 +115,7 @@ struct net;
* @skc_reuse: %SO_REUSEADDR setting * @skc_reuse: %SO_REUSEADDR setting
* @skc_bound_dev_if: bound device index if != 0 * @skc_bound_dev_if: bound device index if != 0
* @skc_bind_node: bind hash linkage for various protocol lookup tables * @skc_bind_node: bind hash linkage for various protocol lookup tables
* @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol
* @skc_prot: protocol handlers inside a network family * @skc_prot: protocol handlers inside a network family
* @skc_net: reference to the network namespace of this socket * @skc_net: reference to the network namespace of this socket
* *
...@@ -140,7 +141,10 @@ struct sock_common { ...@@ -140,7 +141,10 @@ struct sock_common {
volatile unsigned char skc_state; volatile unsigned char skc_state;
unsigned char skc_reuse; unsigned char skc_reuse;
int skc_bound_dev_if; int skc_bound_dev_if;
struct hlist_node skc_bind_node; union {
struct hlist_node skc_bind_node;
struct hlist_nulls_node skc_portaddr_node;
};
struct proto *skc_prot; struct proto *skc_prot;
#ifdef CONFIG_NET_NS #ifdef CONFIG_NET_NS
struct net *skc_net; struct net *skc_net;
......
...@@ -63,10 +63,19 @@ struct udp_hslot { ...@@ -63,10 +63,19 @@ struct udp_hslot {
spinlock_t lock; spinlock_t lock;
} __attribute__((aligned(2 * sizeof(long)))); } __attribute__((aligned(2 * sizeof(long))));
/**
* struct udp_table - UDP table
*
* @hash: hash table, sockets are hashed on (local port)
* @hash2: hash table, sockets are hashed on (local port, local address)
* @mask: number of slots in hash tables, minus 1
* @log: log2(number of slots in hash table)
*/
struct udp_table { struct udp_table {
struct udp_hslot *hash; struct udp_hslot *hash;
unsigned int mask; struct udp_hslot *hash2;
unsigned int log; unsigned int mask;
unsigned int log;
}; };
extern struct udp_table udp_table; extern struct udp_table udp_table;
extern void udp_table_init(struct udp_table *, const char *); extern void udp_table_init(struct udp_table *, const char *);
...@@ -75,6 +84,15 @@ static inline struct udp_hslot *udp_hashslot(struct udp_table *table, ...@@ -75,6 +84,15 @@ static inline struct udp_hslot *udp_hashslot(struct udp_table *table,
{ {
return &table->hash[udp_hashfn(net, num, table->mask)]; return &table->hash[udp_hashfn(net, num, table->mask)];
} }
/*
 * udp_hashslot2 - pick the secondary-hash slot for a precomputed hash value
 * @table: UDP table whose hash2 array is indexed
 * @hash:  hash value, reduced to a slot index by masking with table->mask
 *
 * Unlike udp_hashslot(), this helper takes no struct net argument: for the
 * secondary hash, net_hash_mix() is expected to have been applied by the
 * caller before the value is passed in as @hash.
 *
 * Returns a pointer to the selected slot inside table->hash2.
 */
static inline struct udp_hslot *udp_hashslot2(struct udp_table *table,
					      unsigned int hash)
{
	const unsigned int slot = hash & table->mask;

	return table->hash2 + slot;
}
/* Note: this must match 'valbool' in sock_setsockopt */ /* Note: this must match 'valbool' in sock_setsockopt */
#define UDP_CSUM_NOXMIT 1 #define UDP_CSUM_NOXMIT 1
......
...@@ -163,7 +163,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, ...@@ -163,7 +163,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
int (*saddr_comp)(const struct sock *sk1, int (*saddr_comp)(const struct sock *sk1,
const struct sock *sk2)) const struct sock *sk2))
{ {
struct udp_hslot *hslot; struct udp_hslot *hslot, *hslot2;
struct udp_table *udptable = sk->sk_prot->h.udp_table; struct udp_table *udptable = sk->sk_prot->h.udp_table;
int error = 1; int error = 1;
struct net *net = sock_net(sk); struct net *net = sock_net(sk);
...@@ -222,6 +222,13 @@ found: ...@@ -222,6 +222,13 @@ found:
sk_nulls_add_node_rcu(sk, &hslot->head); sk_nulls_add_node_rcu(sk, &hslot->head);
hslot->count++; hslot->count++;
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
spin_lock(&hslot2->lock);
hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
&hslot2->head);
hslot2->count++;
spin_unlock(&hslot2->lock);
} }
error = 0; error = 0;
fail_unlock: fail_unlock:
...@@ -1062,14 +1069,22 @@ void udp_lib_unhash(struct sock *sk) ...@@ -1062,14 +1069,22 @@ void udp_lib_unhash(struct sock *sk)
{ {
if (sk_hashed(sk)) { if (sk_hashed(sk)) {
struct udp_table *udptable = sk->sk_prot->h.udp_table; struct udp_table *udptable = sk->sk_prot->h.udp_table;
struct udp_hslot *hslot = udp_hashslot(udptable, sock_net(sk), struct udp_hslot *hslot, *hslot2;
udp_sk(sk)->udp_port_hash);
hslot = udp_hashslot(udptable, sock_net(sk),
udp_sk(sk)->udp_port_hash);
hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
spin_lock_bh(&hslot->lock); spin_lock_bh(&hslot->lock);
if (sk_nulls_del_node_init_rcu(sk)) { if (sk_nulls_del_node_init_rcu(sk)) {
hslot->count--; hslot->count--;
inet_sk(sk)->inet_num = 0; inet_sk(sk)->inet_num = 0;
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
spin_lock(&hslot2->lock);
hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
hslot2->count--;
spin_unlock(&hslot2->lock);
} }
spin_unlock_bh(&hslot->lock); spin_unlock_bh(&hslot->lock);
} }
...@@ -1857,7 +1872,7 @@ void __init udp_table_init(struct udp_table *table, const char *name) ...@@ -1857,7 +1872,7 @@ void __init udp_table_init(struct udp_table *table, const char *name)
if (!CONFIG_BASE_SMALL) if (!CONFIG_BASE_SMALL)
table->hash = alloc_large_system_hash(name, table->hash = alloc_large_system_hash(name,
sizeof(struct udp_hslot), 2 * sizeof(struct udp_hslot),
uhash_entries, uhash_entries,
21, /* one slot per 2 MB */ 21, /* one slot per 2 MB */
0, 0,
...@@ -1869,17 +1884,23 @@ void __init udp_table_init(struct udp_table *table, const char *name) ...@@ -1869,17 +1884,23 @@ void __init udp_table_init(struct udp_table *table, const char *name)
*/ */
if (CONFIG_BASE_SMALL || table->mask < UDP_HTABLE_SIZE_MIN - 1) { if (CONFIG_BASE_SMALL || table->mask < UDP_HTABLE_SIZE_MIN - 1) {
table->hash = kmalloc(UDP_HTABLE_SIZE_MIN * table->hash = kmalloc(UDP_HTABLE_SIZE_MIN *
sizeof(struct udp_hslot), GFP_KERNEL); 2 * sizeof(struct udp_hslot), GFP_KERNEL);
if (!table->hash) if (!table->hash)
panic(name); panic(name);
table->log = ilog2(UDP_HTABLE_SIZE_MIN); table->log = ilog2(UDP_HTABLE_SIZE_MIN);
table->mask = UDP_HTABLE_SIZE_MIN - 1; table->mask = UDP_HTABLE_SIZE_MIN - 1;
} }
table->hash2 = table->hash + (table->mask + 1);
for (i = 0; i <= table->mask; i++) { for (i = 0; i <= table->mask; i++) {
INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i);
table->hash[i].count = 0; table->hash[i].count = 0;
spin_lock_init(&table->hash[i].lock); spin_lock_init(&table->hash[i].lock);
} }
for (i = 0; i <= table->mask; i++) {
INIT_HLIST_NULLS_HEAD(&table->hash2[i].head, i);
table->hash2[i].count = 0;
spin_lock_init(&table->hash2[i].lock);
}
} }
void __init udp_init(void) void __init udp_init(void)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment