Commit 22c047cc authored by Eric Dumazet, committed by David S. Miller

[NET]: Hashed spinlocks in net/ipv4/route.c

- Locking abstraction
- Spinlocks moved out of the rt hash table: 50% less memory is used by the
  rt hash table. It's a win even on UP.
- Sizing of spinlocks table depends on NR_CPUS
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent f0e36f8c
...@@ -54,6 +54,7 @@ ...@@ -54,6 +54,7 @@
* Marc Boucher : routing by fwmark * Marc Boucher : routing by fwmark
* Robert Olsson : Added rt_cache statistics * Robert Olsson : Added rt_cache statistics
* Arnaldo C. Melo : Convert proc stuff to seq_file * Arnaldo C. Melo : Convert proc stuff to seq_file
* Eric Dumazet : hashed spinlocks
* *
* This program is free software; you can redistribute it and/or * This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License * modify it under the terms of the GNU General Public License
...@@ -201,8 +202,37 @@ __u8 ip_tos2prio[16] = { ...@@ -201,8 +202,37 @@ __u8 ip_tos2prio[16] = {
struct rt_hash_bucket { struct rt_hash_bucket {
struct rtable *chain; struct rtable *chain;
spinlock_t lock; };
} __attribute__((__aligned__(8))); #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
/*
* Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks.
* The size of this table is a power of two and depends on the number of CPUs.
*/
#if NR_CPUS >= 32
#define RT_HASH_LOCK_SZ 4096
#elif NR_CPUS >= 16
#define RT_HASH_LOCK_SZ 2048
#elif NR_CPUS >= 8
#define RT_HASH_LOCK_SZ 1024
#elif NR_CPUS >= 4
#define RT_HASH_LOCK_SZ 512
#else
#define RT_HASH_LOCK_SZ 256
#endif
/* Shared table of RT_HASH_LOCK_SZ spinlocks, allocated by rt_hash_lock_init(). */
static spinlock_t *rt_hash_locks;

/*
 * Map a hash slot to the address of the spinlock that guards it.  The slot
 * is masked with RT_HASH_LOCK_SZ - 1, so several slots share one lock.
 * The expansion is fully parenthesized so it always acts as one operand.
 */
# define rt_hash_lock_addr(slot) \
	(&rt_hash_locks[(slot) & (RT_HASH_LOCK_SZ - 1)])

/*
 * Allocate the lock table and initialize every lock in it; panics if the
 * allocation fails.  Wrapped in do { } while (0) so that
 * "rt_hash_lock_init();" expands to exactly one statement and stays safe
 * inside an un-braced if/else.
 */
# define rt_hash_lock_init() do { \
	int i; \
	rt_hash_locks = kmalloc(sizeof(spinlock_t) * RT_HASH_LOCK_SZ, GFP_KERNEL); \
	if (!rt_hash_locks) \
		panic("IP: failed to allocate rt_hash_locks\n"); \
	for (i = 0; i < RT_HASH_LOCK_SZ; i++) \
		spin_lock_init(&rt_hash_locks[i]); \
} while (0)
#else
# define rt_hash_lock_addr(slot) NULL
# define rt_hash_lock_init()
#endif
static struct rt_hash_bucket *rt_hash_table; static struct rt_hash_bucket *rt_hash_table;
static unsigned rt_hash_mask; static unsigned rt_hash_mask;
...@@ -587,7 +617,7 @@ static void rt_check_expire(unsigned long dummy) ...@@ -587,7 +617,7 @@ static void rt_check_expire(unsigned long dummy)
i = (i + 1) & rt_hash_mask; i = (i + 1) & rt_hash_mask;
rthp = &rt_hash_table[i].chain; rthp = &rt_hash_table[i].chain;
spin_lock(&rt_hash_table[i].lock); spin_lock(rt_hash_lock_addr(i));
while ((rth = *rthp) != NULL) { while ((rth = *rthp) != NULL) {
if (rth->u.dst.expires) { if (rth->u.dst.expires) {
/* Entry is expired even if it is in use */ /* Entry is expired even if it is in use */
...@@ -620,7 +650,7 @@ static void rt_check_expire(unsigned long dummy) ...@@ -620,7 +650,7 @@ static void rt_check_expire(unsigned long dummy)
rt_free(rth); rt_free(rth);
#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ #endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
} }
spin_unlock(&rt_hash_table[i].lock); spin_unlock(rt_hash_lock_addr(i));
/* Fallback loop breaker. */ /* Fallback loop breaker. */
if (time_after(jiffies, now)) if (time_after(jiffies, now))
...@@ -643,11 +673,11 @@ static void rt_run_flush(unsigned long dummy) ...@@ -643,11 +673,11 @@ static void rt_run_flush(unsigned long dummy)
get_random_bytes(&rt_hash_rnd, 4); get_random_bytes(&rt_hash_rnd, 4);
for (i = rt_hash_mask; i >= 0; i--) { for (i = rt_hash_mask; i >= 0; i--) {
spin_lock_bh(&rt_hash_table[i].lock); spin_lock_bh(rt_hash_lock_addr(i));
rth = rt_hash_table[i].chain; rth = rt_hash_table[i].chain;
if (rth) if (rth)
rt_hash_table[i].chain = NULL; rt_hash_table[i].chain = NULL;
spin_unlock_bh(&rt_hash_table[i].lock); spin_unlock_bh(rt_hash_lock_addr(i));
for (; rth; rth = next) { for (; rth; rth = next) {
next = rth->u.rt_next; next = rth->u.rt_next;
...@@ -780,7 +810,7 @@ static int rt_garbage_collect(void) ...@@ -780,7 +810,7 @@ static int rt_garbage_collect(void)
k = (k + 1) & rt_hash_mask; k = (k + 1) & rt_hash_mask;
rthp = &rt_hash_table[k].chain; rthp = &rt_hash_table[k].chain;
spin_lock_bh(&rt_hash_table[k].lock); spin_lock_bh(rt_hash_lock_addr(k));
while ((rth = *rthp) != NULL) { while ((rth = *rthp) != NULL) {
if (!rt_may_expire(rth, tmo, expire)) { if (!rt_may_expire(rth, tmo, expire)) {
tmo >>= 1; tmo >>= 1;
...@@ -812,7 +842,7 @@ static int rt_garbage_collect(void) ...@@ -812,7 +842,7 @@ static int rt_garbage_collect(void)
goal--; goal--;
#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ #endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
} }
spin_unlock_bh(&rt_hash_table[k].lock); spin_unlock_bh(rt_hash_lock_addr(k));
if (goal <= 0) if (goal <= 0)
break; break;
} }
...@@ -882,7 +912,7 @@ restart: ...@@ -882,7 +912,7 @@ restart:
rthp = &rt_hash_table[hash].chain; rthp = &rt_hash_table[hash].chain;
spin_lock_bh(&rt_hash_table[hash].lock); spin_lock_bh(rt_hash_lock_addr(hash));
while ((rth = *rthp) != NULL) { while ((rth = *rthp) != NULL) {
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
if (!(rth->u.dst.flags & DST_BALANCED) && if (!(rth->u.dst.flags & DST_BALANCED) &&
...@@ -908,7 +938,7 @@ restart: ...@@ -908,7 +938,7 @@ restart:
rth->u.dst.__use++; rth->u.dst.__use++;
dst_hold(&rth->u.dst); dst_hold(&rth->u.dst);
rth->u.dst.lastuse = now; rth->u.dst.lastuse = now;
spin_unlock_bh(&rt_hash_table[hash].lock); spin_unlock_bh(rt_hash_lock_addr(hash));
rt_drop(rt); rt_drop(rt);
*rp = rth; *rp = rth;
...@@ -949,7 +979,7 @@ restart: ...@@ -949,7 +979,7 @@ restart:
if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
int err = arp_bind_neighbour(&rt->u.dst); int err = arp_bind_neighbour(&rt->u.dst);
if (err) { if (err) {
spin_unlock_bh(&rt_hash_table[hash].lock); spin_unlock_bh(rt_hash_lock_addr(hash));
if (err != -ENOBUFS) { if (err != -ENOBUFS) {
rt_drop(rt); rt_drop(rt);
...@@ -990,7 +1020,7 @@ restart: ...@@ -990,7 +1020,7 @@ restart:
} }
#endif #endif
rt_hash_table[hash].chain = rt; rt_hash_table[hash].chain = rt;
spin_unlock_bh(&rt_hash_table[hash].lock); spin_unlock_bh(rt_hash_lock_addr(hash));
*rp = rt; *rp = rt;
return 0; return 0;
} }
...@@ -1058,7 +1088,7 @@ static void rt_del(unsigned hash, struct rtable *rt) ...@@ -1058,7 +1088,7 @@ static void rt_del(unsigned hash, struct rtable *rt)
{ {
struct rtable **rthp; struct rtable **rthp;
spin_lock_bh(&rt_hash_table[hash].lock); spin_lock_bh(rt_hash_lock_addr(hash));
ip_rt_put(rt); ip_rt_put(rt);
for (rthp = &rt_hash_table[hash].chain; *rthp; for (rthp = &rt_hash_table[hash].chain; *rthp;
rthp = &(*rthp)->u.rt_next) rthp = &(*rthp)->u.rt_next)
...@@ -1067,7 +1097,7 @@ static void rt_del(unsigned hash, struct rtable *rt) ...@@ -1067,7 +1097,7 @@ static void rt_del(unsigned hash, struct rtable *rt)
rt_free(rt); rt_free(rt);
break; break;
} }
spin_unlock_bh(&rt_hash_table[hash].lock); spin_unlock_bh(rt_hash_lock_addr(hash));
} }
void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
...@@ -3073,7 +3103,7 @@ __setup("rhash_entries=", set_rhash_entries); ...@@ -3073,7 +3103,7 @@ __setup("rhash_entries=", set_rhash_entries);
int __init ip_rt_init(void) int __init ip_rt_init(void)
{ {
int i, order, goal, rc = 0; int order, goal, rc = 0;
rt_hash_rnd = (int) ((num_physpages ^ (num_physpages>>8)) ^ rt_hash_rnd = (int) ((num_physpages ^ (num_physpages>>8)) ^
(jiffies ^ (jiffies >> 7))); (jiffies ^ (jiffies >> 7)));
...@@ -3122,10 +3152,8 @@ int __init ip_rt_init(void) ...@@ -3122,10 +3152,8 @@ int __init ip_rt_init(void)
/* NOTHING */; /* NOTHING */;
rt_hash_mask--; rt_hash_mask--;
for (i = 0; i <= rt_hash_mask; i++) { memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket));
spin_lock_init(&rt_hash_table[i].lock); rt_hash_lock_init();
rt_hash_table[i].chain = NULL;
}
ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1); ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1);
ip_rt_max_size = (rt_hash_mask + 1) * 16; ip_rt_max_size = (rt_hash_mask + 1) * 16;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment