net: speedup sk_wake_async()

An incoming datagram must bring into cpu cache *lot* of cache lines, in particular : (other parts omitted (hash chains, ip route cache...)) On 32bit arches : offsetof(struct sock, sk_rcvbuf) =0x30 (read) offsetof(struct sock, sk_lock) =0x34 (rw) offsetof(struct sock, sk_sleep) =0x50 (read) offsetof(struct sock, sk_rmem_alloc) =0x64 (rw) offsetof(struct sock, sk_receive_queue)=0x74 (rw) offsetof(struct sock, sk_forward_alloc)=0x98 (rw) offsetof(struct sock, sk_callback_lock)=0xcc (rw) offsetof(struct sock, sk_drops) =0xd8 (read if we add dropcount support, rw if frame dropped) offsetof(struct sock, sk_filter) =0xf8 (read) offsetof(struct sock, sk_socket) =0x138 (read) offsetof(struct sock, sk_data_ready) =0x15c (read) We can avoid sk->sk_socket and socket->fasync_list referencing on sockets with no fasync() structures. (socket->fasync_list ptr is probably already in cache because it shares a cache line with socket->wait, ie location pointed by sk->sk_sleep) This avoids one cache line load per incoming packet for common cases (no fasync()) We can leave (or even move in a future patch) sk->sk_socket in a cold location Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>

net: speedup sk_wake_async()
An incoming datagram must bring into cpu cache *lot* of cache lines, in particular : (other parts omitted (hash chains, ip route cache...)) On 32bit arches : offsetof(struct sock, sk_rcvbuf) =0x30 (read) offsetof(struct sock, sk_lock) =0x34 (rw) offsetof(struct sock, sk_sleep) =0x50 (read) offsetof(struct sock, sk_rmem_alloc) =0x64 (rw) offsetof(struct sock, sk_receive_queue)=0x74 (rw) offsetof(struct sock, sk_forward_alloc)=0x98 (rw) offsetof(struct sock, sk_callback_lock)=0xcc (rw) offsetof(struct sock, sk_drops) =0xd8 (read if we add dropcount support, rw if frame dropped) offsetof(struct sock, sk_filter) =0xf8 (read) offsetof(struct sock, sk_socket) =0x138 (read) offsetof(struct sock, sk_data_ready) =0x15c (read) We can avoid sk->sk_socket and socket->fasync_list referencing on sockets with no fasync() structures. (socket->fasync_list ptr is probably already in cache because it shares a cache line with socket->wait, ie location pointed by sk->sk_sleep) This avoids one cache line load per incoming packet for common cases (no fasync()) We can leave (or even move in a future patch) sk->sk_socket in a cold location Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
bcdce719 · Eric Dumazet · David S. Miller · bd32cafc · bcdce719 · bcdce719
Commit bcdce719 authored Oct 06, 2009 by Eric Dumazet Committed by David S. Miller Oct 06, 2009
Hide whitespace changes
Inline Side-by-side

Showing with 5 additions and 1 deletion

include/net/sock.h include/net/sock.h +2 -1

net/socket.c net/socket.c +3 -0

No files found.
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -504,6 +504,7 @@ enum sock_flags {
 	SOCK_TIMESTAMPING_SOFTWARE,     /* %SOF_TIMESTAMPING_SOFTWARE */
 	SOCK_TIMESTAMPING_RAW_HARDWARE, /* %SOF_TIMESTAMPING_RAW_HARDWARE */
 	SOCK_TIMESTAMPING_SYS_HARDWARE, /* %SOF_TIMESTAMPING_SYS_HARDWARE */
+	SOCK_FASYNC, /* fasync() active */
 };
 static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)
@@ -1396,7 +1397,7 @@ static inline unsigned long sock_wspace(struct sock *sk)
 static inline void sk_wake_async(struct sock *sk, int how, int band)
 {
-	if (sk->sk_socket && sk->sk_socket->fasync_list)
+	if (sock_flag(sk, SOCK_FASYNC))
 		sock_wake_async(sk->sk_socket, how, band);
 }

--- a/net/socket.c
+++ b/net/socket.c
@@ -1100,11 +1100,14 @@ static int sock_fasync(int fd, struct file *filp, int on)
 		fna->fa_next = sock->fasync_list;
 		write_lock_bh(&sk->sk_callback_lock);
 		sock->fasync_list = fna;
+		sock_set_flag(sk, SOCK_FASYNC);
 		write_unlock_bh(&sk->sk_callback_lock);
 	} else {
 		if (fa != NULL) {
 			write_lock_bh(&sk->sk_callback_lock);
 			*prev = fa->fa_next;
+			if (!sock->fasync_list)
+				sock_reset_flag(sk, SOCK_FASYNC);
 			write_unlock_bh(&sk->sk_callback_lock);
 			kfree(fa);
 		}