Commit 3419b23a authored by Davide Libenzi, committed by Linus Torvalds

[PATCH] epoll: use unlocked wqueue operations

A few days ago Arjan signaled a lockdep red flag on epoll locks,
specifically between the epoll device structure lock (->lock) and the wait
queue head lock (->lock).

As I explained in another email, and directly to Arjan, this can't happen
in reality because of the explicit check at eventpoll.c:592, which does not
allow dropping an epoll fd inside the same epoll fd.  Since lockdep works
on per-structure locks, it can never know about policies enforced in other
parts of the code.

It was decided some time ago to allow dropping epoll fds inside other
epoll fds, which triggers very tricky wakeup operations (due to possibly
reentrant callback-driven wakeups) handled by the ep_poll_safewake()
function.  While looking again at the code though, I noticed that all the
operations done on the epoll's main structure wait queue head (->wq) are
already protected by the epoll lock (->lock), so that locked-style
functions can be used to manipulate the ->wq member.  This both saves a
lock acquisition and makes lockdep happy.

Running totalmess on my dual opteron for a while did not reveal any problem
so far:

http://www.xmailserver.org/totalmess.c

Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 4ad3bcf3
/* /*
* fs/eventpoll.c ( Efficent event polling implementation ) * fs/eventpoll.c ( Efficent event polling implementation )
* Copyright (C) 2001,...,2003 Davide Libenzi * Copyright (C) 2001,...,2006 Davide Libenzi
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
...@@ -1004,7 +1004,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, ...@@ -1004,7 +1004,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
/* Notify waiting tasks that events are available */ /* Notify waiting tasks that events are available */
if (waitqueue_active(&ep->wq)) if (waitqueue_active(&ep->wq))
wake_up(&ep->wq); __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE);
if (waitqueue_active(&ep->poll_wait)) if (waitqueue_active(&ep->poll_wait))
pwake++; pwake++;
} }
...@@ -1083,7 +1083,8 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even ...@@ -1083,7 +1083,8 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
/* Notify waiting tasks that events are available */ /* Notify waiting tasks that events are available */
if (waitqueue_active(&ep->wq)) if (waitqueue_active(&ep->wq))
wake_up(&ep->wq); __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE |
TASK_INTERRUPTIBLE);
if (waitqueue_active(&ep->poll_wait)) if (waitqueue_active(&ep->poll_wait))
pwake++; pwake++;
} }
...@@ -1260,7 +1261,8 @@ is_linked: ...@@ -1260,7 +1261,8 @@ is_linked:
* wait list. * wait list.
*/ */
if (waitqueue_active(&ep->wq)) if (waitqueue_active(&ep->wq))
wake_up(&ep->wq); __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE |
TASK_INTERRUPTIBLE);
if (waitqueue_active(&ep->poll_wait)) if (waitqueue_active(&ep->poll_wait))
pwake++; pwake++;
...@@ -1444,7 +1446,8 @@ static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist) ...@@ -1444,7 +1446,8 @@ static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist)
* wait list. * wait list.
*/ */
if (waitqueue_active(&ep->wq)) if (waitqueue_active(&ep->wq))
wake_up(&ep->wq); __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE |
TASK_INTERRUPTIBLE);
if (waitqueue_active(&ep->poll_wait)) if (waitqueue_active(&ep->poll_wait))
pwake++; pwake++;
} }
...@@ -1516,7 +1519,7 @@ retry: ...@@ -1516,7 +1519,7 @@ retry:
* ep_poll_callback() when events will become available. * ep_poll_callback() when events will become available.
*/ */
init_waitqueue_entry(&wait, current); init_waitqueue_entry(&wait, current);
add_wait_queue(&ep->wq, &wait); __add_wait_queue(&ep->wq, &wait);
for (;;) { for (;;) {
/* /*
...@@ -1536,7 +1539,7 @@ retry: ...@@ -1536,7 +1539,7 @@ retry:
jtimeout = schedule_timeout(jtimeout); jtimeout = schedule_timeout(jtimeout);
write_lock_irqsave(&ep->lock, flags); write_lock_irqsave(&ep->lock, flags);
} }
remove_wait_queue(&ep->wq, &wait); __remove_wait_queue(&ep->wq, &wait);
set_current_state(TASK_RUNNING); set_current_state(TASK_RUNNING);
} }
......
/* /*
* include/linux/eventpoll.h ( Efficent event polling implementation ) * include/linux/eventpoll.h ( Efficent event polling implementation )
* Copyright (C) 2001,...,2003 Davide Libenzi * Copyright (C) 2001,...,2006 Davide Libenzi
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment