Commit 87c3a86e authored by Davide Libenzi, committed by Linus Torvalds

eventfd: remove fput() call from possible IRQ context

Remove a source of fput() calls from inside IRQ context.  Like Eric, I wasn't
able to reproduce an fput() call from IRQ context, but Jeff said he was able
to with the attached test program.  Independently of this, the bug is
conceptually there, so we might be better off fixing it.  This patch adds to
->ki_eventfd the same optimization we already apply to ->ki_filp.  Playing
with ->f_count directly is not pretty in general, but the alternative here
would be to add a brand-new delayed fput() infrastructure, and I'm not sure
that is worth it.
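For reference, the deferred-fput pattern this relies on looks roughly like the
sketch below.  This is not the fs/aio.c code; the names (deferred_put,
put_file_deferred) are made up for illustration, and __fput() is assumed to be
visible the way fs/aio.c sees it.  The idea: drop the file reference count
directly from whatever context we are in, and only when that drop turns out to
release the last reference punt the sleepable __fput() to a workqueue running
in process context.

/* Illustrative sketch only: a simplified deferred-fput helper, not fs/aio.c. */
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

/* A request embeds this, the way a kiocb is linked on fput_head. */
struct deferred_put {
	struct list_head	list;
	struct file		*filp;
};

static LIST_HEAD(deferred_put_head);
static DEFINE_SPINLOCK(deferred_put_lock);

/* Runs in process context, where calling __fput() is legal. */
static void deferred_put_fn(struct work_struct *work)
{
	LIST_HEAD(todo);
	struct deferred_put *dp, *tmp;

	spin_lock_irq(&deferred_put_lock);
	list_splice_init(&deferred_put_head, &todo);
	spin_unlock_irq(&deferred_put_lock);

	list_for_each_entry_safe(dp, tmp, &todo, list)
		__fput(dp->filp);	/* the real work; needs process context */
}
static DECLARE_WORK(deferred_put_work, deferred_put_fn);

/*
 * Safe to call from IRQ/atomic context: only the last reference needs the
 * sleepable __fput(), and that one is handed off to the workqueue.
 */
static void put_file_deferred(struct deferred_put *dp, struct file *filp)
{
	unsigned long flags;

	if (!atomic_long_dec_and_test(&filp->f_count))
		return;		/* not the last reference: nothing more to do */

	dp->filp = filp;
	spin_lock_irqsave(&deferred_put_lock, flags);
	list_add(&dp->list, &deferred_put_head);
	spin_unlock_irqrestore(&deferred_put_lock, flags);
	schedule_work(&deferred_put_work);
}

The patched aio code does the same thing with the kiocb itself linked on
fput_head, which is why __aio_put_req() below only schedules fput_work when
schedule_putreq ends up non-zero.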
Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Cc: Zach Brown <zach.brown@oracle.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent d0115552
fs/aio.c
@@ -443,7 +443,7 @@ static struct kiocb *__aio_get_req(struct kioctx *ctx)
 	req->private = NULL;
 	req->ki_iovec = NULL;
 	INIT_LIST_HEAD(&req->ki_run_list);
-	req->ki_eventfd = ERR_PTR(-EINVAL);
+	req->ki_eventfd = NULL;
 
 	/* Check if the completion queue has enough free space to
 	 * accept an event from this io.
@@ -485,8 +485,6 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
 {
 	assert_spin_locked(&ctx->ctx_lock);
 
-	if (!IS_ERR(req->ki_eventfd))
-		fput(req->ki_eventfd);
 	if (req->ki_dtor)
 		req->ki_dtor(req);
 	if (req->ki_iovec != &req->ki_inline_vec)
@@ -508,8 +506,11 @@ static void aio_fput_routine(struct work_struct *data)
 		list_del(&req->ki_list);
 		spin_unlock_irq(&fput_lock);
 
-		/* Complete the fput */
-		__fput(req->ki_filp);
+		/* Complete the fput(s) */
+		if (req->ki_filp != NULL)
+			__fput(req->ki_filp);
+		if (req->ki_eventfd != NULL)
+			__fput(req->ki_eventfd);
 
 		/* Link the iocb into the context's free list */
 		spin_lock_irq(&ctx->ctx_lock);
@@ -527,12 +528,14 @@ static void aio_fput_routine(struct work_struct *data)
  */
 static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
 {
+	int schedule_putreq = 0;
+
 	dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n",
 		req, atomic_long_read(&req->ki_filp->f_count));
 
 	assert_spin_locked(&ctx->ctx_lock);
 
-	req->ki_users --;
+	req->ki_users--;
 	BUG_ON(req->ki_users < 0);
 	if (likely(req->ki_users))
 		return 0;
@@ -540,10 +543,23 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
 	req->ki_cancel = NULL;
 	req->ki_retry = NULL;
 
-	/* Must be done under the lock to serialise against cancellation.
-	 * Call this aio_fput as it duplicates fput via the fput_work.
+	/*
+	 * Try to optimize the aio and eventfd file* puts, by avoiding to
+	 * schedule work in case it is not __fput() time. In normal cases,
+	 * we would not be holding the last reference to the file*, so
+	 * this function will be executed w/out any aio kthread wakeup.
 	 */
-	if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) {
+	if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count)))
+		schedule_putreq++;
+	else
+		req->ki_filp = NULL;
+	if (req->ki_eventfd != NULL) {
+		if (unlikely(atomic_long_dec_and_test(&req->ki_eventfd->f_count)))
+			schedule_putreq++;
+		else
+			req->ki_eventfd = NULL;
+	}
+	if (unlikely(schedule_putreq)) {
 		get_ioctx(ctx);
 		spin_lock(&fput_lock);
 		list_add(&req->ki_list, &fput_head);
@@ -1009,7 +1025,7 @@ int aio_complete(struct kiocb *iocb, long res, long res2)
 	 * eventfd. The eventfd_signal() function is safe to be called
 	 * from IRQ context.
 	 */
-	if (!IS_ERR(iocb->ki_eventfd))
+	if (iocb->ki_eventfd != NULL)
 		eventfd_signal(iocb->ki_eventfd, 1);
 
 put_rq:
@@ -1608,6 +1624,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 		req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd);
 		if (IS_ERR(req->ki_eventfd)) {
 			ret = PTR_ERR(req->ki_eventfd);
+			req->ki_eventfd = NULL;
 			goto out_put_req;
 		}
 	}
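The io_submit_one() hunk above handles the eventfd file obtained from
iocb->aio_resfd.  Jeff's test program is not part of this excerpt; purely as
an illustration of the userspace interface that exercises this path (and
assuming a libaio that provides io_set_eventfd()), a minimal submitter that
receives completion notification through an eventfd could look like the
following.  The file path and buffer size are arbitrary; build with -laio.

/* Illustration of aio_resfd/eventfd completion notification, not the reproducer. */
#include <libaio.h>
#include <sys/eventfd.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	io_context_t ctx = 0;
	struct iocb cb, *cbs[1] = { &cb };
	struct io_event ev;
	char buf[4096];
	uint64_t count;

	int efd = eventfd(0, 0);
	int fd = open("/etc/hostname", O_RDONLY);
	if (efd < 0 || fd < 0 || io_queue_init(1, &ctx) < 0) {
		perror("setup");
		return 1;
	}

	io_prep_pread(&cb, fd, buf, sizeof(buf), 0);
	io_set_eventfd(&cb, efd);	/* sets IOCB_FLAG_RESFD and aio_resfd */

	if (io_submit(ctx, 1, cbs) != 1) {
		perror("io_submit");
		return 1;
	}

	/* Block on the eventfd until the completion is signalled ... */
	if (read(efd, &count, sizeof(count)) != sizeof(count))
		return 1;
	printf("eventfd signalled %llu completion(s)\n", (unsigned long long)count);

	/* ... then reap the completion event itself. */
	io_getevents(ctx, 1, 1, &ev, NULL);

	io_destroy(ctx);
	close(fd);
	close(efd);
	return 0;
}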