Commit c32ccd87 authored by Nick Piggin's avatar Nick Piggin Committed by Linus Torvalds

[PATCH] inotify: lock avoidance with parent watch status in dentry

Previous inotify work avoidance is good when inotify is completely unused,
but it breaks down if even a single watch is in place anywhere in the
system.  Robin Holt notices that udev is one such culprit - it slows down a
512-thread application on a 512 CPU system from 6 seconds to 22 minutes.

Solve this by adding a flag in the dentry that tells inotify whether or not
its parent inode has a watch on it.  Event queueing to parent will skip
taking locks if this flag is cleared.  Setting and clearing of this flag on
all child dentries versus event delivery: this is no in terms of race
cases, and that was shown to be equivalent to always performing the check.

The essential behaviour is that activity occuring _after_ a watch has been
added and _before_ it has been removed, will generate events.
Signed-off-by: default avatarNick Piggin <npiggin@suse.de>
Cc: Robert Love <rml@novell.com>
Cc: John McCutchan <ttb@tentacle.dhs.org>
Signed-off-by: default avatarAndrew Morton <akpm@osdl.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent bf36b901
...@@ -802,6 +802,7 @@ void d_instantiate(struct dentry *entry, struct inode * inode) ...@@ -802,6 +802,7 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
if (inode) if (inode)
list_add(&entry->d_alias, &inode->i_dentry); list_add(&entry->d_alias, &inode->i_dentry);
entry->d_inode = inode; entry->d_inode = inode;
fsnotify_d_instantiate(entry, inode);
spin_unlock(&dcache_lock); spin_unlock(&dcache_lock);
security_d_instantiate(entry, inode); security_d_instantiate(entry, inode);
} }
...@@ -853,6 +854,7 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) ...@@ -853,6 +854,7 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
list_add(&entry->d_alias, &inode->i_dentry); list_add(&entry->d_alias, &inode->i_dentry);
do_negative: do_negative:
entry->d_inode = inode; entry->d_inode = inode;
fsnotify_d_instantiate(entry, inode);
spin_unlock(&dcache_lock); spin_unlock(&dcache_lock);
security_d_instantiate(entry, inode); security_d_instantiate(entry, inode);
return NULL; return NULL;
...@@ -983,6 +985,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) ...@@ -983,6 +985,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
new = __d_find_alias(inode, 1); new = __d_find_alias(inode, 1);
if (new) { if (new) {
BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
fsnotify_d_instantiate(new, inode);
spin_unlock(&dcache_lock); spin_unlock(&dcache_lock);
security_d_instantiate(new, inode); security_d_instantiate(new, inode);
d_rehash(dentry); d_rehash(dentry);
...@@ -992,6 +995,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) ...@@ -992,6 +995,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
/* d_instantiate takes dcache_lock, so we do it by hand */ /* d_instantiate takes dcache_lock, so we do it by hand */
list_add(&dentry->d_alias, &inode->i_dentry); list_add(&dentry->d_alias, &inode->i_dentry);
dentry->d_inode = inode; dentry->d_inode = inode;
fsnotify_d_instantiate(dentry, inode);
spin_unlock(&dcache_lock); spin_unlock(&dcache_lock);
security_d_instantiate(dentry, inode); security_d_instantiate(dentry, inode);
d_rehash(dentry); d_rehash(dentry);
...@@ -1176,6 +1180,9 @@ void d_delete(struct dentry * dentry) ...@@ -1176,6 +1180,9 @@ void d_delete(struct dentry * dentry)
spin_lock(&dentry->d_lock); spin_lock(&dentry->d_lock);
isdir = S_ISDIR(dentry->d_inode->i_mode); isdir = S_ISDIR(dentry->d_inode->i_mode);
if (atomic_read(&dentry->d_count) == 1) { if (atomic_read(&dentry->d_count) == 1) {
/* remove this and other inotify debug checks after 2.6.18 */
dentry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
dentry_iput(dentry); dentry_iput(dentry);
fsnotify_nameremove(dentry, isdir); fsnotify_nameremove(dentry, isdir);
return; return;
...@@ -1342,6 +1349,7 @@ already_unhashed: ...@@ -1342,6 +1349,7 @@ already_unhashed:
list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
spin_unlock(&target->d_lock); spin_unlock(&target->d_lock);
fsnotify_d_move(dentry);
spin_unlock(&dentry->d_lock); spin_unlock(&dentry->d_lock);
write_sequnlock(&rename_lock); write_sequnlock(&rename_lock);
spin_unlock(&dcache_lock); spin_unlock(&dcache_lock);
......
...@@ -38,7 +38,6 @@ ...@@ -38,7 +38,6 @@
#include <asm/ioctls.h> #include <asm/ioctls.h>
static atomic_t inotify_cookie; static atomic_t inotify_cookie;
static atomic_t inotify_watches;
static kmem_cache_t *watch_cachep; static kmem_cache_t *watch_cachep;
static kmem_cache_t *event_cachep; static kmem_cache_t *event_cachep;
...@@ -380,6 +379,48 @@ static int find_inode(const char __user *dirname, struct nameidata *nd, ...@@ -380,6 +379,48 @@ static int find_inode(const char __user *dirname, struct nameidata *nd,
return error; return error;
} }
/*
* inotify_inode_watched - returns nonzero if there are watches on this inode
* and zero otherwise. We call this lockless, we do not care if we race.
*/
static inline int inotify_inode_watched(struct inode *inode)
{
return !list_empty(&inode->inotify_watches);
}
/*
* Get child dentry flag into synch with parent inode.
* Flag should always be clear for negative dentrys.
*/
static void set_dentry_child_flags(struct inode *inode, int watched)
{
struct dentry *alias;
spin_lock(&dcache_lock);
list_for_each_entry(alias, &inode->i_dentry, d_alias) {
struct dentry *child;
list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
if (!child->d_inode) {
WARN_ON(child->d_flags & DCACHE_INOTIFY_PARENT_WATCHED);
continue;
}
spin_lock(&child->d_lock);
if (watched) {
WARN_ON(child->d_flags &
DCACHE_INOTIFY_PARENT_WATCHED);
child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
} else {
WARN_ON(!(child->d_flags &
DCACHE_INOTIFY_PARENT_WATCHED));
child->d_flags&=~DCACHE_INOTIFY_PARENT_WATCHED;
}
spin_unlock(&child->d_lock);
}
}
spin_unlock(&dcache_lock);
}
/* /*
* create_watch - creates a watch on the given device. * create_watch - creates a watch on the given device.
* *
...@@ -426,7 +467,6 @@ static struct inotify_watch *create_watch(struct inotify_device *dev, ...@@ -426,7 +467,6 @@ static struct inotify_watch *create_watch(struct inotify_device *dev,
get_inotify_watch(watch); get_inotify_watch(watch);
atomic_inc(&dev->user->inotify_watches); atomic_inc(&dev->user->inotify_watches);
atomic_inc(&inotify_watches);
return watch; return watch;
} }
...@@ -458,8 +498,10 @@ static void remove_watch_no_event(struct inotify_watch *watch, ...@@ -458,8 +498,10 @@ static void remove_watch_no_event(struct inotify_watch *watch,
list_del(&watch->i_list); list_del(&watch->i_list);
list_del(&watch->d_list); list_del(&watch->d_list);
if (!inotify_inode_watched(watch->inode))
set_dentry_child_flags(watch->inode, 0);
atomic_dec(&dev->user->inotify_watches); atomic_dec(&dev->user->inotify_watches);
atomic_dec(&inotify_watches);
idr_remove(&dev->idr, watch->wd); idr_remove(&dev->idr, watch->wd);
put_inotify_watch(watch); put_inotify_watch(watch);
} }
...@@ -481,16 +523,39 @@ static void remove_watch(struct inotify_watch *watch,struct inotify_device *dev) ...@@ -481,16 +523,39 @@ static void remove_watch(struct inotify_watch *watch,struct inotify_device *dev)
remove_watch_no_event(watch, dev); remove_watch_no_event(watch, dev);
} }
/* Kernel API */
/* /*
* inotify_inode_watched - returns nonzero if there are watches on this inode * inotify_d_instantiate - instantiate dcache entry for inode
* and zero otherwise. We call this lockless, we do not care if we race.
*/ */
static inline int inotify_inode_watched(struct inode *inode) void inotify_d_instantiate(struct dentry *entry, struct inode *inode)
{ {
return !list_empty(&inode->inotify_watches); struct dentry *parent;
if (!inode)
return;
WARN_ON(entry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED);
spin_lock(&entry->d_lock);
parent = entry->d_parent;
if (inotify_inode_watched(parent->d_inode))
entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
spin_unlock(&entry->d_lock);
} }
/* Kernel API */ /*
* inotify_d_move - dcache entry has been moved
*/
void inotify_d_move(struct dentry *entry)
{
struct dentry *parent;
parent = entry->d_parent;
if (inotify_inode_watched(parent->d_inode))
entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
else
entry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
}
/** /**
* inotify_inode_queue_event - queue an event to all watches on this inode * inotify_inode_queue_event - queue an event to all watches on this inode
...@@ -538,7 +603,7 @@ void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask, ...@@ -538,7 +603,7 @@ void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask,
struct dentry *parent; struct dentry *parent;
struct inode *inode; struct inode *inode;
if (!atomic_read (&inotify_watches)) if (!(dentry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED))
return; return;
spin_lock(&dentry->d_lock); spin_lock(&dentry->d_lock);
...@@ -993,6 +1058,9 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) ...@@ -993,6 +1058,9 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
goto out; goto out;
} }
if (!inotify_inode_watched(inode))
set_dentry_child_flags(inode, 1);
/* Add the watch to the device's and the inode's list */ /* Add the watch to the device's and the inode's list */
list_add(&watch->d_list, &dev->watches); list_add(&watch->d_list, &dev->watches);
list_add(&watch->i_list, &inode->inotify_watches); list_add(&watch->i_list, &inode->inotify_watches);
...@@ -1065,7 +1133,6 @@ static int __init inotify_setup(void) ...@@ -1065,7 +1133,6 @@ static int __init inotify_setup(void)
inotify_max_user_watches = 8192; inotify_max_user_watches = 8192;
atomic_set(&inotify_cookie, 0); atomic_set(&inotify_cookie, 0);
atomic_set(&inotify_watches, 0);
watch_cachep = kmem_cache_create("inotify_watch_cache", watch_cachep = kmem_cache_create("inotify_watch_cache",
sizeof(struct inotify_watch), sizeof(struct inotify_watch),
......
...@@ -162,6 +162,8 @@ d_iput: no no no yes ...@@ -162,6 +162,8 @@ d_iput: no no no yes
#define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */
#define DCACHE_UNHASHED 0x0010 #define DCACHE_UNHASHED 0x0010
#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched */
extern spinlock_t dcache_lock; extern spinlock_t dcache_lock;
/** /**
......
...@@ -16,6 +16,25 @@ ...@@ -16,6 +16,25 @@
#include <linux/dnotify.h> #include <linux/dnotify.h>
#include <linux/inotify.h> #include <linux/inotify.h>
/*
* fsnotify_d_instantiate - instantiate a dentry for inode
* Called with dcache_lock held.
*/
static inline void fsnotify_d_instantiate(struct dentry *entry,
struct inode *inode)
{
inotify_d_instantiate(entry, inode);
}
/*
* fsnotify_d_move - entry has been moved
* Called with dcache_lock and entry->d_lock held.
*/
static inline void fsnotify_d_move(struct dentry *entry)
{
inotify_d_move(entry);
}
/* /*
* fsnotify_move - file old_name at old_dir was moved to new_name at new_dir * fsnotify_move - file old_name at old_dir was moved to new_name at new_dir
*/ */
......
...@@ -71,6 +71,8 @@ struct inotify_event { ...@@ -71,6 +71,8 @@ struct inotify_event {
#ifdef CONFIG_INOTIFY #ifdef CONFIG_INOTIFY
extern void inotify_d_instantiate(struct dentry *, struct inode *);
extern void inotify_d_move(struct dentry *);
extern void inotify_inode_queue_event(struct inode *, __u32, __u32, extern void inotify_inode_queue_event(struct inode *, __u32, __u32,
const char *); const char *);
extern void inotify_dentry_parent_queue_event(struct dentry *, __u32, __u32, extern void inotify_dentry_parent_queue_event(struct dentry *, __u32, __u32,
...@@ -81,6 +83,15 @@ extern u32 inotify_get_cookie(void); ...@@ -81,6 +83,15 @@ extern u32 inotify_get_cookie(void);
#else #else
static inline void inotify_d_instantiate(struct dentry *dentry,
struct inode *inode)
{
}
static inline void inotify_d_move(struct dentry *dentry)
{
}
static inline void inotify_inode_queue_event(struct inode *inode, static inline void inotify_inode_queue_event(struct inode *inode,
__u32 mask, __u32 cookie, __u32 mask, __u32 cookie,
const char *filename) const char *filename)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment