Commit a9280fed authored by Linus Torvalds

Merge branch 'reiserfs/kill-bkl' of...

Merge branch 'reiserfs/kill-bkl' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/random-tracing

* 'reiserfs/kill-bkl' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/random-tracing: (31 commits)
  kill-the-bkl/reiserfs: turn GFP_ATOMIC flag to GFP_NOFS in reiserfs_get_block()
  kill-the-bkl/reiserfs: drop the fs race watchdog from _get_block_create_0()
  kill-the-bkl/reiserfs: definitely drop the bkl from reiserfs_ioctl()
  kill-the-bkl/reiserfs: always lock the ioctl path
  kill-the-bkl/reiserfs: fix reiserfs lock to cpu_add_remove_lock dependency
  kill-the-bkl/reiserfs: Fix induced mm->mmap_sem to sysfs_mutex dependency
  kill-the-bkl/reiserfs: panic in case of lock imbalance
  kill-the-bkl/reiserfs: fix recursive reiserfs write lock in reiserfs_commit_write()
  kill-the-bkl/reiserfs: fix recursive reiserfs lock in reiserfs_mkdir()
  kill-the-bkl/reiserfs: fix "reiserfs lock" / "inode mutex" lock inversion dependency
  kill-the-bkl/reiserfs: move the concurrent tree accesses checks per superblock
  kill-the-bkl/reiserfs: acquire the inode mutex safely
  kill-the-bkl/reiserfs: unlock only when needed in search_by_key
  kill-the-bkl/reiserfs: use mutex_lock in reiserfs_mutex_lock_safe
  kill-the-bkl/reiserfs: factorize the locking in reiserfs_write_end()
  kill-the-bkl/reiserfs: reduce number of contentions in search_by_key()
  kill-the-bkl/reiserfs: don't hold the write recursively in reiserfs_lookup()
  kill-the-bkl/reiserfs: lock only once on reiserfs_get_block()
  kill-the-bkl/reiserfs: conditionaly release the write lock on fs_changed()
  kill-the-BKL/reiserfs: add reiserfs_cond_resched()
  ...
parents 2b876f95 1d2c6cfd
...@@ -7,7 +7,7 @@ obj-$(CONFIG_REISERFS_FS) += reiserfs.o ...@@ -7,7 +7,7 @@ obj-$(CONFIG_REISERFS_FS) += reiserfs.o
reiserfs-objs := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o \ reiserfs-objs := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o \
super.o prints.o objectid.o lbalance.o ibalance.o stree.o \ super.o prints.o objectid.o lbalance.o ibalance.o stree.o \
hashes.o tail_conversion.o journal.o resize.o \ hashes.o tail_conversion.o journal.o resize.o \
item_ops.o ioctl.o procfs.o xattr.o item_ops.o ioctl.o procfs.o xattr.o lock.o
ifeq ($(CONFIG_REISERFS_FS_XATTR),y) ifeq ($(CONFIG_REISERFS_FS_XATTR),y)
reiserfs-objs += xattr_user.o xattr_trusted.o reiserfs-objs += xattr_user.o xattr_trusted.o
......
...@@ -1249,14 +1249,18 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, ...@@ -1249,14 +1249,18 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
else if (bitmap == 0) else if (bitmap == 0)
block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1; block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1;
reiserfs_write_unlock(sb);
bh = sb_bread(sb, block); bh = sb_bread(sb, block);
reiserfs_write_lock(sb);
if (bh == NULL) if (bh == NULL)
reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) " reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) "
"reading failed", __func__, block); "reading failed", __func__, block);
else { else {
if (buffer_locked(bh)) { if (buffer_locked(bh)) {
PROC_INFO_INC(sb, scan_bitmap.wait); PROC_INFO_INC(sb, scan_bitmap.wait);
reiserfs_write_unlock(sb);
__wait_on_buffer(bh); __wait_on_buffer(bh);
reiserfs_write_lock(sb);
} }
BUG_ON(!buffer_uptodate(bh)); BUG_ON(!buffer_uptodate(bh));
BUG_ON(atomic_read(&bh->b_count) == 0); BUG_ON(atomic_read(&bh->b_count) == 0);
......
...@@ -20,7 +20,7 @@ const struct file_operations reiserfs_dir_operations = { ...@@ -20,7 +20,7 @@ const struct file_operations reiserfs_dir_operations = {
.read = generic_read_dir, .read = generic_read_dir,
.readdir = reiserfs_readdir, .readdir = reiserfs_readdir,
.fsync = reiserfs_dir_fsync, .fsync = reiserfs_dir_fsync,
.ioctl = reiserfs_ioctl, .unlocked_ioctl = reiserfs_ioctl,
#ifdef CONFIG_COMPAT #ifdef CONFIG_COMPAT
.compat_ioctl = reiserfs_compat_ioctl, .compat_ioctl = reiserfs_compat_ioctl,
#endif #endif
...@@ -174,14 +174,22 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, ...@@ -174,14 +174,22 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent,
// user space buffer is swapped out. At that time // user space buffer is swapped out. At that time
// entry can move to somewhere else // entry can move to somewhere else
memcpy(local_buf, d_name, d_reclen); memcpy(local_buf, d_name, d_reclen);
/*
* Since filldir might sleep, we can release
* the write lock here for other waiters
*/
reiserfs_write_unlock(inode->i_sb);
if (filldir if (filldir
(dirent, local_buf, d_reclen, d_off, d_ino, (dirent, local_buf, d_reclen, d_off, d_ino,
DT_UNKNOWN) < 0) { DT_UNKNOWN) < 0) {
reiserfs_write_lock(inode->i_sb);
if (local_buf != small_buf) { if (local_buf != small_buf) {
kfree(local_buf); kfree(local_buf);
} }
goto end; goto end;
} }
reiserfs_write_lock(inode->i_sb);
if (local_buf != small_buf) { if (local_buf != small_buf) {
kfree(local_buf); kfree(local_buf);
} }
......
...@@ -21,14 +21,6 @@ ...@@ -21,14 +21,6 @@
#include <linux/buffer_head.h> #include <linux/buffer_head.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#ifdef CONFIG_REISERFS_CHECK
struct tree_balance *cur_tb = NULL; /* detects whether more than one
copy of tb exists as a means
of checking whether schedule
is interrupting do_balance */
#endif
static inline void buffer_info_init_left(struct tree_balance *tb, static inline void buffer_info_init_left(struct tree_balance *tb,
struct buffer_info *bi) struct buffer_info *bi)
{ {
...@@ -1840,11 +1832,12 @@ static int check_before_balancing(struct tree_balance *tb) ...@@ -1840,11 +1832,12 @@ static int check_before_balancing(struct tree_balance *tb)
{ {
int retval = 0; int retval = 0;
if (cur_tb) { if (REISERFS_SB(tb->tb_sb)->cur_tb) {
reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule " reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule "
"occurred based on cur_tb not being null at " "occurred based on cur_tb not being null at "
"this point in code. do_balance cannot properly " "this point in code. do_balance cannot properly "
"handle schedule occurring while it runs."); "handle concurrent tree accesses on a same "
"mount point.");
} }
/* double check that buffers that we will modify are unlocked. (fix_nodes should already have /* double check that buffers that we will modify are unlocked. (fix_nodes should already have
...@@ -1986,7 +1979,7 @@ static inline void do_balance_starts(struct tree_balance *tb) ...@@ -1986,7 +1979,7 @@ static inline void do_balance_starts(struct tree_balance *tb)
"check");*/ "check");*/
RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB"); RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
#ifdef CONFIG_REISERFS_CHECK #ifdef CONFIG_REISERFS_CHECK
cur_tb = tb; REISERFS_SB(tb->tb_sb)->cur_tb = tb;
#endif #endif
} }
...@@ -1996,7 +1989,7 @@ static inline void do_balance_completed(struct tree_balance *tb) ...@@ -1996,7 +1989,7 @@ static inline void do_balance_completed(struct tree_balance *tb)
#ifdef CONFIG_REISERFS_CHECK #ifdef CONFIG_REISERFS_CHECK
check_leaf_level(tb); check_leaf_level(tb);
check_internal_levels(tb); check_internal_levels(tb);
cur_tb = NULL; REISERFS_SB(tb->tb_sb)->cur_tb = NULL;
#endif #endif
/* reiserfs_free_block is no longer schedule safe. So, we need to /* reiserfs_free_block is no longer schedule safe. So, we need to
......
...@@ -284,7 +284,7 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t ...@@ -284,7 +284,7 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t
const struct file_operations reiserfs_file_operations = { const struct file_operations reiserfs_file_operations = {
.read = do_sync_read, .read = do_sync_read,
.write = reiserfs_file_write, .write = reiserfs_file_write,
.ioctl = reiserfs_ioctl, .unlocked_ioctl = reiserfs_ioctl,
#ifdef CONFIG_COMPAT #ifdef CONFIG_COMPAT
.compat_ioctl = reiserfs_compat_ioctl, .compat_ioctl = reiserfs_compat_ioctl,
#endif #endif
......
...@@ -563,9 +563,6 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h, ...@@ -563,9 +563,6 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
return needed_nodes; return needed_nodes;
} }
#ifdef CONFIG_REISERFS_CHECK
extern struct tree_balance *cur_tb;
#endif
/* Set parameters for balancing. /* Set parameters for balancing.
* Performs write of results of analysis of balancing into structure tb, * Performs write of results of analysis of balancing into structure tb,
...@@ -1022,7 +1019,11 @@ static int get_far_parent(struct tree_balance *tb, ...@@ -1022,7 +1019,11 @@ static int get_far_parent(struct tree_balance *tb,
/* Check whether the common parent is locked. */ /* Check whether the common parent is locked. */
if (buffer_locked(*pcom_father)) { if (buffer_locked(*pcom_father)) {
/* Release the write lock while the buffer is busy */
reiserfs_write_unlock(tb->tb_sb);
__wait_on_buffer(*pcom_father); __wait_on_buffer(*pcom_father);
reiserfs_write_lock(tb->tb_sb);
if (FILESYSTEM_CHANGED_TB(tb)) { if (FILESYSTEM_CHANGED_TB(tb)) {
brelse(*pcom_father); brelse(*pcom_father);
return REPEAT_SEARCH; return REPEAT_SEARCH;
...@@ -1927,7 +1928,9 @@ static int get_direct_parent(struct tree_balance *tb, int h) ...@@ -1927,7 +1928,9 @@ static int get_direct_parent(struct tree_balance *tb, int h)
return REPEAT_SEARCH; return REPEAT_SEARCH;
if (buffer_locked(bh)) { if (buffer_locked(bh)) {
reiserfs_write_unlock(tb->tb_sb);
__wait_on_buffer(bh); __wait_on_buffer(bh);
reiserfs_write_lock(tb->tb_sb);
if (FILESYSTEM_CHANGED_TB(tb)) if (FILESYSTEM_CHANGED_TB(tb))
return REPEAT_SEARCH; return REPEAT_SEARCH;
} }
...@@ -1965,7 +1968,9 @@ static int get_neighbors(struct tree_balance *tb, int h) ...@@ -1965,7 +1968,9 @@ static int get_neighbors(struct tree_balance *tb, int h)
tb->FL[h]) ? tb->lkey[h] : B_NR_ITEMS(tb-> tb->FL[h]) ? tb->lkey[h] : B_NR_ITEMS(tb->
FL[h]); FL[h]);
son_number = B_N_CHILD_NUM(tb->FL[h], child_position); son_number = B_N_CHILD_NUM(tb->FL[h], child_position);
reiserfs_write_unlock(sb);
bh = sb_bread(sb, son_number); bh = sb_bread(sb, son_number);
reiserfs_write_lock(sb);
if (!bh) if (!bh)
return IO_ERROR; return IO_ERROR;
if (FILESYSTEM_CHANGED_TB(tb)) { if (FILESYSTEM_CHANGED_TB(tb)) {
...@@ -2003,7 +2008,9 @@ static int get_neighbors(struct tree_balance *tb, int h) ...@@ -2003,7 +2008,9 @@ static int get_neighbors(struct tree_balance *tb, int h)
child_position = child_position =
(bh == tb->FR[h]) ? tb->rkey[h] + 1 : 0; (bh == tb->FR[h]) ? tb->rkey[h] + 1 : 0;
son_number = B_N_CHILD_NUM(tb->FR[h], child_position); son_number = B_N_CHILD_NUM(tb->FR[h], child_position);
reiserfs_write_unlock(sb);
bh = sb_bread(sb, son_number); bh = sb_bread(sb, son_number);
reiserfs_write_lock(sb);
if (!bh) if (!bh)
return IO_ERROR; return IO_ERROR;
if (FILESYSTEM_CHANGED_TB(tb)) { if (FILESYSTEM_CHANGED_TB(tb)) {
...@@ -2278,7 +2285,9 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb) ...@@ -2278,7 +2285,9 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb)
REPEAT_SEARCH : CARRY_ON; REPEAT_SEARCH : CARRY_ON;
} }
#endif #endif
reiserfs_write_unlock(tb->tb_sb);
__wait_on_buffer(locked); __wait_on_buffer(locked);
reiserfs_write_lock(tb->tb_sb);
if (FILESYSTEM_CHANGED_TB(tb)) if (FILESYSTEM_CHANGED_TB(tb))
return REPEAT_SEARCH; return REPEAT_SEARCH;
} }
...@@ -2349,12 +2358,14 @@ int fix_nodes(int op_mode, struct tree_balance *tb, ...@@ -2349,12 +2358,14 @@ int fix_nodes(int op_mode, struct tree_balance *tb,
/* if it possible in indirect_to_direct conversion */ /* if it possible in indirect_to_direct conversion */
if (buffer_locked(tbS0)) { if (buffer_locked(tbS0)) {
reiserfs_write_unlock(tb->tb_sb);
__wait_on_buffer(tbS0); __wait_on_buffer(tbS0);
reiserfs_write_lock(tb->tb_sb);
if (FILESYSTEM_CHANGED_TB(tb)) if (FILESYSTEM_CHANGED_TB(tb))
return REPEAT_SEARCH; return REPEAT_SEARCH;
} }
#ifdef CONFIG_REISERFS_CHECK #ifdef CONFIG_REISERFS_CHECK
if (cur_tb) { if (REISERFS_SB(tb->tb_sb)->cur_tb) {
print_cur_tb("fix_nodes"); print_cur_tb("fix_nodes");
reiserfs_panic(tb->tb_sb, "PAP-8305", reiserfs_panic(tb->tb_sb, "PAP-8305",
"there is pending do_balance"); "there is pending do_balance");
......
...@@ -251,7 +251,6 @@ static int _get_block_create_0(struct inode *inode, sector_t block, ...@@ -251,7 +251,6 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
struct cpu_key key; struct cpu_key key;
struct buffer_head *bh; struct buffer_head *bh;
struct item_head *ih, tmp_ih; struct item_head *ih, tmp_ih;
int fs_gen;
b_blocknr_t blocknr; b_blocknr_t blocknr;
char *p = NULL; char *p = NULL;
int chars; int chars;
...@@ -265,7 +264,6 @@ static int _get_block_create_0(struct inode *inode, sector_t block, ...@@ -265,7 +264,6 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
(loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY, (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY,
3); 3);
research:
result = search_for_position_by_key(inode->i_sb, &key, &path); result = search_for_position_by_key(inode->i_sb, &key, &path);
if (result != POSITION_FOUND) { if (result != POSITION_FOUND) {
pathrelse(&path); pathrelse(&path);
...@@ -340,7 +338,6 @@ static int _get_block_create_0(struct inode *inode, sector_t block, ...@@ -340,7 +338,6 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
} }
// read file tail into part of page // read file tail into part of page
offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1); offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1);
fs_gen = get_generation(inode->i_sb);
copy_item_head(&tmp_ih, ih); copy_item_head(&tmp_ih, ih);
/* we only want to kmap if we are reading the tail into the page. /* we only want to kmap if we are reading the tail into the page.
...@@ -348,13 +345,9 @@ static int _get_block_create_0(struct inode *inode, sector_t block, ...@@ -348,13 +345,9 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
** sure we need to. But, this means the item might move if ** sure we need to. But, this means the item might move if
** kmap schedules ** kmap schedules
*/ */
if (!p) { if (!p)
p = (char *)kmap(bh_result->b_page); p = (char *)kmap(bh_result->b_page);
if (fs_changed(fs_gen, inode->i_sb)
&& item_moved(&tmp_ih, &path)) {
goto research;
}
}
p += offset; p += offset;
memset(p, 0, inode->i_sb->s_blocksize); memset(p, 0, inode->i_sb->s_blocksize);
do { do {
...@@ -489,10 +482,14 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode, ...@@ -489,10 +482,14 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode,
disappeared */ disappeared */
if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) { if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
int err; int err;
lock_kernel();
reiserfs_write_lock(inode->i_sb);
err = reiserfs_commit_for_inode(inode); err = reiserfs_commit_for_inode(inode);
REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
unlock_kernel();
reiserfs_write_unlock(inode->i_sb);
if (err < 0) if (err < 0)
ret = err; ret = err;
} }
...@@ -601,6 +598,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block, ...@@ -601,6 +598,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
__le32 *item; __le32 *item;
int done; int done;
int fs_gen; int fs_gen;
int lock_depth;
struct reiserfs_transaction_handle *th = NULL; struct reiserfs_transaction_handle *th = NULL;
/* space reserved in transaction batch: /* space reserved in transaction batch:
. 3 balancings in direct->indirect conversion . 3 balancings in direct->indirect conversion
...@@ -616,12 +614,11 @@ int reiserfs_get_block(struct inode *inode, sector_t block, ...@@ -616,12 +614,11 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
loff_t new_offset = loff_t new_offset =
(((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1; (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1;
/* bad.... */ lock_depth = reiserfs_write_lock_once(inode->i_sb);
reiserfs_write_lock(inode->i_sb);
version = get_inode_item_key_version(inode); version = get_inode_item_key_version(inode);
if (!file_capable(inode, block)) { if (!file_capable(inode, block)) {
reiserfs_write_unlock(inode->i_sb); reiserfs_write_unlock_once(inode->i_sb, lock_depth);
return -EFBIG; return -EFBIG;
} }
...@@ -633,7 +630,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block, ...@@ -633,7 +630,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
/* find number of block-th logical block of the file */ /* find number of block-th logical block of the file */
ret = _get_block_create_0(inode, block, bh_result, ret = _get_block_create_0(inode, block, bh_result,
create | GET_BLOCK_READ_DIRECT); create | GET_BLOCK_READ_DIRECT);
reiserfs_write_unlock(inode->i_sb); reiserfs_write_unlock_once(inode->i_sb, lock_depth);
return ret; return ret;
} }
/* /*
...@@ -751,7 +748,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block, ...@@ -751,7 +748,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
if (!dangle && th) if (!dangle && th)
retval = reiserfs_end_persistent_transaction(th); retval = reiserfs_end_persistent_transaction(th);
reiserfs_write_unlock(inode->i_sb); reiserfs_write_unlock_once(inode->i_sb, lock_depth);
/* the item was found, so new blocks were not added to the file /* the item was found, so new blocks were not added to the file
** there is no need to make sure the inode is updated with this ** there is no need to make sure the inode is updated with this
...@@ -935,7 +932,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block, ...@@ -935,7 +932,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
if (blocks_needed == 1) { if (blocks_needed == 1) {
un = &unf_single; un = &unf_single;
} else { } else {
un = kzalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_ATOMIC); // We need to avoid scheduling. un = kzalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_NOFS);
if (!un) { if (!un) {
un = &unf_single; un = &unf_single;
blocks_needed = 1; blocks_needed = 1;
...@@ -997,10 +994,16 @@ int reiserfs_get_block(struct inode *inode, sector_t block, ...@@ -997,10 +994,16 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
if (retval) if (retval)
goto failure; goto failure;
} }
/* inserting indirect pointers for a hole can take a /*
** long time. reschedule if needed * inserting indirect pointers for a hole can take a
* long time. reschedule if needed and also release the write
* lock for others.
*/ */
cond_resched(); if (need_resched()) {
reiserfs_write_unlock_once(inode->i_sb, lock_depth);
schedule();
lock_depth = reiserfs_write_lock_once(inode->i_sb);
}
retval = search_for_position_by_key(inode->i_sb, &key, &path); retval = search_for_position_by_key(inode->i_sb, &key, &path);
if (retval == IO_ERROR) { if (retval == IO_ERROR) {
...@@ -1035,7 +1038,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block, ...@@ -1035,7 +1038,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
retval = err; retval = err;
} }
reiserfs_write_unlock(inode->i_sb); reiserfs_write_unlock_once(inode->i_sb, lock_depth);
reiserfs_check_path(&path); reiserfs_check_path(&path);
return retval; return retval;
} }
...@@ -2072,8 +2075,9 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps) ...@@ -2072,8 +2075,9 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
int error; int error;
struct buffer_head *bh = NULL; struct buffer_head *bh = NULL;
int err2; int err2;
int lock_depth;
reiserfs_write_lock(inode->i_sb); lock_depth = reiserfs_write_lock_once(inode->i_sb);
if (inode->i_size > 0) { if (inode->i_size > 0) {
error = grab_tail_page(inode, &page, &bh); error = grab_tail_page(inode, &page, &bh);
...@@ -2142,14 +2146,17 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps) ...@@ -2142,14 +2146,17 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
page_cache_release(page); page_cache_release(page);
} }
reiserfs_write_unlock(inode->i_sb); reiserfs_write_unlock_once(inode->i_sb, lock_depth);
return 0; return 0;
out: out:
if (page) { if (page) {
unlock_page(page); unlock_page(page);
page_cache_release(page); page_cache_release(page);
} }
reiserfs_write_unlock(inode->i_sb);
reiserfs_write_unlock_once(inode->i_sb, lock_depth);
return error; return error;
} }
...@@ -2608,7 +2615,10 @@ int reiserfs_prepare_write(struct file *f, struct page *page, ...@@ -2608,7 +2615,10 @@ int reiserfs_prepare_write(struct file *f, struct page *page,
int ret; int ret;
int old_ref = 0; int old_ref = 0;
reiserfs_write_unlock(inode->i_sb);
reiserfs_wait_on_write_block(inode->i_sb); reiserfs_wait_on_write_block(inode->i_sb);
reiserfs_write_lock(inode->i_sb);
fix_tail_page_for_writing(page); fix_tail_page_for_writing(page);
if (reiserfs_transaction_running(inode->i_sb)) { if (reiserfs_transaction_running(inode->i_sb)) {
struct reiserfs_transaction_handle *th; struct reiserfs_transaction_handle *th;
...@@ -2664,6 +2674,8 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping, ...@@ -2664,6 +2674,8 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
int update_sd = 0; int update_sd = 0;
struct reiserfs_transaction_handle *th; struct reiserfs_transaction_handle *th;
unsigned start; unsigned start;
int lock_depth = 0;
bool locked = false;
if ((unsigned long)fsdata & AOP_FLAG_CONT_EXPAND) if ((unsigned long)fsdata & AOP_FLAG_CONT_EXPAND)
pos ++; pos ++;
...@@ -2690,9 +2702,11 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping, ...@@ -2690,9 +2702,11 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
** to do the i_size updates here. ** to do the i_size updates here.
*/ */
pos += copied; pos += copied;
if (pos > inode->i_size) { if (pos > inode->i_size) {
struct reiserfs_transaction_handle myth; struct reiserfs_transaction_handle myth;
reiserfs_write_lock(inode->i_sb); lock_depth = reiserfs_write_lock_once(inode->i_sb);
locked = true;
/* If the file have grown beyond the border where it /* If the file have grown beyond the border where it
can have a tail, unmark it as needing a tail can have a tail, unmark it as needing a tail
packing */ packing */
...@@ -2703,10 +2717,9 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping, ...@@ -2703,10 +2717,9 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
ret = journal_begin(&myth, inode->i_sb, 1); ret = journal_begin(&myth, inode->i_sb, 1);
if (ret) { if (ret)
reiserfs_write_unlock(inode->i_sb);
goto journal_error; goto journal_error;
}
reiserfs_update_inode_transaction(inode); reiserfs_update_inode_transaction(inode);
inode->i_size = pos; inode->i_size = pos;
/* /*
...@@ -2718,34 +2731,36 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping, ...@@ -2718,34 +2731,36 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
reiserfs_update_sd(&myth, inode); reiserfs_update_sd(&myth, inode);
update_sd = 1; update_sd = 1;
ret = journal_end(&myth, inode->i_sb, 1); ret = journal_end(&myth, inode->i_sb, 1);
reiserfs_write_unlock(inode->i_sb);
if (ret) if (ret)
goto journal_error; goto journal_error;
} }
if (th) { if (th) {
reiserfs_write_lock(inode->i_sb); if (!locked) {
lock_depth = reiserfs_write_lock_once(inode->i_sb);
locked = true;
}
if (!update_sd) if (!update_sd)
mark_inode_dirty(inode); mark_inode_dirty(inode);
ret = reiserfs_end_persistent_transaction(th); ret = reiserfs_end_persistent_transaction(th);
reiserfs_write_unlock(inode->i_sb);
if (ret) if (ret)
goto out; goto out;
} }
out: out:
if (locked)
reiserfs_write_unlock_once(inode->i_sb, lock_depth);
unlock_page(page); unlock_page(page);
page_cache_release(page); page_cache_release(page);
return ret == 0 ? copied : ret; return ret == 0 ? copied : ret;
journal_error: journal_error:
reiserfs_write_unlock_once(inode->i_sb, lock_depth);
locked = false;
if (th) { if (th) {
reiserfs_write_lock(inode->i_sb);
if (!update_sd) if (!update_sd)
reiserfs_update_sd(th, inode); reiserfs_update_sd(th, inode);
ret = reiserfs_end_persistent_transaction(th); ret = reiserfs_end_persistent_transaction(th);
reiserfs_write_unlock(inode->i_sb);
} }
goto out; goto out;
} }
...@@ -2758,7 +2773,10 @@ int reiserfs_commit_write(struct file *f, struct page *page, ...@@ -2758,7 +2773,10 @@ int reiserfs_commit_write(struct file *f, struct page *page,
int update_sd = 0; int update_sd = 0;
struct reiserfs_transaction_handle *th = NULL; struct reiserfs_transaction_handle *th = NULL;
reiserfs_write_unlock(inode->i_sb);
reiserfs_wait_on_write_block(inode->i_sb); reiserfs_wait_on_write_block(inode->i_sb);
reiserfs_write_lock(inode->i_sb);
if (reiserfs_transaction_running(inode->i_sb)) { if (reiserfs_transaction_running(inode->i_sb)) {
th = current->journal_info; th = current->journal_info;
} }
...@@ -2770,7 +2788,6 @@ int reiserfs_commit_write(struct file *f, struct page *page, ...@@ -2770,7 +2788,6 @@ int reiserfs_commit_write(struct file *f, struct page *page,
*/ */
if (pos > inode->i_size) { if (pos > inode->i_size) {
struct reiserfs_transaction_handle myth; struct reiserfs_transaction_handle myth;
reiserfs_write_lock(inode->i_sb);
/* If the file have grown beyond the border where it /* If the file have grown beyond the border where it
can have a tail, unmark it as needing a tail can have a tail, unmark it as needing a tail
packing */ packing */
...@@ -2781,10 +2798,9 @@ int reiserfs_commit_write(struct file *f, struct page *page, ...@@ -2781,10 +2798,9 @@ int reiserfs_commit_write(struct file *f, struct page *page,
REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
ret = journal_begin(&myth, inode->i_sb, 1); ret = journal_begin(&myth, inode->i_sb, 1);
if (ret) { if (ret)
reiserfs_write_unlock(inode->i_sb);
goto journal_error; goto journal_error;
}
reiserfs_update_inode_transaction(inode); reiserfs_update_inode_transaction(inode);
inode->i_size = pos; inode->i_size = pos;
/* /*
...@@ -2796,16 +2812,13 @@ int reiserfs_commit_write(struct file *f, struct page *page, ...@@ -2796,16 +2812,13 @@ int reiserfs_commit_write(struct file *f, struct page *page,
reiserfs_update_sd(&myth, inode); reiserfs_update_sd(&myth, inode);
update_sd = 1; update_sd = 1;
ret = journal_end(&myth, inode->i_sb, 1); ret = journal_end(&myth, inode->i_sb, 1);
reiserfs_write_unlock(inode->i_sb);
if (ret) if (ret)
goto journal_error; goto journal_error;
} }
if (th) { if (th) {
reiserfs_write_lock(inode->i_sb);
if (!update_sd) if (!update_sd)
mark_inode_dirty(inode); mark_inode_dirty(inode);
ret = reiserfs_end_persistent_transaction(th); ret = reiserfs_end_persistent_transaction(th);
reiserfs_write_unlock(inode->i_sb);
if (ret) if (ret)
goto out; goto out;
} }
...@@ -2815,11 +2828,9 @@ int reiserfs_commit_write(struct file *f, struct page *page, ...@@ -2815,11 +2828,9 @@ int reiserfs_commit_write(struct file *f, struct page *page,
journal_error: journal_error:
if (th) { if (th) {
reiserfs_write_lock(inode->i_sb);
if (!update_sd) if (!update_sd)
reiserfs_update_sd(th, inode); reiserfs_update_sd(th, inode);
ret = reiserfs_end_persistent_transaction(th); ret = reiserfs_end_persistent_transaction(th);
reiserfs_write_unlock(inode->i_sb);
} }
return ret; return ret;
......
...@@ -13,44 +13,52 @@ ...@@ -13,44 +13,52 @@
#include <linux/compat.h> #include <linux/compat.h>
/* /*
** reiserfs_ioctl - handler for ioctl for inode * reiserfs_ioctl - handler for ioctl for inode
** supported commands: * supported commands:
** 1) REISERFS_IOC_UNPACK - try to unpack tail from direct item into indirect * 1) REISERFS_IOC_UNPACK - try to unpack tail from direct item into indirect
** and prevent packing file (argument arg has to be non-zero) * and prevent packing file (argument arg has to be non-zero)
** 2) REISERFS_IOC_[GS]ETFLAGS, REISERFS_IOC_[GS]ETVERSION * 2) REISERFS_IOC_[GS]ETFLAGS, REISERFS_IOC_[GS]ETVERSION
** 3) That's all for a while ... * 3) That's all for a while ...
*/ */
int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
unsigned long arg)
{ {
struct inode *inode = filp->f_path.dentry->d_inode;
unsigned int flags; unsigned int flags;
int err = 0; int err = 0;
reiserfs_write_lock(inode->i_sb);
switch (cmd) { switch (cmd) {
case REISERFS_IOC_UNPACK: case REISERFS_IOC_UNPACK:
if (S_ISREG(inode->i_mode)) { if (S_ISREG(inode->i_mode)) {
if (arg) if (arg)
return reiserfs_unpack(inode, filp); err = reiserfs_unpack(inode, filp);
else
return 0;
} else } else
return -ENOTTY; err = -ENOTTY;
/* following two cases are taken from fs/ext2/ioctl.c by Remy break;
Card (card@masi.ibp.fr) */ /*
* following two cases are taken from fs/ext2/ioctl.c by Remy
* Card (card@masi.ibp.fr)
*/
case REISERFS_IOC_GETFLAGS: case REISERFS_IOC_GETFLAGS:
if (!reiserfs_attrs(inode->i_sb)) if (!reiserfs_attrs(inode->i_sb)) {
return -ENOTTY; err = -ENOTTY;
break;
}
flags = REISERFS_I(inode)->i_attrs; flags = REISERFS_I(inode)->i_attrs;
i_attrs_to_sd_attrs(inode, (__u16 *) & flags); i_attrs_to_sd_attrs(inode, (__u16 *) & flags);
return put_user(flags, (int __user *)arg); err = put_user(flags, (int __user *)arg);
break;
case REISERFS_IOC_SETFLAGS:{ case REISERFS_IOC_SETFLAGS:{
if (!reiserfs_attrs(inode->i_sb)) if (!reiserfs_attrs(inode->i_sb)) {
return -ENOTTY; err = -ENOTTY;
break;
}
err = mnt_want_write(filp->f_path.mnt); err = mnt_want_write(filp->f_path.mnt);
if (err) if (err)
return err; break;
if (!is_owner_or_cap(inode)) { if (!is_owner_or_cap(inode)) {
err = -EPERM; err = -EPERM;
...@@ -90,16 +98,18 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, ...@@ -90,16 +98,18 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
mark_inode_dirty(inode); mark_inode_dirty(inode);
setflags_out: setflags_out:
mnt_drop_write(filp->f_path.mnt); mnt_drop_write(filp->f_path.mnt);
return err; break;
} }
case REISERFS_IOC_GETVERSION: case REISERFS_IOC_GETVERSION:
return put_user(inode->i_generation, (int __user *)arg); err = put_user(inode->i_generation, (int __user *)arg);
break;
case REISERFS_IOC_SETVERSION: case REISERFS_IOC_SETVERSION:
if (!is_owner_or_cap(inode)) if (!is_owner_or_cap(inode))
return -EPERM; err = -EPERM;
break;
err = mnt_want_write(filp->f_path.mnt); err = mnt_want_write(filp->f_path.mnt);
if (err) if (err)
return err; break;
if (get_user(inode->i_generation, (int __user *)arg)) { if (get_user(inode->i_generation, (int __user *)arg)) {
err = -EFAULT; err = -EFAULT;
goto setversion_out; goto setversion_out;
...@@ -108,19 +118,20 @@ setflags_out: ...@@ -108,19 +118,20 @@ setflags_out:
mark_inode_dirty(inode); mark_inode_dirty(inode);
setversion_out: setversion_out:
mnt_drop_write(filp->f_path.mnt); mnt_drop_write(filp->f_path.mnt);
return err; break;
default: default:
return -ENOTTY; err = -ENOTTY;
} }
reiserfs_write_unlock(inode->i_sb);
return err;
} }
#ifdef CONFIG_COMPAT #ifdef CONFIG_COMPAT
long reiserfs_compat_ioctl(struct file *file, unsigned int cmd, long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
unsigned long arg) unsigned long arg)
{ {
struct inode *inode = file->f_path.dentry->d_inode;
int ret;
/* These are just misnamed, they actually get/put from/to user an int */ /* These are just misnamed, they actually get/put from/to user an int */
switch (cmd) { switch (cmd) {
case REISERFS_IOC32_UNPACK: case REISERFS_IOC32_UNPACK:
...@@ -141,10 +152,8 @@ long reiserfs_compat_ioctl(struct file *file, unsigned int cmd, ...@@ -141,10 +152,8 @@ long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
default: default:
return -ENOIOCTLCMD; return -ENOIOCTLCMD;
} }
lock_kernel();
ret = reiserfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); return reiserfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
unlock_kernel();
return ret;
} }
#endif #endif
......
...@@ -429,21 +429,6 @@ static void clear_prepared_bits(struct buffer_head *bh) ...@@ -429,21 +429,6 @@ static void clear_prepared_bits(struct buffer_head *bh)
clear_buffer_journal_restore_dirty(bh); clear_buffer_journal_restore_dirty(bh);
} }
/* utility function to force a BUG if it is called without the big
** kernel lock held. caller is the string printed just before calling BUG()
*/
void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
{
#ifdef CONFIG_SMP
if (current->lock_depth < 0) {
reiserfs_panic(sb, "journal-1", "%s called without kernel "
"lock held", caller);
}
#else
;
#endif
}
/* return a cnode with same dev, block number and size in table, or null if not found */ /* return a cnode with same dev, block number and size in table, or null if not found */
static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
super_block super_block
...@@ -556,7 +541,8 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, ...@@ -556,7 +541,8 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
static inline void lock_journal(struct super_block *sb) static inline void lock_journal(struct super_block *sb)
{ {
PROC_INFO_INC(sb, journal.lock_journal); PROC_INFO_INC(sb, journal.lock_journal);
mutex_lock(&SB_JOURNAL(sb)->j_mutex);
reiserfs_mutex_lock_safe(&SB_JOURNAL(sb)->j_mutex, sb);
} }
/* unlock the current transaction */ /* unlock the current transaction */
...@@ -708,7 +694,9 @@ static void check_barrier_completion(struct super_block *s, ...@@ -708,7 +694,9 @@ static void check_barrier_completion(struct super_block *s,
disable_barrier(s); disable_barrier(s);
set_buffer_uptodate(bh); set_buffer_uptodate(bh);
set_buffer_dirty(bh); set_buffer_dirty(bh);
reiserfs_write_unlock(s);
sync_dirty_buffer(bh); sync_dirty_buffer(bh);
reiserfs_write_lock(s);
} }
} }
...@@ -996,8 +984,13 @@ static int reiserfs_async_progress_wait(struct super_block *s) ...@@ -996,8 +984,13 @@ static int reiserfs_async_progress_wait(struct super_block *s)
{ {
DEFINE_WAIT(wait); DEFINE_WAIT(wait);
struct reiserfs_journal *j = SB_JOURNAL(s); struct reiserfs_journal *j = SB_JOURNAL(s);
if (atomic_read(&j->j_async_throttle))
if (atomic_read(&j->j_async_throttle)) {
reiserfs_write_unlock(s);
congestion_wait(BLK_RW_ASYNC, HZ / 10); congestion_wait(BLK_RW_ASYNC, HZ / 10);
reiserfs_write_lock(s);
}
return 0; return 0;
} }
...@@ -1043,7 +1036,8 @@ static int flush_commit_list(struct super_block *s, ...@@ -1043,7 +1036,8 @@ static int flush_commit_list(struct super_block *s,
} }
/* make sure nobody is trying to flush this one at the same time */ /* make sure nobody is trying to flush this one at the same time */
mutex_lock(&jl->j_commit_mutex); reiserfs_mutex_lock_safe(&jl->j_commit_mutex, s);
if (!journal_list_still_alive(s, trans_id)) { if (!journal_list_still_alive(s, trans_id)) {
mutex_unlock(&jl->j_commit_mutex); mutex_unlock(&jl->j_commit_mutex);
goto put_jl; goto put_jl;
...@@ -1061,12 +1055,17 @@ static int flush_commit_list(struct super_block *s, ...@@ -1061,12 +1055,17 @@ static int flush_commit_list(struct super_block *s,
if (!list_empty(&jl->j_bh_list)) { if (!list_empty(&jl->j_bh_list)) {
int ret; int ret;
unlock_kernel();
/*
* We might sleep in numerous places inside
* write_ordered_buffers. Relax the write lock.
*/
reiserfs_write_unlock(s);
ret = write_ordered_buffers(&journal->j_dirty_buffers_lock, ret = write_ordered_buffers(&journal->j_dirty_buffers_lock,
journal, jl, &jl->j_bh_list); journal, jl, &jl->j_bh_list);
if (ret < 0 && retval == 0) if (ret < 0 && retval == 0)
retval = ret; retval = ret;
lock_kernel(); reiserfs_write_lock(s);
} }
BUG_ON(!list_empty(&jl->j_bh_list)); BUG_ON(!list_empty(&jl->j_bh_list));
/* /*
...@@ -1085,8 +1084,11 @@ static int flush_commit_list(struct super_block *s, ...@@ -1085,8 +1084,11 @@ static int flush_commit_list(struct super_block *s,
SB_ONDISK_JOURNAL_SIZE(s); SB_ONDISK_JOURNAL_SIZE(s);
tbh = journal_find_get_block(s, bn); tbh = journal_find_get_block(s, bn);
if (tbh) { if (tbh) {
if (buffer_dirty(tbh)) if (buffer_dirty(tbh)) {
ll_rw_block(WRITE, 1, &tbh) ; reiserfs_write_unlock(s);
ll_rw_block(WRITE, 1, &tbh);
reiserfs_write_lock(s);
}
put_bh(tbh) ; put_bh(tbh) ;
} }
} }
...@@ -1114,12 +1116,19 @@ static int flush_commit_list(struct super_block *s, ...@@ -1114,12 +1116,19 @@ static int flush_commit_list(struct super_block *s,
bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) +
(jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s); (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s);
tbh = journal_find_get_block(s, bn); tbh = journal_find_get_block(s, bn);
reiserfs_write_unlock(s);
wait_on_buffer(tbh); wait_on_buffer(tbh);
reiserfs_write_lock(s);
// since we're using ll_rw_blk above, it might have skipped over // since we're using ll_rw_blk above, it might have skipped over
// a locked buffer. Double check here // a locked buffer. Double check here
// //
if (buffer_dirty(tbh)) /* redundant, sync_dirty_buffer() checks */ /* redundant, sync_dirty_buffer() checks */
if (buffer_dirty(tbh)) {
reiserfs_write_unlock(s);
sync_dirty_buffer(tbh); sync_dirty_buffer(tbh);
reiserfs_write_lock(s);
}
if (unlikely(!buffer_uptodate(tbh))) { if (unlikely(!buffer_uptodate(tbh))) {
#ifdef CONFIG_REISERFS_CHECK #ifdef CONFIG_REISERFS_CHECK
reiserfs_warning(s, "journal-601", reiserfs_warning(s, "journal-601",
...@@ -1143,10 +1152,15 @@ static int flush_commit_list(struct super_block *s, ...@@ -1143,10 +1152,15 @@ static int flush_commit_list(struct super_block *s,
if (buffer_dirty(jl->j_commit_bh)) if (buffer_dirty(jl->j_commit_bh))
BUG(); BUG();
mark_buffer_dirty(jl->j_commit_bh) ; mark_buffer_dirty(jl->j_commit_bh) ;
reiserfs_write_unlock(s);
sync_dirty_buffer(jl->j_commit_bh) ; sync_dirty_buffer(jl->j_commit_bh) ;
reiserfs_write_lock(s);
} }
} else } else {
reiserfs_write_unlock(s);
wait_on_buffer(jl->j_commit_bh); wait_on_buffer(jl->j_commit_bh);
reiserfs_write_lock(s);
}
check_barrier_completion(s, jl->j_commit_bh); check_barrier_completion(s, jl->j_commit_bh);
...@@ -1286,7 +1300,9 @@ static int _update_journal_header_block(struct super_block *sb, ...@@ -1286,7 +1300,9 @@ static int _update_journal_header_block(struct super_block *sb,
if (trans_id >= journal->j_last_flush_trans_id) { if (trans_id >= journal->j_last_flush_trans_id) {
if (buffer_locked((journal->j_header_bh))) { if (buffer_locked((journal->j_header_bh))) {
reiserfs_write_unlock(sb);
wait_on_buffer((journal->j_header_bh)); wait_on_buffer((journal->j_header_bh));
reiserfs_write_lock(sb);
if (unlikely(!buffer_uptodate(journal->j_header_bh))) { if (unlikely(!buffer_uptodate(journal->j_header_bh))) {
#ifdef CONFIG_REISERFS_CHECK #ifdef CONFIG_REISERFS_CHECK
reiserfs_warning(sb, "journal-699", reiserfs_warning(sb, "journal-699",
...@@ -1312,12 +1328,16 @@ static int _update_journal_header_block(struct super_block *sb, ...@@ -1312,12 +1328,16 @@ static int _update_journal_header_block(struct super_block *sb,
disable_barrier(sb); disable_barrier(sb);
goto sync; goto sync;
} }
reiserfs_write_unlock(sb);
wait_on_buffer(journal->j_header_bh); wait_on_buffer(journal->j_header_bh);
reiserfs_write_lock(sb);
check_barrier_completion(sb, journal->j_header_bh); check_barrier_completion(sb, journal->j_header_bh);
} else { } else {
sync: sync:
set_buffer_dirty(journal->j_header_bh); set_buffer_dirty(journal->j_header_bh);
reiserfs_write_unlock(sb);
sync_dirty_buffer(journal->j_header_bh); sync_dirty_buffer(journal->j_header_bh);
reiserfs_write_lock(sb);
} }
if (!buffer_uptodate(journal->j_header_bh)) { if (!buffer_uptodate(journal->j_header_bh)) {
reiserfs_warning(sb, "journal-837", reiserfs_warning(sb, "journal-837",
...@@ -1409,7 +1429,7 @@ static int flush_journal_list(struct super_block *s, ...@@ -1409,7 +1429,7 @@ static int flush_journal_list(struct super_block *s,
/* if flushall == 0, the lock is already held */ /* if flushall == 0, the lock is already held */
if (flushall) { if (flushall) {
mutex_lock(&journal->j_flush_mutex); reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s);
} else if (mutex_trylock(&journal->j_flush_mutex)) { } else if (mutex_trylock(&journal->j_flush_mutex)) {
BUG(); BUG();
} }
...@@ -1553,7 +1573,11 @@ static int flush_journal_list(struct super_block *s, ...@@ -1553,7 +1573,11 @@ static int flush_journal_list(struct super_block *s,
reiserfs_panic(s, "journal-1011", reiserfs_panic(s, "journal-1011",
"cn->bh is NULL"); "cn->bh is NULL");
} }
reiserfs_write_unlock(s);
wait_on_buffer(cn->bh); wait_on_buffer(cn->bh);
reiserfs_write_lock(s);
if (!cn->bh) { if (!cn->bh) {
reiserfs_panic(s, "journal-1012", reiserfs_panic(s, "journal-1012",
"cn->bh is NULL"); "cn->bh is NULL");
...@@ -1769,7 +1793,7 @@ static int kupdate_transactions(struct super_block *s, ...@@ -1769,7 +1793,7 @@ static int kupdate_transactions(struct super_block *s,
struct reiserfs_journal *journal = SB_JOURNAL(s); struct reiserfs_journal *journal = SB_JOURNAL(s);
chunk.nr = 0; chunk.nr = 0;
mutex_lock(&journal->j_flush_mutex); reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s);
if (!journal_list_still_alive(s, orig_trans_id)) { if (!journal_list_still_alive(s, orig_trans_id)) {
goto done; goto done;
} }
...@@ -1973,11 +1997,19 @@ static int do_journal_release(struct reiserfs_transaction_handle *th, ...@@ -1973,11 +1997,19 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
reiserfs_mounted_fs_count--; reiserfs_mounted_fs_count--;
/* wait for all commits to finish */ /* wait for all commits to finish */
cancel_delayed_work(&SB_JOURNAL(sb)->j_work); cancel_delayed_work(&SB_JOURNAL(sb)->j_work);
/*
* We must release the write lock here because
* the workqueue job (flush_async_commit) needs this lock
*/
reiserfs_write_unlock(sb);
flush_workqueue(commit_wq); flush_workqueue(commit_wq);
if (!reiserfs_mounted_fs_count) { if (!reiserfs_mounted_fs_count) {
destroy_workqueue(commit_wq); destroy_workqueue(commit_wq);
commit_wq = NULL; commit_wq = NULL;
} }
reiserfs_write_lock(sb);
free_journal_ram(sb); free_journal_ram(sb);
...@@ -2243,7 +2275,11 @@ static int journal_read_transaction(struct super_block *sb, ...@@ -2243,7 +2275,11 @@ static int journal_read_transaction(struct super_block *sb,
/* read in the log blocks, memcpy to the corresponding real block */ /* read in the log blocks, memcpy to the corresponding real block */
ll_rw_block(READ, get_desc_trans_len(desc), log_blocks); ll_rw_block(READ, get_desc_trans_len(desc), log_blocks);
for (i = 0; i < get_desc_trans_len(desc); i++) { for (i = 0; i < get_desc_trans_len(desc); i++) {
reiserfs_write_unlock(sb);
wait_on_buffer(log_blocks[i]); wait_on_buffer(log_blocks[i]);
reiserfs_write_lock(sb);
if (!buffer_uptodate(log_blocks[i])) { if (!buffer_uptodate(log_blocks[i])) {
reiserfs_warning(sb, "journal-1212", reiserfs_warning(sb, "journal-1212",
"REPLAY FAILURE fsck required! " "REPLAY FAILURE fsck required! "
...@@ -2765,11 +2801,27 @@ int journal_init(struct super_block *sb, const char *j_dev_name, ...@@ -2765,11 +2801,27 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
goto free_and_return; goto free_and_return;
} }
/*
* We need to unlock here to avoid creating the following
* dependency:
* reiserfs_lock -> sysfs_mutex
* Because the reiserfs mmap path creates the following dependency:
* mm->mmap_sem -> reiserfs_lock, hence we have
* mm->mmap_sem -> reiserfs_lock -> sysfs_mutex
* This would end up in a circular dependency with the sysfs readdir path,
* which does sysfs_mutex -> mm->mmap_sem
* This is fine because the reiserfs lock is useless in mount path,
* at least until we call journal_begin. We keep it for paranoid
* reasons.
*/
reiserfs_write_unlock(sb);
if (journal_init_dev(sb, journal, j_dev_name) != 0) { if (journal_init_dev(sb, journal, j_dev_name) != 0) {
reiserfs_write_lock(sb);
reiserfs_warning(sb, "sh-462", reiserfs_warning(sb, "sh-462",
"unable to initialize jornal device"); "unable to initialize jornal device");
goto free_and_return; goto free_and_return;
} }
reiserfs_write_lock(sb);
rs = SB_DISK_SUPER_BLOCK(sb); rs = SB_DISK_SUPER_BLOCK(sb);
...@@ -2881,8 +2933,11 @@ int journal_init(struct super_block *sb, const char *j_dev_name, ...@@ -2881,8 +2933,11 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
} }
reiserfs_mounted_fs_count++; reiserfs_mounted_fs_count++;
if (reiserfs_mounted_fs_count <= 1) if (reiserfs_mounted_fs_count <= 1) {
reiserfs_write_unlock(sb);
commit_wq = create_workqueue("reiserfs"); commit_wq = create_workqueue("reiserfs");
reiserfs_write_lock(sb);
}
INIT_DELAYED_WORK(&journal->j_work, flush_async_commits); INIT_DELAYED_WORK(&journal->j_work, flush_async_commits);
journal->j_work_sb = sb; journal->j_work_sb = sb;
...@@ -2964,8 +3019,11 @@ static void queue_log_writer(struct super_block *s) ...@@ -2964,8 +3019,11 @@ static void queue_log_writer(struct super_block *s)
init_waitqueue_entry(&wait, current); init_waitqueue_entry(&wait, current);
add_wait_queue(&journal->j_join_wait, &wait); add_wait_queue(&journal->j_join_wait, &wait);
set_current_state(TASK_UNINTERRUPTIBLE); set_current_state(TASK_UNINTERRUPTIBLE);
if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) {
reiserfs_write_unlock(s);
schedule(); schedule();
reiserfs_write_lock(s);
}
__set_current_state(TASK_RUNNING); __set_current_state(TASK_RUNNING);
remove_wait_queue(&journal->j_join_wait, &wait); remove_wait_queue(&journal->j_join_wait, &wait);
} }
...@@ -2982,7 +3040,9 @@ static void let_transaction_grow(struct super_block *sb, unsigned int trans_id) ...@@ -2982,7 +3040,9 @@ static void let_transaction_grow(struct super_block *sb, unsigned int trans_id)
struct reiserfs_journal *journal = SB_JOURNAL(sb); struct reiserfs_journal *journal = SB_JOURNAL(sb);
unsigned long bcount = journal->j_bcount; unsigned long bcount = journal->j_bcount;
while (1) { while (1) {
reiserfs_write_unlock(sb);
schedule_timeout_uninterruptible(1); schedule_timeout_uninterruptible(1);
reiserfs_write_lock(sb);
journal->j_current_jl->j_state |= LIST_COMMIT_PENDING; journal->j_current_jl->j_state |= LIST_COMMIT_PENDING;
while ((atomic_read(&journal->j_wcount) > 0 || while ((atomic_read(&journal->j_wcount) > 0 ||
atomic_read(&journal->j_jlock)) && atomic_read(&journal->j_jlock)) &&
...@@ -3033,7 +3093,9 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th, ...@@ -3033,7 +3093,9 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) { if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) {
unlock_journal(sb); unlock_journal(sb);
reiserfs_write_unlock(sb);
reiserfs_wait_on_write_block(sb); reiserfs_wait_on_write_block(sb);
reiserfs_write_lock(sb);
PROC_INFO_INC(sb, journal.journal_relock_writers); PROC_INFO_INC(sb, journal.journal_relock_writers);
goto relock; goto relock;
} }
...@@ -3506,14 +3568,14 @@ static void flush_async_commits(struct work_struct *work) ...@@ -3506,14 +3568,14 @@ static void flush_async_commits(struct work_struct *work)
struct reiserfs_journal_list *jl; struct reiserfs_journal_list *jl;
struct list_head *entry; struct list_head *entry;
lock_kernel(); reiserfs_write_lock(sb);
if (!list_empty(&journal->j_journal_list)) { if (!list_empty(&journal->j_journal_list)) {
/* last entry is the youngest, commit it and you get everything */ /* last entry is the youngest, commit it and you get everything */
entry = journal->j_journal_list.prev; entry = journal->j_journal_list.prev;
jl = JOURNAL_LIST_ENTRY(entry); jl = JOURNAL_LIST_ENTRY(entry);
flush_commit_list(sb, jl, 1); flush_commit_list(sb, jl, 1);
} }
unlock_kernel(); reiserfs_write_unlock(sb);
} }
/* /*
...@@ -4041,7 +4103,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, ...@@ -4041,7 +4103,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
* the new transaction is fully setup, and we've already flushed the * the new transaction is fully setup, and we've already flushed the
* ordered bh list * ordered bh list
*/ */
mutex_lock(&jl->j_commit_mutex); reiserfs_mutex_lock_safe(&jl->j_commit_mutex, sb);
/* save the transaction id in case we need to commit it later */ /* save the transaction id in case we need to commit it later */
commit_trans_id = jl->j_trans_id; commit_trans_id = jl->j_trans_id;
...@@ -4156,7 +4218,9 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, ...@@ -4156,7 +4218,9 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
next = cn->next; next = cn->next;
free_cnode(sb, cn); free_cnode(sb, cn);
cn = next; cn = next;
reiserfs_write_unlock(sb);
cond_resched(); cond_resched();
reiserfs_write_lock(sb);
} }
/* we are done with both the c_bh and d_bh, but /* we are done with both the c_bh and d_bh, but
...@@ -4203,10 +4267,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, ...@@ -4203,10 +4267,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
* is lost. * is lost.
*/ */
if (!list_empty(&jl->j_tail_bh_list)) { if (!list_empty(&jl->j_tail_bh_list)) {
unlock_kernel(); reiserfs_write_unlock(sb);
write_ordered_buffers(&journal->j_dirty_buffers_lock, write_ordered_buffers(&journal->j_dirty_buffers_lock,
journal, jl, &jl->j_tail_bh_list); journal, jl, &jl->j_tail_bh_list);
lock_kernel(); reiserfs_write_lock(sb);
} }
BUG_ON(!list_empty(&jl->j_tail_bh_list)); BUG_ON(!list_empty(&jl->j_tail_bh_list));
mutex_unlock(&jl->j_commit_mutex); mutex_unlock(&jl->j_commit_mutex);
......
#include <linux/reiserfs_fs.h>
#include <linux/mutex.h>
/*
* The previous reiserfs locking scheme was heavily based on
* the tricky properties of the Bkl:
*
* - it was acquired recursively by a same task
* - the performances relied on the release-while-schedule() property
*
* Now that we replace it by a mutex, we still want to keep the same
* recursive property to avoid big changes in the code structure.
* We use our own lock_owner here because the owner field on a mutex
* is only available in SMP or mutex debugging, also we only need this field
* for this mutex, no need for a system wide mutex facility.
*
* Also this lock is often released before a call that could block because
* reiserfs performance was partially based on the release-while-schedule()
* property of the Bkl.
*/
/*
 * Take the per-superblock write lock, allowing the task that already owns
 * it to take it again.
 *
 * @s: superblock whose reiserfs write lock is to be acquired
 *
 * Every call bumps lock_depth by one, whether or not the mutex itself had
 * to be acquired.
 */
void reiserfs_write_lock(struct super_block *s)
{
	struct reiserfs_sb_info *info = REISERFS_SB(s);

	/*
	 * Only grab the mutex when the current task is not already recorded
	 * as the owner; otherwise this is a nested acquisition.
	 */
	if (current != info->lock_owner) {
		mutex_lock(&info->lock);
		info->lock_owner = current;
	}

	/* No need to protect it, only the current task touches it */
	info->lock_depth += 1;
}
/*
 * Drop one level of the per-superblock write lock.
 *
 * @s: superblock whose reiserfs write lock is to be released
 *
 * The mutex itself is released only when lock_depth drops back to -1;
 * until then the call just undoes one nested acquisition.
 */
void reiserfs_write_unlock(struct super_block *s)
{
	struct reiserfs_sb_info *info = REISERFS_SB(s);

	/*
	 * Unlocking without being the recorded owner means the locking is
	 * unbalanced. BUG() here rather than risk corrupting the data.
	 */
	BUG_ON(current != info->lock_owner);

	info->lock_depth -= 1;
	if (info->lock_depth != -1)
		return;		/* still nested: keep holding the mutex */

	/* Clear the owner before releasing the mutex */
	info->lock_owner = NULL;
	mutex_unlock(&info->lock);
}
/*
 * Acquire the write lock unless the current task already owns it.
 *
 * When the caller is already the owner, nothing is changed: the depth is
 * not increased. This is useful for paths that may be entered with or
 * without the lock and that must not hold it more than once.
 *
 * @s: superblock whose reiserfs write lock is wanted
 *
 * Returns the lock_depth as it was before this call. The value is meant
 * to be handed back to reiserfs_write_unlock_once().
 */
int reiserfs_write_lock_once(struct super_block *s)
{
	struct reiserfs_sb_info *info = REISERFS_SB(s);
	int depth_before;

	/* Already ours: report the current depth and leave it untouched */
	if (info->lock_owner == current)
		return info->lock_depth;

	mutex_lock(&info->lock);
	info->lock_owner = current;

	depth_before = info->lock_depth;
	info->lock_depth = depth_before + 1;
	return depth_before;
}
/*
 * Counterpart of reiserfs_write_lock_once().
 *
 * @s: superblock whose reiserfs write lock may be released
 * @lock_depth: depth value obtained from reiserfs_write_lock_once()
 *
 * The lock is released only when @lock_depth is -1; for any other value
 * this function does nothing.
 */
void reiserfs_write_unlock_once(struct super_block *s, int lock_depth)
{
	if (lock_depth != -1)
		return;

	reiserfs_write_unlock(s);
}
/*
 * Utility function to force a panic if it is called without the superblock
 * write lock held.
 *
 * @sb: superblock whose write lock is expected to be held
 * @caller: string identifying the caller, printed in the panic message
 *
 * A negative lock_depth is taken to mean that the lock is not held.
 */
void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
{
	struct reiserfs_sb_info *sb_i = REISERFS_SB(sb);

	/*
	 * reiserfs_panic() expects an id string before the format string.
	 * Without it the message would be taken as the id and @caller as
	 * the format. Pass the id explicitly (the same one the previous
	 * implementation of this check used) and supply an argument for
	 * each conversion in the format: @caller for %s, the depth for %d.
	 */
	if (sb_i->lock_depth < 0)
		reiserfs_panic(sb, "journal-1",
			       "%s called without kernel lock held %d",
			       caller, sb_i->lock_depth);
}
...@@ -324,6 +324,7 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry, ...@@ -324,6 +324,7 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
struct nameidata *nd) struct nameidata *nd)
{ {
int retval; int retval;
int lock_depth;
struct inode *inode = NULL; struct inode *inode = NULL;
struct reiserfs_dir_entry de; struct reiserfs_dir_entry de;
INITIALIZE_PATH(path_to_entry); INITIALIZE_PATH(path_to_entry);
...@@ -331,7 +332,13 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry, ...@@ -331,7 +332,13 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
if (REISERFS_MAX_NAME(dir->i_sb->s_blocksize) < dentry->d_name.len) if (REISERFS_MAX_NAME(dir->i_sb->s_blocksize) < dentry->d_name.len)
return ERR_PTR(-ENAMETOOLONG); return ERR_PTR(-ENAMETOOLONG);
reiserfs_write_lock(dir->i_sb); /*
* Might be called with or without the write lock, must be careful
* to not recursively hold it in case we want to release the lock
* before rescheduling.
*/
lock_depth = reiserfs_write_lock_once(dir->i_sb);
de.de_gen_number_bit_string = NULL; de.de_gen_number_bit_string = NULL;
retval = retval =
reiserfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, reiserfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len,
...@@ -341,7 +348,7 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry, ...@@ -341,7 +348,7 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
inode = reiserfs_iget(dir->i_sb, inode = reiserfs_iget(dir->i_sb,
(struct cpu_key *)&(de.de_dir_id)); (struct cpu_key *)&(de.de_dir_id));
if (!inode || IS_ERR(inode)) { if (!inode || IS_ERR(inode)) {
reiserfs_write_unlock(dir->i_sb); reiserfs_write_unlock_once(dir->i_sb, lock_depth);
return ERR_PTR(-EACCES); return ERR_PTR(-EACCES);
} }
...@@ -350,7 +357,7 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry, ...@@ -350,7 +357,7 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
if (IS_PRIVATE(dir)) if (IS_PRIVATE(dir))
inode->i_flags |= S_PRIVATE; inode->i_flags |= S_PRIVATE;
} }
reiserfs_write_unlock(dir->i_sb); reiserfs_write_unlock_once(dir->i_sb, lock_depth);
if (retval == IO_ERROR) { if (retval == IO_ERROR) {
return ERR_PTR(-EIO); return ERR_PTR(-EIO);
} }
...@@ -725,6 +732,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) ...@@ -725,6 +732,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
struct inode *inode; struct inode *inode;
struct reiserfs_transaction_handle th; struct reiserfs_transaction_handle th;
struct reiserfs_security_handle security; struct reiserfs_security_handle security;
int lock_depth;
/* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
int jbegin_count = int jbegin_count =
JOURNAL_PER_BALANCE_CNT * 3 + JOURNAL_PER_BALANCE_CNT * 3 +
...@@ -748,7 +756,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) ...@@ -748,7 +756,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
return retval; return retval;
} }
jbegin_count += retval; jbegin_count += retval;
reiserfs_write_lock(dir->i_sb); lock_depth = reiserfs_write_lock_once(dir->i_sb);
retval = journal_begin(&th, dir->i_sb, jbegin_count); retval = journal_begin(&th, dir->i_sb, jbegin_count);
if (retval) { if (retval) {
...@@ -798,8 +806,8 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) ...@@ -798,8 +806,8 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
d_instantiate(dentry, inode); d_instantiate(dentry, inode);
unlock_new_inode(inode); unlock_new_inode(inode);
retval = journal_end(&th, dir->i_sb, jbegin_count); retval = journal_end(&th, dir->i_sb, jbegin_count);
out_failed: out_failed:
reiserfs_write_unlock(dir->i_sb); reiserfs_write_unlock_once(dir->i_sb, lock_depth);
return retval; return retval;
} }
......
...@@ -349,10 +349,6 @@ void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...) ...@@ -349,10 +349,6 @@ void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...)
. */ . */
#ifdef CONFIG_REISERFS_CHECK
extern struct tree_balance *cur_tb;
#endif
void __reiserfs_panic(struct super_block *sb, const char *id, void __reiserfs_panic(struct super_block *sb, const char *id,
const char *function, const char *fmt, ...) const char *function, const char *fmt, ...)
{ {
......
...@@ -141,7 +141,9 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) ...@@ -141,7 +141,9 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
set_buffer_uptodate(bh); set_buffer_uptodate(bh);
mark_buffer_dirty(bh); mark_buffer_dirty(bh);
reiserfs_write_unlock(s);
sync_dirty_buffer(bh); sync_dirty_buffer(bh);
reiserfs_write_lock(s);
// update bitmap_info stuff // update bitmap_info stuff
bitmap[i].free_count = sb_blocksize(sb) * 8 - 1; bitmap[i].free_count = sb_blocksize(sb) * 8 - 1;
brelse(bh); brelse(bh);
......
...@@ -222,9 +222,6 @@ static inline int bin_search(const void *key, /* Key to search for. */ ...@@ -222,9 +222,6 @@ static inline int bin_search(const void *key, /* Key to search for. */
return ITEM_NOT_FOUND; return ITEM_NOT_FOUND;
} }
#ifdef CONFIG_REISERFS_CHECK
extern struct tree_balance *cur_tb;
#endif
/* Minimal possible key. It is never in the tree. */ /* Minimal possible key. It is never in the tree. */
const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} }; const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} };
...@@ -519,25 +516,48 @@ static int is_tree_node(struct buffer_head *bh, int level) ...@@ -519,25 +516,48 @@ static int is_tree_node(struct buffer_head *bh, int level)
#define SEARCH_BY_KEY_READA 16 #define SEARCH_BY_KEY_READA 16
/* The function is NOT SCHEDULE-SAFE! */ /*
static void search_by_key_reada(struct super_block *s, * The function is NOT SCHEDULE-SAFE!
* It might unlock the write lock if we needed to wait for a block
* to be read. Note that in this case it won't recover the lock to avoid
* high contention resulting from too much lock requests, especially
* the caller (search_by_key) will perform other schedule-unsafe
* operations just after calling this function.
*
* @return true if we have unlocked
*/
static bool search_by_key_reada(struct super_block *s,
struct buffer_head **bh, struct buffer_head **bh,
b_blocknr_t *b, int num) b_blocknr_t *b, int num)
{ {
int i, j; int i, j;
bool unlocked = false;
for (i = 0; i < num; i++) { for (i = 0; i < num; i++) {
bh[i] = sb_getblk(s, b[i]); bh[i] = sb_getblk(s, b[i]);
} }
/*
* We are going to read some blocks on which we
* have a reference. It's safe, though we might be
* reading blocks concurrently changed if we release
* the lock. But it's still fine because we check later
* if the tree changed
*/
for (j = 0; j < i; j++) { for (j = 0; j < i; j++) {
/* /*
* note, this needs attention if we are getting rid of the BKL * note, this needs attention if we are getting rid of the BKL
* you have to make sure the prepared bit isn't set on this buffer * you have to make sure the prepared bit isn't set on this buffer
*/ */
if (!buffer_uptodate(bh[j])) if (!buffer_uptodate(bh[j])) {
if (!unlocked) {
reiserfs_write_unlock(s);
unlocked = true;
}
ll_rw_block(READA, 1, bh + j); ll_rw_block(READA, 1, bh + j);
}
brelse(bh[j]); brelse(bh[j]);
} }
return unlocked;
} }
/************************************************************************** /**************************************************************************
...@@ -625,11 +645,26 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s ...@@ -625,11 +645,26 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
have a pointer to it. */ have a pointer to it. */
if ((bh = last_element->pe_buffer = if ((bh = last_element->pe_buffer =
sb_getblk(sb, block_number))) { sb_getblk(sb, block_number))) {
bool unlocked = false;
if (!buffer_uptodate(bh) && reada_count > 1) if (!buffer_uptodate(bh) && reada_count > 1)
search_by_key_reada(sb, reada_bh, /* may unlock the write lock */
unlocked = search_by_key_reada(sb, reada_bh,
reada_blocks, reada_count); reada_blocks, reada_count);
/*
* If we haven't already unlocked the write lock,
* then we need to do that here before reading
* the current block
*/
if (!buffer_uptodate(bh) && !unlocked) {
reiserfs_write_unlock(sb);
unlocked = true;
}
ll_rw_block(READ, 1, &bh); ll_rw_block(READ, 1, &bh);
wait_on_buffer(bh); wait_on_buffer(bh);
if (unlocked)
reiserfs_write_lock(sb);
if (!buffer_uptodate(bh)) if (!buffer_uptodate(bh))
goto io_error; goto io_error;
} else { } else {
...@@ -673,7 +708,7 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s ...@@ -673,7 +708,7 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
!key_in_buffer(search_path, key, sb), !key_in_buffer(search_path, key, sb),
"PAP-5130: key is not in the buffer"); "PAP-5130: key is not in the buffer");
#ifdef CONFIG_REISERFS_CHECK #ifdef CONFIG_REISERFS_CHECK
if (cur_tb) { if (REISERFS_SB(sb)->cur_tb) {
print_cur_tb("5140"); print_cur_tb("5140");
reiserfs_panic(sb, "PAP-5140", reiserfs_panic(sb, "PAP-5140",
"schedule occurred in do_balance!"); "schedule occurred in do_balance!");
...@@ -1024,7 +1059,9 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st ...@@ -1024,7 +1059,9 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st
reiserfs_free_block(th, inode, block, 1); reiserfs_free_block(th, inode, block, 1);
} }
reiserfs_write_unlock(sb);
cond_resched(); cond_resched();
reiserfs_write_lock(sb);
if (item_moved (&s_ih, path)) { if (item_moved (&s_ih, path)) {
need_re_search = 1; need_re_search = 1;
......
...@@ -465,7 +465,7 @@ static void reiserfs_put_super(struct super_block *s) ...@@ -465,7 +465,7 @@ static void reiserfs_put_super(struct super_block *s)
struct reiserfs_transaction_handle th; struct reiserfs_transaction_handle th;
th.t_trans_id = 0; th.t_trans_id = 0;
lock_kernel(); reiserfs_write_lock(s);
if (s->s_dirt) if (s->s_dirt)
reiserfs_write_super(s); reiserfs_write_super(s);
...@@ -499,10 +499,10 @@ static void reiserfs_put_super(struct super_block *s) ...@@ -499,10 +499,10 @@ static void reiserfs_put_super(struct super_block *s)
reiserfs_proc_info_done(s); reiserfs_proc_info_done(s);
reiserfs_write_unlock(s);
mutex_destroy(&REISERFS_SB(s)->lock);
kfree(s->s_fs_info); kfree(s->s_fs_info);
s->s_fs_info = NULL; s->s_fs_info = NULL;
unlock_kernel();
} }
static struct kmem_cache *reiserfs_inode_cachep; static struct kmem_cache *reiserfs_inode_cachep;
...@@ -554,25 +554,28 @@ static void reiserfs_dirty_inode(struct inode *inode) ...@@ -554,25 +554,28 @@ static void reiserfs_dirty_inode(struct inode *inode)
struct reiserfs_transaction_handle th; struct reiserfs_transaction_handle th;
int err = 0; int err = 0;
int lock_depth;
if (inode->i_sb->s_flags & MS_RDONLY) { if (inode->i_sb->s_flags & MS_RDONLY) {
reiserfs_warning(inode->i_sb, "clm-6006", reiserfs_warning(inode->i_sb, "clm-6006",
"writing inode %lu on readonly FS", "writing inode %lu on readonly FS",
inode->i_ino); inode->i_ino);
return; return;
} }
reiserfs_write_lock(inode->i_sb); lock_depth = reiserfs_write_lock_once(inode->i_sb);
/* this is really only used for atime updates, so they don't have /* this is really only used for atime updates, so they don't have
** to be included in O_SYNC or fsync ** to be included in O_SYNC or fsync
*/ */
err = journal_begin(&th, inode->i_sb, 1); err = journal_begin(&th, inode->i_sb, 1);
if (err) { if (err)
reiserfs_write_unlock(inode->i_sb); goto out;
return;
}
reiserfs_update_sd(&th, inode); reiserfs_update_sd(&th, inode);
journal_end(&th, inode->i_sb, 1); journal_end(&th, inode->i_sb, 1);
reiserfs_write_unlock(inode->i_sb);
out:
reiserfs_write_unlock_once(inode->i_sb, lock_depth);
} }
#ifdef CONFIG_QUOTA #ifdef CONFIG_QUOTA
...@@ -1168,11 +1171,14 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) ...@@ -1168,11 +1171,14 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
unsigned int qfmt = 0; unsigned int qfmt = 0;
#ifdef CONFIG_QUOTA #ifdef CONFIG_QUOTA
int i; int i;
#endif
reiserfs_write_lock(s);
#ifdef CONFIG_QUOTA
memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names)); memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names));
#endif #endif
lock_kernel();
rs = SB_DISK_SUPER_BLOCK(s); rs = SB_DISK_SUPER_BLOCK(s);
if (!reiserfs_parse_options if (!reiserfs_parse_options
...@@ -1295,12 +1301,12 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) ...@@ -1295,12 +1301,12 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
out_ok: out_ok:
replace_mount_options(s, new_opts); replace_mount_options(s, new_opts);
unlock_kernel(); reiserfs_write_unlock(s);
return 0; return 0;
out_err: out_err:
kfree(new_opts); kfree(new_opts);
unlock_kernel(); reiserfs_write_unlock(s);
return err; return err;
} }
...@@ -1404,7 +1410,9 @@ static int read_super_block(struct super_block *s, int offset) ...@@ -1404,7 +1410,9 @@ static int read_super_block(struct super_block *s, int offset)
static int reread_meta_blocks(struct super_block *s) static int reread_meta_blocks(struct super_block *s)
{ {
ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s))); ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s)));
reiserfs_write_unlock(s);
wait_on_buffer(SB_BUFFER_WITH_SB(s)); wait_on_buffer(SB_BUFFER_WITH_SB(s));
reiserfs_write_lock(s);
if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) {
reiserfs_warning(s, "reiserfs-2504", "error reading the super"); reiserfs_warning(s, "reiserfs-2504", "error reading the super");
return 1; return 1;
...@@ -1613,7 +1621,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) ...@@ -1613,7 +1621,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL); sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL);
if (!sbi) { if (!sbi) {
errval = -ENOMEM; errval = -ENOMEM;
goto error; goto error_alloc;
} }
s->s_fs_info = sbi; s->s_fs_info = sbi;
/* Set default values for options: non-aggressive tails, RO on errors */ /* Set default values for options: non-aggressive tails, RO on errors */
...@@ -1627,6 +1635,20 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) ...@@ -1627,6 +1635,20 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
/* setup default block allocator options */ /* setup default block allocator options */
reiserfs_init_alloc_options(s); reiserfs_init_alloc_options(s);
mutex_init(&REISERFS_SB(s)->lock);
REISERFS_SB(s)->lock_depth = -1;
/*
* This function is called with the bkl held, which was also the old
* locking scheme used here.
* do_journal_begin() will soon check that we hold the write lock (i.e.
* what used to be the bkl). That check is likely there for the sake of
* the several other callers of do_journal_begin(), because at this
* time it doesn't seem to be necessary to protect against anything here.
* Anyway, let's be conservative and lock for now.
*/
reiserfs_write_lock(s);
jdev_name = NULL; jdev_name = NULL;
if (reiserfs_parse_options if (reiserfs_parse_options
(s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name, (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name,
...@@ -1852,9 +1874,13 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) ...@@ -1852,9 +1874,13 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
init_waitqueue_head(&(sbi->s_wait)); init_waitqueue_head(&(sbi->s_wait));
spin_lock_init(&sbi->bitmap_lock); spin_lock_init(&sbi->bitmap_lock);
reiserfs_write_unlock(s);
return (0); return (0);
error: error:
reiserfs_write_unlock(s);
error_alloc:
if (jinit_done) { /* kill the commit thread, free journal ram */ if (jinit_done) { /* kill the commit thread, free journal ram */
journal_release_error(NULL, s); journal_release_error(NULL, s);
} }
......
...@@ -975,7 +975,7 @@ int reiserfs_lookup_privroot(struct super_block *s) ...@@ -975,7 +975,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
int err = 0; int err = 0;
/* If we don't have the privroot located yet - go find it */ /* If we don't have the privroot located yet - go find it */
mutex_lock(&s->s_root->d_inode->i_mutex); reiserfs_mutex_lock_safe(&s->s_root->d_inode->i_mutex, s);
dentry = lookup_one_len(PRIVROOT_NAME, s->s_root, dentry = lookup_one_len(PRIVROOT_NAME, s->s_root,
strlen(PRIVROOT_NAME)); strlen(PRIVROOT_NAME));
if (!IS_ERR(dentry)) { if (!IS_ERR(dentry)) {
...@@ -1004,14 +1004,14 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags) ...@@ -1004,14 +1004,14 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
goto error; goto error;
if (!privroot->d_inode && !(mount_flags & MS_RDONLY)) { if (!privroot->d_inode && !(mount_flags & MS_RDONLY)) {
mutex_lock(&s->s_root->d_inode->i_mutex); reiserfs_mutex_lock_safe(&s->s_root->d_inode->i_mutex, s);
err = create_privroot(REISERFS_SB(s)->priv_root); err = create_privroot(REISERFS_SB(s)->priv_root);
mutex_unlock(&s->s_root->d_inode->i_mutex); mutex_unlock(&s->s_root->d_inode->i_mutex);
} }
if (privroot->d_inode) { if (privroot->d_inode) {
s->s_xattr = reiserfs_xattr_handlers; s->s_xattr = reiserfs_xattr_handlers;
mutex_lock(&privroot->d_inode->i_mutex); reiserfs_mutex_lock_safe(&privroot->d_inode->i_mutex, s);
if (!REISERFS_SB(s)->xattr_root) { if (!REISERFS_SB(s)->xattr_root) {
struct dentry *dentry; struct dentry *dentry;
dentry = lookup_one_len(XAROOT_NAME, privroot, dentry = lookup_one_len(XAROOT_NAME, privroot,
......
...@@ -52,11 +52,63 @@ ...@@ -52,11 +52,63 @@
#define REISERFS_IOC32_GETVERSION FS_IOC32_GETVERSION #define REISERFS_IOC32_GETVERSION FS_IOC32_GETVERSION
#define REISERFS_IOC32_SETVERSION FS_IOC32_SETVERSION #define REISERFS_IOC32_SETVERSION FS_IOC32_SETVERSION
/* Locking primitives */ /*
/* Right now we are still falling back to (un)lock_kernel, but eventually that * Locking primitives. The write lock is a per superblock
would evolve into real per-fs locks */ * special mutex that has properties close to the Big Kernel Lock
#define reiserfs_write_lock( sb ) lock_kernel() * which was used in the previous locking scheme.
#define reiserfs_write_unlock( sb ) unlock_kernel() */
void reiserfs_write_lock(struct super_block *s);
void reiserfs_write_unlock(struct super_block *s);
int reiserfs_write_lock_once(struct super_block *s);
void reiserfs_write_unlock_once(struct super_block *s, int lock_depth);
/*
 * reiserfs_mutex_lock_safe - acquire a mutex that depends on the write lock
 * @m: the mutex to acquire
 * @s: the super block whose write lock is relaxed while waiting for @m
 *
 * Several mutexes depend on the write lock.
 * However, sometimes we want to relax the write lock while we hold
 * these mutexes, in accordance with the release/reacquire-on-schedule()
 * properties of the Bkl that was used before.
 * Reiserfs performance and locking were based on this scheme.
 * Now that the write lock is a mutex and not the bkl anymore, doing so
 * may result in a deadlock:
 *
 * A acquires write_lock
 * A acquires j_commit_mutex
 * A releases write_lock and waits for something
 * B acquires write_lock
 * B can't acquire j_commit_mutex and sleeps
 * A can't acquire write lock anymore
 * deadlock
 *
 * What we do here is avoid such a deadlock by playing the same game
 * as the Bkl: if we can't acquire a mutex that depends on the write lock,
 * we release the write lock, wait a bit and then retry.
 * Concretely, this helper releases the write lock, takes @m, and only
 * then takes the write lock again, so it never waits for @m while
 * holding the write lock.
 *
 * The mutexes concerned by this hack are:
 * - The commit mutex of a journal list
 * - The flush mutex
 * - The journal lock
 * - The inode mutex
 */
static inline void reiserfs_mutex_lock_safe(struct mutex *m,
struct super_block *s)
{
/* Relax the write lock before possibly sleeping on @m. */
reiserfs_write_unlock(s);
mutex_lock(m);
/* Take the write lock back only once @m is held. */
reiserfs_write_lock(s);
}
/*
 * reiserfs_cond_resched - reschedule if needed, without the write lock
 * @s: the super block whose write lock is relaxed around schedule()
 *
 * When we schedule, we usually also want to release the write lock,
 * in accordance with the previous bkl based locking scheme of reiserfs.
 * If no reschedule is pending, the write lock is left untouched.
 */
static inline void reiserfs_cond_resched(struct super_block *s)
{
	/* Nothing to do unless a reschedule has been requested. */
	if (!need_resched())
		return;

	/* Drop the write lock across schedule(), then take it back. */
	reiserfs_write_unlock(s);
	schedule();
	reiserfs_write_lock(s);
}
struct fid; struct fid;
...@@ -1329,7 +1381,11 @@ static inline loff_t max_reiserfs_offset(struct inode *inode) ...@@ -1329,7 +1381,11 @@ static inline loff_t max_reiserfs_offset(struct inode *inode)
#define get_generation(s) atomic_read (&fs_generation(s)) #define get_generation(s) atomic_read (&fs_generation(s))
#define FILESYSTEM_CHANGED_TB(tb) (get_generation((tb)->tb_sb) != (tb)->fs_gen) #define FILESYSTEM_CHANGED_TB(tb) (get_generation((tb)->tb_sb) != (tb)->fs_gen)
#define __fs_changed(gen,s) (gen != get_generation (s)) #define __fs_changed(gen,s) (gen != get_generation (s))
#define fs_changed(gen,s) ({cond_resched(); __fs_changed(gen, s);}) #define fs_changed(gen,s) \
({ \
reiserfs_cond_resched(s); \
__fs_changed(gen, s); \
})
/***************************************************************************/ /***************************************************************************/
/* FIXATE NODES */ /* FIXATE NODES */
...@@ -2258,8 +2314,7 @@ __u32 r5_hash(const signed char *msg, int len); ...@@ -2258,8 +2314,7 @@ __u32 r5_hash(const signed char *msg, int len);
#define SPARE_SPACE 500 #define SPARE_SPACE 500
/* prototypes from ioctl.c */ /* prototypes from ioctl.c */
int reiserfs_ioctl(struct inode *inode, struct file *filp, long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
unsigned int cmd, unsigned long arg);
long reiserfs_compat_ioctl(struct file *filp, long reiserfs_compat_ioctl(struct file *filp,
unsigned int cmd, unsigned long arg); unsigned int cmd, unsigned long arg);
int reiserfs_unpack(struct inode *inode, struct file *filp); int reiserfs_unpack(struct inode *inode, struct file *filp);
......
...@@ -7,6 +7,8 @@ ...@@ -7,6 +7,8 @@
#ifdef __KERNEL__ #ifdef __KERNEL__
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/rwsem.h> #include <linux/rwsem.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#endif #endif
typedef enum { typedef enum {
...@@ -355,6 +357,13 @@ struct reiserfs_sb_info { ...@@ -355,6 +357,13 @@ struct reiserfs_sb_info {
struct reiserfs_journal *s_journal; /* pointer to journal information */ struct reiserfs_journal *s_journal; /* pointer to journal information */
unsigned short s_mount_state; /* reiserfs state (valid, invalid) */ unsigned short s_mount_state; /* reiserfs state (valid, invalid) */
/* Serialize writers access, replace the old bkl */
struct mutex lock;
/* Owner of the lock (can be recursive) */
struct task_struct *lock_owner;
/* Depth of the lock, start from -1 like the bkl */
int lock_depth;
/* Comment? -Hans */ /* Comment? -Hans */
void (*end_io_handler) (struct buffer_head *, int); void (*end_io_handler) (struct buffer_head *, int);
hashf_t s_hash_function; /* pointer to function which is used hashf_t s_hash_function; /* pointer to function which is used
...@@ -408,6 +417,17 @@ struct reiserfs_sb_info { ...@@ -408,6 +417,17 @@ struct reiserfs_sb_info {
char *s_qf_names[MAXQUOTAS]; char *s_qf_names[MAXQUOTAS];
int s_jquota_fmt; int s_jquota_fmt;
#endif #endif
#ifdef CONFIG_REISERFS_CHECK
struct tree_balance *cur_tb; /*
* Detects whether more than one
* copy of tb exists per superblock
* as a means of checking whether
* do_balance is executing concurrently
* against another tree reader/writer
* on a same mount point.
*/
#endif
}; };
/* Definitions of reiserfs on-disk properties: */ /* Definitions of reiserfs on-disk properties: */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment