Commit 3512a79d authored by Linus Torvalds

Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: Fix NULL dereference in ext4_ext_migrate()'s error handling
  ext4: Implement range_cyclic in ext4_da_writepages instead of write_cache_pages
  ext4: Initialize preallocation list_head's properly
  ext4: Fix lockdep warning
  ext4: Fix to read empty directory blocks correctly in 64k
  jbd2: Avoid possible NULL dereference in jbd2_journal_begin_ordered_truncate()
  Revert "ext4: wait on all pending commits in ext4_sync_fs()"
  jbd2: Fix return value of jbd2_journal_start_commit()
parents 39a65762 09054264
...@@ -868,7 +868,7 @@ static inline unsigned ext4_rec_len_from_disk(__le16 dlen) ...@@ -868,7 +868,7 @@ static inline unsigned ext4_rec_len_from_disk(__le16 dlen)
{ {
unsigned len = le16_to_cpu(dlen); unsigned len = le16_to_cpu(dlen);
if (len == EXT4_MAX_REC_LEN) if (len == EXT4_MAX_REC_LEN || len == 0)
return 1 << 16; return 1 << 16;
return len; return len;
} }
......
...@@ -47,8 +47,10 @@ ...@@ -47,8 +47,10 @@
static inline int ext4_begin_ordered_truncate(struct inode *inode, static inline int ext4_begin_ordered_truncate(struct inode *inode,
loff_t new_size) loff_t new_size)
{ {
return jbd2_journal_begin_ordered_truncate(&EXT4_I(inode)->jinode, return jbd2_journal_begin_ordered_truncate(
new_size); EXT4_SB(inode->i_sb)->s_journal,
&EXT4_I(inode)->jinode,
new_size);
} }
static void ext4_invalidatepage(struct page *page, unsigned long offset); static void ext4_invalidatepage(struct page *page, unsigned long offset);
...@@ -2437,6 +2439,7 @@ static int ext4_da_writepages(struct address_space *mapping, ...@@ -2437,6 +2439,7 @@ static int ext4_da_writepages(struct address_space *mapping,
int no_nrwrite_index_update; int no_nrwrite_index_update;
int pages_written = 0; int pages_written = 0;
long pages_skipped; long pages_skipped;
int range_cyclic, cycled = 1, io_done = 0;
int needed_blocks, ret = 0, nr_to_writebump = 0; int needed_blocks, ret = 0, nr_to_writebump = 0;
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
...@@ -2488,9 +2491,15 @@ static int ext4_da_writepages(struct address_space *mapping, ...@@ -2488,9 +2491,15 @@ static int ext4_da_writepages(struct address_space *mapping,
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
range_whole = 1; range_whole = 1;
if (wbc->range_cyclic) range_cyclic = wbc->range_cyclic;
if (wbc->range_cyclic) {
index = mapping->writeback_index; index = mapping->writeback_index;
else if (index)
cycled = 0;
wbc->range_start = index << PAGE_CACHE_SHIFT;
wbc->range_end = LLONG_MAX;
wbc->range_cyclic = 0;
} else
index = wbc->range_start >> PAGE_CACHE_SHIFT; index = wbc->range_start >> PAGE_CACHE_SHIFT;
mpd.wbc = wbc; mpd.wbc = wbc;
...@@ -2504,6 +2513,7 @@ static int ext4_da_writepages(struct address_space *mapping, ...@@ -2504,6 +2513,7 @@ static int ext4_da_writepages(struct address_space *mapping,
wbc->no_nrwrite_index_update = 1; wbc->no_nrwrite_index_update = 1;
pages_skipped = wbc->pages_skipped; pages_skipped = wbc->pages_skipped;
retry:
while (!ret && wbc->nr_to_write > 0) { while (!ret && wbc->nr_to_write > 0) {
/* /*
...@@ -2546,6 +2556,7 @@ static int ext4_da_writepages(struct address_space *mapping, ...@@ -2546,6 +2556,7 @@ static int ext4_da_writepages(struct address_space *mapping,
pages_written += mpd.pages_written; pages_written += mpd.pages_written;
wbc->pages_skipped = pages_skipped; wbc->pages_skipped = pages_skipped;
ret = 0; ret = 0;
io_done = 1;
} else if (wbc->nr_to_write) } else if (wbc->nr_to_write)
/* /*
* There is no more writeout needed * There is no more writeout needed
...@@ -2554,6 +2565,13 @@ static int ext4_da_writepages(struct address_space *mapping, ...@@ -2554,6 +2565,13 @@ static int ext4_da_writepages(struct address_space *mapping,
*/ */
break; break;
} }
if (!io_done && !cycled) {
cycled = 1;
index = 0;
wbc->range_start = index << PAGE_CACHE_SHIFT;
wbc->range_end = mapping->writeback_index - 1;
goto retry;
}
if (pages_skipped != wbc->pages_skipped) if (pages_skipped != wbc->pages_skipped)
printk(KERN_EMERG "This should not happen leaving %s " printk(KERN_EMERG "This should not happen leaving %s "
"with nr_to_write = %ld ret = %d\n", "with nr_to_write = %ld ret = %d\n",
...@@ -2561,6 +2579,7 @@ static int ext4_da_writepages(struct address_space *mapping, ...@@ -2561,6 +2579,7 @@ static int ext4_da_writepages(struct address_space *mapping,
/* Update index */ /* Update index */
index += pages_written; index += pages_written;
wbc->range_cyclic = range_cyclic;
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
/* /*
* set the writeback_index so that range_cyclic * set the writeback_index so that range_cyclic
......
...@@ -3693,6 +3693,8 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) ...@@ -3693,6 +3693,8 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
pa->pa_free = pa->pa_len; pa->pa_free = pa->pa_len;
atomic_set(&pa->pa_count, 1); atomic_set(&pa->pa_count, 1);
spin_lock_init(&pa->pa_lock); spin_lock_init(&pa->pa_lock);
INIT_LIST_HEAD(&pa->pa_inode_list);
INIT_LIST_HEAD(&pa->pa_group_list);
pa->pa_deleted = 0; pa->pa_deleted = 0;
pa->pa_linear = 0; pa->pa_linear = 0;
...@@ -3755,6 +3757,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac) ...@@ -3755,6 +3757,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
atomic_set(&pa->pa_count, 1); atomic_set(&pa->pa_count, 1);
spin_lock_init(&pa->pa_lock); spin_lock_init(&pa->pa_lock);
INIT_LIST_HEAD(&pa->pa_inode_list); INIT_LIST_HEAD(&pa->pa_inode_list);
INIT_LIST_HEAD(&pa->pa_group_list);
pa->pa_deleted = 0; pa->pa_deleted = 0;
pa->pa_linear = 1; pa->pa_linear = 1;
...@@ -4476,23 +4479,26 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac) ...@@ -4476,23 +4479,26 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
pa->pa_free -= ac->ac_b_ex.fe_len; pa->pa_free -= ac->ac_b_ex.fe_len;
pa->pa_len -= ac->ac_b_ex.fe_len; pa->pa_len -= ac->ac_b_ex.fe_len;
spin_unlock(&pa->pa_lock); spin_unlock(&pa->pa_lock);
/*
* We want to add the pa to the right bucket.
* Remove it from the list and while adding
* make sure the list to which we are adding
* doesn't grow big.
*/
if (likely(pa->pa_free)) {
spin_lock(pa->pa_obj_lock);
list_del_rcu(&pa->pa_inode_list);
spin_unlock(pa->pa_obj_lock);
ext4_mb_add_n_trim(ac);
}
} }
ext4_mb_put_pa(ac, ac->ac_sb, pa);
} }
if (ac->alloc_semp) if (ac->alloc_semp)
up_read(ac->alloc_semp); up_read(ac->alloc_semp);
if (pa) {
/*
* We want to add the pa to the right bucket.
* Remove it from the list and while adding
* make sure the list to which we are adding
* doesn't grow big. We need to release
* alloc_semp before calling ext4_mb_add_n_trim()
*/
if (pa->pa_linear && likely(pa->pa_free)) {
spin_lock(pa->pa_obj_lock);
list_del_rcu(&pa->pa_inode_list);
spin_unlock(pa->pa_obj_lock);
ext4_mb_add_n_trim(ac);
}
ext4_mb_put_pa(ac, ac->ac_sb, pa);
}
if (ac->ac_bitmap_page) if (ac->ac_bitmap_page)
page_cache_release(ac->ac_bitmap_page); page_cache_release(ac->ac_bitmap_page);
if (ac->ac_buddy_page) if (ac->ac_buddy_page)
......
...@@ -481,7 +481,7 @@ int ext4_ext_migrate(struct inode *inode) ...@@ -481,7 +481,7 @@ int ext4_ext_migrate(struct inode *inode)
+ 1); + 1);
if (IS_ERR(handle)) { if (IS_ERR(handle)) {
retval = PTR_ERR(handle); retval = PTR_ERR(handle);
goto err_out; return retval;
} }
tmp_inode = ext4_new_inode(handle, tmp_inode = ext4_new_inode(handle,
inode->i_sb->s_root->d_inode, inode->i_sb->s_root->d_inode,
...@@ -489,8 +489,7 @@ int ext4_ext_migrate(struct inode *inode) ...@@ -489,8 +489,7 @@ int ext4_ext_migrate(struct inode *inode)
if (IS_ERR(tmp_inode)) { if (IS_ERR(tmp_inode)) {
retval = -ENOMEM; retval = -ENOMEM;
ext4_journal_stop(handle); ext4_journal_stop(handle);
tmp_inode = NULL; return retval;
goto err_out;
} }
i_size_write(tmp_inode, i_size_read(inode)); i_size_write(tmp_inode, i_size_read(inode));
/* /*
...@@ -618,8 +617,7 @@ err_out: ...@@ -618,8 +617,7 @@ err_out:
ext4_journal_stop(handle); ext4_journal_stop(handle);
if (tmp_inode) iput(tmp_inode);
iput(tmp_inode);
return retval; return retval;
} }
...@@ -3046,14 +3046,17 @@ static void ext4_write_super(struct super_block *sb) ...@@ -3046,14 +3046,17 @@ static void ext4_write_super(struct super_block *sb)
static int ext4_sync_fs(struct super_block *sb, int wait) static int ext4_sync_fs(struct super_block *sb, int wait)
{ {
int ret = 0; int ret = 0;
tid_t target;
trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
sb->s_dirt = 0; sb->s_dirt = 0;
if (EXT4_SB(sb)->s_journal) { if (EXT4_SB(sb)->s_journal) {
if (wait) if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal,
ret = ext4_force_commit(sb); &target)) {
else if (wait)
jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL); jbd2_log_wait_commit(EXT4_SB(sb)->s_journal,
target);
}
} else { } else {
ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait); ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait);
} }
......
...@@ -450,7 +450,7 @@ int __jbd2_log_space_left(journal_t *journal) ...@@ -450,7 +450,7 @@ int __jbd2_log_space_left(journal_t *journal)
} }
/* /*
* Called under j_state_lock. Returns true if a transaction was started. * Called under j_state_lock. Returns true if a transaction commit was started.
*/ */
int __jbd2_log_start_commit(journal_t *journal, tid_t target) int __jbd2_log_start_commit(journal_t *journal, tid_t target)
{ {
...@@ -518,7 +518,8 @@ int jbd2_journal_force_commit_nested(journal_t *journal) ...@@ -518,7 +518,8 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
/* /*
* Start a commit of the current running transaction (if any). Returns true * Start a commit of the current running transaction (if any). Returns true
* if a transaction was started, and fills its tid in at *ptid * if a transaction is going to be committed (or is currently already
* committing), and fills its tid in at *ptid
*/ */
int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
{ {
...@@ -528,15 +529,19 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) ...@@ -528,15 +529,19 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
if (journal->j_running_transaction) { if (journal->j_running_transaction) {
tid_t tid = journal->j_running_transaction->t_tid; tid_t tid = journal->j_running_transaction->t_tid;
ret = __jbd2_log_start_commit(journal, tid); __jbd2_log_start_commit(journal, tid);
if (ret && ptid) /* There's a running transaction and we've just made sure
* its commit has been scheduled. */
if (ptid)
*ptid = tid; *ptid = tid;
} else if (journal->j_committing_transaction && ptid) { ret = 1;
} else if (journal->j_committing_transaction) {
/* /*
* If ext3_write_super() recently started a commit, then we * If ext3_write_super() recently started a commit, then we
* have to wait for completion of that transaction * have to wait for completion of that transaction
*/ */
*ptid = journal->j_committing_transaction->t_tid; if (ptid)
*ptid = journal->j_committing_transaction->t_tid;
ret = 1; ret = 1;
} }
spin_unlock(&journal->j_state_lock); spin_unlock(&journal->j_state_lock);
......
...@@ -2129,26 +2129,46 @@ done: ...@@ -2129,26 +2129,46 @@ done:
} }
/* /*
* This function must be called when inode is journaled in ordered mode * File truncate and transaction commit interact with each other in a
* before truncation happens. It starts writeout of truncated part in * non-trivial way. If a transaction writing data block A is
* case it is in the committing transaction so that we stand to ordered * committing, we cannot discard the data by truncate until we have
* mode consistency guarantees. * written them. Otherwise if we crashed after the transaction with
* write has committed but before the transaction with truncate has
* committed, we could see stale data in block A. This function is a
* helper to solve this problem. It starts writeout of the truncated
* part in case it is in the committing transaction.
*
* Filesystem code must call this function when inode is journaled in
* ordered mode before truncation happens and after the inode has been
* placed on orphan list with the new inode size. The second condition
* avoids the race that someone writes new data and we start
* committing the transaction after this function has been called but
* before a transaction for truncate is started (and furthermore it
* allows us to optimize the case where the addition to orphan list
* happens in the same transaction as write --- we don't have to write
* any data in such case).
*/ */
int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, int jbd2_journal_begin_ordered_truncate(journal_t *journal,
struct jbd2_inode *jinode,
loff_t new_size) loff_t new_size)
{ {
journal_t *journal; transaction_t *inode_trans, *commit_trans;
transaction_t *commit_trans;
int ret = 0; int ret = 0;
if (!inode->i_transaction && !inode->i_next_transaction) /* This is a quick check to avoid locking if not necessary */
if (!jinode->i_transaction)
goto out; goto out;
journal = inode->i_transaction->t_journal; /* Locks are here just to force reading of recent values, it is
* enough that the transaction was not committing before we started
* a transaction adding the inode to orphan list */
spin_lock(&journal->j_state_lock); spin_lock(&journal->j_state_lock);
commit_trans = journal->j_committing_transaction; commit_trans = journal->j_committing_transaction;
spin_unlock(&journal->j_state_lock); spin_unlock(&journal->j_state_lock);
if (inode->i_transaction == commit_trans) { spin_lock(&journal->j_list_lock);
ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping, inode_trans = jinode->i_transaction;
spin_unlock(&journal->j_list_lock);
if (inode_trans == commit_trans) {
ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping,
new_size, LLONG_MAX); new_size, LLONG_MAX);
if (ret) if (ret)
jbd2_journal_abort(journal, ret); jbd2_journal_abort(journal, ret);
......
...@@ -513,8 +513,10 @@ static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode) ...@@ -513,8 +513,10 @@ static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode)
static inline int ocfs2_begin_ordered_truncate(struct inode *inode, static inline int ocfs2_begin_ordered_truncate(struct inode *inode,
loff_t new_size) loff_t new_size)
{ {
return jbd2_journal_begin_ordered_truncate(&OCFS2_I(inode)->ip_jinode, return jbd2_journal_begin_ordered_truncate(
new_size); OCFS2_SB(inode->i_sb)->journal->j_journal,
&OCFS2_I(inode)->ip_jinode,
new_size);
} }
#endif /* OCFS2_JOURNAL_H */ #endif /* OCFS2_JOURNAL_H */
...@@ -1150,7 +1150,8 @@ extern int jbd2_journal_clear_err (journal_t *); ...@@ -1150,7 +1150,8 @@ extern int jbd2_journal_clear_err (journal_t *);
extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *); extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *);
extern int jbd2_journal_force_commit(journal_t *); extern int jbd2_journal_force_commit(journal_t *);
extern int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode); extern int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode);
extern int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, loff_t new_size); extern int jbd2_journal_begin_ordered_truncate(journal_t *journal,
struct jbd2_inode *inode, loff_t new_size);
extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode); extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode);
extern void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode); extern void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment