Commit 7f5aa215 authored by Jan Kara, committed by Theodore Tso

jbd2: Avoid possible NULL dereference in jbd2_journal_begin_ordered_truncate()

If we race with commit code setting i_transaction to NULL, we could
possibly dereference it.  Proper locking requires the journal pointer
(to access journal->j_list_lock), which we don't have.  So we have to
change the prototype of the function so that filesystem passes us the
journal pointer.  Also add a more detailed comment about why the
function jbd2_journal_begin_ordered_truncate() does what it does and
how it should be used.

Thanks to Dan Carpenter <error27@gmail.com> for pointing to the
suspicious code.
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Acked-by: Joel Becker <joel.becker@oracle.com>
CC: linux-ext4@vger.kernel.org
CC: ocfs2-devel@oss.oracle.com
CC: mfasheh@suse.de
CC: Dan Carpenter <error27@gmail.com>
parent 9eddacf9
...@@ -47,7 +47,9 @@ ...@@ -47,7 +47,9 @@
static inline int ext4_begin_ordered_truncate(struct inode *inode, static inline int ext4_begin_ordered_truncate(struct inode *inode,
loff_t new_size) loff_t new_size)
{ {
return jbd2_journal_begin_ordered_truncate(&EXT4_I(inode)->jinode, return jbd2_journal_begin_ordered_truncate(
EXT4_SB(inode->i_sb)->s_journal,
&EXT4_I(inode)->jinode,
new_size); new_size);
} }
......
...@@ -2129,26 +2129,46 @@ done: ...@@ -2129,26 +2129,46 @@ done:
} }
/* /*
* This function must be called when inode is journaled in ordered mode * File truncate and transaction commit interact with each other in a
* before truncation happens. It starts writeout of truncated part in * non-trivial way. If a transaction writing data block A is
* case it is in the committing transaction so that we stand to ordered * committing, we cannot discard the data by truncate until we have
* mode consistency guarantees. * written them. Otherwise if we crashed after the transaction with
*/ * write has committed but before the transaction with truncate has
int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, * committed, we could see stale data in block A. This function is a
* helper to solve this problem. It starts writeout of the truncated
* part in case it is in the committing transaction.
*
* Filesystem code must call this function when inode is journaled in
* ordered mode before truncation happens and after the inode has been
* placed on orphan list with the new inode size. The second condition
* avoids the race that someone writes new data and we start
* committing the transaction after this function has been called but
* before a transaction for truncate is started (and furthermore it
* allows us to optimize the case where the addition to orphan list
* happens in the same transaction as write --- we don't have to write
* any data in such case).
*/
int jbd2_journal_begin_ordered_truncate(journal_t *journal,
struct jbd2_inode *jinode,
loff_t new_size) loff_t new_size)
{ {
journal_t *journal; transaction_t *inode_trans, *commit_trans;
transaction_t *commit_trans;
int ret = 0; int ret = 0;
if (!inode->i_transaction && !inode->i_next_transaction) /* This is a quick check to avoid locking if not necessary */
if (!jinode->i_transaction)
goto out; goto out;
journal = inode->i_transaction->t_journal; /* Locks are here just to force reading of recent values, it is
* enough that the transaction was not committing before we started
* a transaction adding the inode to orphan list */
spin_lock(&journal->j_state_lock); spin_lock(&journal->j_state_lock);
commit_trans = journal->j_committing_transaction; commit_trans = journal->j_committing_transaction;
spin_unlock(&journal->j_state_lock); spin_unlock(&journal->j_state_lock);
if (inode->i_transaction == commit_trans) { spin_lock(&journal->j_list_lock);
ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping, inode_trans = jinode->i_transaction;
spin_unlock(&journal->j_list_lock);
if (inode_trans == commit_trans) {
ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping,
new_size, LLONG_MAX); new_size, LLONG_MAX);
if (ret) if (ret)
jbd2_journal_abort(journal, ret); jbd2_journal_abort(journal, ret);
......
...@@ -513,7 +513,9 @@ static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode) ...@@ -513,7 +513,9 @@ static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode)
static inline int ocfs2_begin_ordered_truncate(struct inode *inode, static inline int ocfs2_begin_ordered_truncate(struct inode *inode,
loff_t new_size) loff_t new_size)
{ {
return jbd2_journal_begin_ordered_truncate(&OCFS2_I(inode)->ip_jinode, return jbd2_journal_begin_ordered_truncate(
OCFS2_SB(inode->i_sb)->journal->j_journal,
&OCFS2_I(inode)->ip_jinode,
new_size); new_size);
} }
......
...@@ -1150,7 +1150,8 @@ extern int jbd2_journal_clear_err (journal_t *); ...@@ -1150,7 +1150,8 @@ extern int jbd2_journal_clear_err (journal_t *);
extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *); extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *);
extern int jbd2_journal_force_commit(journal_t *); extern int jbd2_journal_force_commit(journal_t *);
extern int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode); extern int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode);
extern int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, loff_t new_size); extern int jbd2_journal_begin_ordered_truncate(journal_t *journal,
struct jbd2_inode *inode, loff_t new_size);
extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode); extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode);
extern void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode); extern void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment