Commit 3530c188 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (64 commits)
  ext4: Update documentation about quota mount options
  ext4: replace MAX_DEFRAG_SIZE with EXT_MAX_BLOCK
  ext4: Fix the alloc on close after a truncate hueristic
  ext4: Add a tracepoint for ext4_alloc_da_blocks()
  ext4: store EXT4_EXT_MIGRATE in i_state instead of i_flags
  ext4: limit block allocations for indirect-block files to < 2^32
  ext4: Fix different block exchange issue in EXT4_IOC_MOVE_EXT
  ext4: Add null extent check to ext_get_path
  ext4: Replace BUG_ON() with ext4_error() in move_extents.c
  ext4: Replace get_ext_path macro with an inline funciton
  ext4: Fix include/trace/events/ext4.h to work with Systemtap
  ext4: Fix initalization of s_flex_groups
  ext4: Always set dx_node's fake_dirent explicitly.
  ext4: Fix async commit mode to be safe by using a barrier
  ext4: Don't update superblock write time when filesystem is read-only
  ext4: Clarify the locking details in mballoc
  ext4: check for need init flag in ext4_mb_load_buddy
  ext4: move ext4_mb_init_group() function earlier in the mballoc.c
  ext4: Make non-journal fsync work properly
  ext4: Assure that metadata blocks are written during fsync in no journal mode
  ...
parents 6952b61d 1358870d
......@@ -134,15 +134,9 @@ ro Mount filesystem read only. Note that ext4 will
mount options "ro,noload" can be used to prevent
writes to the filesystem.
journal_checksum Enable checksumming of the journal transactions.
This will allow the recovery code in e2fsck and the
kernel to detect corruption in the kernel. It is a
compatible change and will be ignored by older kernels.
journal_async_commit Commit block can be written to disk without waiting
for descriptor blocks. If enabled older kernels cannot
mount the device. This will enable 'journal_checksum'
internally.
mount the device.
journal=update Update the ext4 file system's journal to the current
format.
......@@ -263,10 +257,18 @@ resuid=n The user ID which may use the reserved blocks.
sb=n Use alternate superblock at this location.
quota
noquota
grpquota
usrquota
quota These options are ignored by the filesystem. They
noquota are used only by quota tools to recognize volumes
grpquota where quota should be turned on. See documentation
usrquota in the quota-tools package for more details
(http://sourceforge.net/projects/linuxquota).
jqfmt=<quota type> These options tell filesystem details about quota
usrjquota=<file> so that quota information can be properly updated
grpjquota=<file> during journal replay. They replace the above
quota options. See documentation in the quota-tools
package for more details
(http://sourceforge.net/projects/linuxquota).
bh (*) ext4 associates buffer heads to data pages to
nobh (a) cache disk block mapping information
......
......@@ -37,7 +37,7 @@ config EXT4DEV_COMPAT
To enable backwards compatibility so that systems that are
still expecting to mount ext4 filesystems using ext4dev,
chose Y here. This feature will go away by 2.6.31, so
choose Y here. This feature will go away by 2.6.31, so
please arrange to get your userspace programs fixed!
config EXT4_FS_XATTR
......@@ -77,3 +77,12 @@ config EXT4_FS_SECURITY
If you are not using a security module that requires using
extended attributes for file security labels, say N.
config EXT4_DEBUG
bool "EXT4 debugging support"
depends on EXT4_FS
help
Enables run-time debugging support for the ext4 filesystem.
If you select Y here, then you will be able to turn on debugging
with a command such as "echo 1 > /sys/kernel/debug/ext4/mballoc-debug"
......@@ -478,7 +478,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
* new bitmap information
*/
set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
ext4_mb_update_group_info(grp, blocks_freed);
grp->bb_free += blocks_freed;
up_write(&grp->alloc_sem);
/* We dirtied the bitmap block */
......
......@@ -67,27 +67,29 @@ typedef unsigned int ext4_group_t;
/* prefer goal again. length */
#define EXT4_MB_HINT_MERGE 1
#define EXT4_MB_HINT_MERGE 0x0001
/* blocks already reserved */
#define EXT4_MB_HINT_RESERVED 2
#define EXT4_MB_HINT_RESERVED 0x0002
/* metadata is being allocated */
#define EXT4_MB_HINT_METADATA 4
#define EXT4_MB_HINT_METADATA 0x0004
/* first blocks in the file */
#define EXT4_MB_HINT_FIRST 8
#define EXT4_MB_HINT_FIRST 0x0008
/* search for the best chunk */
#define EXT4_MB_HINT_BEST 16
#define EXT4_MB_HINT_BEST 0x0010
/* data is being allocated */
#define EXT4_MB_HINT_DATA 32
#define EXT4_MB_HINT_DATA 0x0020
/* don't preallocate (for tails) */
#define EXT4_MB_HINT_NOPREALLOC 64
#define EXT4_MB_HINT_NOPREALLOC 0x0040
/* allocate for locality group */
#define EXT4_MB_HINT_GROUP_ALLOC 128
#define EXT4_MB_HINT_GROUP_ALLOC 0x0080
/* allocate goal blocks or none */
#define EXT4_MB_HINT_GOAL_ONLY 256
#define EXT4_MB_HINT_GOAL_ONLY 0x0100
/* goal is meaningful */
#define EXT4_MB_HINT_TRY_GOAL 512
#define EXT4_MB_HINT_TRY_GOAL 0x0200
/* blocks already pre-reserved by delayed allocation */
#define EXT4_MB_DELALLOC_RESERVED 1024
#define EXT4_MB_DELALLOC_RESERVED 0x0400
/* We are doing stream allocation */
#define EXT4_MB_STREAM_ALLOC 0x0800
struct ext4_allocation_request {
......@@ -111,6 +113,21 @@ struct ext4_allocation_request {
unsigned int flags;
};
/*
* For delayed allocation tracking
*
* Describes one extent (b_blocknr, b_size, b_state) together with the
* page range (first_page, next_page) it covers.
*/
struct mpage_da_data {
struct inode *inode; /* NOTE(review): presumably the inode under writeback - confirm */
sector_t b_blocknr; /* start block number of extent */
size_t b_size; /* size of extent */
unsigned long b_state; /* state of the extent */
unsigned long first_page, next_page; /* extent of pages */
struct writeback_control *wbc; /* NOTE(review): ownership/lifetime not visible here - confirm */
int io_done; /* NOTE(review): looks like a flag rather than a count - confirm */
int pages_written; /* NOTE(review): presumably a running page count - confirm */
int retval; /* NOTE(review): presumably a status/error code - confirm */
};
/*
* Special inodes numbers
*/
......@@ -251,7 +268,6 @@ struct flex_groups {
#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */
#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
#define EXT4_EXT_MIGRATE 0x00100000 /* Inode is migrating */
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */
......@@ -289,6 +305,7 @@ static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */
#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */
#define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */
#define EXT4_STATE_EXT_MIGRATE 0x00000020 /* Inode is migrating */
/* Used to pass group descriptor data when online resize is done */
struct ext4_new_group_input {
......@@ -386,6 +403,9 @@ struct ext4_mount_options {
#endif
};
/* Max physical block we can address w/o extents */
#define EXT4_MAX_BLOCK_FILE_PHYS 0xFFFFFFFF
/*
* Structure of an inode on the disk
*/
......@@ -456,7 +476,6 @@ struct move_extent {
__u64 len; /* block length to be moved */
__u64 moved_len; /* moved block length */
};
#define MAX_DEFRAG_SIZE ((1UL<<31) - 1)
#define EXT4_EPOCH_BITS 2
#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
......@@ -694,7 +713,6 @@ struct ext4_inode_info {
#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
......@@ -841,6 +859,7 @@ struct ext4_sb_info {
unsigned long s_gdb_count; /* Number of group descriptor blocks */
unsigned long s_desc_per_block; /* Number of group descriptors per block */
ext4_group_t s_groups_count; /* Number of groups in the fs */
ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */
unsigned long s_overhead_last; /* Last calculated overhead */
unsigned long s_blocks_last; /* Last seen block count */
loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
......@@ -950,6 +969,7 @@ struct ext4_sb_info {
atomic_t s_mb_lost_chunks;
atomic_t s_mb_preallocated;
atomic_t s_mb_discarded;
atomic_t s_lock_busy;
/* locality groups */
struct ext4_locality_group *s_locality_groups;
......@@ -1340,8 +1360,6 @@ extern void ext4_mb_free_blocks(handle_t *, struct inode *,
ext4_fsblk_t, unsigned long, int, unsigned long *);
extern int ext4_mb_add_groupinfo(struct super_block *sb,
ext4_group_t i, struct ext4_group_desc *desc);
extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
ext4_grpblk_t add);
extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
extern void ext4_mb_put_buddy_cache_lock(struct super_block *,
ext4_group_t, int);
......@@ -1367,6 +1385,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int);
extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
extern int ext4_can_truncate(struct inode *inode);
extern void ext4_truncate(struct inode *);
extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
extern void ext4_set_inode_flags(struct inode *);
extern void ext4_get_inode_flags(struct ext4_inode_info *);
extern int ext4_alloc_da_blocks(struct inode *inode);
......@@ -1575,15 +1594,18 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
/*
 * Per-group block allocator bookkeeping: first free block, total free
 * blocks, number of free-space fragments, the preallocation list, and
 * the per-order counters of free power-of-two regions.
 *
 * NOTE(review): this listing is a rendered diff, so bb_first_free,
 * bb_free, bb_fragments and bb_counters each appear twice (unsigned
 * short and ext4_grpblk_t).  Presumably the unsigned short lines are the
 * pre-patch declarations - confirm against the applied tree.
 */
struct ext4_group_info {
unsigned long bb_state;
struct rb_root bb_free_root;
unsigned short bb_first_free;
unsigned short bb_free;
unsigned short bb_fragments;
ext4_grpblk_t bb_first_free; /* first free block */
ext4_grpblk_t bb_free; /* total free blocks */
ext4_grpblk_t bb_fragments; /* nr of freespace fragments */
struct list_head bb_prealloc_list;
#ifdef DOUBLE_CHECK
void *bb_bitmap;
#endif
struct rw_semaphore alloc_sem;
unsigned short bb_counters[];
ext4_grpblk_t bb_counters[]; /* Nr of free power-of-two-block
* regions, index is order.
* bb_counters[3] = 5 means
* 5 free 8-block regions. */
};
#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
......@@ -1591,15 +1613,42 @@ struct ext4_group_info {
#define EXT4_MB_GRP_NEED_INIT(grp) \
(test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
#define EXT4_MAX_CONTENTION 8
#define EXT4_CONTENTION_THRESHOLD 2
/*
 * Look up the spinlock that covers block group @group in the
 * superblock's s_blockgroup_lock.
 */
static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb,
					      ext4_group_t group)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	return bgl_lock_ptr(sbi->s_blockgroup_lock, group);
}
/*
 * Report whether the filesystem looks busy: the result is non-zero once
 * the block group lock contention counter (s_lock_busy) has risen above
 * EXT4_CONTENTION_THRESHOLD.
 */
static inline int ext4_fs_is_busy(struct ext4_sb_info *sbi)
{
	int contention = atomic_read(&sbi->s_lock_busy);

	return contention > EXT4_CONTENTION_THRESHOLD;
}
/*
 * Take the spinlock covering block group @group and keep the
 * s_lock_busy contention counter up to date: a successful trylock
 * decrements the counter unless it is already 0, a failed trylock
 * increments it unless it already equals EXT4_MAX_CONTENTION and then
 * waits in spin_lock().
 *
 * NOTE(review): this listing is a rendered diff.  The unconditional
 * spin_lock(ext4_group_lock_ptr(sb, group)) directly below appears to
 * be the pre-patch body that the trylock sequence replaces; if both
 * were live the same lock would be taken twice.  Confirm against the
 * applied tree.
 */
static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
{
spin_lock(ext4_group_lock_ptr(sb, group));
spinlock_t *lock = ext4_group_lock_ptr(sb, group);
if (spin_trylock(lock))
/*
* We're able to grab the lock right away, so drop the
* lock contention counter.
*/
atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, -1, 0);
else {
/*
* The lock is busy, so bump the contention counter,
* and then wait on the spin lock.
*/
atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, 1,
EXT4_MAX_CONTENTION);
spin_lock(lock);
}
}
static inline void ext4_unlock_group(struct super_block *sb,
......
......@@ -43,8 +43,7 @@
#define CHECK_BINSEARCH__
/*
* If EXT_DEBUG is defined you can use the 'extdebug' mount option
* to get lots of info about what's going on.
* Turn on EXT_DEBUG to get lots of info about extents operations.
*/
#define EXT_DEBUG__
#ifdef EXT_DEBUG
......@@ -138,6 +137,7 @@ typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *,
#define EXT_BREAK 1
#define EXT_REPEAT 2
/* Maximum logical block in a file; ext4_extent's ee_block is __le32 */
#define EXT_MAX_BLOCK 0xffffffff
/*
......
......@@ -44,7 +44,7 @@ int __ext4_journal_forget(const char *where, handle_t *handle,
handle, err);
}
else
brelse(bh);
bforget(bh);
return err;
}
......@@ -60,7 +60,7 @@ int __ext4_journal_revoke(const char *where, handle_t *handle,
handle, err);
}
else
brelse(bh);
bforget(bh);
return err;
}
......@@ -89,6 +89,9 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
ext4_journal_abort_handle(where, __func__, bh,
handle, err);
} else {
if (inode && bh)
mark_buffer_dirty_inode(bh, inode);
else
mark_buffer_dirty(bh);
if (inode && inode_needs_sync(inode)) {
sync_dirty_buffer(bh);
......
This diff is collapsed.
......@@ -50,7 +50,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
{
struct inode *inode = dentry->d_inode;
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
int ret = 0;
int err, ret = 0;
J_ASSERT(ext4_journal_current_handle() == NULL);
......@@ -79,6 +79,9 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
goto out;
}
if (!journal)
ret = sync_mapping_buffers(inode->i_mapping);
if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
goto out;
......@@ -91,10 +94,12 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
.sync_mode = WB_SYNC_ALL,
.nr_to_write = 0, /* sys_fsync did this */
};
ret = sync_inode(inode, &wbc);
if (journal && (journal->j_flags & JBD2_BARRIER))
blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
err = sync_inode(inode, &wbc);
if (ret == 0)
ret = err;
}
out:
if (journal && (journal->j_flags & JBD2_BARRIER))
blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
return ret;
}
......@@ -1189,7 +1189,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8);
printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n",
i, ext4_free_inodes_count(sb, gdp), x);
(unsigned long) i, ext4_free_inodes_count(sb, gdp), x);
bitmap_count += x;
}
brelse(bitmap_bh);
......
This diff is collapsed.
......@@ -243,10 +243,9 @@ setversion_out:
me.donor_start, me.len, &me.moved_len);
fput(donor_filp);
if (!err)
if (copy_to_user((struct move_extent *)arg,
&me, sizeof(me)))
if (copy_to_user((struct move_extent *)arg, &me, sizeof(me)))
return -EFAULT;
return err;
}
......
This diff is collapsed.
......@@ -37,11 +37,19 @@
/*
*/
#define MB_DEBUG__
#ifdef MB_DEBUG
#define mb_debug(fmt, a...) printk(fmt, ##a)
#ifdef CONFIG_EXT4_DEBUG
extern u8 mb_enable_debug;
#define mb_debug(n, fmt, a...) \
do { \
if ((n) <= mb_enable_debug) { \
printk(KERN_DEBUG "(%s, %d): %s: ", \
__FILE__, __LINE__, __func__); \
printk(fmt, ## a); \
} \
} while (0)
#else
#define mb_debug(fmt, a...)
#define mb_debug(n, fmt, a...)
#endif
/*
......@@ -128,8 +136,8 @@ struct ext4_prealloc_space {
unsigned pa_deleted;
ext4_fsblk_t pa_pstart; /* phys. block */
ext4_lblk_t pa_lstart; /* log. block */
unsigned short pa_len; /* len of preallocated chunk */
unsigned short pa_free; /* how many blocks are free */
ext4_grpblk_t pa_len; /* len of preallocated chunk */
ext4_grpblk_t pa_free; /* how many blocks are free */
unsigned short pa_type; /* pa type. inode or group */
spinlock_t *pa_obj_lock;
struct inode *pa_inode; /* hack, for history only */
......@@ -144,7 +152,7 @@ struct ext4_free_extent {
ext4_lblk_t fe_logical;
ext4_grpblk_t fe_start;
ext4_group_t fe_group;
int fe_len;
ext4_grpblk_t fe_len;
};
/*
......
......@@ -353,17 +353,16 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
down_write(&EXT4_I(inode)->i_data_sem);
/*
* if EXT4_EXT_MIGRATE is cleared a block allocation
* if EXT4_STATE_EXT_MIGRATE is cleared a block allocation
* happened after we started the migrate. We need to
* fail the migrate
*/
if (!(EXT4_I(inode)->i_flags & EXT4_EXT_MIGRATE)) {
if (!(EXT4_I(inode)->i_state & EXT4_STATE_EXT_MIGRATE)) {
retval = -EAGAIN;
up_write(&EXT4_I(inode)->i_data_sem);
goto err_out;
} else
EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags &
~EXT4_EXT_MIGRATE;
EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE;
/*
* We have the extent map build with the tmp inode.
* Now copy the i_data across
......@@ -517,14 +516,15 @@ int ext4_ext_migrate(struct inode *inode)
* when we add extents we extend the journal
*/
/*
* Even though we take i_mutex we can still cause block allocation
* via mmap write to holes. If we have allocated new blocks we fail
* migrate. New block allocation will clear EXT4_EXT_MIGRATE flag.
* The flag is updated with i_data_sem held to prevent racing with
* block allocation.
* Even though we take i_mutex we can still cause block
* allocation via mmap write to holes. If we have allocated
* new blocks we fail migrate. New block allocation will
* clear EXT4_STATE_EXT_MIGRATE flag. The flag is updated
* with i_data_sem held to prevent racing with block
* allocation.
*/
down_read((&EXT4_I(inode)->i_data_sem));
EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags | EXT4_EXT_MIGRATE;
EXT4_I(inode)->i_state |= EXT4_STATE_EXT_MIGRATE;
up_read((&EXT4_I(inode)->i_data_sem));
handle = ext4_journal_start(inode, 1);
......@@ -618,7 +618,7 @@ err_out:
tmp_inode->i_nlink = 0;
ext4_journal_stop(handle);
unlock_new_inode(tmp_inode);
iput(tmp_inode);
return retval;
......
This diff is collapsed.
......@@ -1518,8 +1518,12 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
return retval;
if (blocks == 1 && !dx_fallback &&
EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX))
return make_indexed_dir(handle, dentry, inode, bh);
EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) {
retval = make_indexed_dir(handle, dentry, inode, bh);
if (retval == -ENOSPC)
brelse(bh);
return retval;
}
brelse(bh);
}
bh = ext4_append(handle, dir, &block, &retval);
......@@ -1528,7 +1532,10 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
de = (struct ext4_dir_entry_2 *) bh->b_data;
de->inode = 0;
de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize);
return add_dirent_to_buf(handle, dentry, inode, de, bh);
retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
if (retval == -ENOSPC)
brelse(bh);
return retval;
}
/*
......@@ -1590,9 +1597,9 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
goto cleanup;
node2 = (struct dx_node *)(bh2->b_data);
entries2 = node2->entries;
memset(&node2->fake, 0, sizeof(struct fake_dirent));
node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize,
sb->s_blocksize);
node2->fake.inode = 0;
BUFFER_TRACE(frame->bh, "get_write_access");
err = ext4_journal_get_write_access(handle, frame->bh);
if (err)
......@@ -1657,6 +1664,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
if (!de)
goto cleanup;
err = add_dirent_to_buf(handle, dentry, inode, de, bh);
if (err != -ENOSPC)
bh = NULL;
goto cleanup;
......@@ -2310,7 +2318,7 @@ static int ext4_link(struct dentry *old_dentry,
struct inode *inode = old_dentry->d_inode;
int err, retries = 0;
if (EXT4_DIR_LINK_MAX(inode))
if (inode->i_nlink >= EXT4_LINK_MAX)
return -EMLINK;
/*
......@@ -2413,7 +2421,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
goto end_rename;
retval = -EMLINK;
if (!new_inode && new_dir != old_dir &&
new_dir->i_nlink >= EXT4_LINK_MAX)
EXT4_DIR_LINK_MAX(new_dir))
goto end_rename;
}
if (!new_bh) {
......
......@@ -746,7 +746,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
struct inode *inode = NULL;
handle_t *handle;
int gdb_off, gdb_num;
int num_grp_locked = 0;
int err, err2;
gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb);
......@@ -856,7 +855,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
* using the new disk blocks.
*/
num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, input->group);
/* Update group descriptor block for new group */
gdp = (struct ext4_group_desc *)((char *)primary->b_data +
gdb_off * EXT4_DESC_SIZE(sb));
......@@ -875,10 +873,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
* descriptor
*/
err = ext4_mb_add_groupinfo(sb, input->group, gdp);
if (err) {
ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked);
if (err)
goto exit_journal;
}
/*
* Make the new blocks and inodes valid next. We do this before
......@@ -920,7 +916,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
/* Update the global fs size fields */
sbi->s_groups_count++;
ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked);
ext4_handle_dirty_metadata(handle, NULL, primary);
......
......@@ -45,6 +45,7 @@
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
#include "mballoc.h"
#define CREATE_TRACE_POINTS
#include <trace/events/ext4.h>
......@@ -344,7 +345,8 @@ static const char *ext4_decode_error(struct super_block *sb, int errno,
errstr = "Out of memory";
break;
case -EROFS:
if (!sb || EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT)
if (!sb || (EXT4_SB(sb)->s_journal &&
EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
errstr = "Journal has aborted";
else
errstr = "Readonly filesystem";
......@@ -1279,11 +1281,9 @@ static int parse_options(char *options, struct super_block *sb,
*journal_devnum = option;
break;
case Opt_journal_checksum:
set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
break;
break; /* Kept for backwards compatibility */
case Opt_journal_async_commit:
set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT);
set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
break;
case Opt_noload:
set_opt(sbi->s_mount_opt, NOLOAD);
......@@ -1695,12 +1695,12 @@ static int ext4_fill_flex_info(struct super_block *sb)
gdp = ext4_get_group_desc(sb, i, NULL);
flex_group = ext4_flex_group(sbi, i);
atomic_set(&sbi->s_flex_groups[flex_group].free_inodes,
ext4_free_inodes_count(sb, gdp));
atomic_set(&sbi->s_flex_groups[flex_group].free_blocks,
ext4_free_blks_count(sb, gdp));
atomic_set(&sbi->s_flex_groups[flex_group].used_dirs,
ext4_used_dirs_count(sb, gdp));
atomic_add(ext4_free_inodes_count(sb, gdp),
&sbi->s_flex_groups[flex_group].free_inodes);
atomic_add(ext4_free_blks_count(sb, gdp),
&sbi->s_flex_groups[flex_group].free_blocks);
atomic_add(ext4_used_dirs_count(sb, gdp),
&sbi->s_flex_groups[flex_group].used_dirs);
}
return 1;
......@@ -2253,6 +2253,49 @@ static struct kobj_type ext4_ktype = {
.release = ext4_sb_release,
};
/*
* Check whether this filesystem can be mounted based on
* the features present and the RDONLY/RDWR mount requested.
* Returns 1 if this filesystem can be mounted as requested,
* 0 if it cannot be.
*/
static int ext4_feature_set_ok(struct super_block *sb, int readonly)
{
if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) {
ext4_msg(sb, KERN_ERR,
"Couldn't mount because of "
"unsupported optional features (%x)",
(le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
~EXT4_FEATURE_INCOMPAT_SUPP));
return 0;
}
if (readonly)
return 1;
/* Check that feature set is OK for a read-write mount */
if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) {
ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
"unsupported optional features (%x)",
(le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
~EXT4_FEATURE_RO_COMPAT_SUPP));
return 0;
}
/*
* Large file size enabled file system can only be mounted
* read-write on 32-bit systems if kernel is built with CONFIG_LBDAF
*/
if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) {
if (sizeof(blkcnt_t) < sizeof(u64)) {
ext4_msg(sb, KERN_ERR, "Filesystem with huge files "
"cannot be mounted RDWR without "
"CONFIG_LBDAF");
return 0;
}
}
return 1;
}
static int ext4_fill_super(struct super_block *sb, void *data, int silent)
__releases(kernel_lock)
__acquires(kernel_lock)
......@@ -2274,7 +2317,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
unsigned int db_count;
unsigned int i;
int needs_recovery, has_huge_files;
int features;
__u64 blocks_count;
int err;
unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
......@@ -2401,39 +2443,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
* previously didn't change the revision level when setting the flags,
* so there is a chance incompat flags are set on a rev 0 filesystem.
*/
features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP);
if (features) {
ext4_msg(sb, KERN_ERR,
"Couldn't mount because of "
"unsupported optional features (%x)",
(le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
~EXT4_FEATURE_INCOMPAT_SUPP));
goto failed_mount;
}
features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP);
if (!(sb->s_flags & MS_RDONLY) && features) {
ext4_msg(sb, KERN_ERR,
"Couldn't mount RDWR because of "
"unsupported optional features (%x)",
(le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
~EXT4_FEATURE_RO_COMPAT_SUPP));
if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
goto failed_mount;
}
has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
if (has_huge_files) {
/*
* Large file size enabled file system can only be
* mount if kernel is build with CONFIG_LBDAF
*/
if (sizeof(root->i_blocks) < sizeof(u64) &&
!(sb->s_flags & MS_RDONLY)) {
ext4_msg(sb, KERN_ERR, "Filesystem with huge "
"files cannot be mounted read-write "
"without CONFIG_LBDAF");
goto failed_mount;
}
}
blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
if (blocksize < EXT4_MIN_BLOCK_SIZE ||
......@@ -2469,6 +2481,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
}
has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
has_huge_files);
sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
......@@ -2549,12 +2563,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
}
if (ext4_blocks_count(es) >
(sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
/*
* Test whether we have more sectors than will fit in sector_t,
* and whether the max offset is addressable by the page cache.
*/
if ((ext4_blocks_count(es) >
(sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) ||
(ext4_blocks_count(es) >
(pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits))) {
ext4_msg(sb, KERN_ERR, "filesystem"
" too large to mount safely");
" too large to mount safely on this system");
if (sizeof(sector_t) < 8)
ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
ret = -EFBIG;
goto failed_mount;
}
......@@ -2595,6 +2616,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
}
sbi->s_groups_count = blocks_count;
sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
(EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
EXT4_DESC_PER_BLOCK(sb);
sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
......@@ -2729,20 +2752,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount4;
}
if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
jbd2_journal_set_features(sbi->s_journal,
JBD2_FEATURE_COMPAT_CHECKSUM, 0,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
} else if (test_opt(sb, JOURNAL_CHECKSUM)) {
jbd2_journal_set_features(sbi->s_journal,
JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0);
jbd2_journal_clear_features(sbi->s_journal, 0, 0,
if (test_opt(sb, JOURNAL_ASYNC_COMMIT))
jbd2_journal_set_features(sbi->s_journal, 0, 0,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
} else {
jbd2_journal_clear_features(sbi->s_journal,
JBD2_FEATURE_COMPAT_CHECKSUM, 0,
else
jbd2_journal_clear_features(sbi->s_journal, 0, 0,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
}
/* We have now updated the journal if required, so we can
* validate the data journaling mode. */
......@@ -3208,6 +3225,17 @@ static int ext4_commit_super(struct super_block *sb, int sync)
clear_buffer_write_io_error(sbh);
set_buffer_uptodate(sbh);
}
/*
* If the file system is mounted read-only, don't update the
* superblock write time. This avoids updating the superblock
* write time when we are mounting the root file system
* read/only but we need to replay the journal; at that point,
* for people who are east of GMT and who make their clock
* tick in localtime for Windows bug-for-bug compatibility,
* the clock is set in the future, and this will cause e2fsck
* to complain and force a full file system check.
*/
if (!(sb->s_flags & MS_RDONLY))
es->s_wtime = cpu_to_le32(get_seconds());
es->s_kbytes_written =
cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
......@@ -3477,18 +3505,11 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
if (sbi->s_journal)
ext4_mark_recovery_complete(sb, es);
} else {
int ret;
if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb,
~EXT4_FEATURE_RO_COMPAT_SUPP))) {
ext4_msg(sb, KERN_WARNING, "couldn't "
"remount RDWR because of unsupported "
"optional features (%x)",
(le32_to_cpu(sbi->s_es->s_feature_ro_compat) &
~EXT4_FEATURE_RO_COMPAT_SUPP));
/* Make sure we can mount this feature set readwrite */
if (!ext4_feature_set_ok(sb, 0)) {
err = -EROFS;
goto restore_opts;
}
/*
* Make sure the group descriptor checksums
* are sane. If they aren't, refuse to remount r/w.
......
......@@ -810,12 +810,23 @@ inserted:
get_bh(new_bh);
} else {
/* We need to allocate a new block */
ext4_fsblk_t goal = ext4_group_first_block_no(sb,
ext4_fsblk_t goal, block;
goal = ext4_group_first_block_no(sb,
EXT4_I(inode)->i_block_group);
ext4_fsblk_t block = ext4_new_meta_blocks(handle, inode,
/* non-extent files can't have physical blocks past 2^32 */
if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
block = ext4_new_meta_blocks(handle, inode,
goal, NULL, &error);
if (error)
goto cleanup;
if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS);
ea_idebug(inode, "creating block %d", block);
new_bh = sb_getblk(sb, block);
......
......@@ -25,6 +25,7 @@
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <trace/events/jbd2.h>
/*
......@@ -711,6 +712,8 @@ start_journal_io:
&cbh, crc32_sum);
if (err)
__jbd2_journal_abort_hard(journal);
if (journal->j_flags & JBD2_BARRIER)
blkdev_issue_flush(journal->j_dev, NULL);
}
/*
......
......@@ -1187,6 +1187,12 @@ static int journal_reset(journal_t *journal)
first = be32_to_cpu(sb->s_first);
last = be32_to_cpu(sb->s_maxlen);
if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) {
printk(KERN_ERR "JBD: Journal too short (blocks %llu-%llu).\n",
first, last);
journal_fail_superblock(journal);
return -EINVAL;
}
journal->j_first = first;
journal->j_last = last;
......
......@@ -57,7 +57,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
INIT_LIST_HEAD(&transaction->t_private_list);
/* Set up the commit timer for the new transaction. */
journal->j_commit_timer.expires = round_jiffies(transaction->t_expires);
journal->j_commit_timer.expires = round_jiffies_up(transaction->t_expires);
add_timer(&journal->j_commit_timer);
J_ASSERT(journal->j_running_transaction == NULL);
......@@ -238,6 +238,8 @@ repeat_locked:
__jbd2_log_space_left(journal));
spin_unlock(&transaction->t_handle_lock);
spin_unlock(&journal->j_state_lock);
lock_map_acquire(&handle->h_lockdep_map);
out:
if (unlikely(new_transaction)) /* It's usually NULL */
kfree(new_transaction);
......@@ -303,8 +305,6 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
handle = ERR_PTR(err);
goto out;
}
lock_map_acquire(&handle->h_lockdep_map);
out:
return handle;
}
......@@ -426,6 +426,7 @@ int jbd2_journal_restart(handle_t *handle, int nblocks)
__jbd2_log_start_commit(journal, transaction->t_tid);
spin_unlock(&journal->j_state_lock);
lock_map_release(&handle->h_lockdep_map);
handle->h_buffer_credits = nblocks;
ret = start_this_handle(journal, handle);
return ret;
......
......@@ -652,7 +652,7 @@ struct transaction_s
* This transaction is being forced and some process is
* waiting for it to finish.
*/
int t_synchronous_commit:1;
unsigned int t_synchronous_commit:1;
/*
* For use by the filesystem to store fs-specific data
......
This diff is collapsed.
......@@ -159,7 +159,7 @@ TRACE_EVENT(jbd2_submit_inode_data,
),
TP_printk("dev %s ino %lu",
jbd2_dev_to_name(__entry->dev), __entry->ino)
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino)
);
#endif /* _TRACE_JBD2_H */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment