Commit e893123c authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (49 commits)
  ext4: Avoid corrupting the uninitialized bit in the extent during truncate
  ext4: Don't treat a truncation of a zero-length file as replace-via-truncate
  ext4: fix dx_map_entry to support 256k directory blocks
  ext4: truncate the file properly if we fail to copy data from userspace
  ext4: Avoid leaking blocks after a block allocation failure
  ext4: Change all super.c messages to print the device
  ext4: Get rid of EXTEND_DISKSIZE flag of ext4_get_blocks_handle()
  ext4: super.c whitespace cleanup
  jbd2: Fix minor typos in comments in fs/jbd2/journal.c
  ext4: Clean up calls to ext4_get_group_desc()
  ext4: remove unused function __ext4_write_dirty_metadata
  ext2: Fix memory leak in ext2_fill_super() in case of a failed mount
  ext3: Fix memory leak in ext3_fill_super() in case of a failed mount
  ext4: Fix memory leak in ext4_fill_super() in case of a failed mount
  ext4: down i_data_sem only for read when walking tree for fiemap
  ext4: Add a comprehensive block validity check to ext4_get_blocks()
  ext4: Clean up ext4_get_blocks() so it does not depend on bh_result->b_state
  ext4: Merge ext4_da_get_block_write() into mpage_da_map_blocks()
  ext4: Add BUG_ON debugging checks to noalloc_get_block_write()
  ext4: Add documentation to the ext4_*get_block* functions
  ...
parents 27951daa a41f2071
...@@ -2935,6 +2935,8 @@ int submit_bh(int rw, struct buffer_head * bh) ...@@ -2935,6 +2935,8 @@ int submit_bh(int rw, struct buffer_head * bh)
BUG_ON(!buffer_locked(bh)); BUG_ON(!buffer_locked(bh));
BUG_ON(!buffer_mapped(bh)); BUG_ON(!buffer_mapped(bh));
BUG_ON(!bh->b_end_io); BUG_ON(!bh->b_end_io);
BUG_ON(buffer_delay(bh));
BUG_ON(buffer_unwritten(bh));
/* /*
* Mask in barrier bit for a write (could be either a WRITE or a * Mask in barrier bit for a write (could be either a WRITE or a
......
...@@ -1093,6 +1093,7 @@ failed_mount: ...@@ -1093,6 +1093,7 @@ failed_mount:
brelse(bh); brelse(bh);
failed_sbi: failed_sbi:
sb->s_fs_info = NULL; sb->s_fs_info = NULL;
kfree(sbi->s_blockgroup_lock);
kfree(sbi); kfree(sbi);
return ret; return ret;
} }
......
...@@ -2021,6 +2021,7 @@ failed_mount: ...@@ -2021,6 +2021,7 @@ failed_mount:
brelse(bh); brelse(bh);
out_fail: out_fail:
sb->s_fs_info = NULL; sb->s_fs_info = NULL;
kfree(sbi->s_blockgroup_lock);
kfree(sbi); kfree(sbi);
lock_kernel(); lock_kernel();
return ret; return ret;
......
...@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT4_FS) += ext4.o ...@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT4_FS) += ext4.o
ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
ext4_jbd2.o migrate.o mballoc.o ext4_jbd2.o migrate.o mballoc.o block_validity.o
ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
......
...@@ -19,7 +19,6 @@ ...@@ -19,7 +19,6 @@
#include <linux/buffer_head.h> #include <linux/buffer_head.h>
#include "ext4.h" #include "ext4.h"
#include "ext4_jbd2.h" #include "ext4_jbd2.h"
#include "group.h"
#include "mballoc.h" #include "mballoc.h"
/* /*
...@@ -88,6 +87,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, ...@@ -88,6 +87,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
ext4_group_t block_group, struct ext4_group_desc *gdp) ext4_group_t block_group, struct ext4_group_desc *gdp)
{ {
int bit, bit_max; int bit, bit_max;
ext4_group_t ngroups = ext4_get_groups_count(sb);
unsigned free_blocks, group_blocks; unsigned free_blocks, group_blocks;
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
...@@ -123,7 +123,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, ...@@ -123,7 +123,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
bit_max += ext4_bg_num_gdb(sb, block_group); bit_max += ext4_bg_num_gdb(sb, block_group);
} }
if (block_group == sbi->s_groups_count - 1) { if (block_group == ngroups - 1) {
/* /*
* Even though mke2fs always initialize first and last group * Even though mke2fs always initialize first and last group
* if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need
...@@ -131,7 +131,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, ...@@ -131,7 +131,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
*/ */
group_blocks = ext4_blocks_count(sbi->s_es) - group_blocks = ext4_blocks_count(sbi->s_es) -
le32_to_cpu(sbi->s_es->s_first_data_block) - le32_to_cpu(sbi->s_es->s_first_data_block) -
(EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1)); (EXT4_BLOCKS_PER_GROUP(sb) * (ngroups - 1));
} else { } else {
group_blocks = EXT4_BLOCKS_PER_GROUP(sb); group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
} }
...@@ -205,18 +205,18 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, ...@@ -205,18 +205,18 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
{ {
unsigned int group_desc; unsigned int group_desc;
unsigned int offset; unsigned int offset;
ext4_group_t ngroups = ext4_get_groups_count(sb);
struct ext4_group_desc *desc; struct ext4_group_desc *desc;
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
if (block_group >= sbi->s_groups_count) { if (block_group >= ngroups) {
ext4_error(sb, "ext4_get_group_desc", ext4_error(sb, "ext4_get_group_desc",
"block_group >= groups_count - " "block_group >= groups_count - "
"block_group = %u, groups_count = %u", "block_group = %u, groups_count = %u",
block_group, sbi->s_groups_count); block_group, ngroups);
return NULL; return NULL;
} }
smp_rmb();
group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
...@@ -326,16 +326,16 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) ...@@ -326,16 +326,16 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
unlock_buffer(bh); unlock_buffer(bh);
return bh; return bh;
} }
spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); ext4_lock_group(sb, block_group);
if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
ext4_init_block_bitmap(sb, bh, block_group, desc); ext4_init_block_bitmap(sb, bh, block_group, desc);
set_bitmap_uptodate(bh); set_bitmap_uptodate(bh);
set_buffer_uptodate(bh); set_buffer_uptodate(bh);
spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); ext4_unlock_group(sb, block_group);
unlock_buffer(bh); unlock_buffer(bh);
return bh; return bh;
} }
spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); ext4_unlock_group(sb, block_group);
if (buffer_uptodate(bh)) { if (buffer_uptodate(bh)) {
/* /*
* if not uninit if bh is uptodate, * if not uninit if bh is uptodate,
...@@ -451,7 +451,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, ...@@ -451,7 +451,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
down_write(&grp->alloc_sem); down_write(&grp->alloc_sem);
for (i = 0, blocks_freed = 0; i < count; i++) { for (i = 0, blocks_freed = 0; i < count; i++) {
BUFFER_TRACE(bitmap_bh, "clear bit"); BUFFER_TRACE(bitmap_bh, "clear bit");
if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
bit + i, bitmap_bh->b_data)) { bit + i, bitmap_bh->b_data)) {
ext4_error(sb, __func__, ext4_error(sb, __func__,
"bit already cleared for block %llu", "bit already cleared for block %llu",
...@@ -461,11 +461,11 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, ...@@ -461,11 +461,11 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
blocks_freed++; blocks_freed++;
} }
} }
spin_lock(sb_bgl_lock(sbi, block_group)); ext4_lock_group(sb, block_group);
blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc); blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc);
ext4_free_blks_set(sb, desc, blk_free_count); ext4_free_blks_set(sb, desc, blk_free_count);
desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
spin_unlock(sb_bgl_lock(sbi, block_group)); ext4_unlock_group(sb, block_group);
percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed); percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
if (sbi->s_log_groups_per_flex) { if (sbi->s_log_groups_per_flex) {
...@@ -665,7 +665,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) ...@@ -665,7 +665,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
ext4_fsblk_t desc_count; ext4_fsblk_t desc_count;
struct ext4_group_desc *gdp; struct ext4_group_desc *gdp;
ext4_group_t i; ext4_group_t i;
ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; ext4_group_t ngroups = ext4_get_groups_count(sb);
#ifdef EXT4FS_DEBUG #ifdef EXT4FS_DEBUG
struct ext4_super_block *es; struct ext4_super_block *es;
ext4_fsblk_t bitmap_count; ext4_fsblk_t bitmap_count;
...@@ -677,7 +677,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) ...@@ -677,7 +677,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
bitmap_count = 0; bitmap_count = 0;
gdp = NULL; gdp = NULL;
smp_rmb();
for (i = 0; i < ngroups; i++) { for (i = 0; i < ngroups; i++) {
gdp = ext4_get_group_desc(sb, i, NULL); gdp = ext4_get_group_desc(sb, i, NULL);
if (!gdp) if (!gdp)
...@@ -700,7 +699,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) ...@@ -700,7 +699,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
return bitmap_count; return bitmap_count;
#else #else
desc_count = 0; desc_count = 0;
smp_rmb();
for (i = 0; i < ngroups; i++) { for (i = 0; i < ngroups; i++) {
gdp = ext4_get_group_desc(sb, i, NULL); gdp = ext4_get_group_desc(sb, i, NULL);
if (!gdp) if (!gdp)
......
/*
* linux/fs/ext4/block_validity.c
*
* Copyright (C) 2009
* Theodore Ts'o (tytso@mit.edu)
*
* Track which blocks in the filesystem are metadata blocks that
* should never be used as data blocks by files or directories.
*/
#include <linux/time.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/module.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/version.h>
#include <linux/blkdev.h>
#include <linux/mutex.h>
#include "ext4.h"
struct ext4_system_zone {
struct rb_node node;
ext4_fsblk_t start_blk;
unsigned int count;
};
static struct kmem_cache *ext4_system_zone_cachep;
int __init init_ext4_system_zone(void)
{
ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone,
SLAB_RECLAIM_ACCOUNT);
if (ext4_system_zone_cachep == NULL)
return -ENOMEM;
return 0;
}
void exit_ext4_system_zone(void)
{
kmem_cache_destroy(ext4_system_zone_cachep);
}
static inline int can_merge(struct ext4_system_zone *entry1,
struct ext4_system_zone *entry2)
{
if ((entry1->start_blk + entry1->count) == entry2->start_blk)
return 1;
return 0;
}
/*
* Mark a range of blocks as belonging to the "system zone" --- that
* is, filesystem metadata blocks which should never be used by
* inodes.
*/
static int add_system_zone(struct ext4_sb_info *sbi,
ext4_fsblk_t start_blk,
unsigned int count)
{
struct ext4_system_zone *new_entry = NULL, *entry;
struct rb_node **n = &sbi->system_blks.rb_node, *node;
struct rb_node *parent = NULL, *new_node = NULL;
while (*n) {
parent = *n;
entry = rb_entry(parent, struct ext4_system_zone, node);
if (start_blk < entry->start_blk)
n = &(*n)->rb_left;
else if (start_blk >= (entry->start_blk + entry->count))
n = &(*n)->rb_right;
else {
if (start_blk + count > (entry->start_blk +
entry->count))
entry->count = (start_blk + count -
entry->start_blk);
new_node = *n;
new_entry = rb_entry(new_node, struct ext4_system_zone,
node);
break;
}
}
if (!new_entry) {
new_entry = kmem_cache_alloc(ext4_system_zone_cachep,
GFP_KERNEL);
if (!new_entry)
return -ENOMEM;
new_entry->start_blk = start_blk;
new_entry->count = count;
new_node = &new_entry->node;
rb_link_node(new_node, parent, n);
rb_insert_color(new_node, &sbi->system_blks);
}
/* Can we merge to the left? */
node = rb_prev(new_node);
if (node) {
entry = rb_entry(node, struct ext4_system_zone, node);
if (can_merge(entry, new_entry)) {
new_entry->start_blk = entry->start_blk;
new_entry->count += entry->count;
rb_erase(node, &sbi->system_blks);
kmem_cache_free(ext4_system_zone_cachep, entry);
}
}
/* Can we merge to the right? */
node = rb_next(new_node);
if (node) {
entry = rb_entry(node, struct ext4_system_zone, node);
if (can_merge(new_entry, entry)) {
new_entry->count += entry->count;
rb_erase(node, &sbi->system_blks);
kmem_cache_free(ext4_system_zone_cachep, entry);
}
}
return 0;
}
static void debug_print_tree(struct ext4_sb_info *sbi)
{
struct rb_node *node;
struct ext4_system_zone *entry;
int first = 1;
printk(KERN_INFO "System zones: ");
node = rb_first(&sbi->system_blks);
while (node) {
entry = rb_entry(node, struct ext4_system_zone, node);
printk("%s%llu-%llu", first ? "" : ", ",
entry->start_blk, entry->start_blk + entry->count - 1);
first = 0;
node = rb_next(node);
}
printk("\n");
}
int ext4_setup_system_zone(struct super_block *sb)
{
ext4_group_t ngroups = ext4_get_groups_count(sb);
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_group_desc *gdp;
ext4_group_t i;
int flex_size = ext4_flex_bg_size(sbi);
int ret;
if (!test_opt(sb, BLOCK_VALIDITY)) {
if (EXT4_SB(sb)->system_blks.rb_node)
ext4_release_system_zone(sb);
return 0;
}
if (EXT4_SB(sb)->system_blks.rb_node)
return 0;
for (i=0; i < ngroups; i++) {
if (ext4_bg_has_super(sb, i) &&
((i < 5) || ((i % flex_size) == 0)))
add_system_zone(sbi, ext4_group_first_block_no(sb, i),
sbi->s_gdb_count + 1);
gdp = ext4_get_group_desc(sb, i, NULL);
ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
if (ret)
return ret;
ret = add_system_zone(sbi, ext4_inode_bitmap(sb, gdp), 1);
if (ret)
return ret;
ret = add_system_zone(sbi, ext4_inode_table(sb, gdp),
sbi->s_itb_per_group);
if (ret)
return ret;
}
if (test_opt(sb, DEBUG))
debug_print_tree(EXT4_SB(sb));
return 0;
}
/* Called when the filesystem is unmounted */
void ext4_release_system_zone(struct super_block *sb)
{
struct rb_node *n = EXT4_SB(sb)->system_blks.rb_node;
struct rb_node *parent;
struct ext4_system_zone *entry;
while (n) {
/* Do the node's children first */
if (n->rb_left) {
n = n->rb_left;
continue;
}
if (n->rb_right) {
n = n->rb_right;
continue;
}
/*
* The node has no children; free it, and then zero
* out parent's link to it. Finally go to the
* beginning of the loop and try to free the parent
* node.
*/
parent = rb_parent(n);
entry = rb_entry(n, struct ext4_system_zone, node);
kmem_cache_free(ext4_system_zone_cachep, entry);
if (!parent)
EXT4_SB(sb)->system_blks.rb_node = NULL;
else if (parent->rb_left == n)
parent->rb_left = NULL;
else if (parent->rb_right == n)
parent->rb_right = NULL;
n = parent;
}
EXT4_SB(sb)->system_blks.rb_node = NULL;
}
/*
* Returns 1 if the passed-in block region (start_blk,
* start_blk+count) is valid; 0 if some part of the block region
* overlaps with filesystem metadata blocks.
*/
int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
unsigned int count)
{
struct ext4_system_zone *entry;
struct rb_node *n = sbi->system_blks.rb_node;
if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
(start_blk + count > ext4_blocks_count(sbi->s_es)))
return 0;
while (n) {
entry = rb_entry(n, struct ext4_system_zone, node);
if (start_blk + count - 1 < entry->start_blk)
n = n->rb_left;
else if (start_blk >= (entry->start_blk + entry->count))
n = n->rb_right;
else
return 0;
}
return 1;
}
...@@ -131,8 +131,7 @@ static int ext4_readdir(struct file *filp, ...@@ -131,8 +131,7 @@ static int ext4_readdir(struct file *filp,
struct buffer_head *bh = NULL; struct buffer_head *bh = NULL;
map_bh.b_state = 0; map_bh.b_state = 0;
err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, err = ext4_get_blocks(NULL, inode, blk, 1, &map_bh, 0);
0, 0, 0);
if (err > 0) { if (err > 0) {
pgoff_t index = map_bh.b_blocknr >> pgoff_t index = map_bh.b_blocknr >>
(PAGE_CACHE_SHIFT - inode->i_blkbits); (PAGE_CACHE_SHIFT - inode->i_blkbits);
......
This diff is collapsed.
/*
* ext4_i.h
*
* Copyright (C) 1992, 1993, 1994, 1995
* Remy Card (card@masi.ibp.fr)
* Laboratoire MASI - Institut Blaise Pascal
* Universite Pierre et Marie Curie (Paris VI)
*
* from
*
* linux/include/linux/minix_fs_i.h
*
* Copyright (C) 1991, 1992 Linus Torvalds
*/
#ifndef _EXT4_I
#define _EXT4_I
#include <linux/rwsem.h>
#include <linux/rbtree.h>
#include <linux/seqlock.h>
#include <linux/mutex.h>
/* data type for block offset of block group */
typedef int ext4_grpblk_t;
/* data type for filesystem-wide blocks number */
typedef unsigned long long ext4_fsblk_t;
/* data type for file logical block number */
typedef __u32 ext4_lblk_t;
/* data type for block group number */
typedef unsigned int ext4_group_t;
/*
* storage for cached extent
*/
struct ext4_ext_cache {
ext4_fsblk_t ec_start;
ext4_lblk_t ec_block;
__u32 ec_len; /* must be 32bit to return holes */
__u32 ec_type;
};
/*
* fourth extended file system inode data in memory
*/
struct ext4_inode_info {
__le32 i_data[15]; /* unconverted */
__u32 i_flags;
ext4_fsblk_t i_file_acl;
__u32 i_dtime;
/*
* i_block_group is the number of the block group which contains
* this file's inode. Constant across the lifetime of the inode,
* it is ued for making block allocation decisions - we try to
* place a file's data blocks near its inode block, and new inodes
* near to their parent directory's inode.
*/
ext4_group_t i_block_group;
__u32 i_state; /* Dynamic state flags for ext4 */
ext4_lblk_t i_dir_start_lookup;
#ifdef CONFIG_EXT4_FS_XATTR
/*
* Extended attributes can be read independently of the main file
* data. Taking i_mutex even when reading would cause contention
* between readers of EAs and writers of regular file data, so
* instead we synchronize on xattr_sem when reading or changing
* EAs.
*/
struct rw_semaphore xattr_sem;
#endif
#ifdef CONFIG_EXT4_FS_POSIX_ACL
struct posix_acl *i_acl;
struct posix_acl *i_default_acl;
#endif
struct list_head i_orphan; /* unlinked but open inodes */
/*
* i_disksize keeps track of what the inode size is ON DISK, not
* in memory. During truncate, i_size is set to the new size by
* the VFS prior to calling ext4_truncate(), but the filesystem won't
* set i_disksize to 0 until the truncate is actually under way.
*
* The intent is that i_disksize always represents the blocks which
* are used by this file. This allows recovery to restart truncate
* on orphans if we crash during truncate. We actually write i_disksize
* into the on-disk inode when writing inodes out, instead of i_size.
*
* The only time when i_disksize and i_size may be different is when
* a truncate is in progress. The only things which change i_disksize
* are ext4_get_block (growth) and ext4_truncate (shrinkth).
*/
loff_t i_disksize;
/*
* i_data_sem is for serialising ext4_truncate() against
* ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's
* data tree are chopped off during truncate. We can't do that in
* ext4 because whenever we perform intermediate commits during
* truncate, the inode and all the metadata blocks *must* be in a
* consistent state which allows truncation of the orphans to restart
* during recovery. Hence we must fix the get_block-vs-truncate race
* by other means, so we have i_data_sem.
*/
struct rw_semaphore i_data_sem;
struct inode vfs_inode;
struct jbd2_inode jinode;
struct ext4_ext_cache i_cached_extent;
/*
* File creation time. Its function is same as that of
* struct timespec i_{a,c,m}time in the generic inode.
*/
struct timespec i_crtime;
/* mballoc */
struct list_head i_prealloc_list;
spinlock_t i_prealloc_lock;
/* ialloc */
ext4_group_t i_last_alloc_group;
/* allocation reservation info for delalloc */
unsigned int i_reserved_data_blocks;
unsigned int i_reserved_meta_blocks;
unsigned int i_allocated_meta_blocks;
unsigned short i_delalloc_reserved_flag;
/* on-disk additional length */
__u16 i_extra_isize;
spinlock_t i_block_reservation_lock;
};
#endif /* _EXT4_I */
/*
* ext4_sb.h
*
* Copyright (C) 1992, 1993, 1994, 1995
* Remy Card (card@masi.ibp.fr)
* Laboratoire MASI - Institut Blaise Pascal
* Universite Pierre et Marie Curie (Paris VI)
*
* from
*
* linux/include/linux/minix_fs_sb.h
*
* Copyright (C) 1991, 1992 Linus Torvalds
*/
#ifndef _EXT4_SB
#define _EXT4_SB
#ifdef __KERNEL__
#include <linux/timer.h>
#include <linux/wait.h>
#include <linux/blockgroup_lock.h>
#include <linux/percpu_counter.h>
#endif
#include <linux/rbtree.h>
/*
* fourth extended-fs super-block data in memory
*/
struct ext4_sb_info {
unsigned long s_desc_size; /* Size of a group descriptor in bytes */
unsigned long s_inodes_per_block;/* Number of inodes per block */
unsigned long s_blocks_per_group;/* Number of blocks in a group */
unsigned long s_inodes_per_group;/* Number of inodes in a group */
unsigned long s_itb_per_group; /* Number of inode table blocks per group */
unsigned long s_gdb_count; /* Number of group descriptor blocks */
unsigned long s_desc_per_block; /* Number of group descriptors per block */
ext4_group_t s_groups_count; /* Number of groups in the fs */
unsigned long s_overhead_last; /* Last calculated overhead */
unsigned long s_blocks_last; /* Last seen block count */
loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
struct buffer_head * s_sbh; /* Buffer containing the super block */
struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
struct buffer_head **s_group_desc;
unsigned long s_mount_opt;
ext4_fsblk_t s_sb_block;
uid_t s_resuid;
gid_t s_resgid;
unsigned short s_mount_state;
unsigned short s_pad;
int s_addr_per_block_bits;
int s_desc_per_block_bits;
int s_inode_size;
int s_first_ino;
unsigned int s_inode_readahead_blks;
spinlock_t s_next_gen_lock;
u32 s_next_generation;
u32 s_hash_seed[4];
int s_def_hash_version;
int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */
struct percpu_counter s_freeblocks_counter;
struct percpu_counter s_freeinodes_counter;
struct percpu_counter s_dirs_counter;
struct percpu_counter s_dirtyblocks_counter;
struct blockgroup_lock *s_blockgroup_lock;
struct proc_dir_entry *s_proc;
struct kobject s_kobj;
struct completion s_kobj_unregister;
/* Journaling */
struct inode *s_journal_inode;
struct journal_s *s_journal;
struct list_head s_orphan;
unsigned long s_commit_interval;
u32 s_max_batch_time;
u32 s_min_batch_time;
struct block_device *journal_bdev;
#ifdef CONFIG_JBD2_DEBUG
struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
#endif
#ifdef CONFIG_QUOTA
char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
int s_jquota_fmt; /* Format of quota to use */
#endif
unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
#ifdef EXTENTS_STATS
/* ext4 extents stats */
unsigned long s_ext_min;
unsigned long s_ext_max;
unsigned long s_depth_max;
spinlock_t s_ext_stats_lock;
unsigned long s_ext_blocks;
unsigned long s_ext_extents;
#endif
/* for buddy allocator */
struct ext4_group_info ***s_group_info;
struct inode *s_buddy_cache;
long s_blocks_reserved;
spinlock_t s_reserve_lock;
spinlock_t s_md_lock;
tid_t s_last_transaction;
unsigned short *s_mb_offsets;
unsigned int *s_mb_maxs;
/* tunables */
unsigned long s_stripe;
unsigned int s_mb_stream_request;
unsigned int s_mb_max_to_scan;
unsigned int s_mb_min_to_scan;
unsigned int s_mb_stats;
unsigned int s_mb_order2_reqs;
unsigned int s_mb_group_prealloc;
/* where last allocation was done - for stream allocation */
unsigned long s_mb_last_group;
unsigned long s_mb_last_start;
/* history to debug policy */
struct ext4_mb_history *s_mb_history;
int s_mb_history_cur;
int s_mb_history_max;
int s_mb_history_num;
spinlock_t s_mb_history_lock;
int s_mb_history_filter;
/* stats for buddy allocator */
spinlock_t s_mb_pa_lock;
atomic_t s_bal_reqs; /* number of reqs with len > 1 */
atomic_t s_bal_success; /* we found long enough chunks */
atomic_t s_bal_allocated; /* in blocks */
atomic_t s_bal_ex_scanned; /* total extents scanned */
atomic_t s_bal_goals; /* goal hits */
atomic_t s_bal_breaks; /* too long searches */
atomic_t s_bal_2orders; /* 2^order hits */
spinlock_t s_bal_lock;
unsigned long s_mb_buddies_generated;
unsigned long long s_mb_generation_time;
atomic_t s_mb_lost_chunks;
atomic_t s_mb_preallocated;
atomic_t s_mb_discarded;
/* locality groups */
struct ext4_locality_group *s_locality_groups;
/* for write statistics */
unsigned long s_sectors_written_start;
u64 s_kbytes_written;
unsigned int s_log_groups_per_flex;
struct flex_groups *s_flex_groups;
};
static inline spinlock_t *
sb_bgl_lock(struct ext4_sb_info *sbi, unsigned int block_group)
{
return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group);
}
#endif /* _EXT4_SB */
...@@ -326,32 +326,18 @@ ext4_ext_max_entries(struct inode *inode, int depth) ...@@ -326,32 +326,18 @@ ext4_ext_max_entries(struct inode *inode, int depth)
static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
{ {
ext4_fsblk_t block = ext_pblock(ext), valid_block; ext4_fsblk_t block = ext_pblock(ext);
int len = ext4_ext_get_actual_len(ext); int len = ext4_ext_get_actual_len(ext);
struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
valid_block = le32_to_cpu(es->s_first_data_block) + return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
EXT4_SB(inode->i_sb)->s_gdb_count;
if (unlikely(block <= valid_block ||
((block + len) > ext4_blocks_count(es))))
return 0;
else
return 1;
} }
static int ext4_valid_extent_idx(struct inode *inode, static int ext4_valid_extent_idx(struct inode *inode,
struct ext4_extent_idx *ext_idx) struct ext4_extent_idx *ext_idx)
{ {
ext4_fsblk_t block = idx_pblock(ext_idx), valid_block; ext4_fsblk_t block = idx_pblock(ext_idx);
struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
valid_block = le32_to_cpu(es->s_first_data_block) + return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1);
EXT4_SB(inode->i_sb)->s_gdb_count;
if (unlikely(block <= valid_block ||
(block >= ext4_blocks_count(es))))
return 0;
else
return 1;
} }
static int ext4_valid_extent_entries(struct inode *inode, static int ext4_valid_extent_entries(struct inode *inode,
...@@ -2097,12 +2083,16 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, ...@@ -2097,12 +2083,16 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
ex = EXT_LAST_EXTENT(eh); ex = EXT_LAST_EXTENT(eh);
ex_ee_block = le32_to_cpu(ex->ee_block); ex_ee_block = le32_to_cpu(ex->ee_block);
if (ext4_ext_is_uninitialized(ex))
uninitialized = 1;
ex_ee_len = ext4_ext_get_actual_len(ex); ex_ee_len = ext4_ext_get_actual_len(ex);
while (ex >= EXT_FIRST_EXTENT(eh) && while (ex >= EXT_FIRST_EXTENT(eh) &&
ex_ee_block + ex_ee_len > start) { ex_ee_block + ex_ee_len > start) {
if (ext4_ext_is_uninitialized(ex))
uninitialized = 1;
else
uninitialized = 0;
ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len); ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len);
path[depth].p_ext = ex; path[depth].p_ext = ex;
...@@ -2784,7 +2774,7 @@ fix_extent_len: ...@@ -2784,7 +2774,7 @@ fix_extent_len:
int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
ext4_lblk_t iblock, ext4_lblk_t iblock,
unsigned int max_blocks, struct buffer_head *bh_result, unsigned int max_blocks, struct buffer_head *bh_result,
int create, int extend_disksize) int flags)
{ {
struct ext4_ext_path *path = NULL; struct ext4_ext_path *path = NULL;
struct ext4_extent_header *eh; struct ext4_extent_header *eh;
...@@ -2793,7 +2783,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ...@@ -2793,7 +2783,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
int err = 0, depth, ret, cache_type; int err = 0, depth, ret, cache_type;
unsigned int allocated = 0; unsigned int allocated = 0;
struct ext4_allocation_request ar; struct ext4_allocation_request ar;
loff_t disksize;
__clear_bit(BH_New, &bh_result->b_state); __clear_bit(BH_New, &bh_result->b_state);
ext_debug("blocks %u/%u requested for inode %u\n", ext_debug("blocks %u/%u requested for inode %u\n",
...@@ -2803,7 +2792,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ...@@ -2803,7 +2792,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
cache_type = ext4_ext_in_cache(inode, iblock, &newex); cache_type = ext4_ext_in_cache(inode, iblock, &newex);
if (cache_type) { if (cache_type) {
if (cache_type == EXT4_EXT_CACHE_GAP) { if (cache_type == EXT4_EXT_CACHE_GAP) {
if (!create) { if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
/* /*
* block isn't allocated yet and * block isn't allocated yet and
* user doesn't want to allocate it * user doesn't want to allocate it
...@@ -2869,9 +2858,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ...@@ -2869,9 +2858,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
EXT4_EXT_CACHE_EXTENT); EXT4_EXT_CACHE_EXTENT);
goto out; goto out;
} }
if (create == EXT4_CREATE_UNINITIALIZED_EXT) if (flags & EXT4_GET_BLOCKS_UNINIT_EXT)
goto out; goto out;
if (!create) { if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
if (allocated > max_blocks)
allocated = max_blocks;
/* /*
* We have blocks reserved already. We * We have blocks reserved already. We
* return allocated blocks so that delalloc * return allocated blocks so that delalloc
...@@ -2879,8 +2870,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ...@@ -2879,8 +2870,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
* the buffer head will be unmapped so that * the buffer head will be unmapped so that
* a read from the block returns 0s. * a read from the block returns 0s.
*/ */
if (allocated > max_blocks)
allocated = max_blocks;
set_buffer_unwritten(bh_result); set_buffer_unwritten(bh_result);
bh_result->b_bdev = inode->i_sb->s_bdev; bh_result->b_bdev = inode->i_sb->s_bdev;
bh_result->b_blocknr = newblock; bh_result->b_blocknr = newblock;
...@@ -2903,7 +2892,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ...@@ -2903,7 +2892,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
* requested block isn't allocated yet; * requested block isn't allocated yet;
* we couldn't try to create block if create flag is zero * we couldn't try to create block if create flag is zero
*/ */
if (!create) { if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
/* /*
* put just found gap into cache to speed up * put just found gap into cache to speed up
* subsequent requests * subsequent requests
...@@ -2932,10 +2921,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ...@@ -2932,10 +2921,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
* EXT_UNINIT_MAX_LEN. * EXT_UNINIT_MAX_LEN.
*/ */
if (max_blocks > EXT_INIT_MAX_LEN && if (max_blocks > EXT_INIT_MAX_LEN &&
create != EXT4_CREATE_UNINITIALIZED_EXT) !(flags & EXT4_GET_BLOCKS_UNINIT_EXT))
max_blocks = EXT_INIT_MAX_LEN; max_blocks = EXT_INIT_MAX_LEN;
else if (max_blocks > EXT_UNINIT_MAX_LEN && else if (max_blocks > EXT_UNINIT_MAX_LEN &&
create == EXT4_CREATE_UNINITIALIZED_EXT) (flags & EXT4_GET_BLOCKS_UNINIT_EXT))
max_blocks = EXT_UNINIT_MAX_LEN; max_blocks = EXT_UNINIT_MAX_LEN;
/* Check if we can really insert (iblock)::(iblock+max_blocks) extent */ /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */
...@@ -2966,7 +2955,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ...@@ -2966,7 +2955,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
/* try to insert new extent into found leaf and return */ /* try to insert new extent into found leaf and return */
ext4_ext_store_pblock(&newex, newblock); ext4_ext_store_pblock(&newex, newblock);
newex.ee_len = cpu_to_le16(ar.len); newex.ee_len = cpu_to_le16(ar.len);
if (create == EXT4_CREATE_UNINITIALIZED_EXT) /* Mark uninitialized */ if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) /* Mark uninitialized */
ext4_ext_mark_uninitialized(&newex); ext4_ext_mark_uninitialized(&newex);
err = ext4_ext_insert_extent(handle, inode, path, &newex); err = ext4_ext_insert_extent(handle, inode, path, &newex);
if (err) { if (err) {
...@@ -2983,18 +2972,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ...@@ -2983,18 +2972,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
newblock = ext_pblock(&newex); newblock = ext_pblock(&newex);
allocated = ext4_ext_get_actual_len(&newex); allocated = ext4_ext_get_actual_len(&newex);
outnew: outnew:
if (extend_disksize) {
disksize = ((loff_t) iblock + ar.len) << inode->i_blkbits;
if (disksize > i_size_read(inode))
disksize = i_size_read(inode);
if (disksize > EXT4_I(inode)->i_disksize)
EXT4_I(inode)->i_disksize = disksize;
}
set_buffer_new(bh_result); set_buffer_new(bh_result);
/* Cache only when it is _not_ an uninitialized extent */ /* Cache only when it is _not_ an uninitialized extent */
if (create != EXT4_CREATE_UNINITIALIZED_EXT) if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
ext4_ext_put_in_cache(inode, iblock, allocated, newblock, ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
EXT4_EXT_CACHE_EXTENT); EXT4_EXT_CACHE_EXTENT);
out: out:
...@@ -3150,9 +3131,10 @@ retry: ...@@ -3150,9 +3131,10 @@ retry:
ret = PTR_ERR(handle); ret = PTR_ERR(handle);
break; break;
} }
ret = ext4_get_blocks_wrap(handle, inode, block, map_bh.b_state = 0;
ret = ext4_get_blocks(handle, inode, block,
max_blocks, &map_bh, max_blocks, &map_bh,
EXT4_CREATE_UNINITIALIZED_EXT, 0, 0); EXT4_GET_BLOCKS_CREATE_UNINIT_EXT);
if (ret <= 0) { if (ret <= 0) {
#ifdef EXT4FS_DEBUG #ifdef EXT4FS_DEBUG
WARN_ON(ret <= 0); WARN_ON(ret <= 0);
...@@ -3195,7 +3177,7 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, ...@@ -3195,7 +3177,7 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
void *data) void *data)
{ {
struct fiemap_extent_info *fieinfo = data; struct fiemap_extent_info *fieinfo = data;
unsigned long blksize_bits = inode->i_sb->s_blocksize_bits; unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
__u64 logical; __u64 logical;
__u64 physical; __u64 physical;
__u64 length; __u64 length;
...@@ -3242,9 +3224,16 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, ...@@ -3242,9 +3224,16 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
* *
* XXX this might miss a single-block extent at EXT_MAX_BLOCK * XXX this might miss a single-block extent at EXT_MAX_BLOCK
*/ */
if (logical + length - 1 == EXT_MAX_BLOCK || if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK ||
ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK) newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) {
loff_t size = i_size_read(inode);
loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb);
flags |= FIEMAP_EXTENT_LAST; flags |= FIEMAP_EXTENT_LAST;
if ((flags & FIEMAP_EXTENT_DELALLOC) &&
logical+length > size)
length = (size - logical + bs - 1) & ~(bs-1);
}
error = fiemap_fill_next_extent(fieinfo, logical, physical, error = fiemap_fill_next_extent(fieinfo, logical, physical,
length, flags); length, flags);
...@@ -3318,10 +3307,10 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, ...@@ -3318,10 +3307,10 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
* Walk the extent tree gathering extent information. * Walk the extent tree gathering extent information.
* ext4_ext_fiemap_cb will push extents back to user. * ext4_ext_fiemap_cb will push extents back to user.
*/ */
down_write(&EXT4_I(inode)->i_data_sem); down_read(&EXT4_I(inode)->i_data_sem);
error = ext4_ext_walk_space(inode, start_blk, len_blks, error = ext4_ext_walk_space(inode, start_blk, len_blks,
ext4_ext_fiemap_cb, fieinfo); ext4_ext_fiemap_cb, fieinfo);
up_write(&EXT4_I(inode)->i_data_sem); up_read(&EXT4_I(inode)->i_data_sem);
} }
return error; return error;
......
/*
* linux/fs/ext4/group.h
*
* Copyright (C) 2007 Cluster File Systems, Inc
*
* Author: Andreas Dilger <adilger@clusterfs.com>
*/
#ifndef _LINUX_EXT4_GROUP_H
#define _LINUX_EXT4_GROUP_H
extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
struct ext4_group_desc *gdp);
extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
struct ext4_group_desc *gdp);
struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
ext4_group_t block_group);
extern unsigned ext4_init_block_bitmap(struct super_block *sb,
struct buffer_head *bh,
ext4_group_t group,
struct ext4_group_desc *desc);
#define ext4_free_blocks_after_init(sb, group, desc) \
ext4_init_block_bitmap(sb, NULL, group, desc)
extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
struct buffer_head *bh,
ext4_group_t group,
struct ext4_group_desc *desc);
extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
#endif /* _LINUX_EXT4_GROUP_H */
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -23,7 +23,6 @@ ...@@ -23,7 +23,6 @@
#include <linux/mutex.h> #include <linux/mutex.h>
#include "ext4_jbd2.h" #include "ext4_jbd2.h"
#include "ext4.h" #include "ext4.h"
#include "group.h"
/* /*
* with AGGRESSIVE_CHECK allocator runs consistency checks over * with AGGRESSIVE_CHECK allocator runs consistency checks over
......
...@@ -37,7 +37,6 @@ ...@@ -37,7 +37,6 @@
#include "ext4.h" #include "ext4.h"
#include "ext4_jbd2.h" #include "ext4_jbd2.h"
#include "namei.h"
#include "xattr.h" #include "xattr.h"
#include "acl.h" #include "acl.h"
...@@ -750,7 +749,7 @@ static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize, ...@@ -750,7 +749,7 @@ static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
ext4fs_dirhash(de->name, de->name_len, &h); ext4fs_dirhash(de->name, de->name_len, &h);
map_tail--; map_tail--;
map_tail->hash = h.hash; map_tail->hash = h.hash;
map_tail->offs = (u16) ((char *) de - base); map_tail->offs = ((char *) de - base)>>2;
map_tail->size = le16_to_cpu(de->rec_len); map_tail->size = le16_to_cpu(de->rec_len);
count++; count++;
cond_resched(); cond_resched();
...@@ -1148,7 +1147,8 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count, ...@@ -1148,7 +1147,8 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count,
unsigned rec_len = 0; unsigned rec_len = 0;
while (count--) { while (count--) {
struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) (from + map->offs); struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)
(from + (map->offs<<2));
rec_len = EXT4_DIR_REC_LEN(de->name_len); rec_len = EXT4_DIR_REC_LEN(de->name_len);
memcpy (to, de, rec_len); memcpy (to, de, rec_len);
((struct ext4_dir_entry_2 *) to)->rec_len = ((struct ext4_dir_entry_2 *) to)->rec_len =
...@@ -1997,7 +1997,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) ...@@ -1997,7 +1997,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
if (!ext4_handle_valid(handle)) if (!ext4_handle_valid(handle))
return 0; return 0;
lock_super(sb); mutex_lock(&EXT4_SB(sb)->s_orphan_lock);
if (!list_empty(&EXT4_I(inode)->i_orphan)) if (!list_empty(&EXT4_I(inode)->i_orphan))
goto out_unlock; goto out_unlock;
...@@ -2006,9 +2006,13 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) ...@@ -2006,9 +2006,13 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
/* @@@ FIXME: Observation from aviro: /* @@@ FIXME: Observation from aviro:
* I think I can trigger J_ASSERT in ext4_orphan_add(). We block * I think I can trigger J_ASSERT in ext4_orphan_add(). We block
* here (on lock_super()), so race with ext4_link() which might bump * here (on s_orphan_lock), so race with ext4_link() which might bump
* ->i_nlink. For, say it, character device. Not a regular file, * ->i_nlink. For, say it, character device. Not a regular file,
* not a directory, not a symlink and ->i_nlink > 0. * not a directory, not a symlink and ->i_nlink > 0.
*
* tytso, 4/25/2009: I'm not sure how that could happen;
* shouldn't the fs core protect us from these sort of
* unlink()/link() races?
*/ */
J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
...@@ -2045,7 +2049,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) ...@@ -2045,7 +2049,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
jbd_debug(4, "orphan inode %lu will point to %d\n", jbd_debug(4, "orphan inode %lu will point to %d\n",
inode->i_ino, NEXT_ORPHAN(inode)); inode->i_ino, NEXT_ORPHAN(inode));
out_unlock: out_unlock:
unlock_super(sb); mutex_unlock(&EXT4_SB(sb)->s_orphan_lock);
ext4_std_error(inode->i_sb, err); ext4_std_error(inode->i_sb, err);
return err; return err;
} }
...@@ -2066,11 +2070,9 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) ...@@ -2066,11 +2070,9 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
if (!ext4_handle_valid(handle)) if (!ext4_handle_valid(handle))
return 0; return 0;
lock_super(inode->i_sb); mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
if (list_empty(&ei->i_orphan)) { if (list_empty(&ei->i_orphan))
unlock_super(inode->i_sb); goto out;
return 0;
}
ino_next = NEXT_ORPHAN(inode); ino_next = NEXT_ORPHAN(inode);
prev = ei->i_orphan.prev; prev = ei->i_orphan.prev;
...@@ -2120,7 +2122,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) ...@@ -2120,7 +2122,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
out_err: out_err:
ext4_std_error(inode->i_sb, err); ext4_std_error(inode->i_sb, err);
out: out:
unlock_super(inode->i_sb); mutex_unlock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
return err; return err;
out_brelse: out_brelse:
...@@ -2533,6 +2535,7 @@ const struct inode_operations ext4_dir_inode_operations = { ...@@ -2533,6 +2535,7 @@ const struct inode_operations ext4_dir_inode_operations = {
.removexattr = generic_removexattr, .removexattr = generic_removexattr,
#endif #endif
.permission = ext4_permission, .permission = ext4_permission,
.fiemap = ext4_fiemap,
}; };
const struct inode_operations ext4_special_inode_operations = { const struct inode_operations ext4_special_inode_operations = {
......
/* linux/fs/ext4/namei.h
*
* Copyright (C) 2005 Simtec Electronics
* Ben Dooks <ben@simtec.co.uk>
*
*/
extern struct dentry *ext4_get_parent(struct dentry *child);
...@@ -15,7 +15,6 @@ ...@@ -15,7 +15,6 @@
#include <linux/slab.h> #include <linux/slab.h>
#include "ext4_jbd2.h" #include "ext4_jbd2.h"
#include "group.h"
#define outside(b, first, last) ((b) < (first) || (b) >= (last)) #define outside(b, first, last) ((b) < (first) || (b) >= (last))
#define inside(b, first, last) ((b) >= (first) && (b) < (last)) #define inside(b, first, last) ((b) >= (first) && (b) < (last))
...@@ -193,7 +192,7 @@ static int setup_new_group_blocks(struct super_block *sb, ...@@ -193,7 +192,7 @@ static int setup_new_group_blocks(struct super_block *sb,
if (IS_ERR(handle)) if (IS_ERR(handle))
return PTR_ERR(handle); return PTR_ERR(handle);
lock_super(sb); mutex_lock(&sbi->s_resize_lock);
if (input->group != sbi->s_groups_count) { if (input->group != sbi->s_groups_count) {
err = -EBUSY; err = -EBUSY;
goto exit_journal; goto exit_journal;
...@@ -302,7 +301,7 @@ exit_bh: ...@@ -302,7 +301,7 @@ exit_bh:
brelse(bh); brelse(bh);
exit_journal: exit_journal:
unlock_super(sb); mutex_unlock(&sbi->s_resize_lock);
if ((err2 = ext4_journal_stop(handle)) && !err) if ((err2 = ext4_journal_stop(handle)) && !err)
err = err2; err = err2;
...@@ -643,11 +642,12 @@ exit_free: ...@@ -643,11 +642,12 @@ exit_free:
* important part is that the new block and inode counts are in the backup * important part is that the new block and inode counts are in the backup
* superblocks, and the location of the new group metadata in the GDT backups. * superblocks, and the location of the new group metadata in the GDT backups.
* *
* We do not need lock_super() for this, because these blocks are not * We do not need take the s_resize_lock for this, because these
* otherwise touched by the filesystem code when it is mounted. We don't * blocks are not otherwise touched by the filesystem code when it is
* need to worry about last changing from sbi->s_groups_count, because the * mounted. We don't need to worry about last changing from
* worst that can happen is that we do not copy the full number of backups * sbi->s_groups_count, because the worst that can happen is that we
* at this time. The resize which changed s_groups_count will backup again. * do not copy the full number of backups at this time. The resize
* which changed s_groups_count will backup again.
*/ */
static void update_backups(struct super_block *sb, static void update_backups(struct super_block *sb,
int blk_off, char *data, int size) int blk_off, char *data, int size)
...@@ -809,7 +809,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) ...@@ -809,7 +809,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
goto exit_put; goto exit_put;
} }
lock_super(sb); mutex_lock(&sbi->s_resize_lock);
if (input->group != sbi->s_groups_count) { if (input->group != sbi->s_groups_count) {
ext4_warning(sb, __func__, ext4_warning(sb, __func__,
"multiple resizers run on filesystem!"); "multiple resizers run on filesystem!");
...@@ -840,7 +840,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) ...@@ -840,7 +840,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
/* /*
* OK, now we've set up the new group. Time to make it active. * OK, now we've set up the new group. Time to make it active.
* *
* Current kernels don't lock all allocations via lock_super(), * We do not lock all allocations via s_resize_lock
* so we have to be safe wrt. concurrent accesses the group * so we have to be safe wrt. concurrent accesses the group
* data. So we need to be careful to set all of the relevant * data. So we need to be careful to set all of the relevant
* group descriptor data etc. *before* we enable the group. * group descriptor data etc. *before* we enable the group.
...@@ -900,12 +900,12 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) ...@@ -900,12 +900,12 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
* *
* The precise rules we use are: * The precise rules we use are:
* *
* * Writers of s_groups_count *must* hold lock_super * * Writers of s_groups_count *must* hold s_resize_lock
* AND * AND
* * Writers must perform a smp_wmb() after updating all dependent * * Writers must perform a smp_wmb() after updating all dependent
* data and before modifying the groups count * data and before modifying the groups count
* *
* * Readers must hold lock_super() over the access * * Readers must hold s_resize_lock over the access
* OR * OR
* * Readers must perform an smp_rmb() after reading the groups count * * Readers must perform an smp_rmb() after reading the groups count
* and before reading any dependent data. * and before reading any dependent data.
...@@ -948,7 +948,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) ...@@ -948,7 +948,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
sb->s_dirt = 1; sb->s_dirt = 1;
exit_journal: exit_journal:
unlock_super(sb); mutex_unlock(&sbi->s_resize_lock);
if ((err2 = ext4_journal_stop(handle)) && !err) if ((err2 = ext4_journal_stop(handle)) && !err)
err = err2; err = err2;
if (!err) { if (!err) {
...@@ -986,7 +986,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ...@@ -986,7 +986,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
/* We don't need to worry about locking wrt other resizers just /* We don't need to worry about locking wrt other resizers just
* yet: we're going to revalidate es->s_blocks_count after * yet: we're going to revalidate es->s_blocks_count after
* taking lock_super() below. */ * taking the s_resize_lock below. */
o_blocks_count = ext4_blocks_count(es); o_blocks_count = ext4_blocks_count(es);
o_groups_count = EXT4_SB(sb)->s_groups_count; o_groups_count = EXT4_SB(sb)->s_groups_count;
...@@ -1056,11 +1056,11 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ...@@ -1056,11 +1056,11 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
goto exit_put; goto exit_put;
} }
lock_super(sb); mutex_lock(&EXT4_SB(sb)->s_resize_lock);
if (o_blocks_count != ext4_blocks_count(es)) { if (o_blocks_count != ext4_blocks_count(es)) {
ext4_warning(sb, __func__, ext4_warning(sb, __func__,
"multiple resizers run on filesystem!"); "multiple resizers run on filesystem!");
unlock_super(sb); mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
ext4_journal_stop(handle); ext4_journal_stop(handle);
err = -EBUSY; err = -EBUSY;
goto exit_put; goto exit_put;
...@@ -1070,14 +1070,14 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ...@@ -1070,14 +1070,14 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
EXT4_SB(sb)->s_sbh))) { EXT4_SB(sb)->s_sbh))) {
ext4_warning(sb, __func__, ext4_warning(sb, __func__,
"error %d on journal write access", err); "error %d on journal write access", err);
unlock_super(sb); mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
ext4_journal_stop(handle); ext4_journal_stop(handle);
goto exit_put; goto exit_put;
} }
ext4_blocks_count_set(es, o_blocks_count + add); ext4_blocks_count_set(es, o_blocks_count + add);
ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
sb->s_dirt = 1; sb->s_dirt = 1;
unlock_super(sb); mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
o_blocks_count + add); o_blocks_count + add);
/* We add the blocks to the bitmap and set the group need init bit */ /* We add the blocks to the bitmap and set the group need init bit */
......
This diff is collapsed.
...@@ -414,10 +414,6 @@ static int file_ioctl(struct file *filp, unsigned int cmd, ...@@ -414,10 +414,6 @@ static int file_ioctl(struct file *filp, unsigned int cmd,
switch (cmd) { switch (cmd) {
case FIBMAP: case FIBMAP:
return ioctl_fibmap(filp, p); return ioctl_fibmap(filp, p);
case FS_IOC_FIEMAP:
return ioctl_fiemap(filp, arg);
case FIGETBSZ:
return put_user(inode->i_sb->s_blocksize, p);
case FIONREAD: case FIONREAD:
return put_user(i_size_read(inode) - filp->f_pos, p); return put_user(i_size_read(inode) - filp->f_pos, p);
} }
...@@ -557,6 +553,16 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, ...@@ -557,6 +553,16 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
error = ioctl_fsthaw(filp); error = ioctl_fsthaw(filp);
break; break;
case FS_IOC_FIEMAP:
return ioctl_fiemap(filp, arg);
case FIGETBSZ:
{
struct inode *inode = filp->f_path.dentry->d_inode;
int __user *p = (int __user *)arg;
return put_user(inode->i_sb->s_blocksize, p);
}
default: default:
if (S_ISREG(filp->f_path.dentry->d_inode->i_mode)) if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
error = file_ioctl(filp, cmd, arg); error = file_ioctl(filp, cmd, arg);
......
...@@ -1781,7 +1781,7 @@ int jbd2_journal_wipe(journal_t *journal, int write) ...@@ -1781,7 +1781,7 @@ int jbd2_journal_wipe(journal_t *journal, int write)
* Journal abort has very specific semantics, which we describe * Journal abort has very specific semantics, which we describe
* for journal abort. * for journal abort.
* *
* Two internal function, which provide abort to te jbd layer * Two internal functions, which provide abort to the jbd layer
* itself are here. * itself are here.
*/ */
...@@ -1879,7 +1879,7 @@ void jbd2_journal_abort(journal_t *journal, int errno) ...@@ -1879,7 +1879,7 @@ void jbd2_journal_abort(journal_t *journal, int errno)
* int jbd2_journal_errno () - returns the journal's error state. * int jbd2_journal_errno () - returns the journal's error state.
* @journal: journal to examine. * @journal: journal to examine.
* *
* This is the errno numbet set with jbd2_journal_abort(), the last * This is the errno number set with jbd2_journal_abort(), the last
* time the journal was mounted - if the journal was stopped * time the journal was mounted - if the journal was stopped
* without calling abort this will be 0. * without calling abort this will be 0.
* *
...@@ -1903,7 +1903,7 @@ int jbd2_journal_errno(journal_t *journal) ...@@ -1903,7 +1903,7 @@ int jbd2_journal_errno(journal_t *journal)
* int jbd2_journal_clear_err () - clears the journal's error state * int jbd2_journal_clear_err () - clears the journal's error state
* @journal: journal to act on. * @journal: journal to act on.
* *
* An error must be cleared or Acked to take a FS out of readonly * An error must be cleared or acked to take a FS out of readonly
* mode. * mode.
*/ */
int jbd2_journal_clear_err(journal_t *journal) int jbd2_journal_clear_err(journal_t *journal)
...@@ -1923,7 +1923,7 @@ int jbd2_journal_clear_err(journal_t *journal) ...@@ -1923,7 +1923,7 @@ int jbd2_journal_clear_err(journal_t *journal)
* void jbd2_journal_ack_err() - Ack journal err. * void jbd2_journal_ack_err() - Ack journal err.
* @journal: journal to act on. * @journal: journal to act on.
* *
* An error must be cleared or Acked to take a FS out of readonly * An error must be cleared or acked to take a FS out of readonly
* mode. * mode.
*/ */
void jbd2_journal_ack_err(journal_t *journal) void jbd2_journal_ack_err(journal_t *journal)
......
...@@ -379,7 +379,8 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages, ...@@ -379,7 +379,8 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
struct buffer_head map_bh; struct buffer_head map_bh;
unsigned long first_logical_block = 0; unsigned long first_logical_block = 0;
clear_buffer_mapped(&map_bh); map_bh.b_state = 0;
map_bh.b_size = 0;
for (page_idx = 0; page_idx < nr_pages; page_idx++) { for (page_idx = 0; page_idx < nr_pages; page_idx++) {
struct page *page = list_entry(pages->prev, struct page, lru); struct page *page = list_entry(pages->prev, struct page, lru);
...@@ -412,7 +413,8 @@ int mpage_readpage(struct page *page, get_block_t get_block) ...@@ -412,7 +413,8 @@ int mpage_readpage(struct page *page, get_block_t get_block)
struct buffer_head map_bh; struct buffer_head map_bh;
unsigned long first_logical_block = 0; unsigned long first_logical_block = 0;
clear_buffer_mapped(&map_bh); map_bh.b_state = 0;
map_bh.b_size = 0;
bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio, bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio,
&map_bh, &first_logical_block, get_block); &map_bh, &first_logical_block, get_block);
if (bio) if (bio)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment