Commit e43c2597 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: Calculate metadata requirements more accurately
  ext4: Fix accounting of reserved metadata blocks
parents 3e8d95d9 9d0be502
......@@ -699,6 +699,8 @@ struct ext4_inode_info {
unsigned int i_reserved_meta_blocks;
unsigned int i_allocated_meta_blocks;
unsigned short i_delalloc_reserved_flag;
sector_t i_da_metadata_calc_last_lblock;
int i_da_metadata_calc_len;
/* on-disk additional length */
__u16 i_extra_isize;
......
......@@ -225,7 +225,8 @@ static inline void ext4_ext_mark_initialized(struct ext4_extent *ext)
ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext));
}
extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks);
extern int ext4_ext_calc_metadata_amount(struct inode *inode,
sector_t lblocks);
extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
......
......@@ -296,29 +296,44 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
* to allocate @blocks
* Worse case is one block per extent
*/
int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks)
int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock)
{
int lcap, icap, rcap, leafs, idxs, num;
int newextents = blocks;
rcap = ext4_ext_space_root_idx(inode, 0);
lcap = ext4_ext_space_block(inode, 0);
icap = ext4_ext_space_block_idx(inode, 0);
struct ext4_inode_info *ei = EXT4_I(inode);
int idxs, num = 0;
/* number of new leaf blocks needed */
num = leafs = (newextents + lcap - 1) / lcap;
idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
/ sizeof(struct ext4_extent_idx));
/*
* Worse case, we need separate index block(s)
* to link all new leaf blocks
* If the new delayed allocation block is contiguous with the
* previous da block, it can share index blocks with the
* previous block, so we only need to allocate a new index
* block every idxs leaf blocks. At ldxs**2 blocks, we need
* an additional index block, and at ldxs**3 blocks, yet
* another index blocks.
*/
idxs = (leafs + icap - 1) / icap;
do {
num += idxs;
idxs = (idxs + icap - 1) / icap;
} while (idxs > rcap);
if (ei->i_da_metadata_calc_len &&
ei->i_da_metadata_calc_last_lblock+1 == lblock) {
if ((ei->i_da_metadata_calc_len % idxs) == 0)
num++;
if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
num++;
if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) {
num++;
ei->i_da_metadata_calc_len = 0;
} else
ei->i_da_metadata_calc_len++;
ei->i_da_metadata_calc_last_lblock++;
return num;
}
return num;
/*
* In the worst case we need a new set of index blocks at
* every level of the inode's extent tree.
*/
ei->i_da_metadata_calc_len = 1;
ei->i_da_metadata_calc_last_lblock = lblock;
return ext_depth(inode) + 1;
}
static int
......
......@@ -1009,38 +1009,44 @@ qsize_t *ext4_get_reserved_space(struct inode *inode)
return &EXT4_I(inode)->i_reserved_quota;
}
#endif
/*
* Calculate the number of metadata blocks need to reserve
* to allocate @blocks for non extent file based file
* to allocate a new block at @lblocks for non extent file based file
*/
static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
static int ext4_indirect_calc_metadata_amount(struct inode *inode,
sector_t lblock)
{
int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
int ind_blks, dind_blks, tind_blks;
/* number of new indirect blocks needed */
ind_blks = (blocks + icap - 1) / icap;
struct ext4_inode_info *ei = EXT4_I(inode);
int dind_mask = EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1;
int blk_bits;
dind_blks = (ind_blks + icap - 1) / icap;
if (lblock < EXT4_NDIR_BLOCKS)
return 0;
tind_blks = 1;
lblock -= EXT4_NDIR_BLOCKS;
return ind_blks + dind_blks + tind_blks;
if (ei->i_da_metadata_calc_len &&
(lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
ei->i_da_metadata_calc_len++;
return 0;
}
ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
ei->i_da_metadata_calc_len = 1;
blk_bits = roundup_pow_of_two(lblock + 1);
return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
}
/*
* Calculate the number of metadata blocks need to reserve
* to allocate given number of blocks
* to allocate a block located at @lblock
*/
static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
{
if (!blocks)
return 0;
if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
return ext4_ext_calc_metadata_amount(inode, blocks);
return ext4_ext_calc_metadata_amount(inode, lblock);
return ext4_indirect_calc_metadata_amount(inode, blocks);
return ext4_indirect_calc_metadata_amount(inode, lblock);
}
/*
......@@ -1076,9 +1082,10 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
* only when we have written all of the delayed
* allocation blocks.
*/
mdb_free = ei->i_allocated_meta_blocks;
mdb_free = ei->i_reserved_meta_blocks;
ei->i_reserved_meta_blocks = 0;
ei->i_da_metadata_calc_len = 0;
percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
ei->i_allocated_meta_blocks = 0;
}
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
......@@ -1802,12 +1809,15 @@ static int ext4_journalled_write_end(struct file *file,
return ret ? ret : copied;
}
static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
/*
* Reserve a single block located at lblock
*/
static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
{
int retries = 0;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned long md_needed, md_reserved, total = 0;
unsigned long md_needed, md_reserved;
/*
* recalculate the amount of metadata blocks to reserve
......@@ -1817,8 +1827,7 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
repeat:
spin_lock(&ei->i_block_reservation_lock);
md_reserved = ei->i_reserved_meta_blocks;
md_needed = ext4_calc_metadata_amount(inode, nrblocks);
total = md_needed + nrblocks;
md_needed = ext4_calc_metadata_amount(inode, lblock);
spin_unlock(&ei->i_block_reservation_lock);
/*
......@@ -1826,7 +1835,7 @@ repeat:
* later. Real quota accounting is done at pages writeout
* time.
*/
if (vfs_dq_reserve_block(inode, total)) {
if (vfs_dq_reserve_block(inode, md_needed + 1)) {
/*
* We tend to badly over-estimate the amount of
* metadata blocks which are needed, so if we have
......@@ -1838,8 +1847,8 @@ repeat:
return -EDQUOT;
}
if (ext4_claim_free_blocks(sbi, total)) {
vfs_dq_release_reservation_block(inode, total);
if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
vfs_dq_release_reservation_block(inode, md_needed + 1);
if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
retry:
if (md_reserved)
......@@ -1850,7 +1859,7 @@ repeat:
return -ENOSPC;
}
spin_lock(&ei->i_block_reservation_lock);
ei->i_reserved_data_blocks += nrblocks;
ei->i_reserved_data_blocks++;
ei->i_reserved_meta_blocks += md_needed;
spin_unlock(&ei->i_block_reservation_lock);
......@@ -1889,8 +1898,9 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
* only when we have written all of the delayed
* allocation blocks.
*/
to_free += ei->i_allocated_meta_blocks;
ei->i_allocated_meta_blocks = 0;
to_free += ei->i_reserved_meta_blocks;
ei->i_reserved_meta_blocks = 0;
ei->i_da_metadata_calc_len = 0;
}
/* update fs dirty blocks counter */
......@@ -2504,7 +2514,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
* XXX: __block_prepare_write() unmaps passed block,
* is it OK?
*/
ret = ext4_da_reserve_space(inode, 1);
ret = ext4_da_reserve_space(inode, iblock);
if (ret)
/* not enough space to reserve */
return ret;
......
......@@ -702,6 +702,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
ei->i_reserved_data_blocks = 0;
ei->i_reserved_meta_blocks = 0;
ei->i_allocated_meta_blocks = 0;
ei->i_da_metadata_calc_len = 0;
ei->i_delalloc_reserved_flag = 0;
spin_lock_init(&(ei->i_block_reservation_lock));
#ifdef CONFIG_QUOTA
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment