Commit e7d4cb6b authored by Tao Ma's avatar Tao Ma Committed by Mark Fasheh

ocfs2: Abstract ocfs2_extent_tree in b-tree operations.

In the old extent tree operation, we take the hypothesis that we
are using the ocfs2_extent_list in ocfs2_dinode as the tree root.
As xattr will also use ocfs2_extent_list to store large value
for a xattr entry, we refactor the tree operation so that xattr
can use it directly.

The refactoring includes 4 steps:
1. Abstract set/get of last_eb_blk and update_clusters since they may
   be stored in different location for dinode and xattr.
2. Add a new structure named ocfs2_extent_tree to indicate the
   extent tree the operation will work on.
3. Remove all the use of fe_bh and di, use root_bh and root_el in
   extent tree instead. So now all the fe_bh is replaced with
   et->root_bh, el with root_el accordingly.
4. Make ocfs2_lock_allocators generic. Now it is limited to be only used
   in file extend allocation. But the whole function is useful when we want
   to store large EAs.

Note: This patch doesn't touch ocfs2_commit_truncate() since it is not used
for anything other than truncate inode data btrees.
Signed-off-by: default avatarTao Ma <tao.ma@oracle.com>
Signed-off-by: default avatarMark Fasheh <mfasheh@suse.com>
parent 811f933d
This diff is collapsed.
......@@ -26,28 +26,37 @@
#ifndef OCFS2_ALLOC_H
#define OCFS2_ALLOC_H
enum ocfs2_extent_tree_type {
OCFS2_DINODE_EXTENT = 0,
};
struct ocfs2_alloc_context;
int ocfs2_insert_extent(struct ocfs2_super *osb,
handle_t *handle,
struct inode *inode,
struct buffer_head *fe_bh,
struct buffer_head *root_bh,
u32 cpos,
u64 start_blk,
u32 new_clusters,
u8 flags,
struct ocfs2_alloc_context *meta_ac);
struct ocfs2_alloc_context *meta_ac,
enum ocfs2_extent_tree_type et_type);
struct ocfs2_cached_dealloc_ctxt;
int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh,
int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *root_bh,
handle_t *handle, u32 cpos, u32 len, u32 phys,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_cached_dealloc_ctxt *dealloc);
int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
struct ocfs2_cached_dealloc_ctxt *dealloc,
enum ocfs2_extent_tree_type et_type);
int ocfs2_remove_extent(struct inode *inode, struct buffer_head *root_bh,
u32 cpos, u32 len, handle_t *handle,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_cached_dealloc_ctxt *dealloc);
struct ocfs2_cached_dealloc_ctxt *dealloc,
enum ocfs2_extent_tree_type et_type);
int ocfs2_num_free_extents(struct ocfs2_super *osb,
struct inode *inode,
struct buffer_head *bh);
struct buffer_head *root_bh,
enum ocfs2_extent_tree_type et_type);
/*
* how many new metadata chunks would an allocation need at maximum?
*
......
......@@ -1278,7 +1278,8 @@ static int ocfs2_write_cluster(struct address_space *mapping,
} else if (unwritten) {
ret = ocfs2_mark_extent_written(inode, wc->w_di_bh,
wc->w_handle, cpos, 1, phys,
meta_ac, &wc->w_dealloc);
meta_ac, &wc->w_dealloc,
OCFS2_DINODE_EXTENT);
if (ret < 0) {
mlog_errno(ret);
goto out;
......@@ -1712,7 +1713,13 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
* ocfs2_lock_allocators(). It greatly over-estimates
* the work to be done.
*/
ret = ocfs2_lock_allocators(inode, wc->w_di_bh,
mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u,"
" clusters_to_add = %u, extents_to_split = %u\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno,
(long long)i_size_read(inode), le32_to_cpu(di->i_clusters),
clusters_to_alloc, extents_to_split);
ret = ocfs2_lock_allocators(inode, wc->w_di_bh, &di->id2.i_list,
clusters_to_alloc, extents_to_split,
&data_ac, &meta_ac);
if (ret) {
......
......@@ -1306,7 +1306,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
* related blocks have been journaled already.
*/
ret = ocfs2_insert_extent(osb, handle, dir, di_bh, 0, blkno, len, 0,
NULL);
NULL, OCFS2_DINODE_EXTENT);
if (ret) {
mlog_errno(ret);
goto out_commit;
......@@ -1338,7 +1338,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off);
ret = ocfs2_insert_extent(osb, handle, dir, di_bh, 1, blkno,
len, 0, NULL);
len, 0, NULL, OCFS2_DINODE_EXTENT);
if (ret) {
mlog_errno(ret);
goto out_commit;
......@@ -1481,7 +1481,8 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) {
spin_unlock(&OCFS2_I(dir)->ip_lock);
num_free_extents = ocfs2_num_free_extents(osb, dir,
parent_fe_bh);
parent_fe_bh,
OCFS2_DINODE_EXTENT);
if (num_free_extents < 0) {
status = num_free_extents;
mlog_errno(status);
......
......@@ -521,7 +521,8 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
if (mark_unwritten)
flags = OCFS2_EXT_UNWRITTEN;
free_extents = ocfs2_num_free_extents(osb, inode, fe_bh);
free_extents = ocfs2_num_free_extents(osb, inode, fe_bh,
OCFS2_DINODE_EXTENT);
if (free_extents < 0) {
status = free_extents;
mlog_errno(status);
......@@ -570,7 +571,7 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
status = ocfs2_insert_extent(osb, handle, inode, fe_bh,
*logical_offset, block, num_bits,
flags, meta_ac);
flags, meta_ac, OCFS2_DINODE_EXTENT);
if (status < 0) {
mlog_errno(status);
goto leave;
......@@ -599,92 +600,6 @@ leave:
return status;
}
/*
* For a given allocation, determine which allocators will need to be
* accessed, and lock them, reserving the appropriate number of bits.
*
* Sparse file systems call this from ocfs2_write_begin_nolock()
* and ocfs2_allocate_unwritten_extents().
*
* File systems which don't support holes call this from
* ocfs2_extend_allocation().
*/
int ocfs2_lock_allocators(struct inode *inode, struct buffer_head *di_bh,
u32 clusters_to_add, u32 extents_to_split,
struct ocfs2_alloc_context **data_ac,
struct ocfs2_alloc_context **meta_ac)
{
int ret = 0, num_free_extents;
unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
*meta_ac = NULL;
if (data_ac)
*data_ac = NULL;
BUG_ON(clusters_to_add != 0 && data_ac == NULL);
mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, "
"clusters_to_add = %u, extents_to_split = %u\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno, (long long)i_size_read(inode),
le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split);
num_free_extents = ocfs2_num_free_extents(osb, inode, di_bh);
if (num_free_extents < 0) {
ret = num_free_extents;
mlog_errno(ret);
goto out;
}
/*
* Sparse allocation file systems need to be more conservative
* with reserving room for expansion - the actual allocation
* happens while we've got a journal handle open so re-taking
* a cluster lock (because we ran out of room for another
* extent) will violate ordering rules.
*
* Most of the time we'll only be seeing this 1 cluster at a time
* anyway.
*
* Always lock for any unwritten extents - we might want to
* add blocks during a split.
*/
if (!num_free_extents ||
(ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) {
ret = ocfs2_reserve_new_metadata(osb, &di->id2.i_list, meta_ac);
if (ret < 0) {
if (ret != -ENOSPC)
mlog_errno(ret);
goto out;
}
}
if (clusters_to_add == 0)
goto out;
ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac);
if (ret < 0) {
if (ret != -ENOSPC)
mlog_errno(ret);
goto out;
}
out:
if (ret) {
if (*meta_ac) {
ocfs2_free_alloc_context(*meta_ac);
*meta_ac = NULL;
}
/*
* We cannot have an error and a non null *data_ac.
*/
}
return ret;
}
static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
u32 clusters_to_add, int mark_unwritten)
{
......@@ -725,7 +640,13 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
restart_all:
BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
status = ocfs2_lock_allocators(inode, bh, clusters_to_add, 0, &data_ac,
mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, "
"clusters_to_add = %u\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno,
(long long)i_size_read(inode), le32_to_cpu(fe->i_clusters),
clusters_to_add);
status = ocfs2_lock_allocators(inode, bh, &fe->id2.i_list,
clusters_to_add, 0, &data_ac,
&meta_ac);
if (status) {
mlog_errno(status);
......@@ -1397,7 +1318,8 @@ static int __ocfs2_remove_inode_range(struct inode *inode,
struct ocfs2_alloc_context *meta_ac = NULL;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
ret = ocfs2_lock_allocators(inode, di_bh, 0, 1, NULL, &meta_ac);
ret = ocfs2_lock_allocators(inode, di_bh, &di->id2.i_list,
0, 1, NULL, &meta_ac);
if (ret) {
mlog_errno(ret);
return ret;
......@@ -1428,7 +1350,7 @@ static int __ocfs2_remove_inode_range(struct inode *inode,
}
ret = ocfs2_remove_extent(inode, di_bh, cpos, len, handle, meta_ac,
dealloc);
dealloc, OCFS2_DINODE_EXTENT);
if (ret) {
mlog_errno(ret);
goto out_commit;
......
......@@ -57,10 +57,6 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
enum ocfs2_alloc_restarted *reason_ret);
int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size,
u64 zero_to);
int ocfs2_lock_allocators(struct inode *inode, struct buffer_head *fe,
u32 clusters_to_add, u32 extents_to_split,
struct ocfs2_alloc_context **data_ac,
struct ocfs2_alloc_context **meta_ac);
int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
......
......@@ -1894,3 +1894,85 @@ static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe)
(unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno);
}
}
/*
* For a given allocation, determine which allocators will need to be
* accessed, and lock them, reserving the appropriate number of bits.
*
* Sparse file systems call this from ocfs2_write_begin_nolock()
* and ocfs2_allocate_unwritten_extents().
*
* File systems which don't support holes call this from
* ocfs2_extend_allocation().
*/
int ocfs2_lock_allocators(struct inode *inode, struct buffer_head *root_bh,
struct ocfs2_extent_list *root_el,
u32 clusters_to_add, u32 extents_to_split,
struct ocfs2_alloc_context **data_ac,
struct ocfs2_alloc_context **meta_ac)
{
int ret = 0, num_free_extents;
unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
*meta_ac = NULL;
if (data_ac)
*data_ac = NULL;
BUG_ON(clusters_to_add != 0 && data_ac == NULL);
num_free_extents = ocfs2_num_free_extents(osb, inode, root_bh,
OCFS2_DINODE_EXTENT);
if (num_free_extents < 0) {
ret = num_free_extents;
mlog_errno(ret);
goto out;
}
/*
* Sparse allocation file systems need to be more conservative
* with reserving room for expansion - the actual allocation
* happens while we've got a journal handle open so re-taking
* a cluster lock (because we ran out of room for another
* extent) will violate ordering rules.
*
* Most of the time we'll only be seeing this 1 cluster at a time
* anyway.
*
* Always lock for any unwritten extents - we might want to
* add blocks during a split.
*/
if (!num_free_extents ||
(ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) {
ret = ocfs2_reserve_new_metadata(osb, root_el, meta_ac);
if (ret < 0) {
if (ret != -ENOSPC)
mlog_errno(ret);
goto out;
}
}
if (clusters_to_add == 0)
goto out;
ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac);
if (ret < 0) {
if (ret != -ENOSPC)
mlog_errno(ret);
goto out;
}
out:
if (ret) {
if (*meta_ac) {
ocfs2_free_alloc_context(*meta_ac);
*meta_ac = NULL;
}
/*
* We cannot have an error and a non null *data_ac.
*/
}
return ret;
}
......@@ -162,4 +162,9 @@ u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster);
int ocfs2_check_group_descriptor(struct super_block *sb,
struct ocfs2_dinode *di,
struct ocfs2_group_desc *gd);
int ocfs2_lock_allocators(struct inode *inode, struct buffer_head *root_bh,
struct ocfs2_extent_list *root_el,
u32 clusters_to_add, u32 extents_to_split,
struct ocfs2_alloc_context **data_ac,
struct ocfs2_alloc_context **meta_ac);
#endif /* _CHAINALLOC_H_ */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment