Commit 9c7af40b authored by Mark Fasheh

ocfs2: throttle back local alloc when low on disk space

Ocfs2's local allocator disables itself for the remainder of the mount when
it has trouble allocating a large enough area from the primary bitmap. That
can cause performance problems, especially for disks which were only
temporarily full or fragmented. This patch allows the allocator to shrink
its window first, before being disabled. Later, it can also be re-enabled so
that any performance drop is minimized.

To do this, we allow the value of osb->local_alloc_bits to be shrunk when
needed. The default value is recorded in a mostly read-only variable so that
we can re-initialize it when required.

Locking had to be updated so that we could protect changes to
local_alloc_bits. Mostly this involves protecting various local alloc values
with the osb spinlock. A new state is also added, OCFS2_LA_THROTTLED, which
is used when the local allocator has shrunk but is not disabled. If the
available space dips below 1 megabyte, the local alloc file is disabled. In
either case, local alloc is re-enabled 30 seconds after the event, or when
an appropriate number of free bits is seen in the primary bitmap.
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
parent ebcee4b5
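
The throttling policy described above is easiest to see in isolation. The
sketch below is an illustrative user-space model of the decision, not code
from the patch: MB_IN_CLUSTERS and LA_DEFAULT_BITS are assumed values
standing in for ocfs2_megabytes_to_clusters() and the mount-time default,
and the 30-second re-enable is only noted in a comment.

/*
 * Illustrative sketch only (not from the patch): a user-space model of
 * the window throttling described above. MB_IN_CLUSTERS and
 * LA_DEFAULT_BITS are assumed values.
 */
#include <stdio.h>

enum la_state { LA_ENABLED, LA_THROTTLED, LA_DISABLED };

#define MB_IN_CLUSTERS	256	/* assumed: clusters per megabyte */
#define LA_DEFAULT_BITS	2048	/* assumed: default window size */

/* Halve the window on ENOSPC/fragmentation; disable below ~1MB. */
static enum la_state recalc_la_window(unsigned int *bits)
{
	unsigned int shrunk = *bits >> 1;

	if (shrunk > MB_IN_CLUSTERS) {
		*bits = shrunk;		/* keep a smaller window */
		return LA_THROTTLED;
	}
	return LA_DISABLED;		/* too small to be useful */
}

int main(void)
{
	unsigned int bits = LA_DEFAULT_BITS;
	enum la_state state = LA_ENABLED;

	/* Simulate repeated allocation failures in the global bitmap. */
	while (state != LA_DISABLED) {
		state = recalc_la_window(&bits);
		printf("window=%u clusters, state=%d\n", bits, state);
	}

	/*
	 * In the kernel, a delayed work item re-enables the local alloc
	 * 30 seconds later, or sooner if enough contiguous free bits
	 * show up in the primary bitmap.
	 */
	return 0;
}

Compiled as a normal C program, this prints the shrinking window sizes until
the model disables itself, mirroring the halving behaviour the patch
implements in ocfs2_recalc_la_window().
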
@@ -73,16 +73,51 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
 					   struct inode *local_alloc_inode);
 
+static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
+{
+	return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
+		osb->local_alloc_state == OCFS2_LA_ENABLED);
+}
+
+void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
+				      unsigned int num_clusters)
+{
+	spin_lock(&osb->osb_lock);
+	if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
+	    osb->local_alloc_state == OCFS2_LA_THROTTLED)
+		if (num_clusters >= osb->local_alloc_default_bits) {
+			cancel_delayed_work(&osb->la_enable_wq);
+			osb->local_alloc_state = OCFS2_LA_ENABLED;
+		}
+	spin_unlock(&osb->osb_lock);
+}
+
+void ocfs2_la_enable_worker(struct work_struct *work)
+{
+	struct ocfs2_super *osb =
+		container_of(work, struct ocfs2_super,
+			     la_enable_wq.work);
+	spin_lock(&osb->osb_lock);
+	osb->local_alloc_state = OCFS2_LA_ENABLED;
+	spin_unlock(&osb->osb_lock);
+}
+
 /*
  * Tell us whether a given allocation should use the local alloc
  * file. Otherwise, it has to go to the main bitmap.
+ *
+ * This function does semi-dirty reads of local alloc size and state!
+ * This is ok however, as the values are re-checked once under mutex.
  */
 int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits)
 {
-	int la_bits = osb->local_alloc_bits;
 	int ret = 0;
+	int la_bits;
+
+	spin_lock(&osb->osb_lock);
+	la_bits = osb->local_alloc_bits;
 
-	if (osb->local_alloc_state != OCFS2_LA_ENABLED)
+	if (!ocfs2_la_state_enabled(osb))
 		goto bail;
 
 	/* la_bits should be at least twice the size (in clusters) of
@@ -96,6 +131,7 @@ int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits)
 bail:
 	mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n",
 	     osb->local_alloc_state, (unsigned long long)bits, la_bits, ret);
+	spin_unlock(&osb->osb_lock);
 
 	return ret;
 }
@@ -208,6 +244,9 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
 	mlog_entry_void();
 
+	cancel_delayed_work(&osb->la_enable_wq);
+	flush_workqueue(ocfs2_wq);
+
 	if (osb->local_alloc_state == OCFS2_LA_UNUSED)
 		goto out;
@@ -445,7 +484,7 @@ out:
 }
 
 /*
- * make sure we've got at least bitswanted contiguous bits in the
+ * make sure we've got at least bits_wanted contiguous bits in the
  * local alloc. You lose them when you drop i_mutex.
  *
  * We will add ourselves to the transaction passed in, but may start
@@ -476,16 +515,18 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
 	mutex_lock(&local_alloc_inode->i_mutex);
 
-	if (osb->local_alloc_state != OCFS2_LA_ENABLED) {
-		status = -ENOSPC;
-		goto bail;
-	}
-
-	if (bits_wanted > osb->local_alloc_bits) {
-		mlog(0, "Asking for more than my max window size!\n");
+	/*
+	 * We must double check state and allocator bits because
+	 * another process may have changed them while holding i_mutex.
+	 */
+	spin_lock(&osb->osb_lock);
+	if (!ocfs2_la_state_enabled(osb) ||
+	    (bits_wanted > osb->local_alloc_bits)) {
+		spin_unlock(&osb->osb_lock);
 		status = -ENOSPC;
 		goto bail;
 	}
+	spin_unlock(&osb->osb_lock);
 
 	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
@@ -513,6 +554,21 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
 			mlog_errno(status);
 			goto bail;
 		}
+
+		/*
+		 * Under certain conditions, the window slide code
+		 * might have reduced the number of bits available or
+		 * disabled the local alloc entirely. Re-check
+		 * here and return -ENOSPC if necessary.
+		 */
+		status = -ENOSPC;
+		if (!ocfs2_la_state_enabled(osb))
+			goto bail;
+
+		free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
+			    le32_to_cpu(alloc->id1.bitmap1.i_used);
+		if (bits_wanted > free_bits)
+			goto bail;
 	}
 
 	ac->ac_inode = local_alloc_inode;
@@ -780,6 +836,85 @@ bail:
 	return status;
 }
 
+enum ocfs2_la_event {
+	OCFS2_LA_EVENT_SLIDE,		/* Normal window slide. */
+	OCFS2_LA_EVENT_FRAGMENTED,	/* The global bitmap has
+					 * enough bits theoretically
+					 * free, but a contiguous
+					 * allocation could not be
+					 * found. */
+	OCFS2_LA_EVENT_ENOSPC,		/* Global bitmap doesn't have
+					 * enough bits free to satisfy
+					 * our request. */
+};
+
+#define OCFS2_LA_ENABLE_INTERVAL (30 * HZ)
+
+/*
+ * Given an event, calculate the size of our next local alloc window.
+ *
+ * This should always be called under i_mutex of the local alloc inode
+ * so that local alloc disabling doesn't race with processes trying to
+ * use the allocator.
+ *
+ * Returns the state which the local alloc was left in. This value can
+ * be ignored by some paths.
+ */
+static int ocfs2_recalc_la_window(struct ocfs2_super *osb,
+				  enum ocfs2_la_event event)
+{
+	unsigned int bits;
+	int state;
+
+	spin_lock(&osb->osb_lock);
+	if (osb->local_alloc_state == OCFS2_LA_DISABLED) {
+		WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED);
+		goto out_unlock;
+	}
+
+	/*
+	 * ENOSPC and fragmentation are treated similarly for now.
+	 */
+	if (event == OCFS2_LA_EVENT_ENOSPC ||
+	    event == OCFS2_LA_EVENT_FRAGMENTED) {
+		/*
+		 * We ran out of contiguous space in the primary
+		 * bitmap. Drastically reduce the number of bits used
+		 * by local alloc until we have to disable it.
+		 */
+		bits = osb->local_alloc_bits >> 1;
+		if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) {
+			/*
+			 * By setting state to THROTTLED, we'll keep
+			 * the number of local alloc bits used down
+			 * until an event occurs which would give us
+			 * reason to assume the bitmap situation might
+			 * have changed.
+			 */
+			osb->local_alloc_state = OCFS2_LA_THROTTLED;
+			osb->local_alloc_bits = bits;
+		} else {
+			osb->local_alloc_state = OCFS2_LA_DISABLED;
+		}
+		queue_delayed_work(ocfs2_wq, &osb->la_enable_wq,
+				   OCFS2_LA_ENABLE_INTERVAL);
+		goto out_unlock;
+	}
+
+	/*
+	 * Don't increase the size of the local alloc window until we
+	 * know we might be able to fulfill the request. Otherwise, we
+	 * risk bouncing around the global bitmap during periods of
+	 * low space.
+	 */
+	if (osb->local_alloc_state != OCFS2_LA_THROTTLED)
+		osb->local_alloc_bits = osb->local_alloc_default_bits;
+
+out_unlock:
+	state = osb->local_alloc_state;
+	spin_unlock(&osb->osb_lock);
+
+	return state;
+}
+
 static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
 						 struct ocfs2_alloc_context **ac,
 						 struct inode **bitmap_inode,
@@ -794,11 +929,20 @@ static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
 		goto bail;
 	}
 
+retry_enospc:
 	(*ac)->ac_bits_wanted = osb->local_alloc_bits;
 
 	status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
+	if (status == -ENOSPC) {
+		if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) ==
+		    OCFS2_LA_DISABLED)
+			goto bail;
+
+		ocfs2_free_ac_resource(*ac);
+		memset(*ac, 0, sizeof(struct ocfs2_alloc_context));
+		goto retry_enospc;
+	}
 	if (status < 0) {
-		if (status != -ENOSPC)
-			mlog_errno(status);
+		mlog_errno(status);
 		goto bail;
 	}
@@ -852,6 +996,34 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
 	 * the more specific cluster api to claim bits. */
 	status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits,
 				      &cluster_off, &cluster_count);
+	if (status == -ENOSPC) {
+retry_enospc:
+		/*
+		 * Note: We could also try syncing the journal here to
+		 * allow use of any free bits which the current
+		 * transaction can't give us access to. --Mark
+		 */
+		if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) ==
+		    OCFS2_LA_DISABLED)
+			goto bail;
+
+		status = ocfs2_claim_clusters(osb, handle, ac,
+					      osb->local_alloc_bits,
+					      &cluster_off,
+					      &cluster_count);
+		if (status == -ENOSPC)
+			goto retry_enospc;
+		/*
+		 * We only shrunk the *minimum* number of bits in our
+		 * request - it's entirely possible that the allocator
+		 * might give us more than we asked for.
+		 */
+		if (status == 0) {
+			spin_lock(&osb->osb_lock);
+			osb->local_alloc_bits = cluster_count;
+			spin_unlock(&osb->osb_lock);
+		}
+	}
+
 	if (status < 0) {
 		if (status != -ENOSPC)
 			mlog_errno(status);
@@ -895,6 +1067,8 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
 	mlog_entry_void();
 
+	ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE);
+
 	/* This will lock the main bitmap for us. */
 	status = ocfs2_local_alloc_reserve_for_window(osb,
 						      &ac,
...
@@ -52,4 +52,8 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
 				 u32 *bit_off,
 				 u32 *num_bits);
 
+void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
+				      unsigned int num_clusters);
+void ocfs2_la_enable_worker(struct work_struct *work);
+
 #endif /* OCFS2_LOCALALLOC_H */
@@ -171,9 +171,13 @@ struct ocfs2_alloc_stats
 enum ocfs2_local_alloc_state
 {
-	OCFS2_LA_UNUSED = 0,
-	OCFS2_LA_ENABLED,
-	OCFS2_LA_DISABLED
+	OCFS2_LA_UNUSED = 0,	/* Local alloc will never be used for
+				 * this mountpoint. */
+	OCFS2_LA_ENABLED,	/* Local alloc is in use. */
+	OCFS2_LA_THROTTLED,	/* Local alloc is in use, but number
+				 * of bits has been reduced. */
+	OCFS2_LA_DISABLED	/* Local alloc has temporarily been
+				 * disabled. */
 };
 
 enum ocfs2_mount_options
@@ -252,9 +256,20 @@ struct ocfs2_super
 	struct ocfs2_journal *journal;
 	unsigned long osb_commit_interval;
 
+	struct delayed_work		la_enable_wq;
+
+	/*
+	 * Must hold local alloc i_mutex and osb->osb_lock to change
+	 * local_alloc_bits. Reads can be done under either lock.
+	 */
 	unsigned int local_alloc_bits;
-	enum ocfs2_local_alloc_state local_alloc_state;
+	unsigned int local_alloc_default_bits;
+
+	enum ocfs2_local_alloc_state local_alloc_state; /* protected
+							 * by osb_lock */
+
 	struct buffer_head *local_alloc_bh;
 	u64 la_last_gd;
 
 	/* Next two fields are for local node slot recovery during
...
@@ -111,7 +111,7 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode,
 						u64 *bg_blkno,
 						u16 *bg_bit_off);
 
-static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
+void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
 {
 	struct inode *inode = ac->ac_inode;
@@ -686,15 +686,6 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb,
 		if ((status < 0) && (status != -ENOSPC)) {
 			mlog_errno(status);
 			goto bail;
-		} else if (status == -ENOSPC) {
-			/* reserve_local_bits will return enospc with
-			 * the local alloc inode still locked, so we
-			 * can change this safely here. */
-			mlog(0, "Disabling local alloc\n");
-			/* We set to OCFS2_LA_DISABLED so that umount
-			 * can clean up what's left of the local
-			 * allocation */
-			osb->local_alloc_state = OCFS2_LA_DISABLED;
 		}
 	}
@@ -1005,6 +996,7 @@ static int ocfs2_cluster_group_search(struct inode *inode,
 	int search = -ENOSPC;
 	int ret;
 	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	u16 tmp_off, tmp_found;
 	unsigned int max_bits, gd_cluster_off;
@@ -1045,6 +1037,12 @@ static int ocfs2_cluster_group_search(struct inode *inode,
 			*bit_off = tmp_off;
 			*bits_found = tmp_found;
 			search = 0; /* success */
+		} else if (tmp_found) {
+			/*
+			 * Don't show bits which we'll be returning
+			 * for allocation to the local alloc bitmap.
+			 */
+			ocfs2_local_alloc_seen_free_bits(osb, tmp_found);
 		}
 	}
@@ -1203,9 +1201,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
 	status = -ENOSPC;
 	/* for now, the chain search is a bit simplistic. We just use
 	 * the 1st group with any empty bits. */
-	while ((status = ac->ac_group_search(alloc_inode, group_bh,
-					     bits_wanted, min_bits, bit_off,
-					     &tmp_bits)) == -ENOSPC) {
+	while ((status = ac->ac_group_search(alloc_inode, group_bh, bits_wanted,
+					     min_bits, bit_off, &tmp_bits)) == -ENOSPC) {
 		if (!bg->bg_next_group)
 			break;
@@ -1838,9 +1835,15 @@ int ocfs2_free_clusters(handle_t *handle,
 	status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
 					  bg_start_bit, bg_blkno,
 					  num_clusters);
-	if (status < 0)
+	if (status < 0) {
 		mlog_errno(status);
+		goto out;
+	}
+
+	ocfs2_local_alloc_seen_free_bits(OCFS2_SB(bitmap_inode->i_sb),
+					 num_clusters);
 
+out:
 	mlog_exit(status);
 	return status;
 }
...
@@ -147,6 +147,7 @@ static inline int ocfs2_is_cluster_bitmap(struct inode *inode)
  * apis above. */
 int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
 				      struct ocfs2_alloc_context *ac);
+void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac);
 
 /* given a cluster offset, calculate which block group it belongs to
  * and return that block offset. */
...
@@ -637,7 +637,8 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 	osb->s_atime_quantum = parsed_options.atime_quantum;
 	osb->preferred_slot = parsed_options.slot;
 	osb->osb_commit_interval = parsed_options.commit_interval;
-	osb->local_alloc_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt);
+	osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt);
+	osb->local_alloc_bits = osb->local_alloc_default_bits;
 
 	status = ocfs2_verify_userspace_stack(osb, &parsed_options);
 	if (status)
@@ -1425,6 +1426,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	osb->local_alloc_state = OCFS2_LA_UNUSED;
 	osb->local_alloc_bh = NULL;
+	INIT_DELAYED_WORK(&osb->la_enable_wq, ocfs2_la_enable_worker);
 
 	init_waitqueue_head(&osb->osb_mount_event);
...