Commit aa958874 authored by Mark Fasheh

ocfs2: implement directory read-ahead

Uptodate.c now knows about read-ahead buffers. Use some more aggressive
logic in ocfs2_readdir().

The two functions which currently use directory read-ahead are
ocfs2_find_entry() and ocfs2_readdir().
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
parent e0b4096d
...@@ -100,6 +100,9 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, ...@@ -100,6 +100,9 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n", mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n",
(unsigned long long)block, nr, flags, inode); (unsigned long long)block, nr, flags, inode);
BUG_ON((flags & OCFS2_BH_READAHEAD) &&
(!inode || !(flags & OCFS2_BH_CACHED)));
if (osb == NULL || osb->sb == NULL || bhs == NULL) { if (osb == NULL || osb->sb == NULL || bhs == NULL) {
status = -EINVAL; status = -EINVAL;
mlog_errno(status); mlog_errno(status);
...@@ -140,6 +143,30 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, ...@@ -140,6 +143,30 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
bh = bhs[i]; bh = bhs[i];
ignore_cache = 0; ignore_cache = 0;
/* There are three read-ahead cases here which we need to
* be concerned with. All three assume a buffer has
* previously been submitted with OCFS2_BH_READAHEAD
* and it hasn't yet completed I/O.
*
* 1) The current request is sync to disk. This rarely
* happens these days, and never when performance
* matters - the code can just wait on the buffer
* lock and re-submit.
*
* 2) The current request is cached, but not
* readahead. ocfs2_buffer_uptodate() will return
* false anyway, so we'll wind up waiting on the
* buffer lock to do I/O. We re-check the request
* after getting the lock to avoid a re-submit.
*
* 3) The current request is readahead (and so must
* also be a caching one). We short circuit if the
* buffer is locked (under I/O) and if it's in the
* uptodate cache. The re-check from #2 catches the
* case that the previous read-ahead completes just
* before our is-it-in-flight check.
*/
if (flags & OCFS2_BH_CACHED && if (flags & OCFS2_BH_CACHED &&
!ocfs2_buffer_uptodate(inode, bh)) { !ocfs2_buffer_uptodate(inode, bh)) {
mlog(ML_UPTODATE, mlog(ML_UPTODATE,
...@@ -169,6 +196,14 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, ...@@ -169,6 +196,14 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
continue; continue;
} }
/* A read-ahead request was made - if the
* buffer is already under read-ahead from a
* previously submitted request then we are
* done here. */
if ((flags & OCFS2_BH_READAHEAD)
&& ocfs2_buffer_read_ahead(inode, bh))
continue;
lock_buffer(bh); lock_buffer(bh);
if (buffer_jbd(bh)) { if (buffer_jbd(bh)) {
#ifdef CATCH_BH_JBD_RACES #ifdef CATCH_BH_JBD_RACES
...@@ -181,13 +216,22 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, ...@@ -181,13 +216,22 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
continue; continue;
#endif #endif
} }
/* Re-check ocfs2_buffer_uptodate() as a
* previously read-ahead buffer may have
* completed I/O while we were waiting for the
* buffer lock. */
if ((flags & OCFS2_BH_CACHED)
&& !(flags & OCFS2_BH_READAHEAD)
&& ocfs2_buffer_uptodate(inode, bh)) {
unlock_buffer(bh);
continue;
}
clear_buffer_uptodate(bh); clear_buffer_uptodate(bh);
get_bh(bh); /* for end_buffer_read_sync() */ get_bh(bh); /* for end_buffer_read_sync() */
bh->b_end_io = end_buffer_read_sync; bh->b_end_io = end_buffer_read_sync;
if (flags & OCFS2_BH_READAHEAD) submit_bh(READ, bh);
submit_bh(READA, bh);
else
submit_bh(READ, bh);
continue; continue;
} }
} }
...@@ -197,34 +241,39 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, ...@@ -197,34 +241,39 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
for (i = (nr - 1); i >= 0; i--) { for (i = (nr - 1); i >= 0; i--) {
bh = bhs[i]; bh = bhs[i];
/* We know this can't have changed as we hold the if (!(flags & OCFS2_BH_READAHEAD)) {
* inode sem. Avoid doing any work on the bh if the /* We know this can't have changed as we hold the
* journal has it. */ * inode sem. Avoid doing any work on the bh if the
if (!buffer_jbd(bh)) * journal has it. */
wait_on_buffer(bh); if (!buffer_jbd(bh))
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
/* Status won't be cleared from here on out, if (!buffer_uptodate(bh)) {
* so we can safely record this and loop back /* Status won't be cleared from here on out,
* to cleanup the other buffers. Don't need to * so we can safely record this and loop back
* remove the clustered uptodate information * to cleanup the other buffers. Don't need to
* for this bh as it's not marked locally * remove the clustered uptodate information
* uptodate. */ * for this bh as it's not marked locally
status = -EIO; * uptodate. */
brelse(bh); status = -EIO;
bhs[i] = NULL; brelse(bh);
continue; bhs[i] = NULL;
continue;
}
} }
/* Always set the buffer in the cache, even if it was
* a forced read, or read-ahead which hasn't yet
* completed. */
if (inode) if (inode)
ocfs2_set_buffer_uptodate(inode, bh); ocfs2_set_buffer_uptodate(inode, bh);
} }
if (inode) if (inode)
mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s\n", mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n",
(unsigned long long)block, nr, (unsigned long long)block, nr,
(!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes"); (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes", flags);
bail: bail:
......
...@@ -49,7 +49,7 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, ...@@ -49,7 +49,7 @@ int ocfs2_read_blocks(struct ocfs2_super *osb,
#define OCFS2_BH_CACHED 1 #define OCFS2_BH_CACHED 1
#define OCFS2_BH_READAHEAD 8 /* use this to pass READA down to submit_bh */ #define OCFS2_BH_READAHEAD 8
static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off, static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off,
struct buffer_head **bh, int flags, struct buffer_head **bh, int flags,
......
...@@ -74,14 +74,14 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, ...@@ -74,14 +74,14 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
{ {
int error = 0; int error = 0;
unsigned long offset, blk; unsigned long offset, blk, last_ra_blk = 0;
int i, num, stored; int i, stored;
struct buffer_head * bh, * tmp; struct buffer_head * bh, * tmp;
struct ocfs2_dir_entry * de; struct ocfs2_dir_entry * de;
int err; int err;
struct inode *inode = filp->f_dentry->d_inode; struct inode *inode = filp->f_dentry->d_inode;
struct super_block * sb = inode->i_sb; struct super_block * sb = inode->i_sb;
int have_disk_lock = 0; unsigned int ra_sectors = 16;
mlog_entry("dirino=%llu\n", mlog_entry("dirino=%llu\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno); (unsigned long long)OCFS2_I(inode)->ip_blkno);
...@@ -95,9 +95,8 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) ...@@ -95,9 +95,8 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
mlog_errno(error); mlog_errno(error);
/* we haven't got any yet, so propagate the error. */ /* we haven't got any yet, so propagate the error. */
stored = error; stored = error;
goto bail; goto bail_nolock;
} }
have_disk_lock = 1;
offset = filp->f_pos & (sb->s_blocksize - 1); offset = filp->f_pos & (sb->s_blocksize - 1);
...@@ -113,16 +112,21 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) ...@@ -113,16 +112,21 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
continue; continue;
} }
/* /* The idea here is to begin with 8k read-ahead and to stay
* Do the readahead (8k) * 4k ahead of our current position.
*/ *
if (!offset) { * TODO: Use the pagecache for this. We just need to
for (i = 16 >> (sb->s_blocksize_bits - 9), num = 0; * make sure it's cluster-safe... */
if (!last_ra_blk
|| (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) {
for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
i > 0; i--) { i > 0; i--) {
tmp = ocfs2_bread(inode, ++blk, &err, 1); tmp = ocfs2_bread(inode, ++blk, &err, 1);
if (tmp) if (tmp)
brelse(tmp); brelse(tmp);
} }
last_ra_blk = blk;
ra_sectors = 8;
} }
revalidate: revalidate:
...@@ -194,9 +198,9 @@ revalidate: ...@@ -194,9 +198,9 @@ revalidate:
stored = 0; stored = 0;
bail: bail:
if (have_disk_lock) ocfs2_meta_unlock(inode, 0);
ocfs2_meta_unlock(inode, 0);
bail_nolock:
mlog_exit(stored); mlog_exit(stored);
return stored; return stored;
......
...@@ -1050,12 +1050,8 @@ struct buffer_head *ocfs2_bread(struct inode *inode, ...@@ -1050,12 +1050,8 @@ struct buffer_head *ocfs2_bread(struct inode *inode,
u64 p_blkno; u64 p_blkno;
int readflags = OCFS2_BH_CACHED; int readflags = OCFS2_BH_CACHED;
#if 0
/* only turn this on if we know we can deal with read_block
* returning nothing */
if (reada) if (reada)
readflags |= OCFS2_BH_READAHEAD; readflags |= OCFS2_BH_READAHEAD;
#endif
if (((u64)block << inode->i_sb->s_blocksize_bits) >= if (((u64)block << inode->i_sb->s_blocksize_bits) >=
i_size_read(inode)) { i_size_read(inode)) {
......
...@@ -56,6 +56,7 @@ ...@@ -56,6 +56,7 @@
#include "journal.h" #include "journal.h"
#include "namei.h" #include "namei.h"
#include "suballoc.h" #include "suballoc.h"
#include "super.h"
#include "symlink.h" #include "symlink.h"
#include "sysfile.h" #include "sysfile.h"
#include "uptodate.h" #include "uptodate.h"
...@@ -1962,13 +1963,8 @@ restart: ...@@ -1962,13 +1963,8 @@ restart:
} }
num++; num++;
/* XXX: questionable readahead stuff here */
bh = ocfs2_bread(dir, b++, &err, 1); bh = ocfs2_bread(dir, b++, &err, 1);
bh_use[ra_max] = bh; bh_use[ra_max] = bh;
#if 0 // ???
if (bh)
ll_rw_block(READ, 1, &bh);
#endif
} }
} }
if ((bh = bh_use[ra_ptr++]) == NULL) if ((bh = bh_use[ra_ptr++]) == NULL)
...@@ -1976,6 +1972,10 @@ restart: ...@@ -1976,6 +1972,10 @@ restart:
wait_on_buffer(bh); wait_on_buffer(bh);
if (!buffer_uptodate(bh)) { if (!buffer_uptodate(bh)) {
/* read error, skip block & hope for the best */ /* read error, skip block & hope for the best */
ocfs2_error(dir->i_sb, "reading directory %llu, "
"offset %lu\n",
(unsigned long long)OCFS2_I(dir)->ip_blkno,
block);
brelse(bh); brelse(bh);
goto next; goto next;
} }
......
...@@ -206,7 +206,10 @@ static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi, ...@@ -206,7 +206,10 @@ static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi,
} }
/* Warning: even if it returns true, this does *not* guarantee that /* Warning: even if it returns true, this does *not* guarantee that
* the block is stored in our inode metadata cache. */ * the block is stored in our inode metadata cache.
*
* This can be called under lock_buffer()
*/
int ocfs2_buffer_uptodate(struct inode *inode, int ocfs2_buffer_uptodate(struct inode *inode,
struct buffer_head *bh) struct buffer_head *bh)
{ {
...@@ -226,6 +229,16 @@ int ocfs2_buffer_uptodate(struct inode *inode, ...@@ -226,6 +229,16 @@ int ocfs2_buffer_uptodate(struct inode *inode,
return ocfs2_buffer_cached(OCFS2_I(inode), bh); return ocfs2_buffer_cached(OCFS2_I(inode), bh);
} }
/*
 * Report whether a buffer is still out on a previously submitted
 * read-ahead request: the buffer head must be locked and the block must
 * be known to this inode's cache via ocfs2_buffer_cached().
 *
 * Returns 1 when both conditions hold, 0 otherwise.
 *
 * Callers should hold ip_io_mutex so that submitters are serialized
 * with this check.
 */
int ocfs2_buffer_read_ahead(struct inode *inode,
			    struct buffer_head *bh)
{
	/* An unlocked buffer cannot be in flight; skip the cache lookup. */
	if (!buffer_locked(bh))
		return 0;

	return ocfs2_buffer_cached(OCFS2_I(inode), bh) ? 1 : 0;
}
/* Requires ip_lock */ /* Requires ip_lock */
static void ocfs2_append_cache_array(struct ocfs2_caching_info *ci, static void ocfs2_append_cache_array(struct ocfs2_caching_info *ci,
sector_t block) sector_t block)
...@@ -403,7 +416,11 @@ out_free: ...@@ -403,7 +416,11 @@ out_free:
* *
* Note that this function may actually fail to insert the block if * Note that this function may actually fail to insert the block if
* memory cannot be allocated. This is not fatal however (but may * memory cannot be allocated. This is not fatal however (but may
* result in a performance penalty) */ * result in a performance penalty)
*
* Readahead buffers can be passed in here before the I/O request is
* completed.
*/
void ocfs2_set_buffer_uptodate(struct inode *inode, void ocfs2_set_buffer_uptodate(struct inode *inode,
struct buffer_head *bh) struct buffer_head *bh)
{ {
......
...@@ -40,5 +40,7 @@ void ocfs2_set_new_buffer_uptodate(struct inode *inode, ...@@ -40,5 +40,7 @@ void ocfs2_set_new_buffer_uptodate(struct inode *inode,
struct buffer_head *bh); struct buffer_head *bh);
void ocfs2_remove_from_cache(struct inode *inode, void ocfs2_remove_from_cache(struct inode *inode,
struct buffer_head *bh); struct buffer_head *bh);
int ocfs2_buffer_read_ahead(struct inode *inode,
struct buffer_head *bh);
#endif /* OCFS2_UPTODATE_H */ #endif /* OCFS2_UPTODATE_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment