Commit b517bea1, authored by Zach Brown, committed by Linus Torvalds

[PATCH] 64-bit jbd2 core

Here is the patch to JBD to handle 64 bit block numbers, originally from Zach
Brown.  This patch is useful only after adding support for 64-bit block
numbers in the filesystem.
Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Zach Brown <zach.brown@oracle.com>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent d0d856e8
...@@ -271,6 +271,14 @@ write_out_data: ...@@ -271,6 +271,14 @@ write_out_data:
journal_do_submit_data(wbuf, bufs); journal_do_submit_data(wbuf, bufs);
} }
/*
 * Store a block number into a journal block tag in big-endian form.
 *
 * @tag_bytes: size in bytes of one tag for this journal; anything larger
 *             than JBD_TAG_SIZE32 means the tag also carries t_blocknr_high.
 * @tag:       tag to fill in.
 * @block:     block number to record.
 *
 * The low 32 bits always go to t_blocknr.  The upper bits are written to
 * t_blocknr_high only when the tag is large enough to contain that field.
 */
static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
				   sector_t block)
{
	tag->t_blocknr = cpu_to_be32(block & (u32)~0);

	/* Short (32-bit) tags have no room for the high word. */
	if (!(tag_bytes > JBD_TAG_SIZE32))
		return;

	/*
	 * The shift by 32 is done in two steps; presumably this keeps the
	 * expression well defined when sector_t is only 32 bits wide
	 * (NOTE(review): confirm against the sector_t configuration).
	 */
	tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1);
}
/* /*
* jbd2_journal_commit_transaction * jbd2_journal_commit_transaction
* *
...@@ -293,6 +301,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) ...@@ -293,6 +301,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
int first_tag = 0; int first_tag = 0;
int tag_flag; int tag_flag;
int i; int i;
int tag_bytes = journal_tag_bytes(journal);
/* /*
* First job: lock down the current transaction and wait for * First job: lock down the current transaction and wait for
...@@ -597,10 +606,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) ...@@ -597,10 +606,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
tag_flag |= JBD2_FLAG_SAME_UUID; tag_flag |= JBD2_FLAG_SAME_UUID;
tag = (journal_block_tag_t *) tagp; tag = (journal_block_tag_t *) tagp;
tag->t_blocknr = cpu_to_be32(jh2bh(jh)->b_blocknr); write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr);
tag->t_flags = cpu_to_be32(tag_flag); tag->t_flags = cpu_to_be32(tag_flag);
tagp += sizeof(journal_block_tag_t); tagp += tag_bytes;
space_left -= sizeof(journal_block_tag_t); space_left -= tag_bytes;
if (first_tag) { if (first_tag) {
memcpy (tagp, journal->j_uuid, 16); memcpy (tagp, journal->j_uuid, 16);
...@@ -614,7 +623,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) ...@@ -614,7 +623,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
if (bufs == journal->j_wbufsize || if (bufs == journal->j_wbufsize ||
commit_transaction->t_buffers == NULL || commit_transaction->t_buffers == NULL ||
space_left < sizeof(journal_block_tag_t) + 16) { space_left < tag_bytes + 16) {
jbd_debug(4, "JBD: Submit %d IOs\n", bufs); jbd_debug(4, "JBD: Submit %d IOs\n", bufs);
......
...@@ -1609,6 +1609,17 @@ int jbd2_journal_blocks_per_page(struct inode *inode) ...@@ -1609,6 +1609,17 @@ int jbd2_journal_blocks_per_page(struct inode *inode)
return 1 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); return 1 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
} }
/*
 * Helper for dealing with 32 or 64 bit block numbers.
 *
 * Returns the number of bytes one block tag occupies for this journal:
 * JBD_TAG_SIZE64 when the journal has JBD2_FEATURE_INCOMPAT_64BIT set,
 * JBD_TAG_SIZE32 otherwise.
 */
size_t journal_tag_bytes(journal_t *journal)
{
	return JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT) ?
		JBD_TAG_SIZE64 : JBD_TAG_SIZE32;
}
/* /*
* Simple support for retrying memory allocations. Introduced to help to * Simple support for retrying memory allocations. Introduced to help to
* debug different VM deadlock avoidance strategies. * debug different VM deadlock avoidance strategies.
......
...@@ -178,19 +178,20 @@ static int jread(struct buffer_head **bhp, journal_t *journal, ...@@ -178,19 +178,20 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
* Count the number of in-use tags in a journal descriptor block. * Count the number of in-use tags in a journal descriptor block.
*/ */
static int count_tags(struct buffer_head *bh, int size) static int count_tags(journal_t *journal, struct buffer_head *bh)
{ {
char * tagp; char * tagp;
journal_block_tag_t * tag; journal_block_tag_t * tag;
int nr = 0; int nr = 0, size = journal->j_blocksize;
int tag_bytes = journal_tag_bytes(journal);
tagp = &bh->b_data[sizeof(journal_header_t)]; tagp = &bh->b_data[sizeof(journal_header_t)];
while ((tagp - bh->b_data + sizeof(journal_block_tag_t)) <= size) { while ((tagp - bh->b_data + tag_bytes) <= size) {
tag = (journal_block_tag_t *) tagp; tag = (journal_block_tag_t *) tagp;
nr++; nr++;
tagp += sizeof(journal_block_tag_t); tagp += tag_bytes;
if (!(tag->t_flags & cpu_to_be32(JBD2_FLAG_SAME_UUID))) if (!(tag->t_flags & cpu_to_be32(JBD2_FLAG_SAME_UUID)))
tagp += 16; tagp += 16;
...@@ -307,6 +308,14 @@ int jbd2_journal_skip_recovery(journal_t *journal) ...@@ -307,6 +308,14 @@ int jbd2_journal_skip_recovery(journal_t *journal)
return err; return err;
} }
/*
 * Read the block number stored in a journal block tag.
 *
 * @tag_bytes: size in bytes of one tag for this journal; anything larger
 *             than JBD_TAG_SIZE32 means t_blocknr_high is present.
 * @tag:       tag to decode.
 *
 * Returns the low 32 bits from t_blocknr, combined with t_blocknr_high as
 * bits 32 and above when the tag is large enough to contain that field.
 */
static inline sector_t read_tag_block(int tag_bytes, journal_block_tag_t *tag)
{
	sector_t blocknr = be32_to_cpu(tag->t_blocknr);

	/* Short (32-bit) tags carry only the low word. */
	if (!(tag_bytes > JBD_TAG_SIZE32))
		return blocknr;

	/* Widen to 64 bits before shifting the high word into place. */
	blocknr |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32;
	return blocknr;
}
static int do_one_pass(journal_t *journal, static int do_one_pass(journal_t *journal,
struct recovery_info *info, enum passtype pass) struct recovery_info *info, enum passtype pass)
{ {
...@@ -318,11 +327,12 @@ static int do_one_pass(journal_t *journal, ...@@ -318,11 +327,12 @@ static int do_one_pass(journal_t *journal,
struct buffer_head * bh; struct buffer_head * bh;
unsigned int sequence; unsigned int sequence;
int blocktype; int blocktype;
int tag_bytes = journal_tag_bytes(journal);
/* Precompute the maximum metadata descriptors in a descriptor block */ /* Precompute the maximum metadata descriptors in a descriptor block */
int MAX_BLOCKS_PER_DESC; int MAX_BLOCKS_PER_DESC;
MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t)) MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t))
/ sizeof(journal_block_tag_t)); / tag_bytes);
/* /*
* First thing is to establish what we expect to find in the log * First thing is to establish what we expect to find in the log
...@@ -412,8 +422,7 @@ static int do_one_pass(journal_t *journal, ...@@ -412,8 +422,7 @@ static int do_one_pass(journal_t *journal,
* in pass REPLAY; otherwise, just skip over the * in pass REPLAY; otherwise, just skip over the
* blocks it describes. */ * blocks it describes. */
if (pass != PASS_REPLAY) { if (pass != PASS_REPLAY) {
next_log_block += next_log_block += count_tags(journal, bh);
count_tags(bh, journal->j_blocksize);
wrap(journal, next_log_block); wrap(journal, next_log_block);
brelse(bh); brelse(bh);
continue; continue;
...@@ -424,7 +433,7 @@ static int do_one_pass(journal_t *journal, ...@@ -424,7 +433,7 @@ static int do_one_pass(journal_t *journal,
* getting done here! */ * getting done here! */
tagp = &bh->b_data[sizeof(journal_header_t)]; tagp = &bh->b_data[sizeof(journal_header_t)];
while ((tagp - bh->b_data +sizeof(journal_block_tag_t)) while ((tagp - bh->b_data + tag_bytes)
<= journal->j_blocksize) { <= journal->j_blocksize) {
unsigned long io_block; unsigned long io_block;
...@@ -446,7 +455,8 @@ static int do_one_pass(journal_t *journal, ...@@ -446,7 +455,8 @@ static int do_one_pass(journal_t *journal,
unsigned long blocknr; unsigned long blocknr;
J_ASSERT(obh != NULL); J_ASSERT(obh != NULL);
blocknr = be32_to_cpu(tag->t_blocknr); blocknr = read_tag_block(tag_bytes,
tag);
/* If the block has been /* If the block has been
* revoked, then we're all done * revoked, then we're all done
...@@ -494,7 +504,7 @@ static int do_one_pass(journal_t *journal, ...@@ -494,7 +504,7 @@ static int do_one_pass(journal_t *journal,
} }
skip_write: skip_write:
tagp += sizeof(journal_block_tag_t); tagp += tag_bytes;
if (!(flags & JBD2_FLAG_SAME_UUID)) if (!(flags & JBD2_FLAG_SAME_UUID))
tagp += 16; tagp += 16;
...@@ -572,17 +582,24 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, ...@@ -572,17 +582,24 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
{ {
jbd2_journal_revoke_header_t *header; jbd2_journal_revoke_header_t *header;
int offset, max; int offset, max;
int record_len = 4;
header = (jbd2_journal_revoke_header_t *) bh->b_data; header = (jbd2_journal_revoke_header_t *) bh->b_data;
offset = sizeof(jbd2_journal_revoke_header_t); offset = sizeof(jbd2_journal_revoke_header_t);
max = be32_to_cpu(header->r_count); max = be32_to_cpu(header->r_count);
while (offset < max) { if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
record_len = 8;
while (offset + record_len <= max) {
unsigned long blocknr; unsigned long blocknr;
int err; int err;
if (record_len == 4)
blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
offset += 4; else
blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset)));
offset += record_len;
err = jbd2_journal_set_revoke(journal, blocknr, sequence); err = jbd2_journal_set_revoke(journal, blocknr, sequence);
if (err) if (err)
return err; return err;
......
...@@ -584,9 +584,17 @@ static void write_one_revoke_record(journal_t *journal, ...@@ -584,9 +584,17 @@ static void write_one_revoke_record(journal_t *journal,
*descriptorp = descriptor; *descriptorp = descriptor;
} }
if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) {
* ((__be64 *)(&jh2bh(descriptor)->b_data[offset])) =
cpu_to_be64(record->blocknr);
offset += 8;
} else {
* ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) = * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) =
cpu_to_be32(record->blocknr); cpu_to_be32(record->blocknr);
offset += 4; offset += 4;
}
*offsetp = offset; *offsetp = offset;
} }
......
...@@ -150,14 +150,21 @@ typedef struct journal_header_s ...@@ -150,14 +150,21 @@ typedef struct journal_header_s
/* /*
* The block tag: used to describe a single buffer in the journal * The block tag: used to describe a single buffer in the journal.
* t_blocknr_high is only used if INCOMPAT_64BIT is set, so this
* raw struct shouldn't be used for pointer math or sizeof() - use
* journal_tag_bytes(journal) instead to compute this.
*/ */
typedef struct journal_block_tag_s typedef struct journal_block_tag_s
{ {
__be32 t_blocknr; /* The on-disk block number */ __be32 t_blocknr; /* The on-disk block number */
__be32 t_flags; /* See below */ __be32 t_flags; /* See below */
__be32 t_blocknr_high; /* most-significant high 32bits. */
} journal_block_tag_t; } journal_block_tag_t;
#define JBD_TAG_SIZE32 (offsetof(journal_block_tag_t, t_blocknr_high))
#define JBD_TAG_SIZE64 (sizeof(journal_block_tag_t))
/* /*
* The revoke descriptor: used on disk to describe a series of blocks to * The revoke descriptor: used on disk to describe a series of blocks to
* be revoked from the log * be revoked from the log
...@@ -235,11 +242,13 @@ typedef struct journal_superblock_s ...@@ -235,11 +242,13 @@ typedef struct journal_superblock_s
((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask)))) ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask))))
#define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001 #define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001
#define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002
/* Features known to this kernel version: */ /* Features known to this kernel version: */
#define JBD2_KNOWN_COMPAT_FEATURES 0 #define JBD2_KNOWN_COMPAT_FEATURES 0
#define JBD2_KNOWN_ROCOMPAT_FEATURES 0 #define JBD2_KNOWN_ROCOMPAT_FEATURES 0
#define JBD2_KNOWN_INCOMPAT_FEATURES JBD2_FEATURE_INCOMPAT_REVOKE #define JBD2_KNOWN_INCOMPAT_FEATURES (JBD2_FEATURE_INCOMPAT_REVOKE | \
JBD2_FEATURE_INCOMPAT_64BIT)
#ifdef __KERNEL__ #ifdef __KERNEL__
...@@ -1052,6 +1061,7 @@ static inline int tid_geq(tid_t x, tid_t y) ...@@ -1052,6 +1061,7 @@ static inline int tid_geq(tid_t x, tid_t y)
} }
extern int jbd2_journal_blocks_per_page(struct inode *inode); extern int jbd2_journal_blocks_per_page(struct inode *inode);
extern size_t journal_tag_bytes(journal_t *journal);
/* /*
* Return the minimum number of blocks which must be free in the journal * Return the minimum number of blocks which must be free in the journal
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment