Commit f8628a14, authored by Andreas Dilger, committed by Theodore Ts'o

ext4: Remove 65000 subdirectory limit

This patch adds support to ext4 for allowing more than 65000
subdirectories. Currently the maximum number of subdirectories is capped
at 32000.

If an htree directory exceeds 65000 subdirectories, ext4 sets the
inode link count to 1 and no longer counts subdirectories.  The
directory link count is not actually used when determining if a
directory is empty, as that only counts subdirectories and not regular
files that might be in there.

An EXT4_FEATURE_RO_COMPAT_DIR_NLINK flag has been added, and it is set if
the subdir count for any directory crosses 65000. A later fsck will clear
EXT4_FEATURE_RO_COMPAT_DIR_NLINK if there are no longer any directories
with >65000 subdirs.
Signed-off-by: Andreas Dilger <adilger@clusterfs.com>
Signed-off-by: Kalpak Shah <kalpak@clusterfs.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
parent 6dd4ee7c
...@@ -1629,6 +1629,35 @@ static int ext4_delete_entry (handle_t *handle, ...@@ -1629,6 +1629,35 @@ static int ext4_delete_entry (handle_t *handle,
return -ENOENT; return -ENOENT;
} }
/*
 * Add one link to @inode, collapsing the count to 1 for indexed (is_dx)
 * inodes in two cases, both checked after the increment:
 *   1) the count has reached EXT4_LINK_MAX (i_nlink >= EXT4_LINK_MAX), or
 *   2) the count is exactly 2, i.e. it was 1 before this call.
 * In either case i_nlink is forced back to 1 and the DIR_NLINK
 * read-only-compatible feature is set on the inode's superblock.
 *
 * @handle is accepted for symmetry with the callers but is not used here.
 */
static void ext4_inc_count(handle_t *handle, struct inode *inode)
{
	inc_nlink(inode);

	/* Only indexed inodes with more than one link get special treatment. */
	if (!is_dx(inode) || inode->i_nlink <= 1)
		return;

	/* i_links_count is only 16 bits wide, hence the cap */
	if (inode->i_nlink == 2 || inode->i_nlink >= EXT4_LINK_MAX) {
		inode->i_nlink = 1;
		EXT4_SET_RO_COMPAT_FEATURE(inode->i_sb,
					   EXT4_FEATURE_RO_COMPAT_DIR_NLINK);
	}
}
/*
 * Remove one link from @inode, but never leave a directory at zero links.
 *
 * A directory whose count was 1 is meant to stay at 1 (that value marks a
 * directory with more than EXT4_LINK_MAX subdirs), so if the decrement
 * takes a directory to 0 the count is bumped straight back to 1.
 * Non-directories are simply decremented.
 *
 * @handle is accepted for symmetry with the callers but is not used here.
 */
static void ext4_dec_count(handle_t *handle, struct inode *inode)
{
	drop_nlink(inode);

	if (!S_ISDIR(inode->i_mode) || inode->i_nlink != 0)
		return;

	/* directory just hit zero: restore the "count not tracked" value of 1 */
	inc_nlink(inode);
}
static int ext4_add_nondir(handle_t *handle, static int ext4_add_nondir(handle_t *handle,
struct dentry *dentry, struct inode *inode) struct dentry *dentry, struct inode *inode)
{ {
...@@ -1725,7 +1754,7 @@ static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode) ...@@ -1725,7 +1754,7 @@ static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode)
struct ext4_dir_entry_2 * de; struct ext4_dir_entry_2 * de;
int err, retries = 0; int err, retries = 0;
if (dir->i_nlink >= EXT4_LINK_MAX) if (EXT4_DIR_LINK_MAX(dir))
return -EMLINK; return -EMLINK;
retry: retry:
...@@ -1748,7 +1777,7 @@ retry: ...@@ -1748,7 +1777,7 @@ retry:
inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
dir_block = ext4_bread (handle, inode, 0, 1, &err); dir_block = ext4_bread (handle, inode, 0, 1, &err);
if (!dir_block) { if (!dir_block) {
drop_nlink(inode); /* is this nlink == 0? */ ext4_dec_count(handle, inode); /* is this nlink == 0? */
ext4_mark_inode_dirty(handle, inode); ext4_mark_inode_dirty(handle, inode);
iput (inode); iput (inode);
goto out_stop; goto out_stop;
...@@ -1780,7 +1809,7 @@ retry: ...@@ -1780,7 +1809,7 @@ retry:
iput (inode); iput (inode);
goto out_stop; goto out_stop;
} }
inc_nlink(dir); ext4_inc_count(handle, dir);
ext4_update_dx_flag(dir); ext4_update_dx_flag(dir);
ext4_mark_inode_dirty(handle, dir); ext4_mark_inode_dirty(handle, dir);
d_instantiate(dentry, inode); d_instantiate(dentry, inode);
...@@ -2045,9 +2074,9 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry) ...@@ -2045,9 +2074,9 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry)
retval = ext4_delete_entry(handle, dir, de, bh); retval = ext4_delete_entry(handle, dir, de, bh);
if (retval) if (retval)
goto end_rmdir; goto end_rmdir;
if (inode->i_nlink != 2) if (!EXT4_DIR_LINK_EMPTY(inode))
ext4_warning (inode->i_sb, "ext4_rmdir", ext4_warning (inode->i_sb, "ext4_rmdir",
"empty directory has nlink!=2 (%d)", "empty directory has too many links (%d)",
inode->i_nlink); inode->i_nlink);
inode->i_version++; inode->i_version++;
clear_nlink(inode); clear_nlink(inode);
...@@ -2058,7 +2087,7 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry) ...@@ -2058,7 +2087,7 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry)
ext4_orphan_add(handle, inode); ext4_orphan_add(handle, inode);
inode->i_ctime = dir->i_ctime = dir->i_mtime = ext4_current_time(inode); inode->i_ctime = dir->i_ctime = dir->i_mtime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode); ext4_mark_inode_dirty(handle, inode);
drop_nlink(dir); ext4_dec_count(handle, dir);
ext4_update_dx_flag(dir); ext4_update_dx_flag(dir);
ext4_mark_inode_dirty(handle, dir); ext4_mark_inode_dirty(handle, dir);
...@@ -2109,7 +2138,7 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry) ...@@ -2109,7 +2138,7 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry)
dir->i_ctime = dir->i_mtime = ext4_current_time(dir); dir->i_ctime = dir->i_mtime = ext4_current_time(dir);
ext4_update_dx_flag(dir); ext4_update_dx_flag(dir);
ext4_mark_inode_dirty(handle, dir); ext4_mark_inode_dirty(handle, dir);
drop_nlink(inode); ext4_dec_count(handle, inode);
if (!inode->i_nlink) if (!inode->i_nlink)
ext4_orphan_add(handle, inode); ext4_orphan_add(handle, inode);
inode->i_ctime = ext4_current_time(inode); inode->i_ctime = ext4_current_time(inode);
...@@ -2159,7 +2188,7 @@ retry: ...@@ -2159,7 +2188,7 @@ retry:
err = __page_symlink(inode, symname, l, err = __page_symlink(inode, symname, l,
mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
if (err) { if (err) {
drop_nlink(inode); ext4_dec_count(handle, inode);
ext4_mark_inode_dirty(handle, inode); ext4_mark_inode_dirty(handle, inode);
iput (inode); iput (inode);
goto out_stop; goto out_stop;
...@@ -2185,8 +2214,9 @@ static int ext4_link (struct dentry * old_dentry, ...@@ -2185,8 +2214,9 @@ static int ext4_link (struct dentry * old_dentry,
struct inode *inode = old_dentry->d_inode; struct inode *inode = old_dentry->d_inode;
int err, retries = 0; int err, retries = 0;
if (inode->i_nlink >= EXT4_LINK_MAX) if (EXT4_DIR_LINK_MAX(inode))
return -EMLINK; return -EMLINK;
/* /*
* Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
* otherwise has the potential to corrupt the orphan inode list. * otherwise has the potential to corrupt the orphan inode list.
...@@ -2204,7 +2234,7 @@ retry: ...@@ -2204,7 +2234,7 @@ retry:
handle->h_sync = 1; handle->h_sync = 1;
inode->i_ctime = ext4_current_time(inode); inode->i_ctime = ext4_current_time(inode);
inc_nlink(inode); ext4_inc_count(handle, inode);
atomic_inc(&inode->i_count); atomic_inc(&inode->i_count);
err = ext4_add_nondir(handle, dentry, inode); err = ext4_add_nondir(handle, dentry, inode);
...@@ -2337,7 +2367,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, ...@@ -2337,7 +2367,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
} }
if (new_inode) { if (new_inode) {
drop_nlink(new_inode); ext4_dec_count(handle, new_inode);
new_inode->i_ctime = ext4_current_time(new_inode); new_inode->i_ctime = ext4_current_time(new_inode);
} }
old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir); old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir);
...@@ -2348,11 +2378,13 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, ...@@ -2348,11 +2378,13 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino); PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino);
BUFFER_TRACE(dir_bh, "call ext4_journal_dirty_metadata"); BUFFER_TRACE(dir_bh, "call ext4_journal_dirty_metadata");
ext4_journal_dirty_metadata(handle, dir_bh); ext4_journal_dirty_metadata(handle, dir_bh);
drop_nlink(old_dir); ext4_dec_count(handle, old_dir);
if (new_inode) { if (new_inode) {
drop_nlink(new_inode); /* checked empty_dir above, can't have another parent,
* ext4_dec_count() won't work for many-linked dirs */
new_inode->i_nlink = 0;
} else { } else {
inc_nlink(new_dir); ext4_inc_count(handle, new_dir);
ext4_update_dx_flag(new_dir); ext4_update_dx_flag(new_dir);
ext4_mark_inode_dirty(handle, new_dir); ext4_mark_inode_dirty(handle, new_dir);
} }
......
...@@ -71,7 +71,7 @@ ...@@ -71,7 +71,7 @@
/* /*
* Maximal count of links to a file * Maximal count of links to a file
*/ */
#define EXT4_LINK_MAX 32000 #define EXT4_LINK_MAX 65000
/* /*
* Macro-instructions used to manage several block sizes * Macro-instructions used to manage several block sizes
...@@ -692,6 +692,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) ...@@ -692,6 +692,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
#define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 #define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020
#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
...@@ -710,6 +711,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) ...@@ -710,6 +711,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
EXT4_FEATURE_INCOMPAT_64BIT) EXT4_FEATURE_INCOMPAT_64BIT)
#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
EXT4_FEATURE_RO_COMPAT_BTREE_DIR) EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment