From 07660ad33a7d109cced29b6400f99f25adab3f54 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Thu, 2 Jul 2015 21:04:45 -0400 Subject: [PATCH] LU-6824 ldiskfs: give warning with dir htree growing Currently, without the large dir feature, the ldiskfs directory hash tree is limited to a height of 2, which means directory size is limited to about 1GB, and in fact users are likely to hit ENOSPC when reaching half of the limit because of bad hashing. Tested with the following script. i=0 filename="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbb" while [ 1 ] do touch $filename"$i" if [ $? -ne 0 ];then break fi ((i++)) done When the directory size grows to about 590M, we hit ENOSPC. A better way is to add support to e2fsprogs so that we can use the large dir feature. As a workaround, this patch tries to give warning messages on the console when 10/16 and 11/16 of the limit are reached. So this patch will try to give the following messages when the warning limit or the hash index tree limit is reached: Directory (inode: 8388610 FID: [0x200000401:0x1:0x0]) has approached maximum limit. Directory (inode: 8388610 FID: [0x200000401:0x1:0x0]) is approaching maximum limit. 
FID output here is useful for administrators to locate lustre file path Signed-off-by: Wang Shilong Change-Id: I7f78c421bbb89f76298e0174cc46d774ea82eb06 Reviewed-on: http://review.whamcloud.com/15548 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Yang Sheng Reviewed-by: James Simmons --- .../ext4-give-warning-with-dir-htree-growing.patch | 168 +++++++++++++++++++++ .../ext4-give-warning-with-dir-htree-growing.patch | 164 ++++++++++++++++++++ .../series/ldiskfs-2.6-rhel6.5.series | 1 + .../series/ldiskfs-2.6-rhel6.6.series | 1 + .../series/ldiskfs-2.6-rhel6.7.series | 1 + .../series/ldiskfs-3.0-sles11sp3.series | 1 + .../series/ldiskfs-3.10-rhel7.series | 1 + lustre/osd-ldiskfs/osd_compat.c | 9 +- lustre/osd-ldiskfs/osd_handler.c | 53 ++++++- lustre/osd-ldiskfs/osd_internal.h | 6 +- lustre/osd-ldiskfs/osd_oi.c | 2 +- lustre/tests/sanity.sh | 32 +++- 12 files changed, 424 insertions(+), 15 deletions(-) create mode 100644 ldiskfs/kernel_patches/patches/rhel6.5/ext4-give-warning-with-dir-htree-growing.patch create mode 100644 ldiskfs/kernel_patches/patches/rhel7/ext4-give-warning-with-dir-htree-growing.patch diff --git a/ldiskfs/kernel_patches/patches/rhel6.5/ext4-give-warning-with-dir-htree-growing.patch b/ldiskfs/kernel_patches/patches/rhel6.5/ext4-give-warning-with-dir-htree-growing.patch new file mode 100644 index 0000000..7cdebaa --- /dev/null +++ b/ldiskfs/kernel_patches/patches/rhel6.5/ext4-give-warning-with-dir-htree-growing.patch @@ -0,0 +1,168 @@ +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index 938487a..47313fd 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -1178,6 +1178,7 @@ struct ext4_sb_info { + unsigned int s_mb_group_prealloc; + unsigned int s_max_writeback_mb_bump; + unsigned long s_max_dir_size; ++ unsigned long s_warning_dir_size; + /* where last allocation was done - for stream allocation */ + unsigned long s_mb_last_group; + unsigned long s_mb_last_start; +diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c 
+index 992dc58..57ff920 100644 +--- a/fs/ext4/namei.c ++++ b/fs/ext4/namei.c +@@ -370,11 +370,19 @@ struct ext4_dir_lock_data { + + #define ext4_htree_lock_data(l) ((struct ext4_dir_lock_data *)(l)->lk_private) + #define ext4_find_entry(dir, name, dirent) __ext4_find_entry(dir, name, dirent, NULL) +-#define ext4_add_entry(handle, dentry, inode) __ext4_add_entry(handle, dentry, inode, NULL) +- + /* NB: ext4_lblk_t is 32 bits so we use high bits to identify invalid blk */ + #define EXT4_HTREE_NODE_CHANGED (0xcafeULL << 32) + ++inline int ext4_add_entry(handle_t *handle, struct dentry *dentry, ++ struct inode *inode) ++{ ++ int ret = __ext4_add_entry(handle, dentry, inode, NULL); ++ ++ if (ret == -ENOBUFS) ++ ret = 0; ++ return ret; ++} ++ + static void ext4_htree_event_cb(void *target, void *event) + { + u64 *block = (u64 *)target; +@@ -2053,6 +2061,54 @@ int __ext4_add_entry(handle_t *handle, struct dentry *dentry, + } + EXPORT_SYMBOL(__ext4_add_entry); + ++static unsigned long __ext4_max_dir_size(struct dx_frame *frames, ++ struct dx_frame *frame, struct inode *dir) ++{ ++ unsigned long max_dir_size; ++ ++ if (EXT4_SB(dir->i_sb)->s_max_dir_size) { ++ max_dir_size = EXT4_SB(dir->i_sb)->s_max_dir_size; ++ } else { ++ max_dir_size = EXT4_BLOCK_SIZE(dir->i_sb); ++ while (frame >= frames) { ++ max_dir_size *= dx_get_limit(frame->entries); ++ if (frame == frames) ++ break; ++ frame--; ++ } ++ /* use 75% of max dir size in average */ ++ max_dir_size = max_dir_size / 4 * 3; ++ } ++ return max_dir_size; ++} ++ ++/* ++ * With hash tree growing, it is easy to hit ENOSPC, but it is hard ++ * to predict when it will happen. 
let's give administrators warning ++ * when reaching 5/8 and 11/16 of limit ++ */ ++static inline bool dir_size_in_warning_range(struct dx_frame *frames, ++ struct dx_frame *frame, ++ struct inode *dir) ++{ ++ unsigned long size1, size2; ++ struct super_block *sb = dir->i_sb; ++ ++ if (unlikely(!EXT4_SB(sb)->s_warning_dir_size)) ++ EXT4_SB(sb)->s_warning_dir_size = ++ __ext4_max_dir_size(frames, frame, dir); ++ ++ size1 = EXT4_SB(sb)->s_warning_dir_size / 16 * 10; ++ size1 = size1 & ~(EXT4_BLOCK_SIZE(sb) - 1); ++ size2 = EXT4_SB(sb)->s_warning_dir_size / 16 * 11; ++ size2 = size2 & ~(EXT4_BLOCK_SIZE(sb) - 1); ++ if (in_range(dir->i_size, size1, EXT4_BLOCK_SIZE(sb)) || ++ in_range(dir->i_size, size2, EXT4_BLOCK_SIZE(sb))) ++ return true; ++ ++ return false; ++} ++ + /* + * Returns 0 for success, or a negative error value + */ +@@ -2068,6 +2124,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, + struct ext4_dir_entry_2 *de; + int restart; + int err; ++ bool ret_warn = false; + + again: + restart = 0; +@@ -2088,6 +2145,11 @@ again: + /* Block full, should compress but for now just split */ + dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n", + dx_get_count(entries), dx_get_limit(entries))); ++ ++ if (frame - frames + 1 >= ext4_dir_htree_level(sb) || ++ EXT4_SB(dir->i_sb)->s_max_dir_size) ++ ret_warn = dir_size_in_warning_range(frames, frame, dir); ++ + /* Need to split index? 
*/ + if (dx_get_count(entries) == dx_get_limit(entries)) { + ext4_lblk_t newblock; +@@ -2119,7 +2181,7 @@ again: + "reach max htree level :%d", + dir->i_ino, levels); + if (ext4_dir_htree_level(sb) < EXT4_HTREE_LEVEL) { +- ext4_warning(sb, "Large directory feature is" ++ ext4_warning(sb, "Large directory feature is " + "not enabled on this " + "filesystem"); + } +@@ -2248,6 +2310,8 @@ cleanup: + * repeat dx_probe() to find out valid htree-path */ + if (restart && err == 0) + goto again; ++ if (err == 0 && ret_warn) ++ err = -ENOBUFS; + return err; + } + +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index f02a632..b8ed072 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -1813,6 +1813,8 @@ set_qf_format: + if (option < 0) + return 0; + sbi->s_max_dir_size = option * 1024; ++ /* reset s_warning_dir_size and make it re-calculated */ ++ sbi->s_warning_dir_size = 0; + break; + case Opt_stripe: + if (match_int(&args[0], &option)) +@@ -2577,6 +2579,7 @@ EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, + inode_readahead_blks_store, s_inode_readahead_blks); + EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); + EXT4_RW_ATTR_SBI_UI(max_dir_size, s_max_dir_size); ++EXT4_RW_ATTR_SBI_UI(warning_dir_size, s_warning_dir_size); + EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); + EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); + EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); +@@ -2594,6 +2597,7 @@ static struct attribute *ext4_attrs[] = { + ATTR_LIST(inode_readahead_blks), + ATTR_LIST(inode_goal), + ATTR_LIST(max_dir_size), ++ ATTR_LIST(warning_dir_size), + ATTR_LIST(mb_stats), + ATTR_LIST(mb_max_to_scan), + ATTR_LIST(mb_min_to_scan), +@@ -3119,6 +3123,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) + sb->s_fs_info = sbi; + sbi->s_mount_opt = 0; + sbi->s_max_dir_size = 0; ++ sbi->s_warning_dir_size = 0; + sbi->s_resuid = EXT4_DEF_RESUID; + sbi->s_resgid = EXT4_DEF_RESGID; + sbi->s_inode_readahead_blks = 
EXT4_DEF_INODE_READAHEAD_BLKS; diff --git a/ldiskfs/kernel_patches/patches/rhel7/ext4-give-warning-with-dir-htree-growing.patch b/ldiskfs/kernel_patches/patches/rhel7/ext4-give-warning-with-dir-htree-growing.patch new file mode 100644 index 0000000..9582e22 --- /dev/null +++ b/ldiskfs/kernel_patches/patches/rhel7/ext4-give-warning-with-dir-htree-growing.patch @@ -0,0 +1,164 @@ +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index 3c41773..157438f 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -1286,6 +1286,7 @@ struct ext4_sb_info { + unsigned long s_mb_prealloc_table_size; + unsigned int s_mb_group_prealloc; + unsigned int s_max_dir_size_kb; ++ unsigned long s_warning_dir_size; + /* where last allocation was done - for stream allocation */ + unsigned long s_mb_last_group; + unsigned long s_mb_last_start; +diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c +index 7e9e04a..094d54f 100644 +--- a/fs/ext4/namei.c ++++ b/fs/ext4/namei.c +@@ -687,12 +687,20 @@ struct ext4_dir_lock_data { + #define ext4_htree_lock_data(l) ((struct ext4_dir_lock_data *)(l)->lk_private) + #define ext4_find_entry(dir, name, dirent, inline) \ + __ext4_find_entry(dir, name, dirent, inline, NULL) +-#define ext4_add_entry(handle, dentry, inode) \ +- __ext4_add_entry(handle, dentry, inode, NULL) + + /* NB: ext4_lblk_t is 32 bits so we use high bits to identify invalid blk */ + #define EXT4_HTREE_NODE_CHANGED (0xcafeULL << 32) + ++inline int ext4_add_entry(handle_t *handle, struct dentry *dentry, ++ struct inode *inode) ++{ ++ int ret = __ext4_add_entry(handle, dentry, inode, NULL); ++ ++ if (ret == -ENOBUFS) ++ ret = 0; ++ return ret; ++} ++ + static void ext4_htree_event_cb(void *target, void *event) + { + u64 *block = (u64 *)target; +@@ -2479,6 +2487,54 @@ int __ext4_add_entry(handle_t *handle, struct dentry *dentry, + } + EXPORT_SYMBOL(__ext4_add_entry); + ++static unsigned long __ext4_max_dir_size(struct dx_frame *frames, ++ struct dx_frame *frame, struct inode *dir) ++{ ++ unsigned long 
max_dir_size; ++ ++ if (EXT4_SB(dir->i_sb)->s_max_dir_size_kb) { ++ max_dir_size = EXT4_SB(dir->i_sb)->s_max_dir_size_kb << 10; ++ } else { ++ max_dir_size = EXT4_BLOCK_SIZE(dir->i_sb); ++ while (frame >= frames) { ++ max_dir_size *= dx_get_limit(frame->entries); ++ if (frame == frames) ++ break; ++ frame--; ++ } ++ /* use 75% of max dir size in average */ ++ max_dir_size = max_dir_size / 4 * 3; ++ } ++ return max_dir_size; ++} ++ ++/* ++ * With hash tree growing, it is easy to hit ENOSPC, but it is hard ++ * to predict when it will happen. let's give administrators warning ++ * when reaching 5/8 and 11/16 of limit ++ */ ++static inline bool dir_size_in_warning_range(struct dx_frame *frames, ++ struct dx_frame *frame, ++ struct inode *dir) ++{ ++ unsigned long size1, size2; ++ struct super_block *sb = dir->i_sb; ++ ++ if (unlikely(!EXT4_SB(sb)->s_warning_dir_size)) ++ EXT4_SB(sb)->s_warning_dir_size = ++ __ext4_max_dir_size(frames, frame, dir); ++ ++ size1 = EXT4_SB(sb)->s_warning_dir_size / 16 * 10; ++ size1 = size1 & ~(EXT4_BLOCK_SIZE(sb) - 1); ++ size2 = EXT4_SB(sb)->s_warning_dir_size / 16 * 11; ++ size2 = size2 & ~(EXT4_BLOCK_SIZE(sb) - 1); ++ if (in_range(dir->i_size, size1, EXT4_BLOCK_SIZE(sb)) || ++ in_range(dir->i_size, size2, EXT4_BLOCK_SIZE(sb))) ++ return true; ++ ++ return false; ++} ++ + /* + * Returns 0 for success, or a negative error value + */ +@@ -2494,6 +2550,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, + struct ext4_dir_entry_2 *de; + int restart; + int err; ++ bool ret_warn = false; + + again: + restart = 0; +@@ -2517,6 +2574,11 @@ again: + /* Block full, should compress but for now just split */ + dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n", + dx_get_count(entries), dx_get_limit(entries))); ++ ++ if (frame - frames + 1 >= ext4_dir_htree_level(sb) || ++ EXT4_SB(sb)->s_warning_dir_size) ++ ret_warn = dir_size_in_warning_range(frames, frame, dir); ++ + /* Need to split index? 
*/ + if (dx_get_count(entries) == dx_get_limit(entries)) { + ext4_lblk_t newblock; +@@ -2548,7 +2610,7 @@ again: + dir->i_ino, current->comm, levels, + ext4_dir_htree_level(sb)); + if (ext4_dir_htree_level(sb) < EXT4_HTREE_LEVEL) { +- ext4_warning(sb, "Large directory feature is" ++ ext4_warning(sb, "Large directory feature is " + "not enabled on this " + "filesystem"); + } +@@ -2674,6 +2736,8 @@ cleanup: + * repeat dx_probe() to find out valid htree-path */ + if (restart && err == 0) + goto again; ++ if (err == 0 && ret_warn) ++ err = -ENOBUFS; + return err; + } + +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index c625960..1914379 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -1546,6 +1546,8 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, + sbi->s_li_wait_mult = arg; + } else if (token == Opt_max_dir_size_kb) { + sbi->s_max_dir_size_kb = arg; ++ /* reset s_warning_dir_size and make it re-calculated */ ++ sbi->s_warning_dir_size = 0; + } else if (token == Opt_stripe) { + sbi->s_stripe = arg; + } else if (token == Opt_resuid) { +@@ -2657,6 +2659,7 @@ EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, + EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); + EXT4_RW_ATTR_SBI_UI(max_dir_size, s_max_dir_size_kb); + EXT4_RW_ATTR_SBI_UI(max_dir_size_kb, s_max_dir_size_kb); ++EXT4_RW_ATTR_SBI_UI(warning_dir_size, s_warning_dir_size); + EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); + EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); + EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); +@@ -2686,6 +2689,7 @@ static struct attribute *ext4_attrs[] = { + ATTR_LIST(inode_goal), + ATTR_LIST(max_dir_size), + ATTR_LIST(max_dir_size_kb), ++ ATTR_LIST(warning_dir_size), + ATTR_LIST(mb_stats), + ATTR_LIST(mb_max_to_scan), + ATTR_LIST(mb_min_to_scan), +-- +1.8.3.1 + diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.5.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.5.series index abea8ea..0881a7a 100644 --- 
a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.5.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.5.series @@ -45,3 +45,4 @@ rhel6.3/ext4-not-discard-preallocation-umount.patch rhel6.3/ext4-journal-path-opt.patch rhel6.3/ext4-drop-inode-from-orphan-list-if-ext4_delete_inode-fails.patch rhel6.3/ext4-notalloc_under_idatasem.patch +rhel6.5/ext4-give-warning-with-dir-htree-growing.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.6.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.6.series index 9b67ab5..e3df418 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.6.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.6.series @@ -46,3 +46,4 @@ rhel6.3/ext4-drop-inode-from-orphan-list-if-ext4_delete_inode-fails.patch rhel6.6/ext4-remove-truncate-warning.patch rhel6.6/ext4-corrupted-inode-block-bitmaps-handling-patches.patch rhel6.3/ext4-notalloc_under_idatasem.patch +rhel6.5/ext4-give-warning-with-dir-htree-growing.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.7.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.7.series index 6b526f3..e3fd3fd 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.7.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.7.series @@ -45,3 +45,4 @@ rhel6.3/ext4-journal-path-opt.patch rhel6.6/ext4-remove-truncate-warning.patch rhel6.6/ext4-corrupted-inode-block-bitmaps-handling-patches.patch rhel6.3/ext4-notalloc_under_idatasem.patch +rhel6.5/ext4-give-warning-with-dir-htree-growing.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-3.0-sles11sp3.series b/ldiskfs/kernel_patches/series/ldiskfs-3.0-sles11sp3.series index d4cfd2a..4630175 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-3.0-sles11sp3.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-3.0-sles11sp3.series @@ -38,3 +38,4 @@ rhel6.3/ext4-not-discard-preallocation-umount.patch rhel6.3/ext4-journal-path-opt.patch sles11sp2/ext4-corrupted-inode-block-bitmaps-handling-patches.patch 
rhel6.3/ext4-notalloc_under_idatasem.patch +rhel6.5/ext4-give-warning-with-dir-htree-growing.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.series b/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.series index c48b1ac..fc947ed 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.series @@ -17,3 +17,4 @@ rhel7/ext4-pdirop.patch rhel7/ext4-max-dir-size.patch rhel7/ext4-remove-truncate-warning.patch rhel7/ext4-corrupted-inode-block-bitmaps-handling-patches.patch +rhel7/ext4-give-warning-with-dir-htree-growing.patch diff --git a/lustre/osd-ldiskfs/osd_compat.c b/lustre/osd-ldiskfs/osd_compat.c index 0779f25..d58b6c8 100644 --- a/lustre/osd-ldiskfs/osd_compat.c +++ b/lustre/osd-ldiskfs/osd_compat.c @@ -225,6 +225,7 @@ int osd_add_to_remote_parent(const struct lu_env *env, struct osd_device *osd, struct osd_thread_info *oti = osd_oti_get(env); struct lustre_mdt_attrs *lma = &oti->oti_mdt_attrs; char *name = oti->oti_name; + struct osd_thread_info *info = osd_oti_get(env); struct dentry *dentry; struct dentry *parent; int rc; @@ -247,8 +248,8 @@ int osd_add_to_remote_parent(const struct lu_env *env, struct osd_device *osd, dentry = osd_child_dentry_by_inode(env, parent->d_inode, name, strlen(name)); mutex_lock(&parent->d_inode->i_mutex); - rc = osd_ldiskfs_add_entry(oh->ot_handle, dentry, obj->oo_inode, - NULL); + rc = osd_ldiskfs_add_entry(info, oh->ot_handle, dentry, + obj->oo_inode, NULL); CDEBUG(D_INODE, "%s: add %s:%lu to remote parent %lu.\n", osd_name(osd), name, obj->oo_inode->i_ino, parent->d_inode->i_ino); ldiskfs_inc_count(oh->ot_handle, parent->d_inode); @@ -735,7 +736,7 @@ static int osd_obj_add_entry(struct osd_thread_info *info, ll_vfs_dq_init(dir->d_inode); mutex_lock(&dir->d_inode->i_mutex); - rc = osd_ldiskfs_add_entry(th, child, inode, NULL); + rc = osd_ldiskfs_add_entry(info, th, child, inode, NULL); mutex_unlock(&dir->d_inode->i_mutex); RETURN(rc); @@ -1179,7 
+1180,7 @@ int osd_obj_map_recover(struct osd_thread_info *info, if (rc != 0) GOTO(unlock, rc); - rc = osd_ldiskfs_add_entry(jh, tgt_child, inode, NULL); + rc = osd_ldiskfs_add_entry(info, jh, tgt_child, inode, NULL); GOTO(unlock, rc); diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index 2bf04b4..944b035 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -432,6 +432,53 @@ struct inode *osd_iget(struct osd_thread_info *info, struct osd_device *dev, return inode; } +int osd_ldiskfs_add_entry(struct osd_thread_info *info, + handle_t *handle, struct dentry *child, + struct inode *inode, struct htree_lock *hlock) +{ + int rc, rc2; + + rc = __ldiskfs_add_entry(handle, child, inode, hlock); + if (rc == -ENOBUFS || rc == -ENOSPC) { + char fidbuf[FID_LEN + 1]; + struct lustre_mdt_attrs lma; + struct lu_fid fid = { }; + char *errstr; + struct dentry *p_dentry = child->d_parent; + + rc2 = osd_get_lma(info, p_dentry->d_inode, p_dentry, + &lma); + if (rc2 == 0) { + fid = lma.lma_self_fid; + snprintf(fidbuf, sizeof(fidbuf), DFID, PFID(&fid)); + } else if (rc2 == -ENODATA) { + if (unlikely(p_dentry->d_inode == + inode->i_sb->s_root->d_inode)) + lu_local_obj_fid(&fid, OSD_FS_ROOT_OID); + else if (info->oti_dev && !info->oti_dev->od_is_ost && + fid_seq_is_mdt0(fid_seq(&fid))) + lu_igif_build(&fid, p_dentry->d_inode->i_ino, + p_dentry->d_inode->i_generation); + snprintf(fidbuf, sizeof(fidbuf), DFID, PFID(&fid)); + } else { + snprintf(fidbuf, FID_LEN, "%s", "unknown"); + } + + if (rc == -ENOSPC) + errstr = "has reached"; + else + errstr = "is approaching"; + CWARN("%.16s: directory (inode: %lu FID: %s) %s maximum entry limit\n", + LDISKFS_SB(inode->i_sb)->s_es->s_volume_name, + p_dentry->d_inode->i_ino, fidbuf, errstr); + /* ignore such error now */ + if (rc == -ENOBUFS) + rc = 0; + } + return rc; +} + + static struct inode * osd_iget_fid(struct osd_thread_info *info, struct osd_device *dev, struct osd_inode_id 
*id, struct lu_fid *fid) @@ -4151,7 +4198,8 @@ static int __osd_ea_add_rec(struct osd_thread_info *info, child = osd_child_dentry_get(info->oti_env, pobj, name, strlen(name)); child->d_fsdata = (void *)ldp; ll_vfs_dq_init(pobj->oo_inode); - rc = osd_ldiskfs_add_entry(oth->ot_handle, child, cinode, hlock); + rc = osd_ldiskfs_add_entry(info, oth->ot_handle, child, + cinode, hlock); if (rc == 0 && OBD_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_TYPE)) { struct ldiskfs_dir_entry_2 *de; struct buffer_head *bh; @@ -5380,6 +5428,7 @@ osd_dirent_reinsert(const struct lu_env *env, handle_t *jh, struct ldiskfs_dentry_param *ldp; int namelen = dentry->d_name.len; int rc; + struct osd_thread_info *info = osd_oti_get(env); ENTRY; if (!LDISKFS_HAS_INCOMPAT_FEATURE(inode->i_sb, @@ -5413,7 +5462,7 @@ osd_dirent_reinsert(const struct lu_env *env, handle_t *jh, osd_get_ldiskfs_dirent_param(ldp, fid); dentry->d_fsdata = (void *)ldp; ll_vfs_dq_init(dir); - rc = osd_ldiskfs_add_entry(jh, dentry, inode, hlock); + rc = osd_ldiskfs_add_entry(info, jh, dentry, inode, hlock); /* It is too bad, we cannot reinsert the name entry back. * That means we lose it! 
*/ if (rc != 0) diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index 19c7315..ae51525 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -169,9 +169,9 @@ struct osd_mdobj { struct osd_mdobj_map { struct dentry *omm_remote_parent; }; - -#define osd_ldiskfs_add_entry(handle, child, cinode, hlock) \ - __ldiskfs_add_entry(handle, child, cinode, hlock) +int osd_ldiskfs_add_entry(struct osd_thread_info *info, + handle_t *handle, struct dentry *child, + struct inode *inode, struct htree_lock *hlock); #define OSD_OTABLE_IT_CACHE_SIZE 64 #define OSD_OTABLE_IT_CACHE_MASK (~(OSD_OTABLE_IT_CACHE_SIZE - 1)) diff --git a/lustre/osd-ldiskfs/osd_oi.c b/lustre/osd-ldiskfs/osd_oi.c index 6b6942f..ae3d5c1 100644 --- a/lustre/osd-ldiskfs/osd_oi.c +++ b/lustre/osd-ldiskfs/osd_oi.c @@ -151,7 +151,7 @@ static int osd_oi_index_create_one(struct osd_thread_info *info, feat->dif_ptrsize, feat->dif_recsize_max, jh); dentry = osd_child_dentry_by_inode(env, dir, name, strlen(name)); - rc = osd_ldiskfs_add_entry(jh, dentry, inode, NULL); + rc = osd_ldiskfs_add_entry(info, jh, dentry, inode, NULL); ldiskfs_journal_stop(jh); iput(inode); return rc; diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 86ea6d0..3b77e07 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -8688,7 +8688,18 @@ set_dir_limits () { do_facet $facet "test -e $LDPROC/$canondev/max_dir_size" || LDPROC=/sys/fs/ldiskfs do_facet $facet "echo $1 >$LDPROC/$canondev/max_dir_size" + do_facet $facet "test -e $LDPROC/$canondev/warning_dir_size" || + LDPROC=/sys/fs/ldiskfs + do_facet $facet "echo $2 >$LDPROC/$canondev/warning_dir_size" + done +} + +check_mds_dmesg() { + local facets=$(get_facets MDS) + for facet in ${facets//,/ }; do + do_facet $facet "dmesg | tail -3 | grep -q $1" && return 0 done + return 1 } test_129() { @@ -8700,6 +8711,7 @@ test_129() { remote_mds_nodsh && skip "remote MDS with nodsh" && return ENOSPC=28 
EFBIG=27 + has_warning=0 rm -rf $DIR/$tdir test_mkdir -p $DIR/$tdir @@ -8707,8 +8719,8 @@ test_129() { # block size of mds1 local MDT_DEV=$(mdsdevname ${SINGLEMDS//mds/}) local MDSBLOCKSIZE=$($LCTL get_param -n mdc.*MDT0000*.blocksize) - local MAX=$((MDSBLOCKSIZE * 3)) - set_dir_limits $MAX + local MAX=$((MDSBLOCKSIZE * 5)) + set_dir_limits $MAX $MAX local I=$(stat -c%s "$DIR/$tdir") local J=0 local STRIPE_COUNT=1 @@ -8717,15 +8729,25 @@ test_129() { while [[ $I -le $MAX ]]; do $MULTIOP $DIR/$tdir/$J Oc rc=$? + if [ $has_warning -eq 0 ]; then + check_mds_dmesg '"is approaching"' && + has_warning=1 + fi #check two errors ENOSPC for new version of ext4 max_dir_size patch #mainline kernel commit df981d03eeff7971ac7e6ff37000bfa702327ef1 #and EFBIG for previous versions if [ $rc -eq $EFBIG -o $rc -eq $ENOSPC ]; then - set_dir_limits 0 + set_dir_limits 0 0 echo "return code $rc received as expected" multiop $DIR/$tdir/$J Oc || error_exit "multiop failed w/o dir size limit" + check_mds_dmesg '"has reached"' || + error_exit "has reached message should be output" + + [ $has_warning -eq 1 ] || + error_exit "warning message should be output" + I=$(stat -c%s "$DIR/$tdir") if [ $(lustre_version_code $SINGLEMDS) -lt \ @@ -8737,7 +8759,7 @@ test_129() { fi error_exit "current dir size $I, previous limit $MAX" elif [ $rc -ne 0 ]; then - set_dir_limits 0 + set_dir_limits 0 0 error_exit "return code $rc received instead of expected " \ "$EFBIG or $ENOSPC, files in dir $I" fi @@ -8745,7 +8767,7 @@ test_129() { I=$(stat -c%s "$DIR/$tdir") done - set_dir_limits 0 + set_dir_limits 0 0 error "exceeded dir size limit $MAX($MDSCOUNT) : $I bytes" } run_test 129 "test directory size limit ========================" -- 1.8.3.1