Whamcloud - gitweb
LU-6824 ldiskfs: give warning with dir htree growing 48/15548/16
authorWang Shilong <wshilong@ddn.com>
Fri, 3 Jul 2015 01:04:45 +0000 (21:04 -0400)
committerOleg Drokin <oleg.drokin@intel.com>
Wed, 6 Jan 2016 03:01:45 +0000 (03:01 +0000)
Currently, without the large dir feature, the ldiskfs directory hash tree
is limited to a height of 2. This means directory size is limited to about
1GB, and in fact users are likely to hit ENOSPC when reaching half of the
limit because of bad hashing. Tested with the following script:

i=0
filename="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbb"
while [ 1 ]
do
        touch $filename"$i"
        if [ $? -ne 0 ];then
                break
        fi
        ((i++))
done

When the directory size grows to about 590M, we hit ENOSPC. A better way
is to add support to e2fsprogs so that we can use the large dir feature.
As a workaround, this patch tries to give warning messages on the
console when 10/16 and 11/16 of the limit are reached.

So this patch will try to give the following messages when the warning
limit or the hash index tree limit is reached:

Directory (inode: 8388610 FID: [0x200000401:0x1:0x0]) has reached
maximum entry limit.

Directory (inode: 8388610 FID: [0x200000401:0x1:0x0]) is approaching
maximum entry limit.

The FID output here is useful for administrators to locate the Lustre
file path.

Signed-off-by: Wang Shilong <wshilong@ddn.com>
Change-Id: I7f78c421bbb89f76298e0174cc46d774ea82eb06
Reviewed-on: http://review.whamcloud.com/15548
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Yang Sheng <yang.sheng@intel.com>
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
12 files changed:
ldiskfs/kernel_patches/patches/rhel6.5/ext4-give-warning-with-dir-htree-growing.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/rhel7/ext4-give-warning-with-dir-htree-growing.patch [new file with mode: 0644]
ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.5.series
ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.6.series
ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.7.series
ldiskfs/kernel_patches/series/ldiskfs-3.0-sles11sp3.series
ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.series
lustre/osd-ldiskfs/osd_compat.c
lustre/osd-ldiskfs/osd_handler.c
lustre/osd-ldiskfs/osd_internal.h
lustre/osd-ldiskfs/osd_oi.c
lustre/tests/sanity.sh

diff --git a/ldiskfs/kernel_patches/patches/rhel6.5/ext4-give-warning-with-dir-htree-growing.patch b/ldiskfs/kernel_patches/patches/rhel6.5/ext4-give-warning-with-dir-htree-growing.patch
new file mode 100644 (file)
index 0000000..7cdebaa
--- /dev/null
@@ -0,0 +1,168 @@
+diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
+index 938487a..47313fd 100644
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -1178,6 +1178,7 @@ struct ext4_sb_info {
+       unsigned int s_mb_group_prealloc;
+       unsigned int s_max_writeback_mb_bump;
+       unsigned long s_max_dir_size;
++      unsigned long s_warning_dir_size;
+       /* where last allocation was done - for stream allocation */
+       unsigned long s_mb_last_group;
+       unsigned long s_mb_last_start;
+diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
+index 992dc58..57ff920 100644
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -370,11 +370,19 @@ struct ext4_dir_lock_data {
+ #define ext4_htree_lock_data(l) ((struct ext4_dir_lock_data *)(l)->lk_private)
+ #define ext4_find_entry(dir, name, dirent) __ext4_find_entry(dir, name, dirent, NULL)
+-#define ext4_add_entry(handle, dentry, inode) __ext4_add_entry(handle, dentry, inode, NULL)
+-
+ /* NB: ext4_lblk_t is 32 bits so we use high bits to identify invalid blk */
+ #define EXT4_HTREE_NODE_CHANGED       (0xcafeULL << 32)
++inline int ext4_add_entry(handle_t *handle, struct dentry *dentry,
++                          struct inode *inode)
++{
++      int ret = __ext4_add_entry(handle, dentry, inode, NULL);
++
++      if (ret == -ENOBUFS)
++              ret = 0;
++      return ret;
++}
++
+ static void ext4_htree_event_cb(void *target, void *event)
+ {
+       u64 *block = (u64 *)target;
+@@ -2053,6 +2061,54 @@ int __ext4_add_entry(handle_t *handle, struct dentry *dentry,
+ }
+ EXPORT_SYMBOL(__ext4_add_entry);
++static unsigned long __ext4_max_dir_size(struct dx_frame *frames,
++                             struct dx_frame *frame, struct inode *dir)
++{
++      unsigned long max_dir_size;
++
++      if (EXT4_SB(dir->i_sb)->s_max_dir_size) {
++              max_dir_size = EXT4_SB(dir->i_sb)->s_max_dir_size;
++      } else {
++              max_dir_size = EXT4_BLOCK_SIZE(dir->i_sb);
++              while (frame >= frames) {
++                      max_dir_size *= dx_get_limit(frame->entries);
++                      if (frame == frames)
++                              break;
++                      frame--;
++              }
++              /* use 75% of max dir size in average */
++              max_dir_size = max_dir_size / 4 * 3;
++      }
++      return max_dir_size;
++}
++
++/*
++ * With hash tree growing, it is easy to hit ENOSPC, but it is hard
++ * to predict when it will happen. let's give administrators warning
++ * when reaching 5/8 and 11/16 of limit
++ */
++static inline bool dir_size_in_warning_range(struct dx_frame *frames,
++                                           struct dx_frame *frame,
++                                           struct inode *dir)
++{
++      unsigned long size1, size2;
++      struct super_block *sb = dir->i_sb;
++
++      if (unlikely(!EXT4_SB(sb)->s_warning_dir_size))
++              EXT4_SB(sb)->s_warning_dir_size =
++                      __ext4_max_dir_size(frames, frame, dir);
++
++      size1 = EXT4_SB(sb)->s_warning_dir_size / 16 * 10;
++      size1 = size1 & ~(EXT4_BLOCK_SIZE(sb) - 1);
++      size2 = EXT4_SB(sb)->s_warning_dir_size / 16 * 11;
++      size2 = size2 & ~(EXT4_BLOCK_SIZE(sb) - 1);
++      if (in_range(dir->i_size, size1, EXT4_BLOCK_SIZE(sb)) ||
++          in_range(dir->i_size, size2, EXT4_BLOCK_SIZE(sb)))
++              return true;
++
++      return false;
++}
++
+ /*
+  * Returns 0 for success, or a negative error value
+  */
+@@ -2068,6 +2124,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
+       struct ext4_dir_entry_2 *de;
+       int restart;
+       int err;
++      bool ret_warn = false;
+ again:
+       restart = 0;
+@@ -2088,6 +2145,11 @@ again:
+       /* Block full, should compress but for now just split */
+       dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
+                      dx_get_count(entries), dx_get_limit(entries)));
++
++      if (frame - frames + 1 >= ext4_dir_htree_level(sb) ||
++          EXT4_SB(dir->i_sb)->s_max_dir_size)
++              ret_warn = dir_size_in_warning_range(frames, frame, dir);
++
+       /* Need to split index? */
+       if (dx_get_count(entries) == dx_get_limit(entries)) {
+               ext4_lblk_t newblock;
+@@ -2119,7 +2181,7 @@ again:
+                                        "reach max htree level :%d",
+                                        dir->i_ino, levels);
+                       if (ext4_dir_htree_level(sb) < EXT4_HTREE_LEVEL) {
+-                              ext4_warning(sb, "Large directory feature is"
++                              ext4_warning(sb, "Large directory feature is "
+                                                "not enabled on this "
+                                                "filesystem");
+                       }
+@@ -2248,6 +2310,8 @@ cleanup:
+        * repeat dx_probe() to find out valid htree-path */
+       if (restart && err == 0)
+               goto again;
++      if (err == 0 && ret_warn)
++              err = -ENOBUFS;
+       return err;
+ }
+diff --git a/fs/ext4/super.c b/fs/ext4/super.c
+index f02a632..b8ed072 100644
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -1813,6 +1813,8 @@ set_qf_format:
+                       if (option < 0)
+                               return 0;
+                       sbi->s_max_dir_size = option * 1024;
++                      /* reset s_warning_dir_size and make it re-calculated */
++                      sbi->s_warning_dir_size = 0;
+                       break;
+               case Opt_stripe:
+                       if (match_int(&args[0], &option))
+@@ -2577,6 +2579,7 @@ EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
+                inode_readahead_blks_store, s_inode_readahead_blks);
+ EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
+ EXT4_RW_ATTR_SBI_UI(max_dir_size, s_max_dir_size);
++EXT4_RW_ATTR_SBI_UI(warning_dir_size, s_warning_dir_size);
+ EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
+ EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
+ EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
+@@ -2594,6 +2597,7 @@ static struct attribute *ext4_attrs[] = {
+       ATTR_LIST(inode_readahead_blks),
+       ATTR_LIST(inode_goal),
+       ATTR_LIST(max_dir_size),
++      ATTR_LIST(warning_dir_size),
+       ATTR_LIST(mb_stats),
+       ATTR_LIST(mb_max_to_scan),
+       ATTR_LIST(mb_min_to_scan),
+@@ -3119,6 +3123,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
+       sb->s_fs_info = sbi;
+       sbi->s_mount_opt = 0;
+       sbi->s_max_dir_size = 0;
++      sbi->s_warning_dir_size = 0;
+       sbi->s_resuid = EXT4_DEF_RESUID;
+       sbi->s_resgid = EXT4_DEF_RESGID;
+       sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
diff --git a/ldiskfs/kernel_patches/patches/rhel7/ext4-give-warning-with-dir-htree-growing.patch b/ldiskfs/kernel_patches/patches/rhel7/ext4-give-warning-with-dir-htree-growing.patch
new file mode 100644 (file)
index 0000000..9582e22
--- /dev/null
@@ -0,0 +1,164 @@
+diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
+index 3c41773..157438f 100644
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -1286,6 +1286,7 @@ struct ext4_sb_info {
+       unsigned long s_mb_prealloc_table_size;
+       unsigned int s_mb_group_prealloc;
+       unsigned int s_max_dir_size_kb;
++      unsigned long s_warning_dir_size;
+       /* where last allocation was done - for stream allocation */
+       unsigned long s_mb_last_group;
+       unsigned long s_mb_last_start;
+diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
+index 7e9e04a..094d54f 100644
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -687,12 +687,20 @@ struct ext4_dir_lock_data {
+ #define ext4_htree_lock_data(l)       ((struct ext4_dir_lock_data *)(l)->lk_private)
+ #define ext4_find_entry(dir, name, dirent, inline) \
+                       __ext4_find_entry(dir, name, dirent, inline, NULL)
+-#define ext4_add_entry(handle, dentry, inode) \
+-                      __ext4_add_entry(handle, dentry, inode, NULL)
+ /* NB: ext4_lblk_t is 32 bits so we use high bits to identify invalid blk */
+ #define EXT4_HTREE_NODE_CHANGED       (0xcafeULL << 32)
++inline int ext4_add_entry(handle_t *handle, struct dentry *dentry,
++                        struct inode *inode)
++{
++      int ret = __ext4_add_entry(handle, dentry, inode, NULL);
++
++      if (ret == -ENOBUFS)
++              ret = 0;
++      return ret;
++}
++
+ static void ext4_htree_event_cb(void *target, void *event)
+ {
+       u64 *block = (u64 *)target;
+@@ -2479,6 +2487,54 @@ int __ext4_add_entry(handle_t *handle, struct dentry *dentry,
+ }
+ EXPORT_SYMBOL(__ext4_add_entry);
++static unsigned long __ext4_max_dir_size(struct dx_frame *frames,
++                             struct dx_frame *frame, struct inode *dir)
++{
++      unsigned long max_dir_size;
++
++      if (EXT4_SB(dir->i_sb)->s_max_dir_size_kb) {
++              max_dir_size = EXT4_SB(dir->i_sb)->s_max_dir_size_kb << 10;
++      } else {
++              max_dir_size = EXT4_BLOCK_SIZE(dir->i_sb);
++              while (frame >= frames) {
++                      max_dir_size *= dx_get_limit(frame->entries);
++                      if (frame == frames)
++                              break;
++                      frame--;
++              }
++              /* use 75% of max dir size in average */
++              max_dir_size = max_dir_size / 4 * 3;
++      }
++      return max_dir_size;
++}
++
++/*
++ * With hash tree growing, it is easy to hit ENOSPC, but it is hard
++ * to predict when it will happen. let's give administrators warning
+ * when reaching 5/8 and 11/16 of limit
++ */
++static inline bool dir_size_in_warning_range(struct dx_frame *frames,
++                                           struct dx_frame *frame,
++                                           struct inode *dir)
++{
++      unsigned long size1, size2;
++      struct super_block *sb = dir->i_sb;
++
++      if (unlikely(!EXT4_SB(sb)->s_warning_dir_size))
++              EXT4_SB(sb)->s_warning_dir_size =
++                      __ext4_max_dir_size(frames, frame, dir);
++
++      size1 = EXT4_SB(sb)->s_warning_dir_size / 16 * 10;
++      size1 = size1 & ~(EXT4_BLOCK_SIZE(sb) - 1);
++      size2 = EXT4_SB(sb)->s_warning_dir_size / 16 * 11;
++      size2 = size2 & ~(EXT4_BLOCK_SIZE(sb) - 1);
++      if (in_range(dir->i_size, size1, EXT4_BLOCK_SIZE(sb)) ||
++          in_range(dir->i_size, size2, EXT4_BLOCK_SIZE(sb)))
++              return true;
++
++      return false;
++}
++
+ /*
+  * Returns 0 for success, or a negative error value
+  */
+@@ -2494,6 +2550,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
+       struct ext4_dir_entry_2 *de;
+       int restart;
+       int err;
++      bool ret_warn = false;
+ again:
+       restart = 0;
+@@ -2517,6 +2574,11 @@ again:
+       /* Block full, should compress but for now just split */
+       dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
+                      dx_get_count(entries), dx_get_limit(entries)));
++
++      if (frame - frames + 1 >= ext4_dir_htree_level(sb) ||
++          EXT4_SB(sb)->s_warning_dir_size)
++              ret_warn = dir_size_in_warning_range(frames, frame, dir);
++
+       /* Need to split index? */
+       if (dx_get_count(entries) == dx_get_limit(entries)) {
+               ext4_lblk_t newblock;
+@@ -2548,7 +2610,7 @@ again:
+                                        dir->i_ino, current->comm, levels,
+                                        ext4_dir_htree_level(sb));
+                       if (ext4_dir_htree_level(sb) < EXT4_HTREE_LEVEL) {
+-                              ext4_warning(sb, "Large directory feature is"
++                              ext4_warning(sb, "Large directory feature is "
+                                                "not enabled on this "
+                                                "filesystem");
+                       }
+@@ -2674,6 +2736,8 @@ cleanup:
+        * repeat dx_probe() to find out valid htree-path */
+       if (restart && err == 0)
+               goto again;
++      if (err == 0 && ret_warn)
++              err = -ENOBUFS;
+       return err;
+ }
+diff --git a/fs/ext4/super.c b/fs/ext4/super.c
+index c625960..1914379 100644
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -1546,6 +1546,8 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
+               sbi->s_li_wait_mult = arg;
+       } else if (token == Opt_max_dir_size_kb) {
+               sbi->s_max_dir_size_kb = arg;
++              /* reset s_warning_dir_size and make it re-calculated */
++              sbi->s_warning_dir_size = 0;
+       } else if (token == Opt_stripe) {
+               sbi->s_stripe = arg;
+       } else if (token == Opt_resuid) {
+@@ -2657,6 +2659,7 @@ EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
+ EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
+ EXT4_RW_ATTR_SBI_UI(max_dir_size, s_max_dir_size_kb);
+ EXT4_RW_ATTR_SBI_UI(max_dir_size_kb, s_max_dir_size_kb);
++EXT4_RW_ATTR_SBI_UI(warning_dir_size, s_warning_dir_size);
+ EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
+ EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
+ EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
+@@ -2686,6 +2689,7 @@ static struct attribute *ext4_attrs[] = {
+       ATTR_LIST(inode_goal),
+       ATTR_LIST(max_dir_size),
+       ATTR_LIST(max_dir_size_kb),
++      ATTR_LIST(warning_dir_size),
+       ATTR_LIST(mb_stats),
+       ATTR_LIST(mb_max_to_scan),
+       ATTR_LIST(mb_min_to_scan),
+-- 
+1.8.3.1
+
index abea8ea..0881a7a 100644 (file)
@@ -45,3 +45,4 @@ rhel6.3/ext4-not-discard-preallocation-umount.patch
 rhel6.3/ext4-journal-path-opt.patch
 rhel6.3/ext4-drop-inode-from-orphan-list-if-ext4_delete_inode-fails.patch
 rhel6.3/ext4-notalloc_under_idatasem.patch
+rhel6.5/ext4-give-warning-with-dir-htree-growing.patch
index 9b67ab5..e3df418 100644 (file)
@@ -46,3 +46,4 @@ rhel6.3/ext4-drop-inode-from-orphan-list-if-ext4_delete_inode-fails.patch
 rhel6.6/ext4-remove-truncate-warning.patch
 rhel6.6/ext4-corrupted-inode-block-bitmaps-handling-patches.patch
 rhel6.3/ext4-notalloc_under_idatasem.patch
+rhel6.5/ext4-give-warning-with-dir-htree-growing.patch
index 6b526f3..e3fd3fd 100644 (file)
@@ -45,3 +45,4 @@ rhel6.3/ext4-journal-path-opt.patch
 rhel6.6/ext4-remove-truncate-warning.patch
 rhel6.6/ext4-corrupted-inode-block-bitmaps-handling-patches.patch
 rhel6.3/ext4-notalloc_under_idatasem.patch
+rhel6.5/ext4-give-warning-with-dir-htree-growing.patch
index d4cfd2a..4630175 100644 (file)
@@ -38,3 +38,4 @@ rhel6.3/ext4-not-discard-preallocation-umount.patch
 rhel6.3/ext4-journal-path-opt.patch
 sles11sp2/ext4-corrupted-inode-block-bitmaps-handling-patches.patch
 rhel6.3/ext4-notalloc_under_idatasem.patch
+rhel6.5/ext4-give-warning-with-dir-htree-growing.patch
index c48b1ac..fc947ed 100644 (file)
@@ -17,3 +17,4 @@ rhel7/ext4-pdirop.patch
 rhel7/ext4-max-dir-size.patch
 rhel7/ext4-remove-truncate-warning.patch
 rhel7/ext4-corrupted-inode-block-bitmaps-handling-patches.patch
+rhel7/ext4-give-warning-with-dir-htree-growing.patch
index 0779f25..d58b6c8 100644 (file)
@@ -225,6 +225,7 @@ int osd_add_to_remote_parent(const struct lu_env *env, struct osd_device *osd,
        struct osd_thread_info  *oti = osd_oti_get(env);
        struct lustre_mdt_attrs *lma = &oti->oti_mdt_attrs;
        char                    *name = oti->oti_name;
+       struct osd_thread_info  *info = osd_oti_get(env);
        struct dentry           *dentry;
        struct dentry           *parent;
        int                     rc;
@@ -247,8 +248,8 @@ int osd_add_to_remote_parent(const struct lu_env *env, struct osd_device *osd,
        dentry = osd_child_dentry_by_inode(env, parent->d_inode,
                                           name, strlen(name));
        mutex_lock(&parent->d_inode->i_mutex);
-       rc = osd_ldiskfs_add_entry(oh->ot_handle, dentry, obj->oo_inode,
-                                  NULL);
+       rc = osd_ldiskfs_add_entry(info, oh->ot_handle, dentry,
+                                  obj->oo_inode, NULL);
        CDEBUG(D_INODE, "%s: add %s:%lu to remote parent %lu.\n", osd_name(osd),
               name, obj->oo_inode->i_ino, parent->d_inode->i_ino);
        ldiskfs_inc_count(oh->ot_handle, parent->d_inode);
@@ -735,7 +736,7 @@ static int osd_obj_add_entry(struct osd_thread_info *info,
 
        ll_vfs_dq_init(dir->d_inode);
        mutex_lock(&dir->d_inode->i_mutex);
-       rc = osd_ldiskfs_add_entry(th, child, inode, NULL);
+       rc = osd_ldiskfs_add_entry(info, th, child, inode, NULL);
        mutex_unlock(&dir->d_inode->i_mutex);
 
        RETURN(rc);
@@ -1179,7 +1180,7 @@ int osd_obj_map_recover(struct osd_thread_info *info,
        if (rc != 0)
                GOTO(unlock, rc);
 
-       rc = osd_ldiskfs_add_entry(jh, tgt_child, inode, NULL);
+       rc = osd_ldiskfs_add_entry(info, jh, tgt_child, inode, NULL);
 
        GOTO(unlock, rc);
 
index 2bf04b4..944b035 100644 (file)
@@ -432,6 +432,53 @@ struct inode *osd_iget(struct osd_thread_info *info, struct osd_device *dev,
        return inode;
 }
 
+int osd_ldiskfs_add_entry(struct osd_thread_info *info,
+                         handle_t *handle, struct dentry *child,
+                         struct inode *inode, struct htree_lock *hlock)
+{
+       int rc, rc2;
+
+       rc = __ldiskfs_add_entry(handle, child, inode, hlock);
+       if (rc == -ENOBUFS || rc == -ENOSPC) {
+               char fidbuf[FID_LEN + 1];
+               struct lustre_mdt_attrs lma;
+               struct lu_fid fid = { };
+               char *errstr;
+               struct dentry *p_dentry = child->d_parent;
+
+               rc2 = osd_get_lma(info, p_dentry->d_inode, p_dentry,
+                                &lma);
+               if (rc2 == 0) {
+                       fid = lma.lma_self_fid;
+                       snprintf(fidbuf, sizeof(fidbuf), DFID, PFID(&fid));
+               } else if (rc2 == -ENODATA) {
+                       if (unlikely(p_dentry->d_inode ==
+                                    inode->i_sb->s_root->d_inode))
+                               lu_local_obj_fid(&fid, OSD_FS_ROOT_OID);
+                       else if (info->oti_dev && !info->oti_dev->od_is_ost &&
+                                fid_seq_is_mdt0(fid_seq(&fid)))
+                               lu_igif_build(&fid, p_dentry->d_inode->i_ino,
+                                             p_dentry->d_inode->i_generation);
+                       snprintf(fidbuf, sizeof(fidbuf), DFID, PFID(&fid));
+               } else {
+                       snprintf(fidbuf, FID_LEN, "%s", "unknown");
+               }
+
+               if (rc == -ENOSPC)
+                       errstr = "has reached";
+               else
+                       errstr = "is approaching";
+               CWARN("%.16s: directory (inode: %lu FID: %s) %s maximum entry limit\n",
+                       LDISKFS_SB(inode->i_sb)->s_es->s_volume_name,
+                       p_dentry->d_inode->i_ino, fidbuf, errstr);
+               /* ignore such error now */
+               if (rc == -ENOBUFS)
+                       rc = 0;
+       }
+       return rc;
+}
+
+
 static struct inode *
 osd_iget_fid(struct osd_thread_info *info, struct osd_device *dev,
             struct osd_inode_id *id, struct lu_fid *fid)
@@ -4151,7 +4198,8 @@ static int __osd_ea_add_rec(struct osd_thread_info *info,
        child = osd_child_dentry_get(info->oti_env, pobj, name, strlen(name));
        child->d_fsdata = (void *)ldp;
        ll_vfs_dq_init(pobj->oo_inode);
-       rc = osd_ldiskfs_add_entry(oth->ot_handle, child, cinode, hlock);
+       rc = osd_ldiskfs_add_entry(info, oth->ot_handle, child,
+                                  cinode, hlock);
        if (rc == 0 && OBD_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_TYPE)) {
                struct ldiskfs_dir_entry_2      *de;
                struct buffer_head              *bh;
@@ -5380,6 +5428,7 @@ osd_dirent_reinsert(const struct lu_env *env, handle_t *jh,
        struct ldiskfs_dentry_param *ldp;
        int                          namelen    = dentry->d_name.len;
        int                          rc;
+       struct osd_thread_info     *info        = osd_oti_get(env);
        ENTRY;
 
        if (!LDISKFS_HAS_INCOMPAT_FEATURE(inode->i_sb,
@@ -5413,7 +5462,7 @@ osd_dirent_reinsert(const struct lu_env *env, handle_t *jh,
        osd_get_ldiskfs_dirent_param(ldp, fid);
        dentry->d_fsdata = (void *)ldp;
        ll_vfs_dq_init(dir);
-       rc = osd_ldiskfs_add_entry(jh, dentry, inode, hlock);
+       rc = osd_ldiskfs_add_entry(info, jh, dentry, inode, hlock);
        /* It is too bad, we cannot reinsert the name entry back.
         * That means we lose it! */
        if (rc != 0)
index 19c7315..ae51525 100644 (file)
@@ -169,9 +169,9 @@ struct osd_mdobj {
 struct osd_mdobj_map {
        struct dentry   *omm_remote_parent;
 };
-
-#define osd_ldiskfs_add_entry(handle, child, cinode, hlock) \
-       __ldiskfs_add_entry(handle, child, cinode, hlock)
+int osd_ldiskfs_add_entry(struct osd_thread_info *info,
+                         handle_t *handle, struct dentry *child,
+                         struct inode *inode, struct htree_lock *hlock);
 
 #define OSD_OTABLE_IT_CACHE_SIZE       64
 #define OSD_OTABLE_IT_CACHE_MASK       (~(OSD_OTABLE_IT_CACHE_SIZE - 1))
index 6b6942f..ae3d5c1 100644 (file)
@@ -151,7 +151,7 @@ static int osd_oi_index_create_one(struct osd_thread_info *info,
                                     feat->dif_ptrsize, feat->dif_recsize_max,
                                     jh);
        dentry = osd_child_dentry_by_inode(env, dir, name, strlen(name));
-       rc = osd_ldiskfs_add_entry(jh, dentry, inode, NULL);
+       rc = osd_ldiskfs_add_entry(info, jh, dentry, inode, NULL);
        ldiskfs_journal_stop(jh);
        iput(inode);
        return rc;
index 86ea6d0..3b77e07 100755 (executable)
@@ -8688,7 +8688,18 @@ set_dir_limits () {
                do_facet $facet "test -e $LDPROC/$canondev/max_dir_size" ||
                                                LDPROC=/sys/fs/ldiskfs
                do_facet $facet "echo $1 >$LDPROC/$canondev/max_dir_size"
+               do_facet $facet "test -e $LDPROC/$canondev/warning_dir_size" ||
+                                               LDPROC=/sys/fs/ldiskfs
+               do_facet $facet "echo $2 >$LDPROC/$canondev/warning_dir_size"
+       done
+}
+
+check_mds_dmesg() {
+       local facets=$(get_facets MDS)
+       for facet in ${facets//,/ }; do
+               do_facet $facet "dmesg | tail -3 | grep -q $1" && return 0
        done
+       return 1
 }
 
 test_129() {
@@ -8700,6 +8711,7 @@ test_129() {
        remote_mds_nodsh && skip "remote MDS with nodsh" && return
        ENOSPC=28
        EFBIG=27
+       has_warning=0
 
        rm -rf $DIR/$tdir
        test_mkdir -p $DIR/$tdir
@@ -8707,8 +8719,8 @@ test_129() {
        # block size of mds1
        local MDT_DEV=$(mdsdevname ${SINGLEMDS//mds/})
        local MDSBLOCKSIZE=$($LCTL get_param -n mdc.*MDT0000*.blocksize)
-       local MAX=$((MDSBLOCKSIZE * 3))
-       set_dir_limits $MAX
+       local MAX=$((MDSBLOCKSIZE * 5))
+       set_dir_limits $MAX $MAX
        local I=$(stat -c%s "$DIR/$tdir")
        local J=0
        local STRIPE_COUNT=1
@@ -8717,15 +8729,25 @@ test_129() {
        while [[ $I -le $MAX ]]; do
                $MULTIOP $DIR/$tdir/$J Oc
                rc=$?
+               if [ $has_warning -eq 0 ]; then
+                       check_mds_dmesg '"is approaching"' &&
+                               has_warning=1
+               fi
                #check two errors ENOSPC for new version of ext4 max_dir_size patch
                #mainline kernel commit df981d03eeff7971ac7e6ff37000bfa702327ef1
                #and EFBIG for previous versions
                if [ $rc -eq $EFBIG -o $rc -eq $ENOSPC ]; then
-                       set_dir_limits 0
+                       set_dir_limits 0 0
                        echo "return code $rc received as expected"
                        multiop $DIR/$tdir/$J Oc ||
                                error_exit "multiop failed w/o dir size limit"
 
+                       check_mds_dmesg '"has reached"' ||
+                               error_exit "has reached message should be output"
+
+                       [ $has_warning ] ||
+                               error_exit "warning message should be output"
+
                        I=$(stat -c%s "$DIR/$tdir")
 
                        if [ $(lustre_version_code $SINGLEMDS) -lt \
@@ -8737,7 +8759,7 @@ test_129() {
                        fi
                        error_exit "current dir size $I, previous limit $MAX"
                elif [ $rc -ne 0 ]; then
-                       set_dir_limits 0
+                       set_dir_limits 0 0
                        error_exit "return code $rc received instead of expected " \
                                   "$EFBIG or $ENOSPC, files in dir $I"
                fi
@@ -8745,7 +8767,7 @@ test_129() {
                I=$(stat -c%s "$DIR/$tdir")
        done
 
-       set_dir_limits 0
+       set_dir_limits 0 0
        error "exceeded dir size limit $MAX($MDSCOUNT) : $I bytes"
 }
 run_test 129 "test directory size limit ========================"