Whamcloud - gitweb
LU-4923 lfsck: detailed statistics for namespace LFSCK 30/10030/7
author Fan Yong <fan.yong@intel.com>
Sat, 5 Apr 2014 01:37:07 +0000 (09:37 +0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Fri, 9 May 2014 14:31:59 +0000 (14:31 +0000)
Some enhancements for namespace LFSCK:

1) For an MDT device that was upgraded from Lustre-1.x and still does
   not have the 'dirdata' feature enabled, print a warning message at
   mount time.

2) Report more detailed statistics: how many FID-in-dirent entries
   have been repaired and how many linkEA entries have been repaired.

Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: I6a2468b1b8ac4dee91d5d3a26872214391fd3e3b
Reviewed-on: http://review.whamcloud.com/10030
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/lfsck/lfsck_internal.h
lustre/lfsck/lfsck_namespace.c
lustre/osd-ldiskfs/osd_handler.c
lustre/tests/sanity-lfsck.sh

index beccde6..46ebe84 100644 (file)
@@ -181,8 +181,11 @@ struct lfsck_namespace {
        /* The latest object has been processed (failed) during double scan. */
        struct lu_fid   ln_fid_latest_scanned_phase2;
 
-       /* For further using. 256-bytes aligned now. */
-       __u64   ln_reserved[2];
+       /* How many FID-in-dirent entries have been repaired. */
+       __u64   ln_dirent_repaired;
+
+       /* How many linkEA entries have been repaired. */
+       __u64   ln_linkea_repaired;
 };
 
 enum lfsck_layout_inconsistency_type {
index c80fb48..92e77cb 100644 (file)
 
 static const char lfsck_namespace_name[] = "lfsck_namespace";
 
-static void lfsck_namespace_le_to_cpu(struct lfsck_namespace *des,
+static void lfsck_namespace_le_to_cpu(struct lfsck_namespace *dst,
                                      struct lfsck_namespace *src)
 {
-       des->ln_magic = le32_to_cpu(src->ln_magic);
-       des->ln_status = le32_to_cpu(src->ln_status);
-       des->ln_flags = le32_to_cpu(src->ln_flags);
-       des->ln_success_count = le32_to_cpu(src->ln_success_count);
-       des->ln_run_time_phase1 = le32_to_cpu(src->ln_run_time_phase1);
-       des->ln_run_time_phase2 = le32_to_cpu(src->ln_run_time_phase2);
-       des->ln_time_last_complete = le64_to_cpu(src->ln_time_last_complete);
-       des->ln_time_latest_start = le64_to_cpu(src->ln_time_latest_start);
-       des->ln_time_last_checkpoint =
+       dst->ln_magic = le32_to_cpu(src->ln_magic);
+       dst->ln_status = le32_to_cpu(src->ln_status);
+       dst->ln_flags = le32_to_cpu(src->ln_flags);
+       dst->ln_success_count = le32_to_cpu(src->ln_success_count);
+       dst->ln_run_time_phase1 = le32_to_cpu(src->ln_run_time_phase1);
+       dst->ln_run_time_phase2 = le32_to_cpu(src->ln_run_time_phase2);
+       dst->ln_time_last_complete = le64_to_cpu(src->ln_time_last_complete);
+       dst->ln_time_latest_start = le64_to_cpu(src->ln_time_latest_start);
+       dst->ln_time_last_checkpoint =
                                le64_to_cpu(src->ln_time_last_checkpoint);
-       lfsck_position_le_to_cpu(&des->ln_pos_latest_start,
+       lfsck_position_le_to_cpu(&dst->ln_pos_latest_start,
                                 &src->ln_pos_latest_start);
-       lfsck_position_le_to_cpu(&des->ln_pos_last_checkpoint,
+       lfsck_position_le_to_cpu(&dst->ln_pos_last_checkpoint,
                                 &src->ln_pos_last_checkpoint);
-       lfsck_position_le_to_cpu(&des->ln_pos_first_inconsistent,
+       lfsck_position_le_to_cpu(&dst->ln_pos_first_inconsistent,
                                 &src->ln_pos_first_inconsistent);
-       des->ln_items_checked = le64_to_cpu(src->ln_items_checked);
-       des->ln_items_repaired = le64_to_cpu(src->ln_items_repaired);
-       des->ln_items_failed = le64_to_cpu(src->ln_items_failed);
-       des->ln_dirs_checked = le64_to_cpu(src->ln_dirs_checked);
-       des->ln_mlinked_checked = le64_to_cpu(src->ln_mlinked_checked);
-       des->ln_objs_checked_phase2 = le64_to_cpu(src->ln_objs_checked_phase2);
-       des->ln_objs_repaired_phase2 =
+       dst->ln_items_checked = le64_to_cpu(src->ln_items_checked);
+       dst->ln_items_repaired = le64_to_cpu(src->ln_items_repaired);
+       dst->ln_items_failed = le64_to_cpu(src->ln_items_failed);
+       dst->ln_dirs_checked = le64_to_cpu(src->ln_dirs_checked);
+       dst->ln_mlinked_checked = le64_to_cpu(src->ln_mlinked_checked);
+       dst->ln_objs_checked_phase2 = le64_to_cpu(src->ln_objs_checked_phase2);
+       dst->ln_objs_repaired_phase2 =
                                le64_to_cpu(src->ln_objs_repaired_phase2);
-       des->ln_objs_failed_phase2 = le64_to_cpu(src->ln_objs_failed_phase2);
-       des->ln_objs_nlink_repaired = le64_to_cpu(src->ln_objs_nlink_repaired);
-       des->ln_objs_lost_found = le64_to_cpu(src->ln_objs_lost_found);
-       fid_le_to_cpu(&des->ln_fid_latest_scanned_phase2,
+       dst->ln_objs_failed_phase2 = le64_to_cpu(src->ln_objs_failed_phase2);
+       dst->ln_objs_nlink_repaired = le64_to_cpu(src->ln_objs_nlink_repaired);
+       dst->ln_objs_lost_found = le64_to_cpu(src->ln_objs_lost_found);
+       fid_le_to_cpu(&dst->ln_fid_latest_scanned_phase2,
                      &src->ln_fid_latest_scanned_phase2);
+       dst->ln_dirent_repaired = le64_to_cpu(src->ln_dirent_repaired);
+       dst->ln_linkea_repaired = le64_to_cpu(src->ln_linkea_repaired);
 }
 
-static void lfsck_namespace_cpu_to_le(struct lfsck_namespace *des,
+static void lfsck_namespace_cpu_to_le(struct lfsck_namespace *dst,
                                      struct lfsck_namespace *src)
 {
-       des->ln_magic = cpu_to_le32(src->ln_magic);
-       des->ln_status = cpu_to_le32(src->ln_status);
-       des->ln_flags = cpu_to_le32(src->ln_flags);
-       des->ln_success_count = cpu_to_le32(src->ln_success_count);
-       des->ln_run_time_phase1 = cpu_to_le32(src->ln_run_time_phase1);
-       des->ln_run_time_phase2 = cpu_to_le32(src->ln_run_time_phase2);
-       des->ln_time_last_complete = cpu_to_le64(src->ln_time_last_complete);
-       des->ln_time_latest_start = cpu_to_le64(src->ln_time_latest_start);
-       des->ln_time_last_checkpoint =
+       dst->ln_magic = cpu_to_le32(src->ln_magic);
+       dst->ln_status = cpu_to_le32(src->ln_status);
+       dst->ln_flags = cpu_to_le32(src->ln_flags);
+       dst->ln_success_count = cpu_to_le32(src->ln_success_count);
+       dst->ln_run_time_phase1 = cpu_to_le32(src->ln_run_time_phase1);
+       dst->ln_run_time_phase2 = cpu_to_le32(src->ln_run_time_phase2);
+       dst->ln_time_last_complete = cpu_to_le64(src->ln_time_last_complete);
+       dst->ln_time_latest_start = cpu_to_le64(src->ln_time_latest_start);
+       dst->ln_time_last_checkpoint =
                                cpu_to_le64(src->ln_time_last_checkpoint);
-       lfsck_position_cpu_to_le(&des->ln_pos_latest_start,
+       lfsck_position_cpu_to_le(&dst->ln_pos_latest_start,
                                 &src->ln_pos_latest_start);
-       lfsck_position_cpu_to_le(&des->ln_pos_last_checkpoint,
+       lfsck_position_cpu_to_le(&dst->ln_pos_last_checkpoint,
                                 &src->ln_pos_last_checkpoint);
-       lfsck_position_cpu_to_le(&des->ln_pos_first_inconsistent,
+       lfsck_position_cpu_to_le(&dst->ln_pos_first_inconsistent,
                                 &src->ln_pos_first_inconsistent);
-       des->ln_items_checked = cpu_to_le64(src->ln_items_checked);
-       des->ln_items_repaired = cpu_to_le64(src->ln_items_repaired);
-       des->ln_items_failed = cpu_to_le64(src->ln_items_failed);
-       des->ln_dirs_checked = cpu_to_le64(src->ln_dirs_checked);
-       des->ln_mlinked_checked = cpu_to_le64(src->ln_mlinked_checked);
-       des->ln_objs_checked_phase2 = cpu_to_le64(src->ln_objs_checked_phase2);
-       des->ln_objs_repaired_phase2 =
+       dst->ln_items_checked = cpu_to_le64(src->ln_items_checked);
+       dst->ln_items_repaired = cpu_to_le64(src->ln_items_repaired);
+       dst->ln_items_failed = cpu_to_le64(src->ln_items_failed);
+       dst->ln_dirs_checked = cpu_to_le64(src->ln_dirs_checked);
+       dst->ln_mlinked_checked = cpu_to_le64(src->ln_mlinked_checked);
+       dst->ln_objs_checked_phase2 = cpu_to_le64(src->ln_objs_checked_phase2);
+       dst->ln_objs_repaired_phase2 =
                                cpu_to_le64(src->ln_objs_repaired_phase2);
-       des->ln_objs_failed_phase2 = cpu_to_le64(src->ln_objs_failed_phase2);
-       des->ln_objs_nlink_repaired = cpu_to_le64(src->ln_objs_nlink_repaired);
-       des->ln_objs_lost_found = cpu_to_le64(src->ln_objs_lost_found);
-       fid_cpu_to_le(&des->ln_fid_latest_scanned_phase2,
+       dst->ln_objs_failed_phase2 = cpu_to_le64(src->ln_objs_failed_phase2);
+       dst->ln_objs_nlink_repaired = cpu_to_le64(src->ln_objs_nlink_repaired);
+       dst->ln_objs_lost_found = cpu_to_le64(src->ln_objs_lost_found);
+       fid_cpu_to_le(&dst->ln_fid_latest_scanned_phase2,
                      &src->ln_fid_latest_scanned_phase2);
+       dst->ln_dirent_repaired = cpu_to_le64(src->ln_dirent_repaired);
+       dst->ln_linkea_repaired = cpu_to_le64(src->ln_linkea_repaired);
 }
 
 /**
@@ -862,9 +866,11 @@ static int lfsck_namespace_exec_dir(const struct lu_env *env,
 
        if (ent->lde_attrs & LUDA_UPGRADE) {
                ns->ln_flags |= LF_UPGRADE;
+               ns->ln_dirent_repaired++;
                repaired = true;
        } else if (ent->lde_attrs & LUDA_REPAIR) {
                ns->ln_flags |= LF_INCONSISTENT;
+               ns->ln_dirent_repaired++;
                repaired = true;
        }
 
@@ -937,6 +943,7 @@ again:
 
 nodata:
                if (bk->lb_param & LPF_DRYRUN) {
+                       ns->ln_linkea_repaired++;
                        repaired = true;
                        goto record;
                }
@@ -969,6 +976,7 @@ nodata:
                        GOTO(stop, rc);
 
                count = ldata.ld_leh->leh_reccount;
+               ns->ln_linkea_repaired++;
                repaired = true;
        } else {
                GOTO(stop, rc);
@@ -1175,6 +1183,8 @@ lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com,
                              "failed_phase2: "LPU64"\n"
                              "dirs: "LPU64"\n"
                              "M-linked: "LPU64"\n"
+                             "dirent_repaired: "LPU64"\n"
+                             "linkea_repaired: "LPU64"\n"
                              "nlinks_repaired: "LPU64"\n"
                              "lost_found: "LPU64"\n"
                              "success_count: %u\n"
@@ -1192,6 +1202,8 @@ lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com,
                              ns->ln_objs_failed_phase2,
                              ns->ln_dirs_checked,
                              ns->ln_mlinked_checked,
+                             ns->ln_dirent_repaired,
+                             ns->ln_linkea_repaired,
                              ns->ln_objs_nlink_repaired,
                              ns->ln_objs_lost_found,
                              ns->ln_success_count,
@@ -1261,6 +1273,8 @@ lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com,
                              "failed_phase2: "LPU64"\n"
                              "dirs: "LPU64"\n"
                              "M-linked: "LPU64"\n"
+                             "dirent_repaired: "LPU64"\n"
+                             "linkea_repaired: "LPU64"\n"
                              "nlinks_repaired: "LPU64"\n"
                              "lost_found: "LPU64"\n"
                              "success_count: %u\n"
@@ -1279,6 +1293,8 @@ lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com,
                              ns->ln_objs_failed_phase2,
                              ns->ln_dirs_checked,
                              ns->ln_mlinked_checked,
+                             ns->ln_dirent_repaired,
+                             ns->ln_linkea_repaired,
                              ns->ln_objs_nlink_repaired,
                              ns->ln_objs_lost_found,
                              ns->ln_success_count,
@@ -1310,6 +1326,8 @@ lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com,
                              "failed_phase2: "LPU64"\n"
                              "dirs: "LPU64"\n"
                              "M-linked: "LPU64"\n"
+                             "dirent_repaired: "LPU64"\n"
+                             "linkea_repaired: "LPU64"\n"
                              "nlinks_repaired: "LPU64"\n"
                              "lost_found: "LPU64"\n"
                              "success_count: %u\n"
@@ -1328,6 +1346,8 @@ lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com,
                              ns->ln_objs_failed_phase2,
                              ns->ln_dirs_checked,
                              ns->ln_mlinked_checked,
+                             ns->ln_dirent_repaired,
+                             ns->ln_linkea_repaired,
                              ns->ln_objs_nlink_repaired,
                              ns->ln_objs_lost_found,
                              ns->ln_success_count,
index 5ee7ff1..9d20f5d 100644 (file)
@@ -5744,6 +5744,11 @@ static int osd_mount(const struct lu_env *env,
        if (LDISKFS_HAS_INCOMPAT_FEATURE(o->od_mnt->mnt_sb,
                                         LDISKFS_FEATURE_INCOMPAT_DIRDATA))
                LDISKFS_SB(osd_sb(o))->s_mount_opt |= LDISKFS_MOUNT_DIRDATA;
+       else if (!o->od_is_ost)
+               CWARN("%s: device %s was upgraded from Lustre-1.x without "
+                     "enabling the dirdata feature. If you do not want to "
+                     "downgrade to Lustre-1.x again, you can enable it via "
+                     "'tune2fs -O dirdata device'\n", name, dev);
 #endif
        inode = osd_sb(o)->s_root->d_inode;
        lu_local_obj_fid(fid, OSD_FS_ROOT_OID);
@@ -5814,20 +5819,20 @@ static int osd_device_init0(const struct lu_env *env,
        o->od_writethrough_cache = 1;
        o->od_readcache_max_filesize = OSD_MAX_CACHE_SIZE;
 
-       rc = osd_mount(env, o, cfg);
-       if (rc)
-               GOTO(out_capa, rc);
-
        cplen = strlcpy(o->od_svname, lustre_cfg_string(cfg, 4),
                        sizeof(o->od_svname));
        if (cplen >= sizeof(o->od_svname)) {
                rc = -E2BIG;
-               GOTO(out_mnt, rc);
+               GOTO(out_capa, rc);
        }
 
        if (server_name_is_ost(o->od_svname))
                o->od_is_ost = 1;
 
+       rc = osd_mount(env, o, cfg);
+       if (rc != 0)
+               GOTO(out_capa, rc);
+
        rc = osd_obj_map_init(env, o);
        if (rc != 0)
                GOTO(out_mnt, rc);
index 0bdd684..7d51934 100644 (file)
@@ -177,7 +177,12 @@ test_1a() {
        }
 
        local repaired=$($SHOW_NAMESPACE |
+                        awk '/^dirent_repaired/ { print $2 }')
+       # for interop with old server
+       [ -z "$repaired" ] &&
+               repaired=$($SHOW_NAMESPACE |
                         awk '/^updated_phase1/ { print $2 }')
+
        [ $repaired -eq 1 ] ||
                error "(5) Fail to repair crashed FID-in-dirent: $repaired"
 
@@ -215,7 +220,12 @@ test_1b()
        }
 
        local repaired=$($SHOW_NAMESPACE |
+                        awk '/^dirent_repaired/ { print $2 }')
+       # for interop with old server
+       [ -z "$repaired" ] &&
+               repaired=$($SHOW_NAMESPACE |
                         awk '/^updated_phase1/ { print $2 }')
+
        [ $repaired -eq 1 ] ||
                error "(5) Fail to repair missed FID-in-LMA: $repaired"
 
@@ -248,7 +258,12 @@ test_2a() {
        }
 
        local repaired=$($SHOW_NAMESPACE |
+                        awk '/^linkea_repaired/ { print $2 }')
+       # for interop with old server
+       [ -z "$repaired" ] &&
+               repaired=$($SHOW_NAMESPACE |
                         awk '/^updated_phase1/ { print $2 }')
+
        [ $repaired -eq 1 ] ||
                error "(5) Fail to repair crashed linkEA: $repaired"
 
@@ -374,9 +389,14 @@ test_4()
        [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
 
        local repaired=$($SHOW_NAMESPACE |
+                        awk '/^dirent_repaired/ { print $2 }')
+       # for interop with old server
+       [ -z "$repaired" ] &&
+               repaired=$($SHOW_NAMESPACE |
                         awk '/^updated_phase1/ { print $2 }')
+
        [ $repaired -ge 9 ] ||
-               error "(9) Fail to repair crashed linkEA: $repaired"
+               error "(9) Fail to re-generate FID-in-dirent: $repaired"
 
        mount_client $MOUNT || error "(10) Fail to start client!"
 
@@ -427,9 +447,14 @@ test_5()
        [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
 
        local repaired=$($SHOW_NAMESPACE |
+                        awk '/^dirent_repaired/ { print $2 }')
+       # for interop with old server
+       [ -z "$repaired" ] &&
+               repaired=$($SHOW_NAMESPACE |
                         awk '/^updated_phase1/ { print $2 }')
+
        [ $repaired -ge 2 ] ||
-               error "(9) Fail to repair crashed linkEA: $repaired"
+               error "(9) Fail to generate FID-in-dirent for IGIF: $repaired"
 
        mount_client $MOUNT || error "(10) Fail to start client!"