Whamcloud - gitweb
LU-5518 lfsck: recover orphans from backend lost+found 36/11536/25
authorFan Yong <fan.yong@intel.com>
Wed, 27 Aug 2014 15:38:30 +0000 (23:38 +0800)
committerOleg Drokin <oleg.drokin@intel.com>
Thu, 30 Oct 2014 02:13:47 +0000 (02:13 +0000)
Some local filesystem consistency verification tools, such as
e2fsck for ldiskfs, will add orphan objects under the backend
special /lost+found directory. That directory is invisible to
clients. The namespace LFSCK will scan that directory, and for
the objects that were originally visible to clients (according
to the FID), the namespace LFSCK will move them back to the
normal namespace (according to the linkEA) or to the globally
visible .lustre/lost+found/MDTxxxx/ directory.

The namespace LFSCK will insert the FID of the orphan (that is
under the backend /lost+found directory) into the namespace LFSCK
tracing file for further processing (via the subsequent namespace
LFSCK second-stage scanning). At the same time, it removes the orphan
name entry from the backend /lost+found directory. There is an interval
between the orphan name entry being removed from the backend /lost+found
directory and the orphan FID in the LFSCK tracing file being handled. In
that interval, the LFSCK can be reset, and then all the FIDs recorded
in the namespace LFSCK tracing file will be dropped. To guarantee
that the orphans can be found when the LFSCK runs next time without
running e2fsck again, when removing the orphan name entry the LFSCK
will set the orphan's ctime attribute to 1, since normal applications
cannot set an object's ctime attribute to 1. Then, when the LFSCK
runs next time, it can record the object (whose ctime is 1) in the
namespace LFSCK tracing file during the first-stage scanning. Once
the FID in the LFSCK tracing file has been handled successfully,
the object's ctime attribute will be changed back to a normal time.

Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: I2f8e23801a868f2c99c630face4bbfe08ece8844
Reviewed-on: http://review.whamcloud.com/11536
Tested-by: Jenkins
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/fid/fid_lib.c
lustre/include/lustre_fid.h
lustre/lfsck/lfsck_engine.c
lustre/lfsck/lfsck_internal.h
lustre/lfsck/lfsck_layout.c
lustre/lfsck/lfsck_namespace.c
lustre/osd-ldiskfs/osd_scrub.c
lustre/tests/sanity-lfsck.sh

index 169ab9a..343d037 100644 (file)
@@ -99,3 +99,9 @@ const struct lu_fid LU_LPF_FID = { .f_seq = FID_SEQ_DOT_LUSTRE,
                                   .f_oid = FID_OID_DOT_LUSTRE_LPF,
                                   .f_ver = 0x0000000000000000 };
 EXPORT_SYMBOL(LU_LPF_FID);
+
+/** "/lost+found" - special FID for ldiskfs backend, invisible to client. */
+const struct lu_fid LU_BACKEND_LPF_FID = { .f_seq = FID_SEQ_LOCAL_FILE,
+                                          .f_oid = OSD_LPF_OID,
+                                          .f_ver = 0x0000000000000000 };
+EXPORT_SYMBOL(LU_BACKEND_LPF_FID);
index 0d957aa..2c5509e 100644 (file)
@@ -168,6 +168,7 @@ extern const struct lu_fid LUSTRE_BFL_FID;
 extern const struct lu_fid LU_OBF_FID;
 extern const struct lu_fid LU_LPF_FID;
 extern const struct lu_fid LU_DOT_LUSTRE_FID;
+extern const struct lu_fid LU_BACKEND_LPF_FID;
 
 enum {
        /*
@@ -223,6 +224,7 @@ enum local_oid {
        ACCT_GROUP_OID          = 16UL,
        LFSCK_BOOKMARK_OID      = 17UL,
        OTABLE_IT_OID           = 18UL,
+       OSD_LPF_OID             = 19UL,
        /* These two definitions are obsolete
         * OFD_GROUP0_LAST_OID     = 20UL,
         * OFD_GROUP4K_LAST_OID    = 20UL+4096,
index 4d232d9..8398230 100644 (file)
@@ -39,7 +39,7 @@
 
 #include "lfsck_internal.h"
 
-static int lfsck_unpack_ent(struct lu_dirent *ent, __u64 *cookie, __u16 *type)
+int lfsck_unpack_ent(struct lu_dirent *ent, __u64 *cookie, __u16 *type)
 {
        struct luda_type        *lt;
        int                      align = sizeof(*lt) - 1;
@@ -712,7 +712,7 @@ checkpoint:
  * registered LFSCK component(s)' API to perform related consistency
  * verification.
  *
- * It flushes related LFSCK tracing files to disk via making checkpoint
+ * It flushes related LFSCK trace files to disk via making checkpoint
  * periodically. Then if the server crashed or the LFSCK is paused, the
  * LFSCK can resume from the latest checkpoint.
  *
@@ -1693,7 +1693,7 @@ cleanup2:
 
        /* Under force exit case, some requests may be just freed without
         * verification, those objects should be re-handled when next run.
-        * So not update the on-disk tracing file under such case. */
+        * So not update the on-disk trace file under such case. */
        if (lad->lad_in_double_scan) {
                if (!lad->lad_exit)
                        rc1 = lao->la_double_scan_result(env, com, rc);
index 6fa9ca1..1ce764e 100644 (file)
@@ -112,6 +112,7 @@ enum lfsck_namespace_trace_flags {
        LNTF_CHECK_LINKEA       = 0x01,
        LNTF_CHECK_PARENT       = 0x02,
        LNTF_SKIP_NLINK         = 0x04,
+       LNTF_CHECK_ORPHAN       = 0x08,
        LNTF_ALL                = 0xff
 };
 
@@ -220,6 +221,19 @@ struct lfsck_namespace {
        /* How many lost name entries have been re-inserted. */
        __u64   ln_lost_dirent_repaired;
 
+       /* How many objects under /lost+found have been scanned. */
+       __u64   ln_local_lpf_scanned;
+
+       /* How many objects under /lost+found have been moved to
+        * namespace visible directory. */
+       __u64   ln_local_lpf_moved;
+
+       /* How many objects under /lost+found have been skipped. */
+       __u64   ln_local_lpf_skipped;
+
+       /* How many objects under /lost+found failed to be processed. */
+       __u64   ln_local_lpf_failed;
+
        /* The size of MDT targets bitmap with nbits. Such bitmap records
         * the MDTs that contain non-verified MDT-objects. */
        __u32   ln_bitmap_size;
@@ -757,6 +771,7 @@ void lfsck_quit_generic(const struct lu_env *env,
                        struct lfsck_component *com);
 
 /* lfsck_engine.c */
+int lfsck_unpack_ent(struct lu_dirent *ent, __u64 *cookie, __u16 *type);
 int lfsck_master_engine(void *args);
 int lfsck_assistant_engine(void *args);
 
index 81066d2..66609eb 100644 (file)
@@ -806,7 +806,7 @@ static void lfsck_layout_cpu_to_le(struct lfsck_layout *des,
 }
 
 /**
- * Load the OST bitmap from the lfsck_layout tracing file.
+ * Load the OST bitmap from the lfsck_layout trace file.
  *
  * \param[in] env      pointer to the thread context
  * \param[in] com      pointer to the lfsck component
@@ -873,9 +873,9 @@ static int lfsck_layout_load_bitmap(const struct lu_env *env,
 }
 
 /**
- * Load the layout LFSCK tracing file from disk.
+ * Load the layout LFSCK trace file from disk.
  *
- * The layout LFSCK tracing file records the layout LFSCK status information
+ * The layout LFSCK trace file records the layout LFSCK status information
  * and other statistics, such as how many objects have been scanned, and how
  * many objects have been repaired, and etc. It also contains the bitmap for
  * failed OSTs during the layout LFSCK. All these information will be loaded
@@ -885,7 +885,7 @@ static int lfsck_layout_load_bitmap(const struct lu_env *env,
  * \param[in] com      pointer to the lfsck component
  *
  * \retval             positive number for file data corruption, the caller
- *                     should reset the layout LFSCK tracing file
+ *                     should reset the layout LFSCK trace file
  * \retval             0 for success
  * \retval             negative error number on failure
  */
@@ -923,9 +923,9 @@ static int lfsck_layout_load(const struct lu_env *env,
 }
 
 /**
- * Store the layout LFSCK tracing file on disk.
+ * Store the layout LFSCK trace file on disk.
  *
- * The layout LFSCK tracing file records the layout LFSCK status information
+ * The layout LFSCK trace file records the layout LFSCK status information
  * and other statistics, such as how many objects have been scanned, and how
  * many objects have been repaired, and etc. It also contains the bitmap for
  * failed OSTs during the layout LFSCK. All these information will be synced
index 3f9d3d7..6ef4cc7 100644 (file)
@@ -131,6 +131,10 @@ static void lfsck_namespace_le_to_cpu(struct lfsck_namespace *dst,
        dst->ln_bad_type_repaired = le64_to_cpu(src->ln_bad_type_repaired);
        dst->ln_lost_dirent_repaired =
                                le64_to_cpu(src->ln_lost_dirent_repaired);
+       dst->ln_local_lpf_scanned = le64_to_cpu(src->ln_local_lpf_scanned);
+       dst->ln_local_lpf_moved = le64_to_cpu(src->ln_local_lpf_moved);
+       dst->ln_local_lpf_skipped = le64_to_cpu(src->ln_local_lpf_skipped);
+       dst->ln_local_lpf_failed = le64_to_cpu(src->ln_local_lpf_failed);
        dst->ln_bitmap_size = le32_to_cpu(src->ln_bitmap_size);
 }
 
@@ -177,6 +181,10 @@ static void lfsck_namespace_cpu_to_le(struct lfsck_namespace *dst,
        dst->ln_bad_type_repaired = cpu_to_le64(src->ln_bad_type_repaired);
        dst->ln_lost_dirent_repaired =
                                cpu_to_le64(src->ln_lost_dirent_repaired);
+       dst->ln_local_lpf_scanned = cpu_to_le64(src->ln_local_lpf_scanned);
+       dst->ln_local_lpf_moved = cpu_to_le64(src->ln_local_lpf_moved);
+       dst->ln_local_lpf_skipped = cpu_to_le64(src->ln_local_lpf_skipped);
+       dst->ln_local_lpf_failed = cpu_to_le64(src->ln_local_lpf_failed);
        dst->ln_bitmap_size = cpu_to_le32(src->ln_bitmap_size);
 }
 
@@ -202,7 +210,7 @@ static void lfsck_namespace_record_failure(const struct lu_env *env,
 }
 
 /**
- * Load the MDT bitmap from the lfsck_namespace tracing file.
+ * Load the MDT bitmap from the lfsck_namespace trace file.
  *
  * \param[in] env      pointer to the thread context
  * \param[in] com      pointer to the lfsck component
@@ -390,12 +398,12 @@ static int lfsck_namespace_init(const struct lu_env *env,
 }
 
 /**
- * Update the namespace LFSCK tracing file for the given @fid
+ * Update the namespace LFSCK trace file for the given @fid
  *
  * \param[in] env      pointer to the thread context
  * \param[in] com      pointer to the lfsck component
  * \param[in] fid      the fid which flags to be updated in the lfsck
- *                     tracing file
+ *                     trace file
  * \param[in] add      true if add new flags, otherwise remove flags
  *
  * \retval             0 for succeed or nothing to be done
@@ -488,7 +496,7 @@ log:
                dt_trans_stop(env, dev, th);
 
        CDEBUG(D_LFSCK, "%s: namespace LFSCK %s flags for "DFID" in the "
-              "tracing file, flags %x, old %x, new %x: rc = %d\n",
+              "trace file, flags %x, old %x, new %x: rc = %d\n",
               lfsck_lfsck2name(lfsck), add ? "add" : "del", PFID(fid),
               (__u32)flags, (__u32)old, (__u32)new, rc);
 
@@ -742,6 +750,7 @@ static int lfsck_namespace_insert_orphan(const struct lu_env *env,
        struct lu_name                  *cname  = &info->lti_name;
        struct dt_insert_rec            *rec    = &info->lti_dt_rec;
        struct lu_fid                   *tfid   = &info->lti_fid5;
+       struct lu_attr                  *la     = &info->lti_la3;
        const struct lu_fid             *cfid   = lfsck_dto2fid(orphan);
        const struct lu_fid             *pfid;
        struct lfsck_instance           *lfsck  = com->lc_lfsck;
@@ -845,6 +854,13 @@ static int lfsck_namespace_insert_orphan(const struct lu_env *env,
                }
        }
 
+       memset(la, 0, sizeof(*la));
+       la->la_ctime = cfs_time_current_sec();
+       la->la_valid = LA_CTIME;
+       rc = dt_declare_attr_set(env, orphan, la, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
        rc = dt_trans_start_local(env, dev, th);
        if (rc != 0)
                GOTO(stop, rc);
@@ -895,6 +911,11 @@ static int lfsck_namespace_insert_orphan(const struct lu_env *env,
                }
        }
 
+       if (rc != 0)
+               GOTO(unlock, rc);
+
+       rc = dt_attr_set(env, orphan, la, th, BYPASS_CAPA);
+
        GOTO(stop, rc = (rc == 0 ? 1 : rc));
 
 unlock:
@@ -994,6 +1015,10 @@ static int lfsck_namespace_insert_normal(const struct lu_env *env,
        if (rc != 0)
                GOTO(stop, rc);
 
+       rc = dt_declare_attr_set(env, child, la, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
        rc = dt_trans_start_local(env, dev, th);
        if (rc != 0)
                GOTO(stop, rc);
@@ -1013,6 +1038,10 @@ static int lfsck_namespace_insert_normal(const struct lu_env *env,
 
        la->la_ctime = cfs_time_current_sec();
        rc = dt_attr_set(env, parent, la, th, BYPASS_CAPA);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       rc = dt_attr_set(env, child, la, th, BYPASS_CAPA);
 
        GOTO(stop, rc = (rc == 0 ? 1 : rc));
 
@@ -1992,7 +2021,7 @@ stop:
        dt_trans_stop(env, dev, th);
 
        /* We are not sure whether the child will become orphan or not.
-        * Record it in the LFSCK tracing file for further checking in
+        * Record it in the LFSCK trace file for further checking in
         * the second-stage scanning. */
        if (!update && !dec && rc == 0)
                lfsck_namespace_trace_update(env, com, cfid,
@@ -2672,7 +2701,7 @@ next:
  * If all the known name entries have been verified, then the object's hard
  * link attribute should match the object's linkEA entries count unless the
  * object's has too much hard link to be recorded in the linkEA. Such cases
- * should have been marked in the LFSCK tracing file. Otherwise, trust the
+ * should have been marked in the LFSCK trace file. Otherwise, trust the
  * linkEA to update the object's nlink attribute.
  *
  * \param[in] env      pointer to the thread context
@@ -3388,6 +3417,10 @@ static void lfsck_namespace_dump_statistics(struct seq_file *m,
                      "multiple_referenced_repaired: "LPU64"\n"
                      "bad_file_type_repaired: "LPU64"\n"
                      "lost_dirent_repaired: "LPU64"\n"
+                     "local_lost_found_scanned: "LPU64"\n"
+                     "local_lost_found_moved: "LPU64"\n"
+                     "local_lost_found_skipped: "LPU64"\n"
+                     "local_lost_found_failed: "LPU64"\n"
                      "success_count: %u\n"
                      "run_time_phase1: %u seconds\n"
                      "run_time_phase2: %u seconds\n",
@@ -3409,6 +3442,10 @@ static void lfsck_namespace_dump_statistics(struct seq_file *m,
                      ns->ln_mul_ref_repaired,
                      ns->ln_bad_type_repaired,
                      ns->ln_lost_dirent_repaired,
+                     ns->ln_local_lpf_scanned,
+                     ns->ln_local_lpf_moved,
+                     ns->ln_local_lpf_skipped,
+                     ns->ln_local_lpf_failed,
                      ns->ln_success_count,
                      time_phase1,
                      time_phase2);
@@ -3673,7 +3710,7 @@ static int lfsck_namespace_exec_oit(const struct lu_env *env,
 
        /* zero-linkEA object may be orphan, but it also maybe because
         * of upgrading. Currently, we cannot record it for double scan.
-        * Because it may cause the LFSCK tracing file to be too large. */
+        * Because it may cause the LFSCK trace file to be too large. */
        if (rc == -ENODATA) {
                if (S_ISDIR(lfsck_object_type(obj)))
                        GOTO(out, rc = 0);
@@ -3682,7 +3719,10 @@ static int lfsck_namespace_exec_oit(const struct lu_env *env,
                if (rc != 0)
                        GOTO(out, rc);
 
-               if (la->la_nlink > 1)
+               /* "la_ctime" == 1 means that it has ever been removed from
+                * backend /lost+found directory but not been added back to
+                * the normal namespace yet. */
+               if (la->la_nlink > 1 || unlikely(la->la_ctime == 1))
                        rc = lfsck_namespace_trace_update(env, com, fid,
                                                LNTF_CHECK_LINKEA, true);
 
@@ -3721,7 +3761,10 @@ static int lfsck_namespace_exec_oit(const struct lu_env *env,
                        if (rc != 0)
                                GOTO(out, rc);
 
-                       if (la->la_nlink > 1)
+                       /* "la_ctime" == 1 means that it has ever been
+                        * removed from backend /lost+found directory but
+                        * not been added back to the normal namespace yet. */
+                       if (la->la_nlink > 1 || unlikely(la->la_ctime == 1))
                                rc = lfsck_namespace_trace_update(env, com,
                                                fid, LNTF_CHECK_LINKEA, true);
                }
@@ -4915,6 +4958,334 @@ out:
        return rc;
 }
 
+/**
+ * Handle one orphan under the backend /lost+found directory
+ *
+ * Insert the orphan FID into the namespace LFSCK trace file for further
+ * processing (via the subsequent namespace LFSCK second-stage scanning).
+ * At the same time, remove the orphan name entry from backend /lost+found
+ * directory. There is an interval between the orphan name entry removed
+ * from the backend /lost+found directory and the orphan FID in the LFSCK
+ * trace file handled. In such interval, the LFSCK can be reset, then
+ * all the FIDs recorded in the namespace LFSCK trace file will be dropped.
+ * To guarantee that the orphans can be found when the LFSCK runs next time
+ * without running e2fsck again, when removing the orphan name entry the
+ * LFSCK sets the orphan's ctime attribute to 1, since normal applications
+ * cannot set an object's ctime attribute to 1. Then when the LFSCK runs
+ * next time, it can record the object (whose ctime is 1) in the namespace
+ * LFSCK trace file during the first-stage scanning.
+ *
+ * \param[in] env      pointer to the thread context
+ * \param[in] com      pointer to the lfsck component
+ * \param[in] parent   pointer to the object for the backend /lost+found
+ * \param[in] ent      pointer to the name entry for the target under the
+ *                     backend /lost+found
+ *
+ * \retval             positive for repaired
+ * \retval             0 if needs to repair nothing
+ * \retval             negative error number on failure
+ */
+static int lfsck_namespace_scan_local_lpf_one(const struct lu_env *env,
+                                             struct lfsck_component *com,
+                                             struct dt_object *parent,
+                                             struct lu_dirent *ent)
+{
+       struct lfsck_thread_info        *info   = lfsck_env_info(env);
+       struct lu_fid                   *key    = &info->lti_fid;
+       struct lu_attr                  *la     = &info->lti_la;
+       struct lfsck_instance           *lfsck  = com->lc_lfsck;
+       struct dt_object                *obj    = com->lc_obj;
+       struct dt_device                *dev    = lfsck->li_bottom;
+       struct dt_object                *child  = NULL;
+       struct thandle                  *th     = NULL;
+       int                              rc     = 0;
+       __u8                             flags  = 0;
+       bool                             exist  = false;
+       ENTRY;
+
+       child = lfsck_object_find_by_dev(env, dev, &ent->lde_fid);
+       if (IS_ERR(child))
+               RETURN(PTR_ERR(child));
+
+       LASSERT(dt_object_exists(child));
+       LASSERT(!dt_object_remote(child));
+
+       fid_cpu_to_be(key, &ent->lde_fid);
+       rc = dt_lookup(env, obj, (struct dt_rec *)&flags,
+                      (const struct dt_key *)key, BYPASS_CAPA);
+       if (rc == 0) {
+               exist = true;
+               flags |= LNTF_CHECK_ORPHAN;
+       } else if (rc == -ENOENT) {
+               flags = LNTF_CHECK_ORPHAN;
+       } else {
+               GOTO(out, rc);
+       }
+
+       th = dt_trans_create(env, dev);
+       if (IS_ERR(th))
+               GOTO(out, rc = PTR_ERR(th));
+
+       /* a1. remove name entry from backend /lost+found */
+       rc = dt_declare_delete(env, parent,
+                              (const struct dt_key *)ent->lde_name, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       if (S_ISDIR(lfsck_object_type(child))) {
+               /* a2. decrease parent's nlink */
+               rc = dt_declare_ref_del(env, parent, th);
+               if (rc != 0)
+                       GOTO(stop, rc);
+       }
+
+       if (exist) {
+               /* a3. remove child's FID from the LFSCK trace file. */
+               rc = dt_declare_delete(env, obj,
+                                      (const struct dt_key *)key, th);
+               if (rc != 0)
+                       GOTO(stop, rc);
+       } else {
+               /* a4. set child's ctime as 1 */
+               memset(la, 0, sizeof(*la));
+               la->la_ctime = 1;
+               la->la_valid = LA_CTIME;
+               rc = dt_declare_attr_set(env, child, la, th);
+               if (rc != 0)
+                       GOTO(stop, rc);
+       }
+
+       /* a5. insert child's FID into the LFSCK trace file. */
+       rc = dt_declare_insert(env, obj, (const struct dt_rec *)&flags,
+                              (const struct dt_key *)key, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       rc = dt_trans_start_local(env, dev, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       /* b1. remove name entry from backend /lost+found */
+       rc = dt_delete(env, parent, (const struct dt_key *)ent->lde_name, th,
+                      BYPASS_CAPA);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       if (S_ISDIR(lfsck_object_type(child))) {
+               /* b2. decrease parent's nlink */
+               dt_write_lock(env, parent, 0);
+               rc = dt_ref_del(env, parent, th);
+               dt_write_unlock(env, parent);
+               if (rc != 0)
+                       GOTO(stop, rc);
+       }
+
+       if (exist) {
+               /* b3. remove child's FID from the LFSCK trace file. */
+               rc = dt_delete(env, obj, (const struct dt_key *)key, th,
+                              BYPASS_CAPA);
+               if (rc != 0)
+                       GOTO(stop, rc);
+       } else {
+               /* b4. set child's ctime as 1 */
+               rc = dt_attr_set(env, child, la, th, BYPASS_CAPA);
+               if (rc != 0)
+                       GOTO(stop, rc);
+       }
+
+       /* b5. insert child's FID into the LFSCK trace file. */
+       rc = dt_insert(env, obj, (const struct dt_rec *)&flags,
+                      (const struct dt_key *)key, th, BYPASS_CAPA, 1);
+
+       GOTO(stop, rc = (rc == 0 ? 1 : rc));
+
+stop:
+       dt_trans_stop(env, dev, th);
+
+out:
+       lu_object_put(env, &child->do_lu);
+
+       return rc;
+}
+
+/**
+ * Handle orphans under the backend /lost+found directory
+ *
+ * Some backend checker, such as e2fsck for ldiskfs may find some orphans
+ * and put them under the backend /lost+found directory that is invisible
+ * to client. The LFSCK will scan such directory, for the original client
+ * visible orphans, add their fids into the namespace LFSCK trace file,
+ * then the subsequent namespace LFSCK second-stage scanning can handle
+ * them as other objects to be double scanned: either move back to normal
+ * namespace, or to the global visible orphan directory:
+ * /ROOT/.lustre/lost+found/MDTxxxx/
+ *
+ * \param[in] env      pointer to the thread context
+ * \param[in] com      pointer to the lfsck component
+ */
+static void lfsck_namespace_scan_local_lpf(const struct lu_env *env,
+                                          struct lfsck_component *com)
+{
+       struct lfsck_thread_info        *info   = lfsck_env_info(env);
+       struct lu_dirent                *ent    =
+                                       (struct lu_dirent *)info->lti_key;
+       struct lu_seq_range             *range  = &info->lti_range;
+       struct lfsck_instance           *lfsck  = com->lc_lfsck;
+       struct ptlrpc_thread            *thread = &lfsck->li_thread;
+       struct lfsck_bookmark           *bk     = &lfsck->li_bookmark_ram;
+       struct dt_device                *dev    = lfsck->li_bottom;
+       struct lfsck_namespace          *ns     = com->lc_file_ram;
+       struct dt_object                *parent;
+       const struct dt_it_ops          *iops;
+       struct dt_it                    *di;
+       struct seq_server_site          *ss     =
+                                       lu_site2seq(dev->dd_lu_dev.ld_site);
+       __u64                            cookie;
+       int                              rc     = 0;
+       __u16                            type;
+       ENTRY;
+
+       parent = lfsck_object_find_by_dev(env, dev, &LU_BACKEND_LPF_FID);
+       if (IS_ERR(parent)) {
+               CERROR("%s: fail to find backend /lost+found: rc = %ld\n",
+                      lfsck_lfsck2name(lfsck), PTR_ERR(parent));
+               RETURN_EXIT;
+       }
+
+       /* It is normal that the /lost+found does not exist for ZFS backend. */
+       if (!dt_object_exists(parent))
+               GOTO(out, rc = 0);
+
+       if (unlikely(!dt_try_as_dir(env, parent)))
+               GOTO(out, rc = -ENOTDIR);
+
+       CDEBUG(D_LFSCK, "%s: start to scan backend /lost+found\n",
+              lfsck_lfsck2name(lfsck));
+
+       com->lc_new_scanned = 0;
+       iops = &parent->do_index_ops->dio_it;
+       di = iops->init(env, parent, LUDA_64BITHASH | LUDA_TYPE, BYPASS_CAPA);
+       if (IS_ERR(di))
+               GOTO(out, rc = PTR_ERR(di));
+
+       rc = iops->load(env, di, 0);
+       if (rc == 0)
+               rc = iops->next(env, di);
+       else if (rc > 0)
+               rc = 0;
+
+       while (rc == 0) {
+               if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY3) &&
+                   cfs_fail_val > 0) {
+                       struct l_wait_info lwi;
+
+                       lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val),
+                                         NULL, NULL);
+                       l_wait_event(thread->t_ctl_waitq,
+                                    !thread_is_running(thread),
+                                    &lwi);
+
+                       if (unlikely(!thread_is_running(thread)))
+                               break;
+               }
+
+               rc = iops->rec(env, di, (struct dt_rec *)ent,
+                              LUDA_64BITHASH | LUDA_TYPE);
+               if (rc == 0)
+                       rc = lfsck_unpack_ent(ent, &cookie, &type);
+
+               if (unlikely(rc != 0)) {
+                       CDEBUG(D_LFSCK, "%s: fail to iterate backend "
+                              "/lost+found: rc = %d\n",
+                              lfsck_lfsck2name(lfsck), rc);
+
+                       goto skip;
+               }
+
+               /* skip dot and dotdot entries */
+               if (ent->lde_name[0] == '.' &&
+                   (ent->lde_namelen == 1 ||
+                    (ent->lde_namelen == 2 && ent->lde_name[1] == '.')))
+                       goto next;
+
+               if (!fid_seq_in_fldb(fid_seq(&ent->lde_fid)))
+                       goto skip;
+
+               if (fid_is_norm(&ent->lde_fid)) {
+                       fld_range_set_mdt(range);
+                       rc = fld_local_lookup(env, ss->ss_server_fld,
+                                             fid_seq(&ent->lde_fid), range);
+                       if (rc != 0)
+                               goto skip;
+               } else if (lfsck_dev_idx(dev) != 0) {
+                       /* If the returned FID is IGIF, then there are three
+                        * possible cases:
+                        *
+                        * 1) The object is upgraded from old Lustre-1.8 with
+                        *    IGIF assigned to such object.
+                        * 2) The object is a backend local object and is
+                        *    invisible to client.
+                        * 3) The object lost its LMV EA, and since there is
+                        *    no FID-in-dirent for the orphan in the backend
+                        *    /lost+found directory, then the low layer will
+                        *    return IGIF for such object.
+                        *
+                        * For MDTx (x != 0), it is either case 2) or case 3),
+                        * but from the LFSCK view, they are indistinguishable.
+                        * To be safe, the LFSCK will keep it there and report
+                        * some message, then the administrator can handle that
+                        * further.
+                        *
+                        * For MDT0, it is more possible the case 1). The LFSCK
+                        * will handle the orphan as an upgraded object. */
+                       CDEBUG(D_LFSCK, "%s: the orphan %.*s with IGIF "DFID
+                              " in the backend /lost+found on the MDT %04x, "
+                              "to be safe, skip it.\n",
+                              lfsck_lfsck2name(lfsck), ent->lde_namelen,
+                              ent->lde_name, PFID(&ent->lde_fid),
+                              lfsck_dev_idx(dev));
+                       goto skip;
+               }
+
+               rc = lfsck_namespace_scan_local_lpf_one(env, com, parent, ent);
+
+skip:
+               down_write(&com->lc_sem);
+               com->lc_new_scanned++;
+               ns->ln_local_lpf_scanned++;
+               if (rc > 0)
+                       ns->ln_local_lpf_moved++;
+               else if (rc == 0)
+                       ns->ln_local_lpf_skipped++;
+               else
+                       ns->ln_local_lpf_failed++;
+               up_write(&com->lc_sem);
+
+               if (rc < 0 && bk->lb_param & LPF_FAILOUT)
+                       break;
+
+next:
+               lfsck_control_speed_by_self(com);
+               if (unlikely(!thread_is_running(thread))) {
+                       rc = 0;
+                       break;
+               }
+
+               rc = iops->next(env, di);
+       }
+
+       iops->put(env, di);
+       iops->fini(env, di);
+
+       EXIT;
+
+out:
+       CDEBUG(D_LFSCK, "%s: stop to scan backend /lost+found: rc = %d\n",
+              lfsck_lfsck2name(lfsck), rc);
+
+       lu_object_put(env, &parent->do_lu);
+}
+
 static int lfsck_namespace_assistant_handler_p2(const struct lu_env *env,
                                                struct lfsck_component *com)
 {
@@ -4935,6 +5306,8 @@ static int lfsck_namespace_assistant_handler_p2(const struct lu_env *env,
        CDEBUG(D_LFSCK, "%s: namespace LFSCK phase2 scan start\n",
               lfsck_lfsck2name(lfsck));
 
+       lfsck_namespace_scan_local_lpf(env, com);
+
        com->lc_new_checked = 0;
        com->lc_new_scanned = 0;
        com->lc_time_last_checkpoint = cfs_time_current();
index 518f593..d7e5d57 100644 (file)
@@ -1430,8 +1430,8 @@ static const struct osd_lf_map osd_lf_maps[] = {
               OLF_SHOW_NAME, NULL, NULL },
 
        /* lost+found */
-       { "lost+found", { 0, 0, 0 }, OLF_SCAN_SUBITEMS | OLF_NO_OI,
-               osd_ios_general_scan, osd_ios_lf_fill },
+       { "lost+found", { FID_SEQ_LOCAL_FILE, OSD_LPF_OID, 0 },
+               OLF_SCAN_SUBITEMS, osd_ios_general_scan, osd_ios_lf_fill },
 
        { NULL, { 0, 0, 0 }, 0, NULL, NULL }
 };
index 1d330ea..f92f0f1 100644 (file)
@@ -46,7 +46,7 @@ setupall
        ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14 15 16 17 18 19 20 21"
 
 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.6.50) ]] &&
-       ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2d 2e 3 22 23 24 25 26 27 28 29"
+       ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2d 2e 3 22 23 24 25 26 27 28 29 30"
 
 build_test_filter
 
@@ -3636,6 +3636,93 @@ test_29c() {
 }
 run_test 29c "Not verify nlink attr if hark links exceed linkEA limitation"
 
+test_30() {
+       [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
+               skip "Only support backend /lost+found for ldiskfs" && return
+
+       echo "#####"
+       echo "The namespace LFSCK will move the orphans from backend"
+       echo "/lost+found directory to normal client visible namespace"
+       echo "or to global visible ./lustre/lost+found/MDTxxxx/ directory"
+       echo "#####"
+
+       check_mount_and_prep
+
+       $LFS mkdir -i 0 $DIR/$tdir/foo || error "(1) Fail to mkdir foo"
+       touch $DIR/$tdir/foo/f0 || error "(2) Fail to touch f1"
+
+       echo "Inject failure stub on MDT0 to simulate the case that"
+       echo "directory d0 has no linkEA entry, then the LFSCK will"
+       echo "move it into .lustre/lost+found/MDTxxxx/ later."
+
+       #define OBD_FAIL_LFSCK_NO_LINKEA        0x161d
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x161d
+       mkdir $DIR/$tdir/foo/d0 || error "(3) Fail to mkdir d0"
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+
+       touch $DIR/$tdir/foo/d0/f1 || error "(4) Fail to touch f1"
+       mkdir $DIR/$tdir/foo/d0/d1 || error "(5) Fail to mkdir d1"
+
+       echo "Inject failure stub on MDT0 to simulate the case that the"
+       echo "object's name entry will be removed, but not destroy the"
+       echo "object. Then backend e2fsck will handle it as orphan and"
+       echo "add them into the backend /lost+found directory."
+
+       #define OBD_FAIL_LFSCK_NO_NAMEENTRY     0x1624
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
+       rmdir $DIR/$tdir/foo/d0/d1 || error "(6) Fail to rmdir d1"
+       rm -f $DIR/$tdir/foo/d0/f1 || error "(7) Fail to unlink f1"
+       rmdir $DIR/$tdir/foo/d0 || error "(8) Fail to rmdir d0"
+       rm -f $DIR/$tdir/foo/f0 || error "(9) Fail to unlink f0"
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+
+       umount_client $MOUNT || error "(10) Fail to stop client!"
+
+       stop $SINGLEMDS || error "(11) Fail to stop MDT0"
+
+       echo "run e2fsck"
+       run_e2fsck $(facet_host $SINGLEMDS) $MDT_DEVNAME "-y" ||
+               error "(12) Fail to run e2fsck"
+
+       start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
+               error "(13) Fail to start MDT0"
+
+       echo "Trigger namespace LFSCK to recover backend orphans"
+       $START_NAMESPACE -r -A ||
+               error "(14) Fail to start LFSCK for namespace"
+
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "completed" 32 || {
+               $SHOW_NAMESPACE
+               error "(15) unexpected status"
+       }
+
+       local repaired=$($SHOW_NAMESPACE |
+                        awk '/^local_lost_found_moved/ { print $2 }')
+       [ $repaired -ge 4 ] ||
+               error "(16) Fail to recover backend orphans: $repaired"
+
+       mount_client $MOUNT || error "(17) Fail to start client!"
+
+       stat $DIR/$tdir/foo/f0 || "(18) f0 is not recovered"
+
+       ls -ail $MOUNT/.lustre/lost+found/
+
+       echo "d0 should become orphan under .lustre/lost+found/MDT0000/"
+       [ -d $MOUNT/.lustre/lost+found/MDT0000 ] ||
+               error "(19) $MOUNT/.lustre/lost+found/MDT0000/ should be there"
+
+       ls -ail $MOUNT/.lustre/lost+found/MDT0000/
+
+       cname=$(find $MOUNT/.lustre/lost+found/MDT0000/ -name *-*-D-*)
+       [ ! -z "$cname" ] || error "(20) d0 is not recovered"
+
+       stat ${cname}/d1 || error "(21) d0 is not recovered"
+       stat ${cname}/f1 || error "(22) f1 is not recovered"
+}
+run_test 30 "LFSCK can recover the orphans from backend /lost+found"
+
 $LCTL set_param debug=-lfsck > /dev/null || true
 
 # restore MDS/OST size