Whamcloud - gitweb
LU-5512 lfsck: repair dangling name entry 30/11330/29
authorFan Yong <fan.yong@intel.com>
Wed, 6 Aug 2014 07:02:54 +0000 (15:02 +0800)
committerOleg Drokin <oleg.drokin@intel.com>
Tue, 30 Sep 2014 03:54:25 +0000 (03:54 +0000)
If the MDT-object referenced by the name entry is lost, then the
namespace LFSCK needs to repair the inconsistency as required:

1) Keep the inconsistency there and report the inconsistency case,
   then give the chance to the application to find related issues,
   and the users can make the decision about how to handle it with
   more human knowledge. (by default)

2) Re-create the missed MDT-object with the FID (in the name entry)

The LFSCK will allow the administrator to specify how to handle the
dangling name entry via a new option "-C" when triggering the LFSCK:

[-Coff] or [--create_mdtobj=off]:
Report the inconsistency via log, but keep the dangling name entry
there without repairing. (by default)

-C[on] or --create_mdtobj[=on]:
Create the lost MDT-object.

Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: I78231914023b8d02daf4f6cde6176c1ef655f862
Reviewed-on: http://review.whamcloud.com/11330
Tested-by: Jenkins
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/lustre/lustre_lfsck_user.h
lustre/include/obd_support.h
lustre/lfsck/lfsck_bookmark.c
lustre/lfsck/lfsck_internal.h
lustre/lfsck/lfsck_namespace.c
lustre/mdd/mdd_dir.c
lustre/osd-ldiskfs/osd_handler.c
lustre/osp/osp_object.c
lustre/tests/sanity-lfsck.sh
lustre/utils/lctl.c
lustre/utils/lustre_lfsck.c

index 9e40222..a740534 100644 (file)
@@ -55,6 +55,9 @@ enum lfsck_param_flags {
 
        /* Create OST-object for dangling LOV EA. */
        LPF_CREATE_OSTOBJ       = 0x0040,
 
        /* Create OST-object for dangling LOV EA. */
        LPF_CREATE_OSTOBJ       = 0x0040,
+
+       /* Create MDT-object for dangling name entry. */
+       LPF_CREATE_MDTOBJ       = 0x0080,
 };
 
 enum lfsck_type {
 };
 
 enum lfsck_type {
@@ -86,6 +89,7 @@ enum lfsck_start_valid {
        LSV_DRYRUN              = 0x00000004,
        LSV_ASYNC_WINDOWS       = 0x00000008,
        LSV_CREATE_OSTOBJ       = 0x00000010,
        LSV_DRYRUN              = 0x00000004,
        LSV_ASYNC_WINDOWS       = 0x00000008,
        LSV_CREATE_OSTOBJ       = 0x00000010,
+       LSV_CREATE_MDTOBJ       = 0x00000020,
 };
 
 /* Arguments for starting lfsck. */
 };
 
 /* Arguments for starting lfsck. */
index 8eddbca..4c80683 100644 (file)
@@ -519,6 +519,8 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type,
 #define OBD_FAIL_LFSCK_NO_LINKEA       0x161d
 #define OBD_FAIL_LFSCK_BAD_PARENT      0x161e
 #define OBD_FAIL_LFSCK_BAD_PARENT2     0x161f
 #define OBD_FAIL_LFSCK_NO_LINKEA       0x161d
 #define OBD_FAIL_LFSCK_BAD_PARENT      0x161e
 #define OBD_FAIL_LFSCK_BAD_PARENT2     0x161f
+#define OBD_FAIL_LFSCK_DANGLING2       0x1620
+#define OBD_FAIL_LFSCK_DANGLING3       0x1621
 
 #define OBD_FAIL_LFSCK_NOTIFY_NET      0x16f0
 #define OBD_FAIL_LFSCK_QUERY_NET       0x16f1
 
 #define OBD_FAIL_LFSCK_NOTIFY_NET      0x16f0
 #define OBD_FAIL_LFSCK_QUERY_NET       0x16f1
index ffaa73d..e521c3a 100644 (file)
@@ -209,6 +209,11 @@ int lfsck_set_param(const struct lu_env *env, struct lfsck_instance *lfsck,
                        dirty = true;
                }
 
                        dirty = true;
                }
 
+               if (bk->lb_param & LPF_CREATE_MDTOBJ) {
+                       bk->lb_param &= ~LPF_CREATE_MDTOBJ;
+                       dirty = true;
+               }
+
                if (bk->lb_param & LPF_FAILOUT) {
                        bk->lb_param &= ~LPF_FAILOUT;
                        dirty = true;
                if (bk->lb_param & LPF_FAILOUT) {
                        bk->lb_param &= ~LPF_FAILOUT;
                        dirty = true;
@@ -254,6 +259,18 @@ int lfsck_set_param(const struct lu_env *env, struct lfsck_instance *lfsck,
                        }
                }
 
                        }
                }
 
+               if ((start->ls_valid & LSV_CREATE_MDTOBJ) || reset) {
+                       if ((bk->lb_param & LPF_CREATE_MDTOBJ) &&
+                           !(start->ls_valid & LSV_CREATE_MDTOBJ)) {
+                               bk->lb_param &= ~LPF_CREATE_MDTOBJ;
+                               dirty = true;
+                       } else if (!(bk->lb_param & LPF_CREATE_MDTOBJ) &&
+                                  (start->ls_flags & LPF_CREATE_MDTOBJ)) {
+                               bk->lb_param |= LPF_CREATE_MDTOBJ;
+                               dirty = true;
+                       }
+               }
+
                if ((start->ls_valid & LSV_ERROR_HANDLE) || reset) {
                        if ((bk->lb_param & LPF_FAILOUT) &&
                            !(start->ls_valid & LSV_ERROR_HANDLE)) {
                if ((start->ls_valid & LSV_ERROR_HANDLE) || reset) {
                        if ((bk->lb_param & LPF_FAILOUT) &&
                            !(start->ls_valid & LSV_ERROR_HANDLE)) {
index e24acd3..ab25ede 100644 (file)
@@ -118,6 +118,7 @@ enum lfsck_namespace_inconsistency_type {
        LNIT_NONE               = 0,
        LNIT_BAD_LINKEA         = 1,
        LNIT_UNMATCHED_PAIRS    = 2,
        LNIT_NONE               = 0,
        LNIT_BAD_LINKEA         = 1,
        LNIT_UNMATCHED_PAIRS    = 2,
+       LNIT_DANGLING           = 3,
 };
 
 struct lfsck_namespace {
 };
 
 struct lfsck_namespace {
@@ -205,8 +206,11 @@ struct lfsck_namespace {
        /* How many unmatched pairs have been repaired. */
        __u64   ln_unmatched_pairs_repaired;
 
        /* How many unmatched pairs have been repaired. */
        __u64   ln_unmatched_pairs_repaired;
 
+       /* How many dangling name entries have been found/repaired. */
+       __u64   ln_dangling_repaired;
+
        /* For further using. 256-bytes aligned now. */
        /* For further using. 256-bytes aligned now. */
-       __u64   ln_reserved[29];
+       __u64   ln_reserved[28];
 };
 
 enum lfsck_layout_inconsistency_type {
 };
 
 enum lfsck_layout_inconsistency_type {
@@ -556,6 +560,19 @@ struct lfsck_assistant_req {
        struct list_head        lar_list;
 };
 
        struct list_head        lar_list;
 };
 
+struct lfsck_namespace_req { /* request built by lfsck_namespace_assistant_req_init() from a directory entry */
+       struct lfsck_assistant_req       lnr_lar; /* embedded generic assistant request */
+       struct dt_object                *lnr_obj; /* used as the parent directory by lfsck_namespace_repair_dangling() */
+       struct lu_fid                    lnr_fid; /* presumably the FID stored in the name entry - TODO confirm */
+       __u64                            lnr_oit_cookie; /* presumably the object-table iteration position - TODO confirm */
+       __u64                            lnr_dir_cookie; /* presumably the directory iteration position - TODO confirm */
+       __u32                            lnr_attr;
+       __u32                            lnr_size;
+       __u16                            lnr_type; /* presumably the type passed to the req_init() helper - TODO confirm */
+       __u16                            lnr_namelen; /* presumably the length of lnr_name - TODO confirm */
+       char                             lnr_name[0]; /* zero-length trailing array; presumably the entry name - TODO confirm */
+};
+
 struct lfsck_assistant_operations {
        int (*la_handler_p1)(const struct lu_env *env,
                             struct lfsck_component *com,
 struct lfsck_assistant_operations {
        int (*la_handler_p1)(const struct lu_env *env,
                             struct lfsck_component *com,
@@ -744,6 +761,10 @@ int lfsck_namespace_rebuild_linkea(const struct lu_env *env,
                                   struct lfsck_component *com,
                                   struct dt_object *obj,
                                   struct linkea_data *ldata);
                                   struct lfsck_component *com,
                                   struct dt_object *obj,
                                   struct linkea_data *ldata);
+int lfsck_namespace_repair_dangling(const struct lu_env *env,
+                                   struct lfsck_component *com,
+                                   struct dt_object *child,
+                                   struct lfsck_namespace_req *lnr);
 int lfsck_verify_linkea(const struct lu_env *env, struct dt_device *dev,
                        struct dt_object *obj, const struct lu_name *cname,
                        const struct lu_fid *pfid);
 int lfsck_verify_linkea(const struct lu_env *env, struct dt_device *dev,
                        struct dt_object *obj, const struct lu_name *cname,
                        const struct lu_fid *pfid);
index 9b759b6..54b3498 100644 (file)
@@ -51,19 +51,6 @@ enum lfsck_nameentry_check {
 
 static const char lfsck_namespace_name[] = "lfsck_namespace";
 
 
 static const char lfsck_namespace_name[] = "lfsck_namespace";
 
-struct lfsck_namespace_req {
-       struct lfsck_assistant_req       lnr_lar;
-       struct dt_object                *lnr_obj;
-       struct lu_fid                    lnr_fid;
-       __u64                            lnr_oit_cookie;
-       __u64                            lnr_dir_cookie;
-       __u32                            lnr_attr;
-       __u32                            lnr_size;
-       __u16                            lnr_type;
-       __u16                            lnr_namelen;
-       char                             lnr_name[0];
-};
-
 static struct lfsck_namespace_req *
 lfsck_namespace_assistant_req_init(struct lfsck_instance *lfsck,
                                   struct lu_dirent *ent, __u16 type)
 static struct lfsck_namespace_req *
 lfsck_namespace_assistant_req_init(struct lfsck_instance *lfsck,
                                   struct lu_dirent *ent, __u16 type)
@@ -140,6 +127,7 @@ static void lfsck_namespace_le_to_cpu(struct lfsck_namespace *dst,
                                le64_to_cpu(src->ln_unknown_inconsistency);
        dst->ln_unmatched_pairs_repaired =
                                le64_to_cpu(src->ln_unmatched_pairs_repaired);
                                le64_to_cpu(src->ln_unknown_inconsistency);
        dst->ln_unmatched_pairs_repaired =
                                le64_to_cpu(src->ln_unmatched_pairs_repaired);
+       dst->ln_dangling_repaired = le64_to_cpu(src->ln_dangling_repaired);
 }
 
 static void lfsck_namespace_cpu_to_le(struct lfsck_namespace *dst,
 }
 
 static void lfsck_namespace_cpu_to_le(struct lfsck_namespace *dst,
@@ -181,6 +169,7 @@ static void lfsck_namespace_cpu_to_le(struct lfsck_namespace *dst,
                                cpu_to_le64(src->ln_unknown_inconsistency);
        dst->ln_unmatched_pairs_repaired =
                                cpu_to_le64(src->ln_unmatched_pairs_repaired);
                                cpu_to_le64(src->ln_unknown_inconsistency);
        dst->ln_unmatched_pairs_repaired =
                                cpu_to_le64(src->ln_unmatched_pairs_repaired);
+       dst->ln_dangling_repaired = cpu_to_le64(src->ln_dangling_repaired);
 }
 
 static void lfsck_namespace_record_failure(const struct lu_env *env,
 }
 
 static void lfsck_namespace_record_failure(const struct lu_env *env,
@@ -848,6 +837,200 @@ static int lfsck_namespace_shrink_linkea_cond(const struct lu_env *env,
 }
 
 /**
 }
 
 /**
+ * Conditionally replace the name entry in the parent directory.
+ *
+ * When asked to, the LFSCK may re-create a lost MDT-object for a dangling
+ * name entry, but that repair may be wrong if the FID stored in the name
+ * entry was bad. If the LFSCK later finds the real MDT-object, it checks
+ * whether the previously re-created MDT-object has been modified since;
+ * if it has not, that object is destroyed and the name entry in the
+ * parent is updated to reference the real MDT-object.
+ *
+ * Nothing is replaced (and 0 is returned) when:
+ * - the name entry in the parent no longer references @cfid,
+ * - the object referenced by @cfid has a non-zero ctime,
+ * - the object referenced by @cfid is a regular file with a LOV EA,
+ * - the @child has no linkEA, or its linkEA has no entry for
+ *   (@cname, @parent).
+ *
+ * If @cfid is not sane, or the object it references does not exist, the
+ * destroy step is skipped and only the name entry is replaced.
+ *
+ * In dry-run mode (LPF_DRYRUN) the checks are done and a positive number
+ * is returned, but no transaction is started and nothing is modified.
+ *
+ * \param[in] env      pointer to the thread context
+ * \param[in] com      pointer to the lfsck component
+ * \param[in] parent   pointer to the parent directory
+ * \param[in] child    pointer to the MDT-object that may be the real
+ *                     MDT-object corresponding to the name entry in parent
+ * \param[in] cfid     the current FID in the name entry
+ * \param[in] cname    contains the name of the child in the parent directory
+ *
+ * \retval             positive number for repaired cases
+ * \retval             0 if nothing to be repaired
+ * \retval             negative error number on failure
+ */
+static int lfsck_namespace_replace_cond(const struct lu_env *env,
+                                       struct lfsck_component *com,
+                                       struct dt_object *parent,
+                                       struct dt_object *child,
+                                       const struct lu_fid *cfid,
+                                       const struct lu_name *cname)
+{
+       struct lfsck_thread_info        *info   = lfsck_env_info(env);
+       struct lu_fid                   *tfid   = &info->lti_fid4;
+       struct lu_attr                  *la     = &info->lti_la;
+       struct dt_insert_rec            *rec    = &info->lti_dt_rec;
+       struct lfsck_instance           *lfsck  = com->lc_lfsck;
+       struct dt_device                *dev    = lfsck->li_next;
+       const char                      *name   = cname->ln_name;
+       struct dt_object                *obj    = NULL;
+       struct lustre_handle             plh    = { 0 };
+       struct lustre_handle             clh    = { 0 };
+       struct linkea_data               ldata  = { 0 };
+       struct thandle                  *th     = NULL;
+       bool                             exist  = true;
+       int                              rc     = 0;
+       ENTRY;
+
+       rc = lfsck_ibits_lock(env, lfsck, parent, &plh,
+                             MDS_INODELOCK_UPDATE, LCK_EX);
+       if (rc != 0)
+               GOTO(log, rc);
+
+       if (!fid_is_sane(cfid)) {
+               exist = false;
+               goto replace;
+       }
+
+       obj = lfsck_object_find(env, lfsck, cfid);
+       if (IS_ERR(obj)) {
+               rc = PTR_ERR(obj);
+               if (rc == -ENOENT) {
+                       exist = false;
+                       goto replace;
+               }
+
+               GOTO(log, rc);
+       }
+
+       if (!dt_object_exists(obj)) {
+               exist = false;
+               goto replace;
+       }
+
+       rc = dt_lookup(env, parent, (struct dt_rec *)tfid,
+                      (const struct dt_key *)name, BYPASS_CAPA);
+       if (rc == -ENOENT) {
+               exist = false;
+               goto replace;
+       }
+
+       if (rc != 0)
+               GOTO(log, rc);
+
+       /* The name entry no longer references cfid, so someone has
+        * changed it in the meantime; do not replace it. */
+       if (!lu_fid_eq(cfid, tfid))
+               GOTO(log, rc = 0);
+
+       /* Lock the object that is the candidate for being destroyed. */
+       rc = lfsck_ibits_lock(env, lfsck, obj, &clh,
+                             MDS_INODELOCK_UPDATE |
+                             MDS_INODELOCK_XATTR, LCK_EX);
+       if (rc != 0)
+               GOTO(log, rc);
+
+       if (unlikely(lfsck_is_dead_obj(obj))) {
+               exist = false;
+               goto replace;
+       }
+
+       rc = dt_attr_get(env, obj, la, BYPASS_CAPA);
+       if (rc != 0)
+               GOTO(log, rc);
+
+       /* A non-zero ctime means either the object has been modified by
+        * others or it was not created by the LFSCK; the two cases are
+        * indistinguishable, so do not replace it. rc is 0 here. */
+       if (la->la_ctime != 0)
+               GOTO(log, rc);
+
+       if (S_ISREG(la->la_mode)) {
+               rc = dt_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_LOV,
+                                 BYPASS_CAPA);
+               /* A positive result means a LOV EA exists, i.e. someone
+                * has created related OST-object(s): keep the object and
+                * report nothing repaired. -ENODATA means no LOV EA and
+                * processing continues; other errors are returned. */
+               if ((rc > 0) || (rc < 0 && rc != -ENODATA))
+                       GOTO(log, rc = (rc > 0 ? 0 : rc));
+       }
+
+replace:
+       dt_read_lock(env, child, 0);
+       rc = lfsck_links_read2(env, child, &ldata);
+       dt_read_unlock(env, child);
+
+       /* The child has no linkEA: someone changed the child, so there
+        * is no need to replace. */
+       if (rc == -ENODATA)
+               GOTO(log, rc = 0);
+
+       if (rc != 0)
+               GOTO(log, rc);
+
+       rc = linkea_links_find(&ldata, cname, lfsck_dto2fid(parent));
+       /* The child's linkEA has no entry for this name under this
+        * parent: someone moved the child, so no need to replace. */
+       if (rc != 0)
+               GOTO(log, rc = 0);
+
+       if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
+               GOTO(log, rc = 1);
+
+       th = dt_trans_create(env, dev);
+       if (IS_ERR(th))
+               GOTO(log, rc = PTR_ERR(th));
+
+       if (exist) {
+               rc = dt_declare_destroy(env, obj, th);
+               if (rc != 0)
+                       GOTO(stop, rc);
+       }
+
+       rc = dt_declare_delete(env, parent, (const struct dt_key *)name, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       rec->rec_type = S_IFDIR; /* NOTE(review): the entry type is hard-coded
+                                 * to directory; confirm that every caller
+                                 * passes a directory as child. */
+       rec->rec_fid = lfsck_dto2fid(child);
+       rc = dt_declare_insert(env, parent, (const struct dt_rec *)rec,
+                              (const struct dt_key *)name, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       rc = dt_trans_start(env, dev, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       if (exist) {
+               rc = dt_destroy(env, obj, th);
+               if (rc != 0)
+                       GOTO(stop, rc);
+       }
+
+       /* The old name entry may not exist, so the result of the delete
+        * is deliberately ignored. */
+       dt_delete(env, parent, (const struct dt_key *)name, th,
+                 BYPASS_CAPA);
+
+       rc = dt_insert(env, parent, (const struct dt_rec *)rec,
+                      (const struct dt_key *)name, th, BYPASS_CAPA, 1);
+
+       GOTO(stop, rc = (rc == 0 ? 1 : rc));
+
+stop:
+       dt_trans_stop(env, dev, th);
+
+log:
+       lfsck_ibits_unlock(&clh, LCK_EX); /* clh may never have been locked on
+                                          * this path; presumably the unlock
+                                          * tolerates an unused handle -
+                                          * TODO confirm. */
+       lfsck_ibits_unlock(&plh, LCK_EX);
+       if (obj != NULL && !IS_ERR(obj))
+               lfsck_object_put(env, obj);
+
+       CDEBUG(D_LFSCK, "%s: namespace LFSCK conditionally destroy the "
+              "object "DFID" because of conflict with the object "DFID
+              " under the parent "DFID" with name %s: rc = %d\n",
+              lfsck_lfsck2name(lfsck), PFID(cfid),
+              PFID(lfsck_dto2fid(child)), PFID(lfsck_dto2fid(parent)),
+              name, rc);
+
+       return rc;
+}
+
+/**
  * Overwrite the linkEA for the object with the given ldata.
  *
  * The caller should take the ldlm lock before the calling.
  * Overwrite the linkEA for the object with the given ldata.
  *
  * The caller should take the ldlm lock before the calling.
@@ -1194,13 +1377,20 @@ lfsck_namespace_dsd_single(const struct lu_env *env,
        if (rc != 0)
                GOTO(out, rc);
 
        if (rc != 0)
                GOTO(out, rc);
 
-       /* XXX: The name entry references another MDT-object that may be
-        *      created by the LFSCK for repairing dangling name entry.
-        *      There will be another patch for further processing. */
        if (!lu_fid_eq(tfid, cfid)) {
        if (!lu_fid_eq(tfid, cfid)) {
-               if (!lustre_handle_is_used(lh) && retry != NULL)
+               if (!lustre_handle_is_used(lh) && retry != NULL) {
                        *retry = true;
                        *retry = true;
-               else
+
+                       GOTO(out, rc = 0);
+               }
+
+               lfsck_ibits_unlock(lh, LCK_EX);
+               /* The name entry references another MDT-object that
+                * may be created by the LFSCK for repairing dangling
+                * name entry. Try to replace it. */
+               rc = lfsck_namespace_replace_cond(env, com, parent, child,
+                                                 tfid, cname);
+               if (rc == 0)
                        rc = lfsck_namespace_dsd_orphan(env, com, child,
                                                        pfid, lh);
 
                        rc = lfsck_namespace_dsd_orphan(env, com, child,
                                                        pfid, lh);
 
@@ -1230,7 +1420,7 @@ out:
 }
 
 /**
 }
 
 /**
- * Double Scan Directory object for single linkEA entry case.
+ * Double Scan Directory object for multiple linkEA entries case.
  *
  * The given @child has multiple linkEA entries. There is at most one linkEA
  * entry will be valid, all the others will be removed. Firstly, the function
  *
  * The given @child has multiple linkEA entries. There is at most one linkEA
  * entry will be valid, all the others will be removed. Firstly, the function
@@ -1333,16 +1523,20 @@ again:
                               (const struct dt_key *)cname->ln_name,
                               BYPASS_CAPA);
                *pfid2 = *lfsck_dto2fid(parent);
                               (const struct dt_key *)cname->ln_name,
                               BYPASS_CAPA);
                *pfid2 = *lfsck_dto2fid(parent);
-               lfsck_object_put(env, parent);
                if (rc == -ENOENT) {
                if (rc == -ENOENT) {
+                       lfsck_object_put(env, parent);
                        linkea_next_entry(ldata);
                        continue;
                }
 
                        linkea_next_entry(ldata);
                        continue;
                }
 
-               if (rc != 0)
+               if (rc != 0) {
+                       lfsck_object_put(env, parent);
+
                        RETURN(rc);
                        RETURN(rc);
+               }
 
                if (lu_fid_eq(tfid, cfid)) {
 
                if (lu_fid_eq(tfid, cfid)) {
+                       lfsck_object_put(env, parent);
                        if (!lu_fid_eq(pfid, pfid2)) {
                                *type = LNIT_UNMATCHED_PAIRS;
                                rc = lfsck_namespace_repair_unmatched_pairs(env,
                        if (!lu_fid_eq(pfid, pfid2)) {
                                *type = LNIT_UNMATCHED_PAIRS;
                                rc = lfsck_namespace_repair_unmatched_pairs(env,
@@ -1351,6 +1545,7 @@ again:
                                RETURN(rc);
                        }
 
                                RETURN(rc);
                        }
 
+rebuild:
                        /* It is the most common case that we find the
                         * name entry corresponding to the linkEA entry
                         * that matches the ".." name entry. */
                        /* It is the most common case that we find the
                         * name entry corresponding to the linkEA entry
                         * that matches the ".." name entry. */
@@ -1370,10 +1565,19 @@ again:
                        RETURN(rc);
                }
 
                        RETURN(rc);
                }
 
-               /* XXX: The name entry references another MDT-object that
-                *      may be created by the LFSCK for repairing dangling
-                *      name entry. There will be another patch for further
-                *      processing. */
+               lfsck_ibits_unlock(lh, LCK_EX);
+               /* The name entry references another MDT-object that may be
+                * created by the LFSCK for repairing dangling name entry.
+                * Try to replace it. */
+               rc = lfsck_namespace_replace_cond(env, com, parent, child,
+                                                 tfid, cname);
+               lfsck_object_put(env, parent);
+               if (rc < 0)
+                       RETURN(rc);
+
+               if (rc > 0)
+                       goto rebuild;
+
                linkea_del_buf(ldata, cname);
        }
 
                linkea_del_buf(ldata, cname);
        }
 
@@ -1777,25 +1981,36 @@ static int lfsck_namespace_double_scan_one(const struct lu_env *env,
                }
 
                if (rc == 0) {
                }
 
                if (rc == 0) {
-                       lfsck_object_put(env, parent);
                        if (lu_fid_eq(cfid, lfsck_dto2fid(child))) {
                                /* It is the most common case that we
                                 * find the name entry corresponding
                                 * to the linkEA entry. */
                        if (lu_fid_eq(cfid, lfsck_dto2fid(child))) {
                                /* It is the most common case that we
                                 * find the name entry corresponding
                                 * to the linkEA entry. */
+                               lfsck_object_put(env, parent);
                                linkea_next_entry(&ldata);
                        } else {
                                linkea_next_entry(&ldata);
                        } else {
-                               /* XXX: The name entry references another
-                                *      MDT-object that may be created by
-                                *      the LFSCK for repairing dangling
-                                *      name entry. There will be another
-                                *      patch for further processing. */
-                               rc = lfsck_namespace_shrink_linkea(env, com,
-                                       child, &ldata, cname, pfid, true);
+                               /* The name entry references another
+                                * MDT-object that may be created by
+                                * the LFSCK for repairing dangling
+                                * name entry. Try to replace it. */
+                               rc = lfsck_namespace_replace_cond(env, com,
+                                               parent, child, cfid, cname);
+                               lfsck_object_put(env, parent);
                                if (rc < 0)
                                        GOTO(out, rc);
 
                                if (rc < 0)
                                        GOTO(out, rc);
 
-                               if (rc > 0)
+                               if (rc > 0) {
                                        repaired = true;
                                        repaired = true;
+                                       linkea_next_entry(&ldata);
+                               } else {
+                                       rc = lfsck_namespace_shrink_linkea(env,
+                                                       com, child, &ldata,
+                                                       cname, pfid, true);
+                                       if (rc < 0)
+                                               GOTO(out, rc);
+
+                                       if (rc > 0)
+                                               repaired = true;
+                               }
                        }
 
                        continue;
                        }
 
                        continue;
@@ -1900,6 +2115,7 @@ static void lfsck_namespace_dump_statistics(struct seq_file *m,
                      "multiple_linked_repaired: "LPU64"\n"
                      "unknown_inconsistency: "LPU64"\n"
                      "unmatched_pairs_repaired: "LPU64"\n"
                      "multiple_linked_repaired: "LPU64"\n"
                      "unknown_inconsistency: "LPU64"\n"
                      "unmatched_pairs_repaired: "LPU64"\n"
+                     "dangling_repaired: "LPU64"\n"
                      "success_count: %u\n"
                      "run_time_phase1: %u seconds\n"
                      "run_time_phase2: %u seconds\n",
                      "success_count: %u\n"
                      "run_time_phase1: %u seconds\n"
                      "run_time_phase2: %u seconds\n",
@@ -1918,6 +2134,7 @@ static void lfsck_namespace_dump_statistics(struct seq_file *m,
                      ns->ln_mul_linked_repaired,
                      ns->ln_unknown_inconsistency,
                      ns->ln_unmatched_pairs_repaired,
                      ns->ln_mul_linked_repaired,
                      ns->ln_unknown_inconsistency,
                      ns->ln_unmatched_pairs_repaired,
+                     ns->ln_dangling_repaired,
                      ns->ln_success_count,
                      time_phase1,
                      time_phase2);
                      ns->ln_success_count,
                      time_phase1,
                      time_phase2);
@@ -2095,6 +2312,7 @@ static int lfsck_namespace_prep(const struct lu_env *env,
                        ns->ln_mul_linked_repaired = 0;
                        ns->ln_unknown_inconsistency = 0;
                        ns->ln_unmatched_pairs_repaired = 0;
                        ns->ln_mul_linked_repaired = 0;
                        ns->ln_unknown_inconsistency = 0;
                        ns->ln_unmatched_pairs_repaired = 0;
+                       ns->ln_dangling_repaired = 0;
                        fid_zero(&ns->ln_fid_latest_scanned_phase2);
                        if (list_empty(&com->lc_link_dir))
                                list_add_tail(&com->lc_link_dir,
                        fid_zero(&ns->ln_fid_latest_scanned_phase2);
                        if (list_empty(&com->lc_link_dir))
                                list_add_tail(&com->lc_link_dir,
@@ -2651,6 +2869,209 @@ static struct lfsck_operations lfsck_namespace_ops = {
        .lfsck_query            = lfsck_namespace_query,
 };
 
        .lfsck_query            = lfsck_namespace_query,
 };
 
+/**
+ * Repair dangling name entry.
+ *
+ * For the name entry with dangling reference, we need to repair the
+ * inconsistency according to the LFSCK sponsor's requirement:
+ *
+ * 1) Keep the inconsistency there and report the inconsistency case,
+ *    then give the chance to the application to find related issues,
+ *    and the users can make the decision about how to handle it with
+ *    more human knowledge. (by default)
+ *
+ * 2) Re-create the missed MDT-object with the FID information.
+ *
+ * \param[in] env      pointer to the thread context
+ * \param[in] com      pointer to the lfsck component
+ * \param[in] child    pointer to the object corresponding to the dangling
+ *                     name entry
+ * \param[in] lnr      pointer to the namespace request that contains the
+ *                     name entry's name, parent object, parent's LMV, etc.
+ *
+ * \retval             positive number if no need to repair
+ * \retval             zero for repaired successfully, or when nothing is
+ *                     modified because LPF_CREATE_MDTOBJ is not set or
+ *                     LPF_DRYRUN is set (the case is only logged)
+ * \retval             negative error number on failure
+ */
+int lfsck_namespace_repair_dangling(const struct lu_env *env,
+                                   struct lfsck_component *com,
+                                   struct dt_object *child,
+                                   struct lfsck_namespace_req *lnr)
+{
+       struct lfsck_thread_info        *info   = lfsck_env_info(env);
+       struct lu_attr                  *la     = &info->lti_la;
+       struct dt_allocation_hint       *hint   = &info->lti_hint;
+       struct dt_object_format         *dof    = &info->lti_dof;
+       struct dt_insert_rec            *rec    = &info->lti_dt_rec;
+       struct dt_object                *parent = lnr->lnr_obj;
+       const struct lu_name            *cname;
+       struct linkea_data               ldata  = { 0 };
+       struct lustre_handle             lh     = { 0 };
+       struct lu_buf                    linkea_buf;
+       struct lfsck_instance           *lfsck  = com->lc_lfsck;
+       struct lfsck_bookmark           *bk     = &lfsck->li_bookmark_ram;
+       struct dt_device                *dev    = lfsck_obj2dt_dev(child);
+       struct thandle                  *th     = NULL;
+       int                              rc     = 0;
+       __u16                            type   = lnr->lnr_type;
+       bool                             create;
+       ENTRY;
+
+       cname = lfsck_name_get_const(env, lnr->lnr_name, lnr->lnr_namelen);
+       if (bk->lb_param & LPF_CREATE_MDTOBJ)
+               create = true;
+       else
+               create = false;
+
+       /* Without LPF_CREATE_MDTOBJ, or under dryrun mode, nothing is
+        * modified: only log the dangling name entry and return zero. */
+       if (!create || bk->lb_param & LPF_DRYRUN)
+               GOTO(log, rc = 0);
+
+       /* Prepare the linkEA (name + parent FID) for the new MDT-object. */
+       rc = linkea_data_new(&ldata, &info->lti_linkea_buf2);
+       if (rc != 0)
+               GOTO(log, rc);
+
+       rc = linkea_add_buf(&ldata, cname, lfsck_dto2fid(parent));
+       if (rc != 0)
+               GOTO(log, rc);
+
+       rc = lfsck_ibits_lock(env, lfsck, parent, &lh,
+                             MDS_INODELOCK_UPDATE, LCK_EX);
+       if (rc != 0)
+               GOTO(log, rc);
+
+       /* Check the name entry again with the parent locked; any non-zero
+        * result ends the repair and is returned to the caller as is. */
+       rc = lfsck_namespace_check_exist(env, parent, child, lnr->lnr_name);
+       if (rc != 0)
+               GOTO(log, rc);
+
+       th = dt_trans_create(env, dev);
+       if (IS_ERR(th))
+               GOTO(log, rc = PTR_ERR(th));
+
+       /* Set the ctime as zero, then others can know it is created for
+        * repairing dangling name entry by LFSCK. And if the LFSCK made
+        * wrong decision and the real MDT-object has been found later,
+        * then the LFSCK has chance to fix the inconsistency properly. */
+       memset(la, 0, sizeof(*la));
+       la->la_mode = (type & S_IFMT) | 0600;
+       la->la_valid = LA_TYPE | LA_MODE | LA_UID | LA_GID |
+                       LA_ATIME | LA_MTIME | LA_CTIME;
+
+       child->do_ops->do_ah_init(env, hint, parent, child,
+                                 la->la_mode & S_IFMT);
+
+       memset(dof, 0, sizeof(*dof));
+       dof->dof_type = dt_mode_to_dft(type);
+       /* If the target is a regular file, then the LFSCK will only create
+        * the MDT-object without stripes (dof->dof_reg.striped = 0). related
+        * OST-objects will be created when write open. */
+
+       /* 1a. create child. */
+       rc = dt_declare_create(env, child, la, hint, dof, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       if (S_ISDIR(type)) {
+               if (unlikely(!dt_try_as_dir(env, child)))
+                       GOTO(stop, rc = -ENOTDIR);
+
+               /* 2a. insert dot into child dir */
+               rec->rec_type = S_IFDIR;
+               rec->rec_fid = lfsck_dto2fid(child);
+               rc = dt_declare_insert(env, child,
+                                      (const struct dt_rec *)rec,
+                                      (const struct dt_key *)dot, th);
+               if (rc != 0)
+                       GOTO(stop, rc);
+
+               /* 3a. insert dotdot into child dir */
+               rec->rec_fid = lfsck_dto2fid(parent);
+               rc = dt_declare_insert(env, child,
+                                      (const struct dt_rec *)rec,
+                                      (const struct dt_key *)dotdot, th);
+               if (rc != 0)
+                       GOTO(stop, rc);
+
+               /* 4a. increase child nlink */
+               rc = dt_declare_ref_add(env, child, th);
+               if (rc != 0)
+                       GOTO(stop, rc);
+       }
+
+       /* 5a. insert linkEA for child */
+       lfsck_buf_init(&linkea_buf, ldata.ld_buf->lb_buf,
+                      ldata.ld_leh->leh_len);
+       rc = dt_declare_xattr_set(env, child, &linkea_buf,
+                                 XATTR_NAME_LINK, 0, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       /* -EEXIST is reported as 1, i.e. "no need to repair". */
+       rc = dt_trans_start(env, dev, th);
+       if (rc != 0)
+               GOTO(stop, rc = (rc == -EEXIST ? 1 : rc));
+
+       dt_write_lock(env, child, 0);
+       /* 1b. create child */
+       rc = dt_create(env, child, la, hint, dof, th);
+       if (rc != 0)
+               GOTO(unlock, rc = (rc == -EEXIST ? 1 : rc));
+
+       if (S_ISDIR(type)) {
+               if (unlikely(!dt_try_as_dir(env, child)))
+                       GOTO(unlock, rc = -ENOTDIR);
+
+               /* 2b. insert dot into child dir */
+               rec->rec_type = S_IFDIR;
+               rec->rec_fid = lfsck_dto2fid(child);
+               rc = dt_insert(env, child, (const struct dt_rec *)rec,
+                              (const struct dt_key *)dot, th, BYPASS_CAPA, 1);
+               if (rc != 0)
+                       GOTO(unlock, rc);
+
+               /* 3b. insert dotdot into child dir */
+               rec->rec_fid = lfsck_dto2fid(parent);
+               rc = dt_insert(env, child, (const struct dt_rec *)rec,
+                              (const struct dt_key *)dotdot, th,
+                              BYPASS_CAPA, 1);
+               if (rc != 0)
+                       GOTO(unlock, rc);
+
+               /* 4b. increase child nlink */
+               rc = dt_ref_add(env, child, th);
+               if (rc != 0)
+                       GOTO(unlock, rc);
+       }
+
+       /* 5b. insert linkEA for child. */
+       rc = dt_xattr_set(env, child, &linkea_buf,
+                         XATTR_NAME_LINK, 0, th, BYPASS_CAPA);
+
+       GOTO(unlock, rc);
+
+unlock:
+       dt_write_unlock(env, child);
+
+stop:
+       dt_trans_stop(env, dev, th);
+
+log:
+       lfsck_ibits_unlock(&lh, LCK_EX);
+       CDEBUG(D_LFSCK, "%s: namespace LFSCK assistant found dangling "
+              "reference for: parent "DFID", child "DFID", type %u, "
+              "name %s. %s: rc = %d\n", lfsck_lfsck2name(lfsck),
+              PFID(lfsck_dto2fid(parent)), PFID(lfsck_dto2fid(child)),
+              type, cname->ln_name,
+              create ? "Create the lost MDT-object as required" :
+                       "Keep the dangling name entry there by default", rc);
+
+       /* Flag the namespace as inconsistent for every outcome except
+        * the positive "no need to repair" one. */
+       if (rc <= 0) {
+               struct lfsck_namespace *ns = com->lc_file_ram;
+
+               ns->ln_flags |= LF_INCONSISTENT;
+       }
+
+       return rc;
+}
+
 static int lfsck_namespace_assistant_handler_p1(const struct lu_env *env,
                                                struct lfsck_component *com,
                                                struct lfsck_assistant_req *lar)
 static int lfsck_namespace_assistant_handler_p1(const struct lu_env *env,
                                                struct lfsck_component *com,
                                                struct lfsck_assistant_req *lar)
@@ -2678,6 +3099,7 @@ static int lfsck_namespace_assistant_handler_p1(const struct lu_env *env,
        int                         idx;
        int                         count    = 0;
        int                         rc;
        int                         idx;
        int                         count    = 0;
        int                         rc;
+       enum lfsck_namespace_inconsistency_type type = LNIT_NONE;
        ENTRY;
 
        if (lnr->lnr_attr & LUDA_UPGRADE) {
        ENTRY;
 
        if (lnr->lnr_attr & LUDA_UPGRADE) {
@@ -2743,16 +3165,22 @@ static int lfsck_namespace_assistant_handler_p1(const struct lu_env *env,
        if (IS_ERR(obj))
                GOTO(out, rc = PTR_ERR(obj));
 
        if (IS_ERR(obj))
                GOTO(out, rc = PTR_ERR(obj));
 
+       cname = lfsck_name_get_const(env, lnr->lnr_name, lnr->lnr_namelen);
        if (dt_object_exists(obj) == 0) {
        if (dt_object_exists(obj) == 0) {
+
+dangling:
                rc = lfsck_namespace_check_exist(env, dir, obj, lnr->lnr_name);
                rc = lfsck_namespace_check_exist(env, dir, obj, lnr->lnr_name);
-               if (rc != 0)
-                       GOTO(out, rc);
+               if (rc == 0) {
+                       type = LNIT_DANGLING;
+                       rc = lfsck_namespace_repair_dangling(env, com,
+                                                            obj, lnr);
+                       if (rc == 0)
+                               repaired = true;
+               }
 
 
-               /* XXX: dangling name entry, will handle it in other patch. */
                GOTO(out, rc);
        }
 
                GOTO(out, rc);
        }
 
-       cname = lfsck_name_get_const(env, lnr->lnr_name, lnr->lnr_namelen);
        if (!(bk->lb_param & LPF_DRYRUN) && repaired) {
 
 again:
        if (!(bk->lb_param & LPF_DRYRUN) && repaired) {
 
 again:
@@ -2783,7 +3211,21 @@ again:
                GOTO(stop, rc);
 
        rc = lfsck_links_read(env, obj, &ldata);
                GOTO(stop, rc);
 
        rc = lfsck_links_read(env, obj, &ldata);
-       if (rc == 0) {
+       if (unlikely(rc == -ENOENT)) {
+               if (handle != NULL) {
+                       dt_write_unlock(env, obj);
+                       dtlocked = false;
+
+                       dt_trans_stop(env, dev, handle);
+                       handle = NULL;
+
+                       lfsck_ibits_unlock(&lh, LCK_EX);
+               }
+
+               /* It may happen when the remote object has been removed,
+                * but the local MDT is not aware of that. */
+               goto dangling;
+       } else if (rc == 0) {
                count = ldata.ld_leh->leh_reccount;
                rc = linkea_links_find(&ldata, cname, pfid);
                if ((rc == 0) &&
                count = ldata.ld_leh->leh_reccount;
                rc = linkea_links_find(&ldata, cname, pfid);
                if ((rc == 0) &&
@@ -2859,11 +3301,6 @@ nodata:
                        repaired = true;
                        log = true;
                }
                        repaired = true;
                        log = true;
                }
-       } else if (rc == -ENOENT) {
-               log = false;
-               repaired = false;
-
-               GOTO(stop, rc = 0);
        } else {
                GOTO(stop, rc);
        }
        } else {
                GOTO(stop, rc);
        }
@@ -2882,8 +3319,6 @@ record:
 
        /* Following modification will be in another transaction.  */
        if (handle != NULL) {
 
        /* Following modification will be in another transaction.  */
        if (handle != NULL) {
-               LASSERT(dt_write_locked(env, obj));
-
                dt_write_unlock(env, obj);
                dtlocked = false;
 
                dt_write_unlock(env, obj);
                dtlocked = false;
 
@@ -2930,6 +3365,15 @@ out:
 
                if (repaired) {
                        ns->ln_items_repaired++;
 
                if (repaired) {
                        ns->ln_items_repaired++;
+
+                       switch (type) {
+                       case LNIT_DANGLING:
+                               ns->ln_dangling_repaired++;
+                               break;
+                       default:
+                               break;
+                       }
+
                        if (bk->lb_param & LPF_DRYRUN &&
                            lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent))
                                lfsck_pos_fill(env, lfsck,
                        if (bk->lb_param & LPF_DRYRUN &&
                            lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent))
                                lfsck_pos_fill(env, lfsck,
index acb4775..b1bff5c 100644 (file)
@@ -1321,9 +1321,21 @@ static int mdd_link(const struct lu_env *env, struct md_object *tgt_obj,
                GOTO(out_unlock, rc);
 
 
                GOTO(out_unlock, rc);
 
 
-       rc = __mdd_index_insert_only(env, mdd_tobj, mdo2fid(mdd_sobj),
-                                    mdd_object_type(mdd_sobj), name, handle,
-                                    mdd_object_capa(env, mdd_tobj));
+       if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DANGLING3)) {
+               struct lu_fid tfid = *mdo2fid(mdd_sobj);
+
+               tfid.f_oid++;
+               rc = __mdd_index_insert_only(env, mdd_tobj, &tfid,
+                                            mdd_object_type(mdd_sobj),
+                                            name, handle,
+                                            mdd_object_capa(env, mdd_tobj));
+       } else {
+               rc = __mdd_index_insert_only(env, mdd_tobj, mdo2fid(mdd_sobj),
+                                            mdd_object_type(mdd_sobj),
+                                            name, handle,
+                                            mdd_object_capa(env, mdd_tobj));
+       }
+
        if (rc != 0) {
                mdo_ref_del(env, mdd_sobj, handle);
                GOTO(out_unlock, rc);
        if (rc != 0) {
                mdo_ref_del(env, mdd_sobj, handle);
                GOTO(out_unlock, rc);
@@ -1473,20 +1485,25 @@ int mdd_unlink_sanity_check(const struct lu_env *env, struct mdd_object *pobj,
 static int mdd_declare_unlink(const struct lu_env *env, struct mdd_device *mdd,
                              struct mdd_object *p, struct mdd_object *c,
                              const struct lu_name *name, struct md_attr *ma,
 static int mdd_declare_unlink(const struct lu_env *env, struct mdd_device *mdd,
                              struct mdd_object *p, struct mdd_object *c,
                              const struct lu_name *name, struct md_attr *ma,
-                             struct thandle *handle, int no_name)
+                             struct thandle *handle, int no_name, int is_dir)
 {
 {
-       struct lu_attr     *la = &mdd_env_info(env)->mti_la_for_fix;
-        int rc;
+       struct lu_attr  *la = &mdd_env_info(env)->mti_la_for_fix;
+       int              rc;
 
 
-       if (likely(no_name == 0)) {
-               rc = mdo_declare_index_delete(env, p, name->ln_name, handle);
-               if (rc)
-                       return rc;
-       }
+       if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DANGLING2)) {
+               if (likely(no_name == 0)) {
+                       rc = mdo_declare_index_delete(env, p, name->ln_name,
+                                                     handle);
+                       if (rc != 0)
+                               return rc;
+               }
 
 
-        rc = mdo_declare_ref_del(env, p, handle);
-        if (rc)
-                return rc;
+               if (is_dir != 0) {
+                       rc = mdo_declare_ref_del(env, p, handle);
+                       if (rc != 0)
+                               return rc;
+               }
+       }
 
        LASSERT(ma->ma_attr.la_valid & LA_CTIME);
        la->la_ctime = la->la_mtime = ma->ma_attr.la_ctime;
 
        LASSERT(ma->ma_attr.la_valid & LA_CTIME);
        la->la_ctime = la->la_mtime = ma->ma_attr.la_ctime;
@@ -1606,7 +1623,7 @@ static int mdd_unlink(const struct lu_env *env, struct md_object *pobj,
                RETURN(PTR_ERR(handle));
 
        rc = mdd_declare_unlink(env, mdd, mdd_pobj, mdd_cobj,
                RETURN(PTR_ERR(handle));
 
        rc = mdd_declare_unlink(env, mdd, mdd_pobj, mdd_cobj,
-                               lname, ma, handle, no_name);
+                               lname, ma, handle, no_name, is_dir);
        if (rc)
                GOTO(stop, rc);
 
        if (rc)
                GOTO(stop, rc);
 
@@ -1617,7 +1634,7 @@ static int mdd_unlink(const struct lu_env *env, struct md_object *pobj,
        if (likely(mdd_cobj != NULL))
                mdd_write_lock(env, mdd_cobj, MOR_TGT_CHILD);
 
        if (likely(mdd_cobj != NULL))
                mdd_write_lock(env, mdd_cobj, MOR_TGT_CHILD);
 
-       if (likely(no_name == 0)) {
+       if (likely(no_name == 0) && !OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DANGLING2)) {
                rc = __mdd_index_delete(env, mdd_pobj, name, is_dir, handle,
                                        mdd_object_capa(env, mdd_pobj));
                if (rc)
                rc = __mdd_index_delete(env, mdd_pobj, name, is_dir, handle,
                                        mdd_object_capa(env, mdd_pobj));
                if (rc)
index 7ad3f8c..5167d9a 100644 (file)
@@ -5320,16 +5320,14 @@ again:
        inode = osd_iget(info, dev, id);
        if (IS_ERR(inode)) {
                rc = PTR_ERR(inode);
        inode = osd_iget(info, dev, id);
        if (IS_ERR(inode)) {
                rc = PTR_ERR(inode);
-               if (rc == -ENOENT || rc == -ESTALE) {
-                       *attr |= LUDA_IGNORE;
-                       rc = 0;
-               } else {
+               if (rc == -ENOENT || rc == -ESTALE)
+                       rc = 1;
+               else
                        CDEBUG(D_LFSCK, "%.16s: fail to iget for dirent "
                               "check_repair, dir = %lu/%u, name = %.*s: "
                               "rc = %d\n",
                               devname, dir->i_ino, dir->i_generation,
                               ent->oied_namelen, ent->oied_name, rc);
                        CDEBUG(D_LFSCK, "%.16s: fail to iget for dirent "
                               "check_repair, dir = %lu/%u, name = %.*s: "
                               "rc = %d\n",
                               devname, dir->i_ino, dir->i_generation,
                               ent->oied_namelen, ent->oied_name, rc);
-               }
 
                GOTO(out_journal, rc);
        }
 
                GOTO(out_journal, rc);
        }
@@ -5559,7 +5557,7 @@ static inline int osd_it_ea_rec(const struct lu_env *env,
        if (osd_remote_fid(env, dev, fid))
                RETURN(0);
 
        if (osd_remote_fid(env, dev, fid))
                RETURN(0);
 
-       if (likely(!(attr & LUDA_IGNORE)))
+       if (likely(!(attr & LUDA_IGNORE) && rc == 0))
                rc = osd_add_oi_cache(oti, dev, id, fid);
 
        if (!(attr & LUDA_VERIFY) &&
                rc = osd_add_oi_cache(oti, dev, id, fid);
 
        if (!(attr & LUDA_VERIFY) &&
@@ -5569,7 +5567,7 @@ static inline int osd_it_ea_rec(const struct lu_env *env,
             ldiskfs_test_bit(osd_oi_fid2idx(dev, fid), sf->sf_oi_bitmap)))
                osd_consistency_check(oti, dev, oic);
 
             ldiskfs_test_bit(osd_oi_fid2idx(dev, fid), sf->sf_oi_bitmap)))
                osd_consistency_check(oti, dev, oic);
 
-       RETURN(rc);
+       RETURN(rc > 0 ? 0 : rc);
 }
 
 /**
 }
 
 /**
index ce034ed..7628d31 100644 (file)
@@ -713,6 +713,11 @@ unlock:
 
        rc = out_remote_sync(env, osp->opd_obd->u.cli.cl_import, update, &req);
        if (rc != 0) {
 
        rc = out_remote_sync(env, osp->opd_obd->u.cli.cl_import, update, &req);
        if (rc != 0) {
+               if (rc == -ENOENT) {
+                       dt->do_lu.lo_header->loh_attr &= ~LOHA_EXISTS;
+                       obj->opo_non_exist = 1;
+               }
+
                if (obj->opo_ooa == NULL)
                        GOTO(out, rc);
 
                if (obj->opo_ooa == NULL)
                        GOTO(out, rc);
 
index 0456e17..a3e832e 100644 (file)
@@ -44,7 +44,7 @@ setupall
        ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14 15 16 17 18 19 20 21"
 
 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.6.50) ]] &&
        ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14 15 16 17 18 19 20 21"
 
 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.6.50) ]] &&
-       ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2d 2e 3 22"
+       ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2d 2e 3 22 23"
 
 build_test_filter
 
 
 build_test_filter
 
@@ -389,7 +389,7 @@ run_test 2d "LFSCK can recover the missed linkEA entry"
 test_2e()
 {
        [ $MDSCOUNT -lt 2 ] &&
 test_2e()
 {
        [ $MDSCOUNT -lt 2 ] &&
-               skip "We need at least 2 MDSes for this test" && exit 0
+               skip "We need at least 2 MDSes for this test" && return
 
        check_mount_and_prep
 
 
        check_mount_and_prep
 
@@ -1263,7 +1263,7 @@ run_test 11b "LFSCK can rebuild crashed last_id"
 
 test_12() {
        [ $MDSCOUNT -lt 2 ] &&
 
 test_12() {
        [ $MDSCOUNT -lt 2 ] &&
-               skip "We need at least 2 MDSes for test_12" && exit 0
+               skip "We need at least 2 MDSes for test_12" && return
 
        check_mount_and_prep
        for k in $(seq $MDSCOUNT); do
 
        check_mount_and_prep
        for k in $(seq $MDSCOUNT); do
@@ -2711,7 +2711,7 @@ run_test 20 "Handle the orphan with dummy LOV EA slot properly"
 
 test_21() {
        [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.5.59) ]] &&
 
 test_21() {
        [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.5.59) ]] &&
-               skip "ignore the test if MDS is older than 2.5.59" && exit 0
+               skip "ignore the test if MDS is older than 2.5.59" && return
 
        check_mount_and_prep
        createmany -o $DIR/$tdir/f 100 || error "(0) Fail to create 100 files"
 
        check_mount_and_prep
        createmany -o $DIR/$tdir/f 100 || error "(0) Fail to create 100 files"
@@ -2738,7 +2738,7 @@ run_test 21 "run all LFSCK components by default"
 
 test_22a() {
        [ $MDSCOUNT -lt 2 ] &&
 
 test_22a() {
        [ $MDSCOUNT -lt 2 ] &&
-               skip "We need at least 2 MDSes for this test" && exit 0
+               skip "We need at least 2 MDSes for this test" && return
 
        echo "#####"
        echo "The parent_A references the child directory via some name entry,"
 
        echo "#####"
        echo "The parent_A references the child directory via some name entry,"
@@ -2786,7 +2786,7 @@ run_test 22a "LFSCK can repair unmatched pairs (1)"
 
 test_22b() {
        [ $MDSCOUNT -lt 2 ] &&
 
 test_22b() {
        [ $MDSCOUNT -lt 2 ] &&
-               skip "We need at least 2 MDSes for this test" && exit 0
+               skip "We need at least 2 MDSes for this test" && return
 
        echo "#####"
        echo "The parent_A references the child directory via the name entry_B,"
 
        echo "#####"
        echo "The parent_A references the child directory via the name entry_B,"
@@ -2839,6 +2839,195 @@ test_22b() {
 }
 run_test 22b "LFSCK can repair unmatched pairs (2)"
 
 }
 run_test 22b "LFSCK can repair unmatched pairs (2)"
 
+test_23a() { # dangling remote dir entry: report-only run, then -C run
+       [ $MDSCOUNT -lt 2 ] &&
+               skip "We need at least 2 MDSes for this test" && return
+
+       echo "#####"
+       echo "The name entry is there, but the MDT-object for such name "
+       echo "entry does not exist. The namespace LFSCK should find out "
+       echo "and repair the inconsistency as required."
+       echo "#####"
+
+       check_mount_and_prep
+
+       $LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0 on MDT0"
+       $LFS mkdir -i 1 $DIR/$tdir/d0/d1 || error "(2) Fail to mkdir d1 on MDT1"
+
+       echo "Inject failure stub on MDT1 to simulate dangling name entry"
+       #define OBD_FAIL_LFSCK_DANGLING2        0x1620
+       do_facet mds2 $LCTL set_param fail_loc=0x1620 # rmdir keeps name entry
+       rmdir $DIR/$tdir/d0/d1 || error "(3) Fail to rmdir d1"
+       do_facet mds2 $LCTL set_param fail_loc=0 # clear failure stub
+
+       echo "'ls' should fail because of dangling name entry"
+       ls -ail $DIR/$tdir/d0/d1 > /dev/null 2>&1 && error "(4) ls should fail."
+
+       echo "Trigger namespace LFSCK to find out dangling name entry"
+       $START_NAMESPACE -A -r ||
+               error "(5) Fail to start LFSCK for namespace" # started w/o -C
+
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "completed" 32 || {
+               $SHOW_NAMESPACE
+               error "(6) unexpected status"
+       }
+
+       local repaired=$($SHOW_NAMESPACE |
+                        awk '/^dangling_repaired/ { print $2 }')
+       [ $repaired -eq 1 ] ||
+               error "(7) Fail to repair dangling name entry: $repaired"
+
+       echo "'ls' should fail because not re-create MDT-object by default"
+       ls -ail $DIR/$tdir/d0/d1 > /dev/null 2>&1 && error "(8) ls should fail."
+
+       echo "Trigger namespace LFSCK again to repair dangling name entry"
+       $START_NAMESPACE -A -r -C ||
+               error "(9) Fail to start LFSCK for namespace" # started with -C
+
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "completed" 32 || {
+               $SHOW_NAMESPACE
+               error "(10) unexpected status"
+       }
+
+       repaired=$($SHOW_NAMESPACE |
+                  awk '/^dangling_repaired/ { print $2 }')
+       [ $repaired -eq 1 ] ||
+               error "(11) Fail to repair dangling name entry: $repaired"
+
+       echo "'ls' should success after namespace LFSCK repairing"
+       ls -ail $DIR/$tdir/d0/d1 > /dev/null || error "(12) ls should success."
+}
+run_test 23a "LFSCK can repair dangling name entry (1)"
+
+test_23b() { # bogus hard link FID: LFSCK re-creates, then replaces object
+       echo "#####"
+       echo "The objectA has multiple hard links, one of them corresponding"
+       echo "to the name entry_B. But there is something wrong for the name"
+       echo "entry_B and cause entry_B to references non-exist object_C."
+       echo "In the first-stage scanning, the LFSCK will think the entry_B"
+       echo "as dangling, and re-create the lost object_C. When the LFSCK"
+       echo "comes to the second-stage scanning, it will find that the"
+       echo "former re-creating object_C is not proper, and will try to"
+       echo "replace the object_C with the real object_A."
+       echo "#####"
+
+       check_mount_and_prep
+
+       $LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0 on MDT0"
+       echo "dummy" > $DIR/$tdir/d0/f0 || error "(2) Fail to touch on MDT0"
+       echo "dead" > $DIR/$tdir/d0/f1 || error "(3) Fail to touch on MDT0"
+
+       echo "Inject failure stub on MDT0 to simulate dangling name entry"
+       #define OBD_FAIL_LFSCK_DANGLING3        0x1621
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1621 # entry FID oid+1
+       ln $DIR/$tdir/d0/f0 $DIR/$tdir/d0/foo || error "(4) Fail to hard link"
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0 # clear failure stub
+
+       rm -f $DIR/$tdir/d0/f1 || error "(5) Fail to unlink $DIR/$tdir/d0/f1"
+
+       echo "'ls' should fail because of dangling name entry"
+       ls -ail $DIR/$tdir/d0/foo > /dev/null 2>&1 &&
+               error "(6) ls should fail."
+
+       echo "Trigger namespace LFSCK to find out dangling name entry"
+       $START_NAMESPACE -r -C ||
+               error "(7) Fail to start LFSCK for namespace" # started with -C
+
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "completed" 32 || {
+               $SHOW_NAMESPACE
+               error "(8) unexpected status"
+       }
+
+       local repaired=$($SHOW_NAMESPACE |
+                        awk '/^dangling_repaired/ { print $2 }')
+       [ $repaired -eq 1 ] ||
+               error "(9) Fail to repair dangling name entry: $repaired"
+
+       repaired=$($SHOW_NAMESPACE |
+                  awk '/^multiple_linked_repaired/ { print $2 }')
+       [ $repaired -eq 1 ] ||
+               error "(10) Fail to drop the former created object: $repaired"
+
+       local data=$(cat $DIR/$tdir/d0/foo)
+       [ "$data" == "dummy" ] ||
+               error "(11) The $DIR/$tdir/d0/foo is not recovered: $data"
+}
+run_test 23b "LFSCK can repair dangling name entry (2)"
+
+test_23c() {
+       echo "#####"
+       echo "The objectA has multiple hard links, one of them corresponding"
+       echo "to the name entry_B. But there is something wrong for the name"
+       echo "entry_B and cause entry_B to references non-exist object_C."
+       echo "In the first-stage scanning, the LFSCK will think the entry_B"
+       echo "as dangling, and re-create the lost object_C. And then others"
+       echo "modified the re-created object_C. When the LFSCK comes to the"
+       echo "second-stage scanning, it will find that the former re-creating"
+       echo "object_C maybe wrong and try to replace the object_C with the"
+       echo "real object_A. But because object_C has been modified, so the"
+       echo "LFSCK cannot replace it."
+       echo "#####"
+
+       check_mount_and_prep
+
+       $LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0 on MDT0"
+       echo "dummy" > $DIR/$tdir/d0/f0 || error "(2) Fail to touch on MDT0"
+       echo "dead" > $DIR/$tdir/d0/f1 || error "(3) Fail to touch on MDT0"
+
+       echo "Inject failure stub on MDT0 to simulate dangling name entry"
+       #define OBD_FAIL_LFSCK_DANGLING3        0x1621
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1621
+       ln $DIR/$tdir/d0/f0 $DIR/$tdir/d0/foo || error "(4) Fail to hard link"
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+
+       rm -f $DIR/$tdir/d0/f1 || error "(5) Fail to unlink $DIR/$tdir/d0/f1"
+
+       echo "'ls' should fail because of dangling name entry"
+       ls -ail $DIR/$tdir/d0/foo > /dev/null 2>&1 &&
+               error "(6) ls should fail."
+
+       #define OBD_FAIL_LFSCK_DELAY3           0x1602
+       do_facet $SINGLEMDS $LCTL set_param fail_val=10 fail_loc=0x1602
+
+       echo "Trigger namespace LFSCK to find out dangling name entry"
+       $START_NAMESPACE -r -C ||
+               error "(7) Fail to start LFSCK for namespace"
+
+       wait_update_facet client "stat $DIR/$tdir/d0/foo |
+               awk '/Size/ { print \\\$2 }'" "0" 32 || {
+               stat $DIR/$tdir/guard
+               $SHOW_NAMESPACE
+               error "(8) unexpected size"
+       }
+
+       echo "data" >> $DIR/$tdir/d0/foo || error "(9) Fail to write"
+       cancel_lru_locks osc
+
+       do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "completed" 32 || {
+               $SHOW_NAMESPACE
+               error "(10) unexpected status"
+       }
+
+       local repaired=$($SHOW_NAMESPACE |
+                        awk '/^dangling_repaired/ { print $2 }')
+       [ $repaired -eq 1 ] ||
+               error "(11) Fail to repair dangling name entry: $repaired"
+
+       local data=$(cat $DIR/$tdir/d0/foo)
+       [ "$data" != "dummy" ] ||
+               error "(12) The $DIR/$tdir/d0/foo should not be recovered"
+}
+run_test 23c "LFSCK can repair dangling name entry (3)"
+
 $LCTL set_param debug=-lfsck > /dev/null || true
 
 # restore MDS/OST size
 $LCTL set_param debug=-lfsck > /dev/null || true
 
 # restore MDS/OST size
index 1da619c..579e704 100644 (file)
@@ -381,6 +381,7 @@ command_t cmdlist[] = {
        {"lfsck_start", jt_lfsck_start, 0, "start LFSCK\n"
         "usage: lfsck_start <-M | --device [MDT,OST]_device>\n"
         "                   [-A | --all] [-c | --create_ostobj [on | off]]\n"
        {"lfsck_start", jt_lfsck_start, 0, "start LFSCK\n"
         "usage: lfsck_start <-M | --device [MDT,OST]_device>\n"
         "                   [-A | --all] [-c | --create_ostobj [on | off]]\n"
+        "                   [-C | --create_mdtobj [on | off]]\n"
         "                   [-e | --error {continue | abort}] [-h | --help]\n"
         "                   [-n | --dryrun [on | off]] [-o | --orphan]\n"
         "                   [-r | --reset] [-s | --speed speed_limit]\n"
         "                   [-e | --error {continue | abort}] [-h | --help]\n"
         "                   [-n | --dryrun [on | off]] [-o | --orphan]\n"
         "                   [-r | --reset] [-s | --speed speed_limit]\n"
index f339379..ce8263b 100644 (file)
@@ -98,6 +98,7 @@ static void usage_start(void)
                "usage:\n"
                "lfsck_start <-M | --device {MDT,OST}_device>\n"
                "            [-A | --all] [-c | --create_ostobj [on | off]]\n"
                "usage:\n"
                "lfsck_start <-M | --device {MDT,OST}_device>\n"
                "            [-A | --all] [-c | --create_ostobj [on | off]]\n"
+               "            [-C | --create_mdtobj [on | off]]\n"
                "            [-e | --error {continue | abort}] [-h | --help]\n"
                "            [-n | --dryrun [on | off]] [-o | --orphan]\n"
                "            [-r | --reset] [-s | --speed ops_per_sec_limit]\n"
                "            [-e | --error {continue | abort}] [-h | --help]\n"
                "            [-n | --dryrun [on | off]] [-o | --orphan]\n"
                "            [-r | --reset] [-s | --speed ops_per_sec_limit]\n"
@@ -108,6 +109,8 @@ static void usage_start(void)
                "-A: start LFSCK on all MDT devices\n"
                "-c: create the lost OST-object for dangling LOV EA "
                    "(default 'off', or 'on')\n"
                "-A: start LFSCK on all MDT devices\n"
                "-c: create the lost OST-object for dangling LOV EA "
                    "(default 'off', or 'on')\n"
+               "-C: create the lost MDT-object for dangling name entry "
+                   "(default 'off', or 'on')\n"
                "-e: error handle mode (default 'continue', or 'abort')\n"
                "-h: this help message\n"
                "-n: check with no modification (default 'off', or 'on')\n"
                "-e: error handle mode (default 'continue', or 'abort')\n"
                "-h: this help message\n"
                "-n: check with no modification (default 'off', or 'on')\n"
@@ -155,7 +158,7 @@ int jt_lfsck_start(int argc, char **argv)
        char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
        char device[MAX_OBD_NAME];
        struct lfsck_start start;
        char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
        char device[MAX_OBD_NAME];
        struct lfsck_start start;
-       char *optstring = "Ac::e:hM:n::ors:t:w:";
+       char *optstring = "Ac::C::e:hM:n::ors:t:w:";
        int opt, index, rc, val, i;
 
        memset(&data, 0, sizeof(data));
        int opt, index, rc, val, i;
 
        memset(&data, 0, sizeof(data));
@@ -186,6 +189,19 @@ int jt_lfsck_start(int argc, char **argv)
                        }
                        start.ls_valid |= LSV_CREATE_OSTOBJ;
                        break;
                        }
                        start.ls_valid |= LSV_CREATE_OSTOBJ;
                        break;
+               case 'C':
+                       if (optarg == NULL || strcmp(optarg, "on") == 0) {
+                               start.ls_flags |= LPF_CREATE_MDTOBJ;
+                       } else if (strcmp(optarg, "off") != 0) {
+                               fprintf(stderr, "invalid switch: -C '%s'. "
+                                       "valid switches are:\n"
+                                       "empty ('on'), or 'off' without space. "
+                                       "For example:\n"
+                                       "'-C', '-Con', '-Coff'\n", optarg);
+                               return -EINVAL;
+                       }
+                       start.ls_valid |= LSV_CREATE_MDTOBJ;
+                       break;
                case 'e':
                        if (strcmp(optarg, "abort") == 0) {
                                start.ls_flags |= LPF_FAILOUT;
                case 'e':
                        if (strcmp(optarg, "abort") == 0) {
                                start.ls_flags |= LPF_FAILOUT;