Whamcloud - gitweb
LU-5516 lfsck: repair orphan parent MDT-object 91/11391/29
authorFan Yong <fan.yong@intel.com>
Fri, 8 Aug 2014 12:54:06 +0000 (20:54 +0800)
committerAndreas Dilger <andreas.dilger@intel.com>
Wed, 22 Oct 2014 05:04:30 +0000 (05:04 +0000)
When the namespace LFSCK tries to repair the missing name entry,
means inserting the lost name entry back to its parent directory,
it may find that the parent MDT-object is also lost. Under such
case, the namespace LFSCK will firstly create the missing parent
MDT-object as an orphan and insert into the
.lustre/lost+found/MDTxxxx/ directory with the name:
${FID}-P-${conflict_version}.

Then insert the lost name entry into the orphan parent according
to the MDT-object's linkEA.

Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: Ie9c585f08fa9f371fa3ed1281c9c814606c4a2ea
Reviewed-on: http://review.whamcloud.com/11391
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
lustre/include/lustre/lustre_idl.h
lustre/lfsck/lfsck_internal.h
lustre/lfsck/lfsck_layout.c
lustre/lfsck/lfsck_lib.c
lustre/lfsck/lfsck_namespace.c
lustre/ptlrpc/wiretest.c
lustre/target/out_handler.c
lustre/tests/sanity-lfsck.sh
lustre/utils/wirecheck.c
lustre/utils/wiretest.c

index 8372ace..a51610d 100644 (file)
@@ -3551,6 +3551,7 @@ struct lfsck_request {
        union {
                __u32   lr_speed;
                __u32   lr_status;
+               __u32   lr_type;
        };
        __u16           lr_version;
        __u16           lr_active;
@@ -3586,6 +3587,7 @@ enum lfsck_events {
        LE_PEER_EXIT            = 9,
        LE_CONDITIONAL_DESTROY  = 10,
        LE_PAIRS_VERIFY         = 11,
+       LE_CREATE_ORPHAN        = 12,
 };
 
 enum lfsck_event_flags {
index 08cc00a..a00f451 100644 (file)
@@ -652,6 +652,7 @@ struct lfsck_assistant_data {
 struct lfsck_thread_info {
        struct lu_name          lti_name_const;
        struct lu_name          lti_name;
+       struct lu_name          lti_name2;
        struct lu_buf           lti_buf;
        struct lu_buf           lti_linkea_buf;
        struct lu_buf           lti_linkea_buf2;
index 8e7fd40..18633e0 100644 (file)
@@ -2082,7 +2082,7 @@ static int lfsck_layout_slave_conditional_destroy(const struct lu_env *env,
        if (rc == 0)
                CDEBUG(D_LFSCK, "%s: layout LFSCK destroyed the empty "
                       "OST-object "DFID" that was created for reparing "
-                      "dangling referenced case. But the original missed "
+                      "dangling referenced case. But the original missing "
                       "OST-object is found now.\n",
                       lfsck_lfsck2name(lfsck), PFID(fid));
 
@@ -2677,7 +2677,7 @@ log:
  *    and the users can make the decision about how to handle it with
  *    more human knownledge. (by default)
  *
- * 2) Re-create the missed OST-object with the FID/owner information. */
+ * 2) Re-create the missing OST-object with the FID/owner information. */
 static int lfsck_layout_repair_dangling(const struct lu_env *env,
                                        struct lfsck_component *com,
                                        struct lfsck_layout_req *llr,
index 3e6a1d5..c9ad104 100644 (file)
@@ -2913,6 +2913,7 @@ int lfsck_in_notify(const struct lu_env *env, struct dt_device *key,
        case LE_FID_ACCESSED:
        case LE_PEER_EXIT:
        case LE_CONDITIONAL_DESTROY:
+       case LE_CREATE_ORPHAN:
        case LE_PAIRS_VERIFY: {
                struct lfsck_instance  *lfsck;
                struct lfsck_component *com;
index 7ec48ee..b5e5d80 100644 (file)
@@ -946,12 +946,353 @@ log:
        return rc;
 }
 
+/**
+ * Create the specified orphan MDT-object on remote MDT.
+ *
+ * The LFSCK instance on this MDT will send LFSCK RPC to remote MDT to
+ * ask the remote LFSCK instance to create the specified orphan object
+ * under .lustre/lost+found/MDTxxxx/ directory with the name:
+ * ${FID}-P-${conflict_version}.
+ *
+ * \param[in] env      pointer to the thread context
+ * \param[in] com      pointer to the lfsck component
+ * \param[in] orphan   pointer to the orphan MDT-object
+ * \param[in] type     the orphan's type to be created
+ *
+ *  type "P":          The orphan object to be created was a parent directory
+ *                     of some DMT-object which linkEA shows that the @orphan
+ *                     object is missing.
+ *
+ * \see lfsck_layout_recreate_parent() for more types.
+ *
+ * \retval             positive number for repaired cases
+ * \retval             0 if needs to repair nothing
+ * \retval             negative error number on failure
+ */
+static int lfsck_namespace_create_orphan_remote(const struct lu_env *env,
+                                               struct lfsck_component *com,
+                                               struct dt_object *orphan,
+                                               __u32 type)
+{
+       struct lfsck_thread_info        *info   = lfsck_env_info(env);
+       struct lfsck_request            *lr     = &info->lti_lr;
+       struct lu_seq_range             *range  = &info->lti_range;
+       const struct lu_fid             *fid    = lfsck_dto2fid(orphan);
+       struct lfsck_namespace          *ns     = com->lc_file_ram;
+       struct lfsck_instance           *lfsck  = com->lc_lfsck;
+       struct seq_server_site          *ss     =
+                       lu_site2seq(lfsck->li_bottom->dd_lu_dev.ld_site);
+       struct lfsck_tgt_desc           *ltd    = NULL;
+       struct ptlrpc_request           *req    = NULL;
+       int                              rc;
+       ENTRY;
+
+       if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
+               GOTO(out, rc = 1);
+
+       fld_range_set_mdt(range);
+       rc = fld_server_lookup(env, ss->ss_server_fld, fid_seq(fid), range);
+       if (rc != 0)
+               GOTO(out, rc);
+
+       ltd = lfsck_tgt_get(&lfsck->li_mdt_descs, range->lsr_index);
+       if (ltd == NULL) {
+               ns->ln_flags |= LF_INCOMPLETE;
+
+               GOTO(out, rc = -ENODEV);
+       }
+
+       req = ptlrpc_request_alloc(class_exp2cliimp(ltd->ltd_exp),
+                                  &RQF_LFSCK_NOTIFY);
+       if (req == NULL)
+               GOTO(out, rc = -ENOMEM);
+
+       rc = ptlrpc_request_pack(req, LUSTRE_OBD_VERSION, LFSCK_NOTIFY);
+       if (rc != 0) {
+               ptlrpc_request_free(req);
+
+               GOTO(out, rc);
+       }
+
+       lr = req_capsule_client_get(&req->rq_pill, &RMF_LFSCK_REQUEST);
+       memset(lr, 0, sizeof(*lr));
+       lr->lr_event = LE_CREATE_ORPHAN;
+       lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
+       lr->lr_active = LFSCK_TYPE_NAMESPACE;
+       lr->lr_fid = *fid;
+       lr->lr_type = type;
+
+       ptlrpc_request_set_replen(req);
+       rc = ptlrpc_queue_wait(req);
+       ptlrpc_req_finished(req);
+
+       if (rc == 0)
+               rc = 1;
+       else if (rc == -EEXIST)
+               rc = 0;
+
+       GOTO(out, rc);
+
+out:
+       CDEBUG(D_LFSCK, "%s: namespace LFSCK create object "
+              DFID" on the MDT %x remotely: rc = %d\n",
+              lfsck_lfsck2name(lfsck), PFID(fid),
+              ltd != NULL ? ltd->ltd_index : -1, rc);
+
+       if (ltd != NULL)
+               lfsck_tgt_put(ltd);
+
+       return rc;
+}
+
+/**
+ * Create the specified orphan MDT-object locally.
+ *
+ * For the case that the parent MDT-object stored in some MDT-object's
+ * linkEA entry is lost, the LFSCK will re-create the parent object as
+ * an orphan and insert it into .lustre/lost+found/MDTxxxx/ directory
+ * with the name ${FID}-P-${conflict_version}.
+ *
+ * \param[in] env      pointer to the thread context
+ * \param[in] com      pointer to the lfsck component
+ * \param[in] orphan   pointer to the orphan MDT-object to be created
+ * \param[in] type     the orphan's type to be created
+ *
+ *  type "P":          The orphan object to be created was a parent directory
+ *                     of some DMT-object which linkEA shows that the @orphan
+ *                     object is missing.
+ *
+ * \see lfsck_layout_recreate_parent() for more types.
+ *
+ * \retval             positive number for repaired cases
+ * \retval             negative error number on failure
+ */
+static int lfsck_namespace_create_orphan_local(const struct lu_env *env,
+                                              struct lfsck_component *com,
+                                              struct dt_object *orphan,
+                                              __u32 type)
+{
+       struct lfsck_thread_info        *info   = lfsck_env_info(env);
+       struct lu_attr                  *la     = &info->lti_la;
+       struct dt_allocation_hint       *hint   = &info->lti_hint;
+       struct dt_object_format         *dof    = &info->lti_dof;
+       struct lu_name                  *cname  = &info->lti_name2;
+       struct dt_insert_rec            *rec    = &info->lti_dt_rec;
+       struct lu_fid                   *tfid   = &info->lti_fid;
+       const struct lu_fid             *cfid   = lfsck_dto2fid(orphan);
+       const struct lu_fid             *pfid;
+       struct lfsck_instance           *lfsck  = com->lc_lfsck;
+       struct dt_device                *dev    = lfsck->li_bottom;
+       struct dt_object                *parent = NULL;
+       struct dt_object                *child  = NULL;
+       struct thandle                  *th     = NULL;
+       struct lustre_handle             lh     = { 0 };
+       struct linkea_data               ldata  = { 0 };
+       struct lu_buf                    linkea_buf;
+       char                             name[32];
+       int                              namelen;
+       int                              idx    = 0;
+       int                              rc     = 0;
+       ENTRY;
+
+       LASSERT(!dt_object_exists(orphan));
+       LASSERT(!dt_object_remote(orphan));
+
+       /* @orphan maybe not attached to lfsck->li_bottom */
+       child = lfsck_object_find_by_dev(env, dev, cfid);
+       if (IS_ERR(child))
+               GOTO(log, rc = PTR_ERR(child));
+
+       cname->ln_name = NULL;
+       if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
+               GOTO(log, rc = 1);
+
+       /* Create .lustre/lost+found/MDTxxxx when needed. */
+       if (unlikely(lfsck->li_lpf_obj == NULL)) {
+               rc = lfsck_create_lpf(env, lfsck);
+               if (rc != 0)
+                       GOTO(log, rc);
+       }
+
+       parent = lfsck->li_lpf_obj;
+       pfid = lfsck_dto2fid(parent);
+
+       /* Hold update lock on the parent to prevent others to access. */
+       rc = lfsck_ibits_lock(env, lfsck, parent, &lh,
+                             MDS_INODELOCK_UPDATE, LCK_EX);
+       if (rc != 0)
+               GOTO(log, rc);
+
+       do {
+               namelen = snprintf(name, 31, DFID"-P-%d",
+                                  PFID(cfid), idx++);
+               rc = dt_lookup(env, parent, (struct dt_rec *)tfid,
+                              (const struct dt_key *)name, BYPASS_CAPA);
+               if (rc != 0 && rc != -ENOENT)
+                       GOTO(unlock1, rc);
+       } while (rc == 0);
+
+       cname->ln_name = name;
+       cname->ln_namelen = namelen;
+
+       memset(la, 0, sizeof(*la));
+       la->la_mode = type | (S_ISDIR(type) ? 0700 : 0600);
+       la->la_valid = LA_TYPE | LA_MODE | LA_UID | LA_GID |
+                      LA_ATIME | LA_MTIME | LA_CTIME;
+
+       child->do_ops->do_ah_init(env, hint, parent, child,
+                                 la->la_mode & S_IFMT);
+
+       memset(dof, 0, sizeof(*dof));
+       dof->dof_type = dt_mode_to_dft(type);
+
+       rc = linkea_data_new(&ldata, &info->lti_linkea_buf2);
+       if (rc != 0)
+               GOTO(unlock1, rc);
+
+       rc = linkea_add_buf(&ldata, cname, pfid);
+       if (rc != 0)
+               GOTO(unlock1, rc);
+
+       th = dt_trans_create(env, dev);
+       if (IS_ERR(th))
+               GOTO(unlock1, rc = PTR_ERR(th));
+
+       rc = dt_declare_create(env, child, la, hint, dof, th);
+       if (rc == 0 && S_ISDIR(type))
+               rc = dt_declare_ref_add(env, child, th);
+
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       lfsck_buf_init(&linkea_buf, ldata.ld_buf->lb_buf,
+                      ldata.ld_leh->leh_len);
+       rc = dt_declare_xattr_set(env, child, &linkea_buf,
+                                 XATTR_NAME_LINK, 0, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       rec->rec_type = type;
+       rec->rec_fid = cfid;
+       rc = dt_declare_insert(env, parent, (const struct dt_rec *)rec,
+                              (const struct dt_key *)name, th);
+       if (rc == 0 && S_ISDIR(type))
+               rc = dt_declare_ref_add(env, parent, th);
+
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       rc = dt_trans_start_local(env, dev, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       dt_write_lock(env, child, 0);
+       rc = dt_create(env, child, la, hint, dof, th);
+       if (rc != 0)
+               GOTO(unlock2, rc);
+
+       if (S_ISDIR(type)) {
+               if (unlikely(!dt_try_as_dir(env, child)))
+                       GOTO(unlock2, rc = -ENOTDIR);
+
+               rec->rec_type = S_IFDIR;
+               rec->rec_fid = cfid;
+               rc = dt_insert(env, child, (const struct dt_rec *)rec,
+                              (const struct dt_key *)dot, th, BYPASS_CAPA, 1);
+               if (rc != 0)
+                       GOTO(unlock2, rc);
+
+               rec->rec_fid = pfid;
+               rc = dt_insert(env, child, (const struct dt_rec *)rec,
+                              (const struct dt_key *)dotdot, th,
+                              BYPASS_CAPA, 1);
+               if (rc != 0)
+                       GOTO(unlock2, rc);
+
+               rc = dt_ref_add(env, child, th);
+               if (rc != 0)
+                       GOTO(unlock2, rc);
+       }
+
+       rc = dt_xattr_set(env, child, &linkea_buf,
+                         XATTR_NAME_LINK, 0, th, BYPASS_CAPA);
+       dt_write_unlock(env, child);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       rec->rec_type = type;
+       rec->rec_fid = cfid;
+       rc = dt_insert(env, parent, (const struct dt_rec *)rec,
+                      (const struct dt_key *)name, th, BYPASS_CAPA, 1);
+       if (rc == 0 && S_ISDIR(type)) {
+               dt_write_lock(env, parent, 0);
+               rc = dt_ref_add(env, parent, th);
+               dt_write_unlock(env, parent);
+       }
+
+       GOTO(stop, rc = (rc == 0 ? 1 : rc));
+
+unlock2:
+       dt_write_unlock(env, child);
+
+stop:
+       dt_trans_stop(env, dev, th);
+
+unlock1:
+       lfsck_ibits_unlock(&lh, LCK_EX);
+
+log:
+       CDEBUG(D_LFSCK, "%s: namespace LFSCK create orphan locally for "
+              "the object "DFID", name = %s, type %o: rc = %d\n",
+              lfsck_lfsck2name(lfsck), PFID(cfid),
+              cname->ln_name != NULL ? cname->ln_name : "<NULL>", type, rc);
+
+       if (child != NULL && !IS_ERR(child))
+               lfsck_object_put(env, child);
+
+       return rc;
+}
+
+/**
+ * Create the specified orphan MDT-object.
+ *
+ * For the case that the parent MDT-object stored in some MDT-object's
+ * linkEA entry is lost, the LFSCK will re-create the parent object as
+ * an orphan and insert it into .lustre/lost+found/MDTxxxx/ directory
+ * with the name: ${FID}-P-${conflict_version}.
+ *
+ * \param[in] env      pointer to the thread context
+ * \param[in] com      pointer to the lfsck component
+ * \param[in] orphan   pointer to the orphan MDT-object
+ *
+ *  type "P":          The orphan object to be created was a parent directory
+ *                     of some DMT-object which linkEA shows that the @orphan
+ *                     object is missing.
+ *
+ * \see lfsck_layout_recreate_parent() for more types.
+ *
+ * \retval             positive number for repaired cases
+ * \retval             0 if needs to repair nothing
+ * \retval             negative error number on failure
+ */
 static int lfsck_namespace_create_orphan(const struct lu_env *env,
                                         struct lfsck_component *com,
                                         struct dt_object *orphan)
 {
-       /* XXX: TBD */
-       return 0;
+       struct lfsck_namespace *ns = com->lc_file_ram;
+       int                     rc;
+
+       if (dt_object_remote(orphan))
+               rc = lfsck_namespace_create_orphan_remote(env, com, orphan,
+                                                         S_IFDIR);
+       else
+               rc = lfsck_namespace_create_orphan_local(env, com, orphan,
+                                                        S_IFDIR);
+
+       if (rc != 0)
+               ns->ln_flags |= LF_INCONSISTENT;
+
+       return rc;
 }
 
 /**
@@ -1819,6 +2160,8 @@ lfsck_namespace_dsd_single(const struct lu_env *env,
                }
 
                lfsck_ibits_unlock(lh, LCK_EX);
+
+lost_parent:
                /* Create the lost parent as an orphan. */
                rc = lfsck_namespace_create_orphan(env, com, parent);
                if (rc >= 0) {
@@ -1875,6 +2218,11 @@ lfsck_namespace_dsd_single(const struct lu_env *env,
                /* Add the missing name entry back to the namespace. */
                rc = lfsck_namespace_insert_normal(env, com, parent, child,
                                                   cname->ln_name);
+               if (unlikely(rc == -ESTALE))
+                       /* It may happen when the remote object has been
+                        * removed, but the local MDT is not aware of that. */
+                       goto lost_parent;
+
                if (unlikely(rc == -EEXIST)) {
                        /* Unfortunately, someone reused the name under the
                         * parent by race. So we have to remove the linkEA
@@ -2501,6 +2849,8 @@ static int lfsck_namespace_double_scan_one(const struct lu_env *env,
                        GOTO(out, rc = PTR_ERR(parent));
 
                if (!dt_object_exists(parent)) {
+
+lost_parent:
                        if (ldata.ld_leh->leh_reccount > 1) {
                                /* If it is NOT the last linkEA entry, then
                                 * there is still other chance to make the
@@ -2632,6 +2982,11 @@ static int lfsck_namespace_double_scan_one(const struct lu_env *env,
                /* Add the missing name entry back to the namespace. */
                rc = lfsck_namespace_insert_normal(env, com, parent, child,
                                                   cname->ln_name);
+               if (unlikely(rc == -ESTALE))
+                       /* It may happen when the remote object has been
+                        * removed, but the local MDT is not aware of that. */
+                       goto lost_parent;
+
                if (unlikely(rc == -EEXIST))
                        /* Unfortunately, someone reused the name under the
                         * parent by race. So we have to remove the linkEA
@@ -3385,13 +3740,49 @@ static int lfsck_namespace_in_notify(const struct lu_env *env,
        struct lfsck_assistant_data     *lad   = com->lc_data;
        struct lfsck_tgt_descs          *ltds  = &lfsck->li_mdt_descs;
        struct lfsck_tgt_desc           *ltd;
+       int                              rc;
        bool                             fail  = false;
        ENTRY;
 
-       if (lr->lr_event != LE_PHASE1_DONE &&
-           lr->lr_event != LE_PHASE2_DONE &&
-           lr->lr_event != LE_PEER_EXIT)
+       switch (lr->lr_event) {
+       case LE_CREATE_ORPHAN: {
+               struct dt_object *orphan = NULL;
+
+               CDEBUG(D_LFSCK, "%s: namespace LFSCK handling notify from "
+                      "MDT %x to create orphan"DFID" with type %o\n",
+                      lfsck_lfsck2name(lfsck), lr->lr_index,
+                      PFID(&lr->lr_fid), lr->lr_type);
+
+               orphan = lfsck_object_find(env, lfsck, &lr->lr_fid);
+               if (IS_ERR(orphan))
+                       GOTO(out_create, rc = PTR_ERR(orphan));
+
+               if (dt_object_exists(orphan))
+                       GOTO(out_create, rc = -EEXIST);
+
+               rc = lfsck_namespace_create_orphan_local(env, com, orphan,
+                                                        lr->lr_type);
+
+               GOTO(out_create, rc = (rc == 1) ? 0 : rc);
+
+out_create:
+               CDEBUG(D_LFSCK, "%s: namespace LFSCK handled notify from "
+                      "MDT %x to create orphan"DFID" with type %o: rc = %d\n",
+                      lfsck_lfsck2name(lfsck), lr->lr_index,
+                      PFID(&lr->lr_fid), lr->lr_type, rc);
+
+               if (orphan != NULL && !IS_ERR(orphan))
+                       lfsck_object_put(env, orphan);
+
+               return rc;
+       }
+       case LE_PHASE1_DONE:
+       case LE_PHASE2_DONE:
+       case LE_PEER_EXIT:
+               break;
+       default:
                RETURN(-EINVAL);
+       }
 
        CDEBUG(D_LFSCK, "%s: namespace LFSCK handles notify %u from MDT %x, "
               "status %d\n", lfsck_lfsck2name(lfsck), lr->lr_event,
@@ -3495,7 +3886,7 @@ static struct lfsck_operations lfsck_namespace_ops = {
  *    and the users can make the decision about how to handle it with
  *    more human knownledge. (by default)
  *
- * 2) Re-create the missed MDT-object with the FID information.
+ * 2) Re-create the missing MDT-object with the FID information.
  *
  * \param[in] env      pointer to the thread context
  * \param[in] com      pointer to the lfsck component
index bb1fd78..4bf3158 100644 (file)
@@ -4737,6 +4737,8 @@ void lustre_assert_wire_constants(void)
                 (long long)LE_CONDITIONAL_DESTROY);
        LASSERTF(LE_PAIRS_VERIFY == 11, "found %lld\n",
                 (long long)LE_PAIRS_VERIFY);
+       LASSERTF(LE_CREATE_ORPHAN == 12, "found %lld\n",
+                (long long)LE_CREATE_ORPHAN);
        LASSERTF(LEF_TO_OST == 0x00000001UL, "found 0x%.8xUL\n",
                (unsigned)LEF_TO_OST);
        LASSERTF(LEF_FROM_OST == 0x00000002UL, "found 0x%.8xUL\n",
index 51bdd93..25aac41 100644 (file)
@@ -939,6 +939,9 @@ static int out_tx_index_insert_exec(const struct lu_env *env,
        struct dt_object *dt_obj = arg->object;
        int rc;
 
+       if (unlikely(!dt_object_exists(dt_obj)))
+               RETURN(-ESTALE);
+
        rc = out_obj_index_insert(env, dt_obj,
                                  (const struct dt_rec *)&arg->u.insert.rec,
                                  arg->u.insert.key, th);
index f77ad1b..602b13c 100644 (file)
@@ -46,7 +46,7 @@ setupall
        ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14 15 16 17 18 19 20 21"
 
 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.6.50) ]] &&
-       ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2d 2e 3 22 23 24 25 26"
+       ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2d 2e 3 22 23 24 25 26 27"
 
 build_test_filter
 
@@ -3249,6 +3249,122 @@ test_26b() {
 }
 run_test 26b "LFSCK can add the missing remote name entry back to the namespace"
 
+test_27a() {
+       echo "#####"
+       echo "The local parent referenced by the MDT-object linkEA is lost."
+       echo "The namespace LFSCK will re-create the lost parent as orphan."
+       echo "#####"
+
+       check_mount_and_prep
+
+       $LFS mkdir -i 0 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
+       touch $DIR/$tdir/d0/foo || error "(2) Fail to create foo"
+       ln $DIR/$tdir/d0/foo $DIR/$tdir/d0/dummy ||
+               error "(3) Fail to hard link to $DIR/$tdir/d0/foo"
+
+       echo "Inject failure stub on MDT0 to simulate the case that"
+       echo "foo's name entry will be removed, but the foo's object"
+       echo "and its linkEA are kept in the system. And then remove"
+       echo "another hard link and the parent directory."
+
+       #define OBD_FAIL_LFSCK_NO_NAMEENTRY     0x1624
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
+       rm -f $DIR/$tdir/d0/foo ||
+               error "(4) Fail to unlink $DIR/$tdir/d0/foo"
+       rm -f $DIR/$tdir/d0/dummy ||
+               error "(5) Fail to unlink $DIR/$tdir/d0/dummy"
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+
+       rm -rf $DIR/$tdir/d0 || error "(5) Fail to unlink the dir d0"
+       ls -ail $DIR/$tdir/d0 > /dev/null 2>&1 && "(6) 'ls' should fail"
+
+       echo "Trigger namespace LFSCK to repair the lost parent"
+       $START_NAMESPACE -r -A ||
+               error "(6) Fail to start LFSCK for namespace"
+
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "completed" 32 || {
+               $SHOW_NAMESPACE
+               error "(7) unexpected status"
+       }
+
+       local repaired=$($SHOW_NAMESPACE |
+                        awk '/^lost_dirent_repaired/ { print $2 }')
+       [ $repaired -eq 1 ] ||
+               error "(8) Fail to repair lost dirent: $repaired"
+
+       echo "There should be an orphan under .lustre/lost+found/MDT0000/"
+       [ -d $MOUNT/.lustre/lost+found/MDT0000 ] ||
+               error "(9) $MOUNT/.lustre/lost+found/MDT0000/ should be there"
+
+       ls -ail $MOUNT/.lustre/lost+found/MDT0000/
+
+       cname=$(find $MOUNT/.lustre/lost+found/MDT0000/ -name *-P-*)
+       [ ! -z "$cname" ] ||
+               error "(10) .lustre/lost+found/MDT0000/ should not be empty"
+}
+run_test 27a "LFSCK can recreate the lost local parent directory as orphan"
+
+test_27b() {
+       [ $MDSCOUNT -lt 2 ] &&
+               skip "We need at least 2 MDSes for this test" && return
+
+       echo "#####"
+       echo "The remote parent referenced by the MDT-object linkEA is lost."
+       echo "The namespace LFSCK will re-create the lost parent as orphan."
+       echo "#####"
+
+       check_mount_and_prep
+
+       $LFS mkdir -i 1 $DIR/$tdir/d0 || error "(1) Fail to mkdir d0"
+       $LFS mkdir -i 0 $DIR/$tdir/d0/foo || error "(2) Fail to mkdir foo"
+
+       $LFS path2fid $DIR/$tdir/d0
+
+       echo "Inject failure stub on MDT0 to simulate the case that"
+       echo "foo's name entry will be removed, but the foo's object"
+       echo "and its linkEA are kept in the system. And then remove"
+       echo "the parent directory."
+
+       #define OBD_FAIL_LFSCK_NO_NAMEENTRY     0x1624
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1624
+       rmdir $DIR/$tdir/d0/foo || error "(3) Fail to rmdir $DIR/$tdir/d0/foo"
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+
+       rmdir $DIR/$tdir/d0 || error "(4) Fail to unlink the dir d0"
+       ls -ail $DIR/$tdir/d0 > /dev/null 2>&1 && "(5) 'ls' should fail"
+
+       echo "Trigger namespace LFSCK to repair the missing remote name entry"
+       $START_NAMESPACE -r -A ||
+               error "(6) Fail to start LFSCK for namespace"
+
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "completed" 32 || {
+               $SHOW_NAMESPACE
+               error "(7) unexpected status"
+       }
+
+       local repaired=$($SHOW_NAMESPACE |
+                        awk '/^lost_dirent_repaired/ { print $2 }')
+       [ $repaired -eq 1 ] ||
+               error "(8) Fail to repair lost dirent: $repaired"
+
+       ls -ail $MOUNT/.lustre/lost+found/
+
+       echo "There should be an orphan under .lustre/lost+found/MDT0001/"
+       [ -d $MOUNT/.lustre/lost+found/MDT0001 ] ||
+               error "(9) $MOUNT/.lustre/lost+found/MDT0001/ should be there"
+
+       ls -ail $MOUNT/.lustre/lost+found/MDT0001/
+
+       cname=$(find $MOUNT/.lustre/lost+found/MDT0001/ -name *-P-*)
+       [ ! -z "$cname" ] ||
+               error "(10) .lustre/lost+found/MDT0001/ should not be empty"
+}
+run_test 27b "LFSCK can recreate the lost remote parent directory as orphan"
+
 $LCTL set_param debug=-lfsck > /dev/null || true
 
 # restore MDS/OST size
index 5bea5f4..5fa79d8 100644 (file)
@@ -2148,6 +2148,7 @@ static void check_lfsck_request(void)
        CHECK_VALUE(LE_PEER_EXIT);
        CHECK_VALUE(LE_CONDITIONAL_DESTROY);
        CHECK_VALUE(LE_PAIRS_VERIFY);
+       CHECK_VALUE(LE_CREATE_ORPHAN);
 
        CHECK_VALUE_X(LEF_TO_OST);
        CHECK_VALUE_X(LEF_FROM_OST);
index 006bdf6..612c5d4 100644 (file)
@@ -4749,6 +4749,8 @@ void lustre_assert_wire_constants(void)
                 (long long)LE_CONDITIONAL_DESTROY);
        LASSERTF(LE_PAIRS_VERIFY == 11, "found %lld\n",
                 (long long)LE_PAIRS_VERIFY);
+       LASSERTF(LE_CREATE_ORPHAN == 12, "found %lld\n",
+                (long long)LE_CREATE_ORPHAN);
        LASSERTF(LEF_TO_OST == 0x00000001UL, "found 0x%.8xUL\n",
                (unsigned)LEF_TO_OST);
        LASSERTF(LEF_FROM_OST == 0x00000002UL, "found 0x%.8xUL\n",