Whamcloud - gitweb
LU-14105 lfsck: don't LBUG() on disk data
[fs/lustre-release.git] / lustre / lfsck / lfsck_namespace.c
index b613511..6e952ac 100644 (file)
@@ -87,7 +87,7 @@ static void lfsck_namespace_assistant_req_fini(const struct lu_env *env,
                                               struct lfsck_assistant_req *lar)
 {
        struct lfsck_namespace_req *lnr =
-                       container_of0(lar, struct lfsck_namespace_req, lnr_lar);
+               container_of(lar, struct lfsck_namespace_req, lnr_lar);
 
        if (lnr->lnr_lmv != NULL)
                lfsck_lmv_put(env, lnr->lnr_lmv);
@@ -312,7 +312,7 @@ static int lfsck_namespace_load_bitmap(const struct lu_env *env,
        }
 
        if (ns->ln_bitmap_size == 0) {
-               lad->lad_incomplete = 0;
+               clear_bit(LAD_INCOMPLETE, &lad->lad_flags);
                CFS_RESET_BITMAP(bitmap);
 
                RETURN(0);
@@ -326,9 +326,9 @@ static int lfsck_namespace_load_bitmap(const struct lu_env *env,
                RETURN(rc >= 0 ? -EINVAL : rc);
 
        if (cfs_bitmap_check_empty(bitmap))
-               lad->lad_incomplete = 0;
+               clear_bit(LAD_INCOMPLETE, &lad->lad_flags);
        else
-               lad->lad_incomplete = 1;
+               set_bit(LAD_INCOMPLETE, &lad->lad_flags);
 
        RETURN(0);
 }
@@ -562,7 +562,7 @@ int lfsck_namespace_trace_update(const struct lu_env *env,
 
        if (new != 0) {
                rc = dt_insert(env, obj, (const struct dt_rec *)&new,
-                              (const struct dt_key *)key, th, 1);
+                              (const struct dt_key *)key, th);
                if (rc != 0)
                        GOTO(log, rc);
        }
@@ -760,19 +760,41 @@ again:
        return rc;
 }
 
+/**
+ * Check the record length currently held in \a ldata.
+ *
+ * The length is accepted only when it is positive and the record that starts
+ * at ld_lee does not extend past leh_len bytes counted from the linkEA
+ * header ld_leh.
+ *
+ * \param[in] ldata    linkEA cursor holding ld_leh, ld_lee and ld_reclen
+ *
+ * \retval             true if the record length is usable
+ * \retval             false if it is non-positive or overruns the linkEA
+ */
+static inline bool linkea_reclen_is_valid(const struct linkea_data *ldata)
+{
+       const char *rec_end;
+       const char *ea_end;
+
+       if (ldata->ld_reclen <= 0)
+               return false;
+
+       rec_end = (const char *)ldata->ld_lee + ldata->ld_reclen;
+       ea_end = (const char *)ldata->ld_leh + ldata->ld_leh->leh_len;
+
+       return rec_end <= ea_end;
+}
+
+/**
+ * Check an unpacked linkEA entry before it is used.
+ *
+ * The entry is accepted only when its record length passes
+ * linkea_reclen_is_valid(), the name length is in the range [1, NAME_MAX]
+ * and the parent FID is sane. The checks are evaluated in that order.
+ *
+ * \param[in] ldata    linkEA cursor the entry was unpacked from
+ * \param[in] cname    the unpacked name
+ * \param[in] pfid     the unpacked parent FID
+ *
+ * \retval             true if all the checks pass
+ * \retval             false otherwise
+ */
+static inline bool linkea_entry_is_valid(const struct linkea_data *ldata,
+                                        const struct lu_name *cname,
+                                        const struct lu_fid *pfid)
+{
+       return linkea_reclen_is_valid(ldata) &&
+              cname->ln_namelen > 0 && cname->ln_namelen <= NAME_MAX &&
+              fid_is_sane(pfid);
+}
+
 static int lfsck_namespace_unpack_linkea_entry(struct linkea_data *ldata,
                                               struct lu_name *cname,
                                               struct lu_fid *pfid,
                                               char *buf, const int buflen)
 {
        linkea_entry_unpack(ldata->ld_lee, &ldata->ld_reclen, cname, pfid);
-       if (unlikely(ldata->ld_reclen <= 0 ||
-                    ldata->ld_reclen + sizeof(struct link_ea_header) >
-                       ldata->ld_leh->leh_len ||
-                    cname->ln_namelen <= 0 ||
-                    cname->ln_namelen > NAME_MAX ||
-                    cname->ln_namelen >= buflen ||
-                    !fid_is_sane(pfid)))
+       if (unlikely(!linkea_entry_is_valid(ldata, cname, pfid)))
                return -EINVAL;
 
        /* To guarantee the 'name' is terminated with '0'. */
@@ -790,9 +812,7 @@ static void lfsck_linkea_del_buf(struct linkea_data *ldata,
 
        /* If current record is corrupted, all the subsequent
         * records will be dropped. */
-       if (unlikely(ldata->ld_reclen <= 0 ||
-                    ldata->ld_reclen + sizeof(struct link_ea_header) >
-                       ldata->ld_leh->leh_len)) {
+       if (unlikely(!linkea_reclen_is_valid(ldata))) {
                void *ptr = ldata->ld_lee;
 
                ldata->ld_leh->leh_len = sizeof(struct link_ea_header);
@@ -830,7 +850,10 @@ static int lfsck_namespace_filter_linkea_entry(struct linkea_data *ldata,
        while (ldata->ld_lee != NULL) {
                ldata->ld_reclen = (ldata->ld_lee->lee_reclen[0] << 8) |
                                   ldata->ld_lee->lee_reclen[1];
-               if (unlikely(ldata->ld_reclen == oldlen &&
+               if (unlikely(!linkea_reclen_is_valid(ldata))) {
+                       lfsck_linkea_del_buf(ldata, NULL);
+                       LASSERT(ldata->ld_lee == NULL);
+               } else if (unlikely(ldata->ld_reclen == oldlen &&
                             memcmp(ldata->ld_lee, oldlee, oldlen) == 0)) {
                        repeated++;
                        if (!remove)
@@ -1035,7 +1058,7 @@ again:
                        rec->rec_type = S_IFDIR;
                        rec->rec_fid = pfid;
                        rc = dt_insert(env, orphan, (const struct dt_rec *)rec,
-                                      (const struct dt_key *)dotdot, th, 1);
+                                      (const struct dt_key *)dotdot, th);
                        if (rc != 0)
                                GOTO(unlock, rc);
                }
@@ -1054,7 +1077,7 @@ again:
                rec->rec_type = lfsck_object_type(orphan) & S_IFMT;
                rec->rec_fid = cfid;
                rc = dt_insert(env, parent, (const struct dt_rec *)rec,
-                              (const struct dt_key *)cname->ln_name, th, 1);
+                              (const struct dt_key *)cname->ln_name, th);
                if (rc == 0 && S_ISDIR(rec->rec_type)) {
                        dt_write_lock(env, parent, 0);
                        rc = dt_ref_add(env, parent, th);
@@ -1090,6 +1113,99 @@ log:
        return rc;
 }
 
+/**
+ * Store \a lmv as the XATTR_NAME_LMV xattr of \a obj.
+ *
+ * The xattr is written in a transaction of its own, created on
+ * lfsck->li_next and started locally. The result of dt_trans_stop() is
+ * not reflected in the return value.
+ *
+ * \param[in] env      pointer to the thread context
+ * \param[in] lfsck    pointer to the lfsck instance
+ * \param[in] obj      pointer to the object to set the LMV xattr on
+ * \param[in] lmv      the LMV header to be stored, sizeof(*lmv) bytes
+ *
+ * \retval             0 if every step returned zero
+ * \retval             the error from the first failed step otherwise
+ */
+static int lfsck_lmv_set(const struct lu_env *env,
+                        struct lfsck_instance *lfsck,
+                        struct dt_object *obj,
+                        struct lmv_mds_md_v1 *lmv)
+{
+       struct lu_buf lmv_buf = { .lb_buf = lmv, .lb_len = sizeof(*lmv) };
+       struct dt_device *dt = lfsck->li_next;
+       struct thandle *handle;
+       int rc;
+
+       ENTRY;
+
+       handle = dt_trans_create(env, dt);
+       if (IS_ERR(handle))
+               RETURN(PTR_ERR(handle));
+
+       /* Declare the xattr update before the transaction is started. */
+       rc = dt_declare_xattr_set(env, obj, &lmv_buf, XATTR_NAME_LMV, 0,
+                                 handle);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       rc = dt_trans_start_local(env, dt, handle);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       rc = dt_xattr_set(env, obj, &lmv_buf, XATTR_NAME_LMV, 0, handle);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       EXIT;
+stop:
+       /* The transaction is stopped on both the success and error paths. */
+       dt_trans_stop(env, dt, handle);
+
+       return rc;
+}
+
+/**
+ * Remove the XATTR_NAME_LMV xattr from \a obj.
+ *
+ * The xattr is deleted in a transaction of its own, created on
+ * lfsck->li_next and started locally. The result of dt_trans_stop() is
+ * not reflected in the return value.
+ *
+ * \param[in] env      pointer to the thread context
+ * \param[in] lfsck    pointer to the lfsck instance
+ * \param[in] obj      pointer to the object to remove the LMV xattr from
+ *
+ * \retval             0 if every step returned zero
+ * \retval             the error from the first failed step otherwise
+ */
+static int lfsck_lmv_delete(const struct lu_env *env,
+                           struct lfsck_instance *lfsck,
+                           struct dt_object *obj)
+{
+       struct dt_device *dt = lfsck->li_next;
+       struct thandle *handle;
+       int rc;
+
+       ENTRY;
+
+       handle = dt_trans_create(env, dt);
+       if (IS_ERR(handle))
+               RETURN(PTR_ERR(handle));
+
+       /* Declare the xattr removal before the transaction is started. */
+       rc = dt_declare_xattr_del(env, obj, XATTR_NAME_LMV, handle);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       rc = dt_trans_start_local(env, dt, handle);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       rc = dt_xattr_del(env, obj, XATTR_NAME_LMV, handle);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       EXIT;
+stop:
+       /* The transaction is stopped on both the success and error paths. */
+       dt_trans_stop(env, dt, handle);
+
+       return rc;
+}
+
+/**
+ * Check whether \a obj, known under the name \a lname, is a shard.
+ *
+ * The object is reported as a shard only when lfsck_shard_name_to_index()
+ * accepts the name for this object's type and FID, and the LMV read from
+ * the object carries the LMV_MAGIC_STRIPE magic. The LMV is read into the
+ * per-thread buffer lti_lmv.
+ *
+ * \param[in] env      pointer to the thread context
+ * \param[in] lfsck    pointer to the lfsck instance
+ * \param[in] obj      pointer to the object to be checked
+ * \param[in] lname    the name of the object
+ *
+ * \retval             1 if the object is a shard
+ * \retval             0 if the name is not accepted as a shard name, the
+ *                     object has no LMV (-ENODATA), or the LMV magic is
+ *                     not LMV_MAGIC_STRIPE
+ * \retval             other value returned by lfsck_read_stripe_lmv()
+ *                     when reading the LMV fails
+ */
+static inline int lfsck_object_is_shard(const struct lu_env *env,
+                                       struct lfsck_instance *lfsck,
+                                       struct dt_object *obj,
+                                       const struct lu_name *lname)
+{
+       struct lmv_mds_md_v1 *stripe_lmv = &lfsck_env_info(env)->lti_lmv;
+       int rc;
+
+       /* A name that cannot be mapped to a shard index is not a shard. */
+       if (lfsck_shard_name_to_index(env, lname->ln_name, lname->ln_namelen,
+                                     lfsck_object_type(obj),
+                                     lfsck_dto2fid(obj)) < 0)
+               return 0;
+
+       rc = lfsck_read_stripe_lmv(env, lfsck, obj, stripe_lmv);
+       if (rc == -ENODATA)
+               return 0;
+
+       if (rc != 0)
+               return rc;
+
+       return stripe_lmv->lmv_magic == LMV_MAGIC_STRIPE ? 1 : 0;
+}
+
 /**
  * Add the specified name entry back to namespace.
  *
@@ -1100,13 +1216,17 @@ log:
  * it is quite possible that the name entry is lost. Then the LFSCK
  * should add the name entry back to the namespace.
  *
+ * If \a child is a shard, then \a parent is a striped directory. If \a parent
+ * has an LMV, it must be deleted before the insertion, because the parent's
+ * striping is now broken and cannot be parsed correctly.
+ *
  * \param[in] env      pointer to the thread context
  * \param[in] com      pointer to the lfsck component
  * \param[in] parent   pointer to the directory under which the name entry
  *                     will be inserted into
  * \param[in] child    pointer to the object referenced by the name entry
  *                     that to be inserted into the parent
- * \param[in] name     the name for the child in the parent directory
+ * \param[in] lname    the name for the child in the parent directory
  *
  * \retval             positive number for repaired cases
  * \retval             0 if nothing to be repaired
@@ -1116,19 +1236,26 @@ static int lfsck_namespace_insert_normal(const struct lu_env *env,
                                         struct lfsck_component *com,
                                         struct dt_object *parent,
                                         struct dt_object *child,
-                                        const char *name)
+                                        const struct lu_name *lname)
 {
-       struct lfsck_thread_info        *info   = lfsck_env_info(env);
-       struct lu_attr                  *la     = &info->lti_la;
-       struct dt_insert_rec            *rec    = &info->lti_dt_rec;
-       struct lfsck_instance           *lfsck  = com->lc_lfsck;
+       struct lfsck_thread_info *info = lfsck_env_info(env);
+       struct lu_attr *la = &info->lti_la;
+       struct dt_insert_rec *rec = &info->lti_dt_rec;
+       struct lfsck_instance *lfsck = com->lc_lfsck;
        /* The child and its name may be on different MDTs. */
-       const struct lu_fid             *pfid   = lfsck_dto2fid(parent);
-       const struct lu_fid             *cfid   = lfsck_dto2fid(child);
-       struct dt_device                *dev    = lfsck->li_next;
-       struct thandle                  *th     = NULL;
-       struct lfsck_lock_handle        *llh    = &info->lti_llh;
-       int                              rc     = 0;
+       const struct lu_fid *pfid = lfsck_dto2fid(parent);
+       const struct lu_fid *cfid = lfsck_dto2fid(child);
+       struct dt_device *dev = lfsck->li_next;
+       struct thandle *th = NULL;
+       struct lfsck_lock_handle *llh = &info->lti_llh;
+       struct lmv_mds_md_v1 *lmv = &info->lti_lmv;
+       struct lu_buf buf = { lmv, sizeof(*lmv) };
+       /* whether parent's LMV is deleted before insertion */
+       bool parent_lmv_deleted = false;
+       /* whether parent's LMV is missing */
+       bool parent_lmv_lost = false;
+       int rc = 0;
+
        ENTRY;
 
        /* @parent/@child may be based on lfsck->li_bottom,
@@ -1138,9 +1265,6 @@ static int lfsck_namespace_insert_normal(const struct lu_env *env,
        if (IS_ERR(parent))
                GOTO(log, rc = PTR_ERR(parent));
 
-       if (unlikely(!dt_try_as_dir(env, parent)))
-               GOTO(log, rc = -ENOTDIR);
-
        child = lfsck_object_locate(dev, child);
        if (IS_ERR(child))
                GOTO(log, rc = PTR_ERR(child));
@@ -1148,11 +1272,57 @@ static int lfsck_namespace_insert_normal(const struct lu_env *env,
        if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
                GOTO(log, rc = 1);
 
-       rc = lfsck_lock(env, lfsck, parent, name, llh,
-                       MDS_INODELOCK_UPDATE, LCK_PW);
-       if (rc != 0)
+       rc = lfsck_lock(env, lfsck, parent, lname->ln_name, llh,
+                       MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE |
+                       MDS_INODELOCK_XATTR, LCK_EX);
+       if (rc)
                GOTO(log, rc);
 
+       rc = lfsck_object_is_shard(env, lfsck, child, lname);
+       if (rc < 0)
+               GOTO(unlock, rc);
+
+       if (rc == 1) {
+               rc = lfsck_read_stripe_lmv(env, lfsck, parent, lmv);
+               if (!rc) {
+                       /*
+                        * To add a shard, we need to convert parent to a
+                        * plain directory by deleting its LMV, and after
+                        * insertion set it back.
+                        */
+                       rc = lfsck_lmv_delete(env, lfsck, parent);
+                       if (rc)
+                               GOTO(unlock, rc);
+                       parent_lmv_deleted = true;
+                       lmv->lmv_layout_version++;
+                       lfsck_lmv_header_cpu_to_le(lmv, lmv);
+               } else if (rc == -ENODATA) {
+                       struct lu_seq_range *range = &info->lti_range;
+                       struct seq_server_site *ss = lfsck_dev_site(lfsck);
+
+                       rc = lfsck_read_stripe_lmv(env, lfsck, child, lmv);
+                       if (rc)
+                               GOTO(unlock, rc);
+
+                       fld_range_set_mdt(range);
+                       rc = fld_server_lookup(env, ss->ss_server_fld,
+                                      fid_seq(lfsck_dto2fid(parent)), range);
+                       if (rc)
+                               GOTO(unlock, rc);
+
+                       parent_lmv_lost = true;
+                       lmv->lmv_magic = LMV_MAGIC;
+                       lmv->lmv_master_mdt_index = range->lsr_index;
+                       lmv->lmv_layout_version++;
+                       lfsck_lmv_header_cpu_to_le(lmv, lmv);
+               } else {
+                       GOTO(unlock, rc);
+               }
+       }
+
+       if (unlikely(!dt_try_as_dir(env, parent)))
+               GOTO(unlock, rc = -ENOTDIR);
+
        th = dt_trans_create(env, dev);
        if (IS_ERR(th))
                GOTO(unlock, rc = PTR_ERR(th));
@@ -1160,7 +1330,7 @@ static int lfsck_namespace_insert_normal(const struct lu_env *env,
        rec->rec_type = lfsck_object_type(child) & S_IFMT;
        rec->rec_fid = cfid;
        rc = dt_declare_insert(env, parent, (const struct dt_rec *)rec,
-                              (const struct dt_key *)name, th);
+                              (const struct dt_key *)lname->ln_name, th);
        if (rc != 0)
                GOTO(stop, rc);
 
@@ -1170,7 +1340,13 @@ static int lfsck_namespace_insert_normal(const struct lu_env *env,
                        GOTO(stop, rc);
        }
 
-       memset(la, 0, sizeof(*la));
+       if (parent_lmv_lost) {
+               rc = dt_declare_xattr_set(env, parent, &buf, XATTR_NAME_LMV,
+                                         0, th);
+               if (rc)
+                       GOTO(stop, rc);
+       }
+
        la->la_ctime = ktime_get_real_seconds();
        la->la_valid = LA_CTIME;
        rc = dt_declare_attr_set(env, parent, la, th);
@@ -1186,7 +1362,7 @@ static int lfsck_namespace_insert_normal(const struct lu_env *env,
                GOTO(stop, rc);
 
        rc = dt_insert(env, parent, (const struct dt_rec *)rec,
-                      (const struct dt_key *)name, th, 1);
+                      (const struct dt_key *)lname->ln_name, th);
        if (rc != 0)
                GOTO(stop, rc);
 
@@ -1198,7 +1374,12 @@ static int lfsck_namespace_insert_normal(const struct lu_env *env,
                        GOTO(stop, rc);
        }
 
-       la->la_ctime = ktime_get_real_seconds();
+       if (parent_lmv_lost) {
+               rc = dt_xattr_set(env, parent, &buf, XATTR_NAME_LMV, 0, th);
+               if (rc)
+                       GOTO(stop, rc);
+       }
+
        rc = dt_attr_set(env, parent, la, th);
        if (rc != 0)
                GOTO(stop, rc);
@@ -1211,12 +1392,15 @@ stop:
        dt_trans_stop(env, dev, th);
 
 unlock:
+       if (parent_lmv_deleted)
+               lfsck_lmv_set(env, lfsck, parent, lmv);
+
        lfsck_unlock(llh);
 
 log:
        CDEBUG(D_LFSCK, "%s: namespace LFSCK insert object "DFID" with "
               "the name %s and type %o to the parent "DFID": rc = %d\n",
-              lfsck_lfsck2name(lfsck), PFID(cfid), name,
+              lfsck_lfsck2name(lfsck), PFID(cfid), lname->ln_name,
               lfsck_object_type(child) & S_IFMT, PFID(pfid), rc);
 
        if (rc != 0) {
@@ -1399,8 +1583,8 @@ again:
                lmv->lmv_master_mdt_index = lfsck_dev_idx(lfsck);
                lfsck_lmv_header_cpu_to_le(lmv2, lmv);
                lfsck_buf_init(&lmv_buf, lmv2, sizeof(*lmv2));
-               rc = dt_declare_xattr_set(env, orphan, &lmv_buf,
-                                         XATTR_NAME_LMV, 0, th);
+               rc = dt_declare_xattr_set(env, orphan, &lmv_buf, XATTR_NAME_LMV,
+                                         0, th);
                if (rc != 0)
                        GOTO(stop, rc);
        }
@@ -1436,13 +1620,13 @@ again:
 
        rec->rec_fid = cfid;
        rc = dt_insert(env, orphan, (const struct dt_rec *)rec,
-                      (const struct dt_key *)dot, th, 1);
+                      (const struct dt_key *)dot, th);
        if (rc != 0)
                GOTO(unlock2, rc);
 
        rec->rec_fid = lfsck_dto2fid(parent);
        rc = dt_insert(env, orphan, (const struct dt_rec *)rec,
-                      (const struct dt_key *)dotdot, th, 1);
+                      (const struct dt_key *)dotdot, th);
        if (rc != 0)
                GOTO(unlock2, rc);
 
@@ -1460,7 +1644,7 @@ again:
 
        rec->rec_fid = cfid;
        rc = dt_insert(env, parent, (const struct dt_rec *)rec,
-                      (const struct dt_key *)name, th, 1);
+                      (const struct dt_key *)name, th);
        if (rc == 0) {
                dt_write_lock(env, parent, 0);
                rc = dt_ref_add(env, parent, th);
@@ -1591,16 +1775,22 @@ again:
                lfsck_namespace_filter_linkea_entry(&ldata_new, cname, pfid,
                                                    true);
 
-       if (buflen < ldata_new.ld_leh->leh_len) {
+       /*
+        * linkea may change because it doesn't take lock in the first read, if
+        * it becomes larger, restart from beginning.
+        */
+       if ((ldata_new.ld_leh->leh_reccount > 0 ||
+            unlikely(ldata_new.ld_leh->leh_overflow_time)) &&
+           buflen < ldata_new.ld_leh->leh_len) {
                dt_write_unlock(env, obj);
                dt_trans_stop(env, dev, th);
                lfsck_buf_init(&linkea_buf, ldata_new.ld_buf->lb_buf,
                               ldata_new.ld_leh->leh_len);
+               buflen = linkea_buf.lb_len;
                goto again;
        }
 
-       if (ldata_new.ld_leh->leh_reccount > 0 ||
-           unlikely(ldata->ld_leh->leh_overflow_time))
+       if (buflen)
                rc = lfsck_links_write(env, obj, &ldata_new, th);
        else
                rc = dt_xattr_del(env, obj, XATTR_NAME_LINK, th);
@@ -1900,7 +2090,7 @@ replace:
                GOTO(stop, rc);
 
        rc = dt_insert(env, parent, (const struct dt_rec *)rec,
-                      (const struct dt_key *)name, th, 1);
+                      (const struct dt_key *)name, th);
 
        GOTO(stop, rc = (rc == 0 ? 1 : rc));
 
@@ -2086,7 +2276,7 @@ int lfsck_namespace_repair_dirent(const struct lu_env *env,
 
 
        dt_write_lock(env, parent, 0);
-       rc = dt_lookup(env, parent, (struct dt_rec *)&tfid,
+       rc = dt_lookup(env, dt_object_child(parent), (struct dt_rec *)&tfid,
                       (const struct dt_key *)name);
        /* Someone has removed the bad name entry by race. */
        if (rc == -ENOENT)
@@ -2110,7 +2300,7 @@ int lfsck_namespace_repair_dirent(const struct lu_env *env,
        if (update) {
                rc = dt_insert(env, parent,
                               (const struct dt_rec *)rec,
-                              (const struct dt_key *)name2, th, 1);
+                              (const struct dt_key *)name2, th);
                if (rc != 0)
                        GOTO(unlock2, rc);
        }
@@ -2238,7 +2428,7 @@ static int lfsck_namespace_repair_unmatched_pairs(const struct lu_env *env,
        dt_delete(env, obj, (const struct dt_key *)dotdot, th);
 
        rc = dt_insert(env, obj, (const struct dt_rec *)rec,
-                      (const struct dt_key *)dotdot, th, 1);
+                      (const struct dt_key *)dotdot, th);
        if (rc != 0)
                GOTO(unlock, rc);
 
@@ -2418,7 +2608,7 @@ lfsck_namespace_dsd_single(const struct lu_env *env,
 
 lost_parent:
                lmv = &info->lti_lmv;
-               rc = lfsck_read_stripe_lmv(env, child, lmv);
+               rc = lfsck_read_stripe_lmv(env, lfsck, child, lmv);
                if (rc != 0 && rc != -ENODATA)
                        GOTO(out, rc);
 
@@ -2447,7 +2637,7 @@ lost_parent:
                if (rc >= 0) {
                        /* Add the missing name entry to the parent. */
                        rc = lfsck_namespace_insert_normal(env, com, parent,
-                                                       child, cname->ln_name);
+                                                          child, cname);
                        if (unlikely(rc == -EEXIST)) {
                                /* Unfortunately, someone reused the name
                                 * under the parent by race. So we have
@@ -2505,7 +2695,8 @@ lost_parent:
                }
 
                lfsck_ibits_unlock(lh, LCK_EX);
-               rc = lfsck_namespace_check_name(env, parent, child, cname);
+               rc = lfsck_namespace_check_name(env, lfsck, parent, child,
+                                               cname);
                if (rc == -ENOENT)
                        goto lost_parent;
 
@@ -2529,7 +2720,7 @@ lost_parent:
 
                /* Add the missing name entry back to the namespace. */
                rc = lfsck_namespace_insert_normal(env, com, parent, child,
-                                                  cname->ln_name);
+                                                  cname);
                if (unlikely(rc == -ESTALE))
                        /* It may happen when the remote object has been
                         * removed, but the local MDT is not aware of that. */
@@ -3056,11 +3247,8 @@ static int lfsck_namespace_double_scan_dir(const struct lu_env *env,
 
        if (flags & (LNTF_CHECK_LINKEA | LNTF_CHECK_PARENT) &&
            !(lfsck->li_bookmark_ram.lb_param & LPF_ALL_TGT)) {
-               CDEBUG(D_LFSCK, "%s: some MDT(s) maybe NOT take part in the"
-                      "the namespace LFSCK, then the LFSCK cannot guarantee"
-                      "all the name entries have been verified in first-stage"
-                      "scanning. So have to skip orphan related handling for"
-                      "the directory object "DFID" with remote name entry\n",
+               CDEBUG(D_LFSCK,
+                      "%s: some MDT(s) maybe NOT take part in the the namespace LFSCK, then the LFSCK cannot guarantee all the name entries have been verified in first-stage scanning. So have to skip orphan related handling for the directory object "DFID" with remote name entry\n",
                       lfsck_lfsck2name(lfsck), PFID(cfid));
 
                RETURN(0);
@@ -3386,8 +3574,7 @@ static int lfsck_namespace_check_agent_entry(const struct lu_env *env,
        while (ldata.ld_lee != NULL && !remote) {
                linkea_entry_unpack(ldata.ld_lee, &ldata.ld_reclen,
                                    cname, pfid);
-               /* If parent FID is unknown, not verify agent entry. */
-               if (!fid_is_sane(pfid))
+               if (!linkea_entry_is_valid(&ldata, cname, pfid))
                        GOTO(out, rc = 0);
 
                fld_range_set_mdt(range);
@@ -3639,7 +3826,7 @@ lost_parent:
 
                                /* Add the missing name entry to the parent. */
                                rc = lfsck_namespace_insert_normal(env, com,
-                                               parent, child, cname->ln_name);
+                                                       parent, child, cname);
                                if (unlikely(rc == -EEXIST))
                                        /* Unfortunately, someone reused the
                                         * name under the parent by race. So we
@@ -3759,7 +3946,8 @@ lost_parent:
                        GOTO(out, rc = 0);
                }
 
-               rc = lfsck_namespace_check_name(env, parent, child, cname);
+               rc = lfsck_namespace_check_name(env, lfsck, parent, child,
+                                               cname);
                if (rc == -ENOENT)
                        goto lost_parent;
 
@@ -3785,7 +3973,7 @@ lost_parent:
 
                /* Add the missing name entry back to the namespace. */
                rc = lfsck_namespace_insert_normal(env, com, parent, child,
-                                                  cname->ln_name);
+                                                  cname);
                if (unlikely(rc == -ESTALE))
                        /* It may happen when the remote object has been
                         * removed, but the local MDT is not aware of that. */
@@ -4072,7 +4260,7 @@ static int lfsck_namespace_reset(const struct lu_env *env,
        if (rc != 0)
                GOTO(out, rc);
 
-       lad->lad_incomplete = 0;
+       clear_bit(LAD_INCOMPLETE, &lad->lad_flags);
        CFS_RESET_BITMAP(lad->lad_bitmap);
 
        rc = lfsck_namespace_store(env, com);
@@ -4112,14 +4300,19 @@ static void lfsck_namespace_close_dir(const struct lu_env *env,
        struct lfsck_instance           *lfsck  = com->lc_lfsck;
        struct lfsck_lmv                *llmv   = lfsck->li_lmv;
        struct lfsck_namespace_req      *lnr;
-       __u32                            size   =
-                               sizeof(*lnr) + LFSCK_TMPBUF_LEN;
-       bool                             wakeup = false;
+       struct lu_attr *la = &lfsck_env_info(env)->lti_la2;
+       __u32 size = sizeof(*lnr) + LFSCK_TMPBUF_LEN;
+       int rc;
+       bool wakeup = false;
        ENTRY;
 
        if (llmv == NULL)
                RETURN_EXIT;
 
+       rc = dt_attr_get(env, lfsck->li_obj_dir, la);
+       if (rc)
+               RETURN_EXIT;
+
        OBD_ALLOC(lnr, size);
        if (lnr == NULL) {
                ns->ln_striped_dirs_skipped++;
@@ -4128,7 +4321,7 @@ static void lfsck_namespace_close_dir(const struct lu_env *env,
        }
 
        lso = lfsck_assistant_object_init(env, lfsck_dto2fid(lfsck->li_obj_dir),
-                       NULL, lfsck->li_pos_current.lp_oit_cookie, true);
+                       la, lfsck->li_pos_current.lp_oit_cookie, true);
        if (IS_ERR(lso)) {
                OBD_FREE(lnr, size);
                ns->ln_striped_dirs_skipped++;
@@ -4144,6 +4337,7 @@ static void lfsck_namespace_close_dir(const struct lu_env *env,
        lnr->lnr_fid = *lfsck_dto2fid(lfsck->li_obj_dir);
        lnr->lnr_dir_cookie = MDS_DIR_END_OFF;
        lnr->lnr_size = size;
+       lnr->lnr_type = lso->lso_attr.la_mode;
 
        spin_lock(&lad->lad_lock);
        if (lad->lad_assistant_status < 0 ||
@@ -4340,20 +4534,32 @@ static int lfsck_namespace_exec_oit(const struct lu_env *env,
                                    struct lfsck_component *com,
                                    struct dt_object *obj)
 {
-       struct lfsck_thread_info *info  = lfsck_env_info(env);
-       struct lfsck_namespace   *ns    = com->lc_file_ram;
-       struct lfsck_instance    *lfsck = com->lc_lfsck;
-       const struct lu_fid      *fid   = lfsck_dto2fid(obj);
-       struct lu_fid            *pfid  = &info->lti_fid2;
-       struct lu_name           *cname = &info->lti_name;
-       struct lu_seq_range      *range = &info->lti_range;
-       struct seq_server_site   *ss    = lfsck_dev_site(lfsck);
-       struct linkea_data        ldata = { NULL };
-       __u32                     idx   = lfsck_dev_idx(lfsck);
-       int                       rc;
+       struct lfsck_thread_info *info = lfsck_env_info(env);
+       struct lfsck_namespace *ns = com->lc_file_ram;
+       struct lfsck_instance *lfsck = com->lc_lfsck;
+       const struct lu_fid *fid = lfsck_dto2fid(obj);
+       struct lu_fid *pfid = &info->lti_fid2;
+       struct lu_name *cname = &info->lti_name;
+       struct lu_seq_range *range = &info->lti_range;
+       struct seq_server_site *ss = lfsck_dev_site(lfsck);
+       struct linkea_data ldata = { NULL };
+       __u32 idx = lfsck_dev_idx(lfsck);
+       struct lu_attr la = { .la_valid = 0 };
        bool remote = false;
+       int rc;
        ENTRY;
 
+       rc = dt_attr_get(env, obj, &la);
+       if (unlikely(rc || (la.la_valid & LA_FLAGS &&
+                           la.la_flags & LUSTRE_ORPHAN_FL))) {
+               CDEBUG(D_INFO,
+                      "%s: skip orphan "DFID", %llx/%x: rc = %d\n",
+                      lfsck_lfsck2name(lfsck), PFID(fid),
+                      la.la_valid, la.la_flags, rc);
+
+               return rc;
+       }
+
        rc = lfsck_links_read(env, obj, &ldata);
        if (rc == -ENOENT)
                GOTO(out, rc = 0);
@@ -4394,6 +4600,8 @@ static int lfsck_namespace_exec_oit(const struct lu_env *env,
                if (!fid_is_sane(pfid)) {
                        rc = lfsck_namespace_trace_update(env, com, fid,
                                                  LNTF_CHECK_PARENT, true);
+               } else if (!linkea_entry_is_valid(&ldata, cname, pfid)) {
+                       GOTO(out, rc);
                } else {
                        fld_range_set_mdt(range);
                        rc = fld_server_lookup(env, ss->ss_server_fld,
@@ -4456,14 +4664,12 @@ static int lfsck_namespace_exec_dir(const struct lu_env *env,
        struct lfsck_bookmark           *bk      = &lfsck->li_bookmark_ram;
        struct ptlrpc_thread            *mthread = &lfsck->li_thread;
        struct ptlrpc_thread            *athread = &lad->lad_thread;
-       struct l_wait_info               lwi     = { 0 };
        bool                             wakeup  = false;
 
-       l_wait_event(mthread->t_ctl_waitq,
-                    lad->lad_prefetched < bk->lb_async_windows ||
-                    !thread_is_running(mthread) ||
-                    !thread_is_running(athread),
-                    &lwi);
+       wait_event_idle(mthread->t_ctl_waitq,
+                       lad->lad_prefetched < bk->lb_async_windows ||
+                       !thread_is_running(mthread) ||
+                       !thread_is_running(athread));
 
        if (unlikely(!thread_is_running(mthread) ||
                     !thread_is_running(athread)))
@@ -5071,26 +5277,28 @@ int lfsck_namespace_repair_dangling(const struct lu_env *env,
                                    struct dt_object *child,
                                    struct lfsck_namespace_req *lnr)
 {
-       struct lfsck_thread_info        *info   = lfsck_env_info(env);
-       struct lu_attr                  *la     = &info->lti_la;
-       struct dt_allocation_hint       *hint   = &info->lti_hint;
-       struct dt_object_format         *dof    = &info->lti_dof;
-       struct dt_insert_rec            *rec    = &info->lti_dt_rec;
-       struct lmv_mds_md_v1            *lmv2   = &info->lti_lmv2;
-       const struct lu_name            *cname;
-       const struct lu_fid             *pfid   = lfsck_dto2fid(parent);
-       const struct lu_fid             *cfid   = lfsck_dto2fid(child);
-       struct linkea_data               ldata  = { NULL };
-       struct lfsck_lock_handle        *llh    = &info->lti_llh;
-       struct lu_buf                    linkea_buf;
-       struct lu_buf                    lmv_buf;
-       struct lfsck_instance           *lfsck  = com->lc_lfsck;
-       struct lfsck_bookmark           *bk     = &lfsck->li_bookmark_ram;
-       struct dt_device                *dev    = lfsck->li_next;
-       struct thandle                  *th     = NULL;
-       int                              rc     = 0;
-       __u16                            type   = lnr->lnr_type;
-       bool                             create;
+       struct lfsck_thread_info *info = lfsck_env_info(env);
+       struct lu_attr *la = &info->lti_la;
+       struct dt_allocation_hint *hint = &info->lti_hint;
+       struct dt_object_format *dof = &info->lti_dof;
+       struct dt_insert_rec *rec = &info->lti_dt_rec;
+       struct lmv_mds_md_v1 *lmv2 = &info->lti_lmv2;
+       const struct lu_name *cname;
+       const struct lu_fid *pfid = lfsck_dto2fid(parent);
+       const struct lu_fid *cfid = lfsck_dto2fid(child);
+       struct linkea_data ldata = { NULL };
+       struct lfsck_lock_handle *llh = &info->lti_llh;
+       struct lustre_handle rlh = { 0 };
+       struct lustre_handle clh = { 0 };
+       struct lu_buf linkea_buf;
+       struct lu_buf lmv_buf;
+       struct lfsck_instance *lfsck = com->lc_lfsck;
+       struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
+       struct dt_device *dev = lfsck->li_next;
+       struct thandle *th = NULL;
+       int rc = 0;
+       __u16 type = lnr->lnr_type;
+       bool create;
        ENTRY;
 
        cname = lfsck_name_get_const(env, lnr->lnr_name, lnr->lnr_namelen);
@@ -5122,7 +5330,7 @@ int lfsck_namespace_repair_dangling(const struct lu_env *env,
                GOTO(log, rc);
 
        rc = lfsck_lock(env, lfsck, parent, lnr->lnr_name, llh,
-                       MDS_INODELOCK_UPDATE, LCK_PR);
+                       MDS_INODELOCK_UPDATE, LCK_PW);
        if (rc != 0)
                GOTO(log, rc);
 
@@ -5130,17 +5338,40 @@ int lfsck_namespace_repair_dangling(const struct lu_env *env,
        if (rc != 0)
                GOTO(log, rc);
 
+       if (dt_object_remote(child)) {
+               rc = lfsck_remote_lookup_lock(env, lfsck, parent, child, &rlh,
+                                             LCK_EX);
+               if (rc != 0)
+                       GOTO(log, rc);
+       }
+
+       rc = lfsck_ibits_lock(env, lfsck, child, &clh,
+                             MDS_INODELOCK_UPDATE | MDS_INODELOCK_LOOKUP |
+                             MDS_INODELOCK_XATTR, LCK_EX);
+       if (rc != 0)
+               GOTO(unlock_remote_lookup, rc);
+
        /* Set the ctime as zero, then others can know it is created for
         * repairing dangling name entry by LFSCK. And if the LFSCK made
         * the wrong decision and the real MDT-object is found later,
         * then the LFSCK has a chance to fix the inconsistency properly. */
        memset(la, 0, sizeof(*la));
-       la->la_mode = (type & S_IFMT) | 0600;
-       la->la_valid = LA_TYPE | LA_MODE | LA_UID | LA_GID |
-                       LA_ATIME | LA_MTIME | LA_CTIME;
-
-       child->do_ops->do_ah_init(env, hint, parent, child,
-                                la->la_mode & S_IFMT);
+       if (S_ISDIR(type))
+               la->la_mode = (type & S_IFMT) | 0700;
+       else
+               la->la_mode = (type & S_IFMT) | 0600;
+       la->la_valid = LA_TYPE | LA_MODE | LA_CTIME;
+
+       /*
+        * If it's a directory, call do_ah_init() without the parent to
+        * create a plain directory, because it may have shards already,
+        * which will be inserted back later; besides, it may be remote,
+        * and creating a striped directory remotely is not supported.
+        */
+       if (S_ISREG(type))
+               child->do_ops->do_ah_init(env, hint,  parent, child, type);
+       else if (S_ISDIR(type))
+               child->do_ops->do_ah_init(env, hint,  NULL, child, type);
 
        memset(dof, 0, sizeof(*dof));
        dof->dof_type = dt_mode_to_dft(type);
@@ -5150,7 +5381,7 @@ int lfsck_namespace_repair_dangling(const struct lu_env *env,
 
        th = dt_trans_create(env, dev);
        if (IS_ERR(th))
-               GOTO(log, rc = PTR_ERR(th));
+               GOTO(unlock_child, rc = PTR_ERR(th));
 
        /* 1a. create child. */
        rc = dt_declare_create(env, child, la, hint, dof, th);
@@ -5214,6 +5445,21 @@ int lfsck_namespace_repair_dangling(const struct lu_env *env,
        if (rc != 0)
                GOTO(stop, rc);
 
+       /* 7a. if child is remote, delete and insert to generate local agent */
+       if (dt_object_remote(child)) {
+               rc = dt_declare_delete(env, parent,
+                                      (const struct dt_key *)lnr->lnr_name,
+                                      th);
+               if (rc)
+                       GOTO(stop, rc);
+
+               rc = dt_declare_insert(env, parent, (const struct dt_rec *)rec,
+                                      (const struct dt_key *)lnr->lnr_name,
+                                      th);
+               if (rc)
+                       GOTO(stop, rc);
+       }
+
        rc = dt_trans_start_local(env, dev, th);
        if (rc != 0)
                GOTO(stop, rc = (rc == -EEXIST ? 1 : rc));
@@ -5234,14 +5480,14 @@ int lfsck_namespace_repair_dangling(const struct lu_env *env,
                rec->rec_type = S_IFDIR;
                rec->rec_fid = cfid;
                rc = dt_insert(env, child, (const struct dt_rec *)rec,
-                              (const struct dt_key *)dot, th, 1);
+                              (const struct dt_key *)dot, th);
                if (rc != 0)
                        GOTO(unlock, rc);
 
                /* 4b. insert dotdot into child dir */
                rec->rec_fid = pfid;
                rc = dt_insert(env, child, (const struct dt_rec *)rec,
-                              (const struct dt_key *)dotdot, th, 1);
+                              (const struct dt_key *)dotdot, th);
                if (rc != 0)
                        GOTO(unlock, rc);
 
@@ -5257,6 +5503,23 @@ int lfsck_namespace_repair_dangling(const struct lu_env *env,
        /* 6b. insert linkEA for child. */
        rc = dt_xattr_set(env, child, &linkea_buf,
                          XATTR_NAME_LINK, 0, th);
+       if (rc)
+               GOTO(unlock, rc);
+
+       /* 7b. if child is remote, delete and insert to generate local agent */
+       if (dt_object_remote(child)) {
+               rc = dt_delete(env, parent,
+                              (const struct dt_key *)lnr->lnr_name, th);
+               if (rc)
+                       GOTO(unlock, rc);
+
+               rec->rec_type = type;
+               rec->rec_fid = cfid;
+               rc = dt_insert(env, parent, (const struct dt_rec *)rec,
+                              (const struct dt_key *)lnr->lnr_name, th);
+               if (rc)
+                       GOTO(unlock, rc);
+       }
 
        GOTO(unlock, rc);
 
@@ -5266,6 +5529,11 @@ unlock:
 stop:
        dt_trans_stop(env, dev, th);
 
+unlock_child:
+       lfsck_ibits_unlock(&clh, LCK_EX);
+unlock_remote_lookup:
+       if (dt_object_remote(child))
+               lfsck_ibits_unlock(&rlh, LCK_EX);
 log:
        lfsck_unlock(llh);
        CDEBUG(D_LFSCK, "%s: namespace LFSCK assistant found dangling "
@@ -5298,7 +5566,7 @@ static int lfsck_namespace_assistant_handler_p1(const struct lu_env *env,
        const struct lu_name       *cname;
        struct thandle             *handle   = NULL;
        struct lfsck_namespace_req *lnr      =
-                       container_of0(lar, struct lfsck_namespace_req, lnr_lar);
+               container_of(lar, struct lfsck_namespace_req, lnr_lar);
        struct dt_object           *dir      = NULL;
        struct dt_object           *obj      = NULL;
        struct lfsck_assistant_object *lso   = lar->lar_parent;
@@ -5332,11 +5600,9 @@ static int lfsck_namespace_assistant_handler_p1(const struct lu_env *env,
                repaired = true;
        }
 
-       if (unlikely(fid_is_zero(&lnr->lnr_fid))) {
-               if (strcmp(lnr->lnr_name, dotdot) != 0)
-                       LBUG();
-               else
-                       rc = lfsck_namespace_trace_update(env, com, pfid,
+       if (unlikely(fid_is_zero(&lnr->lnr_fid) &&
+                    strcmp(lnr->lnr_name, dotdot) == 0)) {
+               rc = lfsck_namespace_trace_update(env, com, pfid,
                                                LNTF_CHECK_PARENT, true);
 
                GOTO(out, rc);
@@ -5926,7 +6192,7 @@ static int lfsck_namespace_scan_local_lpf_one(const struct lu_env *env,
 
        /* b5. insert child's FID into the LFSCK trace file. */
        rc = dt_insert(env, obj, (const struct dt_rec *)&flags,
-                      (const struct dt_key *)key, th, 1);
+                      (const struct dt_key *)key, th);
 
        GOTO(stop, rc = (rc == 0 ? 1 : rc));
 
@@ -6519,7 +6785,7 @@ static void lfsck_namespace_assistant_sync_failures(const struct lu_env *env,
        int                                rc    = 0;
        ENTRY;
 
-       if (!lad->lad_incomplete)
+       if (!test_bit(LAD_INCOMPLETE, &lad->lad_flags))
                RETURN_EXIT;
 
        set = ptlrpc_prep_set();
@@ -6547,7 +6813,7 @@ static void lfsck_namespace_assistant_sync_failures(const struct lu_env *env,
        }
        up_read(&ltds->ltd_rw_sem);
 
-       rc = ptlrpc_set_wait(set);
+       rc = ptlrpc_set_wait(env, set);
        ptlrpc_set_destroy(set);
 
        GOTO(out, rc);
@@ -6673,6 +6939,9 @@ int lfsck_links_get_first(const struct lu_env *env, struct dt_object *obj,
 
        linkea_first_entry(&ldata);
        linkea_entry_unpack(ldata.ld_lee, &ldata.ld_reclen, cname, pfid);
+       if (!linkea_entry_is_valid(&ldata, cname, pfid))
+               return -EINVAL;
+
        /* To guarantee the 'name' is terminated with '0'. */
        memcpy(name, cname->ln_name, cname->ln_namelen);
        name[cname->ln_namelen] = 0;
@@ -6746,7 +7015,7 @@ int lfsck_update_name_entry(const struct lu_env *env,
                GOTO(stop, rc);
 
        rc = dt_insert(env, dir, (const struct dt_rec *)rec,
-                      (const struct dt_key *)name, th, 1);
+                      (const struct dt_key *)name, th);
        if (rc == 0 && S_ISDIR(type) && !exists) {
                dt_write_lock(env, dir, 0);
                rc = dt_ref_add(env, dir, th);
@@ -6825,13 +7094,16 @@ int lfsck_namespace_setup(const struct lu_env *env,
 
        com->lc_obj = obj;
        rc = lfsck_namespace_load(env, com);
-       if (rc == -ENODATA)
+       if (rc == -ENODATA) {
                rc = lfsck_namespace_init(env, com);
-       else if (rc < 0)
+       } else if (rc < 0) {
                rc = lfsck_namespace_reset(env, com, true);
-       else
+       } else {
                rc = lfsck_load_sub_trace_files(env, com,
                        &dt_lfsck_namespace_features, LFSCK_NAMESPACE, false);
+               if (rc)
+                       rc = lfsck_namespace_reset(env, com, true);
+       }
        if (rc != 0)
                GOTO(out, rc);