Whamcloud - gitweb
LU-2240 mds: Assign special fid sequence to root.
[fs/lustre-release.git] / lustre / osd-ldiskfs / osd_handler.c
index af3f674..d589556 100644 (file)
@@ -370,13 +370,13 @@ static int osd_fid_lookup(const struct lu_env *env, struct osd_object *obj,
         * unexpected reason, we should be able to detect it later by calling
         * do_create->osd_oi_insert()
         */
-       if (conf != NULL && (conf->loc_flags & LOC_F_NEW) != 0)
+       if (conf != NULL && conf->loc_flags & LOC_F_NEW)
                GOTO(out, result = 0);
 
        /* Search order: 3. OI files. */
        result = osd_oi_lookup(info, dev, fid, id, true);
        if (result == -ENOENT) {
-               if (!fid_is_norm(fid) ||
+               if (!fid_is_norm(fid) || fid_is_on_ost(info, dev, fid) ||
                    !ldiskfs_test_bit(osd_oi_fid2idx(dev,fid),
                                      sf->sf_oi_bitmap))
                        GOTO(out, result = 0);
@@ -1363,7 +1363,8 @@ static void osd_inode_getattr(const struct lu_env *env,
 {
         attr->la_valid      |= LA_ATIME | LA_MTIME | LA_CTIME | LA_MODE |
                                LA_SIZE | LA_BLOCKS | LA_UID | LA_GID |
-                               LA_FLAGS | LA_NLINK | LA_RDEV | LA_BLKSIZE;
+                               LA_FLAGS | LA_NLINK | LA_RDEV | LA_BLKSIZE |
+                              LA_TYPE;
 
         attr->la_atime      = LTIME_S(inode->i_atime);
         attr->la_mtime      = LTIME_S(inode->i_mtime);
@@ -1387,7 +1388,7 @@ static int osd_attr_get(const struct lu_env *env,
 {
         struct osd_object *obj = osd_dt_obj(dt);
 
-        LASSERT(dt_object_exists(dt));
+       LASSERT(dt_object_exists(dt) && !dt_object_remote(dt));
         LINVRNT(osd_invariant(obj));
 
         if (osd_object_auth(env, dt, capa, CAPA_OPC_META_READ))
@@ -1546,8 +1547,6 @@ static int osd_inode_setattr(const struct lu_env *env,
 
         bits = attr->la_valid;
 
-        LASSERT(!(bits & LA_TYPE)); /* Huh? You want too much. */
-
         if (bits & LA_ATIME)
                 inode->i_atime  = *osd_inode_time(env, inode, attr->la_atime);
         if (bits & LA_CTIME)
@@ -1622,7 +1621,7 @@ static int osd_attr_set(const struct lu_env *env,
         int rc;
 
         LASSERT(handle != NULL);
-        LASSERT(dt_object_exists(dt));
+       LASSERT(dt_object_exists(dt) && !dt_object_remote(dt));
         LASSERT(osd_invariant(obj));
 
         if (osd_object_auth(env, dt, capa, CAPA_OPC_META_WRITE))
@@ -1630,6 +1629,31 @@ static int osd_attr_set(const struct lu_env *env,
 
        osd_trans_exec_op(env, handle, OSD_OT_ATTR_SET);
 
+       if (OBD_FAIL_CHECK(OBD_FAIL_OSD_FID_MAPPING)) {
+               struct osd_thread_info  *oti  = osd_oti_get(env);
+               const struct lu_fid     *fid0 = lu_object_fid(&dt->do_lu);
+               struct lu_fid           *fid1 = &oti->oti_fid;
+               struct osd_inode_id     *id   = &oti->oti_id;
+               struct iam_path_descr   *ipd;
+               struct iam_container    *bag;
+               struct osd_thandle      *oh;
+               int                      rc;
+
+               fid_cpu_to_be(fid1, fid0);
+               memset(id, 1, sizeof(*id));
+               bag = &osd_fid2oi(osd_dev(dt->do_lu.lo_dev),
+                                 fid0)->oi_dir.od_container;
+               ipd = osd_idx_ipd_get(env, bag);
+               if (unlikely(ipd == NULL))
+                       RETURN(-ENOMEM);
+
+               oh = container_of0(handle, struct osd_thandle, ot_super);
+               rc = iam_update(oh->ot_handle, bag, (const struct iam_key *)fid1,
+                               (const struct iam_rec *)id, ipd);
+               osd_ipd_put(env, bag, ipd);
+               return(rc > 0 ? 0 : rc);
+       }
+
         inode = obj->oo_inode;
        ll_vfs_dq_init(inode);
 
@@ -1950,19 +1974,13 @@ int osd_fld_lookup(const struct lu_env *env, struct osd_device *osd,
        struct seq_server_site  *ss = osd_seq_site(osd);
        int                     rc;
 
-       if (fid_is_igif(fid)) {
-               range->lsr_flags = LU_SEQ_RANGE_MDT;
-               range->lsr_index = 0;
-               return 0;
-       }
-
        if (fid_is_idif(fid)) {
                range->lsr_flags = LU_SEQ_RANGE_OST;
                range->lsr_index = fid_idif_ost_idx(fid);
                return 0;
        }
 
-       if (!fid_is_norm(fid)) {
+       if (!fid_seq_in_fldb(fid_seq(fid))) {
                range->lsr_flags = LU_SEQ_RANGE_MDT;
                if (ss != NULL)
                        /* FIXME: If ss is NULL, it suppose not get lsr_index
@@ -1976,12 +1994,14 @@ int osd_fld_lookup(const struct lu_env *env, struct osd_device *osd,
        rc = fld_server_lookup(env, ss->ss_server_fld, fid_seq(fid), range);
        if (rc != 0) {
                CERROR("%s can not find "DFID": rc = %d\n",
-                      osd2lu_dev(osd)->ld_obd->obd_name, PFID(fid), rc);
+                      osd_name(osd), PFID(fid), rc);
        }
        return rc;
 }
 
-
+/*
+ * Concurrency: no external locking is necessary.
+ */
 static int osd_declare_object_create(const struct lu_env *env,
                                     struct dt_object *dt,
                                     struct lu_attr *attr,
@@ -2001,16 +2021,13 @@ static int osd_declare_object_create(const struct lu_env *env,
 
        osd_trans_declare_op(env, oh, OSD_OT_CREATE,
                             osd_dto_credits_noquota[DTO_OBJECT_CREATE]);
-       /* XXX: So far, only normal fid needs be inserted into the oi,
-        *      things could be changed later. Revise following code then. */
-       if (fid_is_norm(lu_object_fid(&dt->do_lu)) &&
-           !fid_is_on_ost(osd_oti_get(env), osd_dt_dev(handle->th_dev),
-                          lu_object_fid(&dt->do_lu))) {
+       if (!fid_is_on_ost(osd_oti_get(env), osd_dt_dev(handle->th_dev),
+                          lu_object_fid(&dt->do_lu)))
                /* Reuse idle OI block may cause additional one OI block
                 * to be changed. */
                osd_trans_declare_op(env, oh, OSD_OT_INSERT,
                                osd_dto_credits_noquota[DTO_INDEX_INSERT] + 1);
-       }
+
        /* If this is directory, then we expect . and .. to be inserted as
         * well. The one directory block always needs to be created for the
         * directory, so we could use DTO_WRITE_BASE here (GDT, block bitmap,
@@ -2057,7 +2074,7 @@ static int osd_object_create(const struct lu_env *env, struct dt_object *dt,
         ENTRY;
 
         LINVRNT(osd_invariant(obj));
-        LASSERT(!dt_object_exists(dt));
+       LASSERT(!dt_object_exists(dt) && !dt_object_remote(dt));
         LASSERT(osd_write_locked(env, obj));
         LASSERT(th != NULL);
 
@@ -2073,7 +2090,9 @@ static int osd_object_create(const struct lu_env *env, struct dt_object *dt,
         if (result == 0)
                 result = __osd_oi_insert(env, obj, fid, th);
 
-        LASSERT(ergo(result == 0, dt_object_exists(dt)));
+       LASSERT(ergo(result == 0,
+                    dt_object_exists(dt) && !dt_object_remote(dt)));
+
         LASSERT(osd_invariant(obj));
         RETURN(result);
 }
@@ -2099,13 +2118,10 @@ static int osd_declare_object_destroy(const struct lu_env *env,
 
        osd_trans_declare_op(env, oh, OSD_OT_DELETE,
                             osd_dto_credits_noquota[DTO_OBJECT_DELETE]);
-       /* XXX: So far, only normal fid needs to be inserted into the OI,
-        *      so only normal fid needs to be removed from the OI also.
-        * Recycle idle OI leaf may cause additional three OI blocks
+       /* Recycle idle OI leaf may cause additional three OI blocks
         * to be changed. */
        osd_trans_declare_op(env, oh, OSD_OT_DESTROY,
-                            fid_is_norm(lu_object_fid(&dt->do_lu)) ?
-                            osd_dto_credits_noquota[DTO_INDEX_DELETE] + 3 : 0);
+                            osd_dto_credits_noquota[DTO_INDEX_DELETE] + 3);
 
        /* one less inode */
        rc = osd_declare_inode_qid(env, inode->i_uid, inode->i_gid, -1, oh,
@@ -2142,8 +2158,15 @@ static int osd_object_destroy(const struct lu_env *env,
         * lock contention. So it will not affect unlink performance. */
        mutex_lock(&inode->i_mutex);
        if (S_ISDIR(inode->i_mode)) {
-               LASSERT(osd_inode_unlinked(inode) ||
-                       inode->i_nlink == 1);
+               LASSERT(osd_inode_unlinked(inode) || inode->i_nlink == 1);
+               /* This checks for (and deletes) the agent inode on every
+                * directory destroy. How can it be optimized? It may
+                * impact unlink performance. XXX */
+               result = osd_delete_from_agent(env, osd, obj, oh);
+               if (result != 0 && result != -ENOENT) {
+                       CERROR("%s: delete agent inode "DFID": rc = %d\n",
+                              osd_name(osd), PFID(fid), result);
+               }
                spin_lock(&obj->oo_guard);
                clear_nlink(inode);
                spin_unlock(&obj->oo_guard);
@@ -2191,6 +2214,12 @@ int osd_ea_fid_set(struct osd_thread_info *info, struct inode *inode,
        struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs;
        int                      rc;
 
+       if (OBD_FAIL_CHECK(OBD_FAIL_FID_INLMA))
+               return 0;
+
+       if (OBD_FAIL_CHECK(OBD_FAIL_FID_IGIF) && fid_is_client_visible(fid))
+               return 0;
+
        lustre_lma_init(lma, fid);
        lustre_lma_swab(lma);
 
@@ -2213,10 +2242,7 @@ int osd_ea_fid_set(struct osd_thread_info *info, struct inode *inode,
 void osd_get_ldiskfs_dirent_param(struct ldiskfs_dentry_param *param,
                                  const struct dt_rec *fid)
 {
-       /* XXX: replace the check with "!fid_is_client_mdt_visible()"
-        *      when FID in OI file introduced for local object. */
-       if (!fid_is_norm((const struct lu_fid *)fid) &&
-           !fid_is_igif((const struct lu_fid *)fid)) {
+       if (!fid_is_client_mdt_visible((const struct lu_fid *)fid)) {
                param->edp_magic = 0;
                return;
        }
@@ -2227,8 +2253,7 @@ void osd_get_ldiskfs_dirent_param(struct ldiskfs_dentry_param *param,
 }
 
 /**
- * Try to read the fid from inode ea into dt_rec, if return value
- * i.e. rc is +ve, then we got fid, otherwise we will have to form igif
+ * Try to read the fid from inode ea into dt_rec.
  *
  * \param fid object fid.
  *
@@ -2251,6 +2276,95 @@ static int osd_ea_fid_get(const struct lu_env *env, struct osd_object *obj,
        RETURN(0);
 }
 
+/**
+ * Write the "." and ".." entries of \a dir through ldiskfs.
+ *
+ * The ".." dirent parameter is filled from \a dot_dot_fid by
+ * osd_get_ldiskfs_dirent_param(); the "." parameter always gets a zero
+ * edp_magic, so \a dot_fid is not consulted by this helper.
+ *
+ * \param info         per-thread scratch area providing both dirent params
+ * \param dir          directory inode that receives the two entries
+ * \param parent_dir   inode handed to ldiskfs as the parent directory
+ * \param dot_fid      FID for "." (currently unused)
+ * \param dot_dot_fid  FID to describe in the ".." dirent parameter
+ * \param oth          OSD transaction handle wrapping the journal handle
+ *
+ * \retval             the value returned by ldiskfs_add_dot_dotdot()
+ */
+static int osd_add_dot_dotdot_internal(struct osd_thread_info *info,
+                                       struct inode *dir,
+                                       struct inode  *parent_dir,
+                                       const struct dt_rec *dot_fid,
+                                       const struct dt_rec *dot_dot_fid,
+                                       struct osd_thandle *oth)
+{
+       struct ldiskfs_dentry_param *self_param =
+               (struct ldiskfs_dentry_param *)info->oti_ldp;
+       struct ldiskfs_dentry_param *parent_param =
+               (struct ldiskfs_dentry_param *)info->oti_ldp2;
+
+       /* "." is always written with a cleared magic. */
+       self_param->edp_magic = 0;
+
+       /* ".." is described by the caller-supplied FID. */
+       osd_get_ldiskfs_dirent_param(parent_param, dot_dot_fid);
+
+       return ldiskfs_add_dot_dotdot(oth->ot_handle, parent_dir, dir,
+                                     self_param, parent_param);
+}
+
+/**
+ * Create a local inode for a remote entry.
+ *
+ * A directory inode is allocated under \a pobj inside the running
+ * transaction and its "." / ".." entries are written, with \a fid passed
+ * as the ".." FID.
+ *
+ * \param env   execution environment (supplies the per-thread info)
+ * \param osd   OSD device, used here for log messages
+ * \param pobj  parent object; its inode is the parent of the new inode
+ * \param fid   FID of the remote object this inode stands for
+ * \param th    transaction handle; must be non-NULL and already started
+ *
+ * \retval      the new inode on success (presumably referenced, to be
+ *              released by the caller -- confirm against callers)
+ * \retval      ERR_PTR() on failure
+ */
+static struct inode *osd_create_remote_inode(const struct lu_env *env,
+                                            struct osd_device *osd,
+                                            struct osd_object *pobj,
+                                            const struct lu_fid *fid,
+                                            struct thandle *th)
+{
+       struct osd_thread_info  *info = osd_oti_get(env);
+       struct inode            *local;
+       struct osd_thandle      *oh;
+       int                     rc;
+       ENTRY;
+
+       LASSERT(th);
+       oh = container_of(th, struct osd_thandle, ot_super);
+       LASSERT(oh->ot_handle->h_transaction != NULL);
+
+       /* FIXME: Insert index api needs to know the mode of
+        * the remote object. Just use S_IFDIR for now */
+       local = ldiskfs_create_inode(oh->ot_handle, pobj->oo_inode, S_IFDIR);
+       if (IS_ERR(local)) {
+               CERROR("%s: create local error %d\n", osd_name(osd),
+                      (int)PTR_ERR(local));
+               RETURN(local);
+       }
+
+       rc = osd_add_dot_dotdot_internal(info, local, pobj->oo_inode,
+               (const struct dt_rec *)lu_object_fid(&pobj->oo_dt.do_lu),
+               (const struct dt_rec *)fid, oh);
+       if (rc != 0) {
+               CERROR("%s: "DFID" add dot dotdot error: rc = %d\n",
+                       osd_name(osd), PFID(fid), rc);
+               /* Nobody else knows about this inode yet: drop its link
+                * count and our reference so that it is not leaked, in
+                * the same way osd_delete_remote_inode() disposes of an
+                * agent inode. */
+               clear_nlink(local);
+               mark_inode_dirty(local);
+               iput(local);
+               RETURN(ERR_PTR(rc));
+       }
+
+       RETURN(local);
+}
+
+/**
+ * Delete the local inode that stands for a remote entry.
+ *
+ * The inode is looked up by number, its link count is cleared, it is
+ * marked dirty and the reference taken by the lookup is dropped.
+ *
+ * \param env  execution environment (supplies the per-thread info)
+ * \param osd  OSD device the inode lives on
+ * \param fid  FID of the remote object, used only in log messages
+ * \param ino  inode number in CPU byte order; the caller has already
+ *             converted it from the on-disk directory entry
+ * \param oh   OSD transaction handle (not used by this function)
+ *
+ * \retval 0        success
+ * \retval negative error from osd_iget()
+ */
+static int osd_delete_remote_inode(const struct lu_env *env,
+                                  struct osd_device *osd,
+                                  const struct lu_fid *fid,
+                                   __u32 ino, struct osd_thandle *oh)
+{
+       struct osd_thread_info  *oti = osd_oti_get(env);
+       struct osd_inode_id     *id = &oti->oti_id;
+       struct inode            *inode;
+       ENTRY;
+
+       /* \a ino is already in CPU order; converting it a second time
+        * would swap it back to little-endian on big-endian hosts. */
+       id->oii_ino = ino;
+       id->oii_gen = OSD_OII_NOGEN;
+       inode = osd_iget(oti, osd, id);
+       if (IS_ERR(inode)) {
+               CERROR("%s: iget error "DFID" id %u:%u\n", osd_name(osd),
+                      PFID(fid), id->oii_ino, id->oii_gen);
+               RETURN(PTR_ERR(inode));
+       }
+
+       clear_nlink(inode);
+       mark_inode_dirty(inode);
+       CDEBUG(D_INODE, "%s: delete remote inode "DFID" %lu\n",
+               osd_name(osd), PFID(fid), inode->i_ino);
+       iput(inode);
+       RETURN(0);
+}
+
 /**
  * OSD layer object create function for interoperability mode (b11826).
  * This is mostly similar to osd_object_create(). Only difference being, fid is
@@ -2273,7 +2387,7 @@ static int osd_object_ea_create(const struct lu_env *env, struct dt_object *dt,
         ENTRY;
 
         LASSERT(osd_invariant(obj));
-        LASSERT(!dt_object_exists(dt));
+       LASSERT(!dt_object_exists(dt) && !dt_object_remote(dt));
         LASSERT(osd_write_locked(env, obj));
         LASSERT(th != NULL);
 
@@ -2286,16 +2400,16 @@ static int osd_object_ea_create(const struct lu_env *env, struct dt_object *dt,
        osd_trans_declare_rb(env, th, OSD_OT_REF_ADD);
 
         result = __osd_object_create(info, obj, attr, hint, dof, th);
-        /* objects under osd root shld have igif fid, so dont add fid EA */
-       /* For ost object, the fid will be stored during first write */
-       if (result == 0 && fid_seq(fid) >= FID_SEQ_NORMAL &&
-           !fid_is_on_ost(info, osd_dt_dev(th->th_dev), fid))
+       if ((result == 0) &&
+           (fid_is_last_id(fid) ||
+            !fid_is_on_ost(info, osd_dt_dev(th->th_dev), fid)))
                result = osd_ea_fid_set(info, obj->oo_inode, fid);
 
         if (result == 0)
                 result = __osd_oi_insert(env, obj, fid, th);
 
-        LASSERT(ergo(result == 0, dt_object_exists(dt)));
+       LASSERT(ergo(result == 0,
+                    dt_object_exists(dt) && !dt_object_remote(dt)));
         LINVRNT(osd_invariant(obj));
         RETURN(result);
 }
@@ -2328,7 +2442,7 @@ static int osd_object_ref_add(const struct lu_env *env,
         struct inode      *inode = obj->oo_inode;
 
         LINVRNT(osd_invariant(obj));
-        LASSERT(dt_object_exists(dt));
+       LASSERT(dt_object_exists(dt) && !dt_object_remote(dt));
         LASSERT(osd_write_locked(env, obj));
         LASSERT(th != NULL);
 
@@ -2369,7 +2483,7 @@ static int osd_declare_object_ref_del(const struct lu_env *env,
 {
         struct osd_thandle *oh;
 
-        LASSERT(dt_object_exists(dt));
+       LASSERT(dt_object_exists(dt) && !dt_object_remote(dt));
         LASSERT(handle != NULL);
 
         oh = container_of0(handle, struct osd_thandle, ot_super);
@@ -2391,7 +2505,7 @@ static int osd_object_ref_del(const struct lu_env *env, struct dt_object *dt,
         struct inode      *inode = obj->oo_inode;
 
         LINVRNT(osd_invariant(obj));
-        LASSERT(dt_object_exists(dt));
+       LASSERT(dt_object_exists(dt) && !dt_object_remote(dt));
         LASSERT(osd_write_locked(env, obj));
         LASSERT(th != NULL);
 
@@ -2447,7 +2561,7 @@ static int osd_xattr_get(const struct lu_env *env, struct dt_object *dt,
                 return sizeof(dt_obj_version_t);
         }
 
-        LASSERT(dt_object_exists(dt));
+       LASSERT(dt_object_exists(dt) && !dt_object_remote(dt));
         LASSERT(inode->i_op != NULL && inode->i_op->getxattr != NULL);
 
         if (osd_object_auth(env, dt, capa, CAPA_OPC_META_READ))
@@ -2543,7 +2657,7 @@ static int osd_xattr_list(const struct lu_env *env, struct dt_object *dt,
         struct osd_thread_info *info   = osd_oti_get(env);
         struct dentry          *dentry = &info->oti_obj_dentry;
 
-        LASSERT(dt_object_exists(dt));
+       LASSERT(dt_object_exists(dt) && !dt_object_remote(dt));
         LASSERT(inode->i_op != NULL && inode->i_op->listxattr != NULL);
         LASSERT(osd_read_locked(env, obj) || osd_write_locked(env, obj));
 
@@ -2560,7 +2674,7 @@ static int osd_declare_xattr_del(const struct lu_env *env,
 {
         struct osd_thandle *oh;
 
-        LASSERT(dt_object_exists(dt));
+       LASSERT(dt_object_exists(dt) && !dt_object_remote(dt));
         LASSERT(handle != NULL);
 
         oh = container_of0(handle, struct osd_thandle, ot_super);
@@ -2585,7 +2699,7 @@ static int osd_xattr_del(const struct lu_env *env, struct dt_object *dt,
         struct dentry          *dentry = &info->oti_obj_dentry;
         int                     rc;
 
-        LASSERT(dt_object_exists(dt));
+       LASSERT(dt_object_exists(dt) && !dt_object_remote(dt));
         LASSERT(inode->i_op != NULL && inode->i_op->removexattr != NULL);
         LASSERT(handle != NULL);
 
@@ -2619,7 +2733,7 @@ static struct obd_capa *osd_capa_get(const struct lu_env *env,
         if (!dev->od_fl_capa)
                 RETURN(ERR_PTR(-ENOENT));
 
-        LASSERT(dt_object_exists(dt));
+       LASSERT(dt_object_exists(dt) && !dt_object_remote(dt));
         LINVRNT(osd_invariant(obj));
 
         /* renewal sanity check */
@@ -2784,14 +2898,13 @@ static int osd_index_try(const struct lu_env *env, struct dt_object *dt,
        struct osd_object       *obj = osd_dt_obj(dt);
 
         LINVRNT(osd_invariant(obj));
-        LASSERT(dt_object_exists(dt));
 
         if (osd_object_is_root(obj)) {
                 dt->do_index_ops = &osd_index_ea_ops;
                 result = 0;
        } else if (feat == &dt_directory_features) {
                 dt->do_index_ops = &osd_index_ea_ops;
-                if (S_ISDIR(obj->oo_inode->i_mode))
+               if (obj->oo_inode != NULL && S_ISDIR(obj->oo_inode->i_mode))
                         result = 0;
                 else
                         result = -ENOTDIR;
@@ -2971,7 +3084,7 @@ static int osd_index_iam_delete(const struct lu_env *env, struct dt_object *dt,
         ENTRY;
 
         LINVRNT(osd_invariant(obj));
-        LASSERT(dt_object_exists(dt));
+       LASSERT(dt_object_exists(dt) && !dt_object_remote(dt));
         LASSERT(bag->ic_object == obj->oo_inode);
         LASSERT(handle != NULL);
 
@@ -3010,7 +3123,7 @@ static int osd_index_declare_ea_delete(const struct lu_env *env,
        int                 rc;
        ENTRY;
 
-       LASSERT(dt_object_exists(dt));
+       LASSERT(dt_object_exists(dt) && !dt_object_remote(dt));
        LASSERT(handle != NULL);
 
        oh = container_of0(handle, struct osd_thandle, ot_super);
@@ -3037,7 +3150,29 @@ static inline int osd_get_fid_from_dentry(struct ldiskfs_dir_entry_2 *de,
                 rec = (struct osd_fid_pack *) (de->name + de->name_len + 1);
                 rc = osd_fid_unpack((struct lu_fid *)fid, rec);
         }
-        RETURN(rc);
+       return rc;
+}
+
+/**
+ * Tell whether \a fid is served by a node other than this one.
+ *
+ * \param env  execution environment (supplies the scratch seq range)
+ * \param osd  OSD device whose seq site and FLDB are consulted
+ * \param fid  FID to classify
+ *
+ * \retval 0      local: the sequence is not kept in FLDB, there is no
+ *                seq site, or FLDB maps the FID to this node's index
+ * \retval 1      FLDB maps the FID to a different node index
+ * \retval other  non-zero code returned by a failed osd_fld_lookup()
+ */
+static int osd_remote_fid(const struct lu_env *env, struct osd_device *osd,
+                         struct lu_fid *fid)
+{
+       struct lu_seq_range     *found = &osd_oti_get(env)->oti_seq_range;
+       struct seq_server_site  *site = osd_seq_site(osd);
+       int                     err;
+       ENTRY;
+
+       /* Those FID seqs, which are not in FLDB, must be local seq */
+       if (unlikely(!fid_seq_in_fldb(fid_seq(fid))))
+               RETURN(0);
+
+       /* Without a seq site there is nothing to compare against. */
+       if (unlikely(site == NULL))
+               RETURN(0);
+
+       err = osd_fld_lookup(env, osd, fid, found);
+       if (err == 0)
+               RETURN(site->ss_node_id != found->lsr_index);
+
+       CERROR("%s: Can not lookup fld for "DFID"\n",
+              osd_name(osd), PFID(fid));
+       RETURN(err);
+}
 
 /**
@@ -3059,15 +3194,16 @@ static int osd_index_ea_delete(const struct lu_env *env, struct dt_object *dt,
         struct inode               *dir    = obj->oo_inode;
         struct dentry              *dentry;
         struct osd_thandle         *oh;
-        struct ldiskfs_dir_entry_2 *de;
+       struct ldiskfs_dir_entry_2 *de = NULL;
         struct buffer_head         *bh;
         struct htree_lock          *hlock = NULL;
-        int                         rc;
-
+       struct lu_fid              *fid = &osd_oti_get(env)->oti_fid;
+       struct osd_device          *osd = osd_dev(dt->do_lu.lo_dev);
+       int                        rc;
         ENTRY;
 
         LINVRNT(osd_invariant(obj));
-        LASSERT(dt_object_exists(dt));
+       LASSERT(dt_object_exists(dt) && !dt_object_remote(dt));
         LASSERT(handle != NULL);
 
        osd_trans_exec_op(env, handle, OSD_OT_DELETE);
@@ -3104,6 +3240,30 @@ static int osd_index_ea_delete(const struct lu_env *env, struct dt_object *dt,
         else
                up_write(&obj->oo_ext_idx_sem);
 
+       if (rc != 0)
+               GOTO(out, rc);
+
+       /* For inode on the remote MDT, .. will point to
+        * /Agent directory. So do not try to lookup/delete
+        * remote inode for .. */
+       if (strcmp((char *)key, dotdot) == 0)
+               GOTO(out, rc = 0);
+
+       LASSERT(de != NULL);
+       rc = osd_get_fid_from_dentry(de, (struct dt_rec *)fid);
+       if (rc == 0 && osd_remote_fid(env, osd, fid)) {
+               __u32 ino = le32_to_cpu(de->inode);
+
+               rc = osd_delete_remote_inode(env, osd, fid, ino, oh);
+               if (rc != 0)
+                       CERROR("%s: del local inode "DFID": rc = %d\n",
+                               osd_name(osd), PFID(fid), rc);
+       } else {
+               if (rc == -ENODATA)
+                       rc = 0;
+       }
+out:
+
         LASSERT(osd_invariant(obj));
         RETURN(rc);
 }
@@ -3134,7 +3294,7 @@ static int osd_index_iam_lookup(const struct lu_env *env, struct dt_object *dt,
         ENTRY;
 
         LASSERT(osd_invariant(obj));
-        LASSERT(dt_object_exists(dt));
+       LASSERT(dt_object_exists(dt) && !dt_object_remote(dt));
         LASSERT(bag->ic_object == obj->oo_inode);
 
         if (osd_object_auth(env, dt, capa, CAPA_OPC_INDEX_LOOKUP))
@@ -3186,7 +3346,6 @@ static int osd_index_declare_iam_insert(const struct lu_env *env,
 {
         struct osd_thandle *oh;
 
-        LASSERT(dt_object_exists(dt));
         LASSERT(handle != NULL);
 
         oh = container_of0(handle, struct osd_thandle, ot_super);
@@ -3225,7 +3384,7 @@ static int osd_index_iam_insert(const struct lu_env *env, struct dt_object *dt,
         ENTRY;
 
         LINVRNT(osd_invariant(obj));
-        LASSERT(dt_object_exists(dt));
+       LASSERT(dt_object_exists(dt) && !dt_object_remote(dt));
         LASSERT(bag->ic_object == obj->oo_inode);
         LASSERT(th != NULL);
 
@@ -3319,8 +3478,6 @@ static int osd_add_dot_dotdot(struct osd_thread_info *info,
                               struct thandle *th)
 {
         struct inode                *inode = dir->oo_inode;
-        struct ldiskfs_dentry_param *dot_ldp;
-        struct ldiskfs_dentry_param *dot_dot_ldp;
         struct osd_thandle          *oth;
         int result = 0;
 
@@ -3336,21 +3493,18 @@ static int osd_add_dot_dotdot(struct osd_thread_info *info,
                         dir->oo_compat_dot_created = 1;
                         result = 0;
                 }
-        } else if(strcmp(name, dotdot) == 0) {
+       } else if (strcmp(name, dotdot) == 0) {
                if (!dir->oo_compat_dot_created)
                        return -EINVAL;
-
-               dot_dot_ldp = (struct ldiskfs_dentry_param *)info->oti_ldp2;
-               osd_get_ldiskfs_dirent_param(dot_dot_ldp, dot_dot_fid);
                /* in case of rename, dotdot is already created */
-               if (dir->oo_compat_dotdot_created)
+               if (dir->oo_compat_dotdot_created) {
                        return __osd_ea_add_rec(info, dir, parent_dir, name,
                                                dot_dot_fid, NULL, th);
+               }
 
-               dot_ldp = (struct ldiskfs_dentry_param *)info->oti_ldp;
-               dot_ldp->edp_magic = 0;
-               result = ldiskfs_add_dot_dotdot(oth->ot_handle, parent_dir,
-                                               inode, dot_ldp, dot_dot_ldp);
+               result = osd_add_dot_dotdot_internal(info, dir->oo_inode,
+                                               parent_dir, dot_fid,
+                                               dot_dot_fid, oth);
                if (result == 0)
                        dir->oo_compat_dotdot_created = 1;
        }
@@ -3392,8 +3546,18 @@ static int osd_ea_add_rec(const struct lu_env *env, struct osd_object *pobj,
                        down_write(&pobj->oo_ext_idx_sem);
                 }
 
-                rc = __osd_ea_add_rec(info, pobj, cinode, name, fid,
-                                      hlock, th);
+               if (OBD_FAIL_CHECK(OBD_FAIL_FID_INDIR)) {
+                       struct lu_fid *tfid = &info->oti_fid;
+
+                       *tfid = *(const struct lu_fid *)fid;
+                       tfid->f_ver = ~0;
+                       rc = __osd_ea_add_rec(info, pobj, cinode, name,
+                                             (const struct dt_rec *)tfid,
+                                             hlock, th);
+               } else {
+                       rc = __osd_ea_add_rec(info, pobj, cinode, name, fid,
+                                             hlock, th);
+               }
         }
         if (hlock != NULL)
                 ldiskfs_htree_unlock(hlock);
@@ -3452,6 +3616,31 @@ again:
        EXIT;
 }
 
+/**
+ * Resolve the FID of inode \a ino from its LMA instead of from the
+ * directory entry (used on the OBD_FAIL_FID_LOOKUP path).
+ *
+ * \param oti  per-thread info; provides the LMA buffer and scratch dentry
+ * \param dev  OSD device the inode lives on
+ * \param oic  idmap cache entry: oic_lid is set from \a ino, oic_fid is
+ *             set to the LMA self FID on success and zeroed on failure
+ * \param fid  output; receives the LMA self FID on success only
+ * \param ino  inode number to look up
+ *
+ * \retval 0      success
+ * \retval other  error from osd_iget() or osd_get_lma()
+ */
+static int osd_fail_fid_lookup(struct osd_thread_info *oti,
+                              struct osd_device *dev,
+                              struct osd_idmap_cache *oic,
+                              struct lu_fid *fid, __u32 ino)
+{
+       struct lustre_mdt_attrs *attrs = &oti->oti_mdt_attrs;
+       struct inode            *target;
+       int                      err;
+
+       osd_id_gen(&oic->oic_lid, ino, OSD_OII_NOGEN);
+       target = osd_iget(oti, dev, &oic->oic_lid);
+       if (IS_ERR(target)) {
+               fid_zero(&oic->oic_fid);
+               return PTR_ERR(target);
+       }
+
+       err = osd_get_lma(oti, target, &oti->oti_obj_dentry, attrs);
+       /* The inode is only needed to read the LMA. */
+       iput(target);
+       if (err != 0) {
+               fid_zero(&oic->oic_fid);
+               return err;
+       }
+
+       oic->oic_fid = attrs->lma_self_fid;
+       *fid = oic->oic_fid;
+       return err;
+}
+
 /**
  * Calls ->lookup() to find dentry. From dentry get inode and
  * read inode's ea to get fid. This is required for  interoperability
@@ -3489,28 +3678,37 @@ static int osd_ea_lookup_rec(const struct lu_env *env, struct osd_object *obj,
         bh = osd_ldiskfs_find_entry(dir, dentry, &de, hlock);
         if (bh) {
                struct osd_thread_info *oti = osd_oti_get(env);
+               struct osd_inode_id *id = &oti->oti_id;
                struct osd_idmap_cache *oic = &oti->oti_cache;
                struct osd_device *dev = osd_obj2dev(obj);
                struct osd_scrub *scrub = &dev->od_scrub;
                struct scrub_file *sf = &scrub->os_file;
 
                ino = le32_to_cpu(de->inode);
+               if (OBD_FAIL_CHECK(OBD_FAIL_FID_LOOKUP)) {
+                       brelse(bh);
+                       rc = osd_fail_fid_lookup(oti, dev, oic, fid, ino);
+                       GOTO(out, rc);
+               }
+
                rc = osd_get_fid_from_dentry(de, rec);
 
                /* done with de, release bh */
                brelse(bh);
                if (rc != 0)
-                       rc = osd_ea_fid_get(env, obj, ino, fid, &oic->oic_lid);
+                       rc = osd_ea_fid_get(env, obj, ino, fid, id);
                else
-                       osd_id_gen(&oic->oic_lid, ino, OSD_OII_NOGEN);
-               if (rc != 0) {
+                       osd_id_gen(id, ino, OSD_OII_NOGEN);
+               if (rc != 0 || osd_remote_fid(env, dev, fid)) {
                        fid_zero(&oic->oic_fid);
                        GOTO(out, rc);
                }
 
+               oic->oic_lid = *id;
                oic->oic_fid = *fid;
                if ((scrub->os_pos_current <= ino) &&
-                   (sf->sf_flags & SF_INCONSISTENT ||
+                   ((sf->sf_flags & SF_INCONSISTENT) ||
+                    (sf->sf_flags & SF_UPGRADE && fid_is_igif(fid)) ||
                     ldiskfs_test_bit(osd_oi_fid2idx(dev, fid),
                                      sf->sf_oi_bitmap)))
                        osd_consistency_check(oti, dev, oic);
@@ -3602,12 +3800,12 @@ static int osd_index_declare_ea_insert(const struct lu_env *env,
                                       struct thandle *handle)
 {
        struct osd_thandle      *oh;
-       struct inode            *inode;
+       struct osd_device       *osd   = osd_dev(dt->do_lu.lo_dev);
        struct lu_fid           *fid = (struct lu_fid *)rec;
        int                     rc;
        ENTRY;
 
-       LASSERT(dt_object_exists(dt));
+       LASSERT(dt_object_exists(dt) && !dt_object_remote(dt));
        LASSERT(handle != NULL);
 
        oh = container_of0(handle, struct osd_thandle, ot_super);
@@ -3616,24 +3814,39 @@ static int osd_index_declare_ea_insert(const struct lu_env *env,
        osd_trans_declare_op(env, oh, OSD_OT_INSERT,
                             osd_dto_credits_noquota[DTO_INDEX_INSERT]);
 
-       inode = osd_dt_obj(dt)->oo_inode;
-       LASSERT(inode);
+       if (osd_dt_obj(dt)->oo_inode == NULL) {
+               const char *name  = (const char *)key;
+               /* Object is not being created yet. Only happens when
+                *     1. declare directory create
+                *     2. declare insert .
+                *     3. declare insert ..
+                */
+               LASSERT(strcmp(name, dotdot) == 0 || strcmp(name, dot) == 0);
+       } else {
+               struct inode *inode = osd_dt_obj(dt)->oo_inode;
+
+               /* We ignore block quota on meta pool (MDTs), so needn't
+                * calculate how many blocks will be consumed by this index
+                * insert */
+               rc = osd_declare_inode_qid(env, inode->i_uid, inode->i_gid, 0,
+                                          oh, true, true, NULL, false);
+       }
 
-       /* We ignore block quota on meta pool (MDTs), so needn't
-        * calculate how many blocks will be consumed by this index
-        * insert */
-       rc = osd_declare_inode_qid(env, inode->i_uid, inode->i_gid, 0, oh,
-                                  true, true, NULL, false);
        if (fid == NULL)
                RETURN(0);
 
-       /* It does fld look up inside declare, and the result will be
-       * added to fld cache, so the following fld lookup inside insert
-       * does not need send RPC anymore, so avoid send rpc with holding
-       * transaction */
-       LASSERTF(fid_is_sane(fid), "fid is insane"DFID"\n", PFID(fid));
-       osd_fld_lookup(env, osd_dt_dev(handle->th_dev), fid,
-                       &osd_oti_get(env)->oti_seq_range);
+       rc = osd_remote_fid(env, osd, fid);
+       if (rc <= 0)
+               RETURN(rc);
+
+       rc = 0;
+
+       osd_trans_declare_op(env, oh, OSD_OT_CREATE,
+                            osd_dto_credits_noquota[DTO_OBJECT_CREATE]);
+       osd_trans_declare_op(env, oh, OSD_OT_INSERT,
+                            osd_dto_credits_noquota[DTO_INDEX_INSERT] + 1);
+       osd_trans_declare_op(env, oh, OSD_OT_INSERT,
+                            osd_dto_credits_noquota[DTO_INDEX_INSERT] + 1);
 
        RETURN(rc);
 }
@@ -3654,16 +3867,19 @@ static int osd_index_ea_insert(const struct lu_env *env, struct dt_object *dt,
                                const struct dt_key *key, struct thandle *th,
                                struct lustre_capa *capa, int ignore_quota)
 {
-        struct osd_object *obj   = osd_dt_obj(dt);
-        struct lu_fid     *fid   = (struct lu_fid *) rec;
-        const char        *name  = (const char *)key;
-        struct osd_object *child;
-        int                rc;
-
-        ENTRY;
+       struct osd_object       *obj = osd_dt_obj(dt);
+       struct osd_device       *osd = osd_dev(dt->do_lu.lo_dev);
+       struct lu_fid           *fid = (struct lu_fid *) rec;
+       const char              *name = (const char *)key;
+       struct osd_thread_info  *oti   = osd_oti_get(env);
+       struct osd_inode_id     *id    = &oti->oti_id;
+       struct inode            *child_inode = NULL;
+       struct osd_object       *child = NULL;
+       int                     rc;
+       ENTRY;
 
         LASSERT(osd_invariant(obj));
-        LASSERT(dt_object_exists(dt));
+       LASSERT(dt_object_exists(dt) && !dt_object_remote(dt));
         LASSERT(th != NULL);
 
        osd_trans_exec_op(env, th, OSD_OT_INSERT);
@@ -3671,16 +3887,59 @@ static int osd_index_ea_insert(const struct lu_env *env, struct dt_object *dt,
         if (osd_object_auth(env, dt, capa, CAPA_OPC_INDEX_INSERT))
                 RETURN(-EACCES);
 
-        child = osd_object_find(env, dt, fid);
-        if (!IS_ERR(child)) {
-                rc = osd_ea_add_rec(env, obj, child->oo_inode, name, rec, th);
-                osd_object_put(env, child);
-        } else {
-                rc = PTR_ERR(child);
-        }
+       LASSERTF(fid_is_sane(fid), "fid"DFID" is insane!", PFID(fid));
 
-        LASSERT(osd_invariant(obj));
-        RETURN(rc);
+       rc = osd_remote_fid(env, osd, fid);
+       if (rc < 0) {
+               CERROR("%s: Can not find object "DFID" rc %d\n",
+                      osd_name(osd), PFID(fid), rc);
+               RETURN(rc);
+       }
+
+       if (rc == 1) {
+               /* Insert remote entry */
+               if (strcmp(name, dotdot) == 0 && strlen(name) == 2) {
+                       struct osd_mdobj_map    *omm = osd->od_mdt_map;
+                       struct osd_thandle      *oh;
+
+                       /* If the parent is on a remote MDT, we need to put
+                        * this object under AGENT */
+                       oh = container_of(th, typeof(*oh), ot_super);
+                       rc = osd_add_to_agent(env, osd, obj, oh);
+                       if (rc != 0) {
+                               CERROR("%s: add agent "DFID" error: rc = %d\n",
+                                      osd_name(osd),
+                                      PFID(lu_object_fid(&dt->do_lu)), rc);
+                               RETURN(rc);
+                       }
+
+                       child_inode = igrab(omm->omm_agent_dentry->d_inode);
+               } else {
+                       child_inode = osd_create_remote_inode(env, osd, obj,
+                                                             fid, th);
+                       if (IS_ERR(child_inode))
+                               RETURN(PTR_ERR(child_inode));
+               }
+       } else {
+               /* Insert local entry; a failed lookup's errno is in child */
+               child = osd_object_find(env, dt, fid);
+               if (IS_ERR(child)) {
+                       CERROR("%s: Can not find object "DFID"%u:%u: rc = %d\n",
+                              osd_name(osd), PFID(fid),
+                              id->oii_ino, id->oii_gen,
+                              (int)PTR_ERR(child));
+                       RETURN(PTR_ERR(child));
+               }
+               child_inode = igrab(child->oo_inode);
+       }
+
+       rc = osd_ea_add_rec(env, obj, child_inode, name, rec, th);
+
+       iput(child_inode);
+       if (child != NULL)
+               osd_object_put(env, child);
+       LASSERT(osd_invariant(obj));
+       RETURN(rc);
 }
 
 /**
@@ -3763,7 +4022,6 @@ static int osd_it_iam_get(const struct lu_env *env,
  *
  *  \param  di      osd iterator
  */
-
 static void osd_it_iam_put(const struct lu_env *env, struct dt_it *di)
 {
         struct osd_it_iam *it = (struct osd_it_iam *)di;
@@ -3822,44 +4080,41 @@ static int osd_it_iam_key_size(const struct lu_env *env, const struct dt_it *di)
         return iam_it_key_size(&it->oi_it);
 }
 
-static inline void osd_it_append_attrs(struct lu_dirent *ent, __u32 attr,
-                                       int len, __u16 type)
+static inline void
+osd_it_append_attrs(struct lu_dirent *ent, int len, __u16 type)
 {
-        struct luda_type *lt;
-        const unsigned    align = sizeof(struct luda_type) - 1;
-
-        /* check if file type is required */
-        if (attr & LUDA_TYPE) {
-                        len = (len + align) & ~align;
+       /* check if file type is required */
+       if (ent->lde_attrs & LUDA_TYPE) {
+               int align = sizeof(struct luda_type) - 1;
+               struct luda_type *lt;
 
-                        lt = (void *) ent->lde_name + len;
-                        lt->lt_type = cpu_to_le16(CFS_DTTOIF(type));
-                        ent->lde_attrs |= LUDA_TYPE;
-        }
+               len = (len + align) & ~align;
+               lt = (struct luda_type *)(ent->lde_name + len);
+               lt->lt_type = cpu_to_le16(CFS_DTTOIF(type));
+       }
 
-        ent->lde_attrs = cpu_to_le32(ent->lde_attrs);
+       ent->lde_attrs = cpu_to_le32(ent->lde_attrs);
 }
 
 /**
  * build lu direct from backend fs dirent.
  */
 
-static inline void osd_it_pack_dirent(struct lu_dirent *ent,
-                                      struct lu_fid *fid, __u64 offset,
-                                      char *name, __u16 namelen,
-                                      __u16 type, __u32 attr)
+static inline void
+osd_it_pack_dirent(struct lu_dirent *ent, struct lu_fid *fid, __u64 offset,
+                  char *name, __u16 namelen, __u16 type, __u32 attr)
 {
-        fid_cpu_to_le(&ent->lde_fid, fid);
-        ent->lde_attrs = LUDA_FID;
+       ent->lde_attrs = attr | LUDA_FID;
+       fid_cpu_to_le(&ent->lde_fid, fid);
 
-        ent->lde_hash = cpu_to_le64(offset);
-        ent->lde_reclen = cpu_to_le16(lu_dirent_calc_size(namelen, attr));
+       ent->lde_hash = cpu_to_le64(offset);
+       ent->lde_reclen = cpu_to_le16(lu_dirent_calc_size(namelen, attr));
 
-        strncpy(ent->lde_name, name, namelen);
-        ent->lde_namelen = cpu_to_le16(namelen);
+       strncpy(ent->lde_name, name, namelen);
+       ent->lde_namelen = cpu_to_le16(namelen);
 
-        /* append lustre attributes */
-        osd_it_append_attrs(ent, attr, namelen, type);
+       /* append lustre attributes */
+       osd_it_append_attrs(ent, namelen, type);
 }
 
 /**
@@ -4227,10 +4482,337 @@ static int osd_it_ea_key_size(const struct lu_env *env, const struct dt_it *di)
         return it->oie_dirent->oied_namelen;
 }
 
+static int
+osd_dirent_update(handle_t *jh, struct super_block *sb,
+                 struct osd_it_ea_dirent *ent, struct lu_fid *fid,
+                 struct buffer_head *bh, struct ldiskfs_dir_entry_2 *de)
+{
+       struct osd_fid_pack *rec;
+       int                  rc;
+       ENTRY;
+
+       LASSERT(de->file_type & LDISKFS_DIRENT_LUFID);
+       LASSERT(de->rec_len >= de->name_len + sizeof(struct osd_fid_pack));
+
+       rc = ldiskfs_journal_get_write_access(jh, bh);
+       if (rc != 0) {
+               CERROR("%.16s: fail to write access for update dirent: "
+                      "name = %.*s, rc = %d\n",
+                      LDISKFS_SB(sb)->s_es->s_volume_name,
+                      ent->oied_namelen, ent->oied_name, rc);
+               RETURN(rc);
+       }
+
+       rec = (struct osd_fid_pack *)(de->name + de->name_len + 1);
+       fid_cpu_to_be((struct lu_fid *)rec->fp_area, fid);
+       rc = ldiskfs_journal_dirty_metadata(jh, bh);
+       if (rc != 0)
+               CERROR("%.16s: fail to dirty metadata for update dirent: "
+                      "name = %.*s, rc = %d\n",
+                      LDISKFS_SB(sb)->s_es->s_volume_name,
+                      ent->oied_namelen, ent->oied_name, rc);
+
+       RETURN(rc);
+}
+
+static inline int
+osd_dirent_has_space(__u16 reclen, __u16 namelen, unsigned blocksize)
+{
+       if (ldiskfs_rec_len_from_disk(reclen, blocksize) >=
+           __LDISKFS_DIR_REC_LEN(namelen + 1 + sizeof(struct osd_fid_pack)))
+               return 1;
+       else
+               return 0;
+}
+
+static int
+osd_dirent_reinsert(const struct lu_env *env, handle_t *jh,
+                   struct inode *dir, struct inode *inode,
+                   struct osd_it_ea_dirent *ent, struct lu_fid *fid,
+                   struct buffer_head *bh, struct ldiskfs_dir_entry_2 *de,
+                   struct htree_lock *hlock)
+{
+       struct dentry               *dentry;
+       struct osd_fid_pack         *rec;
+       struct ldiskfs_dentry_param *ldp;
+       int                          rc;
+       ENTRY;
+
+       if (!LDISKFS_HAS_INCOMPAT_FEATURE(inode->i_sb,
+                                         LDISKFS_FEATURE_INCOMPAT_DIRDATA))
+               RETURN(0);
+
+       /* There is enough space to hold the FID-in-dirent. */
+       if (osd_dirent_has_space(de->rec_len, ent->oied_namelen,
+                                dir->i_sb->s_blocksize)) {
+               rc = ldiskfs_journal_get_write_access(jh, bh);
+               if (rc != 0) {
+                       CERROR("%.16s: fail to write access for reinsert "
+                              "dirent: name = %.*s, rc = %d\n",
+                              LDISKFS_SB(inode->i_sb)->s_es->s_volume_name,
+                              ent->oied_namelen, ent->oied_name, rc);
+                       RETURN(rc);
+               }
+
+               de->name[de->name_len] = 0;
+               rec = (struct osd_fid_pack *)(de->name + de->name_len + 1);
+               rec->fp_len = sizeof(struct lu_fid) + 1;
+               fid_cpu_to_be((struct lu_fid *)rec->fp_area, fid);
+               de->file_type |= LDISKFS_DIRENT_LUFID;
+
+               rc = ldiskfs_journal_dirty_metadata(jh, bh);
+               if (rc != 0)
+                       CERROR("%.16s: fail to dirty metadata for reinsert "
+                              "dirent: name = %.*s, rc = %d\n",
+                              LDISKFS_SB(inode->i_sb)->s_es->s_volume_name,
+                              ent->oied_namelen, ent->oied_name, rc);
+
+               RETURN(rc);
+       }
+
+       rc = ldiskfs_delete_entry(jh, dir, de, bh);
+       if (rc != 0) {
+               CERROR("%.16s: fail to delete entry for reinsert dirent: "
+                      "name = %.*s, rc = %d\n",
+                      LDISKFS_SB(inode->i_sb)->s_es->s_volume_name,
+                      ent->oied_namelen, ent->oied_name, rc);
+               RETURN(rc);
+       }
+
+       dentry = osd_child_dentry_by_inode(env, dir, ent->oied_name,
+                                          ent->oied_namelen);
+       ldp = (struct ldiskfs_dentry_param *)osd_oti_get(env)->oti_ldp;
+       osd_get_ldiskfs_dirent_param(ldp, (const struct dt_rec *)fid);
+       dentry->d_fsdata = (void *)ldp;
+       ll_vfs_dq_init(dir);
+       rc = osd_ldiskfs_add_entry(jh, dentry, inode, hlock);
+       /* It is too bad, we cannot reinsert the name entry back.
+        * That means we lose it! */
+       if (rc != 0)
+               CERROR("%.16s: fail to insert entry for reinsert dirent: "
+                      "name = %.*s, rc = %d\n",
+                      LDISKFS_SB(inode->i_sb)->s_es->s_volume_name,
+                      ent->oied_namelen, ent->oied_name, rc);
+
+       RETURN(rc);
+}
+
+static int
+osd_dirent_check_repair(const struct lu_env *env, struct osd_object *obj,
+                       struct osd_it_ea *it, struct lu_fid *fid,
+                       struct osd_inode_id *id, __u32 *attr)
+{
+       struct osd_thread_info     *info        = osd_oti_get(env);
+       struct lustre_mdt_attrs    *lma         = &info->oti_mdt_attrs;
+       struct osd_device          *dev         = osd_obj2dev(obj);
+       struct super_block         *sb          = osd_sb(dev);
+       const char                 *devname     =
+                                       LDISKFS_SB(sb)->s_es->s_volume_name;
+       struct osd_it_ea_dirent    *ent         = it->oie_dirent;
+       struct inode               *dir         = obj->oo_inode;
+       struct htree_lock          *hlock       = NULL;
+       struct buffer_head         *bh          = NULL;
+       handle_t                   *jh          = NULL;
+       struct ldiskfs_dir_entry_2 *de;
+       struct dentry              *dentry;
+       struct inode               *inode;
+       int                         credits;
+       int                         rc;
+       bool                        dirty       = false;
+       bool                        is_dotdot   = false;
+       ENTRY;
+
+       if (ent->oied_name[0] == '.') {
+               /* Skip dot entry, even if it has stale FID-in-dirent, because
+                * we do not use such FID-in-dirent anymore, it is harmless. */
+               if (ent->oied_namelen == 1)
+                       RETURN(0);
+
+               if (ent->oied_namelen == 2 && ent->oied_name[1] == '.')
+                       is_dotdot = true;
+       }
+
+       dentry = osd_child_dentry_get(env, obj, ent->oied_name,
+                                     ent->oied_namelen);
+
+       /* We need to ensure that the name entry is still valid,
+        * because it may have been removed or renamed by others already.
+        *
+        * The unlink or rename operation will start journal before PDO lock,
+        * so to avoid deadlock, here we need to start journal handle before
+        * related PDO lock also. But because we do not know whether there
+        * will be something to be repaired before PDO lock, we just start
+        * journal without conditions.
+        *
+        * We may need to remove the name entry first, then insert it back.
+        * One credit is for user quota file update.
+        * One credit is for group quota file update.
+        * Two credits are for dirty inode. */
+       credits = osd_dto_credits_noquota[DTO_INDEX_DELETE] +
+                 osd_dto_credits_noquota[DTO_INDEX_INSERT] + 1 + 1 + 2;
+
+again:
+       if (dev->od_dirent_journal) {
+               jh = ldiskfs_journal_start_sb(sb, credits);
+               if (IS_ERR(jh)) {
+                       rc = PTR_ERR(jh);
+                       CERROR("%.16s: fail to start trans for dirent "
+                              "check_repair: credits %d, name %.*s, rc %d\n",
+                              devname, credits, ent->oied_namelen,
+                              ent->oied_name, rc);
+                       RETURN(rc);
+               }
+       }
+
+       if (obj->oo_hl_head != NULL) {
+               hlock = osd_oti_get(env)->oti_hlock;
+               ldiskfs_htree_lock(hlock, obj->oo_hl_head, dir,
+                                  LDISKFS_HLOCK_DEL);
+       } else {
+               down_write(&obj->oo_ext_idx_sem);
+       }
+
+       bh = osd_ldiskfs_find_entry(dir, dentry, &de, hlock);
+       /* For dotdot entry, if there is not enough space to hold FID-in-dirent,
+        * just keep it there. It only happens when the device upgraded from 1.8
+        * or restored from MDT file-level backup. For the whole directory, only
+        * dotdot entry has no FID-in-dirent and needs to get FID from LMA when
+        * readdir, it will not affect the performance much. */
+       if ((bh == NULL) || (le32_to_cpu(de->inode) != ent->oied_ino) ||
+           (is_dotdot && !osd_dirent_has_space(de->rec_len,
+                                               ent->oied_namelen,
+                                               sb->s_blocksize))) {
+               *attr |= LUDA_IGNORE;
+               GOTO(out_journal, rc = 0);
+       }
+
+       osd_id_gen(id, ent->oied_ino, OSD_OII_NOGEN);
+       inode = osd_iget(info, dev, id);
+       if (IS_ERR(inode)) {
+               rc = PTR_ERR(inode);
+               if (rc == -ENOENT || rc == -ESTALE) {
+                       *attr |= LUDA_IGNORE;
+                       rc = 0;
+               }
+
+               GOTO(out_journal, rc);
+       }
+
+       rc = osd_get_lma(info, inode, &info->oti_obj_dentry, lma);
+       if (rc == 0) {
+               if (fid_is_sane(fid)) {
+                       /* FID-in-dirent is valid. */
+                       if (lu_fid_eq(fid, &lma->lma_self_fid))
+                               GOTO(out_inode, rc = 0);
+
+                       /* Do not repair under dryrun mode. */
+                       if (*attr & LUDA_VERIFY_DRYRUN) {
+                               *attr |= LUDA_REPAIR;
+                               GOTO(out_inode, rc = 0);
+                       }
+
+                       if (!dev->od_dirent_journal) {
+                               iput(inode);
+                               brelse(bh);
+                               if (hlock != NULL)
+                                       ldiskfs_htree_unlock(hlock);
+                               else
+                                       up_write(&obj->oo_ext_idx_sem);
+                               dev->od_dirent_journal = 1;
+                               goto again;
+                       }
+
+                       *fid = lma->lma_self_fid;
+                       dirty = true;
+                       /* Update the FID-in-dirent. */
+                       rc = osd_dirent_update(jh, sb, ent, fid, bh, de);
+                       if (rc == 0)
+                               *attr |= LUDA_REPAIR;
+               } else {
+                       /* Do not repair under dryrun mode. */
+                       if (*attr & LUDA_VERIFY_DRYRUN) {
+                               *attr |= LUDA_REPAIR;
+                               GOTO(out_inode, rc = 0);
+                       }
+
+                       if (!dev->od_dirent_journal) {
+                               iput(inode);
+                               brelse(bh);
+                               if (hlock != NULL)
+                                       ldiskfs_htree_unlock(hlock);
+                               else
+                                       up_write(&obj->oo_ext_idx_sem);
+                               dev->od_dirent_journal = 1;
+                               goto again;
+                       }
+
+                       *fid = lma->lma_self_fid;
+                       dirty = true;
+                       /* Append the FID-in-dirent. */
+                       rc = osd_dirent_reinsert(env, jh, dir, inode, ent,
+                                                fid, bh, de, hlock);
+                       if (rc == 0)
+                               *attr |= LUDA_REPAIR;
+               }
+       } else if (rc == -ENODATA) {
+               /* Do not repair under dryrun mode. */
+               if (*attr & LUDA_VERIFY_DRYRUN) {
+                       if (fid_is_sane(fid))
+                               *attr |= LUDA_REPAIR;
+                       else
+                               *attr |= LUDA_UPGRADE;
+                       GOTO(out_inode, rc = 0);
+               }
+
+               if (!dev->od_dirent_journal) {
+                       iput(inode);
+                       brelse(bh);
+                       if (hlock != NULL)
+                               ldiskfs_htree_unlock(hlock);
+                       else
+                               up_write(&obj->oo_ext_idx_sem);
+                       dev->od_dirent_journal = 1;
+                       goto again;
+               }
+
+               dirty = true;
+               if (unlikely(fid_is_sane(fid))) {
+                       /* FID-in-dirent exists, but FID-in-LMA is lost.
+                        * Trust the FID-in-dirent, and add FID-in-LMA. */
+                       rc = osd_ea_fid_set(info, inode, fid);
+                       if (rc == 0)
+                               *attr |= LUDA_REPAIR;
+               } else {
+                       lu_igif_build(fid, inode->i_ino, inode->i_generation);
+                       /* It is probably an IGIF object. Only append the
+                        * FID-in-dirent. OI scrub will process FID-in-LMA. */
+                       rc = osd_dirent_reinsert(env, jh, dir, inode, ent,
+                                                fid, bh, de, hlock);
+                       if (rc == 0)
+                               *attr |= LUDA_UPGRADE;
+               }
+       }
+
+       GOTO(out_inode, rc);
+
+out_inode:
+       iput(inode);
+
+out_journal:
+       brelse(bh);
+       if (hlock != NULL)
+               ldiskfs_htree_unlock(hlock);
+       else
+               up_write(&obj->oo_ext_idx_sem);
+       if (jh != NULL)
+               ldiskfs_journal_stop(jh);
+       if (rc >= 0 && !dirty)
+               dev->od_dirent_journal = 0;
+       return rc;
+}
 
 /**
- * Returns the value (i.e. fid/igif) at current position from iterator's
- * in memory structure.
+ * Returns the value at current position from iterator's in memory structure.
  *
  * \param di struct osd_it_ea, iterator's in memory structure
  * \param attr attr requested for dirent.
@@ -4249,6 +4831,7 @@ static inline int osd_it_ea_rec(const struct lu_env *env,
        struct osd_scrub       *scrub = &dev->od_scrub;
        struct scrub_file      *sf    = &scrub->os_file;
        struct osd_thread_info *oti   = osd_oti_get(env);
+       struct osd_inode_id    *id    = &oti->oti_id;
        struct osd_idmap_cache *oic   = &oti->oti_cache;
        struct lu_fid          *fid   = &it->oie_dirent->oied_fid;
        struct lu_dirent       *lde   = (struct lu_dirent *)dtrec;
@@ -4256,23 +4839,48 @@ static inline int osd_it_ea_rec(const struct lu_env *env,
        int                     rc    = 0;
        ENTRY;
 
-       if (!fid_is_sane(fid)) {
-               rc = osd_ea_fid_get(env, obj, ino, fid, &oic->oic_lid);
-               if (rc != 0) {
-                       fid_zero(&oic->oic_fid);
-                       RETURN(rc);
+       if (attr & LUDA_VERIFY) {
+               attr |= LUDA_TYPE;
+               if (unlikely(ino == osd_sb(dev)->s_root->d_inode->i_ino)) {
+                       attr |= LUDA_IGNORE;
+                       rc = 0;
+                       goto pack;
                }
+
+               rc = osd_dirent_check_repair(env, obj, it, fid, id, &attr);
        } else {
-               osd_id_gen(&oic->oic_lid, ino, OSD_OII_NOGEN);
+               attr &= ~LU_DIRENT_ATTRS_MASK;
+               if (!fid_is_sane(fid)) {
+                       if (OBD_FAIL_CHECK(OBD_FAIL_FID_LOOKUP))
+                               RETURN(-ENOENT);
+
+                       rc = osd_ea_fid_get(env, obj, ino, fid, id);
+               } else {
+                       osd_id_gen(id, ino, OSD_OII_NOGEN);
+               }
        }
 
+       if (rc < 0)
+               RETURN(rc);
+
+pack:
        osd_it_pack_dirent(lde, fid, it->oie_dirent->oied_off,
                           it->oie_dirent->oied_name,
                           it->oie_dirent->oied_namelen,
                           it->oie_dirent->oied_type, attr);
-       oic->oic_fid = *fid;
-       if ((scrub->os_pos_current <= ino) &&
-           (sf->sf_flags & SF_INCONSISTENT ||
+
+       if (osd_remote_fid(env, dev, fid))
+               RETURN(0);
+
+       if (likely(!(attr & LUDA_IGNORE))) {
+               oic->oic_lid = *id;
+               oic->oic_fid = *fid;
+       }
+
+       if (!(attr & LUDA_VERIFY) &&
+           (scrub->os_pos_current <= ino) &&
+           ((sf->sf_flags & SF_INCONSISTENT) ||
+            (sf->sf_flags & SF_UPGRADE && fid_is_igif(fid)) ||
             ldiskfs_test_bit(osd_oi_fid2idx(dev, fid), sf->sf_oi_bitmap)))
                osd_consistency_check(oti, dev, oic);