Whamcloud - gitweb
LU-1866 osd: ancillary work for initial OI scrub
[fs/lustre-release.git] / lustre / osd-ldiskfs / osd_handler.c
index 6d0771e..b1edd8f 100644 (file)
 #include <obd_support.h>
 /* struct ptlrpc_thread */
 #include <lustre_net.h>
-
-/* fid_is_local() */
 #include <lustre_fid.h>
 
 #include "osd_internal.h"
-#include "osd_igif.h"
 
 /* llo_* api support */
 #include <md_object.h>
 #include <lustre_quota.h>
 
-#ifdef HAVE_LDISKFS_PDO
 int ldiskfs_pdo = 1;
 CFS_MODULE_PARM(ldiskfs_pdo, "i", int, 0644,
                 "ldiskfs with parallel directory operations");
-#else
-int ldiskfs_pdo = 0;
-#endif
 
 static const char dot[] = ".";
 static const char dotdot[] = "..";
@@ -93,6 +86,22 @@ static const struct dt_object_operations      osd_obj_otable_it_ops;
 static const struct dt_index_operations       osd_index_iam_ops;
 static const struct dt_index_operations       osd_index_ea_ops;
 
+#ifdef OSD_TRACK_DECLARES
+/*
+ * For every declared operation type (index, an OSD_OT_* value), the
+ * operation type recorded as its counterpart.  "rb" presumably stands for
+ * rollback -- confirm against osd_trans_declare_rb().
+ */
+int osd_trans_declare_op2rb[] = {
+       /* operations that are their own counterpart */
+       [OSD_OT_ATTR_SET]       = OSD_OT_ATTR_SET,
+       [OSD_OT_XATTR_SET]      = OSD_OT_XATTR_SET,
+       [OSD_OT_WRITE]          = OSD_OT_WRITE,
+       /* operations that come in opposite pairs */
+       [OSD_OT_CREATE]         = OSD_OT_DESTROY,
+       [OSD_OT_DESTROY]        = OSD_OT_CREATE,
+       [OSD_OT_REF_ADD]        = OSD_OT_REF_DEL,
+       [OSD_OT_REF_DEL]        = OSD_OT_REF_ADD,
+       [OSD_OT_INSERT]         = OSD_OT_DELETE,
+       [OSD_OT_DELETE]         = OSD_OT_INSERT,
+       /* operations mapped to OSD_OT_MAX, i.e. to no operation type
+        * (assuming OSD_OT_MAX is the end marker of the enum) */
+       [OSD_OT_PUNCH]          = OSD_OT_MAX,
+       [OSD_OT_QUOTA]          = OSD_OT_MAX,
+};
+#endif
+
 static int osd_has_index(const struct osd_object *obj)
 {
         return obj->oo_dt.do_index_ops != NULL;
@@ -161,14 +170,20 @@ static struct lu_object *osd_object_alloc(const struct lu_env *env,
         }
 }
 
-static int osd_get_lma(struct osd_thread_info *info, struct inode *inode,
-                      struct dentry *dentry, struct lustre_mdt_attrs *lma)
+/*
+ * Read extended attribute @name of @inode into @buf (capacity @len).
+ *
+ * The ->getxattr() inode operation is handed a dentry, so the dentry
+ * supplied by the caller is pointed at @inode first; its previous d_inode
+ * value is overwritten.  The result of ->getxattr() is returned unchanged.
+ */
+static inline int __osd_xattr_get(struct inode *inode, struct dentry *dentry,
+                                 const char *name, void *buf, int len)
+{
+       int rc;
+
+       dentry->d_inode = inode;
+       rc = inode->i_op->getxattr(dentry, name, buf, len);
+
+       return rc;
+}
+
+int osd_get_lma(struct osd_thread_info *info, struct inode *inode,
+               struct dentry *dentry, struct lustre_mdt_attrs *lma)
 {
        int rc;
 
-       dentry->d_inode = inode;
-       rc = inode->i_op->getxattr(dentry, XATTR_NAME_LMA, (void *)lma,
-                                  sizeof(*lma));
+       rc = __osd_xattr_get(inode, dentry, XATTR_NAME_LMA, (void *)lma,
+                            sizeof(*lma));
        if (rc == -ERANGE) {
                /* try with old lma size */
                rc = inode->i_op->getxattr(dentry, XATTR_NAME_LMA,
@@ -243,8 +258,9 @@ struct inode *osd_iget(struct osd_thread_info *info, struct osd_device *dev,
        return inode;
 }
 
-struct inode *osd_iget_fid(struct osd_thread_info *info, struct osd_device *dev,
-                          struct osd_inode_id *id, struct lu_fid *fid)
+static struct inode *
+osd_iget_fid(struct osd_thread_info *info, struct osd_device *dev,
+            struct osd_inode_id *id, struct lu_fid *fid)
 {
        struct lustre_mdt_attrs *lma   = &info->oti_mdt_attrs;
        struct inode            *inode;
@@ -258,7 +274,10 @@ struct inode *osd_iget_fid(struct osd_thread_info *info, struct osd_device *dev,
        if (rc == 0) {
                *fid = lma->lma_self_fid;
        } else if (rc == -ENODATA) {
-               LU_IGIF_BUILD(fid, inode->i_ino, inode->i_generation);
+               if (unlikely(inode == osd_sb(dev)->s_root->d_inode))
+                       lu_local_obj_fid(fid, OSD_FS_ROOT_OID);
+               else
+                       lu_igif_build(fid, inode->i_ino, inode->i_generation);
        } else {
                iput(inode);
                inode = ERR_PTR(rc);
@@ -355,7 +374,7 @@ static int osd_fid_lookup(const struct lu_env *env, struct osd_object *obj,
                GOTO(out, result = 0);
 
        /* Search order: 3. OI files. */
-       result = osd_oi_lookup(info, dev, fid, id);
+       result = osd_oi_lookup(info, dev, fid, id, true);
        if (result == -ENOENT) {
                if (!fid_is_norm(fid) ||
                    !ldiskfs_test_bit(osd_oi_fid2idx(dev,fid),
@@ -445,17 +464,17 @@ static int osd_object_init(const struct lu_env *env, struct lu_object *l,
 
        LINVRNT(osd_invariant(obj));
 
+       if (fid_is_otable_it(&l->lo_header->loh_fid)) {
+               obj->oo_dt.do_ops = &osd_obj_otable_it_ops;
+               l->lo_header->loh_attr |= LOHA_EXISTS;
+               return 0;
+       }
+
        result = osd_fid_lookup(env, obj, lu_object_fid(l), conf);
        obj->oo_dt.do_body_ops = &osd_body_ops_new;
-       if (result == 0) {
-               if (obj->oo_inode != NULL) {
-                       osd_object_init0(obj);
-               } else if (fid_is_otable_it(&l->lo_header->loh_fid)) {
-                       obj->oo_dt.do_ops = &osd_obj_otable_it_ops;
-                       /* LFSCK iterator object is special without inode */
-                       l->lo_header->loh_attr |= LOHA_EXISTS;
-               }
-       }
+       if (result == 0 && obj->oo_inode != NULL)
+               osd_object_init0(obj);
+
        LINVRNT(osd_invariant(obj));
        return result;
 }
@@ -666,6 +685,11 @@ static struct thandle *osd_trans_create(const struct lu_env *env,
                 oti->oti_dev = osd_dt_dev(d);
                 CFS_INIT_LIST_HEAD(&oh->ot_dcb_list);
                 osd_th_alloced(oh);
+
+               /* Reset the per-transaction declare tracking.  Size each
+                * clear by the array itself: OSD_OT_MAX is an element
+                * count, which equals the byte size only for single-byte
+                * elements, so wider counters would be left partly stale.
+                * NOTE(review): assumes these are arrays embedded in the
+                * thread info, not pointers -- confirm in osd_internal.h. */
+               memset(oti->oti_declare_ops, 0,
+                      sizeof(oti->oti_declare_ops));
+               memset(oti->oti_declare_ops_rb, 0,
+                      sizeof(oti->oti_declare_ops_rb));
+               memset(oti->oti_declare_ops_cred, 0,
+                      sizeof(oti->oti_declare_ops_cred));
+               oti->oti_rollback = false;
         }
         RETURN(th);
 }
@@ -704,22 +728,34 @@ int osd_trans_start(const struct lu_env *env, struct dt_device *d,
                      osd_journal(dev)->j_max_transaction_buffers);
 #ifdef OSD_TRACK_DECLARES
                CWARN("  create: %u/%u, delete: %u/%u, destroy: %u/%u\n",
-                     oh->ot_declare_create, oh->ot_declare_create_cred,
-                     oh->ot_declare_delete, oh->ot_declare_delete_cred,
-                     oh->ot_declare_destroy, oh->ot_declare_destroy_cred);
+                     oti->oti_declare_ops[OSD_OT_CREATE],
+                     oti->oti_declare_ops_cred[OSD_OT_CREATE],
+                     oti->oti_declare_ops[OSD_OT_DELETE],
+                     oti->oti_declare_ops_cred[OSD_OT_DELETE],
+                     oti->oti_declare_ops[OSD_OT_DESTROY],
+                     oti->oti_declare_ops_cred[OSD_OT_DESTROY]);
                CWARN("  attr_set: %u/%u, xattr_set: %u/%u\n",
-                     oh->ot_declare_attr_set, oh->ot_declare_attr_set_cred,
-                     oh->ot_declare_xattr_set, oh->ot_declare_xattr_set_cred);
+                     oti->oti_declare_ops[OSD_OT_ATTR_SET],
+                     oti->oti_declare_ops_cred[OSD_OT_ATTR_SET],
+                     oti->oti_declare_ops[OSD_OT_XATTR_SET],
+                     oti->oti_declare_ops_cred[OSD_OT_XATTR_SET]);
                CWARN("  write: %u/%u, punch: %u/%u, quota %u/%u\n",
-                     oh->ot_declare_write, oh->ot_declare_write_cred,
-                     oh->ot_declare_punch, oh->ot_declare_punch_cred,
-                     oh->ot_declare_quota, oh->ot_declare_quota_cred);
+                     oti->oti_declare_ops[OSD_OT_WRITE],
+                     oti->oti_declare_ops_cred[OSD_OT_WRITE],
+                     oti->oti_declare_ops[OSD_OT_PUNCH],
+                     oti->oti_declare_ops_cred[OSD_OT_PUNCH],
+                     oti->oti_declare_ops[OSD_OT_QUOTA],
+                     oti->oti_declare_ops_cred[OSD_OT_QUOTA]);
+               /* the label says "delete": print the OSD_OT_DELETE counters */
                CWARN("  insert: %u/%u, delete: %u/%u\n",
-                     oh->ot_declare_insert, oh->ot_declare_insert_cred,
-                     oh->ot_declare_delete, oh->ot_declare_destroy_cred);
+                     oti->oti_declare_ops[OSD_OT_INSERT],
+                     oti->oti_declare_ops_cred[OSD_OT_INSERT],
+                     oti->oti_declare_ops[OSD_OT_DELETE],
+                     oti->oti_declare_ops_cred[OSD_OT_DELETE]);
                CWARN("  ref_add: %u/%u, ref_del: %u/%u\n",
-                     oh->ot_declare_ref_add, oh->ot_declare_ref_add_cred,
-                     oh->ot_declare_ref_del, oh->ot_declare_ref_del_cred);
+                     oti->oti_declare_ops[OSD_OT_REF_ADD],
+                     oti->oti_declare_ops_cred[OSD_OT_REF_ADD],
+                     oti->oti_declare_ops[OSD_OT_REF_DEL],
+                     oti->oti_declare_ops_cred[OSD_OT_REF_DEL]);
 
                if (last_credits != oh->ot_credits &&
                    time_after(jiffies, last_printed + 60 * HZ)) {
@@ -1006,19 +1042,6 @@ static void osd_conf_get(const struct lu_env *env,
 
 }
 
-/**
- * Helper function to get and fill the buffer with input values.
- */
-static struct lu_buf *osd_buf_get(const struct lu_env *env, void *area, ssize_t len)
-{
-        struct lu_buf *buf;
-
-        buf = &osd_oti_get(env)->oti_buf;
-        buf->lb_buf = area;
-        buf->lb_len = len;
-        return buf;
-}
-
 /*
  * Concurrency: shouldn't matter.
  */
@@ -1399,8 +1422,8 @@ static int osd_declare_attr_set(const struct lu_env *env,
        oh = container_of0(handle, struct osd_thandle, ot_super);
        LASSERT(oh->ot_handle == NULL);
 
-       OSD_DECLARE_OP(oh, attr_set,
-                      osd_dto_credits_noquota[DTO_ATTR_SET_BASE]);
+       osd_trans_declare_op(env, oh, OSD_OT_ATTR_SET,
+                            osd_dto_credits_noquota[DTO_ATTR_SET_BASE]);
 
        if (attr == NULL || obj->oo_inode == NULL)
                RETURN(rc);
@@ -1605,7 +1628,7 @@ static int osd_attr_set(const struct lu_env *env,
         if (osd_object_auth(env, dt, capa, CAPA_OPC_META_WRITE))
                 return -EACCES;
 
-        OSD_EXEC_OP(handle, attr_set);
+       osd_trans_exec_op(env, handle, OSD_OT_ATTR_SET);
 
         inode = obj->oo_inode;
        ll_vfs_dq_init(inode);
@@ -1666,7 +1689,10 @@ static int osd_mkfile(struct osd_thread_info *info, struct osd_object *obj,
                  * NB: don't need any lock because no contention at this
                  * early stage */
                 inode->i_flags |= S_NOCMTIME;
-               inode->i_state |= I_LUSTRE_NOSCRUB;
+
+               /* A newly created object is consistent by construction,
+                * so there is no need to scrub it. */
+               ldiskfs_set_inode_state(inode, LDISKFS_STATE_LUSTRE_NOSCRUB);
                 obj->oo_inode = inode;
                 result = 0;
         } else {
@@ -1918,6 +1944,44 @@ static int __osd_oi_insert(const struct lu_env *env, struct osd_object *obj,
        return osd_oi_insert(info, osd, fid, id, th);
 }
 
+/*
+ * Fill @range with the target type (lsr_flags) and target index
+ * (lsr_index) that @fid belongs to.
+ *
+ *  - IGIF:            MDT, index 0
+ *  - IDIF:            OST, index taken from the FID itself
+ *  - other non-normal FID: MDT, index is this node's id when a sequence
+ *                     site exists
+ *  - normal FID:      asked from the FLD server of the sequence site
+ *
+ * Returns 0 on success, or the error from fld_server_lookup().
+ */
+int osd_fld_lookup(const struct lu_env *env, struct osd_device *osd,
+                  const struct lu_fid *fid, struct lu_seq_range *range)
+{
+       struct seq_server_site  *site = osd_seq_site(osd);
+       int                      result;
+
+       if (fid_is_igif(fid)) {
+               range->lsr_index = 0;
+               range->lsr_flags = LU_SEQ_RANGE_MDT;
+               return 0;
+       }
+
+       if (fid_is_idif(fid)) {
+               range->lsr_index = fid_idif_ost_idx(fid);
+               range->lsr_flags = LU_SEQ_RANGE_OST;
+               return 0;
+       }
+
+       if (!fid_is_norm(fid)) {
+               /* FIXME: without a sequence site there is no node id to
+                * report, so lsr_index is left untouched in that case. */
+               if (site != NULL)
+                       range->lsr_index = site->ss_node_id;
+               range->lsr_flags = LU_SEQ_RANGE_MDT;
+               return 0;
+       }
+
+       /* Normal FID: the answer has to come from the FLD. */
+       LASSERT(site != NULL);
+       range->lsr_flags = -1;
+       result = fld_server_lookup(env, site->ss_server_fld, fid_seq(fid),
+                                  range);
+       if (result == 0)
+               return 0;
+
+       CERROR("%s can not find "DFID": rc = %d\n",
+              osd2lu_dev(osd)->ld_obd->obd_name, PFID(fid), result);
+       return result;
+}
+
+
 static int osd_declare_object_create(const struct lu_env *env,
                                     struct dt_object *dt,
                                     struct lu_attr *attr,
@@ -1925,6 +1989,7 @@ static int osd_declare_object_create(const struct lu_env *env,
                                     struct dt_object_format *dof,
                                     struct thandle *handle)
 {
+       struct lu_seq_range     *range = &osd_oti_get(env)->oti_seq_range;
        struct osd_thandle      *oh;
        int                      rc;
        ENTRY;
@@ -1934,14 +1999,17 @@ static int osd_declare_object_create(const struct lu_env *env,
        oh = container_of0(handle, struct osd_thandle, ot_super);
        LASSERT(oh->ot_handle == NULL);
 
-       OSD_DECLARE_OP(oh, create, osd_dto_credits_noquota[DTO_OBJECT_CREATE]);
+       osd_trans_declare_op(env, oh, OSD_OT_CREATE,
+                            osd_dto_credits_noquota[DTO_OBJECT_CREATE]);
        /* XXX: So far, only normal fid needs be inserted into the oi,
         *      things could be changed later. Revise following code then. */
-       if (fid_is_norm(lu_object_fid(&dt->do_lu))) {
+       if (fid_is_norm(lu_object_fid(&dt->do_lu)) &&
+           !fid_is_on_ost(osd_oti_get(env), osd_dt_dev(handle->th_dev),
+                          lu_object_fid(&dt->do_lu))) {
                /* Reuse idle OI block may cause additional one OI block
                 * to be changed. */
-               OSD_DECLARE_OP(oh, insert,
-                              osd_dto_credits_noquota[DTO_INDEX_INSERT] + 1);
+               osd_trans_declare_op(env, oh, OSD_OT_INSERT,
+                               osd_dto_credits_noquota[DTO_INDEX_INSERT] + 1);
        }
        /* If this is directory, then we expect . and .. to be inserted as
         * well. The one directory block always needs to be created for the
@@ -1949,9 +2017,9 @@ static int osd_declare_object_create(const struct lu_env *env,
         * block), there is no danger of needing a tree for the first block.
         */
        if (attr && S_ISDIR(attr->la_mode)) {
-               OSD_DECLARE_OP(oh, insert,
-                              osd_dto_credits_noquota[DTO_WRITE_BASE]);
-               OSD_DECLARE_OP(oh, insert, 0);
+               osd_trans_declare_op(env, oh, OSD_OT_INSERT,
+                                    osd_dto_credits_noquota[DTO_WRITE_BASE]);
+               osd_trans_declare_op(env, oh, OSD_OT_INSERT, 0);
        }
 
        if (!attr)
@@ -1959,6 +2027,19 @@ static int osd_declare_object_create(const struct lu_env *env,
 
        rc = osd_declare_inode_qid(env, attr->la_uid, attr->la_gid, 1, oh,
                                   false, false, NULL, false);
+       if (rc != 0)
+               RETURN(rc);
+
+       /* Do the FLD lookup here, in the declare phase, so that the result
+        * is added to the FLD cache. The later FLD lookup done during the
+        * insert can then be answered from the cache, which avoids sending
+        * an RPC while the transaction is held. */
+       if (fid_is_norm(lu_object_fid(&dt->do_lu)) &&
+               !fid_is_last_id(lu_object_fid(&dt->do_lu)))
+               osd_fld_lookup(env, osd_dt_dev(handle->th_dev),
+                              lu_object_fid(&dt->do_lu), range);
+
+
        RETURN(rc);
 }
 
@@ -1985,7 +2066,8 @@ static int osd_object_create(const struct lu_env *env, struct dt_object *dt,
                 * 'tune2fs -O quota' will take care of creating them */
                RETURN(-EPERM);
 
-        OSD_EXEC_OP(th, create);
+       osd_trans_exec_op(env, th, OSD_OT_CREATE);
+       osd_trans_declare_rb(env, th, OSD_OT_REF_ADD);
 
         result = __osd_object_create(info, obj, attr, hint, dof, th);
         if (result == 0)
@@ -2015,13 +2097,15 @@ static int osd_declare_object_destroy(const struct lu_env *env,
        LASSERT(oh->ot_handle == NULL);
        LASSERT(inode);
 
-       OSD_DECLARE_OP(oh, delete, osd_dto_credits_noquota[DTO_OBJECT_DELETE]);
+       osd_trans_declare_op(env, oh, OSD_OT_DELETE,
+                            osd_dto_credits_noquota[DTO_OBJECT_DELETE]);
        /* XXX: So far, only normal fid needs to be inserted into the OI,
         *      so only normal fid needs to be removed from the OI also.
         * Recycle idle OI leaf may cause additional three OI blocks
         * to be changed. */
-       OSD_DECLARE_OP(oh, destroy, fid_is_norm(lu_object_fid(&dt->do_lu)) ?
-                       osd_dto_credits_noquota[DTO_INDEX_DELETE] + 3 : 0);
+       osd_trans_declare_op(env, oh, OSD_OT_DESTROY,
+                            fid_is_norm(lu_object_fid(&dt->do_lu)) ?
+                            osd_dto_credits_noquota[DTO_INDEX_DELETE] + 3 : 0);
 
        /* one less inode */
        rc = osd_declare_inode_qid(env, inode->i_uid, inode->i_gid, -1, oh,
@@ -2064,11 +2148,9 @@ static int osd_object_destroy(const struct lu_env *env,
                clear_nlink(inode);
                spin_unlock(&obj->oo_guard);
                inode->i_sb->s_op->dirty_inode(inode);
-       } else {
-               LASSERT(osd_inode_unlinked(inode));
        }
 
-        OSD_EXEC_OP(th, destroy);
+       osd_trans_exec_op(env, th, OSD_OT_DESTROY);
 
         result = osd_oi_delete(osd_oti_get(env), osd, fid, th);
        mutex_unlock(&inode->i_mutex);
@@ -2082,33 +2164,15 @@ static int osd_object_destroy(const struct lu_env *env,
         RETURN(0);
 }
 
-/**
- * Helper function for osd_xattr_set()
- */
-static int __osd_xattr_set(const struct lu_env *env, struct dt_object *dt,
-                           const struct lu_buf *buf, const char *name, int fl)
-{
-        struct osd_object      *obj      = osd_dt_obj(dt);
-        struct inode           *inode    = obj->oo_inode;
-        struct osd_thread_info *info     = osd_oti_get(env);
-        struct dentry          *dentry   = &info->oti_child_dentry;
-        int                     fs_flags = 0;
-        int                     rc;
-
-        LASSERT(dt_object_exists(dt));
-        LASSERT(inode->i_op != NULL && inode->i_op->setxattr != NULL);
-
-        if (fl & LU_XATTR_REPLACE)
-                fs_flags |= XATTR_REPLACE;
-
-        if (fl & LU_XATTR_CREATE)
-                fs_flags |= XATTR_CREATE;
+static inline int __osd_xattr_set(struct osd_thread_info *info,
+                                 struct inode *inode, const char *name,
+                                 const void *buf, int buflen, int fl)
+{
+       struct dentry *dentry = &info->oti_child_dentry;
 
        ll_vfs_dq_init(inode);
-        dentry->d_inode = inode;
-        rc = inode->i_op->setxattr(dentry, name, buf->lb_buf,
-                                   buf->lb_len, fs_flags);
-        return rc;
+       dentry->d_inode = inode;
+       return inode->i_op->setxattr(dentry, name, buf, buflen, fl);
 }
 
 /**
@@ -2124,15 +2188,17 @@ static int __osd_xattr_set(const struct lu_env *env, struct dt_object *dt,
 static int osd_ea_fid_set(const struct lu_env *env, struct dt_object *dt,
                           const struct lu_fid *fid)
 {
-        struct osd_thread_info  *info      = osd_oti_get(env);
-        struct lustre_mdt_attrs *mdt_attrs = &info->oti_mdt_attrs;
+       struct osd_thread_info  *info   = osd_oti_get(env);
+       struct inode            *inode  = osd_dt_obj(dt)->oo_inode;
+       struct lustre_mdt_attrs *lma    = &info->oti_mdt_attrs;
+       int                      rc;
 
-        lustre_lma_init(mdt_attrs, fid);
-        lustre_lma_swab(mdt_attrs);
-        return __osd_xattr_set(env, dt,
-                               osd_buf_get(env, mdt_attrs, sizeof *mdt_attrs),
-                               XATTR_NAME_LMA, LU_XATTR_CREATE);
+       lustre_lma_init(lma, fid);
+       lustre_lma_swab(lma);
 
+       rc = __osd_xattr_set(info, inode, XATTR_NAME_LMA, lma, sizeof(*lma),
+                            XATTR_CREATE);
+       return rc;
 }
 
 /**
@@ -2144,13 +2210,19 @@ static int osd_ea_fid_set(const struct lu_env *env, struct dt_object *dt,
  * its inmemory API.
  */
 void osd_get_ldiskfs_dirent_param(struct ldiskfs_dentry_param *param,
-                                  const struct dt_rec *fid)
+                                 const struct dt_rec *fid)
 {
-        param->edp_magic = LDISKFS_LUFID_MAGIC;
-        param->edp_len =  sizeof(struct lu_fid) + 1;
+       /* XXX: replace the check with "!fid_is_client_mdt_visible()"
+        *      once FIDs in the OI file are introduced for local objects. */
+       if (!fid_is_norm((const struct lu_fid *)fid) &&
+           !fid_is_igif((const struct lu_fid *)fid)) {
+               param->edp_magic = 0;
+               return;
+       }
 
-        fid_cpu_to_be((struct lu_fid *)param->edp_data,
-                      (struct lu_fid *)fid);
+       param->edp_magic = LDISKFS_LUFID_MAGIC;
+       param->edp_len =  sizeof(struct lu_fid) + 1;
+       fid_cpu_to_be((struct lu_fid *)param->edp_data, (struct lu_fid *)fid);
 }
 
 /**
@@ -2209,11 +2281,14 @@ static int osd_object_ea_create(const struct lu_env *env, struct dt_object *dt,
                 * 'tune2fs -O quota' will take care of creating them */
                RETURN(-EPERM);
 
-        OSD_EXEC_OP(th, create);
+       osd_trans_exec_op(env, th, OSD_OT_CREATE);
+       osd_trans_declare_rb(env, th, OSD_OT_REF_ADD);
 
         result = __osd_object_create(info, obj, attr, hint, dof, th);
         /* objects under osd root shld have igif fid, so dont add fid EA */
-        if (result == 0 && fid_seq(fid) >= FID_SEQ_NORMAL)
+       /* For ost object, the fid will be stored during first write */
+       if (result == 0 && fid_seq(fid) >= FID_SEQ_NORMAL &&
+           !fid_is_on_ost(info, osd_dt_dev(th->th_dev), fid))
                 result = osd_ea_fid_set(env, dt, fid);
 
         if (result == 0)
@@ -2228,7 +2303,7 @@ static int osd_declare_object_ref_add(const struct lu_env *env,
                                       struct dt_object *dt,
                                       struct thandle *handle)
 {
-        struct osd_thandle *oh;
+       struct osd_thandle       *oh;
 
         /* it's possible that object doesn't exist yet */
         LASSERT(handle != NULL);
@@ -2236,7 +2311,8 @@ static int osd_declare_object_ref_add(const struct lu_env *env,
         oh = container_of0(handle, struct osd_thandle, ot_super);
         LASSERT(oh->ot_handle == NULL);
 
-       OSD_DECLARE_OP(oh, ref_add, osd_dto_credits_noquota[DTO_ATTR_SET_BASE]);
+       osd_trans_declare_op(env, oh, OSD_OT_REF_ADD,
+                            osd_dto_credits_noquota[DTO_ATTR_SET_BASE]);
 
        return 0;
 }
@@ -2255,7 +2331,7 @@ static int osd_object_ref_add(const struct lu_env *env,
         LASSERT(osd_write_locked(env, obj));
         LASSERT(th != NULL);
 
-        OSD_EXEC_OP(th, ref_add);
+       osd_trans_exec_op(env, th, OSD_OT_REF_ADD);
 
        /*
         * DIR_NLINK feature is set for compatibility reasons if:
@@ -2298,7 +2374,8 @@ static int osd_declare_object_ref_del(const struct lu_env *env,
         oh = container_of0(handle, struct osd_thandle, ot_super);
         LASSERT(oh->ot_handle == NULL);
 
-       OSD_DECLARE_OP(oh, ref_del, osd_dto_credits_noquota[DTO_ATTR_SET_BASE]);
+       osd_trans_declare_op(env, oh, OSD_OT_REF_DEL,
+                            osd_dto_credits_noquota[DTO_ATTR_SET_BASE]);
 
        return 0;
 }
@@ -2317,7 +2394,7 @@ static int osd_object_ref_del(const struct lu_env *env, struct dt_object *dt,
         LASSERT(osd_write_locked(env, obj));
         LASSERT(th != NULL);
 
-        OSD_EXEC_OP(th, ref_del);
+       osd_trans_exec_op(env, th, OSD_OT_REF_DEL);
 
        spin_lock(&obj->oo_guard);
        LASSERT(inode->i_nlink > 0);
@@ -2375,8 +2452,7 @@ static int osd_xattr_get(const struct lu_env *env, struct dt_object *dt,
         if (osd_object_auth(env, dt, capa, CAPA_OPC_META_READ))
                 return -EACCES;
 
-        dentry->d_inode = inode;
-        return inode->i_op->getxattr(dentry, name, buf->lb_buf, buf->lb_len);
+       return __osd_xattr_get(inode, dentry, name, buf->lb_buf, buf->lb_len);
 }
 
 
@@ -2392,9 +2468,10 @@ static int osd_declare_xattr_set(const struct lu_env *env,
        oh = container_of0(handle, struct osd_thandle, ot_super);
        LASSERT(oh->ot_handle == NULL);
 
-       OSD_DECLARE_OP(oh, xattr_set, strcmp(name, XATTR_NAME_VERSION) == 0 ?
-                      osd_dto_credits_noquota[DTO_ATTR_SET_BASE] :
-                      osd_dto_credits_noquota[DTO_XATTR_SET]);
+       osd_trans_declare_op(env, oh, OSD_OT_XATTR_SET,
+                            strcmp(name, XATTR_NAME_VERSION) == 0 ?
+                            osd_dto_credits_noquota[DTO_ATTR_SET_BASE] :
+                            osd_dto_credits_noquota[DTO_XATTR_SET]);
 
        return 0;
 }
@@ -2424,6 +2501,11 @@ static int osd_xattr_set(const struct lu_env *env, struct dt_object *dt,
                          const struct lu_buf *buf, const char *name, int fl,
                          struct thandle *handle, struct lustre_capa *capa)
 {
+       struct osd_object      *obj      = osd_dt_obj(dt);
+       struct inode           *inode    = obj->oo_inode;
+       struct osd_thread_info *info     = osd_oti_get(env);
+       int                     fs_flags = 0;
+
         LASSERT(handle != NULL);
 
         /* version set is not real XATTR */
@@ -2438,8 +2520,15 @@ static int osd_xattr_set(const struct lu_env *env, struct dt_object *dt,
         if (osd_object_auth(env, dt, capa, CAPA_OPC_META_WRITE))
                 return -EACCES;
 
-        OSD_EXEC_OP(handle, xattr_set);
-        return __osd_xattr_set(env, dt, buf, name, fl);
+       osd_trans_exec_op(env, handle, OSD_OT_XATTR_SET);
+       if (fl & LU_XATTR_REPLACE)
+               fs_flags |= XATTR_REPLACE;
+
+       if (fl & LU_XATTR_CREATE)
+               fs_flags |= XATTR_CREATE;
+
+       return __osd_xattr_set(info, inode, name, buf->lb_buf, buf->lb_len,
+                              fs_flags);
 }
 
 /*
@@ -2476,7 +2565,8 @@ static int osd_declare_xattr_del(const struct lu_env *env,
         oh = container_of0(handle, struct osd_thandle, ot_super);
         LASSERT(oh->ot_handle == NULL);
 
-       OSD_DECLARE_OP(oh, xattr_set, osd_dto_credits_noquota[DTO_XATTR_SET]);
+       osd_trans_declare_op(env, oh, OSD_OT_XATTR_SET,
+                            osd_dto_credits_noquota[DTO_XATTR_SET]);
 
        return 0;
 }
@@ -2496,13 +2586,12 @@ static int osd_xattr_del(const struct lu_env *env, struct dt_object *dt,
 
         LASSERT(dt_object_exists(dt));
         LASSERT(inode->i_op != NULL && inode->i_op->removexattr != NULL);
-        LASSERT(osd_write_locked(env, obj));
         LASSERT(handle != NULL);
 
         if (osd_object_auth(env, dt, capa, CAPA_OPC_META_WRITE))
                 return -EACCES;
 
-        OSD_EXEC_OP(handle, xattr_set);
+       osd_trans_exec_op(env, handle, OSD_OT_XATTR_SET);
 
        ll_vfs_dq_init(inode);
         dentry->d_inode = inode;
@@ -2709,7 +2798,7 @@ static int osd_index_try(const struct lu_env *env, struct dt_object *dt,
        } else if (unlikely(feat == &dt_otable_features)) {
                dt->do_index_ops = &osd_otable_ops;
                return 0;
-       } else if (feat == &dt_acct_features) {
+       } else if (unlikely(feat == &dt_acct_features)) {
                dt->do_index_ops = &osd_acct_index_ops;
                result = 0;
                skip_iam = 1;
@@ -2848,7 +2937,8 @@ static int osd_index_declare_iam_delete(const struct lu_env *env,
         oh = container_of0(handle, struct osd_thandle, ot_super);
         LASSERT(oh->ot_handle == NULL);
 
-       OSD_DECLARE_OP(oh, delete, osd_dto_credits_noquota[DTO_INDEX_DELETE]);
+       osd_trans_declare_op(env, oh, OSD_OT_DELETE,
+                            osd_dto_credits_noquota[DTO_INDEX_DELETE]);
 
        return 0;
 }
@@ -2887,7 +2977,7 @@ static int osd_index_iam_delete(const struct lu_env *env, struct dt_object *dt,
         if (osd_object_auth(env, dt, capa, CAPA_OPC_INDEX_DELETE))
                 RETURN(-EACCES);
 
-        OSD_EXEC_OP(handle, delete);
+       osd_trans_exec_op(env, handle, OSD_OT_DELETE);
 
         ipd = osd_idx_ipd_get(env, bag);
         if (unlikely(ipd == NULL))
@@ -2925,7 +3015,8 @@ static int osd_index_declare_ea_delete(const struct lu_env *env,
        oh = container_of0(handle, struct osd_thandle, ot_super);
        LASSERT(oh->ot_handle == NULL);
 
-       OSD_DECLARE_OP(oh, delete, osd_dto_credits_noquota[DTO_INDEX_DELETE]);
+       osd_trans_declare_op(env, oh, OSD_OT_DELETE,
+                            osd_dto_credits_noquota[DTO_INDEX_DELETE]);
 
        inode = osd_dt_obj(dt)->oo_inode;
        LASSERT(inode);
@@ -2978,7 +3069,7 @@ static int osd_index_ea_delete(const struct lu_env *env, struct dt_object *dt,
         LASSERT(dt_object_exists(dt));
         LASSERT(handle != NULL);
 
-        OSD_EXEC_OP(handle, delete);
+       osd_trans_exec_op(env, handle, OSD_OT_DELETE);
 
         oh = container_of(handle, struct osd_thandle, ot_super);
         LASSERT(oh->ot_handle != NULL);
@@ -3100,7 +3191,8 @@ static int osd_index_declare_iam_insert(const struct lu_env *env,
         oh = container_of0(handle, struct osd_thandle, ot_super);
         LASSERT(oh->ot_handle == NULL);
 
-       OSD_DECLARE_OP(oh, insert, osd_dto_credits_noquota[DTO_INDEX_INSERT]);
+       osd_trans_declare_op(env, oh, OSD_OT_INSERT,
+                            osd_dto_credits_noquota[DTO_INDEX_INSERT]);
 
        return 0;
 }
@@ -3139,7 +3231,7 @@ static int osd_index_iam_insert(const struct lu_env *env, struct dt_object *dt,
         if (osd_object_auth(env, dt, capa, CAPA_OPC_INDEX_INSERT))
                RETURN(-EACCES);
 
-        OSD_EXEC_OP(th, insert);
+       osd_trans_exec_op(env, th, OSD_OT_INSERT);
 
         ipd = osd_idx_ipd_get(env, bag);
         if (unlikely(ipd == NULL))
@@ -3190,27 +3282,20 @@ static int __osd_ea_add_rec(struct osd_thread_info *info,
         oth = container_of(th, struct osd_thandle, ot_super);
         LASSERT(oth->ot_handle != NULL);
         LASSERT(oth->ot_handle->h_transaction != NULL);
-
-        child = osd_child_dentry_get(info->oti_env, pobj, name, strlen(name));
-
-        /* XXX: remove fid_is_igif() check here.
-         * IGIF check is just to handle insertion of .. when it is 'ROOT',
-         * it is IGIF now but needs FID in dir entry as well for readdir
-         * to work.
-         * LU-838 should fix that and remove fid_is_igif() check */
-        if (fid_is_igif((struct lu_fid *)fid) ||
-            fid_is_norm((struct lu_fid *)fid)) {
-                ldp = (struct ldiskfs_dentry_param *)info->oti_ldp;
-                osd_get_ldiskfs_dirent_param(ldp, fid);
-                child->d_fsdata = (void *)ldp;
-        } else {
-                child->d_fsdata = NULL;
-        }
        LASSERT(pobj->oo_inode);
+
+       ldp = (struct ldiskfs_dentry_param *)info->oti_ldp;
+       if (unlikely(pobj->oo_inode ==
+                    osd_sb(osd_obj2dev(pobj))->s_root->d_inode))
+               ldp->edp_magic = 0;
+       else
+               osd_get_ldiskfs_dirent_param(ldp, fid);
+       child = osd_child_dentry_get(info->oti_env, pobj, name, strlen(name));
+       child->d_fsdata = (void *)ldp;
        ll_vfs_dq_init(pobj->oo_inode);
-        rc = osd_ldiskfs_add_entry(oth->ot_handle, child, cinode, hlock);
+       rc = osd_ldiskfs_add_entry(oth->ot_handle, child, cinode, hlock);
 
-        RETURN(rc);
+       RETURN(rc);
 }
 
 /**
@@ -3251,31 +3336,25 @@ static int osd_add_dot_dotdot(struct osd_thread_info *info,
                         result = 0;
                 }
         } else if(strcmp(name, dotdot) == 0) {
-                dot_ldp = (struct ldiskfs_dentry_param *)info->oti_ldp;
-                dot_dot_ldp = (struct ldiskfs_dentry_param *)info->oti_ldp2;
-
-                if (!dir->oo_compat_dot_created)
-                        return -EINVAL;
-                if (!fid_is_igif((struct lu_fid *)dot_fid)) {
-                        osd_get_ldiskfs_dirent_param(dot_ldp, dot_fid);
-                        osd_get_ldiskfs_dirent_param(dot_dot_ldp, dot_dot_fid);
-                } else {
-                        dot_ldp = NULL;
-                        dot_dot_ldp = NULL;
-                }
-                /* in case of rename, dotdot is already created */
-                if (dir->oo_compat_dotdot_created) {
-                        return __osd_ea_add_rec(info, dir, parent_dir, name,
-                                                dot_dot_fid, NULL, th);
-                }
-
-                result = ldiskfs_add_dot_dotdot(oth->ot_handle, parent_dir,
-                                                inode, dot_ldp, dot_dot_ldp);
-                if (result == 0)
-                       dir->oo_compat_dotdot_created = 1;
-        }
+               if (!dir->oo_compat_dot_created)
+                       return -EINVAL;
+
+               dot_dot_ldp = (struct ldiskfs_dentry_param *)info->oti_ldp2;
+               osd_get_ldiskfs_dirent_param(dot_dot_ldp, dot_dot_fid);
+               /* in case of rename, dotdot is already created */
+               if (dir->oo_compat_dotdot_created)
+                       return __osd_ea_add_rec(info, dir, parent_dir, name,
+                                               dot_dot_fid, NULL, th);
+
+               dot_ldp = (struct ldiskfs_dentry_param *)info->oti_ldp;
+               dot_ldp->edp_magic = 0;
+               result = ldiskfs_add_dot_dotdot(oth->ot_handle, parent_dir,
+                                               inode, dot_ldp, dot_dot_ldp);
+               if (result == 0)
+                       dir->oo_compat_dotdot_created = 1;
+       }
 
-        return result;
+       return result;
 }
 
 
@@ -3338,7 +3417,7 @@ osd_consistency_check(struct osd_thread_info *oti, struct osd_device *dev,
                RETURN_EXIT;
 
 again:
-       rc = osd_oi_lookup(oti, dev, fid, id);
+       rc = osd_oi_lookup(oti, dev, fid, id, true);
        if (rc != 0 && rc != -ENOENT)
                RETURN_EXIT;
 
@@ -3391,6 +3470,7 @@ static int osd_ea_lookup_rec(const struct lu_env *env, struct osd_object *obj,
         struct htree_lock          *hlock = NULL;
         int                         ino;
         int                         rc;
+       ENTRY;
 
         LASSERT(dir->i_op != NULL && dir->i_op->lookup != NULL);
 
@@ -3520,9 +3600,10 @@ static int osd_index_declare_ea_insert(const struct lu_env *env,
                                       const struct dt_key *key,
                                       struct thandle *handle)
 {
-       struct osd_thandle *oh;
-       struct inode       *inode;
-       int                 rc;
+       struct osd_thandle      *oh;
+       struct inode            *inode;
+       struct lu_fid           *fid = (struct lu_fid *)rec;
+       int                     rc;
        ENTRY;
 
        LASSERT(dt_object_exists(dt));
@@ -3531,7 +3612,8 @@ static int osd_index_declare_ea_insert(const struct lu_env *env,
        oh = container_of0(handle, struct osd_thandle, ot_super);
        LASSERT(oh->ot_handle == NULL);
 
-       OSD_DECLARE_OP(oh, insert, osd_dto_credits_noquota[DTO_INDEX_INSERT]);
+       osd_trans_declare_op(env, oh, OSD_OT_INSERT,
+                            osd_dto_credits_noquota[DTO_INDEX_INSERT]);
 
        inode = osd_dt_obj(dt)->oo_inode;
        LASSERT(inode);
@@ -3541,6 +3623,17 @@ static int osd_index_declare_ea_insert(const struct lu_env *env,
         * insert */
        rc = osd_declare_inode_qid(env, inode->i_uid, inode->i_gid, 0, oh,
                                   true, true, NULL, false);
+       if (fid == NULL)
+               RETURN(0);
+
+       /* Do the FLD lookup inside declare: the result is added to the
+       * FLD cache, so the later FLD lookup inside insert does not need
+       * to send an RPC, which avoids sending an RPC while holding the
+       * transaction. */
+       LASSERTF(fid_is_sane(fid), "fid is insane"DFID"\n", PFID(fid));
+       osd_fld_lookup(env, osd_dt_dev(handle->th_dev), fid,
+                       &osd_oti_get(env)->oti_seq_range);
+
        RETURN(rc);
 }
 
@@ -3572,6 +3665,8 @@ static int osd_index_ea_insert(const struct lu_env *env, struct dt_object *dt,
         LASSERT(dt_object_exists(dt));
         LASSERT(th != NULL);
 
+       osd_trans_exec_op(env, th, OSD_OT_INSERT);
+
         if (osd_object_auth(env, dt, capa, CAPA_OPC_INDEX_INSERT))
                 RETURN(-EACCES);
 
@@ -3973,6 +4068,7 @@ static int osd_ldiskfs_filldir(char *buf, const char *name, int namelen,
                                unsigned d_type)
 {
         struct osd_it_ea        *it   = (struct osd_it_ea *)buf;
+       struct osd_object       *obj  = it->oie_obj;
         struct osd_it_ea_dirent *ent  = it->oie_dirent;
         struct lu_fid           *fid  = &ent->oied_fid;
         struct osd_fid_pack     *rec;
@@ -3988,16 +4084,23 @@ static int osd_ldiskfs_filldir(char *buf, const char *name, int namelen,
             OSD_IT_EA_BUFSIZE)
                 RETURN(1);
 
-        if (d_type & LDISKFS_DIRENT_LUFID) {
-                rec = (struct osd_fid_pack*) (name + namelen + 1);
-
-                if (osd_fid_unpack(fid, rec) != 0)
-                        fid_zero(fid);
+       /* "." is just the object itself. */
+       if (namelen == 1 && name[0] == '.') {
+               *fid = obj->oo_dt.do_lu.lo_header->loh_fid;
+       } else if (d_type & LDISKFS_DIRENT_LUFID) {
+               rec = (struct osd_fid_pack*) (name + namelen + 1);
+               if (osd_fid_unpack(fid, rec) != 0)
+                       fid_zero(fid);
+       } else {
+               fid_zero(fid);
+       }
+       d_type &= ~LDISKFS_DIRENT_LUFID;
 
-                d_type &= ~LDISKFS_DIRENT_LUFID;
-        } else {
-                fid_zero(fid);
-        }
+       /* Do NOT export the local root. */
+       if (unlikely(osd_sb(osd_obj2dev(obj))->s_root->d_inode->i_ino == ino)) {
+               ino = obj->oo_inode->i_ino;
+               *fid = obj->oo_dt.do_lu.lo_header->loh_fid;
+       }
 
         ent->oied_ino     = ino;
         ent->oied_off     = offset;
@@ -4440,6 +4543,8 @@ static int osd_mount(const struct lu_env *env,
                GOTO(out, rc = -EINVAL);
        }
 
+       ldiskfs_set_inode_state(osd_sb(o)->s_root->d_inode,
+                               LDISKFS_STATE_LUSTRE_NO_OI);
        if (lmd_flags & LMD_FLG_NOSCRUB)
                o->od_noscrub = 1;