Whamcloud - gitweb
LU-1405 osd: Kernel panic when running sanityn test_12
[fs/lustre-release.git] / lustre / osd-ldiskfs / osd_handler.c
index 4252479..827621b 100644 (file)
@@ -1,6 +1,4 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
  * GPL HEADER START
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -90,194 +88,12 @@ static const char dot[] = ".";
 static const char dotdot[] = "..";
 static const char remote_obj_dir[] = "REM_OBJ_DIR";
 
-struct osd_directory {
-        struct iam_container od_container;
-        struct iam_descr     od_descr;
-};
-
-struct osd_object {
-        struct dt_object       oo_dt;
-        /**
-         * Inode for file system object represented by this osd_object. This
-         * inode is pinned for the whole duration of lu_object life.
-         *
-         * Not modified concurrently (either setup early during object
-         * creation, or assigned by osd_object_create() under write lock).
-         */
-        struct inode          *oo_inode;
-        /**
-         * to protect index ops.
-         */
-        struct htree_lock_head *oo_hl_head;
-        cfs_rw_semaphore_t     oo_ext_idx_sem;
-        cfs_rw_semaphore_t     oo_sem;
-        struct osd_directory  *oo_dir;
-        /** protects inode attributes. */
-        cfs_spinlock_t         oo_guard;
-        /**
-         * Following two members are used to indicate the presence of dot and
-         * dotdot in the given directory. This is required for interop mode
-         * (b11826).
-         */
-        int                    oo_compat_dot_created;
-        int                    oo_compat_dotdot_created;
-
-        const struct lu_env   *oo_owner;
-#ifdef CONFIG_LOCKDEP
-        struct lockdep_map     oo_dep_map;
-#endif
-};
-
 static const struct lu_object_operations      osd_lu_obj_ops;
-static const struct lu_device_operations      osd_lu_ops;
-static       struct lu_context_key            osd_key;
 static const struct dt_object_operations      osd_obj_ops;
 static const struct dt_object_operations      osd_obj_ea_ops;
-static const struct dt_body_operations        osd_body_ops;
-static const struct dt_body_operations        osd_body_ops_new;
 static const struct dt_index_operations       osd_index_iam_ops;
 static const struct dt_index_operations       osd_index_ea_ops;
 
-#define OSD_TRACK_DECLARES
-#ifdef OSD_TRACK_DECLARES
-#define OSD_DECLARE_OP(oh, op)   {                               \
-        LASSERT(oh->ot_handle == NULL);                          \
-        ((oh)->ot_declare_ ##op)++; }
-#define OSD_EXEC_OP(handle, op)     {                            \
-        struct osd_thandle *oh;                                  \
-        oh = container_of0(handle, struct osd_thandle, ot_super);\
-        if (((oh)->ot_declare_ ##op) > 0) {                      \
-                ((oh)->ot_declare_ ##op)--;                      \
-        }                                                        \
-        }
-#else
-#define OSD_DECLARE_OP(oh, op)
-#define OSD_EXEC_OP(oh, op)
-#endif
-
-/* There are at most 10 uid/gids are affected in a transaction, and
- * that's rename case:
- * - 2 for source parent uid & gid;
- * - 2 for source child uid & gid ('..' entry update when the child
- *   is directory);
- * - 2 for target parent uid & gid;
- * - 2 for target child uid & gid (if the target child exists);
- * - 2 for root uid & gid (last_rcvd, llog, etc);
- *
- * The 0 to (OSD_MAX_UGID_CNT - 1) bits of ot_id_type is for indicating
- * the id type of each id in the ot_id_array.
- */
-#define OSD_MAX_UGID_CNT        10
-
-struct osd_thandle {
-        struct thandle          ot_super;
-        handle_t               *ot_handle;
-        struct journal_callback ot_jcb;
-        cfs_list_t              ot_dcb_list;
-        /* Link to the device, for debugging. */
-        struct lu_ref_link     *ot_dev_link;
-        unsigned short          ot_credits;
-        unsigned short          ot_id_cnt;
-        unsigned short          ot_id_type;
-        uid_t                   ot_id_array[OSD_MAX_UGID_CNT];
-
-#ifdef OSD_TRACK_DECLARES
-        unsigned char           ot_declare_attr_set;
-        unsigned char           ot_declare_punch;
-        unsigned char           ot_declare_xattr_set;
-        unsigned char           ot_declare_create;
-        unsigned char           ot_declare_destroy;
-        unsigned char           ot_declare_ref_add;
-        unsigned char           ot_declare_ref_del;
-        unsigned char           ot_declare_write;
-        unsigned char           ot_declare_insert;
-        unsigned char           ot_declare_delete;
-#endif
-
-#if OSD_THANDLE_STATS
-        /** time when this handle was allocated */
-        cfs_time_t oth_alloced;
-
-        /** time when this thanle was started */
-        cfs_time_t oth_started;
-#endif
-};
-
-/**
- * Basic transaction credit op
- */
-enum dt_txn_op {
-        DTO_INDEX_INSERT,
-        DTO_INDEX_DELETE,
-        DTO_INDEX_UPDATE,
-        DTO_OBJECT_CREATE,
-        DTO_OBJECT_DELETE,
-        DTO_ATTR_SET_BASE,
-        DTO_XATTR_SET,
-        DTO_LOG_REC, /**< XXX temporary: dt layer knows nothing about llog. */
-        DTO_WRITE_BASE,
-        DTO_WRITE_BLOCK,
-        DTO_ATTR_SET_CHOWN,
-
-        DTO_NR
-};
-
-/*
- * Helpers.
- */
-static int lu_device_is_osd(const struct lu_device *d)
-{
-        return ergo(d != NULL && d->ld_ops != NULL, d->ld_ops == &osd_lu_ops);
-}
-
-static struct osd_device *osd_dt_dev(const struct dt_device *d)
-{
-        LASSERT(lu_device_is_osd(&d->dd_lu_dev));
-        return container_of0(d, struct osd_device, od_dt_dev);
-}
-
-static struct osd_device *osd_dev(const struct lu_device *d)
-{
-        LASSERT(lu_device_is_osd(d));
-        return osd_dt_dev(container_of0(d, struct dt_device, dd_lu_dev));
-}
-
-static struct osd_device *osd_obj2dev(const struct osd_object *o)
-{
-        return osd_dev(o->oo_dt.do_lu.lo_dev);
-}
-
-static struct super_block *osd_sb(const struct osd_device *dev)
-{
-        return dev->od_mount->lmi_mnt->mnt_sb;
-}
-
-static int osd_object_is_root(const struct osd_object *obj)
-{
-        return osd_sb(osd_obj2dev(obj))->s_root->d_inode == obj->oo_inode;
-}
-
-static struct osd_object *osd_obj(const struct lu_object *o)
-{
-        LASSERT(lu_device_is_osd(o->lo_dev));
-        return container_of0(o, struct osd_object, oo_dt.do_lu);
-}
-
-static struct osd_object *osd_dt_obj(const struct dt_object *d)
-{
-        return osd_obj(&d->do_lu);
-}
-
-static struct lu_device *osd2lu_dev(struct osd_device *osd)
-{
-        return &osd->od_dt_dev.dd_lu_dev;
-}
-
-static journal_t *osd_journal(const struct osd_device *dev)
-{
-        return LDISKFS_SB(osd_sb(dev))->s_journal;
-}
-
 static int osd_has_index(const struct osd_object *obj)
 {
         return obj->oo_dt.do_index_ops != NULL;
@@ -292,8 +108,8 @@ static int osd_object_invariant(const struct lu_object *l)
 static inline void
 osd_push_ctxt(const struct lu_env *env, struct osd_ctxt *save)
 {
-        struct md_ucred    *uc = md_ucred(env);
-        struct cred        *tc;
+        struct md_ucred *uc = md_ucred(env);
+        struct cred     *tc;
 
         LASSERT(uc != NULL);
 
@@ -323,11 +139,6 @@ osd_pop_ctxt(struct osd_ctxt *save)
 }
 #endif
 
-static inline struct osd_thread_info *osd_oti_get(const struct lu_env *env)
-{
-        return lu_context_key_get(&env->le_ctx, &osd_key);
-}
-
 /*
  * Concurrency: doesn't matter
  */
@@ -351,10 +162,7 @@ static int osd_write_locked(const struct lu_env *env, struct osd_object *o)
 static int osd_root_get(const struct lu_env *env,
                         struct dt_device *dev, struct lu_fid *f)
 {
-        struct inode *inode;
-
-        inode = osd_sb(osd_dt_dev(dev))->s_root->d_inode;
-        LU_IGIF_BUILD(f, inode->i_ino, inode->i_generation);
+        lu_local_obj_fid(f, OSD_FS_ROOT_OID);
         return 0;
 }
 
@@ -368,8 +176,8 @@ static inline void osd_qid_set_type(struct osd_thandle *oh, int i, int type)
         oh->ot_id_type |= ((type == GRPQUOTA) ? (1 << i) : 0);
 }
 
-static void osd_declare_qid(struct dt_object *dt, struct osd_thandle *oh,
-                            int type, uid_t id, struct inode *inode)
+void osd_declare_qid(struct dt_object *dt, struct osd_thandle *oh,
+                     int type, uid_t id, struct inode *inode)
 {
 #ifdef CONFIG_QUOTA
         int i, allocated = 0;
@@ -441,23 +249,15 @@ static struct lu_object *osd_object_alloc(const struct lu_env *env,
 /*
  * retrieve object from backend ext fs.
  **/
-static struct inode *osd_iget(struct osd_thread_info *info,
-                              struct osd_device *dev,
-                              const struct osd_inode_id *id)
+struct inode *osd_iget(struct osd_thread_info *info,
+                       struct osd_device *dev,
+                       const struct osd_inode_id *id)
 {
         struct inode *inode = NULL;
 
-#ifdef HAVE_EXT4_LDISKFS
         inode = ldiskfs_iget(osd_sb(dev), id->oii_ino);
-        if (IS_ERR(inode))
-        /* Newer kernels return an error instead of a NULL pointer */
-                inode = NULL;
-#else
-        inode = iget(osd_sb(dev), id->oii_ino);
-#endif
-        if (inode == NULL) {
-                CERROR("no inode\n");
-                inode = ERR_PTR(-EACCES);
+        if (IS_ERR(inode)) {
+                CERROR("Cannot get inode, rc = %li\n", PTR_ERR(inode));
         } else if (id->oii_gen != OSD_OII_NOGEN &&
                    inode->i_generation != id->oii_gen) {
                 iput(inode);
@@ -497,7 +297,7 @@ static int osd_fid_lookup(const struct lu_env *env,
 
         LINVRNT(osd_invariant(obj));
         LASSERT(obj->oo_inode == NULL);
-        LASSERTF(fid_is_sane(fid) || osd_fid_is_root(fid), DFID, PFID(fid));
+        LASSERTF(fid_is_sane(fid) || fid_is_idif(fid), DFID, PFID(fid));
         /*
          * This assertion checks that osd layer sees only local
          * fids. Unfortunately it is somewhat expensive (does a
@@ -508,17 +308,18 @@ static int osd_fid_lookup(const struct lu_env *env,
         ENTRY;
 
         info = osd_oti_get(env);
+        LASSERT(info);
         dev  = osd_dev(ldev);
         id   = &info->oti_id;
 
         if (OBD_FAIL_CHECK(OBD_FAIL_OST_ENOENT))
                 RETURN(-ENOENT);
 
-        result = osd_oi_lookup(info, osd_fid2oi(dev, fid), fid, id);
+        result = osd_oi_lookup(info, dev, fid, id);
         if (result != 0) {
                 if (result == -ENOENT)
                         result = 0;
-                goto out;
+                GOTO(out, result);
         }
 
         inode = osd_iget(info, dev, id);
@@ -531,7 +332,7 @@ static int osd_fid_lookup(const struct lu_env *env,
                  * place holders for objects yet to be created.
                  */
                 result = PTR_ERR(inode);
-                goto out;
+                GOTO(out, result);
         }
 
         obj->oo_inode = inode;
@@ -553,7 +354,6 @@ static int osd_fid_lookup(const struct lu_env *env,
         }
 out:
         LINVRNT(osd_invariant(obj));
-
         RETURN(result);
 }
 
@@ -606,30 +406,6 @@ static void osd_object_free(const struct lu_env *env, struct lu_object *l)
         OBD_FREE_PTR(obj);
 }
 
-/**
- * IAM Iterator
- */
-static struct iam_path_descr *osd_it_ipd_get(const struct lu_env *env,
-                                             const struct iam_container *bag)
-{
-        return bag->ic_descr->id_ops->id_ipd_alloc(bag,
-                                           osd_oti_get(env)->oti_it_ipd);
-}
-
-static struct iam_path_descr *osd_idx_ipd_get(const struct lu_env *env,
-                                              const struct iam_container *bag)
-{
-        return bag->ic_descr->id_ops->id_ipd_alloc(bag,
-                                           osd_oti_get(env)->oti_idx_ipd);
-}
-
-static void osd_ipd_put(const struct lu_env *env,
-                        const struct iam_container *bag,
-                        struct iam_path_descr *ipd)
-{
-        bag->ic_descr->id_ops->id_ipd_free(ipd);
-}
-
 /*
  * Concurrency: no concurrent access is possible that late in object
  * life-cycle.
@@ -778,9 +554,11 @@ static void osd_trans_commit_cb(struct journal_callback *jcb, int error)
 
         dt_txn_hook_commit(th);
 
-        /* call per-transaction callbacks if any */
-        cfs_list_for_each_entry_safe(dcb, tmp, &oh->ot_dcb_list, dcb_linkage)
-                dcb->dcb_func(NULL, th, dcb, error);
+       /* call per-transaction callbacks if any */
+       cfs_list_for_each_entry_safe(dcb, tmp, &oh->ot_dcb_list, dcb_linkage) {
+               cfs_list_del_init(&dcb->dcb_linkage);
+               dcb->dcb_func(NULL, th, dcb, error);
+       }
 
         lu_ref_del_at(&lud->ld_reference, oh->ot_dev_link, "osd-tx", th);
         lu_device_put(lud);
@@ -795,10 +573,14 @@ static struct thandle *osd_trans_create(const struct lu_env *env,
                                         struct dt_device *d)
 {
         struct osd_thread_info *oti = osd_oti_get(env);
+        struct osd_iobuf       *iobuf = &oti->oti_iobuf;
         struct osd_thandle     *oh;
         struct thandle         *th;
         ENTRY;
 
+        /* on pending IO in this thread should left from prev. request */
+        LASSERT(cfs_atomic_read(&iobuf->dr_numreqs) == 0);
+
         th = ERR_PTR(-ENOMEM);
         OBD_ALLOC_GFP(oh, sizeof *oh, CFS_ALLOC_IO);
         if (oh != NULL) {
@@ -910,6 +692,7 @@ static int osd_trans_stop(const struct lu_env *env, struct thandle *th)
         int                     rc = 0;
         struct osd_thandle     *oh;
         struct osd_thread_info *oti = osd_oti_get(env);
+        struct osd_iobuf       *iobuf = &oti->oti_iobuf;
 
         ENTRY;
 
@@ -956,6 +739,20 @@ static int osd_trans_stop(const struct lu_env *env, struct thandle *th)
                 OBD_FREE_PTR(oh);
         }
 
+        /* as we want IO to journal and data IO be concurrent, we don't block
+         * awaiting data IO completion in osd_do_bio(), instead we wait here
+         * once transaction is submitted to the journal. all reqular requests
+         * don't do direct IO (except read/write), thus this wait_event becomes
+         * no-op for them.
+         *
+         * IMPORTANT: we have to wait till any IO submited by the thread is
+         * completed otherwise iobuf may be corrupted by different request
+         */
+        cfs_wait_event(iobuf->dr_wait,
+                       cfs_atomic_read(&iobuf->dr_numreqs) == 0);
+        if (!rc)
+                rc = iobuf->dr_error;
+
         RETURN(rc);
 }
 
@@ -1026,24 +823,39 @@ static int osd_object_print(const struct lu_env *env, void *cookie,
  * Concurrency: shouldn't matter.
  */
 int osd_statfs(const struct lu_env *env, struct dt_device *d,
-               cfs_kstatfs_t *sfs)
+               struct obd_statfs *sfs)
 {
-        struct osd_device *osd = osd_dt_dev(d);
+        struct osd_device  *osd = osd_dt_dev(d);
         struct super_block *sb = osd_sb(osd);
+        struct kstatfs     *ksfs;
         int result = 0;
 
+        /* osd_lproc.c call this without env, allocate ksfs for that case */
+        if (unlikely(env == NULL)) {
+                OBD_ALLOC_PTR(ksfs);
+                if (ksfs == NULL)
+                        return -ENOMEM;
+        } else {
+                ksfs = &osd_oti_get(env)->oti_ksfs;
+        }
+
         cfs_spin_lock(&osd->od_osfs_lock);
         /* cache 1 second */
         if (cfs_time_before_64(osd->od_osfs_age, cfs_time_shift_64(-1))) {
-                result = ll_do_statfs(sb, &osd->od_kstatfs);
-                if (likely(result == 0)) /* N.B. statfs can't really fail */
+                result = ll_do_statfs(sb, ksfs);
+                if (likely(result == 0)) /* N.B. statfs can't really fail */
                         osd->od_osfs_age = cfs_time_current_64();
+                        statfs_pack(&osd->od_statfs, ksfs);
+                }
         }
 
         if (likely(result == 0))
-                *sfs = osd->od_kstatfs;
+                *sfs = osd->od_statfs;
         cfs_spin_unlock(&osd->od_osfs_lock);
 
+        if (unlikely(env == NULL))
+                OBD_FREE_PTR(ksfs);
+
         return result;
 }
 
@@ -1125,15 +937,16 @@ static int osd_commit_async(const struct lu_env *env,
  * Concurrency: shouldn't matter.
  */
 
-static void osd_ro(const struct lu_env *env, struct dt_device *d)
+static int osd_ro(const struct lu_env *env, struct dt_device *d)
 {
         struct super_block *sb = osd_sb(osd_dt_dev(d));
+        int rc;
         ENTRY;
 
         CERROR("*** setting device %s read-only ***\n", LUSTRE_OSD_NAME);
 
-        __lvfs_set_rdonly(sb->s_bdev, LDISKFS_SB(sb)->journal_bdev);
-        EXIT;
+        rc = __lvfs_set_rdonly(sb->s_bdev, LDISKFS_SB(sb)->journal_bdev);
+        RETURN(rc);
 }
 
 /*
@@ -1176,7 +989,7 @@ static void osd_init_quota_ctxt(const struct lu_env *env, struct dt_device *d,
  * Note: we do not count into QUOTA here.
  * If we mount with --data_journal we may need more.
  */
-static const int osd_dto_credits_noquota[DTO_NR] = {
+const int osd_dto_credits_noquota[DTO_NR] = {
         /**
          * Insert/Delete.
          * INDEX_EXTRA_TRANS_BLOCKS(8) +
@@ -1367,8 +1180,8 @@ static int capa_is_sane(const struct lu_env *env,
         RETURN(0);
 }
 
-static int osd_object_auth(const struct lu_env *env, struct dt_object *dt,
-                           struct lustre_capa *capa, __u64 opc)
+int osd_object_auth(const struct lu_env *env, struct dt_object *dt,
+                    struct lustre_capa *capa, __u64 opc)
 {
         const struct lu_fid *fid = lu_object_fid(&dt->do_lu);
         struct osd_device *dev = osd_dev(dt->do_lu.lo_dev);
@@ -1511,28 +1324,6 @@ static int osd_inode_setattr(const struct lu_env *env,
 
         LASSERT(!(bits & LA_TYPE)); /* Huh? You want too much. */
 
-#ifdef HAVE_QUOTA_SUPPORT
-        if ((bits & LA_UID && attr->la_uid != inode->i_uid) ||
-            (bits & LA_GID && attr->la_gid != inode->i_gid)) {
-                struct osd_ctxt *save = &osd_oti_get(env)->oti_ctxt;
-                struct iattr iattr;
-                int rc;
-
-                iattr.ia_valid = 0;
-                if (bits & LA_UID)
-                        iattr.ia_valid |= ATTR_UID;
-                if (bits & LA_GID)
-                        iattr.ia_valid |= ATTR_GID;
-                iattr.ia_uid = attr->la_uid;
-                iattr.ia_gid = attr->la_gid;
-                osd_push_ctxt(env, save);
-                rc = ll_vfs_dq_transfer(inode, &iattr) ? -EDQUOT : 0;
-                osd_pop_ctxt(save);
-                if (rc != 0)
-                        return rc;
-        }
-#endif
-
         if (bits & LA_ATIME)
                 inode->i_atime  = *osd_inode_time(env, inode, attr->la_atime);
         if (bits & LA_CTIME)
@@ -1577,6 +1368,7 @@ static int osd_attr_set(const struct lu_env *env,
                         struct lustre_capa *capa)
 {
         struct osd_object *obj = osd_dt_obj(dt);
+        struct inode      *inode;
         int rc;
 
         LASSERT(handle != NULL);
@@ -1588,12 +1380,35 @@ static int osd_attr_set(const struct lu_env *env,
 
         OSD_EXEC_OP(handle, attr_set);
 
+        inode = obj->oo_inode;
+#ifdef HAVE_QUOTA_SUPPORT
+        if ((attr->la_valid & LA_UID && attr->la_uid != inode->i_uid) ||
+            (attr->la_valid & LA_GID && attr->la_gid != inode->i_gid)) {
+                struct osd_ctxt *save = &osd_oti_get(env)->oti_ctxt;
+                struct iattr iattr;
+                int rc;
+
+                iattr.ia_valid = 0;
+                if (attr->la_valid & LA_UID)
+                        iattr.ia_valid |= ATTR_UID;
+                if (attr->la_valid & LA_GID)
+                        iattr.ia_valid |= ATTR_GID;
+                iattr.ia_uid = attr->la_uid;
+                iattr.ia_gid = attr->la_gid;
+                osd_push_ctxt(env, save);
+                rc = ll_vfs_dq_transfer(inode, &iattr) ? -EDQUOT : 0;
+                osd_pop_ctxt(save);
+                if (rc != 0)
+                        return rc;
+        }
+#endif
+
         cfs_spin_lock(&obj->oo_guard);
-        rc = osd_inode_setattr(env, obj->oo_inode, attr);
+        rc = osd_inode_setattr(env, inode, attr);
         cfs_spin_unlock(&obj->oo_guard);
 
         if (!rc)
-                obj->oo_inode->i_sb->s_op->dirty_inode(obj->oo_inode);
+                inode->i_sb->s_op->dirty_inode(inode);
         return rc;
 }
 
@@ -1617,27 +1432,13 @@ static int osd_create_post(struct osd_thread_info *info, struct osd_object *obj,
         return 0;
 }
 
-static struct dentry * osd_child_dentry_get(const struct lu_env *env,
-                                            struct osd_object *obj,
-                                            const char *name,
-                                            const int namelen)
+struct dentry *osd_child_dentry_get(const struct lu_env *env,
+                                    struct osd_object *obj,
+                                    const char *name, const int namelen)
 {
-        struct osd_thread_info *info   = osd_oti_get(env);
-        struct dentry *child_dentry = &info->oti_child_dentry;
-        struct dentry *obj_dentry = &info->oti_obj_dentry;
-
-        obj_dentry->d_inode = obj->oo_inode;
-        obj_dentry->d_sb = osd_sb(osd_obj2dev(obj));
-        obj_dentry->d_name.hash = 0;
-
-        child_dentry->d_name.hash = 0;
-        child_dentry->d_parent = obj_dentry;
-        child_dentry->d_name.name = name;
-        child_dentry->d_name.len = namelen;
-        return child_dentry;
+        return osd_child_dentry_by_inode(env, obj->oo_inode, name, namelen);
 }
 
-
 static int osd_mkfile(struct osd_thread_info *info, struct osd_object *obj,
                       cfs_umode_t mode,
                       struct dt_allocation_hint *hint,
@@ -1646,7 +1447,7 @@ static int osd_mkfile(struct osd_thread_info *info, struct osd_object *obj,
         int result;
         struct osd_device  *osd = osd_obj2dev(obj);
         struct osd_thandle *oth;
-        struct dt_object   *parent;
+        struct dt_object   *parent = NULL;
         struct inode       *inode;
 #ifdef HAVE_QUOTA_SUPPORT
         struct osd_ctxt    *save = &info->oti_ctxt;
@@ -1667,17 +1468,14 @@ static int osd_mkfile(struct osd_thread_info *info, struct osd_object *obj,
 
         if (hint && hint->dah_parent)
                 parent = hint->dah_parent;
-        else
-                parent = osd->od_obj_area;
-
-        LASSERT(parent != NULL);
-        LASSERT(osd_dt_obj(parent)->oo_inode->i_op != NULL);
 
 #ifdef HAVE_QUOTA_SUPPORT
         osd_push_ctxt(info->oti_env, save);
 #endif
         inode = ldiskfs_create_inode(oth->ot_handle,
-                                     osd_dt_obj(parent)->oo_inode, mode);
+                                     parent ? osd_dt_obj(parent)->oo_inode :
+                                              osd_sb(osd)->s_root->d_inode,
+                                     mode);
 #ifdef HAVE_QUOTA_SUPPORT
         osd_pop_ctxt(save);
 #endif
@@ -1902,7 +1700,7 @@ static int __osd_oi_insert(const struct lu_env *env, struct osd_object *obj,
         id->oii_ino = obj->oo_inode->i_ino;
         id->oii_gen = obj->oo_inode->i_generation;
 
-        return osd_oi_insert(info, osd_fid2oi(osd, fid), fid, id, th,
+        return osd_oi_insert(info, osd, fid, id, th,
                              uc->mu_cap & CFS_CAP_SYS_RESOURCE_MASK);
 }
 
@@ -1984,9 +1782,10 @@ static int osd_declare_object_destroy(const struct lu_env *env,
                                       struct dt_object *dt,
                                       struct thandle *th)
 {
-        struct osd_object      *obj = osd_dt_obj(dt);
-        struct inode           *inode = obj->oo_inode;
-        struct osd_thandle     *oh;
+        struct osd_object  *obj = osd_dt_obj(dt);
+        struct inode       *inode = obj->oo_inode;
+        struct osd_thandle *oh;
+
         ENTRY;
 
         oh = container_of0(th, struct osd_thandle, ot_super);
@@ -2034,8 +1833,7 @@ static int osd_object_destroy(const struct lu_env *env,
 
         OSD_EXEC_OP(th, destroy);
 
-        result = osd_oi_delete(osd_oti_get(env),
-                               osd_fid2oi(osd, fid), fid, th);
+        result = osd_oi_delete(osd_oti_get(env), osd, fid, th);
 
         /* XXX: add to ext3 orphan list */
         /* rc = ext3_orphan_add(handle_t *handle, struct inode *inode) */
@@ -2057,7 +1855,7 @@ static int __osd_xattr_set(const struct lu_env *env, struct dt_object *dt,
         struct osd_thread_info *info     = osd_oti_get(env);
         struct dentry          *dentry   = &info->oti_child_dentry;
         int                     fs_flags = 0;
-        int  rc;
+        int                     rc;
 
         LASSERT(dt_object_exists(dt));
         LASSERT(inode->i_op != NULL && inode->i_op->setxattr != NULL);
@@ -2109,17 +1907,6 @@ static inline void osd_igif_get(const struct lu_env *env, struct inode  *inode,
 }
 
 /**
- * Helper function to pack the fid, ldiskfs stores fid in packed format.
- */
-void osd_fid_pack(struct osd_fid_pack *pack, const struct dt_rec *fid,
-                  struct lu_fid *befider)
-{
-        fid_cpu_to_be(befider, (struct lu_fid *)fid);
-        memcpy(pack->fp_area, befider, sizeof(*befider));
-        pack->fp_len =  sizeof(*befider) + 1;
-}
-
-/**
  * ldiskfs supports fid in dirent, it is passed in dentry->d_fsdata.
  * lustre 1.8 also uses d_fsdata for passing other info to ldiskfs.
  * To have compatilibility with 1.8 ldiskfs driver we need to have
@@ -2137,23 +1924,6 @@ void osd_get_ldiskfs_dirent_param(struct ldiskfs_dentry_param *param,
                       (struct lu_fid *)fid);
 }
 
-int osd_fid_unpack(struct lu_fid *fid, const struct osd_fid_pack *pack)
-{
-        int result;
-
-        result = 0;
-        switch (pack->fp_len) {
-        case sizeof *fid + 1:
-                memcpy(fid, pack->fp_area, sizeof *fid);
-                fid_be_to_cpu(fid, fid);
-                break;
-        default:
-                CERROR("Unexpected packed fid size: %d\n", pack->fp_len);
-                result = -EIO;
-        }
-        return result;
-}
-
 /**
  * Try to read the fid from inode ea into dt_rec, if return value
  * i.e. rc is +ve, then we got fid, otherwise we will have to form igif
@@ -2222,15 +1992,15 @@ out:
  * \retval -ve, on error
  */
 static int osd_object_ea_create(const struct lu_env *env, struct dt_object *dt,
-                             struct lu_attr *attr,
-                             struct dt_allocation_hint *hint,
-                             struct dt_object_format *dof,
-                             struct thandle *th)
+                                struct lu_attr *attr,
+                                struct dt_allocation_hint *hint,
+                                struct dt_object_format *dof,
+                                struct thandle *th)
 {
         const struct lu_fid    *fid    = lu_object_fid(&dt->do_lu);
         struct osd_object      *obj    = osd_dt_obj(dt);
         struct osd_thread_info *info   = osd_oti_get(env);
-        int result;
+        int                     result;
 
         ENTRY;
 
@@ -2242,7 +2012,6 @@ static int osd_object_ea_create(const struct lu_env *env, struct dt_object *dt,
         OSD_EXEC_OP(th, create);
 
         result = __osd_object_create(info, obj, attr, hint, dof, th);
-
         /* objects under osd root shld have igif fid, so dont add fid EA */
         if (result == 0 && fid_seq(fid) >= FID_SEQ_NORMAL)
                 result = osd_ea_fid_set(env, dt, fid);
@@ -2256,8 +2025,8 @@ static int osd_object_ea_create(const struct lu_env *env, struct dt_object *dt,
 }
 
 static int osd_declare_object_ref_add(const struct lu_env *env,
-                               struct dt_object *dt,
-                               struct thandle *handle)
+                                      struct dt_object *dt,
+                                      struct thandle *handle)
 {
         struct osd_thandle *oh;
 
@@ -2277,11 +2046,10 @@ static int osd_declare_object_ref_add(const struct lu_env *env,
  * Concurrency: @dt is write locked.
  */
 static int osd_object_ref_add(const struct lu_env *env,
-                              struct dt_object *dt,
-                              struct thandle *th)
+                              struct dt_object *dt, struct thandle *th)
 {
         struct osd_object *obj = osd_dt_obj(dt);
-        struct inode *inode = obj->oo_inode;
+        struct inode      *inode = obj->oo_inode;
 
         LINVRNT(osd_invariant(obj));
         LASSERT(dt_object_exists(dt));
@@ -2307,7 +2075,7 @@ static int osd_object_ref_add(const struct lu_env *env,
                     inode->i_nlink == 2)
                         inode->i_nlink = 1;
         }
-        LASSERT(inode->i_nlink < LDISKFS_LINK_MAX);
+        LASSERT(inode->i_nlink <= LDISKFS_LINK_MAX);
         cfs_spin_unlock(&obj->oo_guard);
         inode->i_sb->s_op->dirty_inode(inode);
         LINVRNT(osd_invariant(obj));
@@ -2316,8 +2084,8 @@ static int osd_object_ref_add(const struct lu_env *env,
 }
 
 static int osd_declare_object_ref_del(const struct lu_env *env,
-                               struct dt_object *dt,
-                               struct thandle *handle)
+                                      struct dt_object *dt,
+                                      struct thandle *handle)
 {
         struct osd_thandle *oh;
 
@@ -2336,12 +2104,11 @@ static int osd_declare_object_ref_del(const struct lu_env *env,
 /*
  * Concurrency: @dt is write locked.
  */
-static int osd_object_ref_del(const struct lu_env *env,
-                              struct dt_object *dt,
+static int osd_object_ref_del(const struct lu_env *env, struct dt_object *dt,
                               struct thandle *th)
 {
         struct osd_object *obj = osd_dt_obj(dt);
-        struct inode *inode = obj->oo_inode;
+        struct inode      *inode = obj->oo_inode;
 
         LINVRNT(osd_invariant(obj));
         LASSERT(dt_object_exists(dt));
@@ -2382,10 +2149,8 @@ static int osd_object_version_get(const struct lu_env *env,
 /*
  * Concurrency: @dt is read locked.
  */
-static int osd_xattr_get(const struct lu_env *env,
-                         struct dt_object *dt,
-                         struct lu_buf *buf,
-                         const char *name,
+static int osd_xattr_get(const struct lu_env *env, struct dt_object *dt,
+                         struct lu_buf *buf, const char *name,
                          struct lustre_capa *capa)
 {
         struct osd_object      *obj    = osd_dt_obj(dt);
@@ -2414,7 +2179,8 @@ static int osd_xattr_get(const struct lu_env *env,
 }
 
 
-static int osd_declare_xattr_set(const struct lu_env *env, struct dt_object *dt,
+static int osd_declare_xattr_set(const struct lu_env *env,
+                                 struct dt_object *dt,
                                  const struct lu_buf *buf, const char *name,
                                  int fl, struct thandle *handle)
 {
@@ -2482,10 +2248,8 @@ static int osd_xattr_set(const struct lu_env *env, struct dt_object *dt,
 /*
  * Concurrency: @dt is read locked.
  */
-static int osd_xattr_list(const struct lu_env *env,
-                          struct dt_object *dt,
-                          struct lu_buf *buf,
-                          struct lustre_capa *capa)
+static int osd_xattr_list(const struct lu_env *env, struct dt_object *dt,
+                          struct lu_buf *buf, struct lustre_capa *capa)
 {
         struct osd_object      *obj    = osd_dt_obj(dt);
         struct inode           *inode  = obj->oo_inode;
@@ -2504,9 +2268,8 @@ static int osd_xattr_list(const struct lu_env *env,
 }
 
 static int osd_declare_xattr_del(const struct lu_env *env,
-                                 struct dt_object *dt,
-                                const char *name,
-                                struct thandle *handle)
+                                 struct dt_object *dt, const char *name,
+                                 struct thandle *handle)
 {
         struct osd_thandle *oh;
 
@@ -2525,10 +2288,8 @@ static int osd_declare_xattr_del(const struct lu_env *env,
 /*
  * Concurrency: @dt is write locked.
  */
-static int osd_xattr_del(const struct lu_env *env,
-                         struct dt_object *dt,
-                         const char *name,
-                         struct thandle *handle,
+static int osd_xattr_del(const struct lu_env *env, struct dt_object *dt,
+                         const char *name, struct thandle *handle,
                          struct lustre_capa *capa)
 {
         struct osd_object      *obj    = osd_dt_obj(dt);
@@ -2641,12 +2402,13 @@ static struct obd_capa *osd_capa_get(const struct lu_env *env,
 
 static int osd_object_sync(const struct lu_env *env, struct dt_object *dt)
 {
-        int rc;
         struct osd_object      *obj    = osd_dt_obj(dt);
         struct inode           *inode  = obj->oo_inode;
         struct osd_thread_info *info   = osd_oti_get(env);
         struct dentry          *dentry = &info->oti_obj_dentry;
         struct file            *file   = &info->oti_file;
+        int                     rc;
+
         ENTRY;
 
         dentry->d_inode = inode;
@@ -2867,277 +2629,6 @@ static const struct dt_object_operations osd_obj_ea_ops = {
         .do_data_get          = osd_data_get,
 };
 
-/*
- * Body operations.
- */
-
-/*
- * XXX: Another layering violation for now.
- *
- * We don't want to use ->f_op->read methods, because generic file write
- *
- *         - serializes on ->i_sem, and
- *
- *         - does a lot of extra work like balance_dirty_pages(),
- *
- * which doesn't work for globally shared files like /last-received.
- */
-static int osd_ldiskfs_readlink(struct inode *inode, char *buffer, int buflen)
-{
-        struct ldiskfs_inode_info *ei = LDISKFS_I(inode);
-
-        memcpy(buffer, (char*)ei->i_data, buflen);
-
-        return  buflen;
-}
-
-static int osd_ldiskfs_read(struct inode *inode, void *buf, int size,
-                            loff_t *offs)
-{
-        struct buffer_head *bh;
-        unsigned long block;
-        int osize = size;
-        int blocksize;
-        int csize;
-        int boffs;
-        int err;
-
-        /* prevent reading after eof */
-        spin_lock(&inode->i_lock);
-        if (i_size_read(inode) < *offs + size) {
-                size = i_size_read(inode) - *offs;
-                spin_unlock(&inode->i_lock);
-                if (size < 0) {
-                        CDEBUG(D_EXT2, "size %llu is too short to read @%llu\n",
-                               i_size_read(inode), *offs);
-                        return -EBADR;
-                } else if (size == 0) {
-                        return 0;
-                }
-        } else {
-                spin_unlock(&inode->i_lock);
-        }
-
-        blocksize = 1 << inode->i_blkbits;
-
-        while (size > 0) {
-                block = *offs >> inode->i_blkbits;
-                boffs = *offs & (blocksize - 1);
-                csize = min(blocksize - boffs, size);
-                bh = ldiskfs_bread(NULL, inode, block, 0, &err);
-                if (!bh) {
-                        CERROR("can't read block: %d\n", err);
-                        return err;
-                }
-
-                memcpy(buf, bh->b_data + boffs, csize);
-                brelse(bh);
-
-                *offs += csize;
-                buf += csize;
-                size -= csize;
-        }
-        return osize;
-}
-
-static ssize_t osd_read(const struct lu_env *env, struct dt_object *dt,
-                        struct lu_buf *buf, loff_t *pos,
-                        struct lustre_capa *capa)
-{
-        struct osd_object      *obj    = osd_dt_obj(dt);
-        struct inode           *inode  = obj->oo_inode;
-        int rc;
-
-        if (osd_object_auth(env, dt, capa, CAPA_OPC_BODY_READ))
-                RETURN(-EACCES);
-
-        /* Read small symlink from inode body as we need to maintain correct
-         * on-disk symlinks for ldiskfs.
-         */
-        if (S_ISLNK(obj->oo_dt.do_lu.lo_header->loh_attr) &&
-            (buf->lb_len <= sizeof (LDISKFS_I(inode)->i_data)))
-                rc = osd_ldiskfs_readlink(inode, buf->lb_buf, buf->lb_len);
-        else
-                rc = osd_ldiskfs_read(inode, buf->lb_buf, buf->lb_len, pos);
-
-        return rc;
-}
-
-static int osd_ldiskfs_writelink(struct inode *inode, char *buffer, int buflen)
-{
-
-        memcpy((char*)&LDISKFS_I(inode)->i_data, (char *)buffer,
-               buflen);
-        LDISKFS_I(inode)->i_disksize = buflen;
-        i_size_write(inode, buflen);
-        inode->i_sb->s_op->dirty_inode(inode);
-
-        return 0;
-}
-
-static int osd_ldiskfs_write_record(struct inode *inode, void *buf, int bufsize,
-                                    loff_t *offs, handle_t *handle)
-{
-        struct buffer_head *bh = NULL;
-        loff_t offset = *offs;
-        loff_t new_size = i_size_read(inode);
-        unsigned long block;
-        int blocksize = 1 << inode->i_blkbits;
-        int err = 0;
-        int size;
-        int boffs;
-        int dirty_inode = 0;
-
-        while (bufsize > 0) {
-                if (bh != NULL)
-                        brelse(bh);
-
-                block = offset >> inode->i_blkbits;
-                boffs = offset & (blocksize - 1);
-                size = min(blocksize - boffs, bufsize);
-                bh = ldiskfs_bread(handle, inode, block, 1, &err);
-                if (!bh) {
-                        CERROR("can't read/create block: %d\n", err);
-                        break;
-                }
-
-                err = ldiskfs_journal_get_write_access(handle, bh);
-                if (err) {
-                        CERROR("journal_get_write_access() returned error %d\n",
-                               err);
-                        break;
-                }
-                LASSERTF(boffs + size <= bh->b_size,
-                         "boffs %d size %d bh->b_size %lu",
-                         boffs, size, (unsigned long)bh->b_size);
-                memcpy(bh->b_data + boffs, buf, size);
-                err = ldiskfs_journal_dirty_metadata(handle, bh);
-                if (err)
-                        break;
-
-                if (offset + size > new_size)
-                        new_size = offset + size;
-                offset += size;
-                bufsize -= size;
-                buf += size;
-        }
-        if (bh)
-                brelse(bh);
-
-        /* correct in-core and on-disk sizes */
-        if (new_size > i_size_read(inode)) {
-                spin_lock(&inode->i_lock);
-                if (new_size > i_size_read(inode))
-                        i_size_write(inode, new_size);
-                if (i_size_read(inode) > LDISKFS_I(inode)->i_disksize) {
-                        LDISKFS_I(inode)->i_disksize = i_size_read(inode);
-                        dirty_inode = 1;
-                }
-                spin_unlock(&inode->i_lock);
-                if (dirty_inode)
-                        inode->i_sb->s_op->dirty_inode(inode);
-        }
-
-        if (err == 0)
-                *offs = offset;
-        return err;
-}
-
-static ssize_t osd_declare_write(const struct lu_env *env, struct dt_object *dt,
-                                 const loff_t size, loff_t pos,
-                                 struct thandle *handle)
-{
-        struct osd_thandle *oh;
-        int credits;
-
-        LASSERT(handle != NULL);
-
-        oh = container_of0(handle, struct osd_thandle, ot_super);
-        LASSERT(oh->ot_handle == NULL);
-
-        /* XXX: size == 0 or INT_MAX indicating a catalog header update or
-         *      llog write, see comment in mdd_declare_llog_record().
-         *
-         *      This hack should be removed in 2.3
-         */
-        if (size == DECLARE_LLOG_REWRITE)
-                credits = 2;
-        else if (size == DECLARE_LLOG_WRITE)
-                credits = 6;
-        else
-                credits = osd_dto_credits_noquota[DTO_WRITE_BLOCK];
-
-        OSD_DECLARE_OP(oh, write);
-        oh->ot_credits += credits;
-
-        if (osd_dt_obj(dt)->oo_inode == NULL)
-                return 0;
-
-        osd_declare_qid(dt, oh, USRQUOTA, osd_dt_obj(dt)->oo_inode->i_uid,
-                        osd_dt_obj(dt)->oo_inode);
-        osd_declare_qid(dt, oh, GRPQUOTA, osd_dt_obj(dt)->oo_inode->i_gid,
-                        osd_dt_obj(dt)->oo_inode);
-        return 0;
-}
-
-static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt,
-                         const struct lu_buf *buf, loff_t *pos,
-                         struct thandle *handle, struct lustre_capa *capa,
-                         int ignore_quota)
-{
-        struct osd_object  *obj   = osd_dt_obj(dt);
-        struct inode       *inode = obj->oo_inode;
-        struct osd_thandle *oh;
-        ssize_t            result = 0;
-#ifdef HAVE_QUOTA_SUPPORT
-        cfs_cap_t           save = cfs_curproc_cap_pack();
-#endif
-
-        LASSERT(handle != NULL);
-
-        if (osd_object_auth(env, dt, capa, CAPA_OPC_BODY_WRITE))
-                RETURN(-EACCES);
-
-        oh = container_of(handle, struct osd_thandle, ot_super);
-        LASSERT(oh->ot_handle->h_transaction != NULL);
-#ifdef HAVE_QUOTA_SUPPORT
-        if (ignore_quota)
-                cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
-        else
-                cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
-#endif
-        /* Write small symlink to inode body as we need to maintain correct
-         * on-disk symlinks for ldiskfs.
-         */
-        if(S_ISLNK(obj->oo_dt.do_lu.lo_header->loh_attr) &&
-           (buf->lb_len < sizeof (LDISKFS_I(inode)->i_data)))
-                result = osd_ldiskfs_writelink(inode, buf->lb_buf, buf->lb_len);
-        else
-                result = osd_ldiskfs_write_record(inode, buf->lb_buf,
-                                                  buf->lb_len, pos,
-                                                  oh->ot_handle);
-#ifdef HAVE_QUOTA_SUPPORT
-        cfs_curproc_cap_unpack(save);
-#endif
-        if (result == 0)
-                result = buf->lb_len;
-        return result;
-}
-
-/*
- * in some cases we may need declare methods for objects being created
- * e.g., when we create symlink
- */
-static const struct dt_body_operations osd_body_ops_new = {
-        .dbo_declare_write = osd_declare_write,
-};
-
-static const struct dt_body_operations osd_body_ops = {
-        .dbo_read          = osd_read,
-        .dbo_declare_write = osd_declare_write,
-        .dbo_write         = osd_write
-};
-
 static int osd_index_declare_iam_delete(const struct lu_env *env,
                                         struct dt_object *dt,
                                         const struct dt_key *key,
@@ -3167,14 +2658,15 @@ static int osd_index_declare_iam_delete(const struct lu_env *env,
  */
 
 static int osd_index_iam_delete(const struct lu_env *env, struct dt_object *dt,
-                                const struct dt_key *key, struct thandle *handle,
+                                const struct dt_key *key,
+                                struct thandle *handle,
                                 struct lustre_capa *capa)
 {
         struct osd_object     *obj = osd_dt_obj(dt);
         struct osd_thandle    *oh;
         struct iam_path_descr *ipd;
         struct iam_container  *bag = &obj->oo_dir->od_container;
-        int rc;
+        int                    rc;
 
         ENTRY;
 
@@ -3231,7 +2723,7 @@ static inline int osd_get_fid_from_dentry(struct ldiskfs_dir_entry_2 *de,
                                           struct dt_rec *fid)
 {
         struct osd_fid_pack *rec;
-        int rc = -ENODATA;
+        int                  rc = -ENODATA;
 
         if (de->file_type & LDISKFS_DIRENT_LUFID) {
                 rec = (struct osd_fid_pack *) (de->name + de->name_len + 1);
@@ -3251,7 +2743,8 @@ static inline int osd_get_fid_from_dentry(struct ldiskfs_dir_entry_2 *de,
  * \retval -ve, on error
  */
 static int osd_index_ea_delete(const struct lu_env *env, struct dt_object *dt,
-                               const struct dt_key *key, struct thandle *handle,
+                               const struct dt_key *key,
+                               struct thandle *handle,
                                struct lustre_capa *capa)
 {
         struct osd_object          *obj    = osd_dt_obj(dt);
@@ -3261,8 +2754,7 @@ static int osd_index_ea_delete(const struct lu_env *env, struct dt_object *dt,
         struct ldiskfs_dir_entry_2 *de;
         struct buffer_head         *bh;
         struct htree_lock          *hlock = NULL;
-
-        int rc;
+        int                         rc;
 
         ENTRY;
 
@@ -3322,13 +2814,14 @@ static int osd_index_iam_lookup(const struct lu_env *env, struct dt_object *dt,
                                 struct dt_rec *rec, const struct dt_key *key,
                                 struct lustre_capa *capa)
 {
-        struct osd_object     *obj = osd_dt_obj(dt);
-        struct iam_path_descr *ipd;
-        struct iam_container  *bag = &obj->oo_dir->od_container;
+        struct osd_object      *obj = osd_dt_obj(dt);
+        struct iam_path_descr  *ipd;
+        struct iam_container   *bag = &obj->oo_dir->od_container;
         struct osd_thread_info *oti = osd_oti_get(env);
         struct iam_iterator    *it = &oti->oti_idx_it;
-        struct iam_rec *iam_rec;
-        int rc;
+        struct iam_rec         *iam_rec;
+        int                     rc;
+
         ENTRY;
 
         LASSERT(osd_invariant(obj));
@@ -3398,9 +2891,9 @@ static int osd_index_declare_iam_insert(const struct lu_env *env,
  *      \retval -ve failure
  */
 static int osd_index_iam_insert(const struct lu_env *env, struct dt_object *dt,
-                                const struct dt_rec *rec, const struct dt_key *key,
-                                struct thandle *th, struct lustre_capa *capa,
-                                int ignore_quota)
+                                const struct dt_rec *rec,
+                                const struct dt_key *key, struct thandle *th,
+                                struct lustre_capa *capa, int ignore_quota)
 {
         struct osd_object     *obj = osd_dt_obj(dt);
         struct iam_path_descr *ipd;
@@ -3410,8 +2903,8 @@ static int osd_index_iam_insert(const struct lu_env *env, struct dt_object *dt,
         cfs_cap_t              save = cfs_curproc_cap_pack();
 #endif
         struct osd_thread_info *oti = osd_oti_get(env);
-        struct iam_rec *iam_rec = (struct iam_rec *)oti->oti_ldp;
-        int rc;
+        struct iam_rec         *iam_rec = (struct iam_rec *)oti->oti_ldp;
+        int                     rc;
 
         ENTRY;
 
@@ -3461,17 +2954,14 @@ static int osd_index_iam_insert(const struct lu_env *env, struct dt_object *dt,
  * \retval -ve, on error
  */
 static int __osd_ea_add_rec(struct osd_thread_info *info,
-                            struct osd_object *pobj,
-                            struct inode  *cinode,
-                            const char *name,
-                            const struct dt_rec *fid,
-                            struct htree_lock *hlock,
-                            struct thandle *th)
+                            struct osd_object *pobj, struct inode  *cinode,
+                            const char *name, const struct dt_rec *fid,
+                            struct htree_lock *hlock, struct thandle *th)
 {
         struct ldiskfs_dentry_param *ldp;
-        struct dentry      *child;
-        struct osd_thandle *oth;
-        int rc;
+        struct dentry               *child;
+        struct osd_thandle          *oth;
+        int                          rc;
 
         oth = container_of(th, struct osd_thandle, ot_super);
         LASSERT(oth->ot_handle != NULL);
@@ -3479,13 +2969,19 @@ static int __osd_ea_add_rec(struct osd_thread_info *info,
 
         child = osd_child_dentry_get(info->oti_env, pobj, name, strlen(name));
 
+        /* XXX: remove fid_is_igif() check here.
+         * IGIF check is just to handle insertion of .. when it is 'ROOT',
+         * it is IGIF now but needs FID in dir entry as well for readdir
+         * to work.
+         * LU-838 should fix that and remove fid_is_igif() check */
         if (fid_is_igif((struct lu_fid *)fid) ||
             fid_is_norm((struct lu_fid *)fid)) {
                 ldp = (struct ldiskfs_dentry_param *)info->oti_ldp;
                 osd_get_ldiskfs_dirent_param(ldp, fid);
-                child->d_fsdata = (void*) ldp;
-        } else
+                child->d_fsdata = (void *)ldp;
+        } else {
                 child->d_fsdata = NULL;
+        }
         rc = osd_ldiskfs_add_entry(oth->ot_handle, child, cinode, hlock);
 
         RETURN(rc);
@@ -3510,10 +3006,10 @@ static int osd_add_dot_dotdot(struct osd_thread_info *info,
                               const struct dt_rec *dot_dot_fid,
                               struct thandle *th)
 {
-        struct inode            *inode  = dir->oo_inode;
+        struct inode                *inode = dir->oo_inode;
         struct ldiskfs_dentry_param *dot_ldp;
         struct ldiskfs_dentry_param *dot_dot_ldp;
-        struct osd_thandle      *oth;
+        struct osd_thandle          *oth;
         int result = 0;
 
         oth = container_of(th, struct osd_thandle, ot_super);
@@ -3534,7 +3030,7 @@ static int osd_add_dot_dotdot(struct osd_thread_info *info,
 
                 if (!dir->oo_compat_dot_created)
                         return -EINVAL;
-                if (fid_seq((struct lu_fid *)dot_fid) >= FID_SEQ_NORMAL) {
+                if (!fid_is_igif((struct lu_fid *)dot_fid)) {
                         osd_get_ldiskfs_dirent_param(dot_ldp, dot_fid);
                         osd_get_ldiskfs_dirent_param(dot_dot_ldp, dot_dot_fid);
                 } else {
@@ -3561,16 +3057,13 @@ static int osd_add_dot_dotdot(struct osd_thread_info *info,
  * It will call the appropriate osd_add* function and return the
  * value, return by respective functions.
  */
-static int osd_ea_add_rec(const struct lu_env *env,
-                          struct osd_object *pobj,
-                          struct inode *cinode,
-                          const char *name,
-                          const struct dt_rec *fid,
-                          struct thandle *th)
-{
-        struct osd_thread_info    *info   = osd_oti_get(env);
-        struct htree_lock         *hlock;
-        int rc;
+static int osd_ea_add_rec(const struct lu_env *env, struct osd_object *pobj,
+                          struct inode *cinode, const char *name,
+                          const struct dt_rec *fid, struct thandle *th)
+{
+        struct osd_thread_info *info   = osd_oti_get(env);
+        struct htree_lock      *hlock;
+        int                     rc;
 
         hlock = pobj->oo_hl_head != NULL ? info->oti_hlock : NULL;
 
@@ -3621,8 +3114,8 @@ static int osd_ea_lookup_rec(const struct lu_env *env, struct osd_object *obj,
         struct buffer_head         *bh;
         struct lu_fid              *fid = (struct lu_fid *) rec;
         struct htree_lock          *hlock = NULL;
-        int ino;
-        int rc;
+        int                         ino;
+        int                         rc;
 
         LASSERT(dir->i_op != NULL && dir->i_op->lookup != NULL);
 
@@ -3669,10 +3162,10 @@ struct osd_object *osd_object_find(const struct lu_env *env,
                                    struct dt_object *dt,
                                    const struct lu_fid *fid)
 {
-        struct lu_device         *ludev = dt->do_lu.lo_dev;
-        struct osd_object        *child = NULL;
-        struct lu_object         *luch;
-        struct lu_object         *lo;
+        struct lu_device  *ludev = dt->do_lu.lo_dev;
+        struct osd_object *child = NULL;
+        struct lu_object  *luch;
+        struct lu_object  *lo;
 
         luch = lu_object_find(env, ludev, fid, NULL);
         if (!IS_ERR(luch)) {
@@ -3755,14 +3248,14 @@ static int osd_index_ea_insert(const struct lu_env *env, struct dt_object *dt,
                                const struct dt_key *key, struct thandle *th,
                                struct lustre_capa *capa, int ignore_quota)
 {
-        struct osd_object        *obj   = osd_dt_obj(dt);
-        struct lu_fid            *fid   = (struct lu_fid *) rec;
-        const char               *name  = (const char *)key;
-        struct osd_object        *child;
+        struct osd_object *obj   = osd_dt_obj(dt);
+        struct lu_fid     *fid   = (struct lu_fid *) rec;
+        const char        *name  = (const char *)key;
+        struct osd_object *child;
 #ifdef HAVE_QUOTA_SUPPORT
-        cfs_cap_t                 save  = cfs_curproc_cap_pack();
+        cfs_cap_t          save  = cfs_curproc_cap_pack();
 #endif
-        int rc;
+        int                rc;
 
         ENTRY;
 
@@ -3805,12 +3298,12 @@ static struct dt_it *osd_it_iam_init(const struct lu_env *env,
                                      __u32 unused,
                                      struct lustre_capa *capa)
 {
-        struct osd_it_iam         *it;
+        struct osd_it_iam      *it;
         struct osd_thread_info *oti = osd_oti_get(env);
-        struct osd_object     *obj = osd_dt_obj(dt);
-        struct lu_object      *lo  = &dt->do_lu;
-        struct iam_path_descr *ipd;
-        struct iam_container  *bag = &obj->oo_dir->od_container;
+        struct osd_object      *obj = osd_dt_obj(dt);
+        struct lu_object       *lo  = &dt->do_lu;
+        struct iam_path_descr  *ipd;
+        struct iam_container   *bag = &obj->oo_dir->od_container;
 
         LASSERT(lu_object_exists(lo));
 
@@ -3835,7 +3328,7 @@ static struct dt_it *osd_it_iam_init(const struct lu_env *env,
 
 static void osd_it_iam_fini(const struct lu_env *env, struct dt_it *di)
 {
-        struct osd_it_iam     *it = (struct osd_it_iam *)di;
+        struct osd_it_iam *it = (struct osd_it_iam *)di;
         struct osd_object *obj = it->oi_obj;
 
         iam_it_fini(&it->oi_it);
@@ -3855,7 +3348,7 @@ static void osd_it_iam_fini(const struct lu_env *env, struct dt_it *di)
  */
 
 static int osd_it_iam_get(const struct lu_env *env,
-                      struct dt_it *di, const struct dt_key *key)
+                          struct dt_it *di, const struct dt_key *key)
 {
         struct osd_it_iam *it = (struct osd_it_iam *)di;
 
@@ -3915,13 +3408,11 @@ static int osd_it_iam_key_size(const struct lu_env *env, const struct dt_it *di)
         return iam_it_key_size(&it->oi_it);
 }
 
-static inline void osd_it_append_attrs(struct lu_dirent*ent,
-                                       __u32 attr,
-                                       int len,
-                                       __u16 type)
+static inline void osd_it_append_attrs(struct lu_dirent *ent, __u32 attr,
+                                       int len, __u16 type)
 {
-        struct luda_type        *lt;
-        const unsigned           align = sizeof(struct luda_type) - 1;
+        struct luda_type *lt;
+        const unsigned    align = sizeof(struct luda_type) - 1;
 
         /* check if file type is required */
         if (attr & LUDA_TYPE) {
@@ -3940,12 +3431,9 @@ static inline void osd_it_append_attrs(struct lu_dirent*ent,
  */
 
 static inline void osd_it_pack_dirent(struct lu_dirent *ent,
-                                      struct lu_fid *fid,
-                                      __u64 offset,
-                                      char *name,
-                                      __u16 namelen,
-                                      __u16 type,
-                                      __u32 attr)
+                                      struct lu_fid *fid, __u64 offset,
+                                      char *name, __u16 namelen,
+                                      __u16 type, __u32 attr)
 {
         fid_cpu_to_le(&ent->lde_fid, fid);
         ent->lde_attrs = LUDA_FID;
@@ -3965,8 +3453,7 @@ static inline void osd_it_pack_dirent(struct lu_dirent *ent,
  */
 static int osd_it_iam_rec(const struct lu_env *env,
                           const struct dt_it *di,
-                          struct dt_rec *dtrec,
-                          __u32 attr)
+                          struct dt_rec *dtrec, __u32 attr)
 {
         struct osd_it_iam *it        = (struct osd_it_iam *)di;
         struct osd_thread_info *info = osd_oti_get(env);
@@ -4023,7 +3510,7 @@ static __u64 osd_it_iam_store(const struct lu_env *env, const struct dt_it *di)
  */
 
 static int osd_it_iam_load(const struct lu_env *env,
-                       const struct dt_it *di, __u64 hash)
+                           const struct dt_it *di, __u64 hash)
 {
         struct osd_it_iam *it = (struct osd_it_iam *)di;
 
@@ -4288,8 +3775,8 @@ static struct dt_key *osd_it_ea_key(const struct lu_env *env,
                                     const struct dt_it *di)
 {
         struct osd_it_ea *it = (struct osd_it_ea *)di;
-        ENTRY;
-        RETURN((struct dt_key *)it->oie_dirent->oied_name);
+
+        return (struct dt_key *)it->oie_dirent->oied_name;
 }
 
 /**
@@ -4302,8 +3789,8 @@ static struct dt_key *osd_it_ea_key(const struct lu_env *env,
 static int osd_it_ea_key_size(const struct lu_env *env, const struct dt_it *di)
 {
         struct osd_it_ea *it = (struct osd_it_ea *)di;
-        ENTRY;
-        RETURN(it->oie_dirent->oied_namelen);
+
+        return it->oie_dirent->oied_namelen;
 }
 
 
@@ -4320,8 +3807,7 @@ static int osd_it_ea_key_size(const struct lu_env *env, const struct dt_it *di)
  */
 static inline int osd_it_ea_rec(const struct lu_env *env,
                                 const struct dt_it *di,
-                                struct dt_rec *dtrec,
-                                __u32 attr)
+                                struct dt_rec *dtrec, __u32 attr)
 {
         struct osd_it_ea        *it     = (struct osd_it_ea *)di;
         struct osd_object       *obj    = it->oie_obj;
@@ -4354,8 +3840,8 @@ static inline int osd_it_ea_rec(const struct lu_env *env,
 static __u64 osd_it_ea_store(const struct lu_env *env, const struct dt_it *di)
 {
         struct osd_it_ea *it = (struct osd_it_ea *)di;
-        ENTRY;
-        RETURN(it->oie_dirent->oied_off);
+
+        return it->oie_dirent->oied_off;
 }
 
 /**
@@ -4490,8 +3976,8 @@ static void osd_key_exit(const struct lu_context *ctx,
 /* type constructor/destructor: osd_type_init, osd_type_fini */
 LU_TYPE_INIT_FINI(osd, &osd_key);
 
-static struct lu_context_key osd_key = {
-        .lct_tags = LCT_DT_THREAD | LCT_MD_THREAD,
+struct lu_context_key osd_key = {
+        .lct_tags = LCT_DT_THREAD | LCT_MD_THREAD | LCT_MG_THREAD | LCT_LOCAL,
         .lct_init = osd_key_init,
         .lct_fini = osd_key_fini,
         .lct_exit = osd_key_exit
@@ -4507,13 +3993,16 @@ static int osd_device_init(const struct lu_env *env, struct lu_device *d,
 static int osd_shutdown(const struct lu_env *env, struct osd_device *o)
 {
         struct osd_thread_info *info = osd_oti_get(env);
+
         ENTRY;
-        if (o->od_obj_area != NULL) {
-                lu_object_put(env, &o->od_obj_area->do_lu);
-                o->od_obj_area = NULL;
-        }
+
         if (o->od_oi_table != NULL)
-                osd_oi_fini(info, &o->od_oi_table, o->od_oi_count);
+                osd_oi_fini(info, o);
+
+        if (o->od_fsops) {
+                fsfilt_put_ops(o->od_fsops);
+                o->od_fsops = NULL;
+        }
 
         RETURN(0);
 }
@@ -4525,8 +4014,16 @@ static int osd_mount(const struct lu_env *env,
         const char               *dev  = lustre_cfg_string(cfg, 0);
         struct lustre_disk_data  *ldd;
         struct lustre_sb_info    *lsi;
+        int                       rc = 0;
 
         ENTRY;
+
+        o->od_fsops = fsfilt_get_ops(mt_str(LDD_MT_LDISKFS));
+        if (o->od_fsops == NULL) {
+                CERROR("Can't find fsfilt_ldiskfs\n");
+                RETURN(-ENOTSUPP);
+        }
+
         if (o->od_mount != NULL) {
                 CERROR("Already mounted (%s)\n", dev);
                 RETURN(-EEXIST);
@@ -4542,18 +4039,24 @@ static int osd_mount(const struct lu_env *env,
         LASSERT(lmi != NULL);
         /* save lustre_mount_info in dt_device */
         o->od_mount = lmi;
+        o->od_mnt = lmi->lmi_mnt;
 
         lsi = s2lsi(lmi->lmi_sb);
         ldd = lsi->lsi_ldd;
 
         if (ldd->ldd_flags & LDD_F_IAM_DIR) {
                 o->od_iop_mode = 0;
-                LCONSOLE_WARN("OSD: IAM mode enabled\n");
+                LCONSOLE_WARN("%s: OSD: IAM mode enabled\n", dev);
         } else
                 o->od_iop_mode = 1;
 
-        o->od_obj_area = NULL;
-        RETURN(0);
+        if (ldd->ldd_flags & LDD_F_SV_TYPE_OST) {
+                rc = osd_compat_init(o);
+                if (rc)
+                        CERROR("%s: can't initialize compats: %d\n", dev, rc);
+        }
+
+        RETURN(rc);
 }
 
 static struct lu_device *osd_device_fini(const struct lu_env *env,
@@ -4562,6 +4065,8 @@ static struct lu_device *osd_device_fini(const struct lu_env *env,
         int rc;
         ENTRY;
 
+        osd_compat_fini(osd_dev(d));
+
         shrink_dcache_sb(osd_sb(osd_dev(d)));
         osd_sync(env, lu2dt_dev(d));
 
@@ -4651,49 +4156,25 @@ static int osd_recovery_complete(const struct lu_env *env,
         RETURN(0);
 }
 
-static int osd_prepare(const struct lu_env *env,
-                       struct lu_device *pdev,
+static int osd_prepare(const struct lu_env *env, struct lu_device *pdev,
                        struct lu_device *dev)
 {
-        struct osd_device *osd = osd_dev(dev);
-        struct lustre_sb_info *lsi;
-        struct lustre_disk_data *ldd;
-        struct lustre_mount_info  *lmi;
+        struct osd_device      *osd = osd_dev(dev);
         struct osd_thread_info *oti = osd_oti_get(env);
-        struct dt_object *d;
-        int result;
+        int                     result;
 
         ENTRY;
+
         /* 1. initialize oi before any file create or file open */
-        result = osd_oi_init(oti, &osd->od_oi_table,
-                             &osd->od_dt_dev, lu2md_dev(pdev));
+        result = osd_oi_init(oti, osd);
         if (result < 0)
                 RETURN(result);
 
-        LASSERT(result > 0);
-        osd->od_oi_count = result;
-
-        lmi = osd->od_mount;
-        lsi = s2lsi(lmi->lmi_sb);
-        ldd = lsi->lsi_ldd;
+        if (!lu_device_is_md(pdev))
+                RETURN(0);
 
         /* 2. setup local objects */
         result = llo_local_objects_setup(env, lu2md_dev(pdev), lu2dt_dev(dev));
-        if (result)
-                goto out;
-
-        /* 3. open remote object dir */
-        d = dt_store_open(env, lu2dt_dev(dev), "",
-                          remote_obj_dir, &oti->oti_fid);
-        if (!IS_ERR(d)) {
-                osd->od_obj_area = d;
-                result = 0;
-        } else {
-                result = PTR_ERR(d);
-                osd->od_obj_area = NULL;
-        }
-
-out:
         RETURN(result);
 }
 
@@ -4706,7 +4187,7 @@ static const struct lu_object_operations osd_lu_obj_ops = {
         .loo_object_invariant = osd_object_invariant
 };
 
-static const struct lu_device_operations osd_lu_ops = {
+const struct lu_device_operations osd_lu_ops = {
         .ldo_object_alloc      = osd_object_alloc,
         .ldo_process_config    = osd_process_config,
         .ldo_recovery_complete = osd_recovery_complete,
@@ -4731,7 +4212,7 @@ static struct lu_device_type osd_device_type = {
         .ldt_tags     = LU_DEVICE_DT,
         .ldt_name     = LUSTRE_OSD_NAME,
         .ldt_ops      = &osd_device_type_ops,
-        .ldt_ctx_tags = LCT_MD_THREAD|LCT_DT_THREAD
+        .ldt_ctx_tags = LCT_LOCAL,
 };
 
 /*
@@ -4741,19 +4222,11 @@ static struct obd_ops osd_obd_device_ops = {
         .o_owner = THIS_MODULE
 };
 
-static struct lu_local_obj_desc llod_osd_rem_obj_dir = {
-        .llod_name      = remote_obj_dir,
-        .llod_oid       = OSD_REM_OBJ_DIR_OID,
-        .llod_is_index  = 1,
-        .llod_feat      = &dt_directory_features,
-};
-
 static int __init osd_mod_init(void)
 {
         struct lprocfs_static_vars lvars;
 
         osd_oi_mod_init();
-        llo_local_obj_register(&llod_osd_rem_obj_dir);
         lprocfs_osd_init_vars(&lvars);
         return class_register_type(&osd_obd_device_ops, NULL, lvars.module_vars,
                                    LUSTRE_OSD_NAME, &osd_device_type);
@@ -4761,7 +4234,6 @@ static int __init osd_mod_init(void)
 
 static void __exit osd_mod_exit(void)
 {
-        llo_local_obj_unregister(&llod_osd_rem_obj_dir);
         class_unregister_type(LUSTRE_OSD_NAME);
 }
 
@@ -4769,4 +4241,4 @@ MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
 MODULE_DESCRIPTION("Lustre Object Storage Device ("LUSTRE_OSD_NAME")");
 MODULE_LICENSE("GPL");
 
-cfs_module(osd, "0.0.2", osd_mod_init, osd_mod_exit);
+cfs_module(osd, "0.1.0", osd_mod_init, osd_mod_exit);