Whamcloud - gitweb
LU-7343 osd-ldiskfs: handle ldiskfs_append failure
[fs/lustre-release.git] / lustre / osd-ldiskfs / osd_internal.h
index e67bd62..7ad0c24 100644 (file)
@@ -27,7 +27,7 @@
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, 2012, Whamcloud, Inc.
+ * Copyright (c) 2011, 2015, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
 #ifndef _OSD_INTERNAL_H
 #define _OSD_INTERNAL_H
 
-#if defined(__KERNEL__)
 
+/* struct mutex */
+#include <linux/mutex.h>
 /* struct rw_semaphore */
 #include <linux/rwsem.h>
 /* struct dentry */
 #include <linux/dcache.h>
 /* struct dirent64 */
 #include <linux/dirent.h>
-
+#include <linux/statfs.h>
 #include <ldiskfs/ldiskfs.h>
 #include <ldiskfs/ldiskfs_jbd2.h>
-#ifdef HAVE_LDISKFS_JOURNAL_CALLBACK_ADD
-# define journal_callback ldiskfs_journal_cb_entry
-# define osd_journal_callback_set(handle, func, jcb) \
-         ldiskfs_journal_callback_add(handle, func, jcb)
-#else
-# define osd_journal_callback_set(handle, func, jcb) \
-         jbd2_journal_callback_set(handle, func, jcb)
-#endif
-
-/* fsfilt_{get|put}_ops */
-#include <lustre_fsfilt.h>
 
 /* LUSTRE_OSD_NAME */
 #include <obd.h>
 #include "osd_quota_fmt.h"
 
 struct inode;
+extern struct kmem_cache *dynlock_cachep;
 
 #define OSD_COUNTERS (0)
 
-/* Lustre special inode::i_state to indicate OI scrub skip this inode. */
-#define I_LUSTRE_NOSCRUB       (1 << 31)
+/* ldiskfs special inode::i_state_flags need to be accessed with
+ * ldiskfs_{set,clear,test}_inode_state() only */
+
+/* OI scrub should skip this inode. */
+#define LDISKFS_STATE_LUSTRE_NOSCRUB   31
+#define LDISKFS_STATE_LUSTRE_DESTROY   30
 
 /** Enable thandle usage statistics */
 #define OSD_THANDLE_STATS (0)
 
+#define MAX_OBJID_GROUP (FID_SEQ_ECHO + 1)
+
+#define OBJECTS        "OBJECTS"
+#define ADMIN_USR      "admin_quotafile_v2.usr"
+#define ADMIN_GRP      "admin_quotafile_v2.grp"
+
 struct osd_directory {
         struct iam_container od_container;
         struct iam_descr     od_descr;
@@ -126,6 +127,8 @@ struct osd_object {
        struct osd_directory    *oo_dir;
        /** protects inode attributes. */
        spinlock_t              oo_guard;
+
+       __u32                   oo_destroyed:1;
         /**
          * Following two members are used to indicate the presence of dot and
          * dotdot in the given directory. This is required for interop mode
@@ -140,62 +143,43 @@ struct osd_object {
 #endif
 };
 
-#ifdef HAVE_LDISKFS_PDO
-
-#define osd_ldiskfs_find_entry(dir, dentry, de, lock)   \
-        ll_ldiskfs_find_entry(dir, dentry, de, lock)
-#define osd_ldiskfs_add_entry(handle, child, cinode, hlock) \
-        ldiskfs_add_entry(handle, child, cinode, hlock)
-
-#else /* HAVE_LDISKFS_PDO */
-
-struct htree_lock {
-        int     dummy;
+struct osd_obj_seq { /* state for one sequence directory O/<seq> and its d0-dXX subdirectories */
+       /* protects on-fly initialization -- NOTE(review): no lock member follows this comment in the struct as shown; possibly stale */
+       int              oos_subdir_count; /* subdir count for each seq */
+       struct dentry    *oos_root;        /* O/<seq> */
+       struct dentry    **oos_dirs;       /* dentry pointers for O/<seq>/d0-dXX */
+       u64              oos_seq;          /* seq number */
+       struct list_head oos_seq_list;     /* list linkage; presumably onto osd_obj_map::om_seq_list -- TODO confirm */
 };
 
-struct htree_lock_head {
-        int     dummy;
+struct osd_obj_map {
+       struct dentry    *om_root;        /* dentry for /O */
+       rwlock_t         om_seq_list_lock; /* lock for seq_list */
+       struct list_head om_seq_list;      /* list head for seq */
+       int              om_subdir_count;
+       struct mutex     om_dir_init_mutex;
 };
 
-#define ldiskfs_htree_lock(lock, head, inode, op)  do { LBUG(); } while (0)
-#define ldiskfs_htree_unlock(lock)                 do { LBUG(); } while (0)
-
-static inline struct htree_lock_head *ldiskfs_htree_lock_head_alloc(int dep)
-{
-        LBUG();
-        return NULL;
-}
-
-#define ldiskfs_htree_lock_head_free(lh)           do { LBUG(); } while (0)
-
-#define LDISKFS_DUMMY_HTREE_LOCK        0xbabecafe
-
-static inline struct htree_lock *ldiskfs_htree_lock_alloc(void)
-{
-        return (struct htree_lock *)LDISKFS_DUMMY_HTREE_LOCK;
-}
-
-static inline void ldiskfs_htree_lock_free(struct htree_lock *lk)
-{
-        LASSERT((unsigned long)lk == LDISKFS_DUMMY_HTREE_LOCK);
-}
-
-#define HTREE_HBITS_DEF         0
+struct osd_mdobj { /* describes one AGENT/<index> directory together with its MDT index */
+       struct dentry   *om_root;      /* AGENT/<index> */
+       u64              om_index;     /* mdt index */
+       struct list_head om_list;      /* list to omm_list -- NOTE(review): no omm_list member is visible in struct osd_mdobj_map in this view */
+};
 
-#define osd_ldiskfs_find_entry(dir, dentry, de, lock)   \
-        ll_ldiskfs_find_entry(dir, dentry, de)
-#define osd_ldiskfs_add_entry(handle, child, cinode, lock) \
-        ldiskfs_add_entry(handle, child, cinode)
+struct osd_mdobj_map {
+       struct dentry   *omm_remote_parent;
+};
 
-#endif /* HAVE_LDISKFS_PDO */
+#define osd_ldiskfs_add_entry(handle, child, cinode, hlock) \
+       __ldiskfs_add_entry(handle, child, cinode, hlock)
 
-#define OSD_OTABLE_IT_CACHE_SIZE       128
+#define OSD_OTABLE_IT_CACHE_SIZE       64
 #define OSD_OTABLE_IT_CACHE_MASK       (~(OSD_OTABLE_IT_CACHE_SIZE - 1))
 
 struct osd_inconsistent_item {
        /* link into osd_scrub::os_inconsistent_items,
         * protected by osd_scrub::os_lock. */
-       cfs_list_t             oii_list;
+       struct list_head       oii_list;
 
        /* The right FID <=> ino#/gen mapping. */
        struct osd_idmap_cache oii_cache;
@@ -223,9 +207,6 @@ struct osd_otable_it {
        struct osd_device       *ooi_dev;
        struct osd_otable_cache  ooi_cache;
 
-       /* For osd_otable_it_key. */
-       __u8                     ooi_key[16];
-
        /* The following bits can be updated/checked w/o lock protection.
         * If more bits will be introduced in the future and need lock to
         * protect, please add comment. */
@@ -238,7 +219,11 @@ struct osd_otable_it {
                                 ooi_waiting:1; /* it::next is waiting. */
 };
 
-extern const int osd_dto_credits_noquota[];
+struct osd_obj_orphan { /* one orphan record; presumably an entry on osd_device::od_orphan_list -- TODO confirm */
+       struct list_head oor_list; /* list linkage for this record */
+       struct lu_env   *oor_env; /* to identify "own" records */
+       __u32 oor_ino; /* inode number recorded for this entry */
+};
 
 /*
  * osd device.
@@ -256,70 +241,75 @@ struct osd_device {
          * Fid Capability
          */
        unsigned int              od_fl_capa:1,
-                                 od_is_md:1; /* set in ->ldo_prepare */
-        unsigned long             od_capa_timeout;
-        __u32                     od_capa_alg;
-        struct lustre_capa_key   *od_capa_keys;
-        cfs_hlist_head_t         *od_capa_hash;
-
-        cfs_proc_dir_entry_t     *od_proc_entry;
-        struct lprocfs_stats     *od_stats;
-        /*
-         * statfs optimization: we cache a bit.
-         */
-        cfs_time_t                od_osfs_age;
-        struct obd_statfs         od_statfs;
-       spinlock_t                od_osfs_lock;
+                                 od_maybe_new:1,
+                                 od_noscrub:1,
+                                 od_igif_inoi:1,
+                                 od_check_ff:1,
+                                 od_is_ost:1,
+                                 od_index_in_idif:1;
+
+       __u32                     od_dirent_journal;
+       int                       od_index;
+       struct proc_dir_entry    *od_proc_entry;
+       struct lprocfs_stats     *od_stats;
 
-       unsigned int              od_noscrub:1;
+       spinlock_t                od_osfs_lock;
 
-       struct fsfilt_operations *od_fsops;
        int                       od_connects;
        struct lu_site            od_site;
 
-        /*
-         * mapping for legacy OST objids
-         */
-        struct osd_compat_objid  *od_ost_map;
+       struct osd_obj_map      *od_ost_map;
+       struct osd_mdobj_map    *od_mdt_map;
 
-        unsigned long long        od_readcache_max_filesize;
-        int                       od_read_cache;
-        int                       od_writethrough_cache;
+       unsigned long long      od_readcache_max_filesize;
+       int                     od_read_cache;
+       int                     od_writethrough_cache;
 
-        struct brw_stats          od_brw_stats;
-        cfs_atomic_t              od_r_in_flight;
-        cfs_atomic_t              od_w_in_flight;
+       struct brw_stats        od_brw_stats;
+       atomic_t                od_r_in_flight;
+       atomic_t                od_w_in_flight;
 
        struct mutex              od_otable_mutex;
        struct osd_otable_it     *od_otable_it;
        struct osd_scrub          od_scrub;
+       struct list_head                  od_ios_list;
 
        /* service name associated with the osd device */
        char                      od_svname[MAX_OBD_NAME];
+       char                      od_mntdev[MAX_OBD_NAME];
 
        /* quota slave instance */
        struct qsd_instance      *od_quota_slave;
+
+       /* osd seq instance */
+       struct lu_client_seq    *od_cl_seq;
+       /* If the ratio of "the total OI mappings count" vs
+        * "the bad OI mappings count" is lower than the
+        * osd_device::od_full_scrub_ratio, then trigger
+        * OI scrub to scan the whole device. */
+       __u64                    od_full_scrub_ratio;
+       /* If the speed of found bad OI mappings (per minute)
+        * exceeds the osd_device::od_full_scrub_threshold_rate,
+        * then trigger OI scrub to scan the whole device. */
+       __u64                    od_full_scrub_threshold_rate;
+
+       /* a list of orphaned agent inodes, protected with od_osfs_lock */
+       struct list_head         od_orphan_list;
 };
 
-#define OSD_TRACK_DECLARES
-#ifdef OSD_TRACK_DECLARES
-#define OSD_DECLARE_OP(oh, op, credits)                                        \
-do {                                                                   \
-       LASSERT((oh)->ot_handle == NULL);                               \
-       ((oh)->ot_declare_ ##op)++;                                     \
-       ((oh)->ot_declare_ ##op ##_cred) += (credits);                  \
-       (oh)->ot_credits += (credits);                                  \
-} while (0)
-#define OSD_EXEC_OP(handle, op)                                                \
-do {                                                                   \
-       struct osd_thandle *oh = container_of(handle, typeof(*oh), ot_super); \
-       LASSERT((oh)->ot_declare_ ##op > 0);                            \
-       ((oh)->ot_declare_ ##op)--;                                     \
-} while (0)
-#else
-#define OSD_DECLARE_OP(oh, op, credits) (oh)->ot_credits += (credits)
-#define OSD_EXEC_OP(oh, op)
-#endif
+enum osd_full_scrub_ratio { /* presumably the named values for osd_device::od_full_scrub_ratio -- TODO confirm */
+       /* Trigger OI scrub to scan the whole device directly. */
+       OFSR_DIRECTLY   = 0,
+
+       /* The bad OI mappings count can never be larger than the
+        * total OI mappings count, so setting OFSR_NEVER means that
+        * a whole-device scan is never triggered by bad OI mappings
+        * that are auto-detected while serving RPCs. */
+       OFSR_NEVER      = 1,
+       OFSR_DEFAULT    = 10000, /* presumably the ratio used when nothing else is configured -- TODO confirm */
+};
+
+#define FULL_SCRUB_THRESHOLD_RATE_DEFAULT      60
 
 /* There are at most 10 uid/gids are affected in a transaction, and
  * that's rename case:
@@ -334,48 +324,35 @@ do {                                                                      \
  */
 #define OSD_MAX_UGID_CNT        10
 
+enum { /* operation types; OSD_OT_MAX sizes the oti_declare_ops*[] arrays in struct osd_thread_info */
+       OSD_OT_ATTR_SET         = 0,
+       OSD_OT_PUNCH            = 1,
+       OSD_OT_XATTR_SET        = 2,
+       OSD_OT_CREATE           = 3,
+       OSD_OT_DESTROY          = 4,
+       OSD_OT_REF_ADD          = 5,
+       OSD_OT_REF_DEL          = 6,
+       OSD_OT_WRITE            = 7,
+       OSD_OT_INSERT           = 8,
+       OSD_OT_DELETE           = 9,
+       OSD_OT_QUOTA            = 10,
+       OSD_OT_MAX              = 11 /* number of operation types above; keep last */
+};
+
 struct osd_thandle {
         struct thandle          ot_super;
         handle_t               *ot_handle;
-        struct journal_callback ot_jcb;
-        cfs_list_t              ot_dcb_list;
-        /* Link to the device, for debugging. */
-        struct lu_ref_link     *ot_dev_link;
+        struct ldiskfs_journal_cb_entry ot_jcb;
+       struct list_head       ot_commit_dcb_list;
+       struct list_head       ot_stop_dcb_list;
+       /* Link to the device, for debugging. */
+       struct lu_ref_link      ot_dev_link;
         unsigned short          ot_credits;
         unsigned short          ot_id_cnt;
         unsigned short          ot_id_type;
+       int                     ot_remove_agents:1;
         uid_t                   ot_id_array[OSD_MAX_UGID_CNT];
        struct lquota_trans    *ot_quota_trans;
-
-#ifdef OSD_TRACK_DECLARES
-       /* Tracking for transaction credits, to allow debugging and optimizing
-        * cases where a large number of credits are being allocated for
-        * single transaction. */
-       unsigned char           ot_declare_attr_set;
-       unsigned char           ot_declare_punch;
-       unsigned char           ot_declare_xattr_set;
-       unsigned char           ot_declare_create;
-       unsigned char           ot_declare_destroy;
-       unsigned char           ot_declare_ref_add;
-       unsigned char           ot_declare_ref_del;
-       unsigned char           ot_declare_write;
-       unsigned char           ot_declare_insert;
-       unsigned char           ot_declare_delete;
-       unsigned char           ot_declare_quota;
-
-       unsigned short          ot_declare_attr_set_cred;
-       unsigned short          ot_declare_punch_cred;
-       unsigned short          ot_declare_xattr_set_cred;
-       unsigned short          ot_declare_create_cred;
-       unsigned short          ot_declare_destroy_cred;
-       unsigned short          ot_declare_ref_add_cred;
-       unsigned short          ot_declare_ref_del_cred;
-       unsigned short          ot_declare_write_cred;
-       unsigned short          ot_declare_insert_cred;
-       unsigned short          ot_declare_delete_cred;
-       unsigned short          ot_declare_quota_cred;
-#endif
-
 #if OSD_THANDLE_STATS
         /** time when this handle was allocated */
         cfs_time_t oth_alloced;
@@ -396,7 +373,6 @@ enum dt_txn_op {
         DTO_OBJECT_DELETE,
         DTO_ATTR_SET_BASE,
         DTO_XATTR_SET,
-        DTO_LOG_REC, /**< XXX temporary: dt layer knows nothing about llog. */
         DTO_WRITE_BASE,
         DTO_WRITE_BLOCK,
         DTO_ATTR_SET_CHOWN,
@@ -408,7 +384,7 @@ enum dt_txn_op {
  * osd dev stats
  */
 
-#ifdef LPROCFS
+#ifdef CONFIG_PROC_FS
 enum {
         LPROC_OSD_READ_BYTES    = 0,
         LPROC_OSD_WRITE_BYTES   = 1,
@@ -453,24 +429,25 @@ struct osd_it_ea_dirent {
  * there  would be one ext3 readdir for every mdd readdir page.
  */
 
-#define OSD_IT_EA_BUFSIZE       (CFS_PAGE_SIZE + CFS_PAGE_SIZE/4)
+#define OSD_IT_EA_BUFSIZE       (PAGE_CACHE_SIZE + PAGE_CACHE_SIZE/4)
 
 /**
  * This is iterator's in-memory data structure in interoperability
  * mode (i.e. iterator over ldiskfs style directory)
  */
 struct osd_it_ea {
-        struct osd_object   *oie_obj;
-        /** used in ldiskfs iterator, to stored file pointer */
-        struct file          oie_file;
-        /** how many entries have been read-cached from storage */
-        int                  oie_rd_dirent;
-        /** current entry is being iterated by caller */
-        int                  oie_it_dirent;
-        /** current processing entry */
-        struct osd_it_ea_dirent *oie_dirent;
-        /** buffer to hold entries, size == OSD_IT_EA_BUFSIZE */
-        void                *oie_buf;
+       struct osd_object       *oie_obj;
+       /** used in ldiskfs iterator, to store the file pointer */
+       struct file             oie_file;
+       /** how many entries have been read-cached from storage */
+       int                     oie_rd_dirent;
+       /** current entry is being iterated by caller */
+       int                     oie_it_dirent;
+       /** current processing entry */
+       struct osd_it_ea_dirent *oie_dirent;
+       /** buffer to hold entries, size == OSD_IT_EA_BUFSIZE */
+       void                    *oie_buf;
+       struct dentry           oie_dentry;
 };
 
 /**
@@ -483,7 +460,7 @@ struct osd_it_iam {
 };
 
 struct osd_quota_leaf {
-       cfs_list_t      oql_link;
+       struct list_head        oql_link;
        uint            oql_blk;
 };
 
@@ -501,60 +478,63 @@ struct osd_it_quota {
        /** the record index in the leaf/index block */
        uint                     oiq_index[LUSTRE_DQTREEDEPTH + 1];
        /** list of already processed leaf blocks */
-       cfs_list_t               oiq_list;
+       struct list_head         oiq_list;
 };
 
-#define MAX_BLOCKS_PER_PAGE (CFS_PAGE_SIZE / 512)
+#define MAX_BLOCKS_PER_PAGE (PAGE_CACHE_SIZE / 512)
 
 struct osd_iobuf {
-        cfs_waitq_t        dr_wait;
-        cfs_atomic_t       dr_numreqs;  /* number of reqs being processed */
-        int                dr_max_pages;
-        int                dr_npages;
-        int                dr_error;
-        int                dr_frags;
-        unsigned int       dr_ignore_quota:1;
-        unsigned int       dr_elapsed_valid:1; /* we really did count time */
-        unsigned int       dr_rw:1;
-        struct page       *dr_pages[PTLRPC_MAX_BRW_PAGES];
-        unsigned long      dr_blocks[PTLRPC_MAX_BRW_PAGES*MAX_BLOCKS_PER_PAGE];
-        unsigned long      dr_start_time;
-        unsigned long      dr_elapsed;  /* how long io took */
-        struct osd_device *dr_dev;
+       wait_queue_head_t  dr_wait;
+       atomic_t       dr_numreqs;  /* number of reqs being processed */
+       int                dr_max_pages;
+       int                dr_npages;
+       int                dr_error;
+       int                dr_frags;
+       unsigned int       dr_ignore_quota:1;
+       unsigned int       dr_elapsed_valid:1; /* we really did count time */
+       unsigned int       dr_rw:1;
+       struct lu_buf      dr_pg_buf;
+       struct page      **dr_pages;
+       struct lu_buf      dr_bl_buf;
+       sector_t          *dr_blocks;
+       unsigned long      dr_start_time;
+       unsigned long      dr_elapsed;  /* how long io took */
+       struct osd_device *dr_dev;
        unsigned int       dr_init_at;  /* the line iobuf was initialized */
 };
 
 struct osd_thread_info {
-        const struct lu_env   *oti_env;
-        /**
-         * used for index operations.
-         */
-        struct dentry          oti_obj_dentry;
-        struct dentry          oti_child_dentry;
+       const struct lu_env   *oti_env;
+       /**
+        * used for index operations.
+        */
+       struct dentry          oti_obj_dentry;
+       struct dentry          oti_child_dentry;
+
+       /** dentry for Iterator context. */
+       struct dentry           oti_it_dentry;
+
+       union {
+               /* fake struct file for osd_object_sync */
+               struct file             oti_file;
+               /* osd_statfs() */
+               struct kstatfs          oti_ksfs;
+       };
 
-        /** dentry for Iterator context. */
-        struct dentry          oti_it_dentry;
-        struct htree_lock     *oti_hlock;
+       struct htree_lock     *oti_hlock;
 
-        struct lu_fid          oti_fid;
+       struct lu_fid          oti_fid;
        struct lu_fid          oti_fid2;
+       struct lu_fid          oti_fid3;
        struct osd_inode_id    oti_id;
        struct osd_inode_id    oti_id2;
+       struct osd_inode_id    oti_id3;
         struct ost_id          oti_ostid;
 
         /*
          * XXX temporary: for ->i_op calls.
          */
         struct timespec        oti_time;
-        /*
-         * XXX temporary: fake struct file for osd_object_sync
-         */
-        struct file            oti_file;
-        /*
-         * XXX temporary: for capa operations.
-         */
-        struct lustre_capa_key oti_capa_key;
-        struct lustre_capa     oti_capa;
 
         /** osd_device reference, initialized in osd_trans_start() and
             used in osd_trans_stop() */
@@ -566,26 +546,16 @@ struct osd_thread_info {
          * in open iterator session.
          */
 
-        /** osd iterator context used for iterator session */
-
-       union {
-               struct osd_it_iam       oti_it;
-               /* ldiskfs iterator data structure,
-                * see osd_it_ea_{init, fini} */
-               struct osd_it_ea        oti_it_ea;
-               struct osd_it_quota     oti_it_quota;
-       };
-
-        /** pre-allocated buffer used by oti_it_ea, size OSD_IT_EA_BUFSIZE */
-        void                  *oti_it_ea_buf;
-
-        cfs_kstatfs_t          oti_ksfs;
+       /** pre-allocated buffer used by oti_it_ea, size OSD_IT_EA_BUFSIZE */
+       void                    *oti_it_ea_buf;
+       unsigned int            oti_it_ea_buf_used:1;
 
-        /** IAM iterator for index operation. */
-        struct iam_iterator    oti_idx_it;
+       /* IAM iterator for index operation. */
+       struct iam_iterator    oti_idx_it;
 
         /** union to guarantee that ->oti_ipd[] has proper alignment. */
         union {
+               char           oti_name[48];
                 char           oti_it_ipd[DX_IPD_MAX_SIZE];
                 long long      oti_alignment_lieutenant;
         };
@@ -602,85 +572,149 @@ struct osd_thread_info {
         int                    oti_txns;
         /** used in osd_fid_set() to put xattr */
         struct lu_buf          oti_buf;
+       struct lu_buf          oti_big_buf;
         /** used in osd_ea_fid_set() to set fid into common ea */
        union {
                struct lustre_mdt_attrs oti_mdt_attrs;
                /* old LMA for compatibility */
                char                    oti_mdt_attrs_old[LMA_OLD_SIZE];
+               struct filter_fid_old   oti_ff;
+               struct filter_fid       oti_ff_new;
        };
-        /** 0-copy IO */
-        struct osd_iobuf       oti_iobuf;
-        struct inode           oti_inode;
-        int                    oti_created[PTLRPC_MAX_BRW_PAGES];
-        struct lu_env          oti_obj_delete_tx_env;
+       /** 0-copy IO */
+       struct osd_iobuf       oti_iobuf;
+       /* used to access objects in /O */
+       struct inode          *oti_inode;
 #define OSD_FID_REC_SZ 32
-        char                   oti_ldp[OSD_FID_REC_SZ];
-        char                   oti_ldp2[OSD_FID_REC_SZ];
+       char                   oti_ldp[OSD_FID_REC_SZ];
+       char                   oti_ldp2[OSD_FID_REC_SZ];
 
        /* used by quota code */
        union {
+#ifdef HAVE_DQUOT_FS_DISK_QUOTA
+               struct fs_disk_quota    oti_fdq;
+#else
                struct if_dqblk         oti_dqblk;
+#endif
                struct if_dqinfo        oti_dqinfo;
        };
        struct lquota_id_info   oti_qi;
        struct lquota_trans     oti_quota_trans;
        union lquota_rec        oti_quota_rec;
        __u64                   oti_quota_id;
+       struct lu_seq_range     oti_seq_range;
+
+       /* Tracking for transaction credits, to allow debugging and optimizing
+        * cases where a large number of credits are being allocated for
+        * single transaction. */
+       unsigned int            oti_credits_before;
+       unsigned short          oti_declare_ops[OSD_OT_MAX];
+       unsigned short          oti_declare_ops_cred[OSD_OT_MAX];
+       unsigned short          oti_declare_ops_used[OSD_OT_MAX];
 };
 
 extern int ldiskfs_pdo;
 
-#ifdef LPROCFS
+static inline int __osd_xattr_get(struct inode *inode, struct dentry *dentry,
+                                 const char *name, void *buf, int len)
+{      /* read xattr @name of @inode into @buf (size @len) through inode->i_op->getxattr() */
+       if (inode == NULL)
+               return -EINVAL; /* no inode to query */
+
+       dentry->d_inode = inode; /* point the caller-supplied @dentry at @inode for the i_op call */
+       dentry->d_sb = inode->i_sb;
+       return inode->i_op->getxattr(dentry, name, buf, len); /* ->getxattr() result is returned unchanged */
+}
+
+static inline int __osd_xattr_set(struct osd_thread_info *info,
+                                 struct inode *inode, const char *name,
+                                 const void *buf, int buflen, int fl)
+{      /* set xattr @name on @inode from @buf/@buflen through inode->i_op->setxattr(); @fl is passed through */
+       struct dentry *dentry = &info->oti_child_dentry; /* reuses the dentry embedded in @info instead of taking one from the caller */
+
+       ll_vfs_dq_init(inode); /* called before the xattr write; presumably quota initialization -- TODO confirm */
+       dentry->d_inode = inode; /* NOTE(review): unlike __osd_xattr_get(), @inode is not NULL-checked before use */
+       dentry->d_sb = inode->i_sb;
+       return inode->i_op->setxattr(dentry, name, buf, buflen, fl); /* ->setxattr() result is returned unchanged */
+}
+
+#ifdef CONFIG_PROC_FS
 /* osd_lproc.c */
-void lprocfs_osd_init_vars(struct lprocfs_static_vars *lvars);
+extern struct lprocfs_vars lprocfs_osd_obd_vars[];
+extern struct lprocfs_vars lprocfs_osd_module_vars[];
 int osd_procfs_init(struct osd_device *osd, const char *name);
 int osd_procfs_fini(struct osd_device *osd);
-void osd_lprocfs_time_start(const struct lu_env *env);
-void osd_lprocfs_time_end(const struct lu_env *env,
-                          struct osd_device *osd, int op);
 void osd_brw_stats_update(struct osd_device *osd, struct osd_iobuf *iobuf);
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 52, 0)
+int osd_register_proc_index_in_idif(struct osd_device *osd);
+#endif
 
 #endif
 int osd_statfs(const struct lu_env *env, struct dt_device *dev,
                struct obd_statfs *sfs);
-int osd_object_auth(const struct lu_env *env, struct dt_object *dt,
-                    struct lustre_capa *capa, __u64 opc);
 struct inode *osd_iget(struct osd_thread_info *info, struct osd_device *dev,
                       struct osd_inode_id *id);
-struct inode *osd_iget_fid(struct osd_thread_info *info, struct osd_device *dev,
-                          struct osd_inode_id *id, struct lu_fid *fid);
-
-int osd_compat_init(struct osd_device *dev);
-void osd_compat_fini(struct osd_device *dev);
-int osd_compat_objid_lookup(struct osd_thread_info *info,
-                            struct osd_device *osd,
-                            const struct lu_fid *fid, struct osd_inode_id *id);
-int osd_compat_objid_insert(struct osd_thread_info *info,
-                            struct osd_device *osd,
-                            const struct lu_fid *fid,
-                            const struct osd_inode_id *id, struct thandle *th);
-int osd_compat_objid_delete(struct osd_thread_info *info,
-                            struct osd_device *osd,
-                            const struct lu_fid *fid, struct thandle *th);
-int osd_compat_spec_lookup(struct osd_thread_info *info,
-                           struct osd_device *osd,
-                           const struct lu_fid *fid, struct osd_inode_id *id);
-int osd_compat_spec_insert(struct osd_thread_info *info,
-                           struct osd_device *osd,
-                           const struct lu_fid *fid,
-                           const struct osd_inode_id *id, struct thandle *th);
+int osd_ea_fid_set(struct osd_thread_info *info, struct inode *inode,
+                  const struct lu_fid *fid, __u32 compat, __u32 incompat);
+int osd_get_lma(struct osd_thread_info *info, struct inode *inode,
+               struct dentry *dentry, struct lustre_mdt_attrs *lma);
+void osd_add_oi_cache(struct osd_thread_info *info, struct osd_device *osd,
+                     struct osd_inode_id *id, const struct lu_fid *fid);
+int osd_get_idif(struct osd_thread_info *info, struct inode *inode,
+                struct dentry *dentry, struct lu_fid *fid);
+
+int osd_obj_map_init(const struct lu_env *env, struct osd_device *osd);
+void osd_obj_map_fini(struct osd_device *dev);
+int osd_obj_map_lookup(struct osd_thread_info *info, struct osd_device *osd,
+                       const struct lu_fid *fid, struct osd_inode_id *id);
+int osd_obj_map_insert(struct osd_thread_info *info, struct osd_device *osd,
+                      const struct lu_fid *fid, const struct osd_inode_id *id,
+                      handle_t *th);
+int osd_obj_map_delete(struct osd_thread_info *info, struct osd_device *osd,
+                       const struct lu_fid *fid, handle_t *th);
+int osd_obj_map_update(struct osd_thread_info *info, struct osd_device *osd,
+                      const struct lu_fid *fid, const struct osd_inode_id *id,
+                      handle_t *th);
+int osd_obj_map_recover(struct osd_thread_info *info, struct osd_device *osd,
+                       struct inode *src_parent, struct dentry *src_child,
+                       const struct lu_fid *fid);
+int osd_obj_spec_lookup(struct osd_thread_info *info, struct osd_device *osd,
+                       const struct lu_fid *fid, struct osd_inode_id *id);
+int osd_obj_spec_insert(struct osd_thread_info *info, struct osd_device *osd,
+                       const struct lu_fid *fid, const struct osd_inode_id *id,
+                       handle_t *th);
+int osd_obj_spec_update(struct osd_thread_info *info, struct osd_device *osd,
+                       const struct lu_fid *fid, const struct osd_inode_id *id,
+                       handle_t *th);
 
 void osd_scrub_file_reset(struct osd_scrub *scrub, __u8 *uuid, __u64 flags);
 int osd_scrub_file_store(struct osd_scrub *scrub);
-int osd_scrub_start(struct osd_device *dev);
+char *osd_lf_fid2name(const struct lu_fid *fid);
+int osd_scrub_start(struct osd_device *dev, __u32 flags);
 int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev);
 void osd_scrub_cleanup(const struct lu_env *env, struct osd_device *dev);
 int osd_oii_insert(struct osd_device *dev, struct osd_idmap_cache *oic,
                   int insert);
 int osd_oii_lookup(struct osd_device *dev, const struct lu_fid *fid,
                   struct osd_inode_id *id);
-int osd_scrub_dump(struct osd_device *dev, char *buf, int len);
-
+int osd_scrub_dump(struct seq_file *m, struct osd_device *dev);
+
+int osd_fld_lookup(const struct lu_env *env, struct osd_device *osd,
+                  u64 seq, struct lu_seq_range *range);
+
+int osd_delete_from_remote_parent(const struct lu_env *env,
+                                 struct osd_device *osd,
+                                 struct osd_object *obj,
+                                 struct osd_thandle *oh);
+int osd_add_to_remote_parent(const struct lu_env *env, struct osd_device *osd,
+                            struct osd_object *obj, struct osd_thandle *oh);
+int osd_lookup_in_remote_parent(struct osd_thread_info *oti,
+                               struct osd_device *osd,
+                               const struct lu_fid *fid,
+                               struct osd_inode_id *id);
+
+int osd_ost_seq_exists(struct osd_thread_info *info, struct osd_device *osd,
+                      __u64 seq);
 /* osd_quota_fmt.c */
 int walk_tree_dqentry(const struct lu_env *env, struct osd_object *obj,
                       int type, uint blk, int depth, uint index,
@@ -694,25 +728,78 @@ loff_t find_tree_dqentry(const struct lu_env *env,
                          struct osd_it_quota *it);
 /* osd_quota.c */
 int osd_declare_qid(const struct lu_env *env, struct osd_thandle *oh,
-                   struct lquota_id_info *qi, bool allocated, int *flags);
+                   struct lquota_id_info *qi, struct osd_object *obj,
+                   bool enforce, int *flags);
 int osd_declare_inode_qid(const struct lu_env *env, qid_t uid, qid_t gid,
                          long long space, struct osd_thandle *oh,
-                         bool is_blk, bool allocated, int *flags, bool force);
+                         struct osd_object *obj, bool is_blk, int *flags,
+                         bool force);
 const struct dt_rec *osd_quota_pack(struct osd_object *obj,
                                    const struct dt_rec *rec,
                                    union lquota_rec *quota_rec);
 void osd_quota_unpack(struct osd_object *obj, const struct dt_rec *rec);
-int osd_quota_migration(const struct lu_env *env, struct dt_object *dt,
-                       const struct dt_index_features *feat);
+int osd_quota_migration(const struct lu_env *env, struct dt_object *dt);
+
+/*
+ * Fallback i_uid/i_gid accessors, built only when HAVE_I_UID_READ is not
+ * defined (presumably a build-time check for kernels that already provide
+ * these helpers -- confirm).  They access inode::i_uid and inode::i_gid
+ * directly as plain uid_t/gid_t values.
+ */
+#ifndef HAVE_I_UID_READ
+/* Return the user id stored in \a inode. */
+static inline uid_t i_uid_read(const struct inode *inode)
+{
+       uid_t uid = inode->i_uid;
+
+       return uid;
+}
+
+/* Store \a uid as the user id of \a inode. */
+static inline void i_uid_write(struct inode *inode, uid_t uid)
+{
+       inode->i_uid = uid;
+}
+
+/* Return the group id stored in \a inode. */
+static inline gid_t i_gid_read(const struct inode *inode)
+{
+       gid_t gid = inode->i_gid;
+
+       return gid;
+}
+
+/* Store \a gid as the group id of \a inode. */
+static inline void i_gid_write(struct inode *inode, gid_t gid)
+{
+       inode->i_gid = gid;
+}
+#endif /* !HAVE_I_UID_READ */
+/*
+ * ldiskfs API compatibility wrappers, keyed on LDISKFS_HT_MISC.
+ *
+ * With LDISKFS_HT_MISC defined, each osd_* wrapper forwards all of its
+ * arguments to the corresponding ldiskfs function and osd_transaction_size()
+ * is half of the journal's j_max_transaction_buffers.
+ *
+ * Without it, LDISKFS_HT_MISC is defined as 0, the \a type and \a inlined
+ * macro arguments are dropped (and therefore never evaluated) before calling
+ * ldiskfs, osd_ldiskfs_append() becomes an inline function wrapping the
+ * four-argument ldiskfs_append(), and osd_transaction_size() is the full
+ * j_max_transaction_buffers.
+ */
+#ifdef LDISKFS_HT_MISC
+# define osd_journal_start_sb(sb, type, nblock) \
+               ldiskfs_journal_start_sb(sb, type, nblock)
+# define osd_ldiskfs_append(handle, inode, nblock) \
+               ldiskfs_append(handle, inode, nblock)
+# define osd_ldiskfs_find_entry(dir, name, de, inlined, lock) \
+               __ldiskfs_find_entry(dir, name, de, inlined, lock)
+# define osd_journal_start(inode, type, nblocks) \
+               ldiskfs_journal_start(inode, type, nblocks)
+# define osd_transaction_size(dev) \
+               (osd_journal(dev)->j_max_transaction_buffers / 2)
+#else /* !LDISKFS_HT_MISC */
+# define LDISKFS_HT_MISC       0
+# define osd_journal_start_sb(sb, type, nblock) \
+               ldiskfs_journal_start_sb(sb, nblock)
 
-static inline bool is_quota_glb_feat(const struct dt_index_features *feat)
+static inline struct buffer_head *osd_ldiskfs_append(handle_t *handle,
+                                                    struct inode *inode,
+                                                    ldiskfs_lblk_t *nblock)
 {
-       return (feat == &dt_quota_iusr_features ||
-               feat == &dt_quota_busr_features ||
-               feat == &dt_quota_igrp_features ||
-               feat == &dt_quota_bgrp_features) ? true : false;
+       struct buffer_head *bh;
+       int err = 0;
+       /* This ldiskfs_append() variant takes an error out-parameter; a NULL
+        * result is converted to ERR_PTR(err) for the caller.
+        * NOTE(review): if NULL can come back with err still 0, ERR_PTR(0)
+        * is NULL again -- confirm callers handle that. */
+       bh = ldiskfs_append(handle, inode, nblock, &err);
+       if (bh == NULL)
+               bh = ERR_PTR(err);
+
+       return bh;
 }
 
+# define osd_ldiskfs_find_entry(dir, name, de, inlined, lock) \
+               __ldiskfs_find_entry(dir, name, de, lock)
+# define osd_journal_start(inode, type, nblocks) \
+               ldiskfs_journal_start(inode, nblocks)
+# define osd_transaction_size(dev) \
+               (osd_journal(dev)->j_max_transaction_buffers)
+#endif /* LDISKFS_HT_MISC */
+
 /*
  * Invariants, assertions.
  */
@@ -753,8 +840,9 @@ static inline struct osd_oi *osd_fid2oi(struct osd_device *osd,
                                         const struct lu_fid *fid)
 {
        LASSERTF(!fid_is_idif(fid), DFID"\n", PFID(fid));
-       LASSERTF(!fid_is_igif(fid), DFID"\n", PFID(fid));
-       LASSERT(osd->od_oi_table != NULL && osd->od_oi_count >= 1);
+       LASSERTF(!fid_is_last_id(fid), DFID"\n", PFID(fid));
+       LASSERTF(osd->od_oi_table != NULL && osd->od_oi_count >= 1,
+                DFID"\n", PFID(fid));
        /* It can work even od_oi_count equals to 1 although it's unexpected,
         * the only reason we set it to 1 is for performance measurement */
        return osd->od_oi_table[osd_oi_fid2idx(osd, fid)];
@@ -815,6 +903,16 @@ static inline journal_t *osd_journal(const struct osd_device *dev)
         return LDISKFS_SB(osd_sb(dev))->s_journal;
 }
 
+/**
+ * Return the sequence server site of \a osd.
+ *
+ * The value is osd->od_dt_dev.dd_lu_dev.ld_site->ld_seq_site; no pointer in
+ * that chain is checked for NULL.
+ */
+static inline struct seq_server_site *osd_seq_site(struct osd_device *osd)
+{
+       struct seq_server_site *ss;
+
+       ss = osd->od_dt_dev.dd_lu_dev.ld_site->ld_seq_site;
+       return ss;
+}
+
+/**
+ * Return the obd_name of the obd device attached to \a osd.
+ *
+ * The value is osd->od_dt_dev.dd_lu_dev.ld_obd->obd_name; ld_obd is
+ * dereferenced without a NULL check.
+ */
+static inline char *osd_name(struct osd_device *osd)
+{
+       char *name;
+
+       name = osd->od_dt_dev.dd_lu_dev.ld_obd->obd_name;
+       return name;
+}
+
 extern const struct dt_body_operations osd_body_ops;
 extern struct lu_context_key osd_key;
 
@@ -851,6 +949,10 @@ static inline void osd_ipd_put(const struct lu_env *env,
         bag->ic_descr->id_ops->id_ipd_free(ipd);
 }
 
+int osd_calc_bkmap_credits(struct super_block *sb, struct inode *inode,
+                          const loff_t size, const loff_t pos,
+                          const int blocks);
+
 int osd_ldiskfs_read(struct inode *inode, void *buf, int size, loff_t *offs);
 int osd_ldiskfs_write_record(struct inode *inode, void *buf, int bufsize,
                             int write_NUL, loff_t *offs, handle_t *handle);
@@ -875,6 +977,148 @@ struct dentry *osd_child_dentry_by_inode(const struct lu_env *env,
         return child_dentry;
 }
 
+extern int osd_trans_declare_op2rb[];
+extern int ldiskfs_track_declares_assert;
+void osd_trans_dump_creds(const struct lu_env *env, struct thandle *th);
+
+/**
+ * Record that operation \a op was declared with \a credits credits.
+ *
+ * Asserts that the handle has not been assigned yet (oh->ot_handle is NULL).
+ * For an \a op below OSD_OT_MAX the per-thread declaration counter and
+ * credit total for that operation are increased.  An out-of-range \a op
+ * trips an assertion when ldiskfs_track_declares_assert is set, otherwise it
+ * is logged together with a stack dump.  Whenever the function returns,
+ * \a credits has been added to oh->ot_credits.
+ */
+static inline void osd_trans_declare_op(const struct lu_env *env,
+                                       struct osd_thandle *oh,
+                                       unsigned int op, int credits)
+{
+       struct osd_thread_info *oti = osd_oti_get(env);
+
+       LASSERT(oh->ot_handle == NULL);
+
+       if (op < OSD_OT_MAX) {
+               /* known operation: account it in the per-thread tables */
+               oti->oti_declare_ops[op] += 1;
+               oti->oti_declare_ops_cred[op] += credits;
+       } else if (unlikely(ldiskfs_track_declares_assert)) {
+               LASSERT(op < OSD_OT_MAX);
+       } else {
+               CWARN("%s: Invalid operation index %d\n",
+                     osd_name(osd_dt_dev(oh->ot_super.th_dev)), op);
+               libcfs_debug_dumpstack(NULL);
+       }
+
+       /* the handle-wide total grows whether or not \a op was valid */
+       oh->ot_credits += credits;
+}
+/**
+ * Sanity-check the credit declarations right before executing operation
+ * \a op, and remember the handle's remaining credits.
+ *
+ * - Asserts that oh->ot_handle is set.
+ * - An out-of-range \a op asserts when ldiskfs_track_declares_assert is set;
+ *   otherwise it is logged with a stack dump and the function returns.
+ * - If neither \a op nor its rollback operation (osd_trans_declare_op2rb[op])
+ *   has declared credits, the credit table is dumped and an error is logged
+ *   (LBUG() when asserting), except for REF_DEL with CREATE declared and
+ *   REF_ADD with DESTROY declared.
+ * - oti_credits_before is set from the handle's h_buffer_credits; it is read
+ *   back by osd_trans_exec_check().
+ */
+static inline void osd_trans_exec_op(const struct lu_env *env,
+                                    struct thandle *th, unsigned int op)
+{
+       struct osd_thread_info *oti = osd_oti_get(env);
+       struct osd_thandle     *oh  = container_of(th, struct osd_thandle,
+                                                  ot_super);
+       unsigned int            rb, left;
+
+       LASSERT(oh->ot_handle != NULL);
+       if (unlikely(op >= OSD_OT_MAX)) {
+               if (unlikely(ldiskfs_track_declares_assert))
+                       LASSERT(op < OSD_OT_MAX);
+               else {
+                       CWARN("%s: Invalid operation index %d\n",
+                             osd_name(osd_dt_dev(oh->ot_super.th_dev)), op);
+                       libcfs_debug_dumpstack(NULL);
+                       return;
+               }
+       }
+
+       /* find rollback (or reverse) operation for the given one
+        * such an operation doesn't require additional credits
+        * as the same set of blocks are modified */
+       rb = osd_trans_declare_op2rb[op];
+
+       /* check whether credits for this operation were reserved at all */
+       if (unlikely(oti->oti_declare_ops_cred[op] == 0 &&
+                    oti->oti_declare_ops_cred[rb] == 0)) {
+               /* the API is not perfect yet: CREATE does REF_ADD internally
+                * while DESTROY does not. To roll back CREATE the caller
+                * needs to call REF_DEL+DESTROY, which is hard to detect
+                * using a simple table of rollback operations */
+               if (op == OSD_OT_REF_DEL &&
+                   oti->oti_declare_ops_cred[OSD_OT_CREATE] > 0)
+                       goto proceed;
+               if (op == OSD_OT_REF_ADD &&
+                   oti->oti_declare_ops_cred[OSD_OT_DESTROY] > 0)
+                       goto proceed;
+               osd_trans_dump_creds(env, th);
+               CERROR("%s: op = %d, rb = %d\n",
+                      osd_name(osd_dt_dev(oh->ot_super.th_dev)), op, rb);
+               if (unlikely(ldiskfs_track_declares_assert))
+                       LBUG();
+       }
+
+proceed:
+       /* remember how many credits we have unused before the operation;
+        * \a left is what this op declared but has not been charged with
+        * so far */
+       oti->oti_credits_before = oh->ot_handle->h_buffer_credits;
+       left = oti->oti_declare_ops_cred[op] - oti->oti_declare_ops_used[op];
+       if (unlikely(oti->oti_credits_before < left)) {
+               osd_trans_dump_creds(env, th);
+               CERROR("%s: op = %d, rb = %d\n",
+                      osd_name(osd_dt_dev(oh->ot_super.th_dev)), op, rb);
+               /* on a very small fs (testing?) it's possible that
+                * the transaction can't fit 1/4 of journal, so we
+                * just request less credits (see osd_trans_start()).
+                * ignore the same case here: it is only fatal when
+                * fewer credits than osd_transaction_size() were
+                * declared for the handle.  Note that \a rb is reused
+                * as scratch for that size from here on. */
+               rb = osd_transaction_size(osd_dt_dev(th->th_dev));
+               if (unlikely(oh->ot_credits < rb)) {
+                       if (unlikely(ldiskfs_track_declares_assert))
+                               LBUG();
+               }
+       }
+}
+
+/**
+ * Account the credits consumed by operation \a op after it has executed.
+ *
+ * Consumption is measured as the drop in the handle's h_buffer_credits
+ * relative to oti_credits_before.  It is charged to \a op; if \a op then
+ * exceeds what it declared, the excess is moved to the still unused part of
+ * the OSD_OT_QUOTA reservation when it fits there.  Otherwise a warning and
+ * the credit table are emitted, followed by LBUG() when
+ * ldiskfs_track_declares_assert is set.
+ *
+ * An out-of-range \a op is ignored so that the per-operation tables are
+ * never indexed at or beyond OSD_OT_MAX.
+ */
+static inline void osd_trans_exec_check(const struct lu_env *env,
+                                       struct thandle *th,
+                                       unsigned int op)
+{
+       struct osd_thread_info *oti = osd_oti_get(env);
+       struct osd_thandle     *oh  = container_of(th, struct osd_thandle,
+                                                  ot_super);
+       int                     used, over, quota;
+
+       /* osd_trans_declare_op() and osd_trans_exec_op() validate the index
+        * (and report a bad one) before touching oti_declare_ops_*[];
+        * apply the same bound here instead of indexing out of range */
+       if (unlikely(op >= OSD_OT_MAX))
+               return;
+
+       /* how many credits have been used by the operation */
+       used = oti->oti_credits_before - oh->ot_handle->h_buffer_credits;
+
+       if (unlikely(used < 0)) {
+               /* if some block was allocated and released in the same
+                * transaction, then it won't be a part of the transaction
+                * and delta can be negative */
+               return;
+       }
+
+       if (used == 0) {
+               /* rollback operations (e.g. when we destroy just created
+                * object) should not consume any credits. there is no point
+                * in confusing the checks below */
+               return;
+       }
+
+       oti->oti_declare_ops_used[op] += used;
+       if (oti->oti_declare_ops_used[op] <= oti->oti_declare_ops_cred[op])
+               return;
+
+       /* we account quota for a whole transaction and any operation can
+        * consume corresponding credits */
+       over = oti->oti_declare_ops_used[op] -
+               oti->oti_declare_ops_cred[op];
+       quota = oti->oti_declare_ops_cred[OSD_OT_QUOTA] -
+               oti->oti_declare_ops_used[OSD_OT_QUOTA];
+       if (over <= quota) {
+               /* probably those credits were consumed by
+                * quota indirectly (in the depths of ldiskfs) */
+               oti->oti_declare_ops_used[OSD_OT_QUOTA] += over;
+               oti->oti_declare_ops_used[op] -= over;
+       } else {
+               /* op is unsigned and used is a signed int: print them with
+                * matching conversions */
+               CWARN("op %u: used %u, used now %d, reserved %u\n",
+                     op, oti->oti_declare_ops_used[op], used,
+                     oti->oti_declare_ops_cred[op]);
+               osd_trans_dump_creds(env, th);
+               if (unlikely(ldiskfs_track_declares_assert))
+                       LBUG();
+       }
+}
+
 /**
  * Helper function to pack the fid, ldiskfs stores fid in packed format.
  */
@@ -931,5 +1175,19 @@ static inline loff_t ldiskfs_get_htree_eof(struct file *filp)
                return LDISKFS_HTREE_EOF_64BIT;
 }
 
-#endif /* __KERNEL__ */
+/**
+ * Tell whether \a fid is internal, i.e. neither namespace-visible nor IDIF.
+ *
+ * \retval 1   \a fid is neither namespace-visible nor an IDIF
+ * \retval 0   otherwise
+ */
+static inline int fid_is_internal(const struct lu_fid *fid)
+{
+       /* namespace-visible FIDs are never internal */
+       if (fid_is_namespace_visible(fid))
+               return 0;
+
+       return !fid_is_idif(fid);
+}
+
+/**
+ * Return the inode number of the inode behind the remote-parent entry of
+ * \a dev (dev->od_mdt_map->omm_remote_parent).
+ *
+ * Every pointer in that chain is dereferenced without a NULL check.
+ */
+static inline unsigned long osd_remote_parent_ino(struct osd_device *dev)
+{
+       unsigned long ino;
+
+       ino = dev->od_mdt_map->omm_remote_parent->d_inode->i_ino;
+       return ino;
+}
+
+void ldiskfs_inc_count(handle_t *handle, struct inode *inode);
+void ldiskfs_dec_count(handle_t *handle, struct inode *inode);
+
+void osd_fini_iobuf(struct osd_device *d, struct osd_iobuf *iobuf);
+
 #endif /* _OSD_INTERNAL_H */