Whamcloud - gitweb
LU-3540 lod: update recovery thread
[fs/lustre-release.git] / lustre / include / obd.h
index 3b76c34..ce5457f 100644 (file)
@@ -27,7 +27,7 @@
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, 2013, Intel Corporation.
+ * Copyright (c) 2011, 2014, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
 #ifndef __OBD_H
 #define __OBD_H
 
-#if defined(__linux__)
-#include <linux/obd.h>
-#elif defined(__APPLE__)
-#include <darwin/obd.h>
-#elif defined(__WINNT__)
-#include <winnt/obd.h>
-#else
-#error Unsupported operating system.
-#endif
-
-#define IOC_OSC_TYPE         'h'
-#define IOC_OSC_MIN_NR       20
-#define IOC_OSC_SET_ACTIVE   _IOWR(IOC_OSC_TYPE, 21, struct obd_device *)
-#define IOC_OSC_MAX_NR       50
-
-#define IOC_MDC_TYPE         'i'
-#define IOC_MDC_MIN_NR       20
-#define IOC_MDC_MAX_NR       50
+#include <linux/spinlock.h>
 
 #include <lustre/lustre_idl.h>
 #include <lustre_lib.h>
 #include <lustre_export.h>
 #include <lustre_fid.h>
 #include <lustre_fld.h>
+#include <lustre_handles.h>
+#include <lustre_intent.h>
 #include <lustre_capa.h>
+#include <lvfs.h>
 
 #define MAX_OBD_DEVICES 8192
 
@@ -98,143 +84,59 @@ static inline void loi_init(struct lov_oinfo *loi)
 {
 }
 
-struct lov_stripe_md {
-       cfs_atomic_t     lsm_refc;
-       spinlock_t      lsm_lock;
-        pid_t            lsm_lock_owner; /* debugging */
-
-        /* maximum possible file size, might change as OSTs status changes,
-         * e.g. disconnected, deactivated */
-        __u64            lsm_maxbytes;
-        struct {
-                /* Public members. */
-               struct ost_id lw_object_oi; /* lov object id/seq */
-
-                /* LOV-private members start here -- only for use in lov/. */
-                __u32 lw_magic;
-                __u32 lw_stripe_size;      /* size of the stripe */
-                __u32 lw_pattern;          /* striping pattern (RAID0, RAID1) */
-                __u16 lw_stripe_count;  /* number of objects being striped over */
-                __u16 lw_layout_gen;       /* generation of the layout */
-                char  lw_pool_name[LOV_MAXPOOLNAME]; /* pool name */
-        } lsm_wire;
-
-        struct lov_oinfo *lsm_oinfo[0];
-};
-
-#define lsm_oi          lsm_wire.lw_object_oi
-#define lsm_magic        lsm_wire.lw_magic
-#define lsm_layout_gen   lsm_wire.lw_layout_gen
-#define lsm_stripe_size  lsm_wire.lw_stripe_size
-#define lsm_pattern      lsm_wire.lw_pattern
-#define lsm_stripe_count lsm_wire.lw_stripe_count
-#define lsm_pool_name    lsm_wire.lw_pool_name
-
-static inline bool lsm_is_released(struct lov_stripe_md *lsm)
-{
-       return !!(lsm->lsm_pattern & LOV_PATTERN_F_RELEASED);
-}
-
-static inline bool lsm_has_objects(struct lov_stripe_md *lsm)
-{
-       if (lsm == NULL)
-               return false;
-       if (lsm_is_released(lsm))
-               return false;
-       return true;
-}
-
+struct lov_stripe_md;
 struct obd_info;
 
 typedef int (*obd_enqueue_update_f)(void *cookie, int rc);
 
 /* obd info for a particular level (lov, osc). */
 struct obd_info {
-        /* Lock policy. It keeps an extent which is specific for a particular
-         * OSC. (e.g. lov_prep_enqueue_set initialises extent of the policy,
-         * and osc_enqueue passes it into ldlm_lock_match & ldlm_cli_enqueue. */
-        ldlm_policy_data_t      oi_policy;
-        /* Flags used for set request specific flags:
-           - while lock handling, the flags obtained on the enqueue
-           request are set here.
-           - while stats, the flags used for control delay/resend.
-           - while setattr, the flags used for distinguish punch operation
-         */
+       /* OBD_STATFS_* flags */
        __u64                   oi_flags;
-        /* Lock handle specific for every OSC lock. */
-        struct lustre_handle   *oi_lockh;
-        /* lsm data specific for every OSC. */
-        struct lov_stripe_md   *oi_md;
-        /* obdo data specific for every OSC, if needed at all. */
-        struct obdo            *oi_oa;
         /* statfs data specific for every OSC, if needed at all. */
         struct obd_statfs      *oi_osfs;
         /* An update callback which is called to update some data on upper
-         * level. E.g. it is used for update lsm->lsm_oinfo at every recieved
+        * level. E.g. it is used to update lsm->lsm_oinfo at every received
          * request in osc level for enqueue requests. It is also possible to
          * update some caller data from LOV layer if needed. */
         obd_enqueue_update_f    oi_cb_up;
-        /* oss capability, its type is obd_capa in client to avoid copy.
-         * in contrary its type is lustre_capa in OSS. */
-        void                   *oi_capa;
-       /* transfer jobid from ost_sync() to filter_sync()... */
-       char                   *oi_jobid;
 };
 
 struct obd_type {
-        cfs_list_t typ_chain;
-        struct obd_ops *typ_dt_ops;
-        struct md_ops *typ_md_ops;
-        cfs_proc_dir_entry_t *typ_procroot;
-        char *typ_name;
-        int  typ_refcnt;
-        struct lu_device_type *typ_lu;
-       spinlock_t obd_type_lock;
+       struct list_head         typ_chain;
+       struct obd_ops          *typ_dt_ops;
+       struct md_ops           *typ_md_ops;
+       struct proc_dir_entry   *typ_procroot;
+       struct proc_dir_entry   *typ_procsym;
+       __u32                    typ_sym_filter;
+       char                    *typ_name;
+       int                      typ_refcnt;
+       struct lu_device_type   *typ_lu;
+       spinlock_t               obd_type_lock;
 };
 
 struct brw_page {
-       obd_off  off;
-       struct page *pg;
-       int count;
-       obd_flag flag;
-};
-
-/* llog contexts */
-enum llog_ctxt_id {
-       LLOG_CONFIG_ORIG_CTXT  =  0,
-       LLOG_CONFIG_REPL_CTXT,
-       LLOG_MDS_OST_ORIG_CTXT,
-       LLOG_MDS_OST_REPL_CTXT,
-       LLOG_SIZE_ORIG_CTXT,
-       LLOG_SIZE_REPL_CTXT,
-       LLOG_RD1_ORIG_CTXT,
-       LLOG_RD1_REPL_CTXT,
-       LLOG_TEST_ORIG_CTXT,
-       LLOG_TEST_REPL_CTXT,
-       LLOG_LOVEA_ORIG_CTXT,
-       LLOG_LOVEA_REPL_CTXT,
-       LLOG_CHANGELOG_ORIG_CTXT,       /**< changelog generation on mdd */
-       LLOG_CHANGELOG_REPL_CTXT,       /**< changelog access on clients */
-       LLOG_CHANGELOG_USER_ORIG_CTXT,  /**< for multiple changelog consumers */
-       LLOG_AGENT_ORIG_CTXT,           /**< agent requests generation on cdt */
-       LLOG_MAX_CTXTS
+       u64              off;
+       struct page     *pg;
+       u32              count;
+       u32              flag;
 };
 
 struct timeout_item {
-        enum timeout_event ti_event;
-        cfs_time_t         ti_timeout;
-        timeout_cb_t       ti_cb;
-        void              *ti_cb_data;
-        cfs_list_t         ti_obd_list;
-        cfs_list_t         ti_chain;
+       enum timeout_event ti_event;
+       cfs_time_t         ti_timeout;
+       timeout_cb_t       ti_cb;
+       void              *ti_cb_data;
+       struct list_head   ti_obd_list;
+       struct list_head   ti_chain;
 };
 
-#define OSC_MAX_RIF_DEFAULT       8
-#define MDS_OSC_MAX_RIF_DEFAULT   50
-#define OSC_MAX_RIF_MAX         256
-#define OSC_MAX_DIRTY_DEFAULT  (OSC_MAX_RIF_DEFAULT * 4)
-#define OSC_MAX_DIRTY_MB_MAX   2048     /* arbitrary, but < MAX_LONG bytes */
-#define OSC_DEFAULT_RESENDS      10
+#define OBD_MAX_RIF_DEFAULT    8
+#define OBD_MAX_RIF_MAX                512
+#define OSC_MAX_RIF_MAX                256
+#define OSC_MAX_DIRTY_DEFAULT  (OBD_MAX_RIF_DEFAULT * 4)
+#define OSC_MAX_DIRTY_MB_MAX   2048     /* arbitrary, but < MAX_LONG bytes */
+#define OSC_DEFAULT_RESENDS    10
 
 /* possible values for fo_sync_lock_cancel */
 enum {
@@ -244,52 +146,72 @@ enum {
         NUM_SYNC_ON_CANCEL_STATES
 };
 
-#define MDC_MAX_RIF_DEFAULT       8
-#define MDC_MAX_RIF_MAX         512
+/*
+ * Limit reply buffer size for striping data to one x86_64 page. This
+ * value is chosen to fit the striping data for common use cases while
+ * staying well below the limit at which the buffer must be backed by
+ * vmalloc(). Excessive use of vmalloc() may cause spinlock contention
+ * on the MDS.
+ */
+#define OBD_MAX_DEFAULT_EA_SIZE                4096
 
 struct mdc_rpc_lock;
 struct obd_import;
 struct client_obd {
-       struct rw_semaphore  cl_sem;
-        struct obd_uuid          cl_target_uuid;
-        struct obd_import       *cl_import; /* ptlrpc connection state */
-        int                      cl_conn_count;
-        /* max_mds_easize is purely a performance thing so we don't have to
-         * call obd_size_diskmd() all the time. */
-        int                      cl_default_mds_easize;
-        int                      cl_max_mds_easize;
-        int                      cl_max_mds_cookiesize;
-
-        enum lustre_sec_part     cl_sp_me;
-        enum lustre_sec_part     cl_sp_to;
-        struct sptlrpc_flavor    cl_flvr_mgc;   /* fixed flavor of mgc->mgs */
-
-        /* the grant values are protected by loi_list_lock below */
-        long                     cl_dirty;         /* all _dirty_ in bytes */
-        long                     cl_dirty_max;     /* allowed w/o rpc */
-        long                     cl_dirty_transit; /* dirty synchronous */
-        long                     cl_avail_grant;   /* bytes of credit for ost */
-        long                     cl_lost_grant;    /* lost credits (trunc) */
+       struct rw_semaphore      cl_sem;
+       struct obd_uuid          cl_target_uuid;
+       struct obd_import       *cl_import; /* ptlrpc connection state */
+       size_t                   cl_conn_count;
+
+       /* Cache maximum and default values for easize. This is
+        * strictly a performance optimization to minimize calls to
+        * obd_size_diskmd(). The default values are used to calculate the
+        * initial size of a request buffer. The ptlrpc layer will resize the
+        * buffer as needed to accommodate a larger reply from the
+        * server. The default values should be small enough to avoid wasted
+        * memory and excessive use of vmalloc(), yet large enough to avoid
+        * reallocating the buffer in the common use case. */
+
+       /* Default EA size for striping attributes. It is initialized at
+        * mount-time based on the default stripe width of the filesystem,
+        * then it tracks the largest observed EA size advertised by
+        * the MDT, up to a maximum value of OBD_MAX_DEFAULT_EA_SIZE. */
+       __u32                    cl_default_mds_easize;
+
+       /* Maximum possible EA size computed at mount-time based on
+        * the number of OSTs in the filesystem. May be increased at
+        * run-time if a larger observed size is advertised by the MDT. */
+       __u32                    cl_max_mds_easize;
+
+       enum lustre_sec_part     cl_sp_me;
+       enum lustre_sec_part     cl_sp_to;
+       struct sptlrpc_flavor    cl_flvr_mgc; /* fixed flavor of mgc->mgs */
+
+       /* the grant values are protected by loi_list_lock below */
+       unsigned long            cl_dirty_pages;      /* all _dirty_ in pages */
+       unsigned long            cl_dirty_max_pages;  /* allowed w/o rpc */
+       unsigned long            cl_dirty_transit;    /* dirty synchronous */
+       unsigned long            cl_avail_grant;   /* bytes of credit for ost */
+       unsigned long            cl_lost_grant;    /* lost credits (trunc) */
 
        /* since we allocate grant by blocks, we don't know how many grant will
         * be used to add a page into cache. As a solution, we reserve maximum
         * grant before trying to dirty a page and unreserve the rest.
         * See osc_{reserve|unreserve}_grant for details. */
-       long                 cl_reserved_grant;
-       cfs_list_t           cl_cache_waiters; /* waiting for cache/grant */
-       cfs_time_t           cl_next_shrink_grant;   /* jiffies */
-       cfs_list_t           cl_grant_shrink_list;  /* Timeout event list */
-       int                  cl_grant_shrink_interval; /* seconds */
+       long                    cl_reserved_grant;
+       struct list_head        cl_cache_waiters; /* waiting for cache/grant */
+       cfs_time_t              cl_next_shrink_grant;   /* jiffies */
+       struct list_head        cl_grant_shrink_list;  /* Timeout event list */
+       int                     cl_grant_shrink_interval; /* seconds */
 
        /* A chunk is an optimal size used by osc_extent to determine
         * the extent size. A chunk is max(PAGE_CACHE_SIZE, OST block size) */
-       int                  cl_chunkbits;
-       int                  cl_chunk;
-       int                  cl_extent_tax; /* extent overhead, by bytes */
+       int                     cl_chunkbits;
+       unsigned int            cl_extent_tax;  /* extent overhead, by bytes */
 
-        /* keep track of objects that have lois that contain pages which
-         * have been queued for async brw.  this lock also protects the
-         * lists of osc_client_pages that hang off of the loi */
+       /* keep track of objects that have lois that contain pages which
+        * have been queued for async brw.  this lock also protects the
+        * lists of osc_client_pages that hang off of the loi */
         /*
          * ->cl_loi_list_lock protects consistency of
          * ->cl_loi_{ready,read,write}_list. ->ap_make_ready() and
@@ -301,56 +223,57 @@ struct client_obd {
          * blocking everywhere, but we don't want to slow down fast-path of
          * our main platform.)
          *
-         * Exact type of ->cl_loi_list_lock is defined in arch/obd.h together
-         * with client_obd_list_{un,}lock() and
-         * client_obd_list_lock_{init,done}() functions.
-        *
         * NB by Jinshan: though field names are still _loi_, but actually
         * osc_object{}s are in the list.
         */
-        client_obd_lock_t        cl_loi_list_lock;
-        cfs_list_t               cl_loi_ready_list;
-        cfs_list_t               cl_loi_hp_ready_list;
-        cfs_list_t               cl_loi_write_list;
-        cfs_list_t               cl_loi_read_list;
-        int                      cl_r_in_flight;
-        int                      cl_w_in_flight;
-        /* just a sum of the loi/lop pending numbers to be exported by /proc */
-       cfs_atomic_t             cl_pending_w_pages;
-       cfs_atomic_t             cl_pending_r_pages;
-       __u32                    cl_max_pages_per_rpc;
-        int                      cl_max_rpcs_in_flight;
-        struct obd_histogram     cl_read_rpc_hist;
-        struct obd_histogram     cl_write_rpc_hist;
-        struct obd_histogram     cl_read_page_hist;
-        struct obd_histogram     cl_write_page_hist;
-        struct obd_histogram     cl_read_offset_hist;
-        struct obd_histogram     cl_write_offset_hist;
+       spinlock_t              cl_loi_list_lock;
+       struct list_head        cl_loi_ready_list;
+       struct list_head        cl_loi_hp_ready_list;
+       struct list_head        cl_loi_write_list;
+       struct list_head        cl_loi_read_list;
+       __u32                   cl_r_in_flight;
+       __u32                   cl_w_in_flight;
+       /* just a sum of the loi/lop pending numbers to be exported by /proc */
+       atomic_t                cl_pending_w_pages;
+       atomic_t                cl_pending_r_pages;
+       __u32                   cl_max_pages_per_rpc;
+       __u32                   cl_max_rpcs_in_flight;
+       struct obd_histogram    cl_read_rpc_hist;
+       struct obd_histogram    cl_write_rpc_hist;
+       struct obd_histogram    cl_read_page_hist;
+       struct obd_histogram    cl_write_page_hist;
+       struct obd_histogram    cl_read_offset_hist;
+       struct obd_histogram    cl_write_offset_hist;
 
        /* lru for osc caching pages */
        struct cl_client_cache  *cl_cache;
-       cfs_list_t               cl_lru_osc; /* member of cl_cache->ccc_lru */
-       cfs_atomic_t            *cl_lru_left;
-       cfs_atomic_t             cl_lru_busy;
-       cfs_atomic_t             cl_lru_shrinkers;
-       cfs_atomic_t             cl_lru_in_list;
-       cfs_list_t               cl_lru_list; /* lru page list */
-       client_obd_lock_t        cl_lru_list_lock; /* page list protector */
-       cfs_atomic_t             cl_unstable_count;
-
-        /* number of in flight destroy rpcs is limited to max_rpcs_in_flight */
-        cfs_atomic_t             cl_destroy_in_flight;
-       wait_queue_head_t        cl_destroy_waitq;
+       struct list_head         cl_lru_osc; /* member of cl_cache->ccc_lru */
+       atomic_long_t           *cl_lru_left;
+       atomic_long_t            cl_lru_busy;
+       atomic_long_t            cl_lru_in_list;
+       atomic_long_t            cl_unstable_count;
+       struct list_head         cl_lru_list; /* lru page list */
+       spinlock_t               cl_lru_list_lock; /* page list protector */
+       atomic_t                 cl_lru_shrinkers;
+
+       /* number of in flight destroy rpcs is limited to max_rpcs_in_flight */
+       atomic_t                 cl_destroy_in_flight;
+       wait_queue_head_t        cl_destroy_waitq;
 
         struct mdc_rpc_lock     *cl_rpc_lock;
         struct mdc_rpc_lock     *cl_close_lock;
 
+       /* modify rpcs in flight
+        * currently used for metadata only */
+       spinlock_t               cl_mod_rpcs_lock;
+       __u16                    cl_max_mod_rpcs_in_flight;
+
         /* mgc datastruct */
-       struct semaphore         cl_mgc_sem;
+       struct mutex              cl_mgc_mutex;
        struct local_oid_storage *cl_mgc_los;
-       struct dt_object        *cl_mgc_configs_dir;
-        cfs_atomic_t             cl_mgc_refcount;
-        struct obd_export       *cl_mgc_mgsexp;
+       struct dt_object         *cl_mgc_configs_dir;
+       atomic_t                  cl_mgc_refcount;
+       struct obd_export        *cl_mgc_mgsexp;
 
         /* checksumming for data sent over the network */
         unsigned int             cl_checksum:1; /* 0 = disabled, 1 = enabled */
@@ -362,17 +285,10 @@ struct client_obd {
         /* also protected by the poorly named _loi_list_lock lock above */
         struct osc_async_rc      cl_ar;
 
-       /* used by quotacheck when the servers are older than 2.4 */
-       int                      cl_qchk_stat; /* quotacheck stat of the peer */
-#define CL_NOT_QUOTACHECKED 1   /* client->cl_qchk_stat init value */
-#if LUSTRE_VERSION_CODE >= OBD_OCD_VERSION(2, 7, 50, 0)
-#warning "please consider removing quotacheck compatibility code"
-#endif
-
-        /* sequence manager */
-        struct lu_client_seq    *cl_seq;
+       /* sequence manager */
+       struct lu_client_seq    *cl_seq;
 
-        cfs_atomic_t             cl_resends; /* resend count */
+       atomic_t             cl_resends; /* resend count */
 
        /* ptlrpc work for writeback in ptlrpcd context */
        void                    *cl_writeback_work;
@@ -383,17 +299,16 @@ struct client_obd {
 #define obd2cli_tgt(obd) ((char *)(obd)->u.cli.cl_target_uuid.uuid)
 
 struct obd_id_info {
-        __u32   idx;
-        obd_id  *data;
+       u32      idx;
+       u64     *data;
 };
 
 struct echo_client_obd {
-       struct obd_export       *ec_exp;   /* the local connection to osc/lov */
+       struct obd_export      *ec_exp; /* the local connection to osc/lov */
        spinlock_t              ec_lock;
-        cfs_list_t           ec_objects;
-        cfs_list_t           ec_locks;
-        int                  ec_nstripes;
-        __u64                ec_unique;
+       struct list_head        ec_objects;
+       struct list_head        ec_locks;
+       __u64                   ec_unique;
 };
 
 /* Generic subset of OSTs */
@@ -409,7 +324,7 @@ struct ost_pool {
 #define OBD_STATFS_CACHE_SECONDS 1
 
 struct lov_tgt_desc {
-        cfs_list_t          ltd_kill;
+       struct list_head    ltd_kill;
         struct obd_uuid     ltd_uuid;
         struct obd_device  *ltd_obd;
         struct obd_export  *ltd_exp;
@@ -421,32 +336,33 @@ struct lov_tgt_desc {
 };
 
 struct lov_obd {
-        struct lov_desc         desc;
-        struct lov_tgt_desc   **lov_tgts;              /* sparse array */
-        struct ost_pool         lov_packed;            /* all OSTs in a packed
-                                                          array */
+       struct lov_desc         desc;
+       struct lov_tgt_desc   **lov_tgts;               /* sparse array */
+       struct ost_pool         lov_packed;             /* all OSTs in a packed
+                                                          array */
        struct mutex            lov_lock;
-        struct obd_connect_data lov_ocd;
-        cfs_atomic_t            lov_refcount;
-        __u32                   lov_death_row;/* tgts scheduled to be deleted */
-        __u32                   lov_tgt_size;   /* size of tgts array */
-        int                     lov_connects;
-        int                     lov_pool_count;
-        cfs_hash_t             *lov_pools_hash_body; /* used for key access */
-        cfs_list_t              lov_pool_list; /* used for sequential access */
-        cfs_proc_dir_entry_t   *lov_pool_proc_entry;
-        enum lustre_sec_part    lov_sp_me;
+       struct obd_connect_data lov_ocd;
+       struct proc_dir_entry  *targets_proc_entry;
+       atomic_t                lov_refcount;
+       __u32                   lov_death_row;  /* tgts scheduled to be deleted */
+       __u32                   lov_tgt_size;   /* size of tgts array */
+       int                     lov_connects;
+       int                     lov_pool_count;
+       cfs_hash_t             *lov_pools_hash_body; /* used for key access */
+       struct list_head        lov_pool_list;  /* used for sequential access */
+       struct proc_dir_entry  *lov_pool_proc_entry;
+       enum lustre_sec_part    lov_sp_me;
 
        /* Cached LRU and unstable data from upper layer */
-       void                   *lov_cache;
+       struct cl_client_cache *lov_cache;
 
-       struct rw_semaphore     lov_notify_lock;
+       struct rw_semaphore     lov_notify_lock;
 };
 
 struct lmv_tgt_desc {
        struct obd_uuid         ltd_uuid;
        struct obd_export       *ltd_exp;
-       int                     ltd_idx;
+       __u32                   ltd_idx;
        struct mutex            ltd_fid_mutex;
        unsigned long           ltd_active:1; /* target up for requests */
 };
@@ -468,15 +384,14 @@ struct lmv_obd {
        struct lmv_desc         desc;
        struct obd_uuid         cluuid;
        struct obd_export       *exp;
+       struct proc_dir_entry   *targets_proc_entry;
 
-       struct mutex            init_mutex;
+       struct mutex            lmv_init_mutex;
        int                     connected;
        int                     max_easize;
        int                     max_def_easize;
-       int                     max_cookiesize;
-       int                     server_timeout;
 
-       int                     tgts_size; /* size of tgts array */
+       __u32                   tgts_size; /* size of tgts array */
        struct lmv_tgt_desc     **tgts;
 
        struct obd_connect_data conn_data;
@@ -485,12 +400,11 @@ struct lmv_obd {
 struct niobuf_local {
        __u64           lnb_file_offset;
        __u32           lnb_page_offset;
-       __u32           len;
-       __u32           flags;
-       struct page     *page;
-       struct dentry   *dentry;
-       int             lnb_grant_used;
-       int             rc;
+       __u32           lnb_len;
+       __u32           lnb_flags;
+       struct page     *lnb_page;
+       void            *lnb_data;
+       int             lnb_rc;
 };
 
 #define LUSTRE_FLD_NAME         "fld"
@@ -528,119 +442,6 @@ struct niobuf_local {
 #define LUSTRE_MGS_OBDNAME "MGS"
 #define LUSTRE_MGC_OBDNAME "MGC"
 
-static inline int is_osp_on_mdt(char *name)
-{
-       char   *ptr;
-
-       ptr = strrchr(name, '-');
-       if (ptr == NULL) {
-               CERROR("%s is not a obdname\n", name);
-               return 0;
-       }
-
-       /* 1.8 OSC/OSP name on MDT is fsname-OSTxxxx-osc */
-       if (strncmp(ptr + 1, "osc", 3) == 0)
-               return 1;
-
-       if (strncmp(ptr + 1, "MDT", 3) != 0)
-               return 0;
-
-       while (*(--ptr) != '-' && ptr != name);
-
-       if (ptr == name)
-               return 0;
-
-       if (strncmp(ptr + 1, LUSTRE_OSP_NAME, strlen(LUSTRE_OSP_NAME)) != 0 &&
-           strncmp(ptr + 1, LUSTRE_OSC_NAME, strlen(LUSTRE_OSC_NAME)) != 0)
-               return 0;
-
-       return 1;
-}
-
-/* Don't conflict with on-wire flags OBD_BRW_WRITE, etc */
-#define N_LOCAL_TEMP_PAGE 0x10000000
-
-struct obd_trans_info {
-        __u64                    oti_transno;
-        __u64                    oti_xid;
-        /* Only used on the server side for tracking acks. */
-        struct oti_req_ack_lock {
-                struct lustre_handle lock;
-                __u32                mode;
-        }                        oti_ack_locks[4];
-        void                    *oti_handle;
-        struct llog_cookie       oti_onecookie;
-        struct llog_cookie      *oti_logcookies;
-        int                      oti_numcookies;
-       /** synchronous write is needed */
-       unsigned long            oti_sync_write:1;
-
-        /* initial thread handling transaction */
-        struct ptlrpc_thread *   oti_thread;
-        __u32                    oti_conn_cnt;
-        /** VBR: versions */
-        __u64                    oti_pre_version;
-       /** JobID */
-       char                    *oti_jobid;
-
-        struct obd_uuid         *oti_ost_uuid;
-};
-
-static inline void oti_init(struct obd_trans_info *oti,
-                            struct ptlrpc_request *req)
-{
-        if (oti == NULL)
-                return;
-        memset(oti, 0, sizeof(*oti));
-
-        if (req == NULL)
-                return;
-
-        oti->oti_xid = req->rq_xid;
-        /** VBR: take versions from request */
-        if (req->rq_reqmsg != NULL &&
-            lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) {
-                __u64 *pre_version = lustre_msg_get_versions(req->rq_reqmsg);
-                oti->oti_pre_version = pre_version ? pre_version[0] : 0;
-                oti->oti_transno = lustre_msg_get_transno(req->rq_reqmsg);
-        }
-
-        /** called from mds_create_objects */
-        if (req->rq_repmsg != NULL)
-                oti->oti_transno = lustre_msg_get_transno(req->rq_repmsg);
-        oti->oti_thread = req->rq_svc_thread;
-        if (req->rq_reqmsg != NULL)
-                oti->oti_conn_cnt = lustre_msg_get_conn_cnt(req->rq_reqmsg);
-}
-
-static inline void oti_alloc_cookies(struct obd_trans_info *oti,int num_cookies)
-{
-        if (!oti)
-                return;
-
-        if (num_cookies == 1)
-                oti->oti_logcookies = &oti->oti_onecookie;
-        else
-                OBD_ALLOC_LARGE(oti->oti_logcookies,
-                                num_cookies * sizeof(oti->oti_onecookie));
-
-        oti->oti_numcookies = num_cookies;
-}
-
-static inline void oti_free_cookies(struct obd_trans_info *oti)
-{
-        if (!oti || !oti->oti_logcookies)
-                return;
-
-        if (oti->oti_logcookies == &oti->oti_onecookie)
-                LASSERT(oti->oti_numcookies == 1);
-        else
-                OBD_FREE_LARGE(oti->oti_logcookies,
-                               oti->oti_numcookies*sizeof(oti->oti_onecookie));
-        oti->oti_logcookies = NULL;
-        oti->oti_numcookies = 0;
-}
-
 /*
  * Events signalled through obd_notify() upcall-chain.
  */
@@ -693,7 +494,6 @@ struct target_recovery_data {
 };
 
 struct obd_llog_group {
-       int                olg_seq;
        struct llog_ctxt   *olg_ctxts[LLOG_MAX_CTXTS];
        wait_queue_head_t  olg_waitq;
        spinlock_t         olg_lock;
@@ -702,7 +502,6 @@ struct obd_llog_group {
 
 /* corresponds to one of the obd's */
 #define OBD_DEVICE_MAGIC        0XAB5CD6EF
-#define OBD_DEV_BY_DEVNAME      0xffffd0de
 
 struct obd_device {
        struct obd_type         *obd_type;
@@ -729,13 +528,13 @@ struct obd_device {
                obd_starting:1,         /* started setup */
                obd_force:1,            /* cleanup with > 0 obd refcount */
                obd_fail:1,             /* cleanup with failover */
-               obd_async_recov:1,      /* allow asynchronous orphan cleanup */
                obd_no_conn:1,          /* deny new connections */
                obd_inactive:1,         /* device active/inactive
                                         * (for /proc/status only!!) */
                obd_no_ir:1,            /* no imperative recovery. */
                obd_process_conf:1,     /* device is processing mgs config */
-               obd_uses_nid_stats:1;   /* maintain per-client OBD stats */
+               obd_uses_nid_stats:1,   /* maintain per-client OBD stats */
+               obd_force_abort_recovery:1; /* abort recovery forcibly */
 
         /* use separate field as it is set in interrupt to don't mess with
          * protection of other bits using _bh lock */
@@ -744,14 +543,15 @@ struct obd_device {
         cfs_hash_t             *obd_uuid_hash;
         /* nid-export hash body */
         cfs_hash_t             *obd_nid_hash;
-        /* nid stats body */
-        cfs_hash_t             *obd_nid_stats_hash;
-        cfs_list_t              obd_nid_stats;
-        cfs_atomic_t            obd_refcount;
-        cfs_list_t              obd_exports;
-        cfs_list_t              obd_unlinked_exports;
-        cfs_list_t              obd_delayed_exports;
-        int                     obd_num_exports;
+       /* nid stats body */
+       cfs_hash_t             *obd_nid_stats_hash;
+       struct list_head        obd_nid_stats;
+       atomic_t                obd_refcount;
+       struct list_head        obd_exports;
+       struct list_head        obd_unlinked_exports;
+       struct list_head        obd_delayed_exports;
+       struct list_head        obd_lwp_list;
+       int                     obd_num_exports;
        spinlock_t              obd_nid_lock;
        struct ldlm_namespace  *obd_namespace;
        struct ptlrpc_client    obd_ldlm_client; /* XXX OST/MDS only */
@@ -768,38 +568,40 @@ struct obd_device {
        struct rw_semaphore     obd_observer_link_sem;
         struct obd_notify_upcall obd_upcall;
         struct obd_export       *obd_self_export;
-        /* list of exports in LRU order, for ping evictor, with obd_dev_lock */
-        cfs_list_t              obd_exports_timed;
-        time_t                  obd_eviction_timer; /* for ping evictor */
-
-        int                              obd_max_recoverable_clients;
-        cfs_atomic_t                     obd_connected_clients;
-        int                              obd_stale_clients;
-        int                              obd_delayed_clients;
+       struct obd_export       *obd_lwp_export;
+       /* list of exports in LRU order, for ping evictor, with obd_dev_lock */
+       struct list_head        obd_exports_timed;
+       time_t                  obd_eviction_timer;     /* for ping evictor */
+
+       int                     obd_max_recoverable_clients;
+       atomic_t                obd_connected_clients;
+       int                     obd_stale_clients;
         /* this lock protects all recovery list_heads, timer and
          * obd_next_recovery_transno value */
-       spinlock_t                       obd_recovery_task_lock;
-        __u64                            obd_next_recovery_transno;
-       int                              obd_replayed_requests;
-       int                              obd_requests_queued_for_recovery;
-       wait_queue_head_t                obd_next_transno_waitq;
+       spinlock_t              obd_recovery_task_lock;
+       __u64                   obd_next_recovery_transno;
+       int                     obd_replayed_requests;
+       int                     obd_requests_queued_for_recovery;
+       wait_queue_head_t       obd_next_transno_waitq;
        /* protected by obd_recovery_task_lock */
-       struct timer_list                obd_recovery_timer;
-       time_t                           obd_recovery_start; /* seconds */
-       time_t                           obd_recovery_end; /* seconds, for lprocfs_status */
-       int                              obd_recovery_time_hard;
-       int                              obd_recovery_timeout;
-       int                              obd_recovery_ir_factor;
-
-        /* new recovery stuff from CMD2 */
-        struct target_recovery_data      obd_recovery_data;
-        int                              obd_replayed_locks;
-        cfs_atomic_t                     obd_req_replay_clients;
-        cfs_atomic_t                     obd_lock_replay_clients;
-        /* all lists are protected by obd_recovery_task_lock */
-        cfs_list_t                       obd_req_replay_queue;
-        cfs_list_t                       obd_lock_replay_queue;
-        cfs_list_t                       obd_final_req_queue;
+       struct timer_list       obd_recovery_timer;
+       /* seconds */
+       time_t                  obd_recovery_start;
+       /* seconds, for lprocfs_status */
+       time_t                  obd_recovery_end;
+       int                     obd_recovery_time_hard;
+       int                     obd_recovery_timeout;
+       int                     obd_recovery_ir_factor;
+
+       /* new recovery stuff from CMD2 */
+       struct target_recovery_data     obd_recovery_data;
+       int                             obd_replayed_locks;
+       atomic_t                        obd_req_replay_clients;
+       atomic_t                        obd_lock_replay_clients;
+       /* all lists are protected by obd_recovery_task_lock */
+       struct list_head                obd_req_replay_queue;
+       struct list_head                obd_lock_replay_queue;
+       struct list_head                obd_final_req_queue;
 
        union {
 #ifdef HAVE_SERVER_SUPPORT
@@ -822,20 +624,19 @@ struct obd_device {
 
        struct proc_dir_entry   *obd_proc_entry;
        struct proc_dir_entry   *obd_proc_exports_entry;
-       void                    *obd_proc_private;      /* type private PDEs */
        struct proc_dir_entry   *obd_svc_procroot;
        struct lprocfs_stats    *obd_svc_stats;
-       struct lprocfs_seq_vars *obd_vars;
-       cfs_atomic_t           obd_evict_inprogress;
-       wait_queue_head_t      obd_evict_inprogress_waitq;
-       cfs_list_t             obd_evict_list; /* protected with pet_lock */
+       struct lprocfs_vars     *obd_vars;
+       atomic_t                obd_evict_inprogress;
+       wait_queue_head_t       obd_evict_inprogress_waitq;
+       struct list_head        obd_evict_list; /* protected with pet_lock */
 
         /**
          * Ldlm pool part. Save last calculated SLV and Limit.
          */
        rwlock_t                obd_pool_lock;
-        int                    obd_pool_limit;
-        __u64                  obd_pool_slv;
+       int                     obd_pool_limit;
+       __u64                   obd_pool_slv;
 
         /**
          * A list of outstanding class_incref()'s against this obd. For
@@ -846,9 +647,6 @@ struct obd_device {
        int                    obd_conn_inprogress;
 };
 
-#define OBD_LLOG_FL_SENDNOW     0x0001
-#define OBD_LLOG_FL_EXIT       0x0002
-
 enum obd_cleanup_stage {
 /* Special case hack for MDS LOVs */
         OBD_CLEANUP_EARLY,
@@ -858,8 +656,6 @@ enum obd_cleanup_stage {
 
 /* get/set_info keys */
 #define KEY_ASYNC               "async"
-#define KEY_BLOCKSIZE_BITS      "blocksize_bits"
-#define KEY_BLOCKSIZE           "blocksize"
 #define KEY_CAPA_KEY            "capa_key"
 #define KEY_CHANGELOG_CLEAR     "changelog_clear"
 #define KEY_FID2PATH            "fid2path"
@@ -872,29 +668,21 @@ enum obd_cleanup_stage {
 #define KEY_GRANT_SHRINK        "grant_shrink"
 #define KEY_HSM_COPYTOOL_SEND   "hsm_send"
 #define KEY_INIT_RECOV_BACKUP   "init_recov_bk"
-#define KEY_INIT_RECOV          "initial_recov"
 #define KEY_INTERMDS            "inter_mds"
 #define KEY_LAST_ID             "last_id"
 #define KEY_LAST_FID           "last_fid"
-#define KEY_LOCK_TO_STRIPE      "lock_to_stripe"
-#define KEY_LOVDESC             "lovdesc"
-#define KEY_LOV_IDX             "lov_idx"
-#define KEY_MAX_EASIZE          "max_easize"
-#define KEY_MDS_CONN            "mds_conn"
+#define KEY_MAX_EASIZE         "max_easize"
+#define KEY_DEFAULT_EASIZE     "default_easize"
 #define KEY_MGSSEC              "mgssec"
-#define KEY_NEXT_ID             "next_id"
 #define KEY_READ_ONLY           "read-only"
 #define KEY_REGISTER_TARGET     "register_target"
 #define KEY_SET_FS              "set_fs"
 #define KEY_TGT_COUNT           "tgt_count"
 /*      KEY_SET_INFO in lustre_idl.h */
 #define KEY_SPTLRPC_CONF        "sptlrpc_conf"
-#define KEY_CONNECT_FLAG        "connect_flags"
-#define KEY_SYNC_LOCK_CANCEL    "sync_lock_cancel"
 
 #define KEY_CACHE_SET          "cache_set"
 #define KEY_CACHE_LRU_SHRINK   "cache_lru_shrink"
-#define KEY_CHANGELOG_INDEX    "changelog_index"
 #define KEY_OSP_CONNECTED      "osp_connected"
 
 struct lu_context;
@@ -934,16 +722,32 @@ static inline int it_to_lock_mode(struct lookup_intent *it)
        return -EINVAL;
 }
 
+enum md_op_flags {     /* bit flags; the type of md_op_data::op_flags */
+       MF_MDC_CANCEL_FID1      = 1 << 0, /* FID1..FID4 presumably name */
+       MF_MDC_CANCEL_FID2      = 1 << 1, /* md_op_data::op_fid1..op_fid4 */
+       MF_MDC_CANCEL_FID3      = 1 << 2, /* -- TODO confirm against the */
+       MF_MDC_CANCEL_FID4      = 1 << 3, /* users of these flags */
+       MF_GET_MDT_IDX          = 1 << 4,
+};
+
+enum md_cli_flags {    /* bit flags; the type of md_op_data::op_cli_flags */
+       CLI_SET_MEA     = 1 << 0,
+       CLI_RM_ENTRY    = 1 << 1,
+       CLI_HASH64      = 1 << 2,
+       CLI_API32       = 1 << 3,
+       CLI_MIGRATE     = 1 << 4,
+};
+
 struct md_op_data {
         struct lu_fid           op_fid1; /* operation fid1 (usually parent) */
         struct lu_fid           op_fid2; /* operation fid2 (usually child) */
         struct lu_fid           op_fid3; /* 2 extra fids to find conflicting */
         struct lu_fid           op_fid4; /* to the operation locks. */
-        mdsno_t                 op_mds;  /* what mds server open will go to */
-        struct lustre_handle    op_handle;
-        obd_time                op_mod_time;
+       u32                     op_mds;  /* what mds server open will go to */
+       struct lustre_handle    op_handle;
+       s64                     op_mod_time;
         const char             *op_name;
-        int                     op_namelen;
+       size_t                  op_namelen;
         __u32                   op_mode;
         struct lmv_stripe_md   *op_mea1;
         struct lmv_stripe_md   *op_mea2;
@@ -952,18 +756,15 @@ struct md_op_data {
         __u32                   op_fsgid;
         cfs_cap_t               op_cap;
         void                   *op_data;
+       size_t                  op_data_size;
 
         /* iattr fields and blocks. */
-        struct iattr            op_attr;
-#ifdef __KERNEL__
-       unsigned int            op_attr_flags;
-#endif
-       __u64                   op_valid;
+       struct iattr            op_attr;
        loff_t                  op_attr_blocks;
+       unsigned int            op_attr_flags; /* LUSTRE_{SYNC,..}_FL */
+       __u64                   op_valid; /* OBD_MD_* */
 
-       /* Size-on-MDS epoch and flags. */
-       __u64                   op_ioepoch;
-       __u32                   op_flags;
+       enum md_op_flags        op_flags;
 
        /* Capa fields */
        struct obd_capa        *op_capa1;
@@ -972,26 +773,20 @@ struct md_op_data {
        /* Various operation flags. */
        enum mds_op_bias        op_bias;
 
-       /* Operation type */
-       __u32                   op_opc;
-
        /* Used by readdir */
-       __u64                   op_hash_offset;
-
-       /* Used by readdir */
-       __u32                   op_npages;
+       unsigned int            op_max_pages;
 
        /* used to transfer info between the stacks of the MD client;
         * see enum md_cli_flags */
-       __u32                   op_cli_flags;
+       enum md_cli_flags       op_cli_flags;
 
        /* File object data version for HSM release, on client */
        __u64                   op_data_version;
        struct lustre_handle    op_lease_handle;
-};
 
-#define op_stripe_offset       op_ioepoch
-#define op_max_pages           op_valid
+       /* default stripe offset */
+       __u32                   op_default_stripe_offset;
+};
 
 struct md_callback {
        int (*md_blocking_ast)(struct ldlm_lock *lock,
@@ -1006,34 +801,29 @@ typedef int (* md_enqueue_cb_t)(struct ptlrpc_request *req,
                                 int rc);
 
 struct md_enqueue_info {
-        struct md_op_data       mi_data;
-        struct lookup_intent    mi_it;
-        struct lustre_handle    mi_lockh;
-        struct inode           *mi_dir;
-        md_enqueue_cb_t         mi_cb;
-        __u64                   mi_cbdata;
-        unsigned int            mi_generation;
+       struct md_op_data       mi_data;
+       struct lookup_intent    mi_it;
+       struct lustre_handle    mi_lockh;
+       struct inode           *mi_dir;
+       md_enqueue_cb_t         mi_cb;          /* see md_enqueue_cb_t */
+       void                   *mi_cbdata;      /* presumably the opaque
+                                                * argument for mi_cb --
+                                                * TODO confirm */
 };
 
 struct obd_ops {
        struct module *o_owner;
        int (*o_iocontrol)(unsigned int cmd, struct obd_export *exp, int len,
-                          void *karg, void *uarg);
-        int (*o_get_info)(const struct lu_env *env, struct obd_export *,
-                          __u32 keylen, void *key, __u32 *vallen, void *val,
-                          struct lov_stripe_md *lsm);
-        int (*o_set_info_async)(const struct lu_env *, struct obd_export *,
-                                __u32 keylen, void *key,
-                                __u32 vallen, void *val,
-                                struct ptlrpc_request_set *set);
-        int (*o_attach)(struct obd_device *dev, obd_count len, void *data);
-        int (*o_detach)(struct obd_device *dev);
+                          void *karg, void __user *uarg);
+       int (*o_get_info)(const struct lu_env *env, struct obd_export *,
+                         __u32 keylen, void *key, __u32 *vallen, void *val);
+       int (*o_set_info_async)(const struct lu_env *, struct obd_export *,
+                               __u32 keylen, void *key,
+                               __u32 vallen, void *val,
+                               struct ptlrpc_request_set *set);
         int (*o_setup) (struct obd_device *dev, struct lustre_cfg *cfg);
         int (*o_precleanup)(struct obd_device *dev,
                             enum obd_cleanup_stage cleanup_stage);
         int (*o_cleanup)(struct obd_device *dev);
-        int (*o_process_config)(struct obd_device *dev, obd_count len,
-                                void *data);
+       int (*o_process_config)(struct obd_device *dev, size_t len, void *data);
         int (*o_postrecov)(struct obd_device *dev);
         int (*o_add_conn)(struct obd_import *imp, struct obd_uuid *uuid,
                           int priority);
@@ -1058,9 +848,9 @@ struct obd_ops {
                          struct obd_export *exp, enum lu_cli_type type);
        int (*o_fid_fini)(struct obd_device *obd);
 
-        /* Allocate new fid according to passed @hint. */
-        int (*o_fid_alloc)(struct obd_export *exp, struct lu_fid *fid,
-                           struct md_op_data *op_data);
+       /* Allocate new fid according to passed @hint. */
+       int (*o_fid_alloc)(const struct lu_env *env, struct obd_export *exp,
+                          struct lu_fid *fid, struct md_op_data *op_data);
 
         /*
          * Object with @fid is getting deleted, we may want to do something
@@ -1070,92 +860,27 @@ struct obd_ops {
                         struct obd_statfs *osfs, __u64 max_age, __u32 flags);
         int (*o_statfs_async)(struct obd_export *exp, struct obd_info *oinfo,
                               __u64 max_age, struct ptlrpc_request_set *set);
-        int (*o_packmd)(struct obd_export *exp, struct lov_mds_md **disk_tgt,
-                        struct lov_stripe_md *mem_src);
         int (*o_unpackmd)(struct obd_export *exp,struct lov_stripe_md **mem_tgt,
                           struct lov_mds_md *disk_src, int disk_len);
-        int (*o_preallocate)(struct lustre_handle *, obd_count *req,
-                             obd_id *ids);
-        /* FIXME: add fid capability support for create & destroy! */
-        int (*o_precreate)(struct obd_export *exp);
-        int (*o_create)(const struct lu_env *env, struct obd_export *exp,
-                        struct obdo *oa, struct lov_stripe_md **ea,
-                        struct obd_trans_info *oti);
-        int (*o_create_async)(struct obd_export *exp,  struct obd_info *oinfo,
-                              struct lov_stripe_md **ea,
-                              struct obd_trans_info *oti);
-        int (*o_destroy)(const struct lu_env *env, struct obd_export *exp,
-                         struct obdo *oa, struct lov_stripe_md *ea,
-                         struct obd_trans_info *oti, struct obd_export *md_exp,
-                         void *capa);
-        int (*o_setattr)(const struct lu_env *, struct obd_export *exp,
-                         struct obd_info *oinfo, struct obd_trans_info *oti);
-        int (*o_setattr_async)(struct obd_export *exp, struct obd_info *oinfo,
-                               struct obd_trans_info *oti,
-                               struct ptlrpc_request_set *rqset);
-        int (*o_getattr)(const struct lu_env *env, struct obd_export *exp,
-                         struct obd_info *oinfo);
-        int (*o_getattr_async)(struct obd_export *exp, struct obd_info *oinfo,
-                               struct ptlrpc_request_set *set);
-        int (*o_brw)(int rw, struct obd_export *exp, struct obd_info *oinfo,
-                     obd_count oa_bufs, struct brw_page *pgarr,
-                     struct obd_trans_info *oti);
-        int (*o_merge_lvb)(struct obd_export *exp, struct lov_stripe_md *lsm,
-                           struct ost_lvb *lvb, int kms_only);
-        int (*o_adjust_kms)(struct obd_export *exp, struct lov_stripe_md *lsm,
-                            obd_off size, int shrink);
-        int (*o_punch)(const struct lu_env *, struct obd_export *exp,
-                       struct obd_info *oinfo, struct obd_trans_info *oti,
-                       struct ptlrpc_request_set *rqset);
-        int (*o_sync)(const struct lu_env *env, struct obd_export *exp,
-                      struct obd_info *oinfo, obd_size start, obd_size end,
-                      struct ptlrpc_request_set *set);
-        int (*o_migrate)(struct lustre_handle *conn, struct lov_stripe_md *dst,
-                         struct lov_stripe_md *src, obd_size start,
-                         obd_size end, struct obd_trans_info *oti);
-        int (*o_copy)(struct lustre_handle *dstconn, struct lov_stripe_md *dst,
-                      struct lustre_handle *srconn, struct lov_stripe_md *src,
-                      obd_size start, obd_size end, struct obd_trans_info *);
-        int (*o_iterate)(struct lustre_handle *conn,
-                         int (*)(obd_id, obd_seq, void *),
-                         obd_id *startid, obd_seq seq, void *data);
-        int (*o_preprw)(const struct lu_env *env, int cmd,
-                        struct obd_export *exp, struct obdo *oa, int objcount,
-                        struct obd_ioobj *obj, struct niobuf_remote *remote,
-                        int *nr_pages, struct niobuf_local *local,
-                        struct obd_trans_info *oti, struct lustre_capa *capa);
-        int (*o_commitrw)(const struct lu_env *env, int cmd,
-                          struct obd_export *exp, struct obdo *oa,
-                          int objcount, struct obd_ioobj *obj,
-                          struct niobuf_remote *remote, int pages,
-                          struct niobuf_local *local,
-                          struct obd_trans_info *oti, int rc);
-        int (*o_enqueue)(struct obd_export *, struct obd_info *oinfo,
-                         struct ldlm_enqueue_info *einfo,
-                         struct ptlrpc_request_set *rqset);
-        int (*o_change_cbdata)(struct obd_export *, struct lov_stripe_md *,
-                               ldlm_iterator_t it, void *data);
-        int (*o_find_cbdata)(struct obd_export *, struct lov_stripe_md *,
-                             ldlm_iterator_t it, void *data);
-        int (*o_cancel)(struct obd_export *, struct lov_stripe_md *md,
-                        __u32 mode, struct lustre_handle *);
-        int (*o_cancel_unused)(struct obd_export *, struct lov_stripe_md *,
-                               ldlm_cancel_flags_t flags, void *opaque);
+       int (*o_create)(const struct lu_env *env, struct obd_export *exp,
+                       struct obdo *oa);
+       int (*o_destroy)(const struct lu_env *env, struct obd_export *exp,
+                        struct obdo *oa);
+       int (*o_setattr)(const struct lu_env *, struct obd_export *exp,
+                        struct obdo *oa);
+       int (*o_getattr)(const struct lu_env *env, struct obd_export *exp,
+                        struct obdo *oa);
+       int (*o_preprw)(const struct lu_env *env, int cmd,
+                       struct obd_export *exp, struct obdo *oa, int objcount,
+                       struct obd_ioobj *obj, struct niobuf_remote *remote,
+                       int *nr_pages, struct niobuf_local *local);
+       int (*o_commitrw)(const struct lu_env *env, int cmd,
+                         struct obd_export *exp, struct obdo *oa,
+                         int objcount, struct obd_ioobj *obj,
+                         struct niobuf_remote *remote, int pages,
+                         struct niobuf_local *local, int rc);
         int (*o_init_export)(struct obd_export *exp);
         int (*o_destroy_export)(struct obd_export *exp);
-        int (*o_extent_calc)(struct obd_export *, struct lov_stripe_md *,
-                             int cmd, obd_off *);
-
-        /* llog related obd_methods */
-        int (*o_llog_init)(struct obd_device *obd, struct obd_llog_group *grp,
-                           struct obd_device *disk_obd, int *idx);
-        int (*o_llog_finish)(struct obd_device *obd, int count);
-        int (*o_llog_connect)(struct obd_export *, struct llogd_conn_body *);
-
-        /* metadata-only methods */
-        int (*o_pin)(struct obd_export *, const struct lu_fid *fid,
-                     struct obd_capa *, struct obd_client_handle *, int flag);
-        int (*o_unpin)(struct obd_export *, struct obd_client_handle *, int);
 
         int (*o_import_event)(struct obd_device *, struct obd_import *,
                               enum obd_import_event);
@@ -1167,8 +892,6 @@ struct obd_ops {
         struct obd_uuid *(*o_get_uuid) (struct obd_export *exp);
 
         /* quota methods */
-        int (*o_quotacheck)(struct obd_device *, struct obd_export *,
-                            struct obd_quotactl *);
         int (*o_quotactl)(struct obd_device *, struct obd_export *,
                           struct obd_quotactl *);
 
@@ -1189,23 +912,7 @@ struct obd_ops {
          * Also, add a wrapper function in include/linux/obd_class.h. */
 };
 
-enum {
-        LUSTRE_OPC_MKDIR    = (1 << 0),
-        LUSTRE_OPC_SYMLINK  = (1 << 1),
-        LUSTRE_OPC_MKNOD    = (1 << 2),
-        LUSTRE_OPC_CREATE   = (1 << 3),
-        LUSTRE_OPC_ANY      = (1 << 4)
-};
-
 /* lmv structures */
-#define MEA_MAGIC_LAST_CHAR      0xb2221ca1
-#define MEA_MAGIC_ALL_CHARS      0xb222a11c
-#define MEA_MAGIC_HASH_SEGMENT   0xb222a11b
-
-#define MAX_HASH_SIZE_32         0x7fffffffUL
-#define MAX_HASH_SIZE            0x7fffffffffffffffULL
-#define MAX_HASH_HIGHEST_BIT     0x1000000000000000ULL
-
 struct lustre_md {
        struct mdt_body         *body;
        struct lov_stripe_md    *lsm;
@@ -1216,18 +923,29 @@ struct lustre_md {
        struct mdt_remote_perm  *remote_perm;
        struct obd_capa         *mds_capa;
        struct obd_capa         *oss_capa;
-       __u64                   lm_flags;
 };
 
 struct md_open_data {
        struct obd_client_handle        *mod_och;
        struct ptlrpc_request           *mod_open_req; /* if set, finished by the last obd_mod_put() */
        struct ptlrpc_request           *mod_close_req;
-       cfs_atomic_t                     mod_refcount;
+       atomic_t                         mod_refcount; /* 1 after obd_mod_alloc(); struct freed when it drops to 0 */
        bool                             mod_is_create;
 };
 
+struct obd_client_handle {
+       struct lustre_handle     och_fh;
+       struct lu_fid            och_fid;
+       struct md_open_data     *och_mod; /* reference-counted, see mod_refcount */
+       struct lustre_handle     och_lease_handle; /* open lock for lease */
+       __u32                    och_magic; /* presumably OBD_CLIENT_HANDLE_MAGIC
+                                            * when valid -- TODO confirm */
+       int                      och_flags;
+};
+
+#define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed
+
 struct lookup_intent;
+struct cl_attr;
 
 struct md_ops {
        /* Every operation from MD_STATS_FIRST_OP up to and including
@@ -1246,19 +964,19 @@ struct md_ops {
                       struct md_open_data *, struct ptlrpc_request **);
 
        int (*m_create)(struct obd_export *, struct md_op_data *,
-                       const void *, int, int, __u32, __u32, cfs_cap_t,
-                       __u64, struct ptlrpc_request **);
+                       const void *, size_t, umode_t, uid_t, gid_t,
+                       cfs_cap_t, __u64, struct ptlrpc_request **);
 
        int (*m_enqueue)(struct obd_export *, struct ldlm_enqueue_info *,
+                        const union ldlm_policy_data *,
                         struct lookup_intent *, struct md_op_data *,
-                        struct lustre_handle *, void *, int,
-                        struct ptlrpc_request **, __u64);
+                        struct lustre_handle *, __u64);
 
        int (*m_getattr)(struct obd_export *, struct md_op_data *,
                         struct ptlrpc_request **);
 
        int (*m_intent_lock)(struct obd_export *, struct md_op_data *,
-                            void *, int, struct lookup_intent *, int,
+                            struct lookup_intent *,
                             struct ptlrpc_request **,
                             ldlm_blocking_callback, __u64);
 
@@ -1266,32 +984,29 @@ struct md_ops {
                      struct ptlrpc_request **);
 
        int (*m_rename)(struct obd_export *, struct md_op_data *,
-                       const char *, int, const char *, int,
+                       const char *, size_t, const char *, size_t,
                        struct ptlrpc_request **);
 
        int (*m_setattr)(struct obd_export *, struct md_op_data *, void *,
-                        int , void *, int, struct ptlrpc_request **,
-                        struct md_open_data **mod);
+                        size_t , struct ptlrpc_request **);
 
        int (*m_fsync)(struct obd_export *, const struct lu_fid *,
                       struct obd_capa *, struct ptlrpc_request **);
 
-       int (*m_readpage)(struct obd_export *, struct md_op_data *,
-                         struct page **, struct ptlrpc_request **);
-
-       int (*m_read_entry)(struct obd_export *, struct md_op_data *,
-                           struct md_callback *cb_op, struct lu_dirent **ld);
+       int (*m_read_page)(struct obd_export *, struct md_op_data *,
+                          struct md_callback *cb_op, __u64 hash_offset,
+                          struct page **ppage);
 
        int (*m_unlink)(struct obd_export *, struct md_op_data *,
                        struct ptlrpc_request **);
 
        int (*m_setxattr)(struct obd_export *, const struct lu_fid *,
-                         struct obd_capa *, obd_valid, const char *,
+                         struct obd_capa *, u64, const char *,
                          const char *, int, int, int, __u32,
                          struct ptlrpc_request **);
 
        int (*m_getxattr)(struct obd_export *, const struct lu_fid *,
-                         struct obd_capa *, obd_valid, const char *,
+                         struct obd_capa *, u64, const char *,
                          const char *, int, int, int,
                          struct ptlrpc_request **);
 
@@ -1301,6 +1016,7 @@ struct md_ops {
 
         int (*m_revalidate_lock)(struct obd_export *, struct lookup_intent *,
                                  struct lu_fid *, __u64 *bits);
+
 #define MD_STATS_LAST_OP m_revalidate_lock
 
        int (*m_getstatus)(struct obd_export *, struct lu_fid *,
@@ -1311,17 +1027,10 @@ struct md_ops {
        int (*m_find_cbdata)(struct obd_export *, const struct lu_fid *,
                             ldlm_iterator_t, void *);
 
-       int (*m_done_writing)(struct obd_export *, struct md_op_data  *,
-                             struct md_open_data *);
-
        int (*m_getattr_name)(struct obd_export *, struct md_op_data *,
                              struct ptlrpc_request **);
 
-       int (*m_is_subdir)(struct obd_export *, const struct lu_fid *,
-                          const struct lu_fid *,
-                          struct ptlrpc_request **);
-
-       int (*m_init_ea_size)(struct obd_export *, int, int, int);
+       int (*m_init_ea_size)(struct obd_export *, __u32, __u32);
 
        int (*m_get_lustre_md)(struct obd_export *, struct ptlrpc_request *,
                               struct obd_export *, struct obd_export *,
@@ -1329,6 +1038,10 @@ struct md_ops {
 
        int (*m_free_lustre_md)(struct obd_export *, struct lustre_md *);
 
+       int (*m_merge_attr)(struct obd_export *,
+                           const struct lmv_stripe_md *lsm,
+                           struct cl_attr *attr, ldlm_blocking_callback);
+
        int (*m_set_open_replay_data)(struct obd_export *,
                                      struct obd_client_handle *,
                                      struct lookup_intent *);
@@ -1356,74 +1069,41 @@ struct md_ops {
        int (*m_get_remote_perm)(struct obd_export *, const struct lu_fid *,
                                 struct obd_capa *, __u32,
                                 struct ptlrpc_request **);
-};
 
-struct lsm_operations {
-        void (*lsm_free)(struct lov_stripe_md *);
-        int (*lsm_destroy)(struct lov_stripe_md *, struct obdo *oa,
-                           struct obd_export *md_exp);
-        void (*lsm_stripe_by_index)(struct lov_stripe_md *, int *, obd_off *,
-                                    obd_off *);
-        void (*lsm_stripe_by_offset)(struct lov_stripe_md *, int *, obd_off *,
-                                     obd_off *);
-        int (*lsm_lmm_verify) (struct lov_mds_md *lmm, int lmm_bytes,
-                               __u16 *stripe_count);
-        int (*lsm_unpackmd) (struct lov_obd *lov, struct lov_stripe_md *lsm,
-                             struct lov_mds_md *lmm);
+       int (*m_get_fid_from_lsm)(struct obd_export *,
+                                 const struct lmv_stripe_md *,
+                                 const char *name, int namelen,
+                                 struct lu_fid *fid);
 };
 
-extern const struct lsm_operations lsm_v1_ops;
-extern const struct lsm_operations lsm_v3_ops;
-static inline const struct lsm_operations *lsm_op_find(int magic)
-{
-        switch(magic) {
-        case LOV_MAGIC_V1:
-               return &lsm_v1_ops;
-        case LOV_MAGIC_V3:
-               return &lsm_v3_ops;
-        default:
-               CERROR("Cannot recognize lsm_magic %08x\n", magic);
-               return NULL;
-        }
-}
-
-/* Requests for obd_extent_calc() */
-#define OBD_CALC_STRIPE_START   1
-#define OBD_CALC_STRIPE_END     2
-
-static inline struct lustre_capa *oinfo_capa(struct obd_info *oinfo)
-{
-        return oinfo->oi_capa;
-}
-
 /* Allocate a md_open_data with OBD_ALLOC_PTR() and set its reference count
  * to 1.  Returns the new object, or NULL if the allocation failed. */
 static inline struct md_open_data *obd_mod_alloc(void)
 {
-        struct md_open_data *mod;
-        OBD_ALLOC_PTR(mod);
-        if (mod == NULL)
-                return NULL;
-        cfs_atomic_set(&mod->mod_refcount, 1);
-        return mod;
+       struct md_open_data *mod;
+       OBD_ALLOC_PTR(mod);
+       if (mod == NULL)                        /* allocation failed */
+               return NULL;
+       atomic_set(&mod->mod_refcount, 1);      /* start with one reference */
+       return mod;
 }
 
-#define obd_mod_get(mod) cfs_atomic_inc(&(mod)->mod_refcount)
+#define obd_mod_get(mod) atomic_inc(&(mod)->mod_refcount) /* take one reference on @mod */
 #define obd_mod_put(mod) /* drop one reference on @mod */         \
 ({ /* NB: @mod is expanded several times; avoid side effects */   \
-       if (cfs_atomic_dec_and_test(&(mod)->mod_refcount)) {      \
+       if (atomic_dec_and_test(&(mod)->mod_refcount)) {          \
                 if ((mod)->mod_open_req) /* finish open request */ \
                         ptlrpc_req_finished((mod)->mod_open_req); \
                 OBD_FREE_PTR(mod); /* last reference: free @mod */ \
         }                                                         \
 })
 
-void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid);
+void obdo_from_inode(struct obdo *dst, struct inode *src, u64 valid);
 void obdo_set_parent_fid(struct obdo *dst, const struct lu_fid *parent);
 
 /* Return 1 if the client should resend the request: that is, when cl_resends
  * is zero or is still greater than @resend.  Return 0 otherwise. */
 static inline int client_should_resend(int resend, struct client_obd *cli)
 {
-        return cfs_atomic_read(&cli->cl_resends) ?
-               cfs_atomic_read(&cli->cl_resends) > resend : 1;
+       return atomic_read(&cli->cl_resends) ?
+              atomic_read(&cli->cl_resends) > resend : 1;
 }
 
 /**
@@ -1438,7 +1118,8 @@ static inline const char *lu_dev_name(const struct lu_device *lu_dev)
         return lu_dev->ld_obd->obd_name;
 }
 
-static inline bool filename_is_volatile(const char *name, int namelen, int *idx)
+static inline bool filename_is_volatile(const char *name, size_t namelen,
+                                       int *idx)
 {
        const char      *start;
        char            *end;
@@ -1463,7 +1144,7 @@ static inline bool filename_is_volatile(const char *name, int namelen, int *idx)
        }
        /* we have an idx, read it */
        start = name + LUSTRE_VOLATILE_HDR_LEN + 1;
-       *idx = strtoul(start, &end, 16);
+       *idx = simple_strtoul(start, &end, 16);
        /* error cases:
         * no digit, no trailing :, negative value
         */
@@ -1486,4 +1167,27 @@ static inline int cli_brw_size(struct obd_device *obd)
        return obd->u.cli.cl_max_pages_per_rpc << PAGE_CACHE_SHIFT;
 }
 
+/* When the RPC size or the maximum number of RPCs in flight is increased, the
+ * client's maximum number of dirty pages should be increased accordingly, to
+ * avoid sending fragmented RPCs over the network when the client runs out of
+ * dirty space while many RPCs are being generated.
+ *
+ * If cl_dirty_max_pages is unset (<= 0) it is initialised from
+ * OSC_MAX_DIRTY_DEFAULT (scaled by 1024 * 1024, then shifted right by
+ * PAGE_CACHE_SHIFT); otherwise it is raised to at least
+ * cl_max_rpcs_in_flight * cl_max_pages_per_rpc.  In both cases the result is
+ * finally capped at totalram_pages / 8.
+ */
+static inline void client_adjust_max_dirty(struct client_obd *cli)
+{
+        /* not yet initialised: start from the default */
+       if (cli->cl_dirty_max_pages <= 0)
+               cli->cl_dirty_max_pages = (OSC_MAX_DIRTY_DEFAULT * 1024 * 1024)
+                                                       >> PAGE_CACHE_SHIFT;
+       else {
+               unsigned long dirty_max = cli->cl_max_rpcs_in_flight *
+                                         cli->cl_max_pages_per_rpc;
+
+               if (dirty_max > cli->cl_dirty_max_pages) /* only ever grow */
+                       cli->cl_dirty_max_pages = dirty_max;
+       }
+
+       if (cli->cl_dirty_max_pages > totalram_pages / 8) /* upper bound */
+               cli->cl_dirty_max_pages = totalram_pages / 8;
+}
+
 #endif /* __OBD_H */