Whamcloud - gitweb
LU-819 utils: Fix lfs getstripe -M
[fs/lustre-release.git] / lustre / include / obd.h
index 2c7da99..870c844 100644 (file)
@@ -30,6 +30,9 @@
  * Use is subject to license terms.
  */
 /*
+ * Copyright (c) 2011 Whamcloud, Inc.
+ */
+/*
  * This file is part of Lustre, http://www.lustre.org/
  * Lustre is a trademark of Sun Microsystems, Inc.
  */
@@ -54,9 +57,6 @@
 
 #define IOC_MDC_TYPE         'i'
 #define IOC_MDC_MIN_NR       20
-/* Moved to lustre_user.h
-#define IOC_MDC_LOOKUP       _IOWR(IOC_MDC_TYPE, 20, struct obd_ioctl_data *)
-#define IOC_MDC_GETSTRIPE    _IOWR(IOC_MDC_TYPE, 21, struct lov_mds_md *) */
 #define IOC_MDC_MAX_NR       50
 
 #include <lustre/lustre_idl.h>
@@ -132,17 +132,20 @@ struct lov_stripe_md {
         cfs_spinlock_t   lsm_lock;
         pid_t            lsm_lock_owner; /* debugging */
 
+        /* maximum possible file size, might change as OSTs status changes,
+         * e.g. disconnected, deactivated */
+        __u64            lsm_maxbytes;
         struct {
                 /* Public members. */
                 __u64 lw_object_id;        /* lov object id */
                 __u64 lw_object_seq;       /* lov object seq */
-                __u64 lw_maxbytes;         /* maximum possible file size */
 
                 /* LOV-private members start here -- only for use in lov/. */
                 __u32 lw_magic;
                 __u32 lw_stripe_size;      /* size of the stripe */
                 __u32 lw_pattern;          /* striping pattern (RAID0, RAID1) */
-                unsigned lw_stripe_count;  /* number of objects being striped over */
+                __u16 lw_stripe_count;  /* number of objects being striped over */
+                __u16 lw_layout_gen;       /* generation of the layout */
                 char  lw_pool_name[LOV_MAXPOOLNAME]; /* pool name */
         } lsm_wire;
 
@@ -151,8 +154,8 @@ struct lov_stripe_md {
 
 #define lsm_object_id    lsm_wire.lw_object_id
 #define lsm_object_seq   lsm_wire.lw_object_seq
-#define lsm_maxbytes     lsm_wire.lw_maxbytes
 #define lsm_magic        lsm_wire.lw_magic
+#define lsm_layout_gen   lsm_wire.lw_layout_gen
 #define lsm_stripe_size  lsm_wire.lw_stripe_size
 #define lsm_pattern      lsm_wire.lw_pattern
 #define lsm_stripe_count lsm_wire.lw_stripe_count
@@ -172,6 +175,7 @@ struct obd_info {
            - while lock handling, the flags obtained on the enqueue
            request are set here.
            - while stats, the flags used for control delay/resend.
+           - while setattr, the flags used for distinguish punch operation
          */
         int                     oi_flags;
         /* Lock handle specific for every OSC lock. */
@@ -203,6 +207,57 @@ static inline int lov_stripe_md_cmp(struct lov_stripe_md *m1,
         return memcmp(&m1->lsm_wire, &m2->lsm_wire, sizeof m1->lsm_wire);
 }
 
+static inline int lov_lum_lsm_cmp(struct lov_user_md *lum,
+                                  struct lov_stripe_md  *lsm)
+{
+        if (lsm->lsm_magic != lum->lmm_magic)
+                return 1;
+        if ((lsm->lsm_stripe_count != 0) && (lum->lmm_stripe_count != 0) &&
+            (lsm->lsm_stripe_count != lum->lmm_stripe_count))
+                return 2;
+        if ((lsm->lsm_stripe_size != 0) && (lum->lmm_stripe_size != 0) &&
+            (lsm->lsm_stripe_size != lum->lmm_stripe_size))
+                return 3;
+        if ((lsm->lsm_pattern != 0) && (lum->lmm_pattern != 0) &&
+            (lsm->lsm_pattern != lum->lmm_pattern))
+                return 4;
+        if ((lsm->lsm_magic == LOV_MAGIC_V3) &&
+            (strncmp(lsm->lsm_pool_name,
+                     ((struct lov_user_md_v3 *)lum)->lmm_pool_name,
+                     LOV_MAXPOOLNAME) != 0))
+                return 5;
+        return 0;
+}
+
+static inline int lov_lum_swab_if_needed(struct lov_user_md_v3 *lumv3,
+                                         int *lmm_magic,
+                                         struct lov_user_md *lum)
+{
+        if (lum && cfs_copy_from_user(lumv3, lum,sizeof(struct lov_user_md_v1)))
+                return -EFAULT;
+
+        *lmm_magic = lumv3->lmm_magic;
+
+        if (*lmm_magic == __swab32(LOV_USER_MAGIC_V1)) {
+                lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lumv3);
+                *lmm_magic = LOV_USER_MAGIC_V1;
+        } else if (*lmm_magic == LOV_USER_MAGIC_V3) {
+                if (lum && cfs_copy_from_user(lumv3, lum, sizeof(*lumv3)))
+                        return -EFAULT;
+        } else if (*lmm_magic == __swab32(LOV_USER_MAGIC_V3)) {
+                if (lum && cfs_copy_from_user(lumv3, lum, sizeof(*lumv3)))
+                        return -EFAULT;
+                lustre_swab_lov_user_md_v3(lumv3);
+                *lmm_magic = LOV_USER_MAGIC_V3;
+        } else if (*lmm_magic != LOV_USER_MAGIC_V1) {
+                CDEBUG(D_IOCTL,
+                       "bad userland LOV MAGIC: %#08x != %#08x nor %#08x\n",
+                       *lmm_magic, LOV_USER_MAGIC_V1, LOV_USER_MAGIC_V3);
+                       return -EINVAL;
+        }
+        return 0;
+}
+
 void lov_stripe_lock(struct lov_stripe_md *md);
 void lov_stripe_unlock(struct lov_stripe_md *md);
 
@@ -228,10 +283,15 @@ struct brw_page {
 
 struct ost_server_data;
 
+struct osd_properties {
+        size_t osd_max_ea_size;
+};
+
 #define OBT_MAGIC       0xBDDECEAE
 /* hold common fields for "target" device */
 struct obd_device_target {
         __u32                     obt_magic;
+        __u32                     obt_instance;
         struct super_block       *obt_sb;
         /** last_rcvd file */
         struct file              *obt_rcvd_filp;
@@ -243,6 +303,7 @@ struct obd_device_target {
         cfs_rw_semaphore_t        obt_rwsem;
         struct vfsmount          *obt_vfsmnt;
         struct file              *obt_health_check_filp;
+        struct osd_properties    obt_osd_properties;
 };
 
 /* llog contexts */
@@ -303,7 +364,8 @@ struct filter_obd {
         int                  fo_tot_granted_clients;
 
         obd_size             fo_readcache_max_filesize;
-        int                  fo_read_cache:1,   /**< enable read-only cache */
+        cfs_spinlock_t       fo_flags_lock;
+        unsigned int         fo_read_cache:1,   /**< enable read-only cache */
                              fo_writethrough_cache:1,/**< read cache writes */
                              fo_mds_ost_sync:1, /**< MDS-OST orphan recovery*/
                              fo_raid_degraded:1;/**< RAID device degraded */
@@ -461,7 +523,6 @@ struct client_obd {
         cfs_waitq_t              cl_destroy_waitq;
 
         struct mdc_rpc_lock     *cl_rpc_lock;
-        struct mdc_rpc_lock     *cl_setattr_lock;
         struct mdc_rpc_lock     *cl_close_lock;
         struct osc_creator       cl_oscc;
 
@@ -489,20 +550,24 @@ struct client_obd {
         struct lu_client_seq    *cl_seq;
 
         cfs_atomic_t             cl_resends; /* resend count */
+
+        /* ptlrpc work for writeback in ptlrpcd context */
+        void                    *cl_writeback_work;
 };
 #define obd2cli_tgt(obd) ((char *)(obd)->u.cli.cl_target_uuid.uuid)
 
 #define CL_NOT_QUOTACHECKED 1   /* client->cl_qchk_stat init value */
 
 struct mgs_obd {
+        struct obd_device_target         mgs_obt;
         struct ptlrpc_service           *mgs_service;
         struct vfsmount                 *mgs_vfsmnt;
         struct super_block              *mgs_sb;
         struct dentry                   *mgs_configs_dir;
-        struct dentry                   *mgs_fid_de;
         cfs_list_t                       mgs_fs_db_list;
         cfs_semaphore_t                  mgs_sem;
         cfs_proc_dir_entry_t            *mgs_proc_live;
+        cfs_time_t                       mgs_start_time;
 };
 
 struct mds_obd {
@@ -511,7 +576,6 @@ struct mds_obd {
         struct ptlrpc_service           *mds_service;
         struct ptlrpc_service           *mds_setattr_service;
         struct ptlrpc_service           *mds_readpage_service;
-        cfs_dentry_t                    *mds_fid_de;
         int                              mds_max_mdsize;
         int                              mds_max_cookiesize;
         __u64                            mds_io_epoch;
@@ -580,11 +644,12 @@ struct obd_id_info {
 /* */
 
 struct echo_obd {
-        struct obdo          eo_oa;
-        cfs_spinlock_t       eo_lock;
-        __u64                eo_lastino;
-        struct lustre_handle eo_nl_lock;
-        cfs_atomic_t         eo_prep;
+        struct obd_device_target eo_obt;
+        struct obdo              eo_oa;
+        cfs_spinlock_t           eo_lock;
+        __u64                    eo_lastino;
+        struct lustre_handle     eo_nl_lock;
+        cfs_atomic_t             eo_prep;
 };
 
 struct ost_obd {
@@ -865,8 +930,8 @@ static inline void oti_alloc_cookies(struct obd_trans_info *oti,int num_cookies)
         if (num_cookies == 1)
                 oti->oti_logcookies = &oti->oti_onecookie;
         else
-                OBD_ALLOC(oti->oti_logcookies,
-                          num_cookies * sizeof(oti->oti_onecookie));
+                OBD_ALLOC_LARGE(oti->oti_logcookies,
+                                num_cookies * sizeof(oti->oti_onecookie));
 
         oti->oti_numcookies = num_cookies;
 }
@@ -879,8 +944,8 @@ static inline void oti_free_cookies(struct obd_trans_info *oti)
         if (oti->oti_logcookies == &oti->oti_onecookie)
                 LASSERT(oti->oti_numcookies == 1);
         else
-                OBD_FREE(oti->oti_logcookies,
-                         oti->oti_numcookies * sizeof(oti->oti_onecookie));
+                OBD_FREE_LARGE(oti->oti_logcookies,
+                               oti->oti_numcookies*sizeof(oti->oti_onecookie));
         oti->oti_logcookies = NULL;
         oti->oti_numcookies = 0;
 }
@@ -889,6 +954,8 @@ static inline void oti_free_cookies(struct obd_trans_info *oti)
  * Events signalled through obd_notify() upcall-chain.
  */
 enum obd_notify_event {
+        /* target added */
+        OBD_NOTIFY_CREATE,
         /* Device connect start */
         OBD_NOTIFY_CONNECT,
         /* Device activated */
@@ -905,7 +972,10 @@ enum obd_notify_event {
         /* Configuration event */
         OBD_NOTIFY_CONFIG,
         /* Trigger quota recovery */
-        OBD_NOTIFY_QUOTA
+        OBD_NOTIFY_QUOTA,
+        /* Administratively deactivate/activate event */
+        OBD_NOTIFY_DEACTIVATE,
+        OBD_NOTIFY_ACTIVATE
 };
 
 /* bit-mask flags for config events */
@@ -976,9 +1046,9 @@ struct obd_llog_group {
 };
 
 /* corresponds to one of the obd's */
-#define MAX_OBD_NAME 128
 #define OBD_DEVICE_MAGIC        0XAB5CD6EF
 #define OBD_DEV_BY_DEVNAME      0xffffd0de
+
 struct obd_device {
         struct obd_type        *obd_type;
         __u32                   obd_magic;
@@ -990,16 +1060,15 @@ struct obd_device {
         struct lu_device       *obd_lu_dev;
 
         int                     obd_minor;
+        /* bitfield modification is protected by obd_dev_lock */
         unsigned long obd_attached:1,      /* finished attach */
                       obd_set_up:1,        /* finished setup */
                       obd_recovering:1,    /* there are recoverable clients */
                       obd_abort_recovery:1,/* recovery expired */
                       obd_version_recov:1, /* obd uses version checking */
-                      obd_recovery_expired:1,
                       obd_replayable:1,    /* recovery is enabled; inform clients */
                       obd_no_transno:1,    /* no committed-transno notification */
                       obd_no_recov:1,      /* fail instead of retry messages */
-                      obd_req_replaying:1, /* replaying requests */
                       obd_stopping:1,      /* started cleanup */
                       obd_starting:1,      /* started setup */
                       obd_force:1,         /* cleanup with > 0 obd refcount */
@@ -1008,7 +1077,11 @@ struct obd_device {
                       obd_no_conn:1,       /* deny new connections */
                       obd_inactive:1,      /* device active/inactive
                                            * (for /proc/status only!!) */
+                      obd_no_ir:1,         /* no imperative recovery. */
                       obd_process_conf:1;  /* device is processing mgs config */
+        /* use separate field as it is set in interrupt to don't mess with
+         * protection of other bits using _bh lock */
+        unsigned long obd_recovery_expired:1;
         /* uuid-export hash body */
         cfs_hash_t             *obd_uuid_hash;
         /* nid-export hash body */
@@ -1026,7 +1099,7 @@ struct obd_device {
         struct ldlm_namespace  *obd_namespace;
         struct ptlrpc_client    obd_ldlm_client; /* XXX OST/MDS only */
         /* a spinlock is OK for what we do now, may need a semaphore later */
-        cfs_spinlock_t          obd_dev_lock;
+        cfs_spinlock_t          obd_dev_lock; /* protects obd bitfield above */
         cfs_semaphore_t         obd_dev_sem;
         __u64                   obd_last_committed;
         struct fsfilt_operations *obd_fsops;
@@ -1044,25 +1117,30 @@ struct obd_device {
         time_t                  obd_eviction_timer; /* for ping evictor */
 
         int                              obd_max_recoverable_clients;
-        int                              obd_connected_clients;
+        cfs_atomic_t                     obd_connected_clients;
         int                              obd_stale_clients;
         int                              obd_delayed_clients;
-        cfs_spinlock_t                   obd_processing_task_lock; /* BH lock (timer) */
+        /* this lock protects all recovery list_heads, timer and
+         * obd_next_recovery_transno value */
+        cfs_spinlock_t                   obd_recovery_task_lock;
         __u64                            obd_next_recovery_transno;
         int                              obd_replayed_requests;
         int                              obd_requests_queued_for_recovery;
         cfs_waitq_t                      obd_next_transno_waitq;
+        /* protected by obd_recovery_task_lock */
         cfs_timer_t                      obd_recovery_timer;
         time_t                           obd_recovery_start; /* seconds */
         time_t                           obd_recovery_end; /* seconds, for lprocfs_status */
-        time_t                           obd_recovery_time_hard;
+        int                              obd_recovery_time_hard;
         int                              obd_recovery_timeout;
+        int                              obd_recovery_ir_factor;
 
         /* new recovery stuff from CMD2 */
         struct target_recovery_data      obd_recovery_data;
         int                              obd_replayed_locks;
         cfs_atomic_t                     obd_req_replay_clients;
         cfs_atomic_t                     obd_lock_replay_clients;
+        /* all lists are protected by obd_recovery_task_lock */
         cfs_list_t                       obd_req_replay_queue;
         cfs_list_t                       obd_lock_replay_queue;
         cfs_list_t                       obd_final_req_queue;
@@ -1148,20 +1226,34 @@ enum obd_cleanup_stage {
 #define KEY_REGISTER_TARGET     "register_target"
 #define KEY_REVIMP_UPD          "revimp_update"
 #define KEY_SET_FS              "set_fs"
+#define KEY_TGT_COUNT           "tgt_count"
 /*      KEY_SET_INFO in lustre_idl.h */
 #define KEY_SPTLRPC_CONF        "sptlrpc_conf"
 #define KEY_CONNECT_FLAG        "connect_flags"
 #define KEY_SYNC_LOCK_CANCEL    "sync_lock_cancel"
 
-
 struct lu_context;
 
+/* /!\ must be coherent with include/linux/namei.h on patched kernel */
+#define IT_OPEN     (1 << 0)
+#define IT_CREAT    (1 << 1)
+#define IT_READDIR  (1 << 2)
+#define IT_GETATTR  (1 << 3)
+#define IT_LOOKUP   (1 << 4)
+#define IT_UNLINK   (1 << 5)
+#define IT_TRUNC    (1 << 6)
+#define IT_GETXATTR (1 << 7)
+#define IT_EXEC     (1 << 8)
+#define IT_PIN      (1 << 9)
+#define IT_LAYOUT   (1 << 10)
+
 static inline int it_to_lock_mode(struct lookup_intent *it)
 {
         /* CREAT needs to be tested before open (both could be set) */
         if (it->it_op & IT_CREAT)
                 return LCK_CW;
-        else if (it->it_op & (IT_READDIR | IT_GETATTR | IT_OPEN | IT_LOOKUP))
+        else if (it->it_op & (IT_READDIR | IT_GETATTR | IT_OPEN | IT_LOOKUP |
+                              IT_LAYOUT))
                 return LCK_CR;
 
         LASSERTF(0, "Invalid it_op: %d\n", it->it_op);
@@ -1175,7 +1267,7 @@ struct md_op_data {
         struct lu_fid           op_fid4; /* to the operation locks. */
         mdsno_t                 op_mds;  /* what mds server open will go to */
         struct lustre_handle    op_handle;
-        __u64                   op_mod_time;
+        obd_time                op_mod_time;
         const char             *op_name;
         int                     op_namelen;
         __u32                   op_mode;
@@ -1222,11 +1314,10 @@ struct md_enqueue_info {
         struct md_op_data       mi_data;
         struct lookup_intent    mi_it;
         struct lustre_handle    mi_lockh;
-        struct dentry          *mi_dentry;
         struct inode           *mi_dir;
         md_enqueue_cb_t         mi_cb;
+        __u64                   mi_cbdata;
         unsigned int            mi_generation;
-        void                   *mi_cbdata;
 };
 
 struct obd_ops {
@@ -1317,9 +1408,9 @@ struct obd_ops {
         int (*o_punch)(struct obd_export *exp, struct obd_info *oinfo,
                        struct obd_trans_info *oti,
                        struct ptlrpc_request_set *rqset);
-        int (*o_sync)(struct obd_export *exp, struct obdo *oa,
-                      struct lov_stripe_md *ea, obd_size start, obd_size end,
-                      void *capa);
+        int (*o_sync)(struct obd_export *exp, struct obd_info *oinfo,
+                      obd_size start, obd_size end,
+                      struct ptlrpc_request_set *set);
         int (*o_migrate)(struct lustre_handle *conn, struct lov_stripe_md *dst,
                          struct lov_stripe_md *src, obd_size start,
                          obd_size end, struct obd_trans_info *oti);
@@ -1383,7 +1474,8 @@ struct obd_ops {
                           struct obd_quotactl *);
         int (*o_quota_adjust_qunit)(struct obd_export *exp,
                                     struct quota_adjust_qunit *oqaq,
-                                    struct lustre_quota_ctxt *qctxt);
+                                    struct lustre_quota_ctxt *qctxt,
+                                    struct ptlrpc_request_set *rqset);
 
 
         int (*o_ping)(struct obd_export *exp);
@@ -1481,8 +1573,8 @@ struct md_ops {
         int (*m_sync)(struct obd_export *, const struct lu_fid *,
                       struct obd_capa *, struct ptlrpc_request **);
         int (*m_readpage)(struct obd_export *, const struct lu_fid *,
-                          struct obd_capa *, __u64, struct page *,
-                          struct ptlrpc_request **);
+                          struct obd_capa *, __u64, struct page **,
+                          unsigned, struct ptlrpc_request **);
 
         int (*m_unlink)(struct obd_export *, struct md_op_data *,
                         struct ptlrpc_request **);
@@ -1510,7 +1602,7 @@ struct md_ops {
                                       struct ptlrpc_request *);
         int (*m_clear_open_replay_data)(struct obd_export *,
                                         struct obd_client_handle *);
-        int (*m_set_lock_data)(struct obd_export *, __u64 *, void *, __u32 *);
+        int (*m_set_lock_data)(struct obd_export *, __u64 *, void *, __u64 *);
 
         ldlm_mode_t (*m_lock_match)(struct obd_export *, int,
                                     const struct lu_fid *, ldlm_type_t,
@@ -1534,7 +1626,7 @@ struct md_ops {
                                       struct ldlm_enqueue_info *);
 
         int (*m_revalidate_lock)(struct obd_export *, struct lookup_intent *,
-                                 struct lu_fid *, __u32 *);
+                                 struct lu_fid *, __u64 *bits);
 
         /*
          * NOTE: If adding ops, add another LPROCFS_MD_OP_INIT() line to
@@ -1552,7 +1644,7 @@ struct lsm_operations {
         void (*lsm_stripe_by_offset)(struct lov_stripe_md *, int *, obd_off *,
                                      obd_off *);
         int (*lsm_lmm_verify) (struct lov_mds_md *lmm, int lmm_bytes,
-                               int *stripe_count);
+                               __u16 *stripe_count);
         int (*lsm_unpackmd) (struct lov_obd *lov, struct lov_stripe_md *lsm,
                              struct lov_mds_md *lmm);
 };
@@ -1639,4 +1731,23 @@ static inline struct md_open_data *obd_mod_alloc(void)
 extern void obdo_from_inode(struct obdo *dst, struct inode *src,
                             struct lu_fid *parent, obd_flag valid);
 
+/* return 1 if client should be resend request */
+static inline int client_should_resend(int resend, struct client_obd *cli)
+{
+        return cfs_atomic_read(&cli->cl_resends) ?
+               cfs_atomic_read(&cli->cl_resends) > resend : 1;
+}
+
+/**
+ * Return device name for this device
+ *
+ * XXX: lu_device is declared before obd_device, while a pointer pointing
+ * back to obd_device in lu_device, so this helper function defines here
+ * instead of in lu_object.h
+ */
+static inline const char *lu_dev_name(const struct lu_device *lu_dev)
+{
+        return lu_dev->ld_obd->obd_name;
+}
+
 #endif /* __OBD_H */