X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Finclude%2Fobd.h;h=bb561509d05a039fe9d1dc35c4181944b2efd4d5;hb=dfbc98ead8c357255909aafa7f81b3448337dde2;hp=81cd3d83e589547660c215e03d9dcebea7409cee;hpb=665e36b780faa2144cecccd29a0d8a8196a76903;p=fs%2Flustre-release.git diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 81cd3d8..bb56150 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -26,7 +26,7 @@ * GPL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. */ /* @@ -54,19 +54,16 @@ #define IOC_MDC_TYPE 'i' #define IOC_MDC_MIN_NR 20 -/* Moved to lustre_user.h -#define IOC_MDC_LOOKUP _IOWR(IOC_MDC_TYPE, 20, struct obd_ioctl_data *) -#define IOC_MDC_GETSTRIPE _IOWR(IOC_MDC_TYPE, 21, struct lov_mds_md *) */ #define IOC_MDC_MAX_NR 50 #include -#include +#include +#include #include #include #include #include #include -#include #include @@ -75,9 +72,9 @@ /* this is really local to the OSC */ struct loi_oap_pages { - struct list_head lop_pending; - struct list_head lop_urgent; - struct list_head lop_pending_group; + cfs_list_t lop_pending; + cfs_list_t lop_urgent; + cfs_list_t lop_pending_group; int lop_num_pending; }; @@ -88,24 +85,31 @@ struct osc_async_rc { }; struct lov_oinfo { /* per-stripe data structure */ - __u64 loi_id; /* object ID on the target OST */ - __u64 loi_gr; /* object group on the target OST */ + struct ost_id loi_oi; /* object ID/Sequence on the target OST */ int loi_ost_idx; /* OST stripe index in lov_tgt_desc->tgts */ int loi_ost_gen; /* generation of this loi_ost_idx */ /* used by the osc to keep track of what objects to build into rpcs */ struct loi_oap_pages loi_read_lop; struct loi_oap_pages loi_write_lop; - /* _cli_ is poorly named, it should be _ready_ */ - struct list_head loi_cli_item; - struct list_head loi_write_item; - struct list_head loi_read_item; + cfs_list_t loi_ready_item; + cfs_list_t loi_hp_ready_item; + cfs_list_t loi_write_item; + cfs_list_t loi_read_item; unsigned long loi_kms_valid:1; __u64 loi_kms; /* known minimum size */ struct ost_lvb loi_lvb; struct osc_async_rc loi_ar; }; +#define loi_id loi_oi.oi_id +#define loi_seq loi_oi.oi_seq + +static inline void loi_kms_set(struct lov_oinfo *oinfo, __u64 kms) +{ + oinfo->loi_kms = kms; + oinfo->loi_kms_valid = 1; +} static inline void loi_init(struct lov_oinfo *loi) { @@ -115,34 +119,20 @@ static inline void loi_init(struct lov_oinfo *loi) CFS_INIT_LIST_HEAD(&loi->loi_write_lop.lop_pending); CFS_INIT_LIST_HEAD(&loi->loi_write_lop.lop_urgent); CFS_INIT_LIST_HEAD(&loi->loi_write_lop.lop_pending_group); - CFS_INIT_LIST_HEAD(&loi->loi_cli_item); + CFS_INIT_LIST_HEAD(&loi->loi_ready_item); + CFS_INIT_LIST_HEAD(&loi->loi_hp_ready_item); CFS_INIT_LIST_HEAD(&loi->loi_write_item); CFS_INIT_LIST_HEAD(&loi->loi_read_item); } -/*extent array item for describing the joined file extent info*/ -struct lov_extent { - __u64 le_start; /* extent start */ - __u64 le_len; /* extent length */ - int le_loi_idx; /* extent #1 loi's index in lsm loi array */ - int le_stripe_count; /* extent stripe count*/ -}; - -/*Lov array info for describing joined file array EA info*/ -struct lov_array_info { - struct llog_logid lai_array_id; /* MDS med llog object id */ - unsigned lai_ext_count; /* number of extent count */ - struct lov_extent *lai_ext_array; /* extent desc array */ -}; - struct lov_stripe_md { - spinlock_t lsm_lock; + cfs_spinlock_t lsm_lock; pid_t lsm_lock_owner; /* debugging */ struct { /* Public members. */ __u64 lw_object_id; /* lov object id */ - __u64 lw_object_gr; /* lov object group */ + __u64 lw_object_seq; /* lov object seq */ __u64 lw_maxbytes; /* maximum possible file size */ /* LOV-private members start here -- only for use in lov/. */ @@ -150,15 +140,14 @@ struct lov_stripe_md { __u32 lw_stripe_size; /* size of the stripe */ __u32 lw_pattern; /* striping pattern (RAID0, RAID1) */ unsigned lw_stripe_count; /* number of objects being striped over */ - char lw_pool_name[MAXPOOLNAME]; /* pool name */ + char lw_pool_name[LOV_MAXPOOLNAME]; /* pool name */ } lsm_wire; - struct lov_array_info *lsm_array; /*Only for joined file array info*/ struct lov_oinfo *lsm_oinfo[0]; }; #define lsm_object_id lsm_wire.lw_object_id -#define lsm_object_gr lsm_wire.lw_object_gr +#define lsm_object_seq lsm_wire.lw_object_seq #define lsm_maxbytes lsm_wire.lw_maxbytes #define lsm_magic lsm_wire.lw_magic #define lsm_stripe_size lsm_wire.lw_stripe_size @@ -168,7 +157,7 @@ struct lov_stripe_md { struct obd_info; -typedef int (*obd_enqueue_update_f)(struct obd_info *oinfo, int rc); +typedef int (*obd_enqueue_update_f)(void *cookie, int rc); /* obd info for a particular level (lov, osc). */ struct obd_info { @@ -215,14 +204,14 @@ void lov_stripe_lock(struct lov_stripe_md *md); void lov_stripe_unlock(struct lov_stripe_md *md); struct obd_type { - struct list_head typ_chain; + cfs_list_t typ_chain; struct obd_ops *typ_dt_ops; struct md_ops *typ_md_ops; cfs_proc_dir_entry_t *typ_procroot; char *typ_name; int typ_refcnt; struct lu_device_type *typ_lu; - spinlock_t obd_type_lock; + cfs_spinlock_t obd_type_lock; }; struct brw_page { @@ -232,68 +221,27 @@ struct brw_page { obd_flag flag; }; -enum async_flags { - ASYNC_READY = 0x1, /* ap_make_ready will not be called before this - page is added to an rpc */ - ASYNC_URGENT = 0x2, /* page must be put into an RPC before return */ - ASYNC_COUNT_STABLE = 0x4, /* ap_refresh_count will not be called - to give the caller a chance to update - or cancel the size of the io */ - ASYNC_GROUP_SYNC = 0x8, /* ap_completion will not be called, instead - the page is accounted for in the - obd_io_group given to - obd_queue_group_io */ -}; - -struct obd_async_page_ops { - int (*ap_make_ready)(void *data, int cmd); - int (*ap_refresh_count)(void *data, int cmd); - void (*ap_fill_obdo)(void *data, int cmd, struct obdo *oa); - void (*ap_update_obdo)(void *data, int cmd, struct obdo *oa, - obd_valid valid); - int (*ap_completion)(void *data, int cmd, struct obdo *oa, int rc); - struct obd_capa *(*ap_lookup_capa)(void *data, int cmd); -}; - -/* the `oig' is passed down from a caller of obd rw methods. the callee - * records enough state such that the caller can sleep on the oig and - * be woken when all the callees have finished their work */ -struct obd_io_group { - spinlock_t oig_lock; - atomic_t oig_refcount; - int oig_pending; - int oig_rc; - struct list_head oig_occ_list; - cfs_waitq_t oig_waitq; -}; - -/* the oig callback context lets the callee of obd rw methods register - * for callbacks from the caller. */ -struct oig_callback_context { - struct list_head occ_oig_item; - /* called when the caller has received a signal while sleeping. - * callees of this method are encouraged to abort their state - * in the oig. This may be called multiple times. */ - void (*occ_interrupted)(struct oig_callback_context *occ); - unsigned long interrupted:1; -}; - /* Individual type definitions */ struct ost_server_data; +#define OBT_MAGIC 0xBDDECEAE /* hold common fields for "target" device */ struct obd_device_target { + __u32 obt_magic; struct super_block *obt_sb; - atomic_t obt_quotachecking; + /** last_rcvd file */ + struct file *obt_rcvd_filp; + struct lu_target *obt_lut; + __u64 obt_mount_count; + cfs_semaphore_t obt_quotachecking; struct lustre_quota_ctxt obt_qctxt; + lustre_quota_version_t obt_qfmt; + cfs_rw_semaphore_t obt_rwsem; + struct vfsmount *obt_vfsmnt; + struct file *obt_health_check_filp; }; -typedef void (*obd_pin_extent_cb)(void *data); -typedef int (*obd_page_removal_cb_t)(void *data, int discard); -typedef int (*obd_lock_cancel_cb)(struct ldlm_lock *,struct ldlm_lock_desc *, - void *, int); - /* llog contexts */ enum llog_ctxt_id { LLOG_CONFIG_ORIG_CTXT = 0, @@ -308,6 +256,9 @@ enum llog_ctxt_id { LLOG_TEST_REPL_CTXT, LLOG_LOVEA_ORIG_CTXT, LLOG_LOVEA_REPL_CTXT, + LLOG_CHANGELOG_ORIG_CTXT, /**< changelog generation on mdd */ + LLOG_CHANGELOG_REPL_CTXT, /**< changelog access on clients */ + LLOG_CHANGELOG_USER_ORIG_CTXT, /**< for multiple changelog consumers */ LLOG_MAX_CTXTS }; @@ -327,35 +278,32 @@ struct filter_obd { /* NB this field MUST be first */ struct obd_device_target fo_obt; const char *fo_fstype; - struct vfsmount *fo_vfsmnt; int fo_group_count; cfs_dentry_t *fo_dentry_O; cfs_dentry_t **fo_dentry_O_groups; struct filter_subdirs *fo_dentry_O_sub; - struct semaphore fo_init_lock; /* group initialization lock */ + cfs_semaphore_t fo_init_lock; /* group initialization lock */ int fo_committed_group; - - spinlock_t fo_objidlock; /* protect fo_lastobjid */ - spinlock_t fo_translock; /* protect fsd_last_transno */ - struct file *fo_rcvd_filp; - struct file *fo_health_check_filp; - struct lr_server_data *fo_fsd; - unsigned long *fo_last_rcvd_slots; - __u64 fo_mount_count; + cfs_spinlock_t fo_objidlock; /* protect fo_lastobjid */ unsigned long fo_destroys_in_progress; - struct semaphore fo_create_locks[FILTER_SUBDIR_COUNT]; + cfs_semaphore_t fo_create_locks[FILTER_SUBDIR_COUNT]; - struct list_head fo_export_list; + cfs_list_t fo_export_list; int fo_subdir_count; obd_size fo_tot_dirty; /* protected by obd_osfs_lock */ obd_size fo_tot_granted; /* all values in bytes */ obd_size fo_tot_pending; + int fo_tot_granted_clients; obd_size fo_readcache_max_filesize; + int fo_read_cache:1, /**< enable read-only cache */ + fo_writethrough_cache:1,/**< read cache writes */ + fo_mds_ost_sync:1, /**< MDS-OST orphan recovery*/ + fo_raid_degraded:1;/**< RAID device degraded */ struct obd_import *fo_mdc_imp; struct obd_uuid fo_mdc_uuid; @@ -364,10 +312,10 @@ struct filter_obd { __u64 *fo_last_objids; /* last created objid for groups, * protected by fo_objidlock */ - struct semaphore fo_alloc_lock; + cfs_semaphore_t fo_alloc_lock; - atomic_t fo_r_in_flight; - atomic_t fo_w_in_flight; + cfs_atomic_t fo_r_in_flight; + cfs_atomic_t fo_w_in_flight; /* * per-filter pool of kiobuf's allocated by filter_common_setup() and @@ -384,26 +332,39 @@ struct filter_obd { struct filter_iobuf **fo_iobuf_pool; int fo_iobuf_count; - struct list_head fo_llog_list; - spinlock_t fo_llog_list_lock; + cfs_list_t fo_llog_list; + cfs_spinlock_t fo_llog_list_lock; struct brw_stats fo_filter_stats; struct lustre_quota_ctxt fo_quota_ctxt; - spinlock_t fo_quotacheck_lock; - atomic_t fo_quotachecking; + cfs_spinlock_t fo_quotacheck_lock; + cfs_atomic_t fo_quotachecking; int fo_fmd_max_num; /* per exp filter_mod_data */ int fo_fmd_max_age; /* jiffies to fmd expiry */ + unsigned long fo_syncjournal:1, /* sync journal on writes */ + fo_sync_lock_cancel:2;/* sync on lock cancel */ + /* sptlrpc stuff */ - rwlock_t fo_sptlrpc_lock; + cfs_rwlock_t fo_sptlrpc_lock; struct sptlrpc_rule_set fo_sptlrpc_rset; /* capability related */ unsigned int fo_fl_oss_capa; - struct list_head fo_capa_keys; - struct hlist_head *fo_capa_hash; + cfs_list_t fo_capa_keys; + cfs_hlist_head_t *fo_capa_hash; struct llog_commit_master *fo_lcm; + int fo_sec_level; +}; + +struct timeout_item { + enum timeout_event ti_event; + cfs_time_t ti_timeout; + timeout_cb_t ti_cb; + void *ti_cb_data; + cfs_list_t ti_obd_list; + cfs_list_t ti_chain; }; #define OSC_MAX_RIF_DEFAULT 8 @@ -412,14 +373,21 @@ struct filter_obd { #define OSC_MAX_DIRTY_MB_MAX 2048 /* arbitrary, but < MAX_LONG bytes */ #define OSC_DEFAULT_RESENDS 10 +/* possible values for fo_sync_lock_cancel */ +enum { + NEVER_SYNC_ON_CANCEL = 0, + BLOCKING_SYNC_ON_CANCEL = 1, + ALWAYS_SYNC_ON_CANCEL = 2, + NUM_SYNC_ON_CANCEL_STATES +}; + #define MDC_MAX_RIF_DEFAULT 8 #define MDC_MAX_RIF_MAX 512 struct mdc_rpc_lock; struct obd_import; -struct lustre_cache; struct client_obd { - struct rw_semaphore cl_sem; + cfs_rw_semaphore_t cl_sem; struct obd_uuid cl_target_uuid; struct obd_import *cl_import; /* ptlrpc connection state */ int cl_conn_count; @@ -429,9 +397,9 @@ struct client_obd { int cl_max_mds_easize; int cl_max_mds_cookiesize; - /* security configuration */ - struct sptlrpc_rule_set cl_sptlrpc_rset; - enum lustre_sec_part cl_sec_part; + enum lustre_sec_part cl_sp_me; + enum lustre_sec_part cl_sp_to; + struct sptlrpc_flavor cl_flvr_mgc; /* fixed flavor of mgc->mgs */ //struct llog_canceld_ctxt *cl_llcd; /* it's included by obd_llog_ctxt */ void *cl_llcd_offset; @@ -439,9 +407,14 @@ struct client_obd { /* the grant values are protected by loi_list_lock below */ long cl_dirty; /* all _dirty_ in bytes */ long cl_dirty_max; /* allowed w/o rpc */ + long cl_dirty_transit; /* dirty synchronous */ long cl_avail_grant; /* bytes of credit for ost */ long cl_lost_grant; /* lost credits (trunc) */ - struct list_head cl_cache_waiters; /* waiting for cache/grant */ + cfs_list_t cl_cache_waiters; /* waiting for cache/grant */ + cfs_time_t cl_next_shrink_grant; /* jiffies */ + cfs_list_t cl_grant_shrink_list; /* Timeout event list */ + cfs_semaphore_t cl_grant_sem; /*grant shrink list cfs_semaphore*/ + int cl_grant_shrink_interval; /* seconds */ /* keep track of objects that have lois that contain pages which * have been queued for async brw. this lock also protects the @@ -462,9 +435,10 @@ struct client_obd { * client_obd_list_lock_{init,done}() functions. */ client_obd_lock_t cl_loi_list_lock; - struct list_head cl_loi_ready_list; - struct list_head cl_loi_write_list; - struct list_head cl_loi_read_list; + cfs_list_t cl_loi_ready_list; + cfs_list_t cl_loi_hp_ready_list; + cfs_list_t cl_loi_write_list; + cfs_list_t cl_loi_read_list; int cl_r_in_flight; int cl_w_in_flight; /* just a sum of the loi/lop pending numbers to be exported by /proc */ @@ -480,7 +454,7 @@ struct client_obd { struct obd_histogram cl_write_offset_hist; /* number of in flight destroy rpcs is limited to max_rpcs_in_flight */ - atomic_t cl_destroy_in_flight; + cfs_atomic_t cl_destroy_in_flight; cfs_waitq_t cl_destroy_waitq; struct mdc_rpc_lock *cl_rpc_lock; @@ -489,10 +463,10 @@ struct client_obd { struct osc_creator cl_oscc; /* mgc datastruct */ - struct semaphore cl_mgc_sem; + cfs_semaphore_t cl_mgc_sem; struct vfsmount *cl_mgc_vfsmnt; struct dentry *cl_mgc_configs_dir; - atomic_t cl_mgc_refcount; + cfs_atomic_t cl_mgc_refcount; struct obd_export *cl_mgc_mgsexp; /* checksumming for data sent over the network */ @@ -501,7 +475,7 @@ struct client_obd { __u32 cl_supp_cksum_types; /* checksum algorithm to be used */ cksum_type_t cl_cksum_type; - + /* also protected by the poorly named _loi_list_lock lock above */ struct osc_async_rc cl_ar; @@ -511,24 +485,21 @@ struct client_obd { /* sequence manager */ struct lu_client_seq *cl_seq; - atomic_t cl_resends; /* resend count */ - - /* Cache of triples */ - struct lustre_cache *cl_cache; - obd_lock_cancel_cb cl_ext_lock_cancel_cb; + cfs_atomic_t cl_resends; /* resend count */ }; #define obd2cli_tgt(obd) ((char *)(obd)->u.cli.cl_target_uuid.uuid) #define CL_NOT_QUOTACHECKED 1 /* client->cl_qchk_stat init value */ struct mgs_obd { + struct obd_device_target mgs_obt; struct ptlrpc_service *mgs_service; struct vfsmount *mgs_vfsmnt; struct super_block *mgs_sb; struct dentry *mgs_configs_dir; struct dentry *mgs_fid_de; - struct list_head mgs_fs_db_list; - struct semaphore mgs_sem; + cfs_list_t mgs_fs_db_list; + cfs_semaphore_t mgs_sem; cfs_proc_dir_entry_t *mgs_proc_live; }; @@ -538,58 +509,53 @@ struct mds_obd { struct ptlrpc_service *mds_service; struct ptlrpc_service *mds_setattr_service; struct ptlrpc_service *mds_readpage_service; - struct vfsmount *mds_vfsmnt; cfs_dentry_t *mds_fid_de; int mds_max_mdsize; int mds_max_cookiesize; - struct file *mds_rcvd_filp; - spinlock_t mds_transno_lock; - __u64 mds_last_transno; - __u64 mds_mount_count; __u64 mds_io_epoch; unsigned long mds_atime_diff; - struct semaphore mds_epoch_sem; + cfs_semaphore_t mds_epoch_sem; struct ll_fid mds_rootfid; - struct lr_server_data *mds_server_data; cfs_dentry_t *mds_pending_dir; cfs_dentry_t *mds_logs_dir; cfs_dentry_t *mds_objects_dir; struct llog_handle *mds_cfg_llh; - struct obd_device *mds_osc_obd; /* XXX lov_obd */ + struct obd_device *mds_lov_obd; struct obd_uuid mds_lov_uuid; char *mds_profile; - struct obd_export *mds_osc_exp; /* XXX lov_exp */ + struct obd_export *mds_lov_exp; struct lov_desc mds_lov_desc; __u32 mds_id; /* mark pages dirty for write. */ - bitmap_t *mds_lov_page_dirty; + cfs_bitmap_t *mds_lov_page_dirty; /* array for store pages with obd_id */ - void **mds_lov_page_array; + void **mds_lov_page_array; /* file for store objid */ struct file *mds_lov_objid_filp; __u32 mds_lov_objid_count; + __u32 mds_lov_objid_max_index; __u32 mds_lov_objid_lastpage; __u32 mds_lov_objid_lastidx; - struct file *mds_health_check_filp; - unsigned long *mds_client_bitmap; -// struct upcall_cache *mds_group_hash; struct lustre_quota_info mds_quota_info; - struct semaphore mds_qonoff_sem; - struct semaphore mds_health_sem; + cfs_rw_semaphore_t mds_qonoff_sem; + cfs_semaphore_t mds_health_sem; unsigned long mds_fl_user_xattr:1, mds_fl_acl:1, mds_evict_ost_nids:1, mds_fl_cfglog:1, - mds_fl_synced:1; + mds_fl_synced:1, + mds_quota:1, + mds_fl_target:1; /* mds have one or + * more targets */ struct upcall_cache *mds_identity_cache; /* for capability keys update */ struct lustre_capa_key *mds_capa_keys; - struct rw_semaphore mds_notify_lock; + cfs_rw_semaphore_t mds_notify_lock; }; /* lov objid */ @@ -613,34 +579,36 @@ struct obd_id_info { struct echo_obd { struct obdo eo_oa; - spinlock_t eo_lock; + cfs_spinlock_t eo_lock; __u64 eo_lastino; struct lustre_handle eo_nl_lock; - atomic_t eo_prep; + cfs_atomic_t eo_prep; }; struct ost_obd { struct ptlrpc_service *ost_service; struct ptlrpc_service *ost_create_service; struct ptlrpc_service *ost_io_service; - struct semaphore ost_health_sem; + cfs_semaphore_t ost_health_sem; }; struct echo_client_obd { struct obd_export *ec_exp; /* the local connection to osc/lov */ - spinlock_t ec_lock; - struct list_head ec_objects; + cfs_spinlock_t ec_lock; + cfs_list_t ec_objects; + cfs_list_t ec_locks; int ec_nstripes; __u64 ec_unique; }; struct lov_qos_oss { struct obd_uuid lqo_uuid; /* ptlrpc's c_remote_uuid */ - struct list_head lqo_oss_list; /* link to lov_qos */ - __u32 lqo_ost_count; /* number of osts on this oss */ + cfs_list_t lqo_oss_list; /* link to lov_qos */ __u64 lqo_bavail; /* total bytes avail on OSS */ __u64 lqo_penalty; /* current penalty */ - __u64 lqo_penalty_per_obj; /* penalty decrease every obj*/ + __u64 lqo_penalty_per_obj;/* penalty decrease every obj*/ + time_t lqo_used; /* last used time, seconds */ + __u32 lqo_ost_count; /* number of osts on this oss */ }; struct ltd_qos { @@ -648,16 +616,17 @@ struct ltd_qos { __u64 ltq_penalty; /* current penalty */ __u64 ltq_penalty_per_obj; /* penalty decrease every obj*/ __u64 ltq_weight; /* net weighting */ + time_t ltq_used; /* last used time, seconds */ unsigned int ltq_usable:1; /* usable for striping */ }; /* Generic subset of OSTs */ struct ost_pool { - __u32 *op_array; /* array of index of - lov_obd->lov_tgts */ - unsigned int op_count; /* number of OSTs in the array */ - unsigned int op_size; /* allocated size of lp_array */ - rwlock_t op_rwlock; /* to protect lov_pool use */ + __u32 *op_array; /* array of index of + lov_obd->lov_tgts */ + unsigned int op_count; /* number of OSTs in the array */ + unsigned int op_size; /* allocated size of lp_array */ + cfs_rw_semaphore_t op_rw_sem; /* to protect ost_pool use */ }; /* Round-robin allocator data */ @@ -669,27 +638,43 @@ struct lov_qos_rr { unsigned long lqr_dirty:1; /* recalc round-robin list */ }; +/* allow statfs data caching for 1 second */ +#define OBD_STATFS_CACHE_SECONDS 1 + +struct lov_statfs_data { + struct obd_info lsd_oi; + struct obd_statfs lsd_statfs; +}; /* Stripe placement optimization */ struct lov_qos { - struct list_head lq_oss_list; /* list of OSSs that targets use */ - struct rw_semaphore lq_rw_sem; + cfs_list_t lq_oss_list; /* list of OSSs that targets use */ + cfs_rw_semaphore_t lq_rw_sem; __u32 lq_active_oss_count; unsigned int lq_prio_free; /* priority for free space */ + unsigned int lq_threshold_rr;/* priority for rr */ struct lov_qos_rr lq_rr; /* round robin qos data */ unsigned long lq_dirty:1, /* recalc qos data */ lq_same_space:1,/* the ost's all have approx. the same space avail */ - lq_reset:1; /* zero current penalties */ + lq_reset:1, /* zero current penalties */ + lq_statfs_in_progress:1; /* statfs op in + progress */ + /* qos statfs data */ + struct lov_statfs_data *lq_statfs_data; + cfs_waitq_t lq_statfs_waitq; /* waitqueue to notify statfs + * requests completion */ }; struct lov_tgt_desc { + cfs_list_t ltd_kill; struct obd_uuid ltd_uuid; + struct obd_device *ltd_obd; struct obd_export *ltd_exp; struct ltd_qos ltd_qos; /* qos info per target */ __u32 ltd_gen; __u32 ltd_index; /* index in lov_obd->tgts */ unsigned long ltd_active:1,/* is this target up for requests */ - ltd_activate:1,/* should this target be activated */ + ltd_activate:1,/* should target be activated */ ltd_reap:1; /* should this target be deleted */ }; @@ -697,18 +682,19 @@ struct lov_tgt_desc { #define pool_tgt_size(_p) _p->pool_obds.op_size #define pool_tgt_count(_p) _p->pool_obds.op_count #define pool_tgt_array(_p) _p->pool_obds.op_array -#define pool_tgt_rwlock(_p) _p->pool_obds.op_rwlock +#define pool_tgt_rw_sem(_p) _p->pool_obds.op_rw_sem #define pool_tgt(_p, _i) _p->pool_lov->lov_tgts[_p->pool_obds.op_array[_i]] struct pool_desc { - char pool_name[MAXPOOLNAME + 1]; /* name of pool */ - struct ost_pool pool_obds; /* pool members */ - struct lov_qos_rr pool_rr; /* round robin qos */ - struct hlist_node pool_hash; /* access by poolname */ - struct list_head pool_list; /* serial access */ - cfs_proc_dir_entry_t *pool_proc_entry; /* file in /proc */ - struct lov_obd *pool_lov; /* lov obd to which this - pool belong */ + char pool_name[LOV_MAXPOOLNAME + 1]; /* name of pool */ + struct ost_pool pool_obds; /* pool members */ + cfs_atomic_t pool_refcount; /* pool ref. counter */ + struct lov_qos_rr pool_rr; /* round robin qos */ + cfs_hlist_node_t pool_hash; /* access by poolname */ + cfs_list_t pool_list; /* serial access */ + cfs_proc_dir_entry_t *pool_proc_entry; /* file in /proc */ + struct lov_obd *pool_lov; /* lov obd to which this + pool belong */ }; struct lov_obd { @@ -716,36 +702,44 @@ struct lov_obd { struct lov_tgt_desc **lov_tgts; /* sparse array */ struct ost_pool lov_packed; /* all OSTs in a packed array */ - struct semaphore lov_lock; + cfs_semaphore_t lov_lock; struct obd_connect_data lov_ocd; struct lov_qos lov_qos; /* qos info per lov */ - atomic_t lov_refcount; + cfs_atomic_t lov_refcount; __u32 lov_tgt_count; /* how many OBD's */ __u32 lov_active_tgt_count; /* how many active */ __u32 lov_death_row;/* tgts scheduled to be deleted */ __u32 lov_tgt_size; /* size of tgts array */ int lov_connects; - obd_page_removal_cb_t lov_page_removal_cb; - obd_pin_extent_cb lov_page_pin_cb; - obd_lock_cancel_cb lov_lock_cancel_cb; int lov_pool_count; - lustre_hash_t *lov_pools_hash_body; /* used for key access */ - struct list_head lov_pool_list; /* used for sequential access */ + cfs_hash_t *lov_pools_hash_body; /* used for key access */ + cfs_list_t lov_pool_list; /* used for sequential access */ cfs_proc_dir_entry_t *lov_pool_proc_entry; + enum lustre_sec_part lov_sp_me; }; struct lmv_tgt_desc { struct obd_uuid ltd_uuid; struct obd_export *ltd_exp; - int ltd_active; /* is this target up for requests */ + int ltd_active; /* is this target up for requests */ int ltd_idx; - struct semaphore ltd_fid_sem; + cfs_semaphore_t ltd_fid_sem; }; +enum placement_policy { + PLACEMENT_CHAR_POLICY = 0, + PLACEMENT_NID_POLICY = 1, + PLACEMENT_INVAL_POLICY = 2, + PLACEMENT_MAX_POLICY +}; + +typedef enum placement_policy placement_policy_t; + struct lmv_obd { int refcount; struct lu_client_fld lmv_fld; - spinlock_t lmv_lock; + cfs_spinlock_t lmv_lock; + placement_policy_t lmv_placement; struct lmv_desc desc; struct obd_uuid cluuid; struct obd_export *exp; @@ -755,7 +749,7 @@ struct lmv_obd { int max_def_easize; int max_cookiesize; int server_timeout; - struct semaphore init_sem; + cfs_semaphore_t init_sem; struct lmv_tgt_desc *tgts; int tgts_size; @@ -781,9 +775,11 @@ struct niobuf_local { #define LUSTRE_CMM_NAME "cmm" #define LUSTRE_MDD_NAME "mdd" -#define LUSTRE_OSD_NAME "osd" +#define LUSTRE_OSD_NAME "osd-ldiskfs" +#define LUSTRE_VVP_NAME "vvp" #define LUSTRE_LMV_NAME "lmv" #define LUSTRE_CMM_MDC_NAME "cmm-mdc" +#define LUSTRE_SLP_NAME "slp" /* obd device type names */ /* FIXME all the references to LUSTRE_MDS_NAME should be swapped with LUSTRE_MDT_NAME */ @@ -824,8 +820,10 @@ struct obd_trans_info { int oti_numcookies; /* initial thread handling transaction */ - int oti_thread_id; + struct ptlrpc_thread * oti_thread; __u32 oti_conn_cnt; + /** VBR: versions */ + __u64 oti_pre_version; struct obd_uuid *oti_ost_uuid; }; @@ -841,10 +839,18 @@ static inline void oti_init(struct obd_trans_info *oti, return; oti->oti_xid = req->rq_xid; + /** VBR: take versions from request */ + if (req->rq_reqmsg != NULL && + lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) { + __u64 *pre_version = lustre_msg_get_versions(req->rq_reqmsg); + oti->oti_pre_version = pre_version ? pre_version[0] : 0; + oti->oti_transno = lustre_msg_get_transno(req->rq_reqmsg); + } + /** called from mds_create_objects */ if (req->rq_repmsg != NULL) oti->oti_transno = lustre_msg_get_transno(req->rq_repmsg); - oti->oti_thread_id = req->rq_svc_thread ? req->rq_svc_thread->t_id : -1; + oti->oti_thread = req->rq_svc_thread; if (req->rq_reqmsg != NULL) oti->oti_conn_cnt = lustre_msg_get_conn_cnt(req->rq_reqmsg); } @@ -881,6 +887,8 @@ static inline void oti_free_cookies(struct obd_trans_info *oti) * Events signalled through obd_notify() upcall-chain. */ enum obd_notify_event { + /* Device connect start */ + OBD_NOTIFY_CONNECT, /* Device activated */ OBD_NOTIFY_ACTIVE, /* Device deactivated */ @@ -893,13 +901,16 @@ enum obd_notify_event { OBD_NOTIFY_SYNC_NONBLOCK, OBD_NOTIFY_SYNC, /* Configuration event */ - OBD_NOTIFY_CONFIG + OBD_NOTIFY_CONFIG, + /* Trigger quota recovery */ + OBD_NOTIFY_QUOTA }; /* bit-mask flags for config events */ enum config_flags { - CONFIG_LOG = 0x1, /* finished processing config log */ - CONFIG_SYNC = 0x2 /* mdt synced 1 ost */ + CONFIG_LOG = 0x1, /* finished processing config log */ + CONFIG_SYNC = 0x2, /* mdt synced 1 ost */ + CONFIG_TARGET = 0x4 /* one target is added */ }; /* @@ -908,7 +919,7 @@ enum config_flags { */ struct obd_notify_upcall { int (*onu_upcall)(struct obd_device *host, struct obd_device *watched, - enum obd_notify_event ev, void *owner); + enum obd_notify_event ev, void *owner, void *data); /* Opaque datum supplied by upper layer listener */ void *onu_owner; }; @@ -916,31 +927,56 @@ struct obd_notify_upcall { struct target_recovery_data { svc_handler_t trd_recovery_handler; pid_t trd_processing_task; - struct completion trd_starting; - struct completion trd_finishing; + cfs_completion_t trd_starting; + cfs_completion_t trd_finishing; }; -#define OBD_LLOG_GROUP 0 +/** + * In HEAD for CMD, the object is created in group number which is 3>= + * or indexing starts from 3. To test this assertions are added to disallow + * group 0. But to run 2.0 mds server on 1.8.x disk format (i.e. interop_mode) + * object in group 0 needs to be allowed. + * So for interop mode following changes needs to be done: + * 1. No need to assert on group 0 or allow group 0 + * 2. The group number indexing starts from 0 instead of 3 + */ -enum filter_groups { - FILTER_GROUP_LLOG = 1, - FILTER_GROUP_ECHO, - FILTER_GROUP_MDS0 -}; +#define LASSERT_SEQ_IS_MDT(seq) LASSERT(fid_seq_is_mdt(seq)) + +static inline __u64 objseq_to_mdsno(obd_seq seq) +{ + LASSERT_SEQ_IS_MDT(seq); + if (seq == FID_SEQ_OST_MDT0) + return 0; + return seq - FID_SEQ_OST_MDT1 + 1; +} + +static inline int mdt_to_obd_objseq(int mdtid) +{ + /** + * MDS0 uses seq 0 pre FID-on-OST, other MDSes will use seq from + * FID_SEQ_OST_MDT1 + */ + if (mdtid) + return FID_SEQ_OST_MDT1 + mdtid - 1; + return 0; +} struct obd_llog_group { - struct list_head olg_list; - int olg_group; + cfs_list_t olg_list; + int olg_seq; struct llog_ctxt *olg_ctxts[LLOG_MAX_CTXTS]; cfs_waitq_t olg_waitq; - spinlock_t olg_lock; + cfs_spinlock_t olg_lock; struct obd_export *olg_exp; int olg_initializing; + cfs_semaphore_t olg_cat_processing; }; /* corresponds to one of the obd's */ #define MAX_OBD_NAME 128 #define OBD_DEVICE_MAGIC 0XAB5CD6EF +#define OBD_DEV_BY_DEVNAME 0xffffd0de struct obd_device { struct obd_type *obd_type; __u32 obd_magic; @@ -955,7 +991,9 @@ struct obd_device { unsigned long obd_attached:1, /* finished attach */ obd_set_up:1, /* finished setup */ obd_recovering:1, /* there are recoverable clients */ - obd_abort_recovery:1,/* somebody ioctl'ed us to abort */ + obd_abort_recovery:1,/* recovery expired */ + obd_version_recov:1, /* obd uses version checking */ + obd_recovery_expired:1, obd_replayable:1, /* recovery is enabled; inform clients */ obd_no_transno:1, /* no committed-transno notification */ obd_no_recov:1, /* fail instead of retry messages */ @@ -964,69 +1002,68 @@ struct obd_device { obd_starting:1, /* started setup */ obd_force:1, /* cleanup with > 0 obd refcount */ obd_fail:1, /* cleanup with failover */ - obd_async_recov:1, /* allow asyncronous orphan cleanup */ + obd_async_recov:1, /* allow asynchronous orphan cleanup */ obd_no_conn:1, /* deny new connections */ - obd_inactive:1; /* device active/inactive + obd_inactive:1, /* device active/inactive * (for /proc/status only!!) */ + obd_process_conf:1; /* device is processing mgs config */ /* uuid-export hash body */ - struct lustre_hash *obd_uuid_hash; + cfs_hash_t *obd_uuid_hash; /* nid-export hash body */ - struct lustre_hash *obd_nid_hash; + cfs_hash_t *obd_nid_hash; /* nid stats body */ - struct lustre_hash *obd_nid_stats_hash; - struct list_head obd_nid_stats; - atomic_t obd_refcount; + cfs_hash_t *obd_nid_stats_hash; + cfs_list_t obd_nid_stats; + cfs_atomic_t obd_refcount; cfs_waitq_t obd_refcount_waitq; - struct list_head obd_exports; + cfs_list_t obd_exports; + cfs_list_t obd_unlinked_exports; + cfs_list_t obd_delayed_exports; int obd_num_exports; - spinlock_t obd_nid_lock; + cfs_spinlock_t obd_nid_lock; struct ldlm_namespace *obd_namespace; struct ptlrpc_client obd_ldlm_client; /* XXX OST/MDS only */ /* a spinlock is OK for what we do now, may need a semaphore later */ - spinlock_t obd_dev_lock; - struct semaphore obd_dev_sem; + cfs_spinlock_t obd_dev_lock; + cfs_semaphore_t obd_dev_sem; __u64 obd_last_committed; struct fsfilt_operations *obd_fsops; - spinlock_t obd_osfs_lock; + cfs_spinlock_t obd_osfs_lock; struct obd_statfs obd_osfs; /* locked by obd_osfs_lock */ __u64 obd_osfs_age; struct lvfs_run_ctxt obd_lvfs_ctxt; struct obd_llog_group obd_olg; /* default llog group */ - struct obd_device *obd_observer; + struct obd_device *obd_observer; + cfs_rw_semaphore_t obd_observer_link_sem; struct obd_notify_upcall obd_upcall; struct obd_export *obd_self_export; /* list of exports in LRU order, for ping evictor, with obd_dev_lock */ - struct list_head obd_exports_timed; + cfs_list_t obd_exports_timed; time_t obd_eviction_timer; /* for ping evictor */ - /* XXX encapsulate all this recovery data into one struct */ - svc_handler_t obd_recovery_handler; - pid_t obd_processing_task; - int obd_max_recoverable_clients; int obd_connected_clients; - int obd_recoverable_clients; - spinlock_t obd_processing_task_lock; /* BH lock (timer) */ + int obd_stale_clients; + int obd_delayed_clients; + cfs_spinlock_t obd_processing_task_lock; /* BH lock (timer) */ __u64 obd_next_recovery_transno; int obd_replayed_requests; int obd_requests_queued_for_recovery; cfs_waitq_t obd_next_transno_waitq; - struct list_head obd_uncommitted_replies; - spinlock_t obd_uncommitted_replies_lock; cfs_timer_t obd_recovery_timer; time_t obd_recovery_start; /* seconds */ time_t obd_recovery_end; /* seconds, for lprocfs_status */ - time_t obd_recovery_max_time; /* seconds, bz13079 */ + time_t obd_recovery_time_hard; int obd_recovery_timeout; - + /* new recovery stuff from CMD2 */ struct target_recovery_data obd_recovery_data; int obd_replayed_locks; - atomic_t obd_req_replay_clients; - atomic_t obd_lock_replay_clients; - struct list_head obd_req_replay_queue; - struct list_head obd_lock_replay_queue; - struct list_head obd_final_req_queue; + cfs_atomic_t obd_req_replay_clients; + cfs_atomic_t obd_lock_replay_clients; + cfs_list_t obd_req_replay_queue; + cfs_list_t obd_lock_replay_queue; + cfs_list_t obd_final_req_queue; int obd_recovery_stage; union { @@ -1052,19 +1089,23 @@ struct obd_device { cfs_proc_dir_entry_t *obd_proc_exports_entry; cfs_proc_dir_entry_t *obd_svc_procroot; struct lprocfs_stats *obd_svc_stats; - atomic_t obd_evict_inprogress; + cfs_atomic_t obd_evict_inprogress; cfs_waitq_t obd_evict_inprogress_waitq; + cfs_list_t obd_evict_list; /* protected with pet_lock */ - /** - * Ldlm pool part. Save last calculated SLV and Limit. + /** + * Ldlm pool part. Save last calculated SLV and Limit. */ - rwlock_t obd_pool_lock; + cfs_rwlock_t obd_pool_lock; int obd_pool_limit; __u64 obd_pool_slv; -}; -#define OBD_OPT_FORCE 0x0001 -#define OBD_OPT_FAILOVER 0x0002 + /** + * A list of outstanding class_incref()'s against this obd. For + * debugging. + */ + struct lu_ref obd_reference; +}; #define OBD_LLOG_FL_SENDNOW 0x0001 @@ -1076,33 +1117,40 @@ enum obd_cleanup_stage { }; /* get/set_info keys */ -#define KEY_READ_ONLY "read-only" -#define KEY_MDS_CONN "mds_conn" -#define KEY_NEXT_ID "next_id" -#define KEY_LOVDESC "lovdesc" -#define KEY_INIT_RECOV "initial_recov" -#define KEY_INIT_RECOV_BACKUP "init_recov_bk" -#define KEY_FLUSH_CTX "flush_ctx" +#define KEY_ASYNC "async" +#define KEY_BLOCKSIZE_BITS "blocksize_bits" +#define KEY_BLOCKSIZE "blocksize" #define KEY_CAPA_KEY "capa_key" +#define KEY_CHANGELOG_CLEAR "changelog_clear" +#define KEY_FID2PATH "fid2path" +#define KEY_CHECKSUM "checksum" +#define KEY_CLEAR_FS "clear_fs" #define KEY_CONN_DATA "conn_data" -#define KEY_MAX_EASIZE "max_easize" -#define KEY_REVIMP_UPD "revimp_update" -#define KEY_LOV_IDX "lov_idx" +#define KEY_EVICT_BY_NID "evict_by_nid" +#define KEY_FIEMAP "fiemap" +#define KEY_FLUSH_CTX "flush_ctx" +#define KEY_GRANT_SHRINK "grant_shrink" +#define KEY_HSM_COPYTOOL_SEND "hsm_send" +#define KEY_INIT_RECOV_BACKUP "init_recov_bk" +#define KEY_INIT_RECOV "initial_recov" +#define KEY_INTERMDS "inter_mds" #define KEY_LAST_ID "last_id" -#define KEY_READONLY "read-only" #define KEY_LOCK_TO_STRIPE "lock_to_stripe" -#define KEY_CHECKSUM "checksum" -#define KEY_UNLINKED "unlinked" -#define KEY_EVICT_BY_NID "evict_by_nid" +#define KEY_LOVDESC "lovdesc" +#define KEY_LOV_IDX "lov_idx" +#define KEY_MAX_EASIZE "max_easize" +#define KEY_MDS_CONN "mds_conn" +#define KEY_MGSSEC "mgssec" +#define KEY_NEXT_ID "next_id" +#define KEY_READ_ONLY "read-only" #define KEY_REGISTER_TARGET "register_target" +#define KEY_REVIMP_UPD "revimp_update" #define KEY_SET_FS "set_fs" -#define KEY_CLEAR_FS "clear_fs" -#define KEY_BLOCKSIZE "blocksize" -#define KEY_BLOCKSIZE_BITS "blocksize_bits" -#define KEY_FIEMAP "FIEMAP" -/* XXX unused ?*/ -#define KEY_INTERMDS "inter_mds" -#define KEY_ASYNC "async" +/* KEY_SET_INFO in lustre_idl.h */ +#define KEY_SPTLRPC_CONF "sptlrpc_conf" +#define KEY_CONNECT_FLAG "connect_flags" +#define KEY_SYNC_LOCK_CANCEL "sync_lock_cancel" + struct lu_context; @@ -1113,7 +1161,7 @@ static inline int it_to_lock_mode(struct lookup_intent *it) return LCK_CW; else if (it->it_op & (IT_READDIR | IT_GETATTR | IT_OPEN | IT_LOOKUP)) return LCK_CR; - + LASSERTF(0, "Invalid it_op: %d\n", it->it_op); return -EINVAL; } @@ -1144,6 +1192,7 @@ struct md_op_data { unsigned int op_attr_flags; #endif #endif + __u64 op_valid; loff_t op_attr_blocks; /* Size-on-MDS epoch and flags. */ @@ -1172,13 +1221,14 @@ struct md_enqueue_info { struct lookup_intent mi_it; struct lustre_handle mi_lockh; struct dentry *mi_dentry; + struct inode *mi_dir; md_enqueue_cb_t mi_cb; unsigned int mi_generation; void *mi_cbdata; }; struct obd_ops { - struct module *o_owner; + cfs_module_t *o_owner; int (*o_iocontrol)(unsigned int cmd, struct obd_export *exp, int len, void *karg, void *uarg); int (*o_get_info)(struct obd_export *, __u32 keylen, void *key, @@ -1203,13 +1253,14 @@ struct obd_ops { * granted by the target, which are guaranteed to be a subset of flags * asked for. If @ocd == NULL, use default parameters. */ int (*o_connect)(const struct lu_env *env, - struct lustre_handle *conn, struct obd_device *src, + struct obd_export **exp, struct obd_device *src, struct obd_uuid *cluuid, struct obd_connect_data *ocd, void *localdata); int (*o_reconnect)(const struct lu_env *env, struct obd_export *exp, struct obd_device *src, struct obd_uuid *cluuid, - struct obd_connect_data *ocd); + struct obd_connect_data *ocd, + void *localdata); int (*o_disconnect)(struct obd_export *exp); /* Initialize/finalize fids infrastructure. */ @@ -1220,7 +1271,7 @@ struct obd_ops { int (*o_fid_alloc)(struct obd_export *exp, struct lu_fid *fid, struct md_op_data *op_data); - /* + /* * Object with @fid is getting deleted, we may want to do something * about this. */ @@ -1234,17 +1285,18 @@ struct obd_ops { struct lov_stripe_md *mem_src); int (*o_unpackmd)(struct obd_export *exp,struct lov_stripe_md **mem_tgt, struct lov_mds_md *disk_src, int disk_len); - int (*o_checkmd)(struct obd_export *exp, struct obd_export *md_exp, - struct lov_stripe_md *mem_tgt); int (*o_preallocate)(struct lustre_handle *, obd_count *req, obd_id *ids); /* FIXME: add fid capability support for create & destroy! */ int (*o_precreate)(struct obd_export *exp); int (*o_create)(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md **ea, struct obd_trans_info *oti); + int (*o_create_async)(struct obd_export *exp, struct obd_info *oinfo, + struct lov_stripe_md **ea, + struct obd_trans_info *oti); int (*o_destroy)(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *ea, struct obd_trans_info *oti, - struct obd_export *md_exp); + struct obd_export *md_exp, void *capa); int (*o_setattr)(struct obd_export *exp, struct obd_info *oinfo, struct obd_trans_info *oti); int (*o_setattr_async)(struct obd_export *exp, struct obd_info *oinfo, @@ -1256,47 +1308,6 @@ struct obd_ops { int (*o_brw)(int rw, struct obd_export *exp, struct obd_info *oinfo, obd_count oa_bufs, struct brw_page *pgarr, struct obd_trans_info *oti); - int (*o_brw_async)(int rw, struct obd_export *exp, - struct obd_info *oinfo, obd_count oa_bufs, - struct brw_page *pgarr, struct obd_trans_info *oti, - struct ptlrpc_request_set *); - int (*o_prep_async_page)(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, - cfs_page_t *page, obd_off offset, - struct obd_async_page_ops *ops, void *data, - void **res, int nocache, - struct lustre_handle *lockh); - int (*o_reget_short_lock)(struct obd_export *exp, - struct lov_stripe_md *lsm, - void **res, int rw, - obd_off start, obd_off end, - void **cookie); - int (*o_release_short_lock)(struct obd_export *exp, - struct lov_stripe_md *lsm, obd_off end, - void *cookie, int rw); - int (*o_queue_async_io)(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, void *cookie, - int cmd, obd_off off, int count, - obd_flag brw_flags, obd_flag async_flags); - int (*o_queue_group_io)(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, - struct obd_io_group *oig, - void *cookie, int cmd, obd_off off, int count, - obd_flag brw_flags, obd_flag async_flags); - int (*o_trigger_group_io)(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, - struct obd_io_group *oig); - int (*o_set_async_flags)(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, void *cookie, - obd_flag async_flags); - int (*o_teardown_async_page)(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, void *cookie); int (*o_merge_lvb)(struct obd_export *exp, struct lov_stripe_md *lsm, struct ost_lvb *lvb, int kms_only); int (*o_adjust_kms)(struct obd_export *exp, struct lov_stripe_md *lsm, @@ -1314,31 +1325,30 @@ struct obd_ops { struct lustre_handle *srconn, struct lov_stripe_md *src, obd_size start, obd_size end, struct obd_trans_info *); int (*o_iterate)(struct lustre_handle *conn, - int (*)(obd_id, obd_gr, void *), - obd_id *startid, obd_gr group, void *data); + int (*)(obd_id, obd_seq, void *), + obd_id *startid, obd_seq seq, void *data); int (*o_preprw)(int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, - int niocount, struct niobuf_remote *remote, - struct niobuf_local *local, struct obd_trans_info *oti, + struct niobuf_remote *remote, int *nr_pages, + struct niobuf_local *local, + struct obd_trans_info *oti, struct lustre_capa *capa); int (*o_commitrw)(int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, - int niocount, struct niobuf_local *local, + struct niobuf_remote *remote, int pages, + struct niobuf_local *local, struct obd_trans_info *oti, int rc); int (*o_enqueue)(struct obd_export *, struct obd_info *oinfo, struct ldlm_enqueue_info *einfo, struct ptlrpc_request_set *rqset); - int (*o_match)(struct obd_export *, struct lov_stripe_md *, __u32 type, - ldlm_policy_data_t *, __u32 mode, int *flags, void *data, - struct lustre_handle *lockh); int (*o_change_cbdata)(struct obd_export *, struct lov_stripe_md *, ldlm_iterator_t it, void *data); + int (*o_find_cbdata)(struct obd_export *, struct lov_stripe_md *, + ldlm_iterator_t it, void *data); int (*o_cancel)(struct obd_export *, struct lov_stripe_md *md, __u32 mode, struct lustre_handle *); int (*o_cancel_unused)(struct obd_export *, struct lov_stripe_md *, - int flags, void *opaque); - int (*o_join_lru)(struct obd_export *, struct lov_stripe_md *, - int join); + ldlm_cancel_flags_t flags, void *opaque); int (*o_init_export)(struct obd_export *exp); int (*o_destroy_export)(struct obd_export *exp); int (*o_extent_calc)(struct obd_export *, struct lov_stripe_md *, @@ -1346,8 +1356,7 @@ struct obd_ops { /* llog related obd_methods */ int (*o_llog_init)(struct obd_device *obd, struct obd_llog_group *grp, - struct obd_device *disk_obd, int count, - struct llog_catid *logid, struct obd_uuid *uuid); + struct obd_device *disk_obd, int *idx); int (*o_llog_finish)(struct obd_device *obd, int count); int (*o_llog_connect)(struct obd_export *, struct llogd_conn_body *); @@ -1366,20 +1375,17 @@ struct obd_ops { struct obd_uuid *(*o_get_uuid) (struct obd_export *exp); /* quota methods */ - int (*o_quotacheck)(struct obd_export *, struct obd_quotactl *); - int (*o_quotactl)(struct obd_export *, struct obd_quotactl *); + int (*o_quotacheck)(struct obd_device *, struct obd_export *, + struct obd_quotactl *); + int (*o_quotactl)(struct obd_device *, struct obd_export *, + struct obd_quotactl *); + int (*o_quota_adjust_qunit)(struct obd_export *exp, + struct quota_adjust_qunit *oqaq, + struct lustre_quota_ctxt *qctxt); + int (*o_ping)(struct obd_export *exp); - int (*o_register_page_removal_cb)(struct obd_export *exp, - obd_page_removal_cb_t cb, - obd_pin_extent_cb pin_cb); - int (*o_unregister_page_removal_cb)(struct obd_export *exp, - obd_page_removal_cb_t cb); - int (*o_register_lock_cancel_cb)(struct obd_export *exp, - obd_lock_cancel_cb cb); - int (*o_unregister_lock_cancel_cb)(struct obd_export *exp, - obd_lock_cancel_cb cb); /* pools methods */ int (*o_pool_new)(struct obd_device *obd, char *poolname); int (*o_pool_del)(struct obd_device *obd, char *poolname); @@ -1387,21 +1393,14 @@ struct obd_ops { char *ostname); int (*o_pool_rem)(struct obd_device *obd, char *poolname, char *ostname); + void (*o_getref)(struct obd_device *obd); + void (*o_putref)(struct obd_device *obd); /* * NOTE: If adding ops, add another LPROCFS_OBD_OP_INIT() line * to lprocfs_alloc_obd_stats() in obdclass/lprocfs_status.c. * Also, add a wrapper function in include/linux/obd_class.h. */ }; -/* TODO: lmv_stripe_md should contain mds capabilities for all slave fids */ -struct lmv_stripe_md { - __u32 mea_magic; - __u32 mea_count; - __u32 mea_master; - __u32 mea_padding; - struct lu_fid mea_ids[0]; -}; - enum { LUSTRE_OPC_MKDIR = (1 << 0), LUSTRE_OPC_SYMLINK = (1 << 1), @@ -1417,7 +1416,7 @@ enum { #define MAX_HASH_SIZE_32 0x7fffffffUL #define MAX_HASH_SIZE 0x7fffffffffffffffULL -#define MAX_HASH_HIGHEST_BIT 0x1000000000000000 +#define MAX_HASH_HIGHEST_BIT 0x1000000000000000ULL struct lustre_md { struct mdt_body *body; @@ -1433,7 +1432,9 @@ struct lustre_md { struct md_open_data { struct obd_client_handle *mod_och; - struct list_head mod_replay_list; + struct ptlrpc_request *mod_open_req; + struct ptlrpc_request *mod_close_req; + cfs_atomic_t mod_refcount; }; struct lookup_intent; @@ -1443,6 +1444,8 @@ struct md_ops { struct obd_capa **); int (*m_change_cbdata)(struct obd_export *, const struct lu_fid *, ldlm_iterator_t, void *); + int (*m_find_cbdata)(struct obd_export *, const struct lu_fid *, + ldlm_iterator_t, void *); int (*m_close)(struct obd_export *, struct md_op_data *, struct md_open_data *, struct ptlrpc_request **); int (*m_create)(struct obd_export *, struct md_op_data *, @@ -1454,12 +1457,10 @@ struct md_ops { struct lookup_intent *, struct md_op_data *, struct lustre_handle *, void *, int, struct ptlrpc_request **, int); - int (*m_getattr)(struct obd_export *, const struct lu_fid *, - struct obd_capa *, obd_valid, int, + int (*m_getattr)(struct obd_export *, struct md_op_data *, struct ptlrpc_request **); - int (*m_getattr_name)(struct obd_export *, const struct lu_fid *, - struct obd_capa *, const char *, int, obd_valid, - int, __u32, struct ptlrpc_request **); + int (*m_getattr_name)(struct obd_export *, struct md_op_data *, + struct ptlrpc_request **); int (*m_intent_lock)(struct obd_export *, struct md_op_data *, void *, int, struct lookup_intent *, int, struct ptlrpc_request **, @@ -1507,7 +1508,7 @@ struct md_ops { struct ptlrpc_request *); int (*m_clear_open_replay_data)(struct obd_export *, struct obd_client_handle *); - int (*m_set_lock_data)(struct obd_export *, __u64 *, void *); + int (*m_set_lock_data)(struct obd_export *, __u64 *, void *, __u32 *); ldlm_mode_t (*m_lock_match)(struct obd_export *, int, const struct lu_fid *, ldlm_type_t, @@ -1515,10 +1516,12 @@ struct md_ops { struct lustre_handle *); int (*m_cancel_unused)(struct obd_export *, const struct lu_fid *, - ldlm_policy_data_t *, ldlm_mode_t, int flags, - void *opaque); + ldlm_policy_data_t *, ldlm_mode_t, + ldlm_cancel_flags_t flags, void *opaque); int (*m_renew_capa)(struct obd_export *, struct obd_capa *oc, renew_capa_cb_t cb); + int (*m_unpack_capa)(struct obd_export *, struct ptlrpc_request *, + const struct req_msg_field *, struct obd_capa **); int (*m_get_remote_perm)(struct obd_export *, const struct lu_fid *, struct obd_capa *, __u32, @@ -1528,9 +1531,8 @@ struct md_ops { struct md_enqueue_info *, struct ldlm_enqueue_info *); - int (*m_revalidate_lock)(struct obd_export *, - struct lookup_intent *, - struct lu_fid *); + int (*m_revalidate_lock)(struct obd_export *, struct lookup_intent *, + struct lu_fid *, __u32 *); /* * NOTE: If adding ops, add another LPROCFS_MD_OP_INIT() line to @@ -1544,33 +1546,26 @@ struct lsm_operations { int (*lsm_destroy)(struct lov_stripe_md *, struct obdo *oa, struct obd_export *md_exp); void (*lsm_stripe_by_index)(struct lov_stripe_md *, int *, obd_off *, - unsigned long *); + obd_off *); void (*lsm_stripe_by_offset)(struct lov_stripe_md *, int *, obd_off *, - unsigned long *); - obd_off (*lsm_stripe_offset_by_index)(struct lov_stripe_md *, int); - obd_off (*lsm_stripe_offset_by_offset)(struct lov_stripe_md *, obd_off); - int (*lsm_stripe_index_by_offset)(struct lov_stripe_md *, obd_off); - int (*lsm_revalidate) (struct lov_stripe_md *, struct obd_device *obd); + obd_off *); int (*lsm_lmm_verify) (struct lov_mds_md *lmm, int lmm_bytes, int *stripe_count); int (*lsm_unpackmd) (struct lov_obd *lov, struct lov_stripe_md *lsm, struct lov_mds_md *lmm); }; -extern struct lsm_operations lsm_v1_ops; -extern struct lsm_operations lsm_join_ops; -extern struct lsm_operations lsm_v3_ops; -static inline struct lsm_operations *lsm_op_find(int magic) +extern const struct lsm_operations lsm_v1_ops; +extern const struct lsm_operations lsm_v3_ops; +static inline const struct lsm_operations *lsm_op_find(int magic) { switch(magic) { case LOV_MAGIC_V1: return &lsm_v1_ops; - case LOV_MAGIC_JOIN: - return &lsm_join_ops; case LOV_MAGIC_V3: return &lsm_v3_ops; default: - CERROR("Cannot recognize lsm_magic %d\n", magic); + CERROR("Cannot recognize lsm_magic %08x\n", magic); return NULL; } } @@ -1582,22 +1577,24 @@ int lvfs_check_io_health(struct obd_device *obd, struct file *file); #define OBD_CALC_STRIPE_END 2 static inline void obd_transno_commit_cb(struct obd_device *obd, __u64 transno, - int error) + struct obd_export *exp, int error) { if (error) { CERROR("%s: transno "LPU64" commit error: %d\n", obd->obd_name, transno, error); return; } - if (transno > obd->obd_last_committed) { - CDEBUG(D_HA, "%s: transno "LPD64" committed\n", + if (exp && transno > exp->exp_last_committed) { + CDEBUG(D_HA, "%s: transno "LPU64" committed\n", obd->obd_name, transno); - obd->obd_last_committed = transno; - ptlrpc_commit_replies (obd); + exp->exp_last_committed = transno; + ptlrpc_commit_replies(exp); } else { - CDEBUG(D_INFO, "%s: transno "LPD64" committed\n", + CDEBUG(D_INFO, "%s: transno "LPU64" committed\n", obd->obd_name, transno); } + if (transno > obd->obd_last_committed) + obd->obd_last_committed = transno; } static inline void init_obd_quota_ops(quota_interface_t *interface, @@ -1609,16 +1606,35 @@ static inline void init_obd_quota_ops(quota_interface_t *interface, LASSERT(obd_ops); obd_ops->o_quotacheck = QUOTA_OP(interface, check); obd_ops->o_quotactl = QUOTA_OP(interface, ctl); + obd_ops->o_quota_adjust_qunit = QUOTA_OP(interface, adjust_qunit); } -static inline __u64 oinfo_mdsno(struct obd_info *oinfo) +static inline struct lustre_capa *oinfo_capa(struct obd_info *oinfo) { - return oinfo->oi_oa->o_gr - FILTER_GROUP_MDS0; + return oinfo->oi_capa; } -static inline struct lustre_capa *oinfo_capa(struct obd_info *oinfo) +static inline struct md_open_data *obd_mod_alloc(void) { - return oinfo->oi_capa; + struct md_open_data *mod; + OBD_ALLOC_PTR(mod); + if (mod == NULL) + return NULL; + cfs_atomic_set(&mod->mod_refcount, 1); + return mod; } +#define obd_mod_get(mod) cfs_atomic_inc(&(mod)->mod_refcount) +#define obd_mod_put(mod) \ +({ \ + if (cfs_atomic_dec_and_test(&(mod)->mod_refcount)) { \ + if ((mod)->mod_open_req) \ + ptlrpc_req_finished((mod)->mod_open_req); \ + OBD_FREE_PTR(mod); \ + } \ +}) + +extern void obdo_from_inode(struct obdo *dst, struct inode *src, + struct lu_fid *parent, obd_flag valid); + #endif /* __OBD_H */