X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Finclude%2Fobd.h;h=087df0a28d5f462c16262498333f44ed679472fb;hp=60acd3372c81615d5dae5c3318c8e8d5b7d842e1;hb=b32e55b600ca2c9bf8b62287d9f889791d157426;hpb=2305c36139a7deaf25a0dc737d412eed42ca54e9 diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 60acd33..087df0a 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -27,7 +27,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2013, Intel Corporation. + * Copyright (c) 2011, 2015, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -37,7 +37,7 @@ #ifndef __OBD_H #define __OBD_H -#include +#include #include #include @@ -52,7 +52,6 @@ #include #include #include -#include #include #define MAX_OBD_DEVICES 8192 @@ -84,95 +83,22 @@ static inline void loi_init(struct lov_oinfo *loi) { } -/* If we are unable to get the maximum object size from the OST in - * ocd_maxbytes using OBD_CONNECT_MAXBYTES, then we fall back to using - * the old maximum object size from ext3. */ -#define LUSTRE_EXT3_STRIPE_MAXBYTES 0x1fffffff000ULL - -struct lov_stripe_md { - atomic_t lsm_refc; - spinlock_t lsm_lock; - pid_t lsm_lock_owner; /* debugging */ - - /* maximum possible file size, might change as OSTs status changes, - * e.g. disconnected, deactivated */ - __u64 lsm_maxbytes; - struct { - /* Public members. */ - struct ost_id lw_object_oi; /* lov object id/seq */ - - /* LOV-private members start here -- only for use in lov/. */ - __u32 lw_magic; - __u32 lw_stripe_size; /* size of the stripe */ - __u32 lw_pattern; /* striping pattern (RAID0, RAID1) */ - __u16 lw_stripe_count; /* number of objects being striped over */ - __u16 lw_layout_gen; /* generation of the layout */ - char lw_pool_name[LOV_MAXPOOLNAME + 1]; /* pool name */ - } lsm_wire; - - struct lov_oinfo *lsm_oinfo[0]; -}; - -#define lsm_oi lsm_wire.lw_object_oi -#define lsm_magic lsm_wire.lw_magic -#define lsm_layout_gen lsm_wire.lw_layout_gen -#define lsm_stripe_size lsm_wire.lw_stripe_size -#define lsm_pattern lsm_wire.lw_pattern -#define lsm_stripe_count lsm_wire.lw_stripe_count -#define lsm_pool_name lsm_wire.lw_pool_name - -static inline bool lsm_is_released(struct lov_stripe_md *lsm) -{ - return !!(lsm->lsm_pattern & LOV_PATTERN_F_RELEASED); -} - -static inline bool lsm_has_objects(struct lov_stripe_md *lsm) -{ - if (lsm == NULL) - return false; - if (lsm_is_released(lsm)) - return false; - return true; -} - -static inline int lov_stripe_md_size(unsigned int stripe_count) -{ - struct lov_stripe_md lsm; - - return sizeof(lsm) + stripe_count * sizeof(lsm.lsm_oinfo[0]); -} - +struct lov_stripe_md; struct obd_info; typedef int (*obd_enqueue_update_f)(void *cookie, int rc); /* obd info for a particular level (lov, osc). */ struct obd_info { - /* Lock policy. It keeps an extent which is specific for a particular - * OSC. (e.g. lov_prep_enqueue_set initialises extent of the policy, - * and osc_enqueue passes it into ldlm_lock_match & ldlm_cli_enqueue. */ - ldlm_policy_data_t oi_policy; - /* Flags used for set request specific flags: - - while lock handling, the flags obtained on the enqueue - request are set here. - - while stats, the flags used for control delay/resend. - - while setattr, the flags used for distinguish punch operation - */ + /* OBD_STATFS_* flags */ __u64 oi_flags; - /* lsm data specific for every OSC. */ - struct lov_stripe_md *oi_md; - /* obdo data specific for every OSC, if needed at all. */ - struct obdo *oi_oa; /* statfs data specific for every OSC, if needed at all. */ struct obd_statfs *oi_osfs; /* An update callback which is called to update some data on upper - * level. E.g. it is used for update lsm->lsm_oinfo at every recieved + * level. E.g. it is used for update lsm->lsm_oinfo at every received * request in osc level for enqueue requests. It is also possible to * update some caller data from LOV layer if needed. */ obd_enqueue_update_f oi_cb_up; - /* oss capability, its type is obd_capa in client to avoid copy. - * in contrary its type is lustre_capa in OSS. */ - void *oi_capa; }; struct obd_type { @@ -189,10 +115,10 @@ struct obd_type { }; struct brw_page { - obd_off off; - struct page *pg; - int count; - obd_flag flag; + u64 off; + struct page *pg; + u32 count; + u32 flag; }; struct timeout_item { @@ -219,23 +145,46 @@ enum { NUM_SYNC_ON_CANCEL_STATES }; +/* + * Limit reply buffer size for striping data to one x86_64 page. This + * value is chosen to fit the striping data for common use cases while + * staying well below the limit at which the buffer must be backed by + * vmalloc(). Excessive use of vmalloc() may cause spinlock contention + * on the MDS. + */ +#define OBD_MAX_DEFAULT_EA_SIZE 4096 + struct mdc_rpc_lock; struct obd_import; struct client_obd { struct rw_semaphore cl_sem; - struct obd_uuid cl_target_uuid; - struct obd_import *cl_import; /* ptlrpc connection state */ - int cl_conn_count; - /* max_mds_easize is purely a performance thing so we don't have to - * call obd_size_diskmd() all the time. */ - int cl_default_mds_easize; - int cl_max_mds_easize; - int cl_default_mds_cookiesize; - int cl_max_mds_cookiesize; - - enum lustre_sec_part cl_sp_me; - enum lustre_sec_part cl_sp_to; - struct sptlrpc_flavor cl_flvr_mgc; /* fixed flavor of mgc->mgs */ + struct obd_uuid cl_target_uuid; + struct obd_import *cl_import; /* ptlrpc connection state */ + size_t cl_conn_count; + + /* Cache maximum and default values for easize. This is + * strictly a performance optimization to minimize calls to + * obd_size_diskmd(). The default values are used to calculate the + * initial size of a request buffer. The ptlrpc layer will resize the + * buffer as needed to accommodate a larger reply from the + * server. The default values should be small enough to avoid wasted + * memory and excessive use of vmalloc(), yet large enough to avoid + * reallocating the buffer in the common use case. */ + + /* Default EA size for striping attributes. It is initialized at + * mount-time based on the default stripe width of the filesystem, + * then it tracks the largest observed EA size advertised by + * the MDT, up to a maximum value of OBD_MAX_DEFAULT_EA_SIZE. */ + __u32 cl_default_mds_easize; + + /* Maximum possible EA size computed at mount-time based on + * the number of OSTs in the filesystem. May be increased at + * run-time if a larger observed size is advertised by the MDT. */ + __u32 cl_max_mds_easize; + + enum lustre_sec_part cl_sp_me; + enum lustre_sec_part cl_sp_to; + struct sptlrpc_flavor cl_flvr_mgc; /* fixed flavor of mgc->mgs */ /* the grant values are protected by loi_list_lock below */ unsigned long cl_dirty_pages; /* all _dirty_ in pages */ @@ -257,7 +206,7 @@ struct client_obd { /* A chunk is an optimal size used by osc_extent to determine * the extent size. A chunk is max(PAGE_CACHE_SIZE, OST block size) */ int cl_chunkbits; - int cl_extent_tax; /* extent overhead, by bytes */ + unsigned int cl_extent_tax; /* extent overhead, by bytes */ /* keep track of objects that have lois that contain pages which * have been queued for async brw. this lock also protects the @@ -273,14 +222,10 @@ struct client_obd { * blocking everywhere, but we don't want to slow down fast-path of * our main platform.) * - * Exact type of ->cl_loi_list_lock is defined in arch/obd.h together - * with client_obd_list_{un,}lock() and - * client_obd_list_lock_{init,done}() functions. - * * NB by Jinshan: though field names are still _loi_, but actually * osc_object{}s are in the list. */ - client_obd_lock_t cl_loi_list_lock; + spinlock_t cl_loi_list_lock; struct list_head cl_loi_ready_list; struct list_head cl_loi_hp_ready_list; struct list_head cl_loi_write_list; @@ -299,23 +244,56 @@ struct client_obd { struct obd_histogram cl_read_offset_hist; struct obd_histogram cl_write_offset_hist; - /* lru for osc caching pages */ - struct cl_client_cache *cl_cache; - struct list_head cl_lru_osc; /* member of cl_cache->ccc_lru */ - atomic_long_t *cl_lru_left; - atomic_long_t cl_lru_busy; - atomic_long_t cl_lru_in_list; - atomic_long_t cl_unstable_count; - struct list_head cl_lru_list; /* lru page list */ - client_obd_lock_t cl_lru_list_lock; /* page list protector */ - atomic_t cl_lru_shrinkers; + /** LRU for osc caching pages */ + struct cl_client_cache *cl_cache; + /** member of cl_cache->ccc_lru */ + struct list_head cl_lru_osc; + /** # of available LRU slots left in the per-OSC cache. + * Available LRU slots are shared by all OSCs of the same file system, + * therefore this is a pointer to cl_client_cache::ccc_lru_left. */ + atomic_long_t *cl_lru_left; + /** # of busy LRU pages. A page is considered busy if it's in writeback + * queue, or in transfer. Busy pages can't be discarded so they are not + * in LRU cache. */ + atomic_long_t cl_lru_busy; + /** # of LRU pages in the cache for this client_obd */ + atomic_long_t cl_lru_in_list; + /** # of threads are shrinking LRU cache. To avoid contention, it's not + * allowed to have multiple threads shrinking LRU cache. */ + atomic_t cl_lru_shrinkers; + /** The time when this LRU cache was last used. */ + time_t cl_lru_last_used; + /** stats: how many reclaims have happened for this client_obd. + * reclaim and shrink - shrink is async, voluntarily rebalancing; + * reclaim is sync, initiated by IO thread when the LRU slots are + * in shortage. */ + __u64 cl_lru_reclaim; + /** List of LRU pages for this client_obd */ + struct list_head cl_lru_list; + /** Lock for LRU page list */ + spinlock_t cl_lru_list_lock; + /** # of unstable pages in this client_obd. + * An unstable page is a page state that WRITE RPC has finished but + * the transaction has NOT yet committed. */ + atomic_long_t cl_unstable_count; + /** Link to osc_shrinker_list */ + struct list_head cl_shrink_list; /* number of in flight destroy rpcs is limited to max_rpcs_in_flight */ atomic_t cl_destroy_in_flight; wait_queue_head_t cl_destroy_waitq; struct mdc_rpc_lock *cl_rpc_lock; - struct mdc_rpc_lock *cl_close_lock; + + /* modify rpcs in flight + * currently used for metadata only */ + spinlock_t cl_mod_rpcs_lock; + __u16 cl_max_mod_rpcs_in_flight; + __u16 cl_mod_rpcs_in_flight; + __u16 cl_close_rpcs_in_flight; + wait_queue_head_t cl_mod_rpcs_waitq; + unsigned long *cl_mod_tag_bitmap; + struct obd_histogram cl_mod_rpcs_hist; /* mgc datastruct */ struct mutex cl_mgc_mutex; @@ -334,13 +312,6 @@ struct client_obd { /* also protected by the poorly named _loi_list_lock lock above */ struct osc_async_rc cl_ar; - /* used by quotacheck when the servers are older than 2.4 */ - int cl_qchk_stat; /* quotacheck stat of the peer */ -#define CL_NOT_QUOTACHECKED 1 /* client->cl_qchk_stat init value */ -#if LUSTRE_VERSION_CODE >= OBD_OCD_VERSION(2, 7, 53, 0) -#warning "please consider removing quotacheck compatibility code" -#endif - /* sequence manager */ struct lu_client_seq *cl_seq; @@ -350,13 +321,13 @@ struct client_obd { void *cl_writeback_work; void *cl_lru_work; /* hash tables for osc_quota_info */ - cfs_hash_t *cl_quota_hash[MAXQUOTAS]; + struct cfs_hash *cl_quota_hash[MAXQUOTAS]; }; #define obd2cli_tgt(obd) ((char *)(obd)->u.cli.cl_target_uuid.uuid) struct obd_id_info { - __u32 idx; - obd_id *data; + u32 idx; + u64 *data; }; struct echo_client_obd { @@ -364,7 +335,6 @@ struct echo_client_obd { spinlock_t ec_lock; struct list_head ec_objects; struct list_head ec_locks; - int ec_nstripes; __u64 ec_unique; }; @@ -405,13 +375,13 @@ struct lov_obd { __u32 lov_tgt_size; /* size of tgts array */ int lov_connects; int lov_pool_count; - cfs_hash_t *lov_pools_hash_body; /* used for key access */ + struct cfs_hash *lov_pools_hash_body; /* used for key access */ struct list_head lov_pool_list; /* used for sequential access */ struct proc_dir_entry *lov_pool_proc_entry; enum lustre_sec_part lov_sp_me; /* Cached LRU and unstable data from upper layer */ - void *lov_cache; + struct cl_client_cache *lov_cache; struct rw_semaphore lov_notify_lock; }; @@ -443,12 +413,10 @@ struct lmv_obd { struct obd_export *exp; struct proc_dir_entry *targets_proc_entry; - struct mutex init_mutex; + struct mutex lmv_init_mutex; int connected; int max_easize; int max_def_easize; - int max_cookiesize; - int max_def_cookiesize; __u32 tgts_size; /* size of tgts array */ struct lmv_tgt_desc **tgts; @@ -461,9 +429,9 @@ struct niobuf_local { __u32 lnb_page_offset; __u32 lnb_len; __u32 lnb_flags; + int lnb_rc; struct page *lnb_page; void *lnb_data; - int lnb_rc; }; #define LUSTRE_FLD_NAME "fld" @@ -501,20 +469,57 @@ struct niobuf_local { #define LUSTRE_MGS_OBDNAME "MGS" #define LUSTRE_MGC_OBDNAME "MGC" -struct obd_trans_info { - __u64 oti_xid; - /* Only used on the server side for tracking acks. */ - struct oti_req_ack_lock { - struct lustre_handle lock; - __u32 mode; - } oti_ack_locks[4]; - void *oti_handle; - struct llog_cookie oti_onecookie; - struct llog_cookie *oti_logcookies; - - /** VBR: versions */ - __u64 oti_pre_version; -}; +static inline int is_lwp_on_mdt(char *name) +{ + char *ptr; + + ptr = strrchr(name, '-'); + if (ptr == NULL) { + CERROR("%s is not a obdname\n", name); + return 0; + } + + /* LWP name on MDT is fsname-MDTxxxx-lwp-MDTxxxx */ + + if (strncmp(ptr + 1, "MDT", 3) != 0) + return 0; + + while (*(--ptr) != '-' && ptr != name); + + if (ptr == name) + return 0; + + if (strncmp(ptr + 1, LUSTRE_LWP_NAME, strlen(LUSTRE_LWP_NAME)) != 0) + return 0; + + return 1; +} + +static inline int is_lwp_on_ost(char *name) +{ + char *ptr; + + ptr = strrchr(name, '-'); + if (ptr == NULL) { + CERROR("%s is not a obdname\n", name); + return 0; + } + + /* LWP name on OST is fsname-MDTxxxx-lwp-OSTxxxx */ + + if (strncmp(ptr + 1, "OST", 3) != 0) + return 0; + + while (*(--ptr) != '-' && ptr != name); + + if (ptr == name) + return 0; + + if (strncmp(ptr + 1, LUSTRE_LWP_NAME, strlen(LUSTRE_LWP_NAME)) != 0) + return 0; + + return 1; +} /* * Events signalled through obd_notify() upcall-chain. @@ -578,14 +583,14 @@ struct obd_llog_group { #define OBD_DEVICE_MAGIC 0XAB5CD6EF struct obd_device { - struct obd_type *obd_type; - __u32 obd_magic; + struct obd_type *obd_type; + __u32 obd_magic; /* OBD_DEVICE_MAGIC */ + int obd_minor; /* device number: lctl dl */ + struct lu_device *obd_lu_dev; - /* common and UUID name of this device */ - char obd_name[MAX_OBD_NAME]; - struct obd_uuid obd_uuid; - int obd_minor; - struct lu_device *obd_lu_dev; + /* common and UUID name of this device */ + struct obd_uuid obd_uuid; + char obd_name[MAX_OBD_NAME]; /* bitfield modification is protected by obd_dev_lock */ unsigned long @@ -594,8 +599,7 @@ struct obd_device { obd_recovering:1, /* there are recoverable clients */ obd_abort_recovery:1, /* recovery expired */ obd_version_recov:1, /* obd uses version checking */ - obd_replayable:1, /* recovery is enabled; - * inform clients */ + obd_replayable:1, /* recovery enabled; inform clients */ obd_no_transno:1, /* no committed-transno notification */ obd_no_recov:1, /* fail instead of retry messages */ obd_stopping:1, /* started cleanup */ @@ -613,28 +617,30 @@ struct obd_device { * protection of other bits using _bh lock */ unsigned long obd_recovery_expired:1; /* uuid-export hash body */ - cfs_hash_t *obd_uuid_hash; + struct cfs_hash *obd_uuid_hash; /* nid-export hash body */ - cfs_hash_t *obd_nid_hash; + struct cfs_hash *obd_nid_hash; /* nid stats body */ - cfs_hash_t *obd_nid_stats_hash; + struct cfs_hash *obd_nid_stats_hash; + /* client_generation-export hash body */ + struct cfs_hash *obd_gen_hash; struct list_head obd_nid_stats; - atomic_t obd_refcount; struct list_head obd_exports; struct list_head obd_unlinked_exports; struct list_head obd_delayed_exports; struct list_head obd_lwp_list; + atomic_t obd_refcount; int obd_num_exports; spinlock_t obd_nid_lock; struct ldlm_namespace *obd_namespace; struct ptlrpc_client obd_ldlm_client; /* XXX OST/MDS only */ /* a spinlock is OK for what we do now, may need a semaphore later */ spinlock_t obd_dev_lock; /* protect OBD bitfield above */ - struct mutex obd_dev_mutex; - __u64 obd_last_committed; spinlock_t obd_osfs_lock; struct obd_statfs obd_osfs; /* locked by obd_osfs_lock */ __u64 obd_osfs_age; + __u64 obd_last_committed; + struct mutex obd_dev_mutex; struct lvfs_run_ctxt obd_lvfs_ctxt; struct obd_llog_group obd_olg; /* default llog group */ struct obd_device *obd_observer; @@ -667,10 +673,11 @@ struct obd_device { int obd_recovery_ir_factor; /* new recovery stuff from CMD2 */ - struct target_recovery_data obd_recovery_data; int obd_replayed_locks; atomic_t obd_req_replay_clients; atomic_t obd_lock_replay_clients; + struct target_recovery_data obd_recovery_data; + /* all lists are protected by obd_recovery_task_lock */ struct list_head obd_req_replay_queue; struct list_head obd_lock_replay_queue; @@ -688,48 +695,39 @@ struct obd_device { struct lov_obd lov; struct lmv_obd lmv; } u; + /* Fields used by LProcFS */ - unsigned int obd_cntr_base; - struct lprocfs_stats *obd_stats; + struct lprocfs_stats *obd_stats; + unsigned int obd_cntr_base; - unsigned int obd_md_cntr_base; - struct lprocfs_stats *obd_md_stats; + unsigned int obd_md_cntr_base; + struct lprocfs_stats *obd_md_stats; struct proc_dir_entry *obd_proc_entry; struct proc_dir_entry *obd_proc_exports_entry; struct proc_dir_entry *obd_svc_procroot; struct lprocfs_stats *obd_svc_stats; - struct lprocfs_seq_vars *obd_vars; + struct lprocfs_vars *obd_vars; atomic_t obd_evict_inprogress; wait_queue_head_t obd_evict_inprogress_waitq; struct list_head obd_evict_list; /* protected with pet_lock */ - /** - * Ldlm pool part. Save last calculated SLV and Limit. - */ - rwlock_t obd_pool_lock; - int obd_pool_limit; - __u64 obd_pool_slv; + /** + * LDLM pool part. Save last calculated SLV and Limit. + */ + rwlock_t obd_pool_lock; + __u64 obd_pool_slv; + int obd_pool_limit; - /** - * A list of outstanding class_incref()'s against this obd. For - * debugging. - */ - struct lu_ref obd_reference; + int obd_conn_inprogress; - int obd_conn_inprogress; -}; - -enum obd_cleanup_stage { -/* Special case hack for MDS LOVs */ - OBD_CLEANUP_EARLY, -/* can be directly mapped to .ldto_device_fini() */ - OBD_CLEANUP_EXPORTS, + /** + * List of outstanding class_incref()'s fo this OBD. For debugging. */ + struct lu_ref obd_reference; }; /* get/set_info keys */ #define KEY_ASYNC "async" -#define KEY_CAPA_KEY "capa_key" #define KEY_CHANGELOG_CLEAR "changelog_clear" #define KEY_FID2PATH "fid2path" #define KEY_CHECKSUM "checksum" @@ -744,22 +742,15 @@ enum obd_cleanup_stage { #define KEY_INTERMDS "inter_mds" #define KEY_LAST_ID "last_id" #define KEY_LAST_FID "last_fid" -#define KEY_LOCK_TO_STRIPE "lock_to_stripe" -#define KEY_LOVDESC "lovdesc" #define KEY_MAX_EASIZE "max_easize" #define KEY_DEFAULT_EASIZE "default_easize" -#define KEY_MAX_COOKIESIZE "max_cookiesize" -#define KEY_DEFAULT_COOKIESIZE "default_cookiesize" -#define KEY_MDS_CONN "mds_conn" #define KEY_MGSSEC "mgssec" -#define KEY_NEXT_ID "next_id" #define KEY_READ_ONLY "read-only" #define KEY_REGISTER_TARGET "register_target" #define KEY_SET_FS "set_fs" #define KEY_TGT_COUNT "tgt_count" /* KEY_SET_INFO in lustre_idl.h */ #define KEY_SPTLRPC_CONF "sptlrpc_conf" -#define KEY_CONNECT_FLAG "connect_flags" #define KEY_CACHE_SET "cache_set" #define KEY_CACHE_LRU_SHRINK "cache_lru_shrink" @@ -767,22 +758,6 @@ enum obd_cleanup_stage { struct lu_context; -/* /!\ must be coherent with include/linux/namei.h on patched kernel */ -#define IT_OPEN (1 << 0) -#define IT_CREAT (1 << 1) -#define IT_READDIR (1 << 2) -#define IT_GETATTR (1 << 3) -#define IT_LOOKUP (1 << 4) -#define IT_UNLINK (1 << 5) -#define IT_TRUNC (1 << 6) -#define IT_GETXATTR (1 << 7) -#define IT_EXEC (1 << 8) -#define IT_PIN (1 << 9) -#define IT_LAYOUT (1 << 10) -#define IT_QUOTA_DQACQ (1 << 11) -#define IT_QUOTA_CONN (1 << 12) -#define IT_SETXATTR (1 << 13) - static inline int it_to_lock_mode(struct lookup_intent *it) { /* CREAT needs to be tested before open (both could be set) */ @@ -802,16 +777,32 @@ static inline int it_to_lock_mode(struct lookup_intent *it) return -EINVAL; } +enum md_op_flags { + MF_MDC_CANCEL_FID1 = 1 << 0, + MF_MDC_CANCEL_FID2 = 1 << 1, + MF_MDC_CANCEL_FID3 = 1 << 2, + MF_MDC_CANCEL_FID4 = 1 << 3, + MF_GET_MDT_IDX = 1 << 4, +}; + +enum md_cli_flags { + CLI_SET_MEA = 1 << 0, + CLI_RM_ENTRY = 1 << 1, + CLI_HASH64 = 1 << 2, + CLI_API32 = 1 << 3, + CLI_MIGRATE = 1 << 4, +}; + struct md_op_data { struct lu_fid op_fid1; /* operation fid1 (usualy parent) */ struct lu_fid op_fid2; /* operation fid2 (usualy child) */ struct lu_fid op_fid3; /* 2 extra fids to find conflicting */ struct lu_fid op_fid4; /* to the operation locks. */ u32 op_mds; /* what mds server open will go to */ - struct lustre_handle op_handle; - obd_time op_mod_time; + struct lustre_handle op_handle; + s64 op_mod_time; const char *op_name; - int op_namelen; + size_t op_namelen; __u32 op_mode; struct lmv_stripe_md *op_mea1; struct lmv_stripe_md *op_mea2; @@ -823,35 +814,30 @@ struct md_op_data { size_t op_data_size; /* iattr fields and blocks. */ - struct iattr op_attr; - __u64 op_valid; + struct iattr op_attr; loff_t op_attr_blocks; + unsigned int op_attr_flags; /* LUSTRE_{SYNC,..}_FL */ + __u64 op_valid; /* OBD_MD_* */ - /* Size-on-MDS epoch and flags. */ - __u64 op_ioepoch; - __u32 op_flags; - - /* Capa fields */ - struct obd_capa *op_capa1; - struct obd_capa *op_capa2; + enum md_op_flags op_flags; /* Various operation flags. */ enum mds_op_bias op_bias; /* Used by readdir */ - __u32 op_npages; + unsigned int op_max_pages; /* used to transfer info between the stacks of MD client * see enum op_cli_flags */ - __u32 op_cli_flags; + enum md_cli_flags op_cli_flags; /* File object data version for HSM release, on client */ __u64 op_data_version; struct lustre_handle op_lease_handle; -}; -#define op_stripe_offset op_ioepoch -#define op_max_pages op_valid + /* default stripe offset */ + __u32 op_default_stripe_offset; +}; struct md_callback { int (*md_blocking_ast)(struct ldlm_lock *lock, @@ -866,49 +852,47 @@ typedef int (* md_enqueue_cb_t)(struct ptlrpc_request *req, int rc); struct md_enqueue_info { - struct md_op_data mi_data; - struct lookup_intent mi_it; - struct lustre_handle mi_lockh; - struct inode *mi_dir; - md_enqueue_cb_t mi_cb; - __u64 mi_cbdata; + struct md_op_data mi_data; + struct lookup_intent mi_it; + struct lustre_handle mi_lockh; + struct inode *mi_dir; + struct ldlm_enqueue_info mi_einfo; + md_enqueue_cb_t mi_cb; + void *mi_cbdata; }; struct obd_ops { struct module *o_owner; int (*o_iocontrol)(unsigned int cmd, struct obd_export *exp, int len, void *karg, void __user *uarg); - int (*o_get_info)(const struct lu_env *env, struct obd_export *, - __u32 keylen, void *key, __u32 *vallen, void *val, - struct lov_stripe_md *lsm); - int (*o_set_info_async)(const struct lu_env *, struct obd_export *, - __u32 keylen, void *key, - __u32 vallen, void *val, - struct ptlrpc_request_set *set); - int (*o_setup) (struct obd_device *dev, struct lustre_cfg *cfg); - int (*o_precleanup)(struct obd_device *dev, - enum obd_cleanup_stage cleanup_stage); - int (*o_cleanup)(struct obd_device *dev); - int (*o_process_config)(struct obd_device *dev, obd_count len, - void *data); - int (*o_postrecov)(struct obd_device *dev); - int (*o_add_conn)(struct obd_import *imp, struct obd_uuid *uuid, - int priority); - int (*o_del_conn)(struct obd_import *imp, struct obd_uuid *uuid); - /* connect to the target device with given connection - * data. @ocd->ocd_connect_flags is modified to reflect flags actually - * granted by the target, which are guaranteed to be a subset of flags - * asked for. If @ocd == NULL, use default parameters. */ - int (*o_connect)(const struct lu_env *env, - struct obd_export **exp, struct obd_device *src, - struct obd_uuid *cluuid, struct obd_connect_data *ocd, - void *localdata); - int (*o_reconnect)(const struct lu_env *env, - struct obd_export *exp, struct obd_device *src, - struct obd_uuid *cluuid, - struct obd_connect_data *ocd, - void *localdata); - int (*o_disconnect)(struct obd_export *exp); + int (*o_get_info)(const struct lu_env *env, struct obd_export *, + __u32 keylen, void *key, __u32 *vallen, void *val); + int (*o_set_info_async)(const struct lu_env *, struct obd_export *, + __u32 keylen, void *key, + __u32 vallen, void *val, + struct ptlrpc_request_set *set); + int (*o_setup) (struct obd_device *dev, struct lustre_cfg *cfg); + int (*o_precleanup)(struct obd_device *dev); + int (*o_cleanup)(struct obd_device *dev); + int (*o_process_config)(struct obd_device *dev, size_t len, void *data); + int (*o_postrecov)(struct obd_device *dev); + int (*o_add_conn)(struct obd_import *imp, struct obd_uuid *uuid, + int priority); + int (*o_del_conn)(struct obd_import *imp, struct obd_uuid *uuid); + /* connect to the target device with given connection + * data. @ocd->ocd_connect_flags is modified to reflect flags actually + * granted by the target, which are guaranteed to be a subset of flags + * asked for. If @ocd == NULL, use default parameters. */ + int (*o_connect)(const struct lu_env *env, + struct obd_export **exp, struct obd_device *src, + struct obd_uuid *cluuid, struct obd_connect_data *ocd, + void *localdata); + int (*o_reconnect)(const struct lu_env *env, + struct obd_export *exp, struct obd_device *src, + struct obd_uuid *cluuid, + struct obd_connect_data *ocd, + void *localdata); + int (*o_disconnect)(struct obd_export *exp); /* Initialize/finalize fids infrastructure. */ int (*o_fid_init)(struct obd_device *obd, @@ -919,100 +903,73 @@ struct obd_ops { int (*o_fid_alloc)(const struct lu_env *env, struct obd_export *exp, struct lu_fid *fid, struct md_op_data *op_data); - /* - * Object with @fid is getting deleted, we may want to do something - * about this. - */ - int (*o_statfs)(const struct lu_env *, struct obd_export *exp, - struct obd_statfs *osfs, __u64 max_age, __u32 flags); - int (*o_statfs_async)(struct obd_export *exp, struct obd_info *oinfo, - __u64 max_age, struct ptlrpc_request_set *set); - int (*o_packmd)(struct obd_export *exp, struct lov_mds_md **disk_tgt, - struct lov_stripe_md *mem_src); - int (*o_unpackmd)(struct obd_export *exp,struct lov_stripe_md **mem_tgt, - struct lov_mds_md *disk_src, int disk_len); - int (*o_create)(const struct lu_env *env, struct obd_export *exp, - struct obdo *oa, struct lov_stripe_md **ea, - struct obd_trans_info *oti); - int (*o_destroy)(const struct lu_env *env, struct obd_export *exp, - struct obdo *oa, struct lov_stripe_md *ea, - struct obd_trans_info *oti, struct obd_export *md_exp, - void *capa); - int (*o_setattr)(const struct lu_env *, struct obd_export *exp, - struct obd_info *oinfo, struct obd_trans_info *oti); - int (*o_setattr_async)(struct obd_export *exp, struct obd_info *oinfo, - struct obd_trans_info *oti, - struct ptlrpc_request_set *rqset); - int (*o_getattr)(const struct lu_env *env, struct obd_export *exp, - struct obd_info *oinfo); - int (*o_getattr_async)(struct obd_export *exp, struct obd_info *oinfo, - struct ptlrpc_request_set *set); - int (*o_preprw)(const struct lu_env *env, int cmd, - struct obd_export *exp, struct obdo *oa, int objcount, - struct obd_ioobj *obj, struct niobuf_remote *remote, - int *nr_pages, struct niobuf_local *local, - struct obd_trans_info *oti, struct lustre_capa *capa); - int (*o_commitrw)(const struct lu_env *env, int cmd, - struct obd_export *exp, struct obdo *oa, - int objcount, struct obd_ioobj *obj, - struct niobuf_remote *remote, int pages, - struct niobuf_local *local, - struct obd_trans_info *oti, int rc); - int (*o_change_cbdata)(struct obd_export *, struct lov_stripe_md *, - ldlm_iterator_t it, void *data); - int (*o_find_cbdata)(struct obd_export *, struct lov_stripe_md *, - ldlm_iterator_t it, void *data); - int (*o_init_export)(struct obd_export *exp); - int (*o_destroy_export)(struct obd_export *exp); - - /* llog related obd_methods */ - int (*o_llog_init)(struct obd_device *obd, struct obd_llog_group *grp, - struct obd_device *disk_obd, int *idx); - int (*o_llog_finish)(struct obd_device *obd, int count); - - int (*o_import_event)(struct obd_device *, struct obd_import *, - enum obd_import_event); - - int (*o_notify)(struct obd_device *obd, struct obd_device *watched, - enum obd_notify_event ev, void *data); - - int (*o_health_check)(const struct lu_env *env, struct obd_device *); - struct obd_uuid *(*o_get_uuid) (struct obd_export *exp); - - /* quota methods */ - int (*o_quotacheck)(struct obd_device *, struct obd_export *, - struct obd_quotactl *); - int (*o_quotactl)(struct obd_device *, struct obd_export *, - struct obd_quotactl *); - - int (*o_ping)(const struct lu_env *, struct obd_export *exp); - - /* pools methods */ - int (*o_pool_new)(struct obd_device *obd, char *poolname); - int (*o_pool_del)(struct obd_device *obd, char *poolname); - int (*o_pool_add)(struct obd_device *obd, char *poolname, - char *ostname); - int (*o_pool_rem)(struct obd_device *obd, char *poolname, - char *ostname); - void (*o_getref)(struct obd_device *obd); - void (*o_putref)(struct obd_device *obd); - /* - * NOTE: If adding ops, add another LPROCFS_OBD_OP_INIT() line - * to lprocfs_alloc_obd_stats() in obdclass/lprocfs_status.c. - * Also, add a wrapper function in include/linux/obd_class.h. */ + /* + * Object with @fid is getting deleted, we may want to do something + * about this. + */ + int (*o_statfs)(const struct lu_env *, struct obd_export *exp, + struct obd_statfs *osfs, __u64 max_age, __u32 flags); + int (*o_statfs_async)(struct obd_export *exp, struct obd_info *oinfo, + __u64 max_age, struct ptlrpc_request_set *set); + int (*o_create)(const struct lu_env *env, struct obd_export *exp, + struct obdo *oa); + int (*o_destroy)(const struct lu_env *env, struct obd_export *exp, + struct obdo *oa); + int (*o_setattr)(const struct lu_env *, struct obd_export *exp, + struct obdo *oa); + int (*o_getattr)(const struct lu_env *env, struct obd_export *exp, + struct obdo *oa); + int (*o_preprw)(const struct lu_env *env, int cmd, + struct obd_export *exp, struct obdo *oa, int objcount, + struct obd_ioobj *obj, struct niobuf_remote *remote, + int *nr_pages, struct niobuf_local *local); + int (*o_commitrw)(const struct lu_env *env, int cmd, + struct obd_export *exp, struct obdo *oa, + int objcount, struct obd_ioobj *obj, + struct niobuf_remote *remote, int pages, + struct niobuf_local *local, int rc); + int (*o_init_export)(struct obd_export *exp); + int (*o_destroy_export)(struct obd_export *exp); + + int (*o_import_event)(struct obd_device *, struct obd_import *, + enum obd_import_event); + + int (*o_notify)(struct obd_device *obd, struct obd_device *watched, + enum obd_notify_event ev, void *data); + + int (*o_health_check)(const struct lu_env *env, struct obd_device *); + struct obd_uuid *(*o_get_uuid) (struct obd_export *exp); + + /* quota methods */ + int (*o_quotactl)(struct obd_device *, struct obd_export *, + struct obd_quotactl *); + + int (*o_ping)(const struct lu_env *, struct obd_export *exp); + + /* pools methods */ + int (*o_pool_new)(struct obd_device *obd, char *poolname); + int (*o_pool_del)(struct obd_device *obd, char *poolname); + int (*o_pool_add)(struct obd_device *obd, char *poolname, + char *ostname); + int (*o_pool_rem)(struct obd_device *obd, char *poolname, + char *ostname); + void (*o_getref)(struct obd_device *obd); + void (*o_putref)(struct obd_device *obd); + /* + * NOTE: If adding ops, add another LPROCFS_OBD_OP_INIT() line + * to lprocfs_alloc_obd_stats() in obdclass/lprocfs_status.c. + * Also, add a wrapper function in include/linux/obd_class.h. */ }; /* lmv structures */ struct lustre_md { struct mdt_body *body; - struct lov_stripe_md *lsm; + struct lu_buf layout; struct lmv_stripe_md *lmv; #ifdef CONFIG_FS_POSIX_ACL struct posix_acl *posix_acl; #endif struct mdt_remote_perm *remote_perm; - struct obd_capa *mds_capa; - struct obd_capa *oss_capa; }; struct md_open_data { @@ -1054,8 +1011,8 @@ struct md_ops { struct md_open_data *, struct ptlrpc_request **); int (*m_create)(struct obd_export *, struct md_op_data *, - const void *, int, int, __u32, __u32, cfs_cap_t, - __u64, struct ptlrpc_request **); + const void *, size_t, umode_t, uid_t, gid_t, + cfs_cap_t, __u64, struct ptlrpc_request **); int (*m_enqueue)(struct obd_export *, struct ldlm_enqueue_info *, const union ldlm_policy_data *, @@ -1074,15 +1031,14 @@ struct md_ops { struct ptlrpc_request **); int (*m_rename)(struct obd_export *, struct md_op_data *, - const char *, int, const char *, int, + const char *, size_t, const char *, size_t, struct ptlrpc_request **); int (*m_setattr)(struct obd_export *, struct md_op_data *, void *, - int , void *, int, struct ptlrpc_request **, - struct md_open_data **mod); + size_t , struct ptlrpc_request **); int (*m_fsync)(struct obd_export *, const struct lu_fid *, - struct obd_capa *, struct ptlrpc_request **); + struct ptlrpc_request **); int (*m_read_page)(struct obd_export *, struct md_op_data *, struct md_callback *cb_op, __u64 hash_offset, @@ -1092,39 +1048,31 @@ struct md_ops { struct ptlrpc_request **); int (*m_setxattr)(struct obd_export *, const struct lu_fid *, - struct obd_capa *, obd_valid, const char *, - const char *, int, int, int, __u32, + u64, const char *, const char *, int, int, int, u32, struct ptlrpc_request **); int (*m_getxattr)(struct obd_export *, const struct lu_fid *, - struct obd_capa *, obd_valid, const char *, - const char *, int, int, int, + u64, const char *, const char *, int, int, int, struct ptlrpc_request **); - int (*m_intent_getattr_async)(struct obd_export *, - struct md_enqueue_info *, - struct ldlm_enqueue_info *); + int (*m_intent_getattr_async)(struct obd_export *, + struct md_enqueue_info *); int (*m_revalidate_lock)(struct obd_export *, struct lookup_intent *, struct lu_fid *, __u64 *bits); #define MD_STATS_LAST_OP m_revalidate_lock - int (*m_getstatus)(struct obd_export *, struct lu_fid *, - struct obd_capa **); - + int (*m_getstatus)(struct obd_export *, struct lu_fid *); int (*m_null_inode)(struct obd_export *, const struct lu_fid *); int (*m_find_cbdata)(struct obd_export *, const struct lu_fid *, ldlm_iterator_t, void *); - int (*m_done_writing)(struct obd_export *, struct md_op_data *, - struct md_open_data *); - int (*m_getattr_name)(struct obd_export *, struct md_op_data *, struct ptlrpc_request **); - int (*m_init_ea_size)(struct obd_export *, int, int, int, int); + int (*m_init_ea_size)(struct obd_export *, __u32, __u32); int (*m_get_lustre_md)(struct obd_export *, struct ptlrpc_request *, struct obd_export *, struct obd_export *, @@ -1134,10 +1082,7 @@ struct md_ops { int (*m_merge_attr)(struct obd_export *, const struct lmv_stripe_md *lsm, - struct cl_attr *attr); - - int (*m_update_lsm_md)(struct obd_export *, struct lmv_stripe_md *lsm, - struct mdt_body *, ldlm_blocking_callback); + struct cl_attr *attr, ldlm_blocking_callback); int (*m_set_open_replay_data)(struct obd_export *, struct obd_client_handle *, @@ -1148,60 +1093,26 @@ struct md_ops { int (*m_set_lock_data)(struct obd_export *, __u64 *, void *, __u64 *); - ldlm_mode_t (*m_lock_match)(struct obd_export *, __u64, - const struct lu_fid *, ldlm_type_t, - ldlm_policy_data_t *, ldlm_mode_t, - struct lustre_handle *); + enum ldlm_mode (*m_lock_match)(struct obd_export *, __u64, + const struct lu_fid *, enum ldlm_type, + union ldlm_policy_data *, enum ldlm_mode, + struct lustre_handle *); int (*m_cancel_unused)(struct obd_export *, const struct lu_fid *, - ldlm_policy_data_t *, ldlm_mode_t, - ldlm_cancel_flags_t flags, void *opaque); - - int (*m_renew_capa)(struct obd_export *, struct obd_capa *oc, - renew_capa_cb_t cb); - - int (*m_unpack_capa)(struct obd_export *, struct ptlrpc_request *, - const struct req_msg_field *, struct obd_capa **); + union ldlm_policy_data *, enum ldlm_mode, + enum ldlm_cancel_flags flags, void *opaque); int (*m_get_remote_perm)(struct obd_export *, const struct lu_fid *, - struct obd_capa *, __u32, - struct ptlrpc_request **); + u32, struct ptlrpc_request **); int (*m_get_fid_from_lsm)(struct obd_export *, const struct lmv_stripe_md *, const char *name, int namelen, struct lu_fid *fid); + int (*m_unpackmd)(struct obd_export *exp, struct lmv_stripe_md **plsm, + const union lmv_mds_md *lmv, size_t lmv_size); }; -struct lsm_operations { - void (*lsm_free)(struct lov_stripe_md *); - int (*lsm_destroy)(struct lov_stripe_md *, struct obdo *oa, - struct obd_export *md_exp); - void (*lsm_stripe_by_index)(struct lov_stripe_md *, int *, obd_off *, - obd_off *); - void (*lsm_stripe_by_offset)(struct lov_stripe_md *, int *, obd_off *, - obd_off *); - int (*lsm_lmm_verify) (struct lov_mds_md *lmm, int lmm_bytes, - __u16 *stripe_count); - int (*lsm_unpackmd) (struct lov_obd *lov, struct lov_stripe_md *lsm, - struct lov_mds_md *lmm); -}; - -extern const struct lsm_operations lsm_v1_ops; -extern const struct lsm_operations lsm_v3_ops; -static inline const struct lsm_operations *lsm_op_find(int magic) -{ - switch(magic) { - case LOV_MAGIC_V1: - return &lsm_v1_ops; - case LOV_MAGIC_V3: - return &lsm_v3_ops; - default: - CERROR("Cannot recognize lsm_magic %08x\n", magic); - return NULL; - } -} - static inline struct md_open_data *obd_mod_alloc(void) { struct md_open_data *mod; @@ -1222,7 +1133,7 @@ static inline struct md_open_data *obd_mod_alloc(void) } \ }) -void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid); +void obdo_from_inode(struct obdo *dst, struct inode *src, u64 valid); void obdo_set_parent_fid(struct obdo *dst, const struct lu_fid *parent); /* return 1 if client should be resend request */ @@ -1244,7 +1155,8 @@ static inline const char *lu_dev_name(const struct lu_device *lu_dev) return lu_dev->ld_obd->obd_name; } -static inline bool filename_is_volatile(const char *name, int namelen, int *idx) +static inline bool filename_is_volatile(const char *name, size_t namelen, + int *idx) { const char *start; char *end; @@ -1269,7 +1181,7 @@ static inline bool filename_is_volatile(const char *name, int namelen, int *idx) } /* we have an idx, read it */ start = name + LUSTRE_VOLATILE_HDR_LEN + 1; - *idx = strtoul(start, &end, 16); + *idx = simple_strtoul(start, &end, 16); /* error cases: * no digit, no trailing :, negative value */