X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Finclude%2Fobd.h;h=04cdca5f9a93172cef01a7ec67326df3c56ee235;hp=bda672347c59cf10fce33a8c0bd60d81e06a93bc;hb=f35328e1e312259d917726f9e953cb6b834e60cf;hpb=29bd1e0228c0dee31bf876548c7621b51d8a09af diff --git a/lustre/include/obd.h b/lustre/include/obd.h index bda6723..04cdca5 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -27,7 +27,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2013, Intel Corporation. + * Copyright (c) 2011, 2015, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -36,19 +36,8 @@ #ifndef __OBD_H #define __OBD_H -#ifndef __KERNEL__ -# error "userspace should not include " -#endif -#if defined(__linux__) -#include -#elif defined(__APPLE__) -#include -#elif defined(__WINNT__) -#include -#else -#error Unsupported operating system. -#endif +#include #include #include @@ -61,8 +50,10 @@ #include #include #include -#include +#include +#include #include +#include #define MAX_OBD_DEVICES 8192 @@ -93,144 +84,51 @@ static inline void loi_init(struct lov_oinfo *loi) { } -struct lov_stripe_md { - atomic_t lsm_refc; - spinlock_t lsm_lock; - pid_t lsm_lock_owner; /* debugging */ - - /* maximum possible file size, might change as OSTs status changes, - * e.g. disconnected, deactivated */ - __u64 lsm_maxbytes; - struct { - /* Public members. */ - struct ost_id lw_object_oi; /* lov object id/seq */ - - /* LOV-private members start here -- only for use in lov/. */ - __u32 lw_magic; - __u32 lw_stripe_size; /* size of the stripe */ - __u32 lw_pattern; /* striping pattern (RAID0, RAID1) */ - __u16 lw_stripe_count; /* number of objects being striped over */ - __u16 lw_layout_gen; /* generation of the layout */ - char lw_pool_name[LOV_MAXPOOLNAME]; /* pool name */ - } lsm_wire; - - struct lov_oinfo *lsm_oinfo[0]; -}; - -#define lsm_oi lsm_wire.lw_object_oi -#define lsm_magic lsm_wire.lw_magic -#define lsm_layout_gen lsm_wire.lw_layout_gen -#define lsm_stripe_size lsm_wire.lw_stripe_size -#define lsm_pattern lsm_wire.lw_pattern -#define lsm_stripe_count lsm_wire.lw_stripe_count -#define lsm_pool_name lsm_wire.lw_pool_name - -static inline bool lsm_is_released(struct lov_stripe_md *lsm) -{ - return !!(lsm->lsm_pattern & LOV_PATTERN_F_RELEASED); -} - -static inline bool lsm_has_objects(struct lov_stripe_md *lsm) -{ - if (lsm == NULL) - return false; - if (lsm_is_released(lsm)) - return false; - return true; -} - -static inline int lov_stripe_md_size(unsigned int stripe_count) -{ - struct lov_stripe_md lsm; - - return sizeof(lsm) + stripe_count * sizeof(lsm.lsm_oinfo[0]); -} - +struct lov_stripe_md; struct obd_info; typedef int (*obd_enqueue_update_f)(void *cookie, int rc); /* obd info for a particular level (lov, osc). */ struct obd_info { - /* Lock policy. It keeps an extent which is specific for a particular - * OSC. (e.g. lov_prep_enqueue_set initialises extent of the policy, - * and osc_enqueue passes it into ldlm_lock_match & ldlm_cli_enqueue. */ - ldlm_policy_data_t oi_policy; - /* Flags used for set request specific flags: - - while lock handling, the flags obtained on the enqueue - request are set here. - - while stats, the flags used for control delay/resend. - - while setattr, the flags used for distinguish punch operation - */ + /* OBD_STATFS_* flags */ __u64 oi_flags; - /* Lock handle specific for every OSC lock. */ - struct lustre_handle *oi_lockh; - /* lsm data specific for every OSC. */ - struct lov_stripe_md *oi_md; - /* obdo data specific for every OSC, if needed at all. */ - struct obdo *oi_oa; /* statfs data specific for every OSC, if needed at all. */ struct obd_statfs *oi_osfs; /* An update callback which is called to update some data on upper - * level. E.g. it is used for update lsm->lsm_oinfo at every recieved + * level. E.g. it is used for update lsm->lsm_oinfo at every received * request in osc level for enqueue requests. It is also possible to * update some caller data from LOV layer if needed. */ obd_enqueue_update_f oi_cb_up; - /* oss capability, its type is obd_capa in client to avoid copy. - * in contrary its type is lustre_capa in OSS. */ - void *oi_capa; - /* transfer jobid from ost_sync() to filter_sync()... */ - char *oi_jobid; }; struct obd_type { - struct list_head typ_chain; - struct obd_ops *typ_dt_ops; - struct md_ops *typ_md_ops; - struct proc_dir_entry *typ_procroot; - struct proc_dir_entry *typ_procsym; - __u32 typ_sym_filter; - char *typ_name; - int typ_refcnt; - struct lu_device_type *typ_lu; - spinlock_t obd_type_lock; + struct list_head typ_chain; + struct obd_ops *typ_dt_ops; + struct md_ops *typ_md_ops; + struct proc_dir_entry *typ_procroot; + struct proc_dir_entry *typ_procsym; + __u32 typ_sym_filter; + char *typ_name; + int typ_refcnt; + struct lu_device_type *typ_lu; + spinlock_t obd_type_lock; }; struct brw_page { - obd_off off; - struct page *pg; - int count; - obd_flag flag; -}; - -/* llog contexts */ -enum llog_ctxt_id { - LLOG_CONFIG_ORIG_CTXT = 0, - LLOG_CONFIG_REPL_CTXT, - LLOG_MDS_OST_ORIG_CTXT, - LLOG_MDS_OST_REPL_CTXT, - LLOG_SIZE_ORIG_CTXT, - LLOG_SIZE_REPL_CTXT, - LLOG_RD1_ORIG_CTXT, - LLOG_RD1_REPL_CTXT, - LLOG_TEST_ORIG_CTXT, - LLOG_TEST_REPL_CTXT, - LLOG_LOVEA_ORIG_CTXT, - LLOG_LOVEA_REPL_CTXT, - LLOG_CHANGELOG_ORIG_CTXT, /**< changelog generation on mdd */ - LLOG_CHANGELOG_REPL_CTXT, /**< changelog access on clients */ - LLOG_CHANGELOG_USER_ORIG_CTXT, /**< for multiple changelog consumers */ - LLOG_AGENT_ORIG_CTXT, /**< agent requests generation on cdt */ - LLOG_MAX_CTXTS + u64 off; + struct page *pg; + u32 count; + u32 flag; }; struct timeout_item { - enum timeout_event ti_event; - cfs_time_t ti_timeout; - timeout_cb_t ti_cb; - void *ti_cb_data; - cfs_list_t ti_obd_list; - cfs_list_t ti_chain; + enum timeout_event ti_event; + cfs_time_t ti_timeout; + timeout_cb_t ti_cb; + void *ti_cb_data; + struct list_head ti_obd_list; + struct list_head ti_chain; }; #define OBD_MAX_RIF_DEFAULT 8 @@ -248,46 +146,80 @@ enum { NUM_SYNC_ON_CANCEL_STATES }; +/* + * Limit reply buffer size for striping data to one x86_64 page. This + * value is chosen to fit the striping data for common use cases while + * staying well below the limit at which the buffer must be backed by + * vmalloc(). Excessive use of vmalloc() may cause spinlock contention + * on the MDS. + */ +#define OBD_MAX_DEFAULT_EA_SIZE 4096 + +enum obd_cl_sem_lock_class { + OBD_CLI_SEM_NORMAL, + OBD_CLI_SEM_MGC, + OBD_CLI_SEM_MDCOSC, +}; + struct mdc_rpc_lock; struct obd_import; struct client_obd { - struct rw_semaphore cl_sem; - struct obd_uuid cl_target_uuid; - struct obd_import *cl_import; /* ptlrpc connection state */ - int cl_conn_count; - /* max_mds_easize is purely a performance thing so we don't have to - * call obd_size_diskmd() all the time. */ - int cl_default_mds_easize; - int cl_max_mds_easize; - int cl_default_mds_cookiesize; - int cl_max_mds_cookiesize; - - enum lustre_sec_part cl_sp_me; - enum lustre_sec_part cl_sp_to; - struct sptlrpc_flavor cl_flvr_mgc; /* fixed flavor of mgc->mgs */ - - /* the grant values are protected by loi_list_lock below */ - long cl_dirty; /* all _dirty_ in bytes */ - long cl_dirty_max; /* allowed w/o rpc */ - long cl_dirty_transit; /* dirty synchronous */ - long cl_avail_grant; /* bytes of credit for ost */ - long cl_lost_grant; /* lost credits (trunc) */ + struct rw_semaphore cl_sem; + struct obd_uuid cl_target_uuid; + struct obd_import *cl_import; /* ptlrpc connection state */ + size_t cl_conn_count; + + /* Cache maximum and default values for easize. This is + * strictly a performance optimization to minimize calls to + * obd_size_diskmd(). The default values are used to calculate the + * initial size of a request buffer. The ptlrpc layer will resize the + * buffer as needed to accommodate a larger reply from the + * server. The default values should be small enough to avoid wasted + * memory and excessive use of vmalloc(), yet large enough to avoid + * reallocating the buffer in the common use case. */ + + /* Default EA size for striping attributes. It is initialized at + * mount-time based on the default stripe width of the filesystem, + * then it tracks the largest observed EA size advertised by + * the MDT, up to a maximum value of OBD_MAX_DEFAULT_EA_SIZE. */ + __u32 cl_default_mds_easize; + + /* Maximum possible EA size computed at mount-time based on + * the number of OSTs in the filesystem. May be increased at + * run-time if a larger observed size is advertised by the MDT. */ + __u32 cl_max_mds_easize; + + enum lustre_sec_part cl_sp_me; + enum lustre_sec_part cl_sp_to; + struct sptlrpc_flavor cl_flvr_mgc; /* fixed flavor of mgc->mgs */ + + /* the grant values are protected by loi_list_lock below */ + unsigned long cl_dirty_pages; /* all _dirty_ in pages */ + unsigned long cl_dirty_max_pages; /* allowed w/o rpc */ + unsigned long cl_dirty_transit; /* dirty synchronous */ + unsigned long cl_avail_grant; /* bytes of credit for ost */ + unsigned long cl_lost_grant; /* lost credits (trunc) */ + /* grant consumed for dirty pages */ + unsigned long cl_dirty_grant; /* since we allocate grant by blocks, we don't know how many grant will * be used to add a page into cache. As a solution, we reserve maximum * grant before trying to dirty a page and unreserve the rest. * See osc_{reserve|unreserve}_grant for details. */ - long cl_reserved_grant; - cfs_list_t cl_cache_waiters; /* waiting for cache/grant */ - cfs_time_t cl_next_shrink_grant; /* jiffies */ - cfs_list_t cl_grant_shrink_list; /* Timeout event list */ - int cl_grant_shrink_interval; /* seconds */ + long cl_reserved_grant; + struct list_head cl_cache_waiters; /* waiting for cache/grant */ + cfs_time_t cl_next_shrink_grant; /* jiffies */ + struct list_head cl_grant_shrink_list; /* Timeout event list */ + int cl_grant_shrink_interval; /* seconds */ /* A chunk is an optimal size used by osc_extent to determine * the extent size. A chunk is max(PAGE_CACHE_SIZE, OST block size) */ - int cl_chunkbits; - int cl_chunk; - int cl_extent_tax; /* extent overhead, by bytes */ + int cl_chunkbits; + /* extent insertion metadata overhead to be accounted in grant, + * in bytes */ + unsigned int cl_grant_extent_tax; + /* maximum extent size, in number of pages */ + unsigned int cl_max_extent_pages; /* keep track of objects that have lois that contain pages which * have been queued for async brw. this lock also protects the @@ -303,56 +235,85 @@ struct client_obd { * blocking everywhere, but we don't want to slow down fast-path of * our main platform.) * - * Exact type of ->cl_loi_list_lock is defined in arch/obd.h together - * with client_obd_list_{un,}lock() and - * client_obd_list_lock_{init,done}() functions. - * * NB by Jinshan: though field names are still _loi_, but actually * osc_object{}s are in the list. */ - client_obd_lock_t cl_loi_list_lock; - cfs_list_t cl_loi_ready_list; - cfs_list_t cl_loi_hp_ready_list; - cfs_list_t cl_loi_write_list; - cfs_list_t cl_loi_read_list; - int cl_r_in_flight; - int cl_w_in_flight; + spinlock_t cl_loi_list_lock; + struct list_head cl_loi_ready_list; + struct list_head cl_loi_hp_ready_list; + struct list_head cl_loi_write_list; + struct list_head cl_loi_read_list; + __u32 cl_r_in_flight; + __u32 cl_w_in_flight; /* just a sum of the loi/lop pending numbers to be exported by /proc */ - atomic_t cl_pending_w_pages; - atomic_t cl_pending_r_pages; - __u32 cl_max_pages_per_rpc; - int cl_max_rpcs_in_flight; - struct obd_histogram cl_read_rpc_hist; - struct obd_histogram cl_write_rpc_hist; - struct obd_histogram cl_read_page_hist; - struct obd_histogram cl_write_page_hist; - struct obd_histogram cl_read_offset_hist; - struct obd_histogram cl_write_offset_hist; - - /* lru for osc caching pages */ - struct cl_client_cache *cl_cache; - cfs_list_t cl_lru_osc; /* member of cl_cache->ccc_lru */ - atomic_t *cl_lru_left; - atomic_t cl_lru_busy; - atomic_t cl_lru_shrinkers; - atomic_t cl_lru_in_list; - cfs_list_t cl_lru_list; /* lru page list */ - client_obd_lock_t cl_lru_list_lock; /* page list protector */ - atomic_t cl_unstable_count; + atomic_t cl_pending_w_pages; + atomic_t cl_pending_r_pages; + __u32 cl_max_pages_per_rpc; + __u32 cl_max_rpcs_in_flight; + struct obd_histogram cl_read_rpc_hist; + struct obd_histogram cl_write_rpc_hist; + struct obd_histogram cl_read_page_hist; + struct obd_histogram cl_write_page_hist; + struct obd_histogram cl_read_offset_hist; + struct obd_histogram cl_write_offset_hist; + + /** LRU for osc caching pages */ + struct cl_client_cache *cl_cache; + /** member of cl_cache->ccc_lru */ + struct list_head cl_lru_osc; + /** # of available LRU slots left in the per-OSC cache. + * Available LRU slots are shared by all OSCs of the same file system, + * therefore this is a pointer to cl_client_cache::ccc_lru_left. */ + atomic_long_t *cl_lru_left; + /** # of busy LRU pages. A page is considered busy if it's in writeback + * queue, or in transfer. Busy pages can't be discarded so they are not + * in LRU cache. */ + atomic_long_t cl_lru_busy; + /** # of LRU pages in the cache for this client_obd */ + atomic_long_t cl_lru_in_list; + /** # of threads are shrinking LRU cache. To avoid contention, it's not + * allowed to have multiple threads shrinking LRU cache. */ + atomic_t cl_lru_shrinkers; + /** The time when this LRU cache was last used. */ + time_t cl_lru_last_used; + /** stats: how many reclaims have happened for this client_obd. + * reclaim and shrink - shrink is async, voluntarily rebalancing; + * reclaim is sync, initiated by IO thread when the LRU slots are + * in shortage. */ + __u64 cl_lru_reclaim; + /** List of LRU pages for this client_obd */ + struct list_head cl_lru_list; + /** Lock for LRU page list */ + spinlock_t cl_lru_list_lock; + /** # of unstable pages in this client_obd. + * An unstable page is a page state that WRITE RPC has finished but + * the transaction has NOT yet committed. */ + atomic_long_t cl_unstable_count; + /** Link to osc_shrinker_list */ + struct list_head cl_shrink_list; /* number of in flight destroy rpcs is limited to max_rpcs_in_flight */ - atomic_t cl_destroy_in_flight; - wait_queue_head_t cl_destroy_waitq; + atomic_t cl_destroy_in_flight; + wait_queue_head_t cl_destroy_waitq; struct mdc_rpc_lock *cl_rpc_lock; - struct mdc_rpc_lock *cl_close_lock; + + /* modify rpcs in flight + * currently used for metadata only */ + spinlock_t cl_mod_rpcs_lock; + __u16 cl_max_mod_rpcs_in_flight; + __u16 cl_mod_rpcs_in_flight; + __u16 cl_close_rpcs_in_flight; + wait_queue_head_t cl_mod_rpcs_waitq; + unsigned long *cl_mod_tag_bitmap; + struct obd_histogram cl_mod_rpcs_hist; /* mgc datastruct */ - struct mutex cl_mgc_mutex; + struct mutex cl_mgc_mutex; struct local_oid_storage *cl_mgc_los; - struct dt_object *cl_mgc_configs_dir; - atomic_t cl_mgc_refcount; - struct obd_export *cl_mgc_mgsexp; + struct dt_object *cl_mgc_configs_dir; + atomic_t cl_mgc_refcount; + struct obd_export *cl_mgc_mgsexp; /* checksumming for data sent over the network */ unsigned int cl_checksum:1; /* 0 = disabled, 1 = enabled */ @@ -364,13 +325,6 @@ struct client_obd { /* also protected by the poorly named _loi_list_lock lock above */ struct osc_async_rc cl_ar; - /* used by quotacheck when the servers are older than 2.4 */ - int cl_qchk_stat; /* quotacheck stat of the peer */ -#define CL_NOT_QUOTACHECKED 1 /* client->cl_qchk_stat init value */ -#if LUSTRE_VERSION_CODE >= OBD_OCD_VERSION(2, 7, 50, 0) -#warning "please consider removing quotacheck compatibility code" -#endif - /* sequence manager */ struct lu_client_seq *cl_seq; @@ -380,22 +334,21 @@ struct client_obd { void *cl_writeback_work; void *cl_lru_work; /* hash tables for osc_quota_info */ - cfs_hash_t *cl_quota_hash[MAXQUOTAS]; + struct cfs_hash *cl_quota_hash[LL_MAXQUOTAS]; }; #define obd2cli_tgt(obd) ((char *)(obd)->u.cli.cl_target_uuid.uuid) struct obd_id_info { - __u32 idx; - obd_id *data; + u32 idx; + u64 *data; }; struct echo_client_obd { - struct obd_export *ec_exp; /* the local connection to osc/lov */ + struct obd_export *ec_exp; /* the local connection to osc/lov */ spinlock_t ec_lock; - cfs_list_t ec_objects; - cfs_list_t ec_locks; - int ec_nstripes; - __u64 ec_unique; + struct list_head ec_objects; + struct list_head ec_locks; + __u64 ec_unique; }; /* Generic subset of OSTs */ @@ -411,7 +364,7 @@ struct ost_pool { #define OBD_STATFS_CACHE_SECONDS 1 struct lov_tgt_desc { - cfs_list_t ltd_kill; + struct list_head ltd_kill; struct obd_uuid ltd_uuid; struct obd_device *ltd_obd; struct obd_export *ltd_exp; @@ -435,13 +388,13 @@ struct lov_obd { __u32 lov_tgt_size; /* size of tgts array */ int lov_connects; int lov_pool_count; - cfs_hash_t *lov_pools_hash_body; /* used for key access */ + struct cfs_hash *lov_pools_hash_body; /* used for key access */ struct list_head lov_pool_list; /* used for sequential access */ struct proc_dir_entry *lov_pool_proc_entry; enum lustre_sec_part lov_sp_me; /* Cached LRU and unstable data from upper layer */ - void *lov_cache; + struct cl_client_cache *lov_cache; struct rw_semaphore lov_notify_lock; }; @@ -449,39 +402,24 @@ struct lov_obd { struct lmv_tgt_desc { struct obd_uuid ltd_uuid; struct obd_export *ltd_exp; - int ltd_idx; + __u32 ltd_idx; struct mutex ltd_fid_mutex; unsigned long ltd_active:1; /* target up for requests */ }; -enum placement_policy { - PLACEMENT_CHAR_POLICY = 0, - PLACEMENT_NID_POLICY = 1, - PLACEMENT_INVAL_POLICY = 2, - PLACEMENT_MAX_POLICY -}; - -typedef enum placement_policy placement_policy_t; - struct lmv_obd { - int refcount; struct lu_client_fld lmv_fld; spinlock_t lmv_lock; - placement_policy_t lmv_placement; struct lmv_desc desc; struct obd_uuid cluuid; - struct obd_export *exp; struct proc_dir_entry *targets_proc_entry; - struct mutex init_mutex; + struct mutex lmv_init_mutex; int connected; int max_easize; int max_def_easize; - int max_cookiesize; - int max_def_cookiesize; - int server_timeout; - int tgts_size; /* size of tgts array */ + __u32 tgts_size; /* size of tgts array */ struct lmv_tgt_desc **tgts; struct obd_connect_data conn_data; @@ -490,12 +428,11 @@ struct lmv_obd { struct niobuf_local { __u64 lnb_file_offset; __u32 lnb_page_offset; - __u32 len; - __u32 flags; - struct page *page; - struct dentry *dentry; - int lnb_grant_used; - int rc; + __u32 lnb_len; + __u32 lnb_flags; + int lnb_rc; + struct page *lnb_page; + void *lnb_data; }; #define LUSTRE_FLD_NAME "fld" @@ -533,7 +470,7 @@ struct niobuf_local { #define LUSTRE_MGS_OBDNAME "MGS" #define LUSTRE_MGC_OBDNAME "MGC" -static inline int is_osp_on_mdt(char *name) +static inline int is_lwp_on_mdt(char *name) { char *ptr; @@ -543,9 +480,7 @@ static inline int is_osp_on_mdt(char *name) return 0; } - /* 1.8 OSC/OSP name on MDT is fsname-OSTxxxx-osc */ - if (strncmp(ptr + 1, "osc", 3) == 0) - return 1; + /* LWP name on MDT is fsname-MDTxxxx-lwp-MDTxxxx */ if (strncmp(ptr + 1, "MDT", 3) != 0) return 0; @@ -555,95 +490,36 @@ static inline int is_osp_on_mdt(char *name) if (ptr == name) return 0; - if (strncmp(ptr + 1, LUSTRE_OSP_NAME, strlen(LUSTRE_OSP_NAME)) != 0 && - strncmp(ptr + 1, LUSTRE_OSC_NAME, strlen(LUSTRE_OSC_NAME)) != 0) + if (strncmp(ptr + 1, LUSTRE_LWP_NAME, strlen(LUSTRE_LWP_NAME)) != 0) return 0; return 1; } -/* Don't conflict with on-wire flags OBD_BRW_WRITE, etc */ -#define N_LOCAL_TEMP_PAGE 0x10000000 - -struct obd_trans_info { - __u64 oti_transno; - __u64 oti_xid; - /* Only used on the server side for tracking acks. */ - struct oti_req_ack_lock { - struct lustre_handle lock; - __u32 mode; - } oti_ack_locks[4]; - void *oti_handle; - struct llog_cookie oti_onecookie; - struct llog_cookie *oti_logcookies; - int oti_numcookies; - /** synchronous write is needed */ - unsigned long oti_sync_write:1; - - /* initial thread handling transaction */ - struct ptlrpc_thread * oti_thread; - __u32 oti_conn_cnt; - /** VBR: versions */ - __u64 oti_pre_version; - /** JobID */ - char *oti_jobid; - - struct obd_uuid *oti_ost_uuid; -}; - -static inline void oti_init(struct obd_trans_info *oti, - struct ptlrpc_request *req) +static inline int is_lwp_on_ost(char *name) { - if (oti == NULL) - return; - memset(oti, 0, sizeof(*oti)); - - if (req == NULL) - return; - - oti->oti_xid = req->rq_xid; - /** VBR: take versions from request */ - if (req->rq_reqmsg != NULL && - lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) { - __u64 *pre_version = lustre_msg_get_versions(req->rq_reqmsg); - oti->oti_pre_version = pre_version ? pre_version[0] : 0; - oti->oti_transno = lustre_msg_get_transno(req->rq_reqmsg); - } - - /** called from mds_create_objects */ - if (req->rq_repmsg != NULL) - oti->oti_transno = lustre_msg_get_transno(req->rq_repmsg); - oti->oti_thread = req->rq_svc_thread; - if (req->rq_reqmsg != NULL) - oti->oti_conn_cnt = lustre_msg_get_conn_cnt(req->rq_reqmsg); -} + char *ptr; -static inline void oti_alloc_cookies(struct obd_trans_info *oti,int num_cookies) -{ - if (!oti) - return; + ptr = strrchr(name, '-'); + if (ptr == NULL) { + CERROR("%s is not a obdname\n", name); + return 0; + } - if (num_cookies == 1) - oti->oti_logcookies = &oti->oti_onecookie; - else - OBD_ALLOC_LARGE(oti->oti_logcookies, - num_cookies * sizeof(oti->oti_onecookie)); + /* LWP name on OST is fsname-MDTxxxx-lwp-OSTxxxx */ - oti->oti_numcookies = num_cookies; -} + if (strncmp(ptr + 1, "OST", 3) != 0) + return 0; -static inline void oti_free_cookies(struct obd_trans_info *oti) -{ - if (!oti || !oti->oti_logcookies) - return; - - if (oti->oti_logcookies == &oti->oti_onecookie) - LASSERT(oti->oti_numcookies == 1); - else - OBD_FREE_LARGE(oti->oti_logcookies, - oti->oti_numcookies*sizeof(oti->oti_onecookie)); - oti->oti_logcookies = NULL; - oti->oti_numcookies = 0; + while (*(--ptr) != '-' && ptr != name); + + if (ptr == name) + return 0; + + if (strncmp(ptr + 1, LUSTRE_LWP_NAME, strlen(LUSTRE_LWP_NAME)) != 0) + return 0; + + return 1; } /* @@ -658,8 +534,6 @@ enum obd_notify_event { OBD_NOTIFY_ACTIVE, /* Device deactivated */ OBD_NOTIFY_INACTIVE, - /* Device disconnected */ - OBD_NOTIFY_DISCON, /* Connect data for import were changed */ OBD_NOTIFY_OCD, /* Sync request */ @@ -698,7 +572,6 @@ struct target_recovery_data { }; struct obd_llog_group { - int olg_seq; struct llog_ctxt *olg_ctxts[LLOG_MAX_CTXTS]; wait_queue_head_t olg_waitq; spinlock_t olg_lock; @@ -709,14 +582,14 @@ struct obd_llog_group { #define OBD_DEVICE_MAGIC 0XAB5CD6EF struct obd_device { - struct obd_type *obd_type; - __u32 obd_magic; + struct obd_type *obd_type; + __u32 obd_magic; /* OBD_DEVICE_MAGIC */ + int obd_minor; /* device number: lctl dl */ + struct lu_device *obd_lu_dev; - /* common and UUID name of this device */ - char obd_name[MAX_OBD_NAME]; - struct obd_uuid obd_uuid; - int obd_minor; - struct lu_device *obd_lu_dev; + /* common and UUID name of this device */ + struct obd_uuid obd_uuid; + char obd_name[MAX_OBD_NAME]; /* bitfield modification is protected by obd_dev_lock */ unsigned long @@ -725,15 +598,13 @@ struct obd_device { obd_recovering:1, /* there are recoverable clients */ obd_abort_recovery:1, /* recovery expired */ obd_version_recov:1, /* obd uses version checking */ - obd_replayable:1, /* recovery is enabled; - * inform clients */ + obd_replayable:1, /* recovery enabled; inform clients */ obd_no_transno:1, /* no committed-transno notification */ obd_no_recov:1, /* fail instead of retry messages */ obd_stopping:1, /* started cleanup */ obd_starting:1, /* started setup */ obd_force:1, /* cleanup with > 0 obd refcount */ obd_fail:1, /* cleanup with failover */ - obd_async_recov:1, /* allow asynchronous orphan cleanup */ obd_no_conn:1, /* deny new connections */ obd_inactive:1, /* device active/inactive * (for /proc/status only!!) */ @@ -745,28 +616,30 @@ struct obd_device { * protection of other bits using _bh lock */ unsigned long obd_recovery_expired:1; /* uuid-export hash body */ - cfs_hash_t *obd_uuid_hash; + struct cfs_hash *obd_uuid_hash; /* nid-export hash body */ - cfs_hash_t *obd_nid_hash; + struct cfs_hash *obd_nid_hash; /* nid stats body */ - cfs_hash_t *obd_nid_stats_hash; - cfs_list_t obd_nid_stats; - atomic_t obd_refcount; - cfs_list_t obd_exports; - cfs_list_t obd_unlinked_exports; - cfs_list_t obd_delayed_exports; + struct cfs_hash *obd_nid_stats_hash; + /* client_generation-export hash body */ + struct cfs_hash *obd_gen_hash; + struct list_head obd_nid_stats; + struct list_head obd_exports; + struct list_head obd_unlinked_exports; + struct list_head obd_delayed_exports; struct list_head obd_lwp_list; + atomic_t obd_refcount; int obd_num_exports; spinlock_t obd_nid_lock; struct ldlm_namespace *obd_namespace; struct ptlrpc_client obd_ldlm_client; /* XXX OST/MDS only */ /* a spinlock is OK for what we do now, may need a semaphore later */ spinlock_t obd_dev_lock; /* protect OBD bitfield above */ - struct mutex obd_dev_mutex; - __u64 obd_last_committed; spinlock_t obd_osfs_lock; struct obd_statfs obd_osfs; /* locked by obd_osfs_lock */ __u64 obd_osfs_age; + __u64 obd_last_committed; + struct mutex obd_dev_mutex; struct lvfs_run_ctxt obd_lvfs_ctxt; struct obd_llog_group obd_olg; /* default llog group */ struct obd_device *obd_observer; @@ -774,38 +647,40 @@ struct obd_device { struct obd_notify_upcall obd_upcall; struct obd_export *obd_self_export; struct obd_export *obd_lwp_export; - /* list of exports in LRU order, for ping evictor, with obd_dev_lock */ - cfs_list_t obd_exports_timed; - time_t obd_eviction_timer; /* for ping evictor */ + /* list of exports in LRU order, for ping evictor, with obd_dev_lock */ + struct list_head obd_exports_timed; + time_t obd_eviction_timer; /* for ping evictor */ int obd_max_recoverable_clients; atomic_t obd_connected_clients; int obd_stale_clients; - int obd_delayed_clients; /* this lock protects all recovery list_heads, timer and * obd_next_recovery_transno value */ - spinlock_t obd_recovery_task_lock; - __u64 obd_next_recovery_transno; - int obd_replayed_requests; - int obd_requests_queued_for_recovery; - wait_queue_head_t obd_next_transno_waitq; + spinlock_t obd_recovery_task_lock; + __u64 obd_next_recovery_transno; + int obd_replayed_requests; + int obd_requests_queued_for_recovery; + wait_queue_head_t obd_next_transno_waitq; /* protected by obd_recovery_task_lock */ - struct timer_list obd_recovery_timer; - time_t obd_recovery_start; /* seconds */ - time_t obd_recovery_end; /* seconds, for lprocfs_status */ - int obd_recovery_time_hard; - int obd_recovery_timeout; - int obd_recovery_ir_factor; + struct timer_list obd_recovery_timer; + /* seconds */ + time_t obd_recovery_start; + /* seconds, for lprocfs_status */ + time_t obd_recovery_end; + int obd_recovery_time_hard; + int obd_recovery_timeout; + int obd_recovery_ir_factor; /* new recovery stuff from CMD2 */ - struct target_recovery_data obd_recovery_data; - int obd_replayed_locks; - atomic_t obd_req_replay_clients; - atomic_t obd_lock_replay_clients; + int obd_replayed_locks; + atomic_t obd_req_replay_clients; + atomic_t obd_lock_replay_clients; + struct target_recovery_data obd_recovery_data; + /* all lists are protected by obd_recovery_task_lock */ - cfs_list_t obd_req_replay_queue; - cfs_list_t obd_lock_replay_queue; - cfs_list_t obd_final_req_queue; + struct list_head obd_req_replay_queue; + struct list_head obd_lock_replay_queue; + struct list_head obd_final_req_queue; union { #ifdef HAVE_SERVER_SUPPORT @@ -819,53 +694,39 @@ struct obd_device { struct lov_obd lov; struct lmv_obd lmv; } u; + /* Fields used by LProcFS */ - unsigned int obd_cntr_base; - struct lprocfs_stats *obd_stats; + struct lprocfs_stats *obd_stats; + unsigned int obd_cntr_base; - unsigned int obd_md_cntr_base; - struct lprocfs_stats *obd_md_stats; + unsigned int obd_md_cntr_base; + struct lprocfs_stats *obd_md_stats; struct proc_dir_entry *obd_proc_entry; struct proc_dir_entry *obd_proc_exports_entry; struct proc_dir_entry *obd_svc_procroot; struct lprocfs_stats *obd_svc_stats; - struct lprocfs_seq_vars *obd_vars; + struct lprocfs_vars *obd_vars; atomic_t obd_evict_inprogress; wait_queue_head_t obd_evict_inprogress_waitq; - cfs_list_t obd_evict_list; /* protected with pet_lock */ - - /** - * Ldlm pool part. Save last calculated SLV and Limit. - */ - rwlock_t obd_pool_lock; - int obd_pool_limit; - __u64 obd_pool_slv; - - /** - * A list of outstanding class_incref()'s against this obd. For - * debugging. - */ - struct lu_ref obd_reference; - - int obd_conn_inprogress; -}; + struct list_head obd_evict_list; /* protected with pet_lock */ + + /** + * LDLM pool part. Save last calculated SLV and Limit. + */ + rwlock_t obd_pool_lock; + __u64 obd_pool_slv; + int obd_pool_limit; -#define OBD_LLOG_FL_SENDNOW 0x0001 -#define OBD_LLOG_FL_EXIT 0x0002 + int obd_conn_inprogress; -enum obd_cleanup_stage { -/* Special case hack for MDS LOVs */ - OBD_CLEANUP_EARLY, -/* can be directly mapped to .ldto_device_fini() */ - OBD_CLEANUP_EXPORTS, + /** + * List of outstanding class_incref()'s fo this OBD. For debugging. */ + struct lu_ref obd_reference; }; /* get/set_info keys */ #define KEY_ASYNC "async" -#define KEY_BLOCKSIZE_BITS "blocksize_bits" -#define KEY_BLOCKSIZE "blocksize" -#define KEY_CAPA_KEY "capa_key" #define KEY_CHANGELOG_CLEAR "changelog_clear" #define KEY_FID2PATH "fid2path" #define KEY_CHECKSUM "checksum" @@ -877,52 +738,25 @@ enum obd_cleanup_stage { #define KEY_GRANT_SHRINK "grant_shrink" #define KEY_HSM_COPYTOOL_SEND "hsm_send" #define KEY_INIT_RECOV_BACKUP "init_recov_bk" -#define KEY_INIT_RECOV "initial_recov" #define KEY_INTERMDS "inter_mds" #define KEY_LAST_ID "last_id" #define KEY_LAST_FID "last_fid" -#define KEY_LOCK_TO_STRIPE "lock_to_stripe" -#define KEY_LOVDESC "lovdesc" -#define KEY_LOV_IDX "lov_idx" #define KEY_MAX_EASIZE "max_easize" #define KEY_DEFAULT_EASIZE "default_easize" -#define KEY_MAX_COOKIESIZE "max_cookiesize" -#define KEY_DEFAULT_COOKIESIZE "default_cookiesize" -#define KEY_MDS_CONN "mds_conn" #define KEY_MGSSEC "mgssec" -#define KEY_NEXT_ID "next_id" #define KEY_READ_ONLY "read-only" #define KEY_REGISTER_TARGET "register_target" #define KEY_SET_FS "set_fs" #define KEY_TGT_COUNT "tgt_count" /* KEY_SET_INFO in lustre_idl.h */ #define KEY_SPTLRPC_CONF "sptlrpc_conf" -#define KEY_CONNECT_FLAG "connect_flags" -#define KEY_SYNC_LOCK_CANCEL "sync_lock_cancel" #define KEY_CACHE_SET "cache_set" #define KEY_CACHE_LRU_SHRINK "cache_lru_shrink" -#define KEY_CHANGELOG_INDEX "changelog_index" #define KEY_OSP_CONNECTED "osp_connected" struct lu_context; -/* /!\ must be coherent with include/linux/namei.h on patched kernel */ -#define IT_OPEN (1 << 0) -#define IT_CREAT (1 << 1) -#define IT_READDIR (1 << 2) -#define IT_GETATTR (1 << 3) -#define IT_LOOKUP (1 << 4) -#define IT_UNLINK (1 << 5) -#define IT_TRUNC (1 << 6) -#define IT_GETXATTR (1 << 7) -#define IT_EXEC (1 << 8) -#define IT_PIN (1 << 9) -#define IT_LAYOUT (1 << 10) -#define IT_QUOTA_DQACQ (1 << 11) -#define IT_QUOTA_CONN (1 << 12) -#define IT_SETXATTR (1 << 13) - static inline int it_to_lock_mode(struct lookup_intent *it) { /* CREAT needs to be tested before open (both could be set) */ @@ -942,16 +776,32 @@ static inline int it_to_lock_mode(struct lookup_intent *it) return -EINVAL; } +enum md_op_flags { + MF_MDC_CANCEL_FID1 = 1 << 0, + MF_MDC_CANCEL_FID2 = 1 << 1, + MF_MDC_CANCEL_FID3 = 1 << 2, + MF_MDC_CANCEL_FID4 = 1 << 3, + MF_GET_MDT_IDX = 1 << 4, +}; + +enum md_cli_flags { + CLI_SET_MEA = 1 << 0, + CLI_RM_ENTRY = 1 << 1, + CLI_HASH64 = 1 << 2, + CLI_API32 = 1 << 3, + CLI_MIGRATE = 1 << 4, +}; + struct md_op_data { struct lu_fid op_fid1; /* operation fid1 (usualy parent) */ struct lu_fid op_fid2; /* operation fid2 (usualy child) */ struct lu_fid op_fid3; /* 2 extra fids to find conflicting */ struct lu_fid op_fid4; /* to the operation locks. */ - mdsno_t op_mds; /* what mds server open will go to */ - struct lustre_handle op_handle; - obd_time op_mod_time; + u32 op_mds; /* what mds server open will go to */ + struct lustre_handle op_handle; + s64 op_mod_time; const char *op_name; - int op_namelen; + size_t op_namelen; __u32 op_mode; struct lmv_stripe_md *op_mea1; struct lmv_stripe_md *op_mea2; @@ -963,45 +813,30 @@ struct md_op_data { size_t op_data_size; /* iattr fields and blocks. */ - struct iattr op_attr; -#ifdef __KERNEL__ - unsigned int op_attr_flags; -#endif - __u64 op_valid; + struct iattr op_attr; loff_t op_attr_blocks; + unsigned int op_attr_flags; /* LUSTRE_{SYNC,..}_FL */ + __u64 op_valid; /* OBD_MD_* */ - /* Size-on-MDS epoch and flags. */ - __u64 op_ioepoch; - __u32 op_flags; - - /* Capa fields */ - struct obd_capa *op_capa1; - struct obd_capa *op_capa2; + enum md_op_flags op_flags; /* Various operation flags. */ enum mds_op_bias op_bias; /* Used by readdir */ - __u64 op_hash_offset; - - /* The offset of dir entry among the same hash entries, which - * is used to resolve the hash conflict. */ - __u32 op_same_hash_offset; - - /* Used by readdir */ - __u32 op_npages; + unsigned int op_max_pages; /* used to transfer info between the stacks of MD client * see enum op_cli_flags */ - __u32 op_cli_flags; + enum md_cli_flags op_cli_flags; /* File object data version for HSM release, on client */ __u64 op_data_version; struct lustre_handle op_lease_handle; -}; -#define op_stripe_offset op_ioepoch -#define op_max_pages op_valid + /* default stripe offset */ + __u32 op_default_stripe_offset; +}; struct md_callback { int (*md_blocking_ast)(struct ldlm_lock *lock, @@ -1016,52 +851,47 @@ typedef int (* md_enqueue_cb_t)(struct ptlrpc_request *req, int rc); struct md_enqueue_info { - struct md_op_data mi_data; - struct lookup_intent mi_it; - struct lustre_handle mi_lockh; - struct inode *mi_dir; - md_enqueue_cb_t mi_cb; - __u64 mi_cbdata; - unsigned int mi_generation; + struct md_op_data mi_data; + struct lookup_intent mi_it; + struct lustre_handle mi_lockh; + struct inode *mi_dir; + struct ldlm_enqueue_info mi_einfo; + md_enqueue_cb_t mi_cb; + void *mi_cbdata; }; struct obd_ops { struct module *o_owner; int (*o_iocontrol)(unsigned int cmd, struct obd_export *exp, int len, - void *karg, void *uarg); - int (*o_get_info)(const struct lu_env *env, struct obd_export *, - __u32 keylen, void *key, __u32 *vallen, void *val, - struct lov_stripe_md *lsm); - int (*o_set_info_async)(const struct lu_env *, struct obd_export *, - __u32 keylen, void *key, - __u32 vallen, void *val, - struct ptlrpc_request_set *set); - int (*o_attach)(struct obd_device *dev, obd_count len, void *data); - int (*o_detach)(struct obd_device *dev); - int (*o_setup) (struct obd_device *dev, struct lustre_cfg *cfg); - int (*o_precleanup)(struct obd_device *dev, - enum obd_cleanup_stage cleanup_stage); - int (*o_cleanup)(struct obd_device *dev); - int (*o_process_config)(struct obd_device *dev, obd_count len, - void *data); - int (*o_postrecov)(struct obd_device *dev); - int (*o_add_conn)(struct obd_import *imp, struct obd_uuid *uuid, - int priority); - int (*o_del_conn)(struct obd_import *imp, struct obd_uuid *uuid); - /* connect to the target device with given connection - * data. @ocd->ocd_connect_flags is modified to reflect flags actually - * granted by the target, which are guaranteed to be a subset of flags - * asked for. If @ocd == NULL, use default parameters. */ - int (*o_connect)(const struct lu_env *env, - struct obd_export **exp, struct obd_device *src, - struct obd_uuid *cluuid, struct obd_connect_data *ocd, - void *localdata); - int (*o_reconnect)(const struct lu_env *env, - struct obd_export *exp, struct obd_device *src, - struct obd_uuid *cluuid, - struct obd_connect_data *ocd, - void *localdata); - int (*o_disconnect)(struct obd_export *exp); + void *karg, void __user *uarg); + int (*o_get_info)(const struct lu_env *env, struct obd_export *, + __u32 keylen, void *key, __u32 *vallen, void *val); + int (*o_set_info_async)(const struct lu_env *, struct obd_export *, + __u32 keylen, void *key, + __u32 vallen, void *val, + struct ptlrpc_request_set *set); + int (*o_setup) (struct obd_device *dev, struct lustre_cfg *cfg); + int (*o_precleanup)(struct obd_device *dev); + int (*o_cleanup)(struct obd_device *dev); + int (*o_process_config)(struct obd_device *dev, size_t len, void *data); + int (*o_postrecov)(struct obd_device *dev); + int (*o_add_conn)(struct obd_import *imp, struct obd_uuid *uuid, + int priority); + int (*o_del_conn)(struct obd_import *imp, struct obd_uuid *uuid); + /* connect to the target device with given connection + * data. @ocd->ocd_connect_flags is modified to reflect flags actually + * granted by the target, which are guaranteed to be a subset of flags + * asked for. If @ocd == NULL, use default parameters. */ + int (*o_connect)(const struct lu_env *env, + struct obd_export **exp, struct obd_device *src, + struct obd_uuid *cluuid, struct obd_connect_data *ocd, + void *localdata); + int (*o_reconnect)(const struct lu_env *env, + struct obd_export *exp, struct obd_device *src, + struct obd_uuid *cluuid, + struct obd_connect_data *ocd, + void *localdata); + int (*o_disconnect)(struct obd_export *exp); /* Initialize/finalize fids infrastructure. */ int (*o_fid_init)(struct obd_device *obd, @@ -1072,110 +902,72 @@ struct obd_ops { int (*o_fid_alloc)(const struct lu_env *env, struct obd_export *exp, struct lu_fid *fid, struct md_op_data *op_data); - /* - * Object with @fid is getting deleted, we may want to do something - * about this. - */ - int (*o_statfs)(const struct lu_env *, struct obd_export *exp, - struct obd_statfs *osfs, __u64 max_age, __u32 flags); - int (*o_statfs_async)(struct obd_export *exp, struct obd_info *oinfo, - __u64 max_age, struct ptlrpc_request_set *set); - int (*o_packmd)(struct obd_export *exp, struct lov_mds_md **disk_tgt, - struct lov_stripe_md *mem_src); - int (*o_unpackmd)(struct obd_export *exp,struct lov_stripe_md **mem_tgt, - struct lov_mds_md *disk_src, int disk_len); - int (*o_create)(const struct lu_env *env, struct obd_export *exp, - struct obdo *oa, struct lov_stripe_md **ea, - struct obd_trans_info *oti); - int (*o_create_async)(struct obd_export *exp, struct obd_info *oinfo, - struct lov_stripe_md **ea, - struct obd_trans_info *oti); - int (*o_destroy)(const struct lu_env *env, struct obd_export *exp, - struct obdo *oa, struct lov_stripe_md *ea, - struct obd_trans_info *oti, struct obd_export *md_exp, - void *capa); - int (*o_setattr)(const struct lu_env *, struct obd_export *exp, - struct obd_info *oinfo, struct obd_trans_info *oti); - int (*o_setattr_async)(struct obd_export *exp, struct obd_info *oinfo, - struct obd_trans_info *oti, - struct ptlrpc_request_set *rqset); - int (*o_getattr)(const struct lu_env *env, struct obd_export *exp, - struct obd_info *oinfo); - int (*o_getattr_async)(struct obd_export *exp, struct obd_info *oinfo, - struct ptlrpc_request_set *set); - int (*o_preprw)(const struct lu_env *env, int cmd, - struct obd_export *exp, struct obdo *oa, int objcount, - struct obd_ioobj *obj, struct niobuf_remote *remote, - int *nr_pages, struct niobuf_local *local, - struct obd_trans_info *oti, struct lustre_capa *capa); - int (*o_commitrw)(const struct lu_env *env, int cmd, - struct obd_export *exp, struct obdo *oa, - int objcount, struct obd_ioobj *obj, - struct niobuf_remote *remote, int pages, - struct niobuf_local *local, - struct obd_trans_info *oti, int rc); - int (*o_change_cbdata)(struct obd_export *, struct lov_stripe_md *, - ldlm_iterator_t it, void *data); - int (*o_find_cbdata)(struct obd_export *, struct lov_stripe_md *, - ldlm_iterator_t it, void *data); - int (*o_init_export)(struct obd_export *exp); - int (*o_destroy_export)(struct obd_export *exp); - - /* llog related obd_methods */ - int (*o_llog_init)(struct obd_device *obd, struct obd_llog_group *grp, - struct obd_device *disk_obd, int *idx); - int (*o_llog_finish)(struct obd_device *obd, int count); - int (*o_llog_connect)(struct obd_export *, struct llogd_conn_body *); - - /* metadata-only methods */ - int (*o_pin)(struct obd_export *, const struct lu_fid *fid, - struct obd_capa *, struct obd_client_handle *, int flag); - int (*o_unpin)(struct obd_export *, struct obd_client_handle *, int); - - int (*o_import_event)(struct obd_device *, struct obd_import *, - enum obd_import_event); - - int (*o_notify)(struct obd_device *obd, struct obd_device *watched, - enum obd_notify_event ev, void *data); - - int (*o_health_check)(const struct lu_env *env, struct obd_device *); - struct obd_uuid *(*o_get_uuid) (struct obd_export *exp); - - /* quota methods */ - int (*o_quotacheck)(struct obd_device *, struct obd_export *, - struct obd_quotactl *); - int (*o_quotactl)(struct obd_device *, struct obd_export *, - struct obd_quotactl *); - - int (*o_ping)(const struct lu_env *, struct obd_export *exp); - - /* pools methods */ - int (*o_pool_new)(struct obd_device *obd, char *poolname); - int (*o_pool_del)(struct obd_device *obd, char *poolname); - int (*o_pool_add)(struct obd_device *obd, char *poolname, - char *ostname); - int (*o_pool_rem)(struct obd_device *obd, char *poolname, - char *ostname); - void (*o_getref)(struct obd_device *obd); - void (*o_putref)(struct obd_device *obd); - /* - * NOTE: If adding ops, add another LPROCFS_OBD_OP_INIT() line - * to lprocfs_alloc_obd_stats() in obdclass/lprocfs_status.c. - * Also, add a wrapper function in include/linux/obd_class.h. */ + /* + * Object with @fid is getting deleted, we may want to do something + * about this. + */ + int (*o_statfs)(const struct lu_env *, struct obd_export *exp, + struct obd_statfs *osfs, __u64 max_age, __u32 flags); + int (*o_statfs_async)(struct obd_export *exp, struct obd_info *oinfo, + __u64 max_age, struct ptlrpc_request_set *set); + int (*o_create)(const struct lu_env *env, struct obd_export *exp, + struct obdo *oa); + int (*o_destroy)(const struct lu_env *env, struct obd_export *exp, + struct obdo *oa); + int (*o_setattr)(const struct lu_env *, struct obd_export *exp, + struct obdo *oa); + int (*o_getattr)(const struct lu_env *env, struct obd_export *exp, + struct obdo *oa); + int (*o_preprw)(const struct lu_env *env, int cmd, + struct obd_export *exp, struct obdo *oa, int objcount, + struct obd_ioobj *obj, struct niobuf_remote *remote, + int *nr_pages, struct niobuf_local *local); + int (*o_commitrw)(const struct lu_env *env, int cmd, + struct obd_export *exp, struct obdo *oa, + int objcount, struct obd_ioobj *obj, + struct niobuf_remote *remote, int pages, + struct niobuf_local *local, int rc); + int (*o_init_export)(struct obd_export *exp); + int (*o_destroy_export)(struct obd_export *exp); + + int (*o_import_event)(struct obd_device *, struct obd_import *, + enum obd_import_event); + + int (*o_notify)(struct obd_device *obd, struct obd_device *watched, + enum obd_notify_event ev, void *data); + + int (*o_health_check)(const struct lu_env *env, struct obd_device *); + struct obd_uuid *(*o_get_uuid) (struct obd_export *exp); + + /* quota methods */ + int (*o_quotactl)(struct obd_device *, struct obd_export *, + struct obd_quotactl *); + + int (*o_ping)(const struct lu_env *, struct obd_export *exp); + + /* pools methods */ + int (*o_pool_new)(struct obd_device *obd, char *poolname); + int (*o_pool_del)(struct obd_device *obd, char *poolname); + int (*o_pool_add)(struct obd_device *obd, char *poolname, + char *ostname); + int (*o_pool_rem)(struct obd_device *obd, char *poolname, + char *ostname); + void (*o_getref)(struct obd_device *obd); + void (*o_putref)(struct obd_device *obd); + /* + * NOTE: If adding ops, add another LPROCFS_OBD_OP_INIT() line + * to lprocfs_alloc_obd_stats() in obdclass/lprocfs_status.c. + * Also, add a wrapper function in include/linux/obd_class.h. */ }; /* lmv structures */ struct lustre_md { struct mdt_body *body; - struct lov_stripe_md *lsm; + struct lu_buf layout; struct lmv_stripe_md *lmv; #ifdef CONFIG_FS_POSIX_ACL struct posix_acl *posix_acl; #endif - struct mdt_remote_perm *remote_perm; - struct obd_capa *mds_capa; - struct obd_capa *oss_capa; - __u64 lm_flags; }; struct md_open_data { @@ -1186,6 +978,17 @@ struct md_open_data { bool mod_is_create; }; +struct obd_client_handle { + struct lustre_handle och_fh; + struct lu_fid och_fid; + struct md_open_data *och_mod; + struct lustre_handle och_lease_handle; /* open lock for lease */ + __u32 och_magic; + int och_flags; +}; + +#define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed + struct lookup_intent; struct cl_attr; @@ -1206,8 +1009,8 @@ struct md_ops { struct md_open_data *, struct ptlrpc_request **); int (*m_create)(struct obd_export *, struct md_op_data *, - const void *, int, int, __u32, __u32, cfs_cap_t, - __u64, struct ptlrpc_request **); + const void *, size_t, umode_t, uid_t, gid_t, + cfs_cap_t, __u64, struct ptlrpc_request **); int (*m_enqueue)(struct obd_export *, struct ldlm_enqueue_info *, const union ldlm_policy_data *, @@ -1226,64 +1029,45 @@ struct md_ops { struct ptlrpc_request **); int (*m_rename)(struct obd_export *, struct md_op_data *, - const char *, int, const char *, int, + const char *, size_t, const char *, size_t, struct ptlrpc_request **); int (*m_setattr)(struct obd_export *, struct md_op_data *, void *, - int , void *, int, struct ptlrpc_request **, - struct md_open_data **mod); + size_t , struct ptlrpc_request **); int (*m_fsync)(struct obd_export *, const struct lu_fid *, - struct obd_capa *, struct ptlrpc_request **); + struct ptlrpc_request **); - int (*m_readpage)(struct obd_export *, struct md_op_data *, - struct page **, struct ptlrpc_request **); - - int (*m_read_entry)(struct obd_export *, struct md_op_data *, - struct md_callback *cb_op, struct lu_dirent **ld, - struct page **ppage); + int (*m_read_page)(struct obd_export *, struct md_op_data *, + struct md_callback *cb_op, __u64 hash_offset, + struct page **ppage); int (*m_unlink)(struct obd_export *, struct md_op_data *, struct ptlrpc_request **); int (*m_setxattr)(struct obd_export *, const struct lu_fid *, - struct obd_capa *, obd_valid, const char *, - const char *, int, int, int, __u32, + u64, const char *, const char *, int, int, int, u32, struct ptlrpc_request **); int (*m_getxattr)(struct obd_export *, const struct lu_fid *, - struct obd_capa *, obd_valid, const char *, - const char *, int, int, int, + u64, const char *, const char *, int, int, int, struct ptlrpc_request **); - int (*m_intent_getattr_async)(struct obd_export *, - struct md_enqueue_info *, - struct ldlm_enqueue_info *); + int (*m_intent_getattr_async)(struct obd_export *, + struct md_enqueue_info *); int (*m_revalidate_lock)(struct obd_export *, struct lookup_intent *, struct lu_fid *, __u64 *bits); #define MD_STATS_LAST_OP m_revalidate_lock - int (*m_getstatus)(struct obd_export *, struct lu_fid *, - struct obd_capa **); - + int (*m_get_root)(struct obd_export *, const char *, struct lu_fid *); int (*m_null_inode)(struct obd_export *, const struct lu_fid *); - int (*m_find_cbdata)(struct obd_export *, const struct lu_fid *, - ldlm_iterator_t, void *); - - int (*m_done_writing)(struct obd_export *, struct md_op_data *, - struct md_open_data *); - int (*m_getattr_name)(struct obd_export *, struct md_op_data *, struct ptlrpc_request **); - int (*m_is_subdir)(struct obd_export *, const struct lu_fid *, - const struct lu_fid *, - struct ptlrpc_request **); - - int (*m_init_ea_size)(struct obd_export *, int, int, int, int); + int (*m_init_ea_size)(struct obd_export *, __u32, __u32); int (*m_get_lustre_md)(struct obd_export *, struct ptlrpc_request *, struct obd_export *, struct obd_export *, @@ -1293,10 +1077,7 @@ struct md_ops { int (*m_merge_attr)(struct obd_export *, const struct lmv_stripe_md *lsm, - struct cl_attr *attr); - - int (*m_update_lsm_md)(struct obd_export *, struct lmv_stripe_md *lsm, - struct mdt_body *, ldlm_blocking_callback); + struct cl_attr *attr, ldlm_blocking_callback); int (*m_set_open_replay_data)(struct obd_export *, struct obd_client_handle *, @@ -1305,71 +1086,26 @@ struct md_ops { int (*m_clear_open_replay_data)(struct obd_export *, struct obd_client_handle *); - int (*m_set_lock_data)(struct obd_export *, __u64 *, void *, __u64 *); + int (*m_set_lock_data)(struct obd_export *, + const struct lustre_handle *, void *, __u64 *); - ldlm_mode_t (*m_lock_match)(struct obd_export *, __u64, - const struct lu_fid *, ldlm_type_t, - ldlm_policy_data_t *, ldlm_mode_t, - struct lustre_handle *); + enum ldlm_mode (*m_lock_match)(struct obd_export *, __u64, + const struct lu_fid *, enum ldlm_type, + union ldlm_policy_data *, enum ldlm_mode, + struct lustre_handle *); int (*m_cancel_unused)(struct obd_export *, const struct lu_fid *, - ldlm_policy_data_t *, ldlm_mode_t, - ldlm_cancel_flags_t flags, void *opaque); - - int (*m_renew_capa)(struct obd_export *, struct obd_capa *oc, - renew_capa_cb_t cb); - - int (*m_unpack_capa)(struct obd_export *, struct ptlrpc_request *, - const struct req_msg_field *, struct obd_capa **); - - int (*m_get_remote_perm)(struct obd_export *, const struct lu_fid *, - struct obd_capa *, __u32, - struct ptlrpc_request **); + union ldlm_policy_data *, enum ldlm_mode, + enum ldlm_cancel_flags flags, void *opaque); int (*m_get_fid_from_lsm)(struct obd_export *, const struct lmv_stripe_md *, const char *name, int namelen, struct lu_fid *fid); + int (*m_unpackmd)(struct obd_export *exp, struct lmv_stripe_md **plsm, + const union lmv_mds_md *lmv, size_t lmv_size); }; -struct lsm_operations { - void (*lsm_free)(struct lov_stripe_md *); - int (*lsm_destroy)(struct lov_stripe_md *, struct obdo *oa, - struct obd_export *md_exp); - void (*lsm_stripe_by_index)(struct lov_stripe_md *, int *, obd_off *, - obd_off *); - void (*lsm_stripe_by_offset)(struct lov_stripe_md *, int *, obd_off *, - obd_off *); - int (*lsm_lmm_verify) (struct lov_mds_md *lmm, int lmm_bytes, - __u16 *stripe_count); - int (*lsm_unpackmd) (struct lov_obd *lov, struct lov_stripe_md *lsm, - struct lov_mds_md *lmm); -}; - -extern const struct lsm_operations lsm_v1_ops; -extern const struct lsm_operations lsm_v3_ops; -static inline const struct lsm_operations *lsm_op_find(int magic) -{ - switch(magic) { - case LOV_MAGIC_V1: - return &lsm_v1_ops; - case LOV_MAGIC_V3: - return &lsm_v3_ops; - default: - CERROR("Cannot recognize lsm_magic %08x\n", magic); - return NULL; - } -} - -/* Requests for obd_extent_calc() */ -#define OBD_CALC_STRIPE_START 1 -#define OBD_CALC_STRIPE_END 2 - -static inline struct lustre_capa *oinfo_capa(struct obd_info *oinfo) -{ - return oinfo->oi_capa; -} - static inline struct md_open_data *obd_mod_alloc(void) { struct md_open_data *mod; @@ -1390,7 +1126,7 @@ static inline struct md_open_data *obd_mod_alloc(void) } \ }) -void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid); +void obdo_from_inode(struct obdo *dst, struct inode *src, u64 valid); void obdo_set_parent_fid(struct obdo *dst, const struct lu_fid *parent); /* return 1 if client should be resend request */ @@ -1412,7 +1148,8 @@ static inline const char *lu_dev_name(const struct lu_device *lu_dev) return lu_dev->ld_obd->obd_name; } -static inline bool filename_is_volatile(const char *name, int namelen, int *idx) +static inline bool filename_is_volatile(const char *name, size_t namelen, + int *idx) { const char *start; char *end; @@ -1437,7 +1174,7 @@ static inline bool filename_is_volatile(const char *name, int namelen, int *idx) } /* we have an idx, read it */ start = name + LUSTRE_VOLATILE_HDR_LEN + 1; - *idx = strtoul(start, &end, 16); + *idx = simple_strtoul(start, &end, 16); /* error cases: * no digit, no trailing :, negative value */ @@ -1460,16 +1197,27 @@ static inline int cli_brw_size(struct obd_device *obd) return obd->u.cli.cl_max_pages_per_rpc << PAGE_CACHE_SHIFT; } +/* when RPC size or the max RPCs in flight is increased, the max dirty pages + * of the client should be increased accordingly to avoid sending fragmented + * RPCs over the network when the client runs out of the maximum dirty space + * when so many RPCs are being generated. + */ static inline void client_adjust_max_dirty(struct client_obd *cli) { /* initializing */ - if (cli->cl_dirty_max <= 0) - cli->cl_dirty_max = OSC_MAX_DIRTY_DEFAULT * 1024 * 1024; - else - cli->cl_dirty_max = cli->cl_max_rpcs_in_flight * - (cli->cl_max_pages_per_rpc << PAGE_CACHE_SHIFT); - if (cli->cl_dirty_max >> PAGE_CACHE_SHIFT > totalram_pages / 8) - cli->cl_dirty_max = totalram_pages << (PAGE_CACHE_SHIFT - 3); + if (cli->cl_dirty_max_pages <= 0) + cli->cl_dirty_max_pages = (OSC_MAX_DIRTY_DEFAULT * 1024 * 1024) + >> PAGE_CACHE_SHIFT; + else { + unsigned long dirty_max = cli->cl_max_rpcs_in_flight * + cli->cl_max_pages_per_rpc; + + if (dirty_max > cli->cl_dirty_max_pages) + cli->cl_dirty_max_pages = dirty_max; + } + + if (cli->cl_dirty_max_pages > totalram_pages / 8) + cli->cl_dirty_max_pages = totalram_pages / 8; } #endif /* __OBD_H */