X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Finclude%2Flustre_dlm.h;h=f1071bd1de785232b2e98ca324fefeb8b67a0727;hp=ae5a59908fd33814ab09a521ee765758dbd0e689;hb=6052cc88eb1232ac3b0193f0d47881887a2dcfdc;hpb=18aee6838907192c03c5f70e88624686c1c074da diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h index ae5a599..f1071bd 100644 --- a/lustre/include/lustre_dlm.h +++ b/lustre/include/lustre_dlm.h @@ -68,6 +68,7 @@ extern struct kset *ldlm_svc_kset; * client shows interest in that lock, e.g. glimpse is occured. */ #define LDLM_DIRTY_AGE_LIMIT (10) #define LDLM_DEFAULT_PARALLEL_AST_LIMIT 1024 +#define LDLM_DEFAULT_LRU_SHRINK_BATCH (16) /** * LDLM non-error return states @@ -272,9 +273,10 @@ struct ldlm_pool { struct completion pl_kobj_unregister; }; -typedef int (*ldlm_res_policy)(struct ldlm_namespace *, struct ldlm_lock **, - void *req_cookie, enum ldlm_mode mode, - __u64 flags, void *data); +typedef int (*ldlm_res_policy)(const struct lu_env *env, + struct ldlm_namespace *, + struct ldlm_lock **, void *req_cookie, + enum ldlm_mode mode, __u64 flags, void *data); typedef int (*ldlm_cancel_cbt)(struct ldlm_lock *lock); @@ -294,12 +296,12 @@ typedef int (*ldlm_cancel_cbt)(struct ldlm_lock *lock); struct ldlm_valblock_ops { int (*lvbo_init)(struct ldlm_resource *res); int (*lvbo_update)(struct ldlm_resource *res, struct ldlm_lock *lock, - struct ptlrpc_request *r, int increase); + struct ptlrpc_request *r, int increase); int (*lvbo_free)(struct ldlm_resource *res); /* Return size of lvb data appropriate RPC size can be reserved */ int (*lvbo_size)(struct ldlm_lock *lock); /* Called to fill in lvb data to RPC buffer @buf */ - int (*lvbo_fill)(struct ldlm_lock *lock, void *buf, int buflen); + int (*lvbo_fill)(struct ldlm_lock *lock, void *buf, int *buflen); }; /** @@ -307,8 +309,8 @@ struct ldlm_valblock_ops { * Greedy means release cached locks aggressively */ enum ldlm_appetite { - LDLM_NAMESPACE_GREEDY = 1 << 0, - LDLM_NAMESPACE_MODEST = 1 << 1 + LDLM_NAMESPACE_GREEDY = BIT(0), + LDLM_NAMESPACE_MODEST = BIT(1), }; /** @@ -332,6 +334,8 @@ struct ldlm_ns_bucket { * Which res in the bucket should we start with the reclaim. */ int nsb_reclaim_start; + /* counter of entries in this bucket */ + atomic_t nsb_count; }; enum { @@ -350,6 +354,14 @@ enum ldlm_ns_type { LDLM_NS_TYPE_MGT, /**< MGT namespace */ }; +enum ldlm_namespace_flags { + /** + * Flag to indicate the LRU cancel is in progress. + * Used to limit the process by 1 thread only. + */ + LDLM_LRU_CANCEL = 0 +}; + /** * LDLM Namespace. * @@ -378,8 +390,13 @@ struct ldlm_namespace { /** Flag indicating if namespace is on client instead of server */ enum ldlm_side ns_client; + /** name of this namespace */ + char *ns_name; + /** Resource hash table for namespace. */ struct cfs_hash *ns_rs_hash; + struct ldlm_ns_bucket *ns_rs_buckets; + unsigned int ns_bucket_bits; /** serialize */ spinlock_t ns_lock; @@ -427,7 +444,15 @@ struct ldlm_namespace { */ unsigned int ns_max_unused; - /** Maximum allowed age (last used time) for locks in the LRU */ + /** + * Cancel batch, if unused lock count exceed lru_size + * Only be used if LRUR disable. + */ + unsigned int ns_cancel_batch; + + /** Maximum allowed age (last used time) for locks in the LRU. Set in + * seconds from userspace, but stored in ns to avoid repeat conversions. 
+ */ ktime_t ns_max_age; /** @@ -436,19 +461,20 @@ struct ldlm_namespace { */ unsigned int ns_timeouts; /** - * Number of seconds since the file change time after which the - * MDT will return an UPDATE lock along with a LOOKUP lock. + * Number of seconds since the file change time after which + * the MDT will return an UPDATE lock along with a LOOKUP lock. * This allows the client to start caching negative dentries * for a directory and may save an RPC for a later stat. */ - time64_t ns_ctime_age_limit; + timeout_t ns_ctime_age_limit; /** - * Number of seconds since the lock was last used. The client may - * cancel the lock limited by this age and flush related data if - * any other client shows interest in it doing glimpse request. - * This allows to cache stat data locally for such files early. + * Number of (nano)seconds since the lock was last used. The client + * may cancel the lock older than this age and flush related data if + * another client shows interest in this lock by doing glimpse request. + * This allows to cache stat data locally for such files early. Set in + * seconds from userspace, but stored in ns to avoid repeat conversions. */ - time64_t ns_dirty_age_limit; + ktime_t ns_dirty_age_limit; /** * Used to rate-limit ldlm_namespace_dump calls. * \see ldlm_namespace_dump. Increased by 10 seconds every time @@ -492,7 +518,7 @@ struct ldlm_namespace { * The resources in this namespace remember contended state during * \a ns_contention_time, in seconds. */ - time64_t ns_contention_time; + timeout_t ns_contention_time; /** * Limit size of contended extent locks, in bytes. @@ -527,6 +553,11 @@ struct ldlm_namespace { struct kobject ns_kobj; /* sysfs object */ struct completion ns_kobj_unregister; + + /** + * To avoid another ns_lock usage, a separate bitops field. + */ + unsigned long ns_flags; }; /** @@ -535,8 +566,6 @@ struct ldlm_namespace { static inline int ns_is_client(struct ldlm_namespace *ns) { LASSERT(ns != NULL); - LASSERT(!(ns->ns_client & ~(LDLM_NAMESPACE_CLIENT | - LDLM_NAMESPACE_SERVER))); LASSERT(ns->ns_client == LDLM_NAMESPACE_CLIENT || ns->ns_client == LDLM_NAMESPACE_SERVER); return ns->ns_client == LDLM_NAMESPACE_CLIENT; @@ -548,8 +577,6 @@ static inline int ns_is_client(struct ldlm_namespace *ns) static inline int ns_is_server(struct ldlm_namespace *ns) { LASSERT(ns != NULL); - LASSERT(!(ns->ns_client & ~(LDLM_NAMESPACE_CLIENT | - LDLM_NAMESPACE_SERVER))); LASSERT(ns->ns_client == LDLM_NAMESPACE_CLIENT || ns->ns_client == LDLM_NAMESPACE_SERVER); return ns->ns_client == LDLM_NAMESPACE_SERVER; @@ -592,6 +619,9 @@ typedef int (*ldlm_completion_callback)(struct ldlm_lock *lock, __u64 flags, /** Type for glimpse callback function of a lock. */ typedef int (*ldlm_glimpse_callback)(struct ldlm_lock *lock, void *data); +/** Type for created callback function of a lock. */ +typedef void (*ldlm_created_callback)(struct ldlm_lock *lock); + /** Work list for sending GL ASTs to multiple locks. 
*/ struct ldlm_glimpse_work { struct ldlm_lock *gl_lock; /* lock to glimpse */ @@ -603,6 +633,11 @@ struct ldlm_glimpse_work { void *gl_interpret_data; }; +struct ldlm_bl_desc { + unsigned int bl_same_client:1, + bl_cos_incompat:1; +}; + struct ldlm_cb_set_arg { struct ptlrpc_request_set *set; int type; /* LDLM_{CP,BL,GL}_CALLBACK */ @@ -611,6 +646,7 @@ struct ldlm_cb_set_arg { union ldlm_gl_desc *gl_desc; /* glimpse AST descriptor */ ptlrpc_interpterer_t gl_interpret_reply; void *gl_interpret_data; + struct ldlm_bl_desc *bl_desc; }; struct ldlm_cb_async_args { @@ -642,6 +678,19 @@ struct ldlm_interval_tree { struct interval_node *lit_root; /* actual ldlm_interval */ }; +/** + * Lists of waiting locks for each inodebit type. + * A lock can be in several liq_waiting lists and it remains in lr_waiting. + */ +struct ldlm_ibits_queues { + struct list_head liq_waiting[MDS_INODELOCK_NUMBITS]; +}; + +struct ldlm_ibits_node { + struct list_head lin_link[MDS_INODELOCK_NUMBITS]; + struct ldlm_lock *lock; +}; + /** Whether to track references to exports by LDLM locks. */ #define LUSTRE_TRACKS_LOCK_EXP_REFS (0) @@ -650,7 +699,6 @@ enum ldlm_cancel_flags { LCF_ASYNC = 0x1, /* Cancel locks asynchronously. */ LCF_LOCAL = 0x2, /* Cancel locks locally, not notifing server */ LCF_BL_AST = 0x4, /* Cancel LDLM_FL_BL_AST locks in the same RPC */ - LCF_CONVERT = 0x8, /* Try to convert IBITS lock before cancel */ }; struct ldlm_flock { @@ -711,12 +759,6 @@ struct ldlm_lock { */ struct portals_handle l_handle; /** - * Lock reference count. - * This is how many users have pointers to actual structure, so that - * we do not accidentally free lock structure that is in use. - */ - atomic_t l_refc; - /** * Internal spinlock protects l_resource. We should hold this lock * first before taking res_lock. */ @@ -738,9 +780,12 @@ struct ldlm_lock { */ struct list_head l_res_link; /** - * Tree node for ldlm_extent. + * Internal structures per lock type.. */ - struct ldlm_interval *l_tree_node; + union { + struct ldlm_interval *l_tree_node; + struct ldlm_ibits_node *l_ibits_node; + }; /** * Per export hash of locks. * Protected by per-bucket exp->exp_lock_hash locks. @@ -825,12 +870,6 @@ struct ldlm_lock { wait_queue_head_t l_waitq; /** - * Seconds. It will be updated if there is any activity related to - * the lock, e.g. enqueue the lock or send blocking AST. - */ - time64_t l_last_activity; - - /** * Time, in nanoseconds, last used by e.g. being matched by lock match. */ ktime_t l_last_used; @@ -853,6 +892,17 @@ struct ldlm_lock { /** Private storage for lock user. Opaque to LDLM. */ void *l_ast_data; + + union { + /** + * Seconds. It will be updated if there is any activity related to + * the lock at client, e.g. enqueue the lock. For server it is the + * time when blocking ast was sent. + */ + time64_t l_activity; + time64_t l_blast_sent; + }; + /* separate ost_lvb used mostly by Data-on-MDT for now. * It is introduced to don't mix with layout lock data. */ struct ost_lvb l_ost_lvb; @@ -883,7 +933,7 @@ struct ldlm_lock { * under this lock. * \see ost_rw_prolong_locks */ - time64_t l_callback_timeout; + time64_t l_callback_timestamp; /** Local PID of process which created this lock. */ __u32 l_pid; @@ -935,6 +985,25 @@ struct ldlm_lock { struct list_head l_exp_list; }; +enum ldlm_match_flags { + LDLM_MATCH_UNREF = BIT(0), + LDLM_MATCH_AST = BIT(1), + LDLM_MATCH_AST_ANY = BIT(2), +}; + +/** + * Describe the overlap between two locks. itree_overlap_cb data. 
+ */ +struct ldlm_match_data { + struct ldlm_lock *lmd_old; + struct ldlm_lock *lmd_lock; + enum ldlm_mode *lmd_mode; + union ldlm_policy_data *lmd_policy; + __u64 lmd_flags; + __u64 lmd_skip_flags; + enum ldlm_match_flags lmd_match; +}; + /** For uncommitted cross-MDT lock, store transno this lock belongs to */ #define l_transno l_client_cookie @@ -991,10 +1060,14 @@ struct ldlm_resource { /** Resource name */ struct ldlm_res_id lr_name; - /** - * Interval trees (only for extent locks) for all modes of this resource - */ - struct ldlm_interval_tree *lr_itree; + union { + /** + * Interval trees (only for extent locks) for all modes of + * this resource + */ + struct ldlm_interval_tree *lr_itree; + struct ldlm_ibits_queues *lr_ibits_queues; + }; union { /** @@ -1026,6 +1099,11 @@ struct ldlm_resource { struct lu_ref lr_reference; }; +static inline int ldlm_is_granted(struct ldlm_lock *lock) +{ + return lock->l_req_mode == lock->l_granted_mode; +} + static inline bool ldlm_has_layout(struct ldlm_lock *lock) { return lock->l_resource->lr_type == LDLM_IBITS && @@ -1041,7 +1119,7 @@ static inline bool ldlm_has_dom(struct ldlm_lock *lock) static inline char * ldlm_ns_name(struct ldlm_namespace *ns) { - return ns->ns_rs_hash->hs_name; + return ns->ns_name; } static inline struct ldlm_namespace * @@ -1109,7 +1187,7 @@ static inline int ldlm_lvbo_size(struct ldlm_lock *lock) return 0; } -static inline int ldlm_lvbo_fill(struct ldlm_lock *lock, void *buf, int len) +static inline int ldlm_lvbo_fill(struct ldlm_lock *lock, void *buf, int *len) { struct ldlm_namespace *ns = ldlm_lock_to_ns(lock); int rc; @@ -1148,16 +1226,16 @@ struct ldlm_enqueue_info { void *ei_cb_local_bl; /** blocking local lock callback */ void *ei_cb_cp; /** lock completion callback */ void *ei_cb_gl; /** lock glimpse callback */ + ldlm_created_callback ei_cb_created; /** lock created callback */ void *ei_cbdata; /** Data to be passed into callbacks. */ void *ei_namespace; /** lock namespace **/ u64 ei_inodebits; /** lock inode bits **/ unsigned int ei_enq_slave:1; /** whether enqueue slave stripes */ + unsigned int ei_enq_slot:1; /** whether acquire rpc slot */ }; #define ei_res_id ei_cb_gl -extern struct obd_ops ldlm_obd_ops; - extern char *ldlm_lockname[]; extern char *ldlm_typename[]; extern const char *ldlm_it2str(enum ldlm_intent_flags it); @@ -1254,6 +1332,12 @@ typedef int (*ldlm_processing_policy)(struct ldlm_lock *lock, __u64 *flags, enum ldlm_error *err, struct list_head *work_list); +typedef int (*ldlm_reprocessing_policy)(struct ldlm_resource *res, + struct list_head *queue, + struct list_head *work_list, + enum ldlm_process_intention intention, + struct ldlm_lock *hint); + /** * Return values for lock iterators. * Also used during deciding of lock grants and cancellations. 
@@ -1290,7 +1374,7 @@ struct ldlm_prolong_args { struct ldlm_res_id lpa_resid; struct ldlm_extent lpa_extent; enum ldlm_mode lpa_mode; - time64_t lpa_timeout; + timeout_t lpa_timeout; int lpa_locks_cnt; int lpa_blocks_cnt; }; @@ -1336,10 +1420,10 @@ int ldlm_request_cancel(struct ptlrpc_request *req, /** @} ldlm_handlers */ void ldlm_revoke_export_locks(struct obd_export *exp); -time64_t ldlm_bl_timeout(struct ldlm_lock *lock); +timeout_t ldlm_bl_timeout(struct ldlm_lock *lock); #endif int ldlm_del_waiting_lock(struct ldlm_lock *lock); -int ldlm_refresh_waiting_lock(struct ldlm_lock *lock, time64_t timeout); +int ldlm_refresh_waiting_lock(struct ldlm_lock *lock, timeout_t timeout); int ldlm_get_ref(void); void ldlm_put_ref(void); int ldlm_init_export(struct obd_export *exp); @@ -1349,6 +1433,8 @@ struct ldlm_lock *ldlm_request_lock(struct ptlrpc_request *req); /* ldlm_lock.c */ #ifdef HAVE_SERVER_SUPPORT ldlm_processing_policy ldlm_get_processing_policy(struct ldlm_resource *res); +ldlm_reprocessing_policy +ldlm_get_reprocessing_policy(struct ldlm_resource *res); #endif void ldlm_register_intent(struct ldlm_namespace *ns, ldlm_res_policy arg); void ldlm_lock2handle(const struct ldlm_lock *lock, @@ -1367,7 +1453,7 @@ static inline struct ldlm_lock *ldlm_handle2lock(const struct lustre_handle *h) } #define LDLM_LOCK_REF_DEL(lock) \ - lu_ref_del(&lock->l_reference, "handle", current) + lu_ref_del(&lock->l_reference, "handle", lock) static inline struct ldlm_lock * ldlm_handle2lock_long(const struct lustre_handle *h, __u64 flags) @@ -1405,11 +1491,14 @@ static inline int ldlm_lvbo_update(struct ldlm_resource *res, } static inline int ldlm_res_lvbo_update(struct ldlm_resource *res, - struct ptlrpc_request *req, int increase) + struct ptlrpc_request *req, + int increase) { return ldlm_lvbo_update(res, NULL, req, increase); } +int is_granted_or_cancelled_nolock(struct ldlm_lock *lock); + int ldlm_error2errno(enum ldlm_error error); enum ldlm_error ldlm_errno2error(int err_no); /* don't call it `errno': this * confuses user-space. 
*/ @@ -1472,15 +1561,33 @@ void ldlm_lock_fail_match_locked(struct ldlm_lock *lock); void ldlm_lock_fail_match(struct ldlm_lock *lock); void ldlm_lock_allow_match(struct ldlm_lock *lock); void ldlm_lock_allow_match_locked(struct ldlm_lock *lock); -enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags, - const struct ldlm_res_id *, enum ldlm_type type, - union ldlm_policy_data *, enum ldlm_mode mode, - struct lustre_handle *, int unref); + +enum ldlm_mode ldlm_lock_match_with_skip(struct ldlm_namespace *ns, + __u64 flags, __u64 skip_flags, + const struct ldlm_res_id *res_id, + enum ldlm_type type, + union ldlm_policy_data *policy, + enum ldlm_mode mode, + struct lustre_handle *lh, + enum ldlm_match_flags match_flags); +static inline enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, + __u64 flags, + const struct ldlm_res_id *res_id, + enum ldlm_type type, + union ldlm_policy_data *policy, + enum ldlm_mode mode, + struct lustre_handle *lh) +{ + return ldlm_lock_match_with_skip(ns, flags, 0, res_id, type, policy, + mode, lh, 0); +} +struct ldlm_lock *search_itree(struct ldlm_resource *res, + struct ldlm_match_data *data); enum ldlm_mode ldlm_revalidate_lock_handle(const struct lustre_handle *lockh, __u64 *bits); void ldlm_lock_mode_downgrade(struct ldlm_lock *lock, enum ldlm_mode new_mode); void ldlm_lock_cancel(struct ldlm_lock *lock); -void ldlm_reprocess_all(struct ldlm_resource *res); +void ldlm_reprocess_all(struct ldlm_resource *res, struct ldlm_lock *hint); void ldlm_reprocess_recovery_done(struct ldlm_namespace *ns); void ldlm_lock_dump_handle(int level, const struct lustre_handle *lockh); void ldlm_unlink_lock_skiplist(struct ldlm_lock *req); @@ -1533,8 +1640,6 @@ static inline void ldlm_svc_get_eopc(const struct ldlm_request *dlm_req, if (op != 0) lprocfs_counter_incr(srv_stats, op); - - return; } /* resource.c - internal */ @@ -1564,7 +1669,6 @@ int ldlm_lock_change_resource(struct ldlm_namespace *, struct ldlm_lock *, } while (0) /* ldlm_request.c */ -int ldlm_expired_completion_wait(void *data); /** \defgroup ldlm_local_ast Default AST handlers for local locks * These AST handlers are typically used for server-side local locks and are * also used by client-side lock handlers to perform minimum level base @@ -1605,7 +1709,8 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req, enum ldlm_mode mode, __u64 *flags, void *lvb, __u32 lvb_len, const struct lustre_handle *lockh, int rc); -int ldlm_cli_enqueue_local(struct ldlm_namespace *ns, +int ldlm_cli_enqueue_local(const struct lu_env *env, + struct ldlm_namespace *ns, const struct ldlm_res_id *res_id, enum ldlm_type type, union ldlm_policy_data *policy, enum ldlm_mode mode, __u64 *flags, @@ -1615,7 +1720,9 @@ int ldlm_cli_enqueue_local(struct ldlm_namespace *ns, void *data, __u32 lvb_len, enum lvb_type lvb_type, const __u64 *client_cookie, struct lustre_handle *lockh); -int ldlm_cli_convert(struct ldlm_lock *lock, __u32 *flags); +int ldlm_cli_convert_req(struct ldlm_lock *lock, __u32 *flags, __u64 new_bits); +int ldlm_cli_convert(struct ldlm_lock *lock, + enum ldlm_cancel_flags cancel_flags); int ldlm_cli_update_pool(struct ptlrpc_request *req); int ldlm_cli_cancel(const struct lustre_handle *lockh, enum ldlm_cancel_flags cancel_flags); @@ -1641,11 +1748,13 @@ int ldlm_cli_cancel_list(struct list_head *head, int count, enum ldlm_cancel_flags flags); int ldlm_inodebits_drop(struct ldlm_lock *lock, __u64 to_drop); -int ldlm_cli_dropbits(struct ldlm_lock *lock, __u64 drop_bits); -int 
ldlm_cli_dropbits_list(struct list_head *converts, __u64 drop_bits); +int ldlm_cli_inodebits_convert(struct ldlm_lock *lock, + enum ldlm_cancel_flags cancel_flags); /** @} ldlm_cli_api */ +extern unsigned int ldlm_enqueue_min; + /* mds/handler.c */ /* This has to be here because recursive inclusion sucks. */ int intent_disposition(struct ldlm_reply *rep, int flag); @@ -1693,7 +1802,6 @@ void unlock_res_and_lock(struct ldlm_lock *lock); * There are not used outside of ldlm. * @{ */ -time64_t ldlm_pools_recalc(enum ldlm_side client); int ldlm_pools_init(void); void ldlm_pools_fini(void);
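
The patch introduces LDLM_DEFAULT_LRU_SHRINK_BATCH and the per-namespace ns_cancel_batch tunable, which bound how many unused locks a single LRU pass may cancel once the unused count exceeds lru_size. Below is a minimal sketch of how such a cap could be applied; the helper name is hypothetical and the real batching logic lives in ldlm_request.c.

/* Illustrative only: cap the number of locks one shrink pass cancels.
 * ldlm_lru_shrink_count_sketch() is not a symbol added by this patch. */
static unsigned int ldlm_lru_shrink_count_sketch(struct ldlm_namespace *ns)
{
        unsigned int over;

        if (ns->ns_nr_unused <= ns->ns_max_unused)
                return 0;

        over = ns->ns_nr_unused - ns->ns_max_unused;

        /* ns_cancel_batch defaults to LDLM_DEFAULT_LRU_SHRINK_BATCH and
         * limits how much work a single pass is allowed to do. */
        if (ns->ns_cancel_batch != 0 && over > ns->ns_cancel_batch)
                over = ns->ns_cancel_batch;

        return over;
}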
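
The new ns_flags word and enum ldlm_namespace_flags (LDLM_LRU_CANCEL) exist so that LRU cancellation can be limited to a single thread without taking ns_lock. The sketch below shows the standard bitops pattern the comment implies; ldlm_do_lru_cancel_sketch() is a placeholder for whatever performs the actual cancellation.

static void ldlm_do_lru_cancel_sketch(struct ldlm_namespace *ns);   /* placeholder */

/* Only one thread at a time enters the LRU cancel path; others back off. */
static void ldlm_lru_cancel_once_sketch(struct ldlm_namespace *ns)
{
        if (test_and_set_bit(LDLM_LRU_CANCEL, &ns->ns_flags))
                return;         /* an LRU cancel is already in progress */

        ldlm_do_lru_cancel_sketch(ns);

        clear_bit(LDLM_LRU_CANCEL, &ns->ns_flags);
}

Keeping this serialization in a separate bitops field stays off the ns_lock path, which is the motivation stated in the ns_flags comment.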
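
ns_dirty_age_limit changes from time64_t seconds to ktime_t, and the updated comments on it and ns_max_age note that values arrive in seconds from userspace but are stored in nanoseconds to avoid repeated conversions. A hypothetical pair of handlers showing that conversion at the tunable boundary; the real handlers are in ldlm_resource.c and may differ in detail.

/* Hypothetical handlers: convert once when the tunable is set or read,
 * so aging checks can compare ktime_t values directly. */
static ssize_t dirty_age_limit_show_sketch(struct ldlm_namespace *ns, char *buf)
{
        return scnprintf(buf, PAGE_SIZE, "%lld\n",
                         ktime_divns(ns->ns_dirty_age_limit, NSEC_PER_SEC));
}

static void dirty_age_limit_store_sketch(struct ldlm_namespace *ns,
                                         unsigned long long seconds)
{
        ns->ns_dirty_age_limit = ktime_set(seconds, 0);
}

With the limit already in ktime_t, an age check can, for example, compare ktime_sub(ktime_get(), lock->l_last_used) against it with no per-check conversion.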
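
lvbo_fill() now takes int *buflen instead of int buflen (and the ldlm_lvbo_fill() wrapper passes a pointer through), so a callback can report how many LVB bytes it actually placed in the RPC buffer. A sketch of an implementation honoring the in/out length; the LVB layout shown is a stand-in, not a real Lustre structure.

struct my_lvb_sketch {                  /* stand-in LVB layout */
        __u64 size;
        __u64 blocks;
};

static int my_lvbo_fill_sketch(struct ldlm_lock *lock, void *buf, int *buflen)
{
        struct my_lvb_sketch lvb = { 0 };   /* filled from the resource in real code */
        int want = sizeof(lvb);

        if (want > *buflen)
                want = *buflen;         /* never overrun the reply buffer */

        memcpy(buf, &lvb, want);
        *buflen = want;                 /* report the size actually filled */
        return want;
}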
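
For IBITS resources the patch adds per-bit waiting lists (struct ldlm_ibits_queues, sharing a union with lr_itree) and an ldlm_ibits_node that links one waiting lock into every list whose bit it is blocked on, while the lock also remains on lr_waiting. The sketch below shows how such linking could look; the helper is illustrative and the real code is in ldlm_inodebits.c.

/* Illustrative linking of a waiting IBITS lock into the per-bit queues. */
static void ldlm_ibits_link_sketch(struct ldlm_lock *lock,
                                   struct ldlm_ibits_node *node)
{
        struct ldlm_ibits_queues *q = lock->l_resource->lr_ibits_queues;
        __u64 bits = lock->l_policy_data.l_inodebits.bits;
        int i;

        node->lock = lock;
        for (i = 0; i < MDS_INODELOCK_NUMBITS; i++) {
                if (!(bits & BIT(i)))
                        continue;
                /* one lock may sit on several liq_waiting lists at once */
                list_add_tail(&node->lin_link[i], &q->liq_waiting[i]);
        }
}

Presumably this lets conflict scanning for a given bit walk only liq_waiting[bit] rather than the whole lr_waiting list.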
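
ldlm_lock_match() becomes an inline wrapper over ldlm_lock_match_with_skip(), and the old int unref argument is replaced by enum ldlm_match_flags. A caller that previously passed unref = 1 would now call the _with_skip variant directly, roughly as below; the flag and mode choices are only an example.

/* Example caller: match a lock even if it is unused/unreferenced. */
static enum ldlm_mode match_even_unused_sketch(struct ldlm_namespace *ns,
                                               const struct ldlm_res_id *res_id,
                                               union ldlm_policy_data *policy,
                                               struct lustre_handle *lockh)
{
        return ldlm_lock_match_with_skip(ns, LDLM_FL_BLOCK_GRANTED, 0,
                                         res_id, LDLM_IBITS, policy,
                                         LCK_PR | LCK_PW, lockh,
                                         LDLM_MATCH_UNREF);
}

Judging by its name, the new skip_flags argument lets a caller exclude otherwise matching locks that carry the given flags, without changing the match flags passed in flags.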