X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Finclude%2Flustre_dlm.h;h=4f69be73952385d395b88374641050157639465c;hp=5ad5f9be8418d151186f902504d3e159ea9447c2;hb=366959b8cba5b022b4c1ea9aaba47ac14d4fff7e;hpb=5517eab06eb99e4ecb66be251a10e70c37547610 diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h index 5ad5f9b..4f69be739 100644 --- a/lustre/include/lustre_dlm.h +++ b/lustre/include/lustre_dlm.h @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -27,7 +23,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2010, 2012, Intel Corporation. + * Copyright (c) 2010, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -48,16 +44,6 @@ #ifndef _LUSTRE_DLM_H__ #define _LUSTRE_DLM_H__ -#if defined(__linux__) -#include -#elif defined(__APPLE__) -#include -#elif defined(__WINNT__) -#include -#else -#error Unsupported operating system. -#endif - #include #include #include @@ -65,36 +51,40 @@ #include /* for interval_node{}, ldlm_extent */ #include +#include "lustre_dlm_flags.h" + struct obd_ops; struct obd_device; +extern struct kset *ldlm_ns_kset; +extern struct kset *ldlm_svc_kset; + #define OBD_LDLM_DEVICENAME "ldlm" -#ifdef HAVE_BGL_SUPPORT -/* 1.5 times the maximum 128 tasks available in VN mode */ -#define LDLM_DEFAULT_LRU_SIZE 196 -#else -#define LDLM_DEFAULT_LRU_SIZE (100 * cfs_num_online_cpus()) -#endif -#define LDLM_DEFAULT_MAX_ALIVE (cfs_time_seconds(36000)) +#define LDLM_DEFAULT_LRU_SIZE (100 * num_online_cpus()) +#define LDLM_DEFAULT_MAX_ALIVE 3900 /* 3900 seconds ~65 min */ #define LDLM_CTIME_AGE_LIMIT (10) +/* if client lock is unused for that time it can be cancelled if any other + * client shows interest in that lock, e.g. glimpse is occured. */ +#define LDLM_DIRTY_AGE_LIMIT (10) #define LDLM_DEFAULT_PARALLEL_AST_LIMIT 1024 /** * LDLM non-error return states */ -typedef enum { - ELDLM_OK = 0, - - ELDLM_LOCK_CHANGED = 300, - ELDLM_LOCK_ABORTED = 301, - ELDLM_LOCK_REPLACED = 302, - ELDLM_NO_LOCK_DATA = 303, - ELDLM_LOCK_WOULDBLOCK = 304, - - ELDLM_NAMESPACE_EXISTS = 400, - ELDLM_BAD_NAMESPACE = 401 -} ldlm_error_t; +enum ldlm_error { + ELDLM_OK = 0, + ELDLM_LOCK_MATCHED = 1, + + ELDLM_LOCK_CHANGED = 300, + ELDLM_LOCK_ABORTED = 301, + ELDLM_LOCK_REPLACED = 302, + ELDLM_NO_LOCK_DATA = 303, + ELDLM_LOCK_WOULDBLOCK = 304, + + ELDLM_NAMESPACE_EXISTS = 400, + ELDLM_BAD_NAMESPACE = 401, +}; /** * LDLM namespace type. @@ -103,165 +93,10 @@ typedef enum { * decisions about lack of conflicts or do any autonomous lock granting without * first speaking to a server. */ -typedef enum { - LDLM_NAMESPACE_SERVER = 1 << 0, - LDLM_NAMESPACE_CLIENT = 1 << 1 -} ldlm_side_t; - -/** - * Declaration of flags sent through the wire. - **/ -#define LDLM_FL_LOCK_CHANGED 0x000001 /* extent, mode, or resource changed */ - -/** - * If the server returns one of these flags, then the lock was put on that list. - * If the client sends one of these flags (during recovery ONLY!), it wants the - * lock added to the specified list, no questions asked. - */ -#define LDLM_FL_BLOCK_GRANTED 0x000002 -#define LDLM_FL_BLOCK_CONV 0x000004 -#define LDLM_FL_BLOCK_WAIT 0x000008 - -/* Used to be LDLM_FL_CBPENDING 0x000010 moved to non-wire flags */ - -#define LDLM_FL_AST_SENT 0x000020 /* blocking or cancel packet was - * queued for sending. */ -/* Used to be LDLM_FL_WAIT_NOREPROC 0x000040 moved to non-wire flags */ -/* Used to be LDLM_FL_CANCEL 0x000080 moved to non-wire flags */ - -/** - * Lock is being replayed. This could probably be implied by the fact that one - * of BLOCK_{GRANTED,CONV,WAIT} is set, but that is pretty dangerous. - */ -#define LDLM_FL_REPLAY 0x000100 - -#define LDLM_FL_INTENT_ONLY 0x000200 /* Don't grant lock, just do intent. */ - -/* Used to be LDLM_FL_LOCAL_ONLY 0x000400 moved to non-wire flags */ -/* Used to be LDLM_FL_FAILED 0x000800 moved to non-wire flags */ - -#define LDLM_FL_HAS_INTENT 0x001000 /* lock request has intent */ - -/* Used to be LDLM_FL_CANCELING 0x002000 moved to non-wire flags */ -/* Used to be LDLM_FL_LOCAL 0x004000 moved to non-wire flags */ - -#define LDLM_FL_DISCARD_DATA 0x010000 /* discard (no writeback) on cancel */ - -#define LDLM_FL_NO_TIMEOUT 0x020000 /* Blocked by group lock - wait - * indefinitely */ - -/** file & record locking */ -#define LDLM_FL_BLOCK_NOWAIT 0x040000 /* Server told not to wait if blocked. - * For AGL, OST will not send glimpse - * callback. */ -#define LDLM_FL_TEST_LOCK 0x080000 // return blocking lock - -/* Used to be LDLM_FL_LVB_READY 0x100000 moved to non-wire flags */ -/* Used to be LDLM_FL_KMS_IGNORE 0x200000 moved to non-wire flags */ -/* Used to be LDLM_FL_NO_LRU 0x400000 moved to non-wire flags */ - -/* Immediatelly cancel such locks when they block some other locks. Send - * cancel notification to original lock holder, but expect no reply. This is - * for clients (like liblustre) that cannot be expected to reliably response - * to blocking AST. */ -#define LDLM_FL_CANCEL_ON_BLOCK 0x800000 - -/* Flags flags inherited from parent lock when doing intents. */ -#define LDLM_INHERIT_FLAGS (LDLM_FL_CANCEL_ON_BLOCK) - -/* Used to be LDLM_FL_CP_REQD 0x1000000 moved to non-wire flags */ -/* Used to be LDLM_FL_CLEANED 0x2000000 moved to non-wire flags */ -/* Used to be LDLM_FL_ATOMIC_CB 0x4000000 moved to non-wire flags */ -/* Used to be LDLM_FL_BL_AST 0x10000000 moved to non-wire flags */ -/* Used to be LDLM_FL_BL_DONE 0x20000000 moved to non-wire flags */ - -/* measure lock contention and return -EUSERS if locking contention is high */ -#define LDLM_FL_DENY_ON_CONTENTION 0x40000000 - -/* These are flags that are mapped into the flags and ASTs of blocking locks */ -#define LDLM_AST_DISCARD_DATA 0x80000000 /* Add FL_DISCARD to blocking ASTs */ - -/* Flags sent in AST lock_flags to be mapped into the receiving lock. */ -#define LDLM_AST_FLAGS (LDLM_FL_DISCARD_DATA) - -/* - * -------------------------------------------------------------------------- - * NOTE! Starting from this point, that is, LDLM_FL_* flags with values above - * 0x80000000 will not be sent over the wire. - * -------------------------------------------------------------------------- - */ - -/** - * Declaration of flags not sent through the wire. - **/ - -/** - * Used for marking lock as a target for -EINTR while cp_ast sleep - * emulation + race with upcoming bl_ast. - */ -#define LDLM_FL_FAIL_LOC 0x100000000ULL - -/** - * Used while processing the unused list to know that we have already - * handled this lock and decided to skip it. - */ -#define LDLM_FL_SKIPPED 0x200000000ULL -/* this lock is being destroyed */ -#define LDLM_FL_CBPENDING 0x400000000ULL -/* not a real flag, not saved in lock */ -#define LDLM_FL_WAIT_NOREPROC 0x800000000ULL -/* cancellation callback already run */ -#define LDLM_FL_CANCEL 0x1000000000ULL -#define LDLM_FL_LOCAL_ONLY 0x2000000000ULL -/* don't run the cancel callback under ldlm_cli_cancel_unused */ -#define LDLM_FL_FAILED 0x4000000000ULL -/* lock cancel has already been sent */ -#define LDLM_FL_CANCELING 0x8000000000ULL -/* local lock (ie, no srv/cli split) */ -#define LDLM_FL_LOCAL 0x10000000000ULL -/* XXX FIXME: This is being added to b_size as a low-risk fix to the fact that - * the LVB filling happens _after_ the lock has been granted, so another thread - * can match it before the LVB has been updated. As a dirty hack, we set - * LDLM_FL_LVB_READY only after we've done the LVB poop. - * this is only needed on LOV/OSC now, where LVB is actually used and callers - * must set it in input flags. - * - * The proper fix is to do the granting inside of the completion AST, which can - * be replaced with a LVB-aware wrapping function for OSC locks. That change is - * pretty high-risk, though, and would need a lot more testing. */ -#define LDLM_FL_LVB_READY 0x20000000000ULL -/* A lock contributes to the known minimum size (KMS) calculation until it has - * finished the part of its cancelation that performs write back on its dirty - * pages. It can remain on the granted list during this whole time. Threads - * racing to update the KMS after performing their writeback need to know to - * exclude each other's locks from the calculation as they walk the granted - * list. */ -#define LDLM_FL_KMS_IGNORE 0x40000000000ULL -/* completion AST to be executed */ -#define LDLM_FL_CP_REQD 0x80000000000ULL -/* cleanup_resource has already handled the lock */ -#define LDLM_FL_CLEANED 0x100000000000ULL -/* optimization hint: LDLM can run blocking callback from current context - * w/o involving separate thread. in order to decrease cs rate */ -#define LDLM_FL_ATOMIC_CB 0x200000000000ULL - -/* It may happen that a client initiates two operations, e.g. unlink and - * mkdir, such that the server sends a blocking AST for conflicting - * locks to this client for the first operation, whereas the second - * operation has canceled this lock and is waiting for rpc_lock which is - * taken by the first operation. LDLM_FL_BL_AST is set by - * ldlm_callback_handler() in the lock to prevent the Early Lock Cancel - * (ELC) code from cancelling it. - * - * LDLM_FL_BL_DONE is to be set by ldlm_cancel_callback() when lock - * cache is dropped to let ldlm_callback_handler() return EINVAL to the - * server. It is used when ELC RPC is already prepared and is waiting - * for rpc_lock, too late to send a separate CANCEL RPC. */ -#define LDLM_FL_BL_AST 0x400000000000ULL -#define LDLM_FL_BL_DONE 0x800000000000ULL -/* Don't put lock into the LRU list, so that it is not canceled due to aging. - * Used by MGC locks, they are cancelled only at unmount or by callback. */ -#define LDLM_FL_NO_LRU 0x1000000000000ULL +enum ldlm_side { + LDLM_NAMESPACE_SERVER = 0x01, + LDLM_NAMESPACE_CLIENT = 0x02 +}; /** * The blocking callback is overloaded to perform two functions. These flags @@ -314,16 +149,17 @@ typedef enum { #define LCK_COMPAT_COS (LCK_COS) /** @} Lock Compatibility Matrix */ -extern ldlm_mode_t lck_compat_array[]; +extern enum ldlm_mode lck_compat_array[]; -static inline void lockmode_verify(ldlm_mode_t mode) +static inline void lockmode_verify(enum ldlm_mode mode) { - LASSERT(mode > LCK_MINMODE && mode < LCK_MAXMODE); + LASSERT(mode > LCK_MINMODE && mode < LCK_MAXMODE); } -static inline int lockmode_compat(ldlm_mode_t exist_mode, ldlm_mode_t new_mode) +static inline int lockmode_compat(enum ldlm_mode exist_mode, + enum ldlm_mode new_mode) { - return (lck_compat_array[exist_mode] & new_mode); + return lck_compat_array[exist_mode] & new_mode; } /* @@ -377,8 +213,7 @@ struct ldlm_pool_ops { /** Recalculate pool \a pl usage */ int (*po_recalc)(struct ldlm_pool *pl); /** Cancel at least \a nr locks from pool \a pl */ - int (*po_shrink)(struct ldlm_pool *pl, int nr, - unsigned int gfp_mask); + int (*po_shrink)(struct ldlm_pool *pl, int nr, gfp_t gfp_mask); int (*po_setup)(struct ldlm_pool *pl, int limit); }; @@ -400,44 +235,49 @@ struct ldlm_pool_ops { * This feature is commonly referred to as lru_resize. */ struct ldlm_pool { - /** Pool proc directory. */ - cfs_proc_dir_entry_t *pl_proc_dir; + /** Pool debugfs directory. */ + struct dentry *pl_debugfs_entry; /** Pool name, must be long enough to hold compound proc entry name. */ char pl_name[100]; /** Lock for protecting SLV/CLV updates. */ spinlock_t pl_lock; /** Number of allowed locks in in pool, both, client and server side. */ - cfs_atomic_t pl_limit; + atomic_t pl_limit; /** Number of granted locks in */ - cfs_atomic_t pl_granted; + atomic_t pl_granted; /** Grant rate per T. */ - cfs_atomic_t pl_grant_rate; + atomic_t pl_grant_rate; /** Cancel rate per T. */ - cfs_atomic_t pl_cancel_rate; + atomic_t pl_cancel_rate; /** Server lock volume (SLV). Protected by pl_lock. */ __u64 pl_server_lock_volume; /** Current biggest client lock volume. Protected by pl_lock. */ __u64 pl_client_lock_volume; /** Lock volume factor. SLV on client is calculated as following: * server_slv * lock_volume_factor. */ - cfs_atomic_t pl_lock_volume_factor; + atomic_t pl_lock_volume_factor; /** Time when last SLV from server was obtained. */ - time_t pl_recalc_time; + time64_t pl_recalc_time; /** Recalculation period for pool. */ - time_t pl_recalc_period; + time64_t pl_recalc_period; /** Recalculation and shrink operations. */ struct ldlm_pool_ops *pl_ops; /** Number of planned locks for next period. */ int pl_grant_plan; /** Pool statistics. */ struct lprocfs_stats *pl_stats; + + /* sysfs object */ + struct kobject pl_kobj; + struct completion pl_kobj_unregister; }; -typedef int (*ldlm_res_policy)(struct ldlm_namespace *, struct ldlm_lock **, - void *req_cookie, ldlm_mode_t mode, __u64 flags, - void *data); +typedef int (*ldlm_res_policy)(const struct lu_env *env, + struct ldlm_namespace *, + struct ldlm_lock **, void *req_cookie, + enum ldlm_mode mode, __u64 flags, void *data); -typedef int (*ldlm_cancel_for_recovery)(struct ldlm_lock *lock); +typedef int (*ldlm_cancel_cbt)(struct ldlm_lock *lock); /** * LVB operations. @@ -448,27 +288,29 @@ typedef int (*ldlm_cancel_for_recovery)(struct ldlm_lock *lock); * Currently LVBs are used by: * - OSC-OST code to maintain current object size/times * - layout lock code to return the layout when the layout lock is granted + * + * To ensure delayed LVB initialization, it is highly recommended to use the set + * of ldlm_[res_]lvbo_[init,update,fill]() functions. */ struct ldlm_valblock_ops { - int (*lvbo_init)(struct ldlm_resource *res); - int (*lvbo_update)(struct ldlm_resource *res, - struct ptlrpc_request *r, - int increase); - int (*lvbo_free)(struct ldlm_resource *res); + int (*lvbo_init)(struct ldlm_resource *res); + int (*lvbo_update)(struct ldlm_resource *res, struct ldlm_lock *lock, + struct ptlrpc_request *r, int increase); + int (*lvbo_free)(struct ldlm_resource *res); /* Return size of lvb data appropriate RPC size can be reserved */ int (*lvbo_size)(struct ldlm_lock *lock); /* Called to fill in lvb data to RPC buffer @buf */ - int (*lvbo_fill)(struct ldlm_lock *lock, void *buf, int buflen); + int (*lvbo_fill)(struct ldlm_lock *lock, void *buf, int *buflen); }; /** * LDLM pools related, type of lock pool in the namespace. * Greedy means release cached locks aggressively */ -typedef enum { +enum ldlm_appetite { LDLM_NAMESPACE_GREEDY = 1 << 0, LDLM_NAMESPACE_MODEST = 1 << 1 -} ldlm_appetite_t; +}; /** * Default values for the "max_nolock_size", "contention_time" and @@ -487,30 +329,29 @@ struct ldlm_ns_bucket { * fact the network or overall system load is at fault */ struct adaptive_timeout nsb_at_estimate; + /** + * Which res in the bucket should we start with the reclaim. + */ + int nsb_reclaim_start; + /* counter of entries in this bucket */ + atomic_t nsb_count; }; enum { /** LDLM namespace lock stats */ - LDLM_NSS_LOCKS = 0, - LDLM_NSS_LAST + LDLM_NSS_LOCKS = 0, + LDLM_NSS_LAST }; -typedef enum { - /** invalide type */ - LDLM_NS_TYPE_UNKNOWN = 0, - /** mdc namespace */ - LDLM_NS_TYPE_MDC, - /** mds namespace */ - LDLM_NS_TYPE_MDT, - /** osc namespace */ - LDLM_NS_TYPE_OSC, - /** ost namespace */ - LDLM_NS_TYPE_OST, - /** mgc namespace */ - LDLM_NS_TYPE_MGC, - /** mgs namespace */ - LDLM_NS_TYPE_MGT, -} ldlm_ns_type_t; +enum ldlm_ns_type { + LDLM_NS_TYPE_UNKNOWN = 0, /**< invalid type */ + LDLM_NS_TYPE_MDC, /**< MDC namespace */ + LDLM_NS_TYPE_MDT, /**< MDT namespace */ + LDLM_NS_TYPE_OSC, /**< OSC namespace */ + LDLM_NS_TYPE_OST, /**< OST namespace */ + LDLM_NS_TYPE_MGC, /**< MGC namespace */ + LDLM_NS_TYPE_MGT, /**< MGT namespace */ +}; /** * LDLM Namespace. @@ -538,16 +379,21 @@ struct ldlm_namespace { struct obd_device *ns_obd; /** Flag indicating if namespace is on client instead of server */ - ldlm_side_t ns_client; + enum ldlm_side ns_client; + + /** name of this namespace */ + char *ns_name; /** Resource hash table for namespace. */ - cfs_hash_t *ns_rs_hash; + struct cfs_hash *ns_rs_hash; + struct ldlm_ns_bucket *ns_rs_buckets; + unsigned int ns_bucket_bits; /** serialize */ spinlock_t ns_lock; /** big refcount (by bucket) */ - cfs_atomic_t ns_bref; + atomic_t ns_bref; /** * Namespace connect flags supported by server (may be changed via @@ -558,11 +404,14 @@ struct ldlm_namespace { /** Client side original connect flags supported by server. */ __u64 ns_orig_connect_flags; + /* namespace debugfs dir entry */ + struct dentry *ns_debugfs_entry; + /** * Position in global namespace list linking all namespaces on * the node. */ - cfs_list_t ns_list_chain; + struct list_head ns_list_chain; /** * List of unused locks for this namespace. This list is also called @@ -574,9 +423,10 @@ struct ldlm_namespace { * to release from the head of this list. * Locks are linked via l_lru field in \see struct ldlm_lock. */ - cfs_list_t ns_unused_list; + struct list_head ns_unused_list; /** Number of locks in the LRU list above */ int ns_nr_unused; + struct list_head *ns_last_pos; /** * Maximum number of locks permitted in the LRU. If 0, means locks @@ -584,8 +434,10 @@ struct ldlm_namespace { * controlled by available memory on this client and on server. */ unsigned int ns_max_unused; + /** Maximum allowed age (last used time) for locks in the LRU */ - unsigned int ns_max_age; + ktime_t ns_max_age; + /** * Server only: number of times we evicted clients due to lack of reply * to ASTs. @@ -597,14 +449,20 @@ struct ldlm_namespace { * This allows the client to start caching negative dentries * for a directory and may save an RPC for a later stat. */ - unsigned int ns_ctime_age_limit; - + time64_t ns_ctime_age_limit; + /** + * Number of seconds since the lock was last used. The client may + * cancel the lock limited by this age and flush related data if + * any other client shows interest in it doing glimpse request. + * This allows to cache stat data locally for such files early. + */ + time64_t ns_dirty_age_limit; /** * Used to rate-limit ldlm_namespace_dump calls. * \see ldlm_namespace_dump. Increased by 10 seconds every time * it is called. */ - cfs_time_t ns_next_dump; + time64_t ns_next_dump; /** "policy" function that does actual lock conflict determination */ ldlm_res_policy ns_policy; @@ -625,11 +483,11 @@ struct ldlm_namespace { * Wait queue used by __ldlm_namespace_free. Gets woken up every time * a resource is removed. */ - cfs_waitq_t ns_waitq; + wait_queue_head_t ns_waitq; /** LDLM pool structure for this namespace */ struct ldlm_pool ns_pool; /** Definition of how eagerly unused locks will be released from LRU */ - ldlm_appetite_t ns_appetite; + enum ldlm_appetite ns_appetite; /** * If more than \a ns_contended_locks are found, the resource is @@ -642,7 +500,7 @@ struct ldlm_namespace { * The resources in this namespace remember contended state during * \a ns_contention_time, in seconds. */ - unsigned ns_contention_time; + time64_t ns_contention_time; /** * Limit size of contended extent locks, in bytes. @@ -655,8 +513,11 @@ struct ldlm_namespace { /** Limit of parallel AST RPC count. */ unsigned ns_max_parallel_ast; - /** Callback to cancel locks before replaying it during recovery. */ - ldlm_cancel_for_recovery ns_cancel_for_recovery; + /** + * Callback to check if a lock is good to be canceled by ELC or + * during recovery. + */ + ldlm_cancel_cbt ns_cancel; /** LDLM lock stats */ struct lprocfs_stats *ns_stats; @@ -666,6 +527,14 @@ struct ldlm_namespace { * recalculation of LDLM pool statistics should be skipped. */ unsigned ns_stopping:1; + + /** + * Which bucket should we start with the lock reclaim. + */ + int ns_reclaim_start; + + struct kobject ns_kobj; /* sysfs object */ + struct completion ns_kobj_unregister; }; /** @@ -674,8 +543,6 @@ struct ldlm_namespace { static inline int ns_is_client(struct ldlm_namespace *ns) { LASSERT(ns != NULL); - LASSERT(!(ns->ns_client & ~(LDLM_NAMESPACE_CLIENT | - LDLM_NAMESPACE_SERVER))); LASSERT(ns->ns_client == LDLM_NAMESPACE_CLIENT || ns->ns_client == LDLM_NAMESPACE_SERVER); return ns->ns_client == LDLM_NAMESPACE_CLIENT; @@ -687,8 +554,6 @@ static inline int ns_is_client(struct ldlm_namespace *ns) static inline int ns_is_server(struct ldlm_namespace *ns) { LASSERT(ns != NULL); - LASSERT(!(ns->ns_client & ~(LDLM_NAMESPACE_CLIENT | - LDLM_NAMESPACE_SERVER))); LASSERT(ns->ns_client == LDLM_NAMESPACE_CLIENT || ns->ns_client == LDLM_NAMESPACE_SERVER); return ns->ns_client == LDLM_NAMESPACE_SERVER; @@ -713,10 +578,10 @@ static inline int ns_connect_lru_resize(struct ldlm_namespace *ns) } static inline void ns_register_cancel(struct ldlm_namespace *ns, - ldlm_cancel_for_recovery arg) + ldlm_cancel_cbt arg) { - LASSERT(ns != NULL); - ns->ns_cancel_for_recovery = arg; + LASSERT(ns != NULL); + ns->ns_cancel = arg; } struct ldlm_lock; @@ -730,25 +595,49 @@ typedef int (*ldlm_completion_callback)(struct ldlm_lock *lock, __u64 flags, void *data); /** Type for glimpse callback function of a lock. */ typedef int (*ldlm_glimpse_callback)(struct ldlm_lock *lock, void *data); -/** Type for weight callback function of a lock. */ -typedef unsigned long (*ldlm_weigh_callback)(struct ldlm_lock *lock); + +/** Type for created callback function of a lock. */ +typedef void (*ldlm_created_callback)(struct ldlm_lock *lock); /** Work list for sending GL ASTs to multiple locks. */ struct ldlm_glimpse_work { struct ldlm_lock *gl_lock; /* lock to glimpse */ - cfs_list_t gl_list; /* linkage to other gl work structs */ + struct list_head gl_list; /* linkage to other gl work structs */ __u32 gl_flags;/* see LDLM_GL_WORK_* below */ union ldlm_gl_desc *gl_desc; /* glimpse descriptor to be packed in * glimpse callback request */ + ptlrpc_interpterer_t gl_interpret_reply; + void *gl_interpret_data; }; -/** The ldlm_glimpse_work is allocated on the stack and should not be freed. */ -#define LDLM_GL_WORK_NOFREE 0x1 +struct ldlm_bl_desc { + unsigned int bl_same_client:1, + bl_cos_incompat:1; +}; + +struct ldlm_cb_set_arg { + struct ptlrpc_request_set *set; + int type; /* LDLM_{CP,BL,GL}_CALLBACK */ + atomic_t restart; + struct list_head *list; + union ldlm_gl_desc *gl_desc; /* glimpse AST descriptor */ + ptlrpc_interpterer_t gl_interpret_reply; + void *gl_interpret_data; + struct ldlm_bl_desc *bl_desc; +}; + +struct ldlm_cb_async_args { + struct ldlm_cb_set_arg *ca_set_arg; + struct ldlm_lock *ca_lock; +}; + +/** The ldlm_glimpse_work was slab allocated & must be freed accordingly.*/ +#define LDLM_GL_WORK_SLAB_ALLOCATED 0x1 /** Interval node data for each LDLM_EXTENT lock. */ struct ldlm_interval { struct interval_node li_node; /* node for tree management */ - cfs_list_t li_group; /* the locks which have the same + struct list_head li_group; /* the locks which have the same * policy - group of the policy */ }; #define to_ldlm_interval(n) container_of(n, struct ldlm_interval, li_node) @@ -762,44 +651,55 @@ struct ldlm_interval { struct ldlm_interval_tree { /** Tree size. */ int lit_size; - ldlm_mode_t lit_mode; /* lock mode */ + enum ldlm_mode lit_mode; /* lock mode */ struct interval_node *lit_root; /* actual ldlm_interval */ }; +/** + * Lists of waiting locks for each inodebit type. + * A lock can be in several liq_waiting lists and it remains in lr_waiting. + */ +struct ldlm_ibits_queues { + struct list_head liq_waiting[MDS_INODELOCK_NUMBITS]; +}; + +struct ldlm_ibits_node { + struct list_head lin_link[MDS_INODELOCK_NUMBITS]; + struct ldlm_lock *lock; +}; + /** Whether to track references to exports by LDLM locks. */ #define LUSTRE_TRACKS_LOCK_EXP_REFS (0) /** Cancel flags. */ -typedef enum { - LCF_ASYNC = 0x1, /* Cancel locks asynchronously. */ - LCF_LOCAL = 0x2, /* Cancel locks locally, not notifing server */ - LCF_BL_AST = 0x4, /* Cancel locks marked as LDLM_FL_BL_AST - * in the same RPC */ -} ldlm_cancel_flags_t; +enum ldlm_cancel_flags { + LCF_ASYNC = 0x1, /* Cancel locks asynchronously. */ + LCF_LOCAL = 0x2, /* Cancel locks locally, not notifing server */ + LCF_BL_AST = 0x4, /* Cancel LDLM_FL_BL_AST locks in the same RPC */ +}; struct ldlm_flock { - __u64 start; - __u64 end; - __u64 owner; - __u64 blocking_owner; - struct obd_export *blocking_export; - /* Protected by the hash lock */ - __u32 blocking_refs; - __u32 pid; + __u64 start; + __u64 end; + __u64 owner; + __u64 blocking_owner; + struct obd_export *blocking_export; + atomic_t blocking_refs; + __u32 pid; }; -typedef union { - struct ldlm_extent l_extent; - struct ldlm_flock l_flock; - struct ldlm_inodebits l_inodebits; -} ldlm_policy_data_t; +union ldlm_policy_data { + struct ldlm_extent l_extent; + struct ldlm_flock l_flock; + struct ldlm_inodebits l_inodebits; +}; -void ldlm_convert_policy_to_wire(ldlm_type_t type, - const ldlm_policy_data_t *lpolicy, - ldlm_wire_policy_data_t *wpolicy); -void ldlm_convert_policy_to_local(struct obd_export *exp, ldlm_type_t type, - const ldlm_wire_policy_data_t *wpolicy, - ldlm_policy_data_t *lpolicy); +void ldlm_convert_policy_to_wire(enum ldlm_type type, + const union ldlm_policy_data *lpolicy, + union ldlm_wire_policy_data *wpolicy); +void ldlm_convert_policy_to_local(struct obd_export *exp, enum ldlm_type type, + const union ldlm_wire_policy_data *wpolicy, + union ldlm_policy_data *lpolicy); enum lvb_type { LVB_T_NONE = 0, @@ -809,6 +709,11 @@ enum lvb_type { }; /** + * LDLM_GID_ANY is used to match any group id in ldlm_lock_match(). + */ +#define LDLM_GID_ANY ((__u64)-1) + +/** * LDLM lock structure * * Represents a single LDLM lock and its state in memory. Each lock is @@ -831,12 +736,6 @@ struct ldlm_lock { */ struct portals_handle l_handle; /** - * Lock reference count. - * This is how many users have pointers to actual structure, so that - * we do not accidentally free lock structure that is in use. - */ - cfs_atomic_t l_refc; - /** * Internal spinlock protects l_resource. We should hold this lock * first before taking res_lock. */ @@ -850,36 +749,39 @@ struct ldlm_lock { * List item for client side LRU list. * Protected by ns_lock in struct ldlm_namespace. */ - cfs_list_t l_lru; + struct list_head l_lru; /** * Linkage to resource's lock queues according to current lock state. - * (could be granted, waiting or converting) + * (could be granted or waiting) * Protected by lr_lock in struct ldlm_resource. */ - cfs_list_t l_res_link; + struct list_head l_res_link; /** - * Tree node for ldlm_extent. + * Internal structures per lock type.. */ - struct ldlm_interval *l_tree_node; + union { + struct ldlm_interval *l_tree_node; + struct ldlm_ibits_node *l_ibits_node; + }; /** * Per export hash of locks. * Protected by per-bucket exp->exp_lock_hash locks. */ - cfs_hlist_node_t l_exp_hash; + struct hlist_node l_exp_hash; /** * Per export hash of flock locks. * Protected by per-bucket exp->exp_flock_hash locks. */ - cfs_hlist_node_t l_exp_flock_hash; + struct hlist_node l_exp_flock_hash; /** * Requested mode. * Protected by lr_lock. */ - ldlm_mode_t l_req_mode; + enum ldlm_mode l_req_mode; /** * Granted mode, also protected by lr_lock. */ - ldlm_mode_t l_granted_mode; + enum ldlm_mode l_granted_mode; /** Lock completion handler pointer. Called when lock is granted. */ ldlm_completion_callback l_completion_ast; /** @@ -900,9 +802,6 @@ struct ldlm_lock { */ ldlm_glimpse_callback l_glimpse_ast; - /** XXX apparently unused "weight" handler. To be removed? */ - ldlm_weigh_callback l_weigh_ast; - /** * Lock export. * This is a pointer to actual client export for locks that were granted @@ -926,14 +825,14 @@ struct ldlm_lock { * Representation of private data specific for a lock type. * Examples are: extent range for extent lock or bitmask for ibits locks */ - ldlm_policy_data_t l_policy_data; + union ldlm_policy_data l_policy_data; /** - * Lock state flags. - * Like whenever we receive any blocking requests for this lock, etc. - * Protected by lr_lock. + * Lock state flags. Protected by lr_lock. + * \see lustre_dlm_flags.h where the bits are defined. */ __u64 l_flags; + /** * Lock r/w usage counters. * Protected by lr_lock. @@ -945,51 +844,16 @@ struct ldlm_lock { * it's no longer in use. If the lock is not granted, a process sleeps * on this waitq to learn when it becomes granted. */ - cfs_waitq_t l_waitq; + wait_queue_head_t l_waitq; /** - * Seconds. It will be updated if there is any activity related to - * the lock, e.g. enqueue the lock or send blocking AST. + * Time, in nanoseconds, last used by e.g. being matched by lock match. */ - cfs_time_t l_last_activity; - - /** - * Time last used by e.g. being matched by lock match. - * Jiffies. Should be converted to time if needed. - */ - cfs_time_t l_last_used; + ktime_t l_last_used; /** Originally requested extent for the extent lock. */ struct ldlm_extent l_req_extent; - unsigned int l_failed:1, - /** - * Set for locks that were removed from class hash table and will be - * destroyed when last reference to them is released. Set by - * ldlm_lock_destroy_internal(). - * - * Protected by lock and resource locks. - */ - l_destroyed:1, - /* - * it's set in lock_res_and_lock() and unset in unlock_res_and_lock(). - * - * NB: compared with check_res_locked(), checking this bit is cheaper. - * Also, spin_is_locked() is deprecated for kernel code; one reason is - * because it works only for SMP so user needs to add extra macros like - * LASSERT_SPIN_LOCKED for uniprocessor kernels. - */ - l_res_locked:1, - /* - * It's set once we call ldlm_add_waiting_lock_res_locked() - * to start the lock-timeout timer and it will never be reset. - * - * Protected by lock_res_and_lock(). - */ - l_waited:1, - /** Flag whether this is a server namespace lock. */ - l_ns_srv:1; - /* * Client-side-only members. */ @@ -998,6 +862,7 @@ struct ldlm_lock { /** * Temporary storage for a LVB received during an enqueue operation. + * May be vmalloc'd, so needs to be freed with OBD_FREE_LARGE(). */ __u32 l_lvb_len; void *l_lvb_data; @@ -1005,6 +870,19 @@ struct ldlm_lock { /** Private storage for lock user. Opaque to LDLM. */ void *l_ast_data; + union { + /** + * Seconds. It will be updated if there is any activity related to + * the lock at client, e.g. enqueue the lock. For server it is the + * time when blocking ast was sent. + */ + time64_t l_activity; + time64_t l_blast_sent; + }; + + /* separate ost_lvb used mostly by Data-on-MDT for now. + * It is introduced to don't mix with layout lock data. */ + struct ost_lvb l_ost_lvb; /* * Server-side-only members. */ @@ -1021,10 +899,9 @@ struct ldlm_lock { * The lists this could be linked into are: * waiting_locks_list (protected by waiting_locks_spinlock), * then if the lock timed out, it is moved to - * expired_lock_thread.elt_expired_locks for further processing. - * Protected by elt_lock. + * expired_lock_list for further processing. */ - cfs_list_t l_pending_chain; + struct list_head l_pending_chain; /** * Set when lock is sent a blocking AST. Time in seconds when timeout @@ -1033,7 +910,7 @@ struct ldlm_lock { * under this lock. * \see ost_rw_prolong_locks */ - cfs_time_t l_callback_timeout; + time64_t l_callback_timeout; /** Local PID of process which created this lock. */ __u32 l_pid; @@ -1046,11 +923,11 @@ struct ldlm_lock { */ int l_bl_ast_run; /** List item ldlm_add_ast_work_item() for case of blocking ASTs. */ - cfs_list_t l_bl_ast; + struct list_head l_bl_ast; /** List item ldlm_add_ast_work_item() for case of completion ASTs. */ - cfs_list_t l_cp_ast; + struct list_head l_cp_ast; /** For ldlm_add_ast_work_item() for "revoke" AST used in COS. */ - cfs_list_t l_rk_ast; + struct list_head l_rk_ast; /** * Pointer to a conflicting lock that caused blocking AST to be sent @@ -1062,8 +939,8 @@ struct ldlm_lock { * Protected by lr_lock, linkages to "skip lists". * For more explanations of skip lists see ldlm/ldlm_inodebits.c */ - cfs_list_t l_sl_mode; - cfs_list_t l_sl_policy; + struct list_head l_sl_mode; + struct list_head l_sl_policy; /** Reference tracking structure to debug leaked locks. */ struct lu_ref l_reference; @@ -1072,7 +949,7 @@ struct ldlm_lock { /** number of export references taken */ int l_exp_refs_nr; /** link all locks referencing one export */ - cfs_list_t l_exp_refs_link; + struct list_head l_exp_refs_link; /** referenced export object */ struct obd_export *l_exp_refs_target; #endif @@ -1082,7 +959,37 @@ struct ldlm_lock { * Lock order of waiting_lists_spinlock, exp_bl_list_lock and res lock * is: res lock -> exp_bl_list_lock -> wanting_lists_spinlock. */ - cfs_list_t l_exp_list; + struct list_head l_exp_list; +}; + +/** + * Describe the overlap between two locks. itree_overlap_cb data. + */ +struct ldlm_match_data { + struct ldlm_lock *lmd_old; + struct ldlm_lock *lmd_lock; + enum ldlm_mode *lmd_mode; + union ldlm_policy_data *lmd_policy; + __u64 lmd_flags; + __u64 lmd_skip_flags; + int lmd_unref; + bool lmd_has_ast_data; +}; + +/** For uncommitted cross-MDT lock, store transno this lock belongs to */ +#define l_transno l_client_cookie + +/** For uncommitted cross-MDT lock, which is client lock, share with l_rk_ast + * which is for server. */ +#define l_slc_link l_rk_ast + +#define HANDLE_MAP_SIZE ((LMV_MAX_STRIPE_COUNT + 7) >> 3) + +struct lustre_handle_array { + unsigned int ha_count; + /* ha_map is used as bit flag to indicate handle is remote or local */ + char ha_map[HANDLE_MAP_SIZE]; + struct lustre_handle ha_handles[0]; }; /** @@ -1103,7 +1010,10 @@ struct ldlm_resource { * List item for list in namespace hash. * protected by ns_lock */ - cfs_hlist_node_t lr_hash; + struct hlist_node lr_hash; + + /** Reference count for this resource */ + atomic_t lr_refcount; /** Spinlock to protect locks under this resource. */ spinlock_t lr_lock; @@ -1112,58 +1022,76 @@ struct ldlm_resource { * protected by lr_lock * @{ */ /** List of locks in granted state */ - cfs_list_t lr_granted; - /** List of locks waiting to change their granted mode (converted) */ - cfs_list_t lr_converting; + struct list_head lr_granted; /** * List of locks that could not be granted due to conflicts and * that are waiting for conflicts to go away */ - cfs_list_t lr_waiting; + struct list_head lr_waiting; /** @} */ - /* XXX No longer needed? Remove ASAP */ - ldlm_mode_t lr_most_restr; - - /** Type of locks this resource can hold. Only one type per resource. */ - ldlm_type_t lr_type; /* LDLM_{PLAIN,EXTENT,FLOCK,IBITS} */ - /** Resource name */ struct ldlm_res_id lr_name; - /** Reference count for this resource */ - cfs_atomic_t lr_refcount; - /** - * Interval trees (only for extent locks) for all modes of this resource - */ - struct ldlm_interval_tree lr_itree[LCK_MODE_NUM]; + union { + /** + * Interval trees (only for extent locks) for all modes of + * this resource + */ + struct ldlm_interval_tree *lr_itree; + struct ldlm_ibits_queues *lr_ibits_queues; + }; + + union { + /** + * When the resource was considered as contended, + * used only on server side. + */ + time64_t lr_contention_time; + /** + * Associated inode, used only on client side. + */ + struct inode *lr_lvb_inode; + }; + + /** Type of locks this resource can hold. Only one type per resource. */ + enum ldlm_type lr_type; /* LDLM_{PLAIN,EXTENT,FLOCK,IBITS} */ /** * Server-side-only lock value block elements. * To serialize lvbo_init. */ + int lr_lvb_len; struct mutex lr_lvb_mutex; - __u32 lr_lvb_len; /** protected by lr_lock */ void *lr_lvb_data; + /** is lvb initialized ? */ + bool lr_lvb_initialized; - /** When the resource was considered as contended. */ - cfs_time_t lr_contention_time; /** List of references to this resource. For debugging. */ struct lu_ref lr_reference; - - struct inode *lr_lvb_inode; }; +static inline int ldlm_is_granted(struct ldlm_lock *lock) +{ + return lock->l_req_mode == lock->l_granted_mode; +} + static inline bool ldlm_has_layout(struct ldlm_lock *lock) { return lock->l_resource->lr_type == LDLM_IBITS && lock->l_policy_data.l_inodebits.bits & MDS_INODELOCK_LAYOUT; } +static inline bool ldlm_has_dom(struct ldlm_lock *lock) +{ + return lock->l_resource->lr_type == LDLM_IBITS && + lock->l_policy_data.l_inodebits.bits & MDS_INODELOCK_DOM; +} + static inline char * ldlm_ns_name(struct ldlm_namespace *ns) { - return ns->ns_rs_hash->hs_name; + return ns->ns_name; } static inline struct ldlm_namespace * @@ -1193,11 +1121,32 @@ ldlm_lock_to_ns_at(struct ldlm_lock *lock) static inline int ldlm_lvbo_init(struct ldlm_resource *res) { struct ldlm_namespace *ns = ldlm_res_to_ns(res); + int rc = 0; - if (ns->ns_lvbo != NULL && ns->ns_lvbo->lvbo_init != NULL) - return ns->ns_lvbo->lvbo_init(res); + if (ns->ns_lvbo == NULL || ns->ns_lvbo->lvbo_init == NULL || + res->lr_lvb_initialized) + return 0; - return 0; + mutex_lock(&res->lr_lvb_mutex); + /* Did we lose the race? */ + if (res->lr_lvb_initialized) { + mutex_unlock(&res->lr_lvb_mutex); + return 0; + } + rc = ns->ns_lvbo->lvbo_init(res); + if (rc < 0) { + CDEBUG(D_DLMTRACE, "lvbo_init failed for resource : rc = %d\n", + rc); + if (res->lr_lvb_data != NULL) { + OBD_FREE(res->lr_lvb_data, res->lr_lvb_len); + res->lr_lvb_data = NULL; + } + res->lr_lvb_len = rc; + } else { + res->lr_lvb_initialized = true; + } + mutex_unlock(&res->lr_lvb_mutex); + return rc; } static inline int ldlm_lvbo_size(struct ldlm_lock *lock) @@ -1210,45 +1159,58 @@ static inline int ldlm_lvbo_size(struct ldlm_lock *lock) return 0; } -static inline int ldlm_lvbo_fill(struct ldlm_lock *lock, void *buf, int len) +static inline int ldlm_lvbo_fill(struct ldlm_lock *lock, void *buf, int *len) { struct ldlm_namespace *ns = ldlm_lock_to_ns(lock); + int rc; if (ns->ns_lvbo != NULL) { LASSERT(ns->ns_lvbo->lvbo_fill != NULL); + /* init lvb now if not already */ + rc = ldlm_lvbo_init(lock->l_resource); + if (rc < 0) { + CERROR("lock %p: delayed lvb init failed (rc %d)", + lock, rc); + return rc; + } return ns->ns_lvbo->lvbo_fill(lock, buf, len); } return 0; } struct ldlm_ast_work { - struct ldlm_lock *w_lock; - int w_blocking; - struct ldlm_lock_desc w_desc; - cfs_list_t w_list; - int w_flags; - void *w_data; - int w_datalen; + struct ldlm_lock *w_lock; + int w_blocking; + struct ldlm_lock_desc w_desc; + struct list_head w_list; + int w_flags; + void *w_data; + int w_datalen; }; /** * Common ldlm_enqueue parameters */ struct ldlm_enqueue_info { - __u32 ei_type; /** Type of the lock being enqueued. */ - __u32 ei_mode; /** Mode of the lock being enqueued. */ - void *ei_cb_bl; /** blocking lock callback */ - void *ei_cb_cp; /** lock completion callback */ - void *ei_cb_gl; /** lock glimpse callback */ - void *ei_cb_wg; /** lock weigh callback */ - void *ei_cbdata; /** Data to be passed into callbacks. */ + enum ldlm_type ei_type; /** Type of the lock being enqueued. */ + enum ldlm_mode ei_mode; /** Mode of the lock being enqueued. */ + void *ei_cb_bl; /** blocking lock callback */ + void *ei_cb_local_bl; /** blocking local lock callback */ + void *ei_cb_cp; /** lock completion callback */ + void *ei_cb_gl; /** lock glimpse callback */ + ldlm_created_callback ei_cb_created; /** lock created callback */ + void *ei_cbdata; /** Data to be passed into callbacks. */ + void *ei_namespace; /** lock namespace **/ + u64 ei_inodebits; /** lock inode bits **/ + unsigned int ei_enq_slave:1; /** whether enqueue slave stripes */ + unsigned int ei_enq_slot:1; /** whether acquire rpc slot */ }; -extern struct obd_ops ldlm_obd_ops; +#define ei_res_id ei_cb_gl extern char *ldlm_lockname[]; extern char *ldlm_typename[]; -extern char *ldlm_it2str(int it); +extern const char *ldlm_it2str(enum ldlm_intent_flags it); /** * Just a fancy CDEBUG call with log level preset to LDLM_DEBUG. @@ -1281,7 +1243,7 @@ void _ldlm_lock_debug(struct ldlm_lock *lock, * Rate-limited version of lock printing function. */ #define LDLM_DEBUG_LIMIT(mask, lock, fmt, a...) do { \ - static cfs_debug_limit_state_t _ldlm_cdls; \ + static struct cfs_debug_limit_state _ldlm_cdls; \ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, &_ldlm_cdls); \ ldlm_lock_debug(&msgdata, mask, &_ldlm_cdls, lock, "### " fmt , ##a);\ } while (0) @@ -1305,9 +1267,48 @@ void _ldlm_lock_debug(struct ldlm_lock *lock, # define LDLM_ERROR(lock, fmt, a...) ((void)0) #endif +/* + * Three intentions can be used for the policy functions in + * ldlm_processing_policy. + * + * LDLM_PROCESS_RESCAN: + * + * It's used when policy functions are called from ldlm_reprocess_queue() to + * reprocess the wait list and try to grant locks, blocking ASTs + * have already been sent in this situation, completion ASTs need be sent for + * the locks being granted. + * + * LDLM_PROCESS_ENQUEUE: + * + * It's used when policy functions are called from ldlm_lock_enqueue() to + * process the wait list for handling an enqueue request, blocking + * ASTs have not been sent yet, so list of conflicting locks would be + * collected and ASTs sent. + * + * LDLM_PROCESS_RECOVERY: + * + * It's used when policy functions are called from ldlm_reprocess_queue() to + * reprocess the wait list when recovery done. In case of blocking + * ASTs are lost before recovery, it needs not only to grant locks if + * available, but also send blocking ASTs to the locks doesn't have AST sent + * flag. Completion ASTs need be sent for the locks being granted. + */ +enum ldlm_process_intention { + LDLM_PROCESS_RESCAN = 0, + LDLM_PROCESS_ENQUEUE = 1, + LDLM_PROCESS_RECOVERY = 2, +}; + typedef int (*ldlm_processing_policy)(struct ldlm_lock *lock, __u64 *flags, - int first_enq, ldlm_error_t *err, - cfs_list_t *work_list); + enum ldlm_process_intention intention, + enum ldlm_error *err, + struct list_head *work_list); + +typedef int (*ldlm_reprocessing_policy)(struct ldlm_resource *res, + struct list_head *queue, + struct list_head *work_list, + enum ldlm_process_intention intention, + struct ldlm_lock *hint); /** * Return values for lock iterators. @@ -1340,11 +1341,23 @@ int ldlm_flock_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data); /* ldlm_extent.c */ __u64 ldlm_extent_shift_kms(struct ldlm_lock *lock, __u64 old_kms); +struct ldlm_prolong_args { + struct obd_export *lpa_export; + struct ldlm_res_id lpa_resid; + struct ldlm_extent lpa_extent; + enum ldlm_mode lpa_mode; + time64_t lpa_timeout; + int lpa_locks_cnt; + int lpa_blocks_cnt; +}; +void ldlm_lock_prolong_one(struct ldlm_lock *lock, + struct ldlm_prolong_args *arg); +void ldlm_resource_prolong(struct ldlm_prolong_args *arg); + struct ldlm_callback_suite { ldlm_completion_callback lcs_completion; ldlm_blocking_callback lcs_blocking; ldlm_glimpse_callback lcs_glimpse; - ldlm_weigh_callback lcs_weigh; }; /* ldlm_lockd.c */ @@ -1358,7 +1371,8 @@ int ldlm_server_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *, void *data, int flag); int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data); int ldlm_server_glimpse_ast(struct ldlm_lock *lock, void *data); -int ldlm_glimpse_locks(struct ldlm_resource *res, cfs_list_t *gl_work_list); +int ldlm_glimpse_locks(struct ldlm_resource *res, + struct list_head *gl_work_list); /** @} ldlm_srv_ast */ /** \defgroup ldlm_handlers Server LDLM handlers @@ -1366,23 +1380,22 @@ int ldlm_glimpse_locks(struct ldlm_resource *res, cfs_list_t *gl_work_list); * MDT or OST to pass through LDLM requests to LDLM for handling * @{ */ -int ldlm_handle_enqueue(struct ptlrpc_request *req, ldlm_completion_callback, - ldlm_blocking_callback, ldlm_glimpse_callback); int ldlm_handle_enqueue0(struct ldlm_namespace *ns, struct ptlrpc_request *req, - const struct ldlm_request *dlm_req, - const struct ldlm_callback_suite *cbs); -int ldlm_handle_convert(struct ptlrpc_request *req); + const struct ldlm_request *dlm_req, + const struct ldlm_callback_suite *cbs); int ldlm_handle_convert0(struct ptlrpc_request *req, - const struct ldlm_request *dlm_req); + const struct ldlm_request *dlm_req); int ldlm_handle_cancel(struct ptlrpc_request *req); int ldlm_request_cancel(struct ptlrpc_request *req, - const struct ldlm_request *dlm_req, int first); + const struct ldlm_request *dlm_req, + int first, enum lustre_at_flags flags); /** @} ldlm_handlers */ void ldlm_revoke_export_locks(struct obd_export *exp); +time64_t ldlm_bl_timeout(struct ldlm_lock *lock); #endif int ldlm_del_waiting_lock(struct ldlm_lock *lock); -int ldlm_refresh_waiting_lock(struct ldlm_lock *lock, int timeout); +int ldlm_refresh_waiting_lock(struct ldlm_lock *lock, time64_t timeout); int ldlm_get_ref(void); void ldlm_put_ref(void); int ldlm_init_export(struct obd_export *exp); @@ -1392,6 +1405,8 @@ struct ldlm_lock *ldlm_request_lock(struct ptlrpc_request *req); /* ldlm_lock.c */ #ifdef HAVE_SERVER_SUPPORT ldlm_processing_policy ldlm_get_processing_policy(struct ldlm_resource *res); +ldlm_reprocessing_policy +ldlm_get_reprocessing_policy(struct ldlm_resource *res); #endif void ldlm_register_intent(struct ldlm_namespace *ns, ldlm_res_policy arg); void ldlm_lock2handle(const struct ldlm_lock *lock, @@ -1399,7 +1414,7 @@ void ldlm_lock2handle(const struct ldlm_lock *lock, struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *, __u64 flags); void ldlm_cancel_callback(struct ldlm_lock *); int ldlm_lock_remove_from_lru(struct ldlm_lock *); -int ldlm_lock_set_data(struct lustre_handle *, void *); +int ldlm_lock_set_data(const struct lustre_handle *lockh, void *data); /** * Obtain a lock reference by its handle. @@ -1410,7 +1425,7 @@ static inline struct ldlm_lock *ldlm_handle2lock(const struct lustre_handle *h) } #define LDLM_LOCK_REF_DEL(lock) \ - lu_ref_del(&lock->l_reference, "handle", cfs_current()) + lu_ref_del(&lock->l_reference, "handle", current) static inline struct ldlm_lock * ldlm_handle2lock_long(const struct lustre_handle *h, __u64 flags) @@ -1425,22 +1440,40 @@ ldlm_handle2lock_long(const struct lustre_handle *h, __u64 flags) /** * Update Lock Value Block Operations (LVBO) on a resource taking into account - * data from reqest \a r + * data from request \a r */ +static inline int ldlm_lvbo_update(struct ldlm_resource *res, + struct ldlm_lock *lock, + struct ptlrpc_request *req, int increase) +{ + struct ldlm_namespace *ns = ldlm_res_to_ns(res); + int rc; + + /* delayed lvb init may be required */ + rc = ldlm_lvbo_init(res); + if (rc < 0) { + CERROR("delayed lvb init failed (rc %d)\n", rc); + return rc; + } + + if (ns->ns_lvbo && ns->ns_lvbo->lvbo_update) + return ns->ns_lvbo->lvbo_update(res, lock, req, increase); + + return 0; +} + static inline int ldlm_res_lvbo_update(struct ldlm_resource *res, - struct ptlrpc_request *r, int increase) + struct ptlrpc_request *req, + int increase) { - if (ldlm_res_to_ns(res)->ns_lvbo && - ldlm_res_to_ns(res)->ns_lvbo->lvbo_update) { - return ldlm_res_to_ns(res)->ns_lvbo->lvbo_update(res, r, - increase); - } - return 0; + return ldlm_lvbo_update(res, NULL, req, increase); } -int ldlm_error2errno(ldlm_error_t error); -ldlm_error_t ldlm_errno2error(int err_no); /* don't call it `errno': this - * confuses user-space. */ +int is_granted_or_cancelled_nolock(struct ldlm_lock *lock); + +int ldlm_error2errno(enum ldlm_error error); +enum ldlm_error ldlm_errno2error(int err_no); /* don't call it `errno': this + * confuses user-space. */ #if LUSTRE_TRACKS_LOCK_EXP_REFS void ldlm_dump_export_locks(struct obd_export *exp); #endif @@ -1473,95 +1506,141 @@ do { \ lock; \ }) -#define ldlm_lock_list_put(head, member, count) \ -({ \ - struct ldlm_lock *_lock, *_next; \ - int c = count; \ - cfs_list_for_each_entry_safe(_lock, _next, head, member) { \ - if (c-- == 0) \ - break; \ - cfs_list_del_init(&_lock->member); \ - LDLM_LOCK_RELEASE(_lock); \ - } \ - LASSERT(c <= 0); \ +#define ldlm_lock_list_put(head, member, count) \ +({ \ + struct ldlm_lock *_lock, *_next; \ + int c = count; \ + list_for_each_entry_safe(_lock, _next, head, member) { \ + if (c-- == 0) \ + break; \ + list_del_init(&_lock->member); \ + LDLM_LOCK_RELEASE(_lock); \ + } \ + LASSERT(c <= 0); \ }) struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock); void ldlm_lock_put(struct ldlm_lock *lock); void ldlm_lock_destroy(struct ldlm_lock *lock); void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc); -void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode); -int ldlm_lock_addref_try(struct lustre_handle *lockh, __u32 mode); -void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode); -void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode); +void ldlm_lock_addref(const struct lustre_handle *lockh, enum ldlm_mode mode); +int ldlm_lock_addref_try(const struct lustre_handle *lockh, + enum ldlm_mode mode); +void ldlm_lock_decref(const struct lustre_handle *lockh, enum ldlm_mode mode); +void ldlm_lock_decref_and_cancel(const struct lustre_handle *lockh, + enum ldlm_mode mode); void ldlm_lock_fail_match_locked(struct ldlm_lock *lock); void ldlm_lock_fail_match(struct ldlm_lock *lock); void ldlm_lock_allow_match(struct ldlm_lock *lock); void ldlm_lock_allow_match_locked(struct ldlm_lock *lock); -ldlm_mode_t ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags, - const struct ldlm_res_id *, ldlm_type_t type, - ldlm_policy_data_t *, ldlm_mode_t mode, - struct lustre_handle *, int unref); -ldlm_mode_t ldlm_revalidate_lock_handle(struct lustre_handle *lockh, - __u64 *bits); -struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode, - __u32 *flags); -void ldlm_lock_downgrade(struct ldlm_lock *lock, int new_mode); +enum ldlm_mode ldlm_lock_match_with_skip(struct ldlm_namespace *ns, + __u64 flags, __u64 skip_flags, + const struct ldlm_res_id *res_id, + enum ldlm_type type, + union ldlm_policy_data *policy, + enum ldlm_mode mode, + struct lustre_handle *lh, + int unref); +static inline enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, + __u64 flags, + const struct ldlm_res_id *res_id, + enum ldlm_type type, + union ldlm_policy_data *policy, + enum ldlm_mode mode, + struct lustre_handle *lh, + int unref) +{ + return ldlm_lock_match_with_skip(ns, flags, 0, res_id, type, policy, + mode, lh, unref); +} +struct ldlm_lock *search_itree(struct ldlm_resource *res, + struct ldlm_match_data *data); +enum ldlm_mode ldlm_revalidate_lock_handle(const struct lustre_handle *lockh, + __u64 *bits); +void ldlm_lock_mode_downgrade(struct ldlm_lock *lock, enum ldlm_mode new_mode); void ldlm_lock_cancel(struct ldlm_lock *lock); -void ldlm_reprocess_all(struct ldlm_resource *res); -void ldlm_reprocess_all_ns(struct ldlm_namespace *ns); -void ldlm_lock_dump_handle(int level, struct lustre_handle *); +void ldlm_reprocess_all(struct ldlm_resource *res, struct ldlm_lock *hint); +void ldlm_reprocess_recovery_done(struct ldlm_namespace *ns); +void ldlm_lock_dump_handle(int level, const struct lustre_handle *lockh); void ldlm_unlink_lock_skiplist(struct ldlm_lock *req); /* resource.c */ -struct ldlm_namespace * -ldlm_namespace_new(struct obd_device *obd, char *name, - ldlm_side_t client, ldlm_appetite_t apt, - ldlm_ns_type_t ns_type); +struct ldlm_namespace *ldlm_namespace_new(struct obd_device *obd, char *name, + enum ldlm_side client, + enum ldlm_appetite apt, + enum ldlm_ns_type ns_type); int ldlm_namespace_cleanup(struct ldlm_namespace *ns, __u64 flags); +void ldlm_namespace_free_prior(struct ldlm_namespace *ns, + struct obd_import *imp, + int force); +void ldlm_namespace_free_post(struct ldlm_namespace *ns); void ldlm_namespace_free(struct ldlm_namespace *ns, - struct obd_import *imp, int force); -void ldlm_namespace_register(struct ldlm_namespace *ns, ldlm_side_t client); -void ldlm_namespace_unregister(struct ldlm_namespace *ns, ldlm_side_t client); -void ldlm_namespace_move_locked(struct ldlm_namespace *ns, ldlm_side_t client); -struct ldlm_namespace *ldlm_namespace_first_locked(ldlm_side_t client); + struct obd_import *imp, int force); +void ldlm_namespace_register(struct ldlm_namespace *ns, enum ldlm_side client); +void ldlm_namespace_unregister(struct ldlm_namespace *ns, + enum ldlm_side client); void ldlm_namespace_get(struct ldlm_namespace *ns); void ldlm_namespace_put(struct ldlm_namespace *ns); -int ldlm_proc_setup(void); -#ifdef LPROCFS -void ldlm_proc_cleanup(void); -#else -static inline void ldlm_proc_cleanup(void) {} -#endif + +int ldlm_debugfs_setup(void); +void ldlm_debugfs_cleanup(void); + +static inline void ldlm_svc_get_eopc(const struct ldlm_request *dlm_req, + struct lprocfs_stats *srv_stats) +{ + int lock_type = 0, op = 0; + + lock_type = dlm_req->lock_desc.l_resource.lr_type; + + switch (lock_type) { + case LDLM_PLAIN: + op = PTLRPC_LAST_CNTR + LDLM_PLAIN_ENQUEUE; + break; + case LDLM_EXTENT: + op = PTLRPC_LAST_CNTR + LDLM_EXTENT_ENQUEUE; + break; + case LDLM_FLOCK: + op = PTLRPC_LAST_CNTR + LDLM_FLOCK_ENQUEUE; + break; + case LDLM_IBITS: + op = PTLRPC_LAST_CNTR + LDLM_IBITS_ENQUEUE; + break; + default: + op = 0; + break; + } + + if (op != 0) + lprocfs_counter_incr(srv_stats, op); +} /* resource.c - internal */ struct ldlm_resource *ldlm_resource_get(struct ldlm_namespace *ns, - struct ldlm_resource *parent, - const struct ldlm_res_id *, - ldlm_type_t type, int create); + struct ldlm_resource *parent, + const struct ldlm_res_id *, + enum ldlm_type type, int create); struct ldlm_resource *ldlm_resource_getref(struct ldlm_resource *res); int ldlm_resource_putref(struct ldlm_resource *res); void ldlm_resource_add_lock(struct ldlm_resource *res, - cfs_list_t *head, - struct ldlm_lock *lock); + struct list_head *head, + struct ldlm_lock *lock); void ldlm_resource_unlink_lock(struct ldlm_lock *lock); void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc); -void ldlm_dump_all_namespaces(ldlm_side_t client, int level); +void ldlm_dump_all_namespaces(enum ldlm_side client, int level); void ldlm_namespace_dump(int level, struct ldlm_namespace *); void ldlm_resource_dump(int level, struct ldlm_resource *); int ldlm_lock_change_resource(struct ldlm_namespace *, struct ldlm_lock *, const struct ldlm_res_id *); #define LDLM_RESOURCE_ADDREF(res) do { \ - lu_ref_add_atomic(&(res)->lr_reference, __FUNCTION__, cfs_current()); \ + lu_ref_add_atomic(&(res)->lr_reference, __FUNCTION__, current); \ } while (0) #define LDLM_RESOURCE_DELREF(res) do { \ - lu_ref_del(&(res)->lr_reference, __FUNCTION__, cfs_current()); \ + lu_ref_del(&(res)->lr_reference, __FUNCTION__, current); \ } while (0) /* ldlm_request.c */ -int ldlm_expired_completion_wait(void *data); /** \defgroup ldlm_local_ast Default AST handlers for local locks * These AST handlers are typically used for server-side local locks and are * also used by client-side lock handlers to perform minimum level base @@ -1580,57 +1659,70 @@ int ldlm_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data); * to obtain and release locks. * @{ */ int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp, - struct ldlm_enqueue_info *einfo, - const struct ldlm_res_id *res_id, - ldlm_policy_data_t const *policy, __u64 *flags, + struct ldlm_enqueue_info *einfo, + const struct ldlm_res_id *res_id, + union ldlm_policy_data const *policy, __u64 *flags, void *lvb, __u32 lvb_len, enum lvb_type lvb_type, struct lustre_handle *lockh, int async); int ldlm_prep_enqueue_req(struct obd_export *exp, - struct ptlrpc_request *req, - cfs_list_t *cancels, - int count); -int ldlm_prep_elc_req(struct obd_export *exp, - struct ptlrpc_request *req, - int version, int opc, int canceloff, - cfs_list_t *cancels, int count); + struct ptlrpc_request *req, + struct list_head *cancels, + int count); +int ldlm_prep_elc_req(struct obd_export *exp, struct ptlrpc_request *req, + int version, int opc, int canceloff, + struct list_head *cancels, int count); + +struct ptlrpc_request *ldlm_enqueue_pack(struct obd_export *exp, int lvb_len); +int ldlm_handle_enqueue0(struct ldlm_namespace *ns, struct ptlrpc_request *req, + const struct ldlm_request *dlm_req, + const struct ldlm_callback_suite *cbs); int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req, - ldlm_type_t type, __u8 with_policy, ldlm_mode_t mode, - __u64 *flags, void *lvb, __u32 lvb_len, - struct lustre_handle *lockh, int rc); -int ldlm_cli_enqueue_local(struct ldlm_namespace *ns, - const struct ldlm_res_id *res_id, - ldlm_type_t type, ldlm_policy_data_t *policy, - ldlm_mode_t mode, __u64 *flags, - ldlm_blocking_callback blocking, - ldlm_completion_callback completion, - ldlm_glimpse_callback glimpse, + enum ldlm_type type, __u8 with_policy, + enum ldlm_mode mode, __u64 *flags, void *lvb, + __u32 lvb_len, + const struct lustre_handle *lockh, int rc); +int ldlm_cli_enqueue_local(const struct lu_env *env, + struct ldlm_namespace *ns, + const struct ldlm_res_id *res_id, + enum ldlm_type type, union ldlm_policy_data *policy, + enum ldlm_mode mode, __u64 *flags, + ldlm_blocking_callback blocking, + ldlm_completion_callback completion, + ldlm_glimpse_callback glimpse, void *data, __u32 lvb_len, enum lvb_type lvb_type, - const __u64 *client_cookie, - struct lustre_handle *lockh); -int ldlm_server_ast(struct lustre_handle *lockh, struct ldlm_lock_desc *new, - void *data, __u32 data_len); -int ldlm_cli_convert(struct lustre_handle *, int new_mode, __u32 *flags); + const __u64 *client_cookie, + struct lustre_handle *lockh); +int ldlm_cli_convert_req(struct ldlm_lock *lock, __u32 *flags, __u64 new_bits); +int ldlm_cli_convert(struct ldlm_lock *lock, + enum ldlm_cancel_flags cancel_flags); int ldlm_cli_update_pool(struct ptlrpc_request *req); -int ldlm_cli_cancel(struct lustre_handle *lockh); +int ldlm_cli_cancel(const struct lustre_handle *lockh, + enum ldlm_cancel_flags cancel_flags); int ldlm_cli_cancel_unused(struct ldlm_namespace *, const struct ldlm_res_id *, - ldlm_cancel_flags_t flags, void *opaque); + enum ldlm_cancel_flags flags, void *opaque); int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns, - const struct ldlm_res_id *res_id, - ldlm_policy_data_t *policy, - ldlm_mode_t mode, - ldlm_cancel_flags_t flags, - void *opaque); -int ldlm_cli_cancel_req(struct obd_export *exp, cfs_list_t *head, - int count, ldlm_cancel_flags_t flags); + const struct ldlm_res_id *res_id, + union ldlm_policy_data *policy, + enum ldlm_mode mode, + enum ldlm_cancel_flags flags, void *opaque); +int ldlm_cli_cancel_req(struct obd_export *exp, struct list_head *head, + int count, enum ldlm_cancel_flags flags); int ldlm_cancel_resource_local(struct ldlm_resource *res, - cfs_list_t *cancels, - ldlm_policy_data_t *policy, - ldlm_mode_t mode, int lock_flags, - ldlm_cancel_flags_t cancel_flags, void *opaque); -int ldlm_cli_cancel_list_local(cfs_list_t *cancels, int count, - ldlm_cancel_flags_t flags); -int ldlm_cli_cancel_list(cfs_list_t *head, int count, - struct ptlrpc_request *req, ldlm_cancel_flags_t flags); + struct list_head *cancels, + union ldlm_policy_data *policy, + enum ldlm_mode mode, __u64 lock_flags, + enum ldlm_cancel_flags cancel_flags, + void *opaque); +int ldlm_cli_cancel_list_local(struct list_head *cancels, int count, + enum ldlm_cancel_flags flags); +int ldlm_cli_cancel_list(struct list_head *head, int count, + struct ptlrpc_request *req, + enum ldlm_cancel_flags flags); + +int ldlm_inodebits_drop(struct ldlm_lock *lock, __u64 to_drop); +int ldlm_cli_inodebits_convert(struct ldlm_lock *lock, + enum ldlm_cancel_flags cancel_flags); + /** @} ldlm_cli_api */ /* mds/handler.c */ @@ -1638,17 +1730,6 @@ int ldlm_cli_cancel_list(cfs_list_t *head, int count, int intent_disposition(struct ldlm_reply *rep, int flag); void intent_set_disposition(struct ldlm_reply *rep, int flag); - -/* ioctls for trying requests */ -#define IOC_LDLM_TYPE 'f' -#define IOC_LDLM_MIN_NR 40 - -#define IOC_LDLM_TEST _IOWR('f', 40, long) -#define IOC_LDLM_DUMP _IOWR('f', 41, long) -#define IOC_LDLM_REGRESS_START _IOWR('f', 42, long) -#define IOC_LDLM_REGRESS_STOP _IOWR('f', 43, long) -#define IOC_LDLM_MAX_NR 43 - /** * "Modes" of acquiring lock_res, necessary to tell lockdep that taking more * than one lock_res is dead-lock safe. @@ -1680,7 +1761,7 @@ static inline void unlock_res(struct ldlm_resource *res) /** Check if resource is already locked, assert if not. */ static inline void check_res_locked(struct ldlm_resource *res) { - LASSERT_SPIN_LOCKED(&res->lr_lock); + assert_spin_locked(&res->lr_lock); } struct ldlm_resource * lock_res_and_lock(struct ldlm_lock *lock); @@ -1691,17 +1772,15 @@ void unlock_res_and_lock(struct ldlm_lock *lock); * There are not used outside of ldlm. * @{ */ -void ldlm_pools_recalc(ldlm_side_t client); int ldlm_pools_init(void); void ldlm_pools_fini(void); int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns, - int idx, ldlm_side_t client); -int ldlm_pool_shrink(struct ldlm_pool *pl, int nr, - unsigned int gfp_mask); + int idx, enum ldlm_side client); +int ldlm_pool_shrink(struct ldlm_pool *pl, int nr, gfp_t gfp_mask); void ldlm_pool_fini(struct ldlm_pool *pl); int ldlm_pool_setup(struct ldlm_pool *pl, int limit); -int ldlm_pool_recalc(struct ldlm_pool *pl); +time64_t ldlm_pool_recalc(struct ldlm_pool *pl); __u32 ldlm_pool_get_lvf(struct ldlm_pool *pl); __u64 ldlm_pool_get_slv(struct ldlm_pool *pl); __u64 ldlm_pool_get_clv(struct ldlm_pool *pl); @@ -1713,5 +1792,20 @@ void ldlm_pool_add(struct ldlm_pool *pl, struct ldlm_lock *lock); void ldlm_pool_del(struct ldlm_pool *pl, struct ldlm_lock *lock); /** @} */ +static inline int ldlm_extent_overlap(const struct ldlm_extent *ex1, + const struct ldlm_extent *ex2) +{ + return ex1->start <= ex2->end && ex2->start <= ex1->end; +} + +/* check if @ex1 contains @ex2 */ +static inline int ldlm_extent_contain(const struct ldlm_extent *ex1, + const struct ldlm_extent *ex2) +{ + return ex1->start <= ex2->start && ex1->end >= ex2->end; +} + +int ldlm_inodebits_drop(struct ldlm_lock *lock, __u64 to_drop); + #endif /** @} LDLM */