* Lustre is a trademark of Sun Microsystems, Inc.
*/
-#ifndef _LUSTRE_DLM_H__
-#define _LUSTRE_DLM_H__
-
-/** \defgroup ldlm ldlm
+/** \defgroup LDLM Lustre Distributed Lock Manager
+ *
+ * Lustre DLM is based on VAX DLM.
+ * Its two main roles are:
+ * - To provide locking that assures consistency of data on all Lustre nodes.
+ * - To allow clients to cache state protected by a lock by holding the
+ * lock until a conflicting lock is requested or it is expired by the LRU.
*
* @{
*/
+#ifndef _LUSTRE_DLM_H__
+#define _LUSTRE_DLM_H__
+
#if defined(__linux__)
#include <linux/lustre_dlm.h>
#elif defined(__APPLE__)
#define LDLM_CTIME_AGE_LIMIT (10)
#define LDLM_DEFAULT_PARALLEL_AST_LIMIT 1024
+/**
+ * LDLM non-error return states
+ */
typedef enum {
ELDLM_OK = 0,
ELDLM_BAD_NAMESPACE = 401
} ldlm_error_t;
+/**
+ * LDLM namespace type.
+ * The "client" type is actually an indication that this is a narrow local view
+ * into the complete namespace on the server. Such namespaces cannot make any
+ * decisions about lack of conflicts or do any autonomous lock granting without
+ * first speaking to a server.
+ */
typedef enum {
LDLM_NAMESPACE_SERVER = 1 << 0,
LDLM_NAMESPACE_CLIENT = 1 << 1
**/
#define LDLM_FL_LOCK_CHANGED 0x000001 /* extent, mode, or resource changed */
-/* If the server returns one of these flags, then the lock was put on that list.
+/**
+ * If the server returns one of these flags, then the lock was put on that list.
* If the client sends one of these flags (during recovery ONLY!), it wants the
- * lock added to the specified list, no questions asked. -p */
+ * lock added to the specified list, no questions asked.
+ */
#define LDLM_FL_BLOCK_GRANTED 0x000002
#define LDLM_FL_BLOCK_CONV 0x000004
#define LDLM_FL_BLOCK_WAIT 0x000008
/* Used to be LDLM_FL_WAIT_NOREPROC 0x000040 moved to non-wire flags */
/* Used to be LDLM_FL_CANCEL 0x000080 moved to non-wire flags */
-/* Lock is being replayed. This could probably be implied by the fact that one
- * of BLOCK_{GRANTED,CONV,WAIT} is set, but that is pretty dangerous. */
+/**
+ * Lock is being replayed. This could probably be implied by the fact that one
+ * of BLOCK_{GRANTED,CONV,WAIT} is set, but that is pretty dangerous.
+ */
#define LDLM_FL_REPLAY 0x000100
-#define LDLM_FL_INTENT_ONLY 0x000200 /* don't grant lock, just do intent */
+#define LDLM_FL_INTENT_ONLY 0x000200 /* Don't grant lock, just do intent. */
/* Used to be LDLM_FL_LOCAL_ONLY 0x000400 moved to non-wire flags */
/* Used to be LDLM_FL_FAILED 0x000800 moved to non-wire flags */
#define LDLM_FL_NO_TIMEOUT 0x020000 /* Blocked by group lock - wait
* indefinitely */
-/* file & record locking */
-#define LDLM_FL_BLOCK_NOWAIT 0x040000 /* server told not to wait if blocked.
+/** file & record locking */
+#define LDLM_FL_BLOCK_NOWAIT 0x040000 /* Server told not to wait if blocked.
* For AGL, OST will not send glimpse
* callback. */
#define LDLM_FL_TEST_LOCK 0x080000 // return blocking lock
/* Immediately cancel such locks when they block some other locks. Send
 * cancel notification to original lock holder, but expect no reply. This is
 * for clients (like liblustre) that cannot be expected to reliably respond
- * to blocking ast. */
+ * to blocking AST. */
#define LDLM_FL_CANCEL_ON_BLOCK 0x800000
/* Flags inherited from parent lock when doing intents. */
/**
* Declaration of flags not sent through the wire.
**/
-/* Used for marking lock as an target for -EINTR while cp_ast sleep
- * emulation + race with upcoming bl_ast. */
+
+/**
+ * Used for marking a lock as a target for -EINTR while cp_ast sleep
+ * emulation + race with upcoming bl_ast.
+ */
#define LDLM_FL_FAIL_LOC 0x100000000ULL
-/* Used while processing the unused list to know that we have already
- * handled this lock and decided to skip it */
+
+/**
+ * Used while processing the unused list to know that we have already
+ * handled this lock and decided to skip it.
+ */
#define LDLM_FL_SKIPPED 0x200000000ULL
/* this lock is being destroyed */
#define LDLM_FL_CBPENDING 0x400000000ULL
#define LDLM_FL_LOCAL 0x10000000000ULL
/* XXX FIXME: This is being added to b_size as a low-risk fix to the fact that
* the LVB filling happens _after_ the lock has been granted, so another thread
- * can match`t before the LVB has been updated. As a dirty hack, we set
+ * can match it before the LVB has been updated. As a dirty hack, we set
* LDLM_FL_LVB_READY only after we've done the LVB poop.
- * this is only needed on lov/osc now, where lvb is actually used and callers
+ * This is only needed on LOV/OSC now, where LVB is actually used and callers
* must set it in input flags.
*
* The proper fix is to do the granting inside of the completion AST, which can
* be replaced with a LVB-aware wrapping function for OSC locks. That change is
* pretty high-risk, though, and would need a lot more testing. */
#define LDLM_FL_LVB_READY 0x20000000000ULL
-/* A lock contributes to the kms calculation until it has finished the part
- * of it's cancelation that performs write back on its dirty pages. It
- * can remain on the granted list during this whole time. Threads racing
- * to update the kms after performing their writeback need to know to
- * exclude each others locks from the calculation as they walk the granted
+/* A lock contributes to the known minimum size (KMS) calculation until it has
+ * finished the part of its cancelation that performs write back on its dirty
+ * pages. It can remain on the granted list during this whole time. Threads
+ * racing to update the KMS after performing their writeback need to know to
+ * exclude each other's locks from the calculation as they walk the granted
* list. */
#define LDLM_FL_KMS_IGNORE 0x40000000000ULL
-/* completion ast to be executed */
+/* completion AST to be executed */
#define LDLM_FL_CP_REQD 0x80000000000ULL
/* cleanup_resource has already handled the lock */
#define LDLM_FL_CLEANED 0x100000000000ULL
/* optimization hint: LDLM can run blocking callback from current context
* w/o involving separate thread. in order to decrease cs rate */
#define LDLM_FL_ATOMIC_CB 0x200000000000ULL
-/* It may happen that a client initiate 2 operations, e.g. unlink and mkdir,
- * such that server send blocking ast for conflict locks to this client for
- * the 1st operation, whereas the 2nd operation has canceled this lock and
- * is waiting for rpc_lock which is taken by the 1st operation.
- * LDLM_FL_BL_AST is to be set by ldlm_callback_handler() to the lock not allow
- * ELC code to cancel it.
- * LDLM_FL_BL_DONE is to be set by ldlm_cancel_callback() when lock cache is
- * droped to let ldlm_callback_handler() return EINVAL to the server. It is
- * used when ELC rpc is already prepared and is waiting for rpc_lock, too late
- * to send a separate CANCEL rpc. */
+
+/* It may happen that a client initiates two operations, e.g. unlink and
+ * mkdir, such that the server sends a blocking AST for conflicting
+ * locks to this client for the first operation, whereas the second
+ * operation has canceled this lock and is waiting for rpc_lock which is
+ * taken by the first operation. LDLM_FL_BL_AST is set by
+ * ldlm_callback_handler() in the lock to prevent the Early Lock Cancel
+ * (ELC) code from cancelling it.
+ *
+ * LDLM_FL_BL_DONE is to be set by ldlm_cancel_callback() when lock
+ * cache is dropped to let ldlm_callback_handler() return EINVAL to the
+ * server. It is used when ELC RPC is already prepared and is waiting
+ * for rpc_lock, too late to send a separate CANCEL RPC. */
#define LDLM_FL_BL_AST 0x400000000000ULL
#define LDLM_FL_BL_DONE 0x800000000000ULL
/* Don't put lock into the LRU list, so that it is not canceled due to aging.
* Used by MGC locks, they are cancelled only at unmount or by callback. */
#define LDLM_FL_NO_LRU 0x1000000000000ULL
-
-/* The blocking callback is overloaded to perform two functions. These flags
- * indicate which operation should be performed. */
+/**
+ * The blocking callback is overloaded to perform two functions. These flags
+ * indicate which operation should be performed.
+ */
#define LDLM_CB_BLOCKING 1
#define LDLM_CB_CANCELING 2
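+/*
+ * An illustrative sketch (not part of this patch) of how a blocking
+ * callback typically handles both operations; client handlers such as
+ * ldlm_blocking_ast() follow this shape:
+ *
+ *	static int blocking_ast(struct ldlm_lock *lock,
+ *				struct ldlm_lock_desc *desc,
+ *				void *data, int flag)
+ *	{
+ *		struct lustre_handle lockh;
+ *
+ *		switch (flag) {
+ *		case LDLM_CB_BLOCKING:
+ *			// a conflicting lock was enqueued: cancel ours
+ *			ldlm_lock2handle(lock, &lockh);
+ *			return ldlm_cli_cancel(&lockh);
+ *		case LDLM_CB_CANCELING:
+ *			// our lock is being destroyed: drop cached state
+ *			return 0;
+ *		}
+ *		return 0;
+ *	}
+ */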
-/* compatibility matrix */
+/**
+ * \name Lock Compatibility Matrix.
+ *
+ * A lock has both a type (extent, flock, inode bits, or plain) and a mode.
+ * Lock types are described in their respective implementation files:
+ * ldlm_{extent,flock,inodebits,plain}.c.
+ *
+ * There are six lock modes along with a compatibility matrix to indicate if
+ * two locks are compatible.
+ *
+ * - EX: Exclusive mode. Before a new file is created, MDS requests EX lock
+ * on the parent.
+ * - PW: Protective Write (normal write) mode. When a client requests a write
+ * lock from an OST, a lock with PW mode will be issued.
+ * - PR: Protective Read (normal read) mode. When a client requests a read from
+ * an OST, a lock with PR mode will be issued. Also, if the client opens a
+ * file for execution, it is granted a lock with PR mode.
+ * - CW: Concurrent Write mode. The type of lock that the MDS grants if a client
+ * requests a write lock during a file open operation.
+ * - CR: Concurrent Read mode. When a client performs a path lookup, MDS grants
+ * an inodebit lock with the CR mode on the intermediate path component.
+ * - NL: Null mode.
+ *
+ * <PRE>
+ * NL CR CW PR PW EX
+ * NL 1 1 1 1 1 1
+ * CR 1 1 1 1 1 0
+ * CW 1 1 1 0 0 0
+ * PR 1 1 0 1 0 0
+ * PW 1 1 0 0 0 0
+ * EX 1 0 0 0 0 0
+ * </PRE>
+ */
+/** @{ */
#define LCK_COMPAT_EX LCK_NL
#define LCK_COMPAT_PW (LCK_COMPAT_EX | LCK_CR)
#define LCK_COMPAT_PR (LCK_COMPAT_PW | LCK_PR)
#define LCK_COMPAT_NL (LCK_COMPAT_CR | LCK_EX | LCK_GROUP)
#define LCK_COMPAT_GROUP (LCK_GROUP | LCK_NL)
#define LCK_COMPAT_COS (LCK_COS)
+/** @} Lock Compatibility Matrix */
extern ldlm_mode_t lck_compat_array[];
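+/*
+ * The matrix is consulted by indexing lck_compat_array with the
+ * already-granted mode and testing the requested mode's bit; a sketch
+ * of the check (the tree provides an equivalent lockmode_compat()
+ * helper):
+ *
+ *	if (lck_compat_array[exist_mode] & new_mode)
+ *		// the two modes are compatible
+ */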
-
*/
-/*
- * Locking rules:
+/**
+ * Locking rules for LDLM:
*
* lr_lock
*
struct ldlm_resource;
struct ldlm_namespace;
+/**
+ * Operations on LDLM pools.
+ * LDLM pool is a pool of locks in the namespace without any implicitly
+ * specified limits.
+ * Locks in the pool are organized in an LRU list.
+ * Local memory pressure or server instructions (e.g. memory pressure on the
+ * server) can trigger freeing of locks from the pool.
+ */
struct ldlm_pool_ops {
- int (*po_recalc)(struct ldlm_pool *pl);
- int (*po_shrink)(struct ldlm_pool *pl, int nr,
- unsigned int gfp_mask);
- int (*po_setup)(struct ldlm_pool *pl, int limit);
+ /** Recalculate pool \a pl usage */
+ int (*po_recalc)(struct ldlm_pool *pl);
+ /** Cancel at least \a nr locks from pool \a pl */
+ int (*po_shrink)(struct ldlm_pool *pl, int nr,
+ unsigned int gfp_mask);
+	/** Set up pool \a pl with given \a limit. */
+	int (*po_setup)(struct ldlm_pool *pl, int limit);
};
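+/*
+ * Illustration (a sketch; the actual callback names live in
+ * ldlm_pool.c): each pool kind wires its handlers into this table,
+ * e.g. for server-side pools:
+ *
+ *	static struct ldlm_pool_ops ldlm_srv_pool_ops = {
+ *		.po_recalc = ldlm_srv_pool_recalc,
+ *		.po_shrink = ldlm_srv_pool_shrink,
+ *		.po_setup  = ldlm_srv_pool_setup,
+ *	};
+ */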
-/**
- * One second for pools thread check interval. Each pool has own period.
- */
+/** One second for pools thread check interval. Each pool has its own period. */
#define LDLM_POOLS_THREAD_PERIOD (1)
-/**
- * ~6% margin for modest pools. See ldlm_pool.c for details.
- */
+/** ~6% margin for modest pools. See ldlm_pool.c for details. */
#define LDLM_POOLS_MODEST_MARGIN_SHIFT (4)
-/**
- * Default recalc period for server side pools in sec.
- */
+/** Default recalc period for server side pools in sec. */
#define LDLM_POOL_SRV_DEF_RECALC_PERIOD (1)
-/**
- * Default recalc period for client side pools in sec.
- */
+/** Default recalc period for client side pools in sec. */
#define LDLM_POOL_CLI_DEF_RECALC_PERIOD (10)
+/**
+ * LDLM pool structure to track granted locks and determine when to release
+ * them under e.g. memory pressure.
+ * This feature is commonly referred to as lru_resize.
+ */
struct ldlm_pool {
- /**
- * Pool proc directory.
- */
- cfs_proc_dir_entry_t *pl_proc_dir;
- /**
- * Pool name, should be long enough to contain compound proc entry name.
- */
- char pl_name[100];
- /**
- * Lock for protecting slv/clv updates.
- */
+ /** Pool proc directory. */
+ cfs_proc_dir_entry_t *pl_proc_dir;
+ /** Pool name, must be long enough to hold compound proc entry name. */
+ char pl_name[100];
+ /** Lock for protecting SLV/CLV updates. */
spinlock_t pl_lock;
- /**
- * Number of allowed locks in in pool, both, client and server side.
- */
- cfs_atomic_t pl_limit;
- /**
- * Number of granted locks in
- */
- cfs_atomic_t pl_granted;
- /**
- * Grant rate per T.
- */
- cfs_atomic_t pl_grant_rate;
- /**
- * Cancel rate per T.
- */
- cfs_atomic_t pl_cancel_rate;
- /**
- * Server lock volume. Protected by pl_lock.
- */
- __u64 pl_server_lock_volume;
- /**
- * Current biggest client lock volume. Protected by pl_lock.
- */
- __u64 pl_client_lock_volume;
- /**
- * Lock volume factor. SLV on client is calculated as following:
- * server_slv * lock_volume_factor.
- */
- cfs_atomic_t pl_lock_volume_factor;
- /**
- * Time when last slv from server was obtained.
- */
- time_t pl_recalc_time;
- /**
- * Recalc period for pool.
- */
- time_t pl_recalc_period;
- /**
- * Recalc and shrink ops.
- */
- struct ldlm_pool_ops *pl_ops;
- /**
- * Number of planned locks for next period.
- */
- int pl_grant_plan;
- /**
- * Pool statistics.
- */
- struct lprocfs_stats *pl_stats;
+	/** Number of allowed locks in pool, both client and server side. */
+ cfs_atomic_t pl_limit;
+	/** Number of granted locks. */
+ cfs_atomic_t pl_granted;
+	/** Lock grant rate per T (pl_recalc_period). */
+	cfs_atomic_t pl_grant_rate;
+	/** Lock cancel rate per T (pl_recalc_period). */
+	cfs_atomic_t pl_cancel_rate;
+ /** Server lock volume (SLV). Protected by pl_lock. */
+ __u64 pl_server_lock_volume;
+ /** Current biggest client lock volume. Protected by pl_lock. */
+ __u64 pl_client_lock_volume;
+	/** Lock volume factor. SLV on client is calculated as follows:
+ * server_slv * lock_volume_factor. */
+ cfs_atomic_t pl_lock_volume_factor;
+ /** Time when last SLV from server was obtained. */
+ time_t pl_recalc_time;
+ /** Recalculation period for pool. */
+ time_t pl_recalc_period;
+ /** Recalculation and shrink operations. */
+ struct ldlm_pool_ops *pl_ops;
+ /** Number of planned locks for next period. */
+ int pl_grant_plan;
+ /** Pool statistics. */
+ struct lprocfs_stats *pl_stats;
};
typedef int (*ldlm_res_policy)(struct ldlm_namespace *, struct ldlm_lock **,
typedef int (*ldlm_cancel_for_recovery)(struct ldlm_lock *lock);
+/**
+ * LVB operations.
+ * LVB is Lock Value Block. This is a special opaque (to LDLM) value that could
+ * be associated with an LDLM lock and transferred from client to server and
+ * back.
+ *
+ * Currently LVBs are used by:
+ * - OSC-OST code to maintain current object size/times
+ * - layout lock code to return the layout when the layout lock is granted
+ */
struct ldlm_valblock_ops {
int (*lvbo_init)(struct ldlm_resource *res);
int (*lvbo_update)(struct ldlm_resource *res,
int (*lvbo_fill)(struct ldlm_lock *lock, void *buf, int buflen);
};
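+/*
+ * Usage sketch (my_lvb_ops and its handlers are hypothetical names):
+ * a server attaches its LVB handlers to a namespace, after which
+ * generic code such as ldlm_res_lvbo_update() calls through them:
+ *
+ *	static struct ldlm_valblock_ops my_lvb_ops = {
+ *		.lvbo_init   = my_lvbo_init,
+ *		.lvbo_update = my_lvbo_update,
+ *		.lvbo_fill   = my_lvbo_fill,
+ *	};
+ *	ns->ns_lvbo = &my_lvb_ops;
+ */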
+/**
+ * LDLM pool appetite: the type of lock pool in the namespace.
+ * Greedy means cached locks are released aggressively.
+ */
typedef enum {
- LDLM_NAMESPACE_GREEDY = 1 << 0,
- LDLM_NAMESPACE_MODEST = 1 << 1
+ LDLM_NAMESPACE_GREEDY = 1 << 0,
+ LDLM_NAMESPACE_MODEST = 1 << 1
} ldlm_appetite_t;
-/*
+/**
* Default values for the "max_nolock_size", "contention_time" and
* "contended_locks" namespace tunables.
*/
#define NS_DEFAULT_CONTENDED_LOCKS 32
struct ldlm_ns_bucket {
- /** refer back */
- struct ldlm_namespace *nsb_namespace;
- /** estimated lock callback time */
- struct adaptive_timeout nsb_at_estimate;
+ /** back pointer to namespace */
+ struct ldlm_namespace *nsb_namespace;
+ /**
+ * Estimated lock callback time. Used by adaptive timeout code to
+ * avoid spurious client evictions due to unresponsiveness when in
+	 * fact the network or overall system load is at fault.
+ */
+ struct adaptive_timeout nsb_at_estimate;
};
enum {
- /** ldlm namespace lock stats */
+ /** LDLM namespace lock stats */
LDLM_NSS_LOCKS = 0,
LDLM_NSS_LAST
};
LDLM_NS_TYPE_MGT,
} ldlm_ns_type_t;
+/**
+ * LDLM Namespace.
+ *
+ * Namespace serves to contain locks related to a particular service.
+ * There are two kinds of namespaces:
+ * - A server namespace has knowledge of all locks and is therefore
+ *   authoritative to make decisions like what locks could be granted and
+ *   what conflicts exist during new lock enqueue.
+ * - A client namespace has only limited knowledge about locks in the
+ *   namespace, seeing only the locks held by the client.
+ *
+ * Every Lustre service has one server namespace present on the server serving
+ * that service. Every client connected to the service has a client namespace
+ * for it.
+ * Every lock obtained by a client in that namespace is actually represented
+ * by two in-memory locks: one on the server and one on the client. The locks
+ * are linked by a special cookie by which one node can tell the other which
+ * lock it actually means during communications. Such locks are called remote
+ * locks. Locks held only by the server, without any reference to a client,
+ * are called local locks.
+ */
struct ldlm_namespace {
- /**
- * Backward link to obd, required for ldlm pool to store new SLV.
- */
- struct obd_device *ns_obd;
-
- /**
- * Is this a client-side lock tree?
- */
- ldlm_side_t ns_client;
-
- /**
- * resource hash
- */
- cfs_hash_t *ns_rs_hash;
-
- /**
- * serialize
- */
+ /** Backward link to OBD, required for LDLM pool to store new SLV. */
+ struct obd_device *ns_obd;
+
+ /** Flag indicating if namespace is on client instead of server */
+ ldlm_side_t ns_client;
+
+ /** Resource hash table for namespace. */
+ cfs_hash_t *ns_rs_hash;
+
+	/** Spinlock serializing namespace state, e.g. the LRU list below. */
spinlock_t ns_lock;
- /**
- * big refcount (by bucket)
- */
- cfs_atomic_t ns_bref;
-
- /**
- * Namespce connect flags supported by server (may be changed via proc,
- * lru resize may be disabled/enabled).
- */
- __u64 ns_connect_flags;
-
- /**
- * Client side orig connect flags supported by server.
- */
- __u64 ns_orig_connect_flags;
-
- /**
- * Position in global namespace list.
- */
- cfs_list_t ns_list_chain;
-
- /**
- * All root resources in namespace.
- */
- cfs_list_t ns_unused_list;
- int ns_nr_unused;
-
- unsigned int ns_max_unused;
- unsigned int ns_max_age;
- unsigned int ns_timeouts;
- /**
- * Seconds.
- */
- unsigned int ns_ctime_age_limit;
-
- /**
- * Next debug dump, jiffies.
- */
- cfs_time_t ns_next_dump;
-
- ldlm_res_policy ns_policy;
- struct ldlm_valblock_ops *ns_lvbo;
- void *ns_lvbp;
- cfs_waitq_t ns_waitq;
- struct ldlm_pool ns_pool;
- ldlm_appetite_t ns_appetite;
-
- /**
- * If more than \a ns_contended_locks found, the resource is considered
- * to be contended.
- */
- unsigned ns_contended_locks;
-
- /**
- * The resource remembers contended state during \a ns_contention_time,
- * in seconds.
- */
- unsigned ns_contention_time;
-
- /**
- * Limit size of nolock requests, in bytes.
- */
- unsigned ns_max_nolock_size;
-
- /**
- * Limit of parallel AST RPC count.
- */
- unsigned ns_max_parallel_ast;
-
- /* callback to cancel locks before replaying it during recovery */
- ldlm_cancel_for_recovery ns_cancel_for_recovery;
- /**
- * ldlm lock stats
- */
- struct lprocfs_stats *ns_stats;
-
- unsigned ns_stopping:1; /* namespace cleanup */
+ /** big refcount (by bucket) */
+ cfs_atomic_t ns_bref;
+
+ /**
+ * Namespace connect flags supported by server (may be changed via
+ * /proc, LRU resize may be disabled/enabled).
+ */
+ __u64 ns_connect_flags;
+
+ /** Client side original connect flags supported by server. */
+ __u64 ns_orig_connect_flags;
+
+ /**
+ * Position in global namespace list linking all namespaces on
+ * the node.
+ */
+ cfs_list_t ns_list_chain;
+
+ /**
+ * List of unused locks for this namespace. This list is also called
+ * LRU lock list.
+ * Unused locks are locks with zero reader/writer reference counts.
+ * This list is only used on clients for lock caching purposes.
+	 * When we want to release some locks voluntarily or if the server
+	 * wants us to release some locks due to e.g. memory pressure, we
+	 * take locks to release from the head of this list.
+ * Locks are linked via l_lru field in \see struct ldlm_lock.
+ */
+ cfs_list_t ns_unused_list;
+ /** Number of locks in the LRU list above */
+ int ns_nr_unused;
+
+ /**
+	 * Maximum number of locks permitted in the LRU. If 0, locks are
+	 * managed by pools and there is no preset limit; rather, it is all
+	 * controlled by available memory on this client and on the server.
+ */
+ unsigned int ns_max_unused;
+ /** Maximum allowed age (last used time) for locks in the LRU */
+ unsigned int ns_max_age;
+ /**
+ * Server only: number of times we evicted clients due to lack of reply
+ * to ASTs.
+ */
+ unsigned int ns_timeouts;
+ /**
+ * Number of seconds since the file change time after which the
+ * MDT will return an UPDATE lock along with a LOOKUP lock.
+ * This allows the client to start caching negative dentries
+ * for a directory and may save an RPC for a later stat.
+ */
+ unsigned int ns_ctime_age_limit;
+
+ /**
+ * Used to rate-limit ldlm_namespace_dump calls.
+ * \see ldlm_namespace_dump. Increased by 10 seconds every time
+ * it is called.
+ */
+ cfs_time_t ns_next_dump;
+
+ /** "policy" function that does actual lock conflict determination */
+ ldlm_res_policy ns_policy;
+
+ /**
+ * LVB operations for this namespace.
+ * \see struct ldlm_valblock_ops
+ */
+ struct ldlm_valblock_ops *ns_lvbo;
+
+ /**
+ * Used by filter code to store pointer to OBD of the service.
+ * Should be dropped in favor of \a ns_obd
+ */
+ void *ns_lvbp;
+
+ /**
+ * Wait queue used by __ldlm_namespace_free. Gets woken up every time
+ * a resource is removed.
+ */
+ cfs_waitq_t ns_waitq;
+ /** LDLM pool structure for this namespace */
+ struct ldlm_pool ns_pool;
+ /** Definition of how eagerly unused locks will be released from LRU */
+ ldlm_appetite_t ns_appetite;
+
+ /**
+ * If more than \a ns_contended_locks are found, the resource is
+ * considered to be contended. Lock enqueues might specify that no
+	 * contended locks should be granted.
+ */
+ unsigned ns_contended_locks;
+
+ /**
+ * The resources in this namespace remember contended state during
+ * \a ns_contention_time, in seconds.
+ */
+ unsigned ns_contention_time;
+
+ /**
+ * Limit size of contended extent locks, in bytes.
+	 * If an extent lock is requested for more than this many bytes and
+	 * the caller instructs us not to grant contended locks, we would
+	 * disregard such a request.
+ */
+ unsigned ns_max_nolock_size;
+
+ /** Limit of parallel AST RPC count. */
+ unsigned ns_max_parallel_ast;
+
+	/** Callback to cancel locks before replaying them during recovery. */
+ ldlm_cancel_for_recovery ns_cancel_for_recovery;
+
+ /** LDLM lock stats */
+ struct lprocfs_stats *ns_stats;
+
+ /**
+ * Flag to indicate namespace is being freed. Used to determine if
+ * recalculation of LDLM pool statistics should be skipped.
+ */
+ unsigned ns_stopping:1;
};
+/**
+ * Returns 1 if namespace \a ns is a client namespace.
+ */
static inline int ns_is_client(struct ldlm_namespace *ns)
{
LASSERT(ns != NULL);
return ns->ns_client == LDLM_NAMESPACE_CLIENT;
}
+/**
+ * Returns 1 if namespace \a ns is a server namespace.
+ */
static inline int ns_is_server(struct ldlm_namespace *ns)
{
LASSERT(ns != NULL);
return ns->ns_client == LDLM_NAMESPACE_SERVER;
}
+/**
+ * Returns 1 if namespace \a ns supports early lock cancel (ELC).
+ */
static inline int ns_connect_cancelset(struct ldlm_namespace *ns)
{
LASSERT(ns != NULL);
return !!(ns->ns_connect_flags & OBD_CONNECT_CANCELSET);
}
+/**
+ * Returns 1 if this namespace supports lru_resize.
+ */
static inline int ns_connect_lru_resize(struct ldlm_namespace *ns)
{
LASSERT(ns != NULL);
struct ldlm_lock;
+/** Type for blocking callback function of a lock. */
typedef int (*ldlm_blocking_callback)(struct ldlm_lock *lock,
- struct ldlm_lock_desc *new, void *data,
- int flag);
+ struct ldlm_lock_desc *new, void *data,
+ int flag);
+/** Type for completion callback function of a lock. */
typedef int (*ldlm_completion_callback)(struct ldlm_lock *lock, __u64 flags,
void *data);
+/** Type for glimpse callback function of a lock. */
typedef int (*ldlm_glimpse_callback)(struct ldlm_lock *lock, void *data);
+/** Type for weight callback function of a lock. */
typedef unsigned long (*ldlm_weigh_callback)(struct ldlm_lock *lock);
+/** Work list for sending GL ASTs to multiple locks. */
struct ldlm_glimpse_work {
struct ldlm_lock *gl_lock; /* lock to glimpse */
cfs_list_t gl_list; /* linkage to other gl work structs */
* glimpse callback request */
};
-/* the ldlm_glimpse_work is allocated on the stack and should not be freed */
+/** The ldlm_glimpse_work is allocated on the stack and should not be freed. */
#define LDLM_GL_WORK_NOFREE 0x1
-/* Interval node data for each LDLM_EXTENT lock */
+/** Interval node data for each LDLM_EXTENT lock. */
struct ldlm_interval {
- struct interval_node li_node; /* node for tree mgmt */
- cfs_list_t li_group; /* the locks which have the same
- * policy - group of the policy */
+ struct interval_node li_node; /* node for tree management */
+ cfs_list_t li_group; /* the locks which have the same
+ * policy - group of the policy */
};
#define to_ldlm_interval(n) container_of(n, struct ldlm_interval, li_node)
-/* the interval tree must be accessed inside the resource lock. */
+/**
+ * Interval tree for extent locks.
+ * The interval tree must be accessed under the resource lock.
+ * Interval trees are used for granted extent locks to speed up conflicts
+ * lookup. See ldlm/interval_tree.c for more details.
+ */
struct ldlm_interval_tree {
- /* tree size, this variable is used to count
- * granted PW locks in ldlm_extent_policy()*/
- int lit_size;
- ldlm_mode_t lit_mode; /* lock mode */
- struct interval_node *lit_root; /* actually ldlm_interval */
+ /** Tree size. */
+ int lit_size;
+ ldlm_mode_t lit_mode; /* lock mode */
+ struct interval_node *lit_root; /* actual ldlm_interval */
};
+/** Whether to track references to exports by LDLM locks. */
#define LUSTRE_TRACKS_LOCK_EXP_REFS (0)
-/* Cancel flag. */
+/** Cancel flags. */
typedef enum {
LCF_ASYNC = 0x1, /* Cancel locks asynchronously. */
	LCF_LOCAL = 0x2, /* Cancel locks locally, not notifying server */
LVB_T_LAYOUT = 3,
};
+/**
+ * LDLM lock structure
+ *
+ * Represents a single LDLM lock and its state in memory. Each lock is
+ * associated with a single ldlm_resource, the object which is being
+ * locked. There may be multiple ldlm_locks on a single resource,
+ * depending on the lock type and whether the locks are conflicting or
+ * not.
+ */
struct ldlm_lock {
- /**
- * Must be first in the structure.
- */
- struct portals_handle l_handle;
- /**
- * Lock reference count.
- */
- cfs_atomic_t l_refc;
- /**
- * Internal spinlock protects l_resource. we should hold this lock
- * first before grabbing res_lock.
- */
+ /**
+ * Local lock handle.
+	 * When the remote side wants to tell us about a lock, it addresses
+ * it by this opaque handle. The handle does not hold a
+ * reference on the ldlm_lock, so it can be safely passed to
+ * other threads or nodes. When the lock needs to be accessed
+ * from the handle, it is looked up again in the lock table, and
+ * may no longer exist.
+ *
+ * Must be first in the structure.
+ */
+ struct portals_handle l_handle;
+ /**
+ * Lock reference count.
+	 * This is how many users have pointers to the actual structure, so
+	 * that we do not accidentally free a lock structure that is in use.
+ */
+ cfs_atomic_t l_refc;
+ /**
+ * Internal spinlock protects l_resource. We should hold this lock
+ * first before taking res_lock.
+ */
spinlock_t l_lock;
- /**
- * ldlm_lock_change_resource() can change this.
- */
- struct ldlm_resource *l_resource;
- /**
- * Protected by ns_hash_lock. List item for client side lru list.
- */
- cfs_list_t l_lru;
- /**
- * Protected by lr_lock, linkage to resource's lock queues.
- */
- cfs_list_t l_res_link;
- /**
- * Tree node for ldlm_extent.
- */
- struct ldlm_interval *l_tree_node;
- /**
- * Protected by per-bucket exp->exp_lock_hash locks. Per export hash
- * of locks.
- */
- cfs_hlist_node_t l_exp_hash;
- /**
- * Protected by lr_lock. Requested mode.
- */
/**
- * Protected by per-bucket exp->exp_flock_hash locks. Per export hash
- * of locks.
+ * Pointer to actual resource this lock is in.
+ * ldlm_lock_change_resource() can change this.
+ */
+ struct ldlm_resource *l_resource;
+ /**
+ * List item for client side LRU list.
+ * Protected by ns_lock in struct ldlm_namespace.
+ */
+ cfs_list_t l_lru;
+ /**
+	 * Linkage to the resource's lock queues according to the current lock
+	 * state (granted, waiting, or converting).
+ * Protected by lr_lock in struct ldlm_resource.
+ */
+ cfs_list_t l_res_link;
+ /**
+ * Tree node for ldlm_extent.
+ */
+ struct ldlm_interval *l_tree_node;
+ /**
+ * Per export hash of locks.
+ * Protected by per-bucket exp->exp_lock_hash locks.
+ */
+ cfs_hlist_node_t l_exp_hash;
+ /**
+ * Per export hash of flock locks.
+ * Protected by per-bucket exp->exp_flock_hash locks.
+ */
+ cfs_hlist_node_t l_exp_flock_hash;
+ /**
+ * Requested mode.
+ * Protected by lr_lock.
+ */
+ ldlm_mode_t l_req_mode;
+ /**
+ * Granted mode, also protected by lr_lock.
+ */
+ ldlm_mode_t l_granted_mode;
+ /** Lock completion handler pointer. Called when lock is granted. */
+ ldlm_completion_callback l_completion_ast;
+ /**
+ * Lock blocking AST handler pointer.
+ * It plays two roles:
+ * - as a notification of an attempt to queue a conflicting lock (once)
+ * - as a notification when the lock is being cancelled.
+ *
+ * As such it's typically called twice: once for the initial conflict
+ * and then once more when the last user went away and the lock is
+ * cancelled (could happen recursively).
+ */
+ ldlm_blocking_callback l_blocking_ast;
+ /**
+ * Lock glimpse handler.
+	 * The glimpse handler is used by the server to obtain LVB updates
+	 * from a client.
+ */
+ ldlm_glimpse_callback l_glimpse_ast;
+
+ /** XXX apparently unused "weight" handler. To be removed? */
+ ldlm_weigh_callback l_weigh_ast;
+
+ /**
+ * Lock export.
+	 * This is a pointer to the actual client export for locks granted
+ * to clients. Used server-side.
+ */
+ struct obd_export *l_export;
+ /**
+ * Lock connection export.
+ * Pointer to server export on a client.
+ */
+ struct obd_export *l_conn_export;
+
+ /**
+ * Remote lock handle.
+	 * If the lock is remote, this is the handle of the other side's lock
+	 * (l_handle).
+ */
+ struct lustre_handle l_remote_handle;
+
+ /**
+	 * Representation of private data specific to a lock type.
+	 * Examples are: extent range for extent locks or bitmask for ibits
+	 * locks.
*/
- cfs_hlist_node_t l_exp_flock_hash;
-
- ldlm_mode_t l_req_mode;
- /**
- * Granted mode, also protected by lr_lock.
- */
- ldlm_mode_t l_granted_mode;
- /**
- * Lock enqueue completion handler.
- */
- ldlm_completion_callback l_completion_ast;
- /**
- * Lock blocking ast handler.
- */
- ldlm_blocking_callback l_blocking_ast;
- /**
- * Lock glimpse handler.
- */
- ldlm_glimpse_callback l_glimpse_ast;
- ldlm_weigh_callback l_weigh_ast;
-
- /**
- * Lock export.
- */
- struct obd_export *l_export;
- /**
- * Lock connection export.
- */
- struct obd_export *l_conn_export;
-
- /**
- * Remote lock handle.
- */
- struct lustre_handle l_remote_handle;
-
- ldlm_policy_data_t l_policy_data;
-
- /*
- * Protected by lr_lock. Various counters: readers, writers, etc.
- */
- __u64 l_flags;
- __u32 l_readers;
- __u32 l_writers;
- /**
- * If the lock is granted, a process sleeps on this waitq to learn when
- * it's no longer in use. If the lock is not granted, a process sleeps
- * on this waitq to learn when it becomes granted.
- */
- cfs_waitq_t l_waitq;
-
- /**
- * Seconds. it will be updated if there is any activity related to
- * the lock, e.g. enqueue the lock or send block AST.
- */
- cfs_time_t l_last_activity;
-
- /**
- * Jiffies. Should be converted to time if needed.
- */
- cfs_time_t l_last_used;
-
- struct ldlm_extent l_req_extent;
-
- unsigned int l_failed:1,
- /*
- * Set for locks that were removed from class hash table and will be
- * destroyed when last reference to them is released. Set by
- * ldlm_lock_destroy_internal().
- *
- * Protected by lock and resource locks.
- */
- l_destroyed:1,
+ ldlm_policy_data_t l_policy_data;
+
+ /**
+	 * Lock state flags, e.g. whether any blocking requests have been
+	 * received for this lock.
+ * Protected by lr_lock.
+ */
+ __u64 l_flags;
+ /**
+ * Lock r/w usage counters.
+ * Protected by lr_lock.
+ */
+ __u32 l_readers;
+ __u32 l_writers;
+ /**
+ * If the lock is granted, a process sleeps on this waitq to learn when
+ * it's no longer in use. If the lock is not granted, a process sleeps
+ * on this waitq to learn when it becomes granted.
+ */
+ cfs_waitq_t l_waitq;
+
+ /**
+ * Seconds. It will be updated if there is any activity related to
+	 * the lock, e.g. enqueuing the lock or sending a blocking AST.
+ */
+ cfs_time_t l_last_activity;
+
+ /**
+	 * Time when the lock was last used, e.g. matched by a lock match
+	 * operation.
+ * Jiffies. Should be converted to time if needed.
+ */
+ cfs_time_t l_last_used;
+
+ /** Originally requested extent for the extent lock. */
+ struct ldlm_extent l_req_extent;
+
+ unsigned int l_failed:1,
+ /**
+ * Set for locks that were removed from class hash table and will be
+ * destroyed when last reference to them is released. Set by
+ * ldlm_lock_destroy_internal().
+ *
+ * Protected by lock and resource locks.
+ */
+ l_destroyed:1,
/*
* it's set in lock_res_and_lock() and unset in unlock_res_and_lock().
*
- * NB: compare with check_res_locked(), check this bit is cheaper,
- * also, spin_is_locked() is deprecated for kernel code, one reason is
- * because it works only for SMP so user needs add extra macros like
+ * NB: compared with check_res_locked(), checking this bit is cheaper.
+ * Also, spin_is_locked() is deprecated for kernel code; one reason is
+ * because it works only for SMP so user needs to add extra macros like
* LASSERT_SPIN_LOCKED for uniprocessor kernels.
*/
- l_res_locked:1,
+ l_res_locked:1,
/*
- * it's set once we call ldlm_add_waiting_lock_res_locked()
+ * It's set once we call ldlm_add_waiting_lock_res_locked()
* to start the lock-timeout timer and it will never be reset.
*
* Protected by lock_res_and_lock().
*/
- l_waited:1,
- /**
- * flag whether this is a server namespace lock.
- */
- l_ns_srv:1;
+ l_waited:1,
+ /** Flag whether this is a server namespace lock. */
+ l_ns_srv:1;
- /*
- * Client-side-only members.
- */
+ /*
+ * Client-side-only members.
+ */
enum lvb_type l_lvb_type;
- /**
- * Temporary storage for an LVB received during an enqueue operation.
- */
- __u32 l_lvb_len;
- void *l_lvb_data;
-
- void *l_ast_data;
-
- /*
- * Server-side-only members.
- */
-
- /** connection cookie for the client originated the operation. */
- __u64 l_client_cookie;
-
- /**
- * Protected by elt_lock. Callbacks pending.
- */
- cfs_list_t l_pending_chain;
-
- cfs_time_t l_callback_timeout;
-
- /**
- * Pid which created this lock.
- */
- __u32 l_pid;
-
- int l_bl_ast_run;
- /**
- * For ldlm_add_ast_work_item().
- */
- cfs_list_t l_bl_ast;
- /**
- * For ldlm_add_ast_work_item().
- */
- cfs_list_t l_cp_ast;
- /**
- * For ldlm_add_ast_work_item().
- */
- cfs_list_t l_rk_ast;
-
- struct ldlm_lock *l_blocking_lock;
-
- /**
- * Protected by lr_lock, linkages to "skip lists".
- */
- cfs_list_t l_sl_mode;
- cfs_list_t l_sl_policy;
- struct lu_ref l_reference;
+
+ /**
+	 * Temporary storage for an LVB received during an enqueue operation.
+ */
+ __u32 l_lvb_len;
+ void *l_lvb_data;
+
+ /** Private storage for lock user. Opaque to LDLM. */
+ void *l_ast_data;
+
+ /*
+ * Server-side-only members.
+ */
+
+ /**
+ * Connection cookie for the client originating the operation.
+ * Used by Commit on Share (COS) code. Currently only used for
+ * inodebits locks on MDS.
+ */
+ __u64 l_client_cookie;
+
+ /**
+ * List item for locks waiting for cancellation from clients.
+ * The lists this could be linked into are:
+ * waiting_locks_list (protected by waiting_locks_spinlock),
+ * then if the lock timed out, it is moved to
+ * expired_lock_thread.elt_expired_locks for further processing.
+ * Protected by elt_lock.
+ */
+ cfs_list_t l_pending_chain;
+
+ /**
+ * Set when lock is sent a blocking AST. Time in seconds when timeout
+ * is reached and client holding this lock could be evicted.
+ * This timeout could be further extended by e.g. certain IO activity
+ * under this lock.
+ * \see ost_rw_prolong_locks
+ */
+ cfs_time_t l_callback_timeout;
+
+ /** Local PID of process which created this lock. */
+ __u32 l_pid;
+
+ /**
+ * Number of times blocking AST was sent for this lock.
+ * This is for debugging. Valid values are 0 and 1, if there is an
+ * attempt to send blocking AST more than once, an assertion would be
+ * hit. \see ldlm_work_bl_ast_lock
+ */
+ int l_bl_ast_run;
+	/** List item for ldlm_add_ast_work_item() in case of blocking ASTs. */
+	cfs_list_t l_bl_ast;
+	/** List item for ldlm_add_ast_work_item() in case of completion ASTs. */
+	cfs_list_t l_cp_ast;
+	/** List item for ldlm_add_ast_work_item() for "revoke" ASTs used in COS. */
+ cfs_list_t l_rk_ast;
+
+ /**
+ * Pointer to a conflicting lock that caused blocking AST to be sent
+	 * for this lock.
+ */
+ struct ldlm_lock *l_blocking_lock;
+
+ /**
+ * Protected by lr_lock, linkages to "skip lists".
+ * For more explanations of skip lists see ldlm/ldlm_inodebits.c
+ */
+ cfs_list_t l_sl_mode;
+ cfs_list_t l_sl_policy;
+
+ /** Reference tracking structure to debug leaked locks. */
+ struct lu_ref l_reference;
#if LUSTRE_TRACKS_LOCK_EXP_REFS
- /* Debugging stuff for bug 20498, for tracking export
- references. */
- /** number of export references taken */
- int l_exp_refs_nr;
- /** link all locks referencing one export */
- cfs_list_t l_exp_refs_link;
- /** referenced export object */
- struct obd_export *l_exp_refs_target;
+ /* Debugging stuff for bug 20498, for tracking export references. */
+ /** number of export references taken */
+ int l_exp_refs_nr;
+ /** link all locks referencing one export */
+ cfs_list_t l_exp_refs_link;
+ /** referenced export object */
+ struct obd_export *l_exp_refs_target;
#endif
- /** export blocking dlm lock list, protected by
- * l_export->exp_bl_list_lock.
- * Lock order of waiting_lists_spinlock, exp_bl_list_lock and res lock
- * is: res lock -> exp_bl_list_lock -> wanting_lists_spinlock. */
- cfs_list_t l_exp_list;
+ /**
+	 * Export blocking DLM lock list, protected by
+	 * l_export->exp_bl_list_lock.
+	 * Lock order of waiting_locks_spinlock, exp_bl_list_lock and res lock
+	 * is: res lock -> exp_bl_list_lock -> waiting_locks_spinlock.
+ */
+ cfs_list_t l_exp_list;
};
+/**
+ * LDLM resource description.
+ * Basically, a resource is a representation of a single object.
+ * An object has a name which is currently four 64-bit integers. The LDLM
+ * user is responsible for creating a mapping between the objects it wants
+ * protected and resource names.
+ *
+ * A resource can only hold locks of a single lock type, though there may be
+ * multiple ldlm_locks on a single resource, depending on the lock type and
+ * whether the locks are conflicting or not.
+ */
struct ldlm_resource {
struct ldlm_ns_bucket *lr_ns_bucket;
- /* protected by ns_hash_lock */
+ /**
+	 * List item for the namespace resource hash table.
+	 * Protected by ns_lock.
+ */
cfs_hlist_node_t lr_hash;
+
+ /** Spinlock to protect locks under this resource. */
spinlock_t lr_lock;
- /* protected by lr_lock */
- cfs_list_t lr_granted;
- cfs_list_t lr_converting;
- cfs_list_t lr_waiting;
- ldlm_mode_t lr_most_restr;
- ldlm_type_t lr_type; /* LDLM_{PLAIN,EXTENT,FLOCK} */
- struct ldlm_res_id lr_name;
- cfs_atomic_t lr_refcount;
+ /**
+	 * Protected by lr_lock.
+ * @{ */
+ /** List of locks in granted state */
+ cfs_list_t lr_granted;
+	/** List of locks waiting to change their granted mode (converting). */
+ cfs_list_t lr_converting;
+ /**
+ * List of locks that could not be granted due to conflicts and
+	 * that are waiting for conflicts to go away. */
+ cfs_list_t lr_waiting;
+ /** @} */
+
+ /* XXX No longer needed? Remove ASAP */
+ ldlm_mode_t lr_most_restr;
+
+ /** Type of locks this resource can hold. Only one type per resource. */
+ ldlm_type_t lr_type; /* LDLM_{PLAIN,EXTENT,FLOCK,IBITS} */
- struct ldlm_interval_tree lr_itree[LCK_MODE_NUM]; /* interval trees*/
+ /** Resource name */
+ struct ldlm_res_id lr_name;
+ /** Reference count for this resource */
+ cfs_atomic_t lr_refcount;
- /* Server-side-only lock value block elements */
- /** to serialize lvbo_init */
+ /**
+	 * Interval trees (only for extent locks) for all modes of this
+	 * resource.
+ */
+ struct ldlm_interval_tree lr_itree[LCK_MODE_NUM];
+
+ /**
+ * Server-side-only lock value block elements.
+ * To serialize lvbo_init.
+ */
struct mutex lr_lvb_mutex;
- __u32 lr_lvb_len;
- /** protect by lr_lock */
- void *lr_lvb_data;
-
- /* when the resource was considered as contended */
- cfs_time_t lr_contention_time;
- /**
- * List of references to this resource. For debugging.
- */
- struct lu_ref lr_reference;
-
- struct inode *lr_lvb_inode;
+ __u32 lr_lvb_len;
+ /** protected by lr_lock */
+ void *lr_lvb_data;
+
+ /** When the resource was considered as contended. */
+ cfs_time_t lr_contention_time;
+ /** List of references to this resource. For debugging. */
+ struct lu_ref lr_reference;
+
+ struct inode *lr_lvb_inode;
};
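+/*
+ * Illustration (conceptual sketch): an LDLM user builds resource names
+ * from its object identifiers; e.g. the MDT packs a FID into the
+ * lr_name fields roughly as:
+ *
+ *	name.name[0] = fid_seq(fid);
+ *	name.name[1] = fid_oid(fid);
+ *	name.name[2] = fid_ver(fid);
+ */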
static inline char *
int w_datalen;
};
-/* ldlm_enqueue parameters common */
+/**
+ * Common ldlm_enqueue parameters
+ */
struct ldlm_enqueue_info {
- __u32 ei_type; /* Type of the lock being enqueued. */
- __u32 ei_mode; /* Mode of the lock being enqueued. */
- void *ei_cb_bl; /* blocking lock callback */
- void *ei_cb_cp; /* lock completion callback */
- void *ei_cb_gl; /* lock glimpse callback */
- void *ei_cb_wg; /* lock weigh callback */
- void *ei_cbdata; /* Data to be passed into callbacks. */
+	__u32 ei_type;	 /**< Type of the lock being enqueued. */
+	__u32 ei_mode;	 /**< Mode of the lock being enqueued. */
+	void *ei_cb_bl;  /**< blocking lock callback */
+	void *ei_cb_cp;  /**< lock completion callback */
+	void *ei_cb_gl;  /**< lock glimpse callback */
+	void *ei_cb_wg;  /**< lock weigh callback */
+	void *ei_cbdata; /**< Data to be passed into callbacks. */
};
extern struct obd_ops ldlm_obd_ops;
extern char *ldlm_typename[];
extern char *ldlm_it2str(int it);
-#define LDLM_DEBUG_NOLOCK(format, a...) \
- CDEBUG(D_DLMTRACE, "### " format "\n" , ##a)
+/**
+ * Just a fancy CDEBUG call with the log mask preset to D_DLMTRACE.
+ * For the cases where we do not have an actual lock to print along
+ * with an LDLM-related debugging message.
+ */
+#define LDLM_DEBUG_NOLOCK(format, a...) \
+ CDEBUG(D_DLMTRACE, "### " format "\n" , ##a)
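+/*
+ * Typical usage (illustrative):
+ *
+ *	LDLM_DEBUG_NOLOCK("server namespace cleanup (rc = %d)", rc);
+ */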
+/**
+ * Support function for lock information printing into debug logs.
+ * \see LDLM_DEBUG
+ */
#ifdef LIBCFS_DEBUG
#define ldlm_lock_debug(msgdata, mask, cdls, lock, fmt, a...) do { \
CFS_CHECK_STACK(msgdata, mask, cdls); \
const char *fmt, ...)
__attribute__ ((format (printf, 3, 4)));
+/**
+ * Rate-limited version of lock printing function.
+ */
#define LDLM_DEBUG_LIMIT(mask, lock, fmt, a...) do { \
static cfs_debug_limit_state_t _ldlm_cdls; \
LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, &_ldlm_cdls); \
#define LDLM_ERROR(lock, fmt, a...) LDLM_DEBUG_LIMIT(D_ERROR, lock, fmt, ## a)
#define LDLM_WARN(lock, fmt, a...) LDLM_DEBUG_LIMIT(D_WARNING, lock, fmt, ## a)
+/** Non-rate-limited lock printing function for debugging purposes. */
#define LDLM_DEBUG(lock, fmt, a...) do { \
if (likely(lock != NULL)) { \
LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_DLMTRACE, NULL); \
int first_enq, ldlm_error_t *err,
cfs_list_t *work_list);
-/*
- * Iterators.
+/**
+ * Return values for lock iterators.
+ * Also used when deciding on lock grants and cancellations.
*/
-
#define LDLM_ITER_CONTINUE 1 /* keep iterating */
#define LDLM_ITER_STOP 2 /* stop iterating */
typedef int (*ldlm_iterator_t)(struct ldlm_lock *, void *);
typedef int (*ldlm_res_iterator_t)(struct ldlm_resource *, void *);
+/** \defgroup ldlm_iterator Lock iterators
+ *
+ * LDLM provides for a way to iterate through every lock on a resource or
+ * namespace or every resource in a namespace.
+ * @{ */
int ldlm_resource_foreach(struct ldlm_resource *res, ldlm_iterator_t iter,
- void *closure);
+ void *closure);
void ldlm_namespace_foreach(struct ldlm_namespace *ns, ldlm_iterator_t iter,
- void *closure);
+ void *closure);
+int ldlm_resource_iterate(struct ldlm_namespace *, const struct ldlm_res_id *,
+ ldlm_iterator_t iter, void *data);
+/** @} ldlm_iterator */
int ldlm_replay_locks(struct obd_import *imp);
-int ldlm_resource_iterate(struct ldlm_namespace *, const struct ldlm_res_id *,
- ldlm_iterator_t iter, void *data);
/* ldlm_flock.c */
int ldlm_flock_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data);
/* ldlm_lockd.c */
#ifdef HAVE_SERVER_SUPPORT
+/** \defgroup ldlm_srv_ast Server AST handlers
+ * These are AST handlers used by server code.
+ * They simply prepare RPCs to be sent to the clients.
+ * @{
+ */
int ldlm_server_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
- void *data, int flag);
+ void *data, int flag);
int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data);
int ldlm_server_glimpse_ast(struct ldlm_lock *lock, void *data);
int ldlm_glimpse_locks(struct ldlm_resource *res, cfs_list_t *gl_work_list);
+/** @} ldlm_srv_ast */
+
+/** \defgroup ldlm_handlers Server LDLM handlers
+ * These are handler functions that should be called by "frontends" such as
+ * MDT or OST to pass LDLM requests through to LDLM for handling.
+ * @{
+ */
int ldlm_handle_enqueue(struct ptlrpc_request *req, ldlm_completion_callback,
ldlm_blocking_callback, ldlm_glimpse_callback);
int ldlm_handle_enqueue0(struct ldlm_namespace *ns, struct ptlrpc_request *req,
int ldlm_handle_cancel(struct ptlrpc_request *req);
int ldlm_request_cancel(struct ptlrpc_request *req,
const struct ldlm_request *dlm_req, int first);
+/** @} ldlm_handlers */
+
void ldlm_revoke_export_locks(struct obd_export *exp);
#endif
int ldlm_del_waiting_lock(struct ldlm_lock *lock);
int ldlm_lock_remove_from_lru(struct ldlm_lock *);
int ldlm_lock_set_data(struct lustre_handle *, void *);
+/**
+ * Obtain a lock reference by its handle.
+ */
static inline struct ldlm_lock *ldlm_handle2lock(const struct lustre_handle *h)
{
return __ldlm_handle2lock(h, 0);
return lock;
}
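+/*
+ * Usage sketch: resolve a handle into a referenced lock and drop the
+ * reference when done; the lock may already be gone, so check for NULL:
+ *
+ *	struct ldlm_lock *lock = ldlm_handle2lock(&lockh);
+ *	if (lock != NULL) {
+ *		... use the lock ...
+ *		LDLM_LOCK_PUT(lock);
+ *	}
+ */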
+/**
+ * Update Lock Value Block Operations (LVBO) on a resource taking into account
+ * data from request \a r.
+ */
static inline int ldlm_res_lvbo_update(struct ldlm_resource *res,
struct ptlrpc_request *r, int increase)
{
/* ldlm_request.c */
int ldlm_expired_completion_wait(void *data);
+/** \defgroup ldlm_local_ast Default AST handlers for local locks
+ * These AST handlers are typically used for server-side local locks and are
+ * also used by client-side lock handlers to perform minimal base
+ * processing.
+ * @{ */
int ldlm_blocking_ast_nocheck(struct ldlm_lock *lock);
int ldlm_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
- void *data, int flag);
+ void *data, int flag);
int ldlm_glimpse_ast(struct ldlm_lock *lock, void *reqp);
int ldlm_completion_ast_async(struct ldlm_lock *lock, __u64 flags, void *data);
int ldlm_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data);
+/** @} ldlm_local_ast */
+
+/** \defgroup ldlm_cli_api API to operate on locks from actual LDLM users.
+ * These are typically used by client and server (*_local versions)
+ * to obtain and release locks.
+ * @{ */
int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
struct ldlm_enqueue_info *einfo,
const struct ldlm_res_id *res_id,
ldlm_cancel_flags_t flags);
int ldlm_cli_cancel_list(cfs_list_t *head, int count,
struct ptlrpc_request *req, ldlm_cancel_flags_t flags);
+/** @} ldlm_cli_api */
/* mds/handler.c */
/* This has to be here because recursive inclusion sucks. */
LRT_NEW
};
+/** Lock resource. */
static inline void lock_res(struct ldlm_resource *res)
{
spin_lock(&res->lr_lock);
}
+/** Lock resource, instructing lockdep about safe lock nesting. */
static inline void lock_res_nested(struct ldlm_resource *res,
- enum lock_res_type mode)
+ enum lock_res_type mode)
{
spin_lock_nested(&res->lr_lock, mode);
}
+/** Unlock resource. */
static inline void unlock_res(struct ldlm_resource *res)
{
spin_unlock(&res->lr_lock);
}
+/** Check if resource is already locked, assert if not. */
static inline void check_res_locked(struct ldlm_resource *res)
{
LASSERT_SPIN_LOCKED(&res->lr_lock);
void unlock_res_and_lock(struct ldlm_lock *lock);
/* ldlm_pool.c */
+/** \defgroup ldlm_pools Various LDLM pool related functions
+ * These are not used outside of LDLM.
+ * @{
+ */
void ldlm_pools_recalc(ldlm_side_t client);
int ldlm_pools_init(void);
void ldlm_pools_fini(void);
void ldlm_pool_set_limit(struct ldlm_pool *pl, __u32 limit);
void ldlm_pool_add(struct ldlm_pool *pl, struct ldlm_lock *lock);
void ldlm_pool_del(struct ldlm_pool *pl, struct ldlm_lock *lock);
-
-/** @} ldlm */
+/** @} */
#endif
+/** @} LDLM */
#include <lustre_dlm.h>
#include <lustre_lib.h>
-/*
- * ldlm locking uses resource to serialize access to locks
+/**
+ * Lock a lock and its resource.
+ *
+ * LDLM locking uses the resource to serialize access to locks
* but there is a case when we change resource of lock upon
- * enqueue reply. we rely on that lock->l_resource = new_res
- * is atomic
+ * enqueue reply. We rely on lock->l_resource = new_res
+ * being an atomic operation.
*/
-struct ldlm_resource * lock_res_and_lock(struct ldlm_lock *lock)
+struct ldlm_resource *lock_res_and_lock(struct ldlm_lock *lock)
{
/* on server-side resource of lock doesn't change */
if (!lock->l_ns_srv)
}
EXPORT_SYMBOL(lock_res_and_lock);
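+/*
+ * Usage example (as done e.g. by the flock code): take the lock and
+ * its resource, mutate protected lock state, then release both:
+ *
+ *	lock_res_and_lock(lock);
+ *	lock->l_flags |= LDLM_FL_CBPENDING;
+ *	unlock_res_and_lock(lock);
+ */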
+/**
+ * Unlock a lock and its resource previously locked with lock_res_and_lock
+ */
void unlock_res_and_lock(struct ldlm_lock *lock)
{
/* on server-side resource of lock doesn't change */
* Author: Phil Schwan <phil@clusterfs.com>
*/
+/**
+ * This file contains the implementation of the EXTENT lock type.
+ *
+ * EXTENT lock type is for locking a contiguous range of values, represented
+ * by 64-bit starting and ending offsets (inclusive). There are several extent
+ * lock modes, some of which may be mutually incompatible. Extent locks are
+ * considered incompatible if their modes are incompatible and their extents
+ * intersect. See the lock mode compatibility matrix in lustre_dlm.h.
+ */
+
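+/*
+ * The intersection test implied above reduces to the usual interval
+ * check (a sketch; interval_tree.c performs an equivalent test):
+ *
+ *	a->start <= b->end && b->start <= a->end
+ */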
#define DEBUG_SUBSYSTEM S_LDLM
#ifndef __KERNEL__
# include <liblustre.h>
#ifdef HAVE_SERVER_SUPPORT
# define LDLM_MAX_GROWN_EXTENT (32 * 1024 * 1024 - 1)
-/* fixup the ldlm_extent after expanding */
+/**
+ * Fix up the ldlm_extent after expanding it.
+ *
+ * After expansion has been done, we might still want to make certain
+ * adjustments based on overall contention of the resource and the like
+ * to avoid granting overly wide locks.
+ */
static void ldlm_extent_internal_policy_fixup(struct ldlm_lock *req,
struct ldlm_extent *new_ex,
int conflicting)
mask, new_ex->end, req_end);
}
-/* The purpose of this function is to return:
- * - the maximum extent
- * - containing the requested extent
- * - and not overlapping existing conflicting extents outside the requested one
+/**
+ * Return the maximum extent that:
+ * - contains the requested extent
+ * - does not overlap existing conflicting extents outside the requested one
+ *
+ * This allows clients to request a small required extent range, but if there
+ * is no contention on the lock the full lock can be granted to the client.
+ * This avoids the need for many smaller lock requests to be granted in the
+ * common (uncontended) case.
*
* Use interval tree to expand the lock extent for granted lock.
*/
lockmode_verify(req_mode);
- /* using interval tree to handle the ldlm extent granted locks */
+ /* Using interval tree to handle the LDLM extent granted locks. */
for (idx = 0; idx < LCK_MODE_NUM; idx++) {
struct interval_node_extent ext = { req_start, req_end };
RETURN(INTERVAL_ITER_CONT);
}
-/* Determine if the lock is compatible with all locks on the queue.
- * We stop walking the queue if we hit ourselves so we don't take
- * conflicting locks enqueued after us into accound, or we'd wait forever.
+/**
+ * Determine if the lock is compatible with all locks on the queue.
+ *
+ * If \a work_list is provided, conflicting locks are linked there.
+ * If \a work_list is not provided, we exit this function on first conflict.
*
- * 0 if the lock is not compatible
- * 1 if the lock is compatible
- * 2 if this group lock is compatible and requires no further checking
- * negative error, such as EWOULDBLOCK for group locks
+ * \retval 0 if the lock is not compatible
+ * \retval 1 if the lock is compatible
+ * \retval 2 if \a req is a group lock and it is compatible and requires
+ * no further checking
+ * \retval negative error, such as EWOULDBLOCK for group locks
*/
static int
ldlm_extent_compat_queue(cfs_list_t *queue, struct ldlm_lock *req,
lock = cfs_list_entry(tmp, struct ldlm_lock,
l_res_link);
+ /* We stop walking the queue if we hit ourselves so
+ * we don't take conflicting locks enqueued after us
+ * into account, or we'd wait forever. */
if (req == lock)
break;
RETURN(compat);
}
+/**
+ * Discard all AST work items from list.
+ *
+ * If for whatever reason we do not want to send ASTs to conflicting locks
+ * anymore, disassemble the list with this function.
+ */
static void discard_bl_list(cfs_list_t *bl_list)
{
cfs_list_t *tmp, *pos;
EXIT;
}
-/* If first_enq is 0 (ie, called from ldlm_reprocess_queue):
- * - blocking ASTs have already been sent
- * - must call this function with the ns lock held
- *
- * If first_enq is 1 (ie, called from ldlm_lock_enqueue):
- * - blocking ASTs have not been sent
- * - must call this function with the ns lock held once */
+/**
+ * Process a granting attempt for extent lock.
+ * Must be called with ns lock held.
+ *
+ * This function looks for any conflicts for \a lock in the granted or
+ * waiting queues. The lock is granted if no conflicts are found in
+ * either queue.
+ *
+ * If \a first_enq is 0 (ie, called from ldlm_reprocess_queue):
+ * - blocking ASTs have already been sent
+ *
+ * If \a first_enq is 1 (ie, called from ldlm_lock_enqueue):
+ * - blocking ASTs have not been sent yet, so a list of conflicting locks
+ *   is collected and ASTs are sent.
+ */
int ldlm_process_extent_lock(struct ldlm_lock *lock, __u64 *flags,
int first_enq, ldlm_error_t *err,
cfs_list_t *work_list)
lock_res(res);
if (rc == -ERESTART) {
-
/* 15715: The lock was granted and destroyed after
* resource lock was dropped. Interval node was freed
* in ldlm_lock_destroy. Anyway, this always happens
return index;
}
+/** Add newly granted lock into interval tree for the resource. */
void ldlm_extent_add_lock(struct ldlm_resource *res,
struct ldlm_lock *lock)
{
ldlm_resource_add_lock(res, &res->lr_granted, lock);
}
+/** Remove cancelled lock from resource interval tree. */
void ldlm_extent_unlink_lock(struct ldlm_lock *lock)
{
struct ldlm_resource *res = lock->l_resource;
* Lustre is a trademark of Sun Microsystems, Inc.
*/
+/**
+ * This file implements POSIX lock type for Lustre.
+ * Its policy properties are start and end of extent and PID.
+ *
+ * These locks are only done through MDS due to POSIX semantics requiring
+ * e.g. that locks can be only partially released and as such split into
+ * two parts, and also that two adjacent locks from the same process may be
+ * merged into a single wider lock.
+ *
+ * Lock modes are mapped like this:
+ * - PR and PW for READ and WRITE locks
+ * - NL to request the release of a portion of the lock
+ *
+ * These flock locks never time out.
+ */
+
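+/*
+ * Illustration (conceptual sketch): the client translates fcntl()
+ * request types into these modes before enqueueing:
+ *
+ *	F_RDLCK -> LCK_PR
+ *	F_WRLCK -> LCK_PW
+ *	F_UNLCK -> LCK_NL	(release [start, end] of the lock)
+ */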
#define DEBUG_SUBSYSTEM S_LDLM
#ifdef __KERNEL__
EXIT;
}
+/**
+ * POSIX locks deadlock detection code.
+ *
+ * Given a new lock \a req and an existing lock \a bl_lock it conflicts
+ * with, we need to iterate through all blocked POSIX locks for this
+ * export and see if a deadlock condition arises (i.e. when one client
+ * holds a lock on something and wants a lock on something else, while
+ * at the same time another client has the opposite situation).
+ */
static int
ldlm_flock_deadlock(struct ldlm_lock *req, struct ldlm_lock *bl_lock)
{
return 0;
}
+/**
+ * Process a granting attempt for flock lock.
+ * Must be called with ns lock held.
+ *
+ * This function looks for any conflicts for \a lock in the granted or
+ * waiting queues. The lock is granted if no conflicts are found in
+ * either queue.
+ *
+ * It is also responsible for splitting a lock if a portion of the lock
+ * is released.
+ *
+ * If \a first_enq is 0 (ie, called from ldlm_reprocess_queue):
+ * - blocking ASTs have already been sent
+ *
+ * If \a first_enq is 1 (ie, called from ldlm_lock_enqueue):
+ * - blocking ASTs have not been sent yet, so a list of conflicting locks
+ *   is collected and ASTs are sent.
+ */
int
ldlm_process_flock_lock(struct ldlm_lock *req, __u64 *flags, int first_enq,
ldlm_error_t *err, cfs_list_t *work_list)
#endif /* HAVE_SERVER_SUPPORT */
}
- /* In case we're reprocessing the requested lock we can't destroy
- * it until after calling ldlm_ast_work_item() above so that lawi()
- * can bump the reference count on req. Otherwise req could be freed
- * before the completion AST can be sent. */
+	/* In case we're reprocessing the requested lock we can't destroy
+	 * it until after calling ldlm_add_ast_work_item() above so that it
+	 * can bump the reference count on \a req. Otherwise \a req
+	 * could be freed before the completion AST can be sent. */
if (added)
ldlm_flock_destroy(req, mode, *flags);
lock_res_and_lock(lock);
ldlm_flock_blocking_unlink(lock);
- /* client side - set flag to prevent lock from being put on lru list */
+ /* client side - set flag to prevent lock from being put on LRU list */
lock->l_flags |= LDLM_FL_CBPENDING;
unlock_res_and_lock(lock);
}
/**
- * Flock completion calback function.
+ * Flock completion callback function.
*
* \param lock [in,out]: A lock to be handled
* \param flags [in]: flags
* Author: Phil Schwan <phil@clusterfs.com>
*/
+/**
+ * This file contains the implementation of the IBITS lock type.
+ *
+ * IBITS lock type contains a bit mask determining various properties of an
+ * object. The meanings of specific bits are specific to the caller and are
+ * opaque to LDLM code.
+ *
+ * Locks with intersecting bitmasks and conflicting lock modes (e.g. LCK_PW)
+ * are considered conflicting. See the lock mode compatibility matrix
+ * in lustre_dlm.h.
+ */
+
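+/*
+ * Conceptually, the conflict check between two IBITS locks a and b
+ * reduces to the following sketch (illustrative only; it uses the
+ * l_policy_data.l_inodebits.bits field and lockmode_compat() as they
+ * appear later in this file):
+ *
+ * \code
+ * conflict = (a->l_policy_data.l_inodebits.bits &
+ *             b->l_policy_data.l_inodebits.bits) != 0 &&
+ *            !lockmode_compat(a->l_req_mode, b->l_req_mode);
+ * \endcode
+ */
+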
#define DEBUG_SUBSYSTEM S_LDLM
#ifndef __KERNEL__
# include <liblustre.h>
#include "ldlm_internal.h"
#ifdef HAVE_SERVER_SUPPORT
-/* Determine if the lock is compatible with all locks on the queue. */
+/**
+ * Determine if the lock is compatible with all locks on the queue.
+ *
+ * If \a work_list is provided, conflicting locks are linked there.
+ * If \a work_list is not provided, we exit this function on first conflict.
+ *
+ * \retval 0 if there are conflicting locks in the \a queue
+ * \retval 1 if the lock is compatible to all locks in \a queue
+ *
+ * IBITS locks in granted queue are organized in bunches of
+ * same-mode/same-bits locks called "skip lists". The first lock in the
+ * bunch contains a pointer to the end of the bunch. This allows us to
+ * skip an entire bunch when iterating the list in search of conflicting
+ * locks if the first lock of the bunch does not conflict with us.
+ */
static int
ldlm_inodebits_compat_queue(cfs_list_t *queue, struct ldlm_lock *req,
cfs_list_t *work_list)
lock = cfs_list_entry(tmp, struct ldlm_lock, l_res_link);
+ /* We stop walking the queue if we hit ourselves so we don't
+ * take conflicting locks enqueued after us into account,
+ * or we'd wait forever. */
if (req == lock)
RETURN(compat);
for (;;) {
cfs_list_t *head;
- /* last lock in policy group */
- tmp = &cfs_list_entry(lock->l_sl_policy.prev,
- struct ldlm_lock,
- l_sl_policy)->l_res_link;
-
- /* locks with bits overlapped are conflicting locks */
- if (lock->l_policy_data.l_inodebits.bits & req_bits) {
- /* COS lock from the same client is
- not conflicting */
- if (lock->l_req_mode == LCK_COS &&
- lock->l_client_cookie == req->l_client_cookie)
- goto not_conflicting;
- /* conflicting policy */
- if (!work_list)
- RETURN(0);
-
- compat = 0;
-
- /* add locks of the policy group to
- * @work_list as blocking locks for
- * @req */
+ /* Advance loop cursor to last lock in policy group. */
+ tmp = &cfs_list_entry(lock->l_sl_policy.prev,
+ struct ldlm_lock,
+ l_sl_policy)->l_res_link;
+
+ /* Locks with overlapping bits conflict. */
+ if (lock->l_policy_data.l_inodebits.bits & req_bits) {
+ /* COS lock mode has a special compatibility
+ * requirement: it is only compatible with
+ * locks from the same client. */
+ if (lock->l_req_mode == LCK_COS &&
+ lock->l_client_cookie == req->l_client_cookie)
+ goto not_conflicting;
+ /* Found a conflicting policy group. */
+ if (!work_list)
+ RETURN(0);
+
+ compat = 0;
+
+			/* Add locks of the policy group to \a work_list
+			 * as blocking locks for \a req. */
if (lock->l_blocking_ast)
ldlm_add_ast_work_item(lock, req,
work_list);
tmp = tmp->next;
lock = cfs_list_entry(tmp, struct ldlm_lock,
l_res_link);
- } /* loop over policy groups within one mode group */
- } /* loop over mode groups within @queue */
+ } /* Loop over policy groups within one mode group. */
+ } /* Loop over mode groups within @queue. */
- RETURN(compat);
+ RETURN(compat);
}
-/* If first_enq is 0 (ie, called from ldlm_reprocess_queue):
- * - blocking ASTs have already been sent
- * - must call this function with the ns lock held
- *
- * If first_enq is 1 (ie, called from ldlm_lock_enqueue):
- * - blocking ASTs have not been sent
- * - must call this function with the ns lock held once */
+/**
+ * Process a granting attempt for IBITS lock.
+ * Must be called with ns lock held.
+ *
+ * This function looks for any conflicts for \a lock in the granted or
+ * waiting queues. The lock is granted if no conflicts are found in
+ * either queue.
+ *
+ * If \a first_enq is 0 (i.e., called from ldlm_reprocess_queue):
+ *   - blocking ASTs have already been sent
+ *
+ * If \a first_enq is 1 (i.e., called from ldlm_lock_enqueue):
+ *   - blocking ASTs have not been sent yet, so the list of conflicting
+ *     locks is collected and ASTs are sent to their holders.
+ */
int ldlm_process_inodebits_lock(struct ldlm_lock *lock, __u64 *flags,
int first_enq, ldlm_error_t *err,
cfs_list_t *work_list)
* Lustre is a trademark of Sun Microsystems, Inc.
*/
+/**
+ * This file deals with various client/target related logic including recovery.
+ *
+ * TODO: This code more logically belongs in the ptlrpc module than in ldlm and
+ * should be moved.
+ */
+
#define DEBUG_SUBSYSTEM S_LDLM
#ifdef __KERNEL__
#include <lustre_sec.h>
#include "ldlm_internal.h"
-/* @priority: if non-zero, move the selected to the list head
- * @create: if zero, only search in existed connections
+/* @priority: If non-zero, move the selected connection to the list head.
+ * @create: If zero, only search in existing connections.
*/
static int import_set_conn(struct obd_import *imp, struct obd_uuid *uuid,
int priority, int create)
GOTO(out_free, rc = 0);
}
}
- /* not found */
+ /* No existing import connection found for \a uuid. */
if (create) {
imp_conn->oic_conn = ptlrpc_conn;
imp_conn->oic_uuid = *uuid;
continue;
LASSERT(imp_conn->oic_conn);
- /* is current conn? */
if (imp_conn == imp->imp_conn_current) {
LASSERT(imp_conn->oic_conn == imp->imp_connection);
EXPORT_SYMBOL(client_import_del_conn);
/**
- * Find conn uuid by peer nid. @peer is a server nid. This function is used
- * to find a conn uuid of @imp which can reach @peer.
+ * Find conn UUID by peer NID. \a peer is a server NID. This function is used
+ * to find a conn UUID of \a imp which can reach \a peer.
*/
int client_import_find_conn(struct obd_import *imp, lnet_nid_t peer,
struct obd_uuid *uuid)
spin_lock(&imp->imp_lock);
cfs_list_for_each_entry(conn, &imp->imp_conn_list, oic_item) {
- /* check if conn uuid does have this peer nid */
+ /* Check if conn UUID does have this peer NID. */
if (class_check_uuid(&conn->oic_uuid, peer)) {
*uuid = conn->oic_uuid;
rc = 0;
void client_destroy_import(struct obd_import *imp)
{
- /* drop security policy instance after all rpc finished/aborted
- * to let all busy contexts be released. */
+ /* Drop security policy instance after all RPCs have finished/aborted
+ * to let all busy contexts be released. */
class_import_get(imp);
class_destroy_import(imp);
sptlrpc_import_sec_put(imp);
EXPORT_SYMBOL(client_destroy_import);
/**
- * check whether the osc is on MDT or not
+ * Check whether or not the OSC is on MDT.
* In the config log,
* osc on MDT
* setup 0:{fsname}-OSTxxxx-osc[-MDTxxxx] 1:lustre-OST0000_UUID 2:NID
return 0;
}
-/* configure an RPC client OBD device
+/* Configure an RPC client OBD device.
*
* lcfg parameters:
* 1 - client UUID
cli->cl_dirty = 0;
cli->cl_avail_grant = 0;
- /* FIXME: should limit this for the sum of all cl_dirty_max */
+ /* FIXME: Should limit this for the sum of all cl_dirty_max. */
cli->cl_dirty_max = OSC_MAX_DIRTY_DEFAULT * 1024 * 1024;
if (cli->cl_dirty_max >> CFS_PAGE_SHIFT > cfs_num_physpages / 8)
cli->cl_dirty_max = cfs_num_physpages << (CFS_PAGE_SHIFT - 3);
if (cli->cl_conn_count)
GOTO(out_disconnect, rc = 0);
- /* Mark import deactivated now, so we don't try to reconnect if any
- * of the cleanup RPCs fails (e.g. ldlm cancel, etc). We don't
- * fully deactivate the import, or that would drop all requests. */
+ /* Mark import deactivated now, so we don't try to reconnect if any
+ * of the cleanup RPCs fails (e.g. LDLM cancel, etc). We don't
+ * fully deactivate the import, or that would drop all requests. */
spin_lock(&imp->imp_lock);
imp->imp_deactive = 1;
spin_unlock(&imp->imp_lock);
ldlm_namespace_free_prior(obd->obd_namespace, imp, obd->obd_force);
}
- /*
- * there's no need to hold sem during disconnecting an import,
- * and actually it may cause deadlock in gss.
- */
+ /* There's no need to hold sem while disconnecting an import,
+ * and it may actually cause deadlock in GSS. */
up_write(&cli->cl_sem);
rc = ptlrpc_disconnect_import(imp, 0);
down_write(&cli->cl_sem);
EXIT;
- out_disconnect:
- /* use server style - class_disconnect should be always called for
- * o_disconnect */
+out_disconnect:
+	/* Use server style - class_disconnect should always be called for
+	 * o_disconnect. */
err = class_disconnect(exp);
if (!rc && err)
rc = err;
int rc;
ENTRY;
- /* Disconnect early so that clients can't keep using export */
- rc = class_disconnect(exp);
- /* close import for avoid sending any requests */
- if (exp->exp_imp_reverse)
- ptlrpc_cleanup_imp(exp->exp_imp_reverse);
+ /* Disconnect early so that clients can't keep using export. */
+ rc = class_disconnect(exp);
+ /* Close import to avoid sending any requests. */
+ if (exp->exp_imp_reverse)
+ ptlrpc_cleanup_imp(exp->exp_imp_reverse);
- if (exp->exp_obd->obd_namespace != NULL)
- ldlm_cancel_locks_for_export(exp);
+ if (exp->exp_obd->obd_namespace != NULL)
+ ldlm_cancel_locks_for_export(exp);
/* complete all outstanding replies */
spin_lock(&exp->exp_lock);
GOTO(out, rc = -EAGAIN);
}
- /* Make sure the target isn't cleaned up while we're here. Yes,
- there's still a race between the above check and our incref here.
- Really, class_uuid2obd should take the ref. */
+ /* Make sure the target isn't cleaned up while we're here. Yes,
+ * there's still a race between the above check and our incref here.
+ * Really, class_uuid2obd should take the ref. */
targref = class_incref(target, __FUNCTION__, cfs_current());
target->obd_conn_inprogress++;
obd_str2uuid(&cluuid, str);
- /* XXX extract a nettype and format accordingly */
- switch (sizeof(lnet_nid_t)) {
- /* NB the casts only avoid compiler warnings */
+ /* XXX Extract a nettype and format accordingly. */
+ switch (sizeof(lnet_nid_t)) {
+ /* NB the casts only avoid compiler warnings. */
case 8:
snprintf(remote_uuid.uuid, sizeof remote_uuid,
"NET_"LPX64"_UUID", (__u64)req->rq_peer.nid);
if (!export)
goto no_export;
- /* we've found an export in the hash */
+ /* We've found an export in the hash. */
spin_lock(&export->exp_lock);
} else if (mds_conn && export->exp_connection) {
spin_unlock(&export->exp_lock);
if (req->rq_peer.nid != export->exp_connection->c_peer.nid)
- /* mds reconnected after failover */
- LCONSOLE_WARN("%s: Received MDS connection from "
- "%s, removing former export from %s\n",
- target->obd_name, libcfs_nid2str(req->rq_peer.nid),
- libcfs_nid2str(export->exp_connection->c_peer.nid));
- else
- /* new mds connection from the same nid */
+ /* MDS reconnected after failover. */
+ LCONSOLE_WARN("%s: Received MDS connection from "
+ "%s, removing former export from %s\n",
+ target->obd_name, libcfs_nid2str(req->rq_peer.nid),
+ libcfs_nid2str(export->exp_connection->c_peer.nid));
+ else
+ /* New MDS connection from the same NID. */
LCONSOLE_WARN("%s: Received new MDS connection from "
"%s, removing former export from same NID\n",
target->obd_name, libcfs_nid2str(req->rq_peer.nid));
(lustre_msg_get_op_flags(req->rq_reqmsg) &
MSG_CONNECT_INITIAL)) {
spin_unlock(&export->exp_lock);
- /* in mds failover we have static uuid but nid can be
- * changed*/
+ /* In MDS failover we have static UUID but NID can change. */
LCONSOLE_WARN("%s: Client %s seen on new nid %s when "
"existing nid %s is already connected\n",
target->obd_name, cluuid.uuid,
GOTO(out, rc = -EBUSY);
} else if (req->rq_export != NULL &&
(cfs_atomic_read(&export->exp_rpc_count) > 1)) {
- /* the current connect rpc has increased exp_rpc_count */
+ /* The current connect RPC has increased exp_rpc_count. */
LCONSOLE_WARN("%s: Client %s (at %s) refused reconnection, "
"still busy with %d active RPCs\n",
target->obd_name, cluuid.uuid,
export ? (long)export->exp_last_request_time : 0);
/* If this is the first time a client connects, reset the recovery
- * timer. Discard lightweight connections which might be local */
+ * timer. Discard lightweight connections which might be local. */
if (!lw_client && rc == 0 && target->obd_recovering)
- check_and_start_recovery_timer(target, req, export == NULL);
+ check_and_start_recovery_timer(target, req, export == NULL);
- /* We want to handle EALREADY but *not* -EALREADY from
- * target_handle_reconnect(), return reconnection state in a flag */
+ /* We want to handle EALREADY but *not* -EALREADY from
+ * target_handle_reconnect(), return reconnection state in a flag. */
if (rc == EALREADY) {
lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECONNECT);
rc = 0;
LASSERT(rc == 0);
}
- /* Tell the client if we support replayable requests */
+ /* Tell the client if we support replayable requests. */
if (target->obd_replayable)
lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_REPLAYABLE);
client_nid = &req->rq_peer.nid;
if (req->rq_export != NULL)
class_export_put(req->rq_export);
- /* request takes one export refcount */
+ /* Request takes one export reference. */
req->rq_export = class_export_get(export);
spin_lock(&export->exp_lock);
export->exp_conn_cnt = lustre_msg_get_conn_cnt(req->rq_reqmsg);
export->exp_abort_active_req = 0;
- /* request from liblustre? Don't evict it for not pinging. */
+ /* Don't evict liblustre clients for not pinging. */
if (lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_LIBCLIENT) {
export->exp_libclient = 1;
spin_unlock(&export->exp_lock);
}
if (export->exp_connection != NULL) {
- /* Check to see if connection came from another NID */
+ /* Check to see if connection came from another NID. */
if ((export->exp_connection->c_peer.nid != req->rq_peer.nid) &&
!cfs_hlist_unhashed(&export->exp_nid_hash))
cfs_hash_del(export->exp_obd->obd_nid_hash,
if (has_transno && transno > 0 &&
transno < target->obd_next_recovery_transno &&
transno > target->obd_last_committed) {
- /* another way is to use cmpxchg() so it will be
- * lock free */
+ /* Another way is to use cmpxchg() to be lock-free. */
spin_lock(&target->obd_recovery_task_lock);
if (transno < target->obd_next_recovery_transno)
target->obd_next_recovery_transno = transno;
tmp = req_capsule_client_get(&req->rq_pill, &RMF_CONN);
conn = *tmp;
- /* for the rest part, we return -ENOTCONN in case of errors
- * in order to let client initialize connection again.
- */
+ /* Return -ENOTCONN in case of errors to let client reconnect. */
revimp = class_new_import(target);
if (revimp == NULL) {
CERROR("fail to alloc new reverse import.\n");
revimp->imp_dlm_fake = 1;
revimp->imp_state = LUSTRE_IMP_FULL;
- /* unknown versions will be caught in
- * ptlrpc_handle_server_req_in->lustre_unpack_msg() */
+ /* Unknown versions will be caught in
+ * ptlrpc_handle_server_req_in->lustre_unpack_msg(). */
revimp->imp_msg_magic = req->rq_reqmsg->lm_magic;
if ((data->ocd_connect_flags & OBD_CONNECT_AT) &&
if (rc)
RETURN(rc);
- /* keep the rq_export around so we can send the reply */
+ /* Keep the rq_export around so we can send the reply. */
req->rq_status = obd_disconnect(class_export_get(req->rq_export));
RETURN(0);
LASSERT(cfs_list_empty(&req->rq_list));
CFS_INIT_LIST_HEAD(&req->rq_replay_list);
- /* increase refcount to keep request in queue */
- cfs_atomic_inc(&req->rq_refcount);
- /** let export know it has replays to be handled */
+ /* Increase refcount to keep request in queue. */
+ cfs_atomic_inc(&req->rq_refcount);
+ /* Let export know it has replays to be handled. */
cfs_atomic_inc(&req->rq_export->exp_replay_count);
}
}
if (dup) {
- /* we expect it with RESENT and REPLAY flags */
+ /* We expect it with RESENT and REPLAY flags. */
if ((lustre_msg_get_flags(req->rq_reqmsg) &
(MSG_RESENT | MSG_REPLAY)) != (MSG_RESENT | MSG_REPLAY))
CERROR("invalid flags %x of resent replay\n",
{
ENTRY;
- /* only log a recovery message when recovery has occurred */
+ /* Only log a recovery message when recovery has occurred. */
if (obd->obd_recovery_start) {
time_t elapsed_time = max_t(time_t, 1, cfs_time_current_sec() -
obd->obd_recovery_start);
obd->obd_recovery_end = cfs_time_current_sec();
- /* when recovery finished, cleanup orphans on mds and ost */
+ /* When recovery finished, cleanup orphans on MDS and OST. */
if (OBT(obd) && OBP(obd, postrecov)) {
int rc = OBP(obd, postrecov)(obd);
if (rc < 0)
target_start_recovery_timer(obd);
- /* convert the service time to rpc timeout,
- * reuse service_time to limit stack usage */
- service_time = at_est2timeout(service_time);
+ /* Convert the service time to RPC timeout,
+ * and reuse service_time to limit stack usage. */
+ service_time = at_est2timeout(service_time);
- /* We expect other clients to timeout within service_time, then try
- * to reconnect, then try the failover server. The max delay between
- * connect attempts is SWITCH_MAX + SWITCH_INC + INITIAL */
+ /* We expect other clients to timeout within service_time, then try
+ * to reconnect, then try the failover server. The max delay between
+ * connect attempts is SWITCH_MAX + SWITCH_INC + INITIAL. */
service_time += 2 * INITIAL_CONNECT_TIMEOUT;
LASSERT(obt->obt_magic == OBT_MAGIC);
struct obd_device *obd;
ENTRY;
- /*
- * Check that we still have all structures alive as this may
- * be some late rpc in shutdown time.
- */
+ /* Check that we still have all structures alive as this may
+ * be some late RPC at shutdown time. */
if (unlikely(!req->rq_export || !req->rq_export->exp_obd ||
!exp_connect_lru_resize(req->rq_export))) {
lustre_msg_set_slv(req->rq_repmsg, 0);
RETURN(0);
}
- /*
- * OBD is alive here as export is alive, which we checked above.
- */
+ /* OBD is alive here as export is alive, which we checked above. */
obd = req->rq_export->exp_obd;
read_lock(&obd->obd_pool_lock);
int rc = 0;
ENTRY;
- /* Check if there is eviction in progress, and if so, wait for
- * it to finish */
+ /* If there is eviction in progress, wait for it to finish. */
if (unlikely(cfs_atomic_read(&exp->exp_obd->obd_evict_inprogress))) {
*lwi = LWI_INTR(NULL, NULL);
rc = l_wait_event(exp->exp_obd->obd_evict_inprogress_waitq,
lwi);
}
- /* Check if client was evicted or tried to reconnect already */
+ /* Check if client was evicted or tried to reconnect already. */
if (exp->exp_failed || exp->exp_abort_active_req) {
rc = -ENOTCONN;
} else {
exp->exp_abort_active_req,
lwi);
LASSERT(rc == 0 || rc == -ETIMEDOUT);
- /* Wait again if we changed deadline */
+ /* Wait again if we changed deadline. */
} while ((rc == -ETIMEDOUT) &&
(req->rq_deadline > cfs_time_current_sec()));
} else if (exp->exp_abort_active_req) {
DEBUG_REQ(D_ERROR, req, "Reconnect on bulk %s",
bulk2type(desc));
- /* we don't reply anyway */
+ /* We don't reply anyway. */
rc = -ETIMEDOUT;
ptlrpc_abort_bulk(desc);
} else if (!desc->bd_success ||
bulk2type(desc),
desc->bd_nob_transferred,
desc->bd_nob);
- /* XXX should this be a different errno? */
+ /* XXX Should this be a different errno? */
rc = -ETIMEDOUT;
} else if (desc->bd_type == BULK_GET_SINK) {
rc = sptlrpc_svc_unwrap_bulk(req, desc);
*/
-/*
+/**
+ * Get a reference on a lock.
+ *
* Lock refcounts, during creation:
* - one special one for allocation, dec'd only once in destroy
* - one for being a lock that's in-use
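+ *
+ * Other users take and drop references in matched pairs, e.g. a sketch
+ * using the LDLM_LOCK_GET()/LDLM_LOCK_PUT() helpers:
+ * \code
+ * struct ldlm_lock *lock = LDLM_LOCK_GET(l);  // take an extra reference
+ * ...                                         // use the lock
+ * LDLM_LOCK_PUT(lock);                        // drop it when done
+ * \endcode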
}
EXPORT_SYMBOL(ldlm_lock_get);
+/**
+ * Release lock reference.
+ *
+ * Also frees the lock if this was the last reference.
+ */
void ldlm_lock_put(struct ldlm_lock *lock)
{
ENTRY;
}
EXPORT_SYMBOL(ldlm_lock_put);
+/**
+ * Removes LDLM lock \a lock from LRU. Assumes LRU is already locked.
+ */
int ldlm_lock_remove_from_lru_nolock(struct ldlm_lock *lock)
{
int rc = 0;
return rc;
}
+/**
+ * Removes LDLM lock \a lock from LRU. Obtains the LRU lock first.
+ */
int ldlm_lock_remove_from_lru(struct ldlm_lock *lock)
{
struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);
return rc;
}
+/**
+ * Adds LDLM lock \a lock to namespace LRU. Assumes LRU is already locked.
+ */
void ldlm_lock_add_to_lru_nolock(struct ldlm_lock *lock)
{
struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);
ns->ns_nr_unused++;
}
+/**
+ * Adds LDLM lock \a lock to namespace LRU. Obtains necessary LRU locks
+ * first.
+ */
void ldlm_lock_add_to_lru(struct ldlm_lock *lock)
{
struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);
EXIT;
}
+/**
+ * Moves LDLM lock \a lock that is already in namespace LRU to the tail of
+ * the LRU. Performs necessary LRU locking.
+ */
void ldlm_lock_touch_in_lru(struct ldlm_lock *lock)
{
struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);
EXIT;
}
-/* This used to have a 'strict' flag, which recovery would use to mark an
+/**
+ * Helper to destroy a locked lock.
+ *
+ * Used by ldlm_lock_destroy and ldlm_lock_destroy_nolock.
+ * Must be called with l_lock and lr_lock held.
+ *
+ * Does not actually free the lock data, but rather marks the lock as
+ * destroyed by setting the l_destroyed field in the lock to 1. Destroys
+ * the handle->lock association too, so that the lock can no longer be
+ * found, and removes the lock from the LRU list. Actual lock freeing
+ * occurs when the last lock reference goes away.
+ *
+ * Original comment (of some historical value):
+ * This used to have a 'strict' flag, which recovery would use to mark an
* in-use lock as needing-to-die. Lest I am ever tempted to put it back, I
* shall explain why it's gone: with the new hash table scheme, once you call
* ldlm_lock_destroy, you can never drop your final references on this lock.
- * Because it's not in the hash table anymore. -phil */
+ * Because it's not in the hash table anymore. -phil
+ */
int ldlm_lock_destroy_internal(struct ldlm_lock *lock)
{
ENTRY;
return 1;
}
+/**
+ * Destroys a LDLM lock \a lock. Performs necessary locking first.
+ */
void ldlm_lock_destroy(struct ldlm_lock *lock)
{
int first;
EXIT;
}
+/**
+ * Destroys a LDLM lock \a lock that is already locked.
+ */
void ldlm_lock_destroy_nolock(struct ldlm_lock *lock)
{
int first;
.hop_free = lock_handle_free,
};
-/*
+/**
+ * Allocate and initialize a new lock structure.
+ *
* usage: pass in a resource on which you have done ldlm_resource_get
* new lock will take over the refcount.
* returns: lock with refcount 2 - one for current caller and one for remote
RETURN(lock);
}
+/**
+ * Moves LDLM lock \a lock to another resource.
+ * This is used on the client when the server returns some other lock than
+ * requested (typically as a result of an intent operation).
+ */
int ldlm_lock_change_resource(struct ldlm_namespace *ns, struct ldlm_lock *lock,
const struct ldlm_res_id *new_resid)
{
}
EXPORT_SYMBOL(ldlm_lock_change_resource);
-/*
- * HANDLES
+/** \defgroup ldlm_handles LDLM HANDLES
+ * Ways to get hold of locks without any addresses.
+ * @{
*/
+/**
+ * Fills the supplied \a lockh with the handle for LDLM lock \a lock.
+ * Does not take any references.
+ */
void ldlm_lock2handle(const struct ldlm_lock *lock, struct lustre_handle *lockh)
{
- lockh->cookie = lock->l_handle.h_cookie;
+ lockh->cookie = lock->l_handle.h_cookie;
}
EXPORT_SYMBOL(ldlm_lock2handle);
-/* if flags: atomically get the lock and set the flags.
- * Return NULL if flag already set
+/**
+ * Obtain a lock reference by handle.
+ *
+ * If \a flags is non-zero: atomically get the lock and set the flags.
+ * Returns NULL if any flag is already set.
*/
-
struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *handle,
__u64 flags)
{
RETURN(lock);
}
EXPORT_SYMBOL(__ldlm_handle2lock);
+/** @} ldlm_handles */
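+
+/*
+ * Putting the handle helpers together, a round-trip looks like this
+ * sketch (illustrative only):
+ * \code
+ * struct lustre_handle lockh;
+ *
+ * ldlm_lock2handle(lock, &lockh);   // produce a handle, no reference
+ * ...
+ * lock = ldlm_handle2lock(&lockh);  // resolve it back with a reference
+ * if (lock != NULL) {               // NULL if the lock is gone by now
+ *         ...
+ *         LDLM_LOCK_PUT(lock);
+ * }
+ * \endcode
+ */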
+/**
+ * Fill in the "on the wire" representation for the given LDLM lock into
+ * the supplied lock descriptor \a desc structure.
+ */
void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc)
{
struct obd_export *exp = lock->l_export?:lock->l_conn_export;
}
EXPORT_SYMBOL(ldlm_lock2desc);
+/**
+ * Add a lock to the list of conflicting locks to send AST to.
+ *
+ * Only add if we have not sent a blocking AST to the lock yet.
+ */
void ldlm_add_bl_work_item(struct ldlm_lock *lock, struct ldlm_lock *new,
cfs_list_t *work_list)
{
}
}
+/**
+ * Add a lock to the list of just granted locks to send completion AST to.
+ */
void ldlm_add_cp_work_item(struct ldlm_lock *lock, cfs_list_t *work_list)
{
if ((lock->l_flags & LDLM_FL_CP_REQD) == 0) {
}
}
-/* must be called with lr_lock held */
+/**
+ * Aggregator function to add AST work items into a list. Determines
+ * what sort of AST work needs to be done and calls the proper adding
+ * function.
+ * Must be called with lr_lock held.
+ */
void ldlm_add_ast_work_item(struct ldlm_lock *lock, struct ldlm_lock *new,
cfs_list_t *work_list)
{
EXIT;
}
+/**
+ * Add specified reader/writer reference to LDLM lock with handle \a lockh.
+ * r/w reference type is determined by \a mode.
+ * Calls ldlm_lock_addref_internal.
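+ *
+ * References are taken and dropped in matched pairs, e.g. (sketch):
+ * \code
+ * ldlm_lock_addref(&lockh, LCK_PR);  // pin the lock while it is used
+ * ...                                // read data covered by the lock
+ * ldlm_lock_decref(&lockh, LCK_PR);  // let it age out via the LRU
+ * \endcode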
+ */
void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode)
{
struct ldlm_lock *lock;
}
EXPORT_SYMBOL(ldlm_lock_addref);
+/**
+ * Helper function.
+ * Add specified reader/writer reference to LDLM lock \a lock.
+ * r/w reference type is determined by \a mode.
+ * Removes lock from LRU if it is there.
+ * Assumes the LDLM lock is already locked.
+ */
void ldlm_lock_addref_internal_nolock(struct ldlm_lock *lock, __u32 mode)
{
ldlm_lock_remove_from_lru(lock);
}
/**
- * Attempts to addref a lock, and fails if lock is already LDLM_FL_CBPENDING
- * or destroyed.
+ * Attempts to add reader/writer reference to a lock with handle \a lockh, and
+ * fails if lock is already LDLM_FL_CBPENDING or destroyed.
*
* \retval 0 success, lock was addref-ed
*
}
EXPORT_SYMBOL(ldlm_lock_addref_try);
-/* only called for local locks */
+/**
+ * Add specified reader/writer reference to LDLM lock \a lock.
+ * Locks LDLM lock and calls ldlm_lock_addref_internal_nolock to do the work.
+ * Only called for local locks.
+ */
void ldlm_lock_addref_internal(struct ldlm_lock *lock, __u32 mode)
{
lock_res_and_lock(lock);
unlock_res_and_lock(lock);
}
-/* only called in ldlm_flock_destroy and for local locks.
- * * for LDLM_FLOCK type locks, l_blocking_ast is null, and
- * * ldlm_lock_remove_from_lru() does nothing, it is safe
- * * for ldlm_flock_destroy usage by dropping some code */
+/**
+ * Removes reader/writer reference for LDLM lock \a lock.
+ * Assumes LDLM lock is already locked.
+ * Only called in ldlm_flock_destroy and for local locks.
+ * Does NOT add lock to LRU if no r/w references are left, to accommodate
+ * flock locks that cannot be placed in the LRU.
+ */
void ldlm_lock_decref_internal_nolock(struct ldlm_lock *lock, __u32 mode)
{
LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
LDLM_LOCK_RELEASE(lock); /* matches the LDLM_LOCK_GET() in addref */
}
+/**
+ * Removes reader/writer reference for LDLM lock \a lock.
+ * Locks LDLM lock first.
+ * If this is determined to be a client lock and its r/w refcount drops
+ * to zero while the lock is not blocked, the lock is added to the LRU
+ * list of the namespace.
+ * For blocked LDLM locks, if the r/w count drops to zero, the
+ * blocking_ast is called.
+ */
void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
{
struct ldlm_namespace *ns;
EXIT;
}
+/**
+ * Decrease reader/writer refcount for LDLM lock with handle \a lockh.
+ */
void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode)
{
struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);
}
EXPORT_SYMBOL(ldlm_lock_decref);
-/* This will drop a lock reference and mark it for destruction, but will not
- * necessarily cancel the lock before returning. */
+/**
+ * Decrease reader/writer refcount for LDLM lock with handle
+ * \a lockh and mark it for subsequent cancellation once r/w refcount
+ * drops to zero instead of putting it on the LRU.
+ *
+ * Typical usage is for GROUP locks which we cannot allow to be cached.
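+ * For example (sketch):
+ * \code
+ * ldlm_lock_decref_and_cancel(&lockh, LCK_GROUP);
+ * \endcode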
+ */
void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode)
{
struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);
cfs_list_t *policy_link;
};
-/*
- * search_granted_lock
+/**
+ * Finds a position to insert the new lock into granted lock list.
+ *
+ * Used for locks eligible for skiplist optimization.
*
- * Description:
- * Finds a position to insert the new lock.
* Parameters:
* queue [input]: the granted list where search acts on;
* req [input]: the lock whose position is to be located;
return;
}
+/**
+ * Add a lock into the resource granted list after a position described by
+ * \a prev.
+ */
static void ldlm_granted_list_add_lock(struct ldlm_lock *lock,
struct sl_insert_point *prev)
{
EXIT;
}
+/**
+ * Add a lock to the granted list of a resource, maintaining skiplist
+ * correctness.
+ */
static void ldlm_grant_lock_with_skiplist(struct ldlm_lock *lock)
{
struct sl_insert_point prev;
EXIT;
}
-/* NOTE: called by
+/**
+ * Perform lock granting bookkeeping.
+ *
+ * Includes putting the lock into granted list and updating lock mode.
+ * NOTE: called by
* - ldlm_lock_enqueue
* - ldlm_reprocess_queue
* - ldlm_lock_convert
EXIT;
}
-/* returns a referenced lock or NULL. See the flag descriptions below, in the
- * comment above ldlm_lock_match */
+/**
+ * Search for a lock with given properties in a queue.
+ *
+ * \retval a referenced lock or NULL. See the flag descriptions in the
+ * comment above ldlm_lock_match().
+ */
static struct ldlm_lock *search_queue(cfs_list_t *queue,
ldlm_mode_t *mode,
ldlm_policy_data_t *policy,
}
EXPORT_SYMBOL(ldlm_lock_fail_match);
+/**
+ * Mark lock as "matchable" by OST.
+ *
+ * Used to prevent certain races in LOV/OSC where the lock is granted, but LVB
+ * is not yet valid.
+ * Assumes LDLM lock is already locked.
+ */
void ldlm_lock_allow_match_locked(struct ldlm_lock *lock)
{
- lock->l_flags |= LDLM_FL_LVB_READY;
- cfs_waitq_broadcast(&lock->l_waitq);
+ lock->l_flags |= LDLM_FL_LVB_READY;
+ cfs_waitq_broadcast(&lock->l_waitq);
}
EXPORT_SYMBOL(ldlm_lock_allow_match_locked);
+/**
+ * Mark lock as "matchable" by OST.
+ * Locks the lock and then \see ldlm_lock_allow_match_locked
+ */
void ldlm_lock_allow_match(struct ldlm_lock *lock)
{
lock_res_and_lock(lock);
}
EXPORT_SYMBOL(ldlm_lock_allow_match);
-/* Can be called in two ways:
+/**
+ * Attempt to find a lock with specified properties.
+ *
+ * Typically returns a reference to the matched lock unless
+ * LDLM_FL_TEST_LOCK is set in \a flags.
+ *
+ * Can be called in two ways:
*
* If 'ns' is NULL, then lockh describes an existing lock that we want to look
* for a duplicate of.
* If 'flags' contains LDLM_FL_TEST_LOCK, then don't actually reference a lock,
* just tell us if we would have matched.
*
- * Returns 1 if it finds an already-existing lock that is compatible; in this
+ * \retval 1 if it finds an already-existing lock that is compatible; in this
* case, lockh is filled in with an addref()ed lock
*
- * we also check security context, if that failed we simply return 0 (to keep
- * caller code unchanged), the context failure will be discovered by caller
- * sometime later.
+ * We also check the security context, and if that fails we simply return 0
+ * (to keep caller code unchanged); the context failure will be discovered
+ * by the caller sometime later.
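+ *
+ * A typical caller pattern is sketched below; the trailing arguments of
+ * ldlm_lock_match() beyond those shown in the prototype here are
+ * illustrative assumptions:
+ * \code
+ * struct lustre_handle lockh;
+ * ldlm_mode_t mode;
+ *
+ * mode = ldlm_lock_match(ns, LDLM_FL_LVB_READY, &res_id, LDLM_EXTENT,
+ *                        &policy, LCK_PR | LCK_PW, &lockh, 0);
+ * if (mode != 0) {
+ *         // a compatible lock was found and addref-ed; drop the
+ *         // reference once done with it
+ *         ldlm_lock_decref(&lockh, mode);
+ * }
+ * \endcode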
*/
ldlm_mode_t ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags,
const struct ldlm_res_id *res_id, ldlm_type_t type,
}
EXPORT_SYMBOL(ldlm_revalidate_lock_handle);
-/* The caller's duty to guarantee the buffer is large enough. */
+/** The caller must guarantee that the buffer is large enough. */
int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill,
enum req_location loc, void *data, int size)
{
RETURN(0);
}
-/* Returns a referenced lock */
+/**
+ * Create and fill in new LDLM lock with specified properties.
+ * Returns a referenced lock.
+ */
struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns,
const struct ldlm_res_id *res_id,
ldlm_type_t type,
return NULL;
}
+/**
+ * Enqueue (request) a lock.
+ *
+ * Does not block. As a result of enqueue the lock is put on the granted
+ * or waiting list.
+ *
+ * If the namespace has an intent policy set and the lock has the
+ * LDLM_FL_HAS_INTENT flag set, all enqueueing is skipped and lock
+ * processing is delegated to the intent policy function.
+ */
ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *ns,
struct ldlm_lock **lockp,
void *cookie, __u64 *flags)
}
#ifdef HAVE_SERVER_SUPPORT
-/* Must be called with namespace taken: queue is waiting or converting. */
+/**
+ * Iterate through all waiting locks on a given resource queue and attempt to
+ * grant them.
+ *
+ * Must be called with resource lock held.
+ */
int ldlm_reprocess_queue(struct ldlm_resource *res, cfs_list_t *queue,
cfs_list_t *work_list)
{
}
#endif
+/**
+ * Process a call to blocking AST callback for a lock in ast_work list.
+ */
static int
ldlm_work_bl_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
{
RETURN(rc);
}
+/**
+ * Process a call to completion AST callback for a lock in ast_work list.
+ */
static int
ldlm_work_cp_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
{
RETURN(rc);
}
+/**
+ * Process a call to revocation AST callback for a lock in ast_work list.
+ */
static int
ldlm_work_revoke_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
{
RETURN(rc);
}
+/**
+ * Process a call to glimpse AST callback for a lock in ast_work list.
+ */
int ldlm_work_gl_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
{
struct ldlm_cb_set_arg *arg = opaq;
RETURN(rc);
}
+/**
+ * Process a list of locks in need of ASTs being sent.
+ *
+ * Used on server to send multiple ASTs together instead of sending one by
+ * one.
+ */
int ldlm_run_ast_work(struct ldlm_namespace *ns, cfs_list_t *rpc_list,
ldlm_desc_ast_t ast_type)
{
return rc == LDLM_ITER_STOP;
}
+/**
+ * Iterate through all resources in a namespace attempting to grant
+ * waiting locks.
+ */
void ldlm_reprocess_all_ns(struct ldlm_namespace *ns)
{
ENTRY;
}
EXPORT_SYMBOL(ldlm_reprocess_all_ns);
+/**
+ * Try to grant all waiting locks on a resource.
+ *
+ * Calls ldlm_reprocess_queue on converting and waiting queues.
+ *
+ * Typically called after some resource locks are cancelled to see
+ * if anything could be granted as a result of the cancellation.
+ */
void ldlm_reprocess_all(struct ldlm_resource *res)
{
CFS_LIST_HEAD(rpc_list);
EXIT;
}
+/**
+ * Helper function to call blocking AST for LDLM lock \a lock in a
+ * "cancelling" mode.
+ */
void ldlm_cancel_callback(struct ldlm_lock *lock)
{
- check_res_locked(lock->l_resource);
- if (!(lock->l_flags & LDLM_FL_CANCEL)) {
- lock->l_flags |= LDLM_FL_CANCEL;
- if (lock->l_blocking_ast) {
- // l_check_no_ns_lock(ns);
+ check_res_locked(lock->l_resource);
+ if (!(lock->l_flags & LDLM_FL_CANCEL)) {
+ lock->l_flags |= LDLM_FL_CANCEL;
+ if (lock->l_blocking_ast) {
unlock_res_and_lock(lock);
lock->l_blocking_ast(lock, NULL, lock->l_ast_data,
LDLM_CB_CANCELING);
lock->l_flags |= LDLM_FL_BL_DONE;
}
+/**
+ * Remove skiplist-enabled LDLM lock \a req from the granted list.
+ */
void ldlm_unlink_lock_skiplist(struct ldlm_lock *req)
{
if (req->l_resource->lr_type != LDLM_PLAIN &&
cfs_list_del_init(&req->l_sl_mode);
}
+/**
+ * Attempts to cancel LDLM lock \a lock that has no reader/writer references.
+ */
void ldlm_lock_cancel(struct ldlm_lock *lock)
{
struct ldlm_resource *res;
}
EXPORT_SYMBOL(ldlm_lock_cancel);
+/**
+ * Set opaque data into the lock that only makes sense to the upper layer.
+ */
int ldlm_lock_set_data(struct lustre_handle *lockh, void *data)
{
struct ldlm_lock *lock = ldlm_handle2lock(lockh);
int ecl_loop;
};
+/**
+ * Iterator function for ldlm_cancel_locks_for_export.
+ * Cancels passed locks.
+ */
int ldlm_cancel_locks_for_export_cb(cfs_hash_t *hs, cfs_hash_bd_t *bd,
cfs_hlist_node_t *hnode, void *data)
return 0;
}
+/**
+ * Cancel all locks for given export.
+ *
+ * Typically called on client disconnection/eviction.
+ */
void ldlm_cancel_locks_for_export(struct obd_export *exp)
{
struct export_cl_data ecl = {
*
* A fast variant of ldlm_lock_convert for conversion of exclusive
* locks. The conversion is always successful.
+ * Used by Commit on Sharing (COS) code.
*
* \param lock A lock to convert
* \param new_mode new lock mode
}
EXPORT_SYMBOL(ldlm_lock_downgrade);
+/**
+ * Attempt to convert already granted lock to a different mode.
+ *
+ * While lock conversion is not currently used, future client-side
+ * optimizations could take advantage of it to avoid discarding cached
+ * pages on a file.
+ */
struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode,
__u32 *flags)
{
struct ldlm_interval *node;
ENTRY;
- if (new_mode == lock->l_granted_mode) { // No changes? Just return.
+ /* Just return if mode is unchanged. */
+ if (new_mode == lock->l_granted_mode) {
*flags |= LDLM_FL_BLOCK_GRANTED;
RETURN(lock->l_resource);
}
}
EXPORT_SYMBOL(ldlm_lock_convert);
+/**
+ * Print the description of the lock with handle \a lockh into the
+ * debug log.
+ *
+ * Used when printing all locks on a resource for debug purposes.
+ */
void ldlm_lock_dump_handle(int level, struct lustre_handle *lockh)
{
struct ldlm_lock *lock;
}
EXPORT_SYMBOL(ldlm_lock_dump_handle);
+/**
+ * Print lock information with custom message into debug log.
+ * Helper function.
+ */
void _ldlm_lock_debug(struct ldlm_lock *lock,
struct libcfs_debug_msg_data *msgdata,
const char *fmt, ...)
#if defined(HAVE_SERVER_SUPPORT) && defined(__KERNEL__)
-/* w_l_spinlock protects both waiting_locks_list and expired_lock_thread */
+/**
+ * Protects both waiting_locks_list and expired_lock_thread.
+ */
static spinlock_t waiting_locks_spinlock; /* BH lock (timer) */
+
+/**
+ * List for contended locks.
+ *
+ * As soon as a lock is contended, it is placed on this list and the
+ * expected time to get a response is recorded in the lock. A special
+ * thread walks the list looking for locks that should be released and
+ * schedules client evictions for those that have not been released in
+ * time.
+ *
+ * All access to it should be under waiting_locks_spinlock.
+ */
static cfs_list_t waiting_locks_list;
static cfs_timer_t waiting_locks_timer;
RETURN(need_to_run);
}
+/**
+ * Check the expired lock list for expired locks and time them out.
+ */
static int expired_lock_main(void *arg)
{
cfs_list_t *expired = &expired_lock_thread.elt_expired_locks;
spin_unlock_bh(&waiting_locks_spinlock);
}
-/*
+/**
+ * Add lock to the list of contended locks.
+ *
* Indicate that we're waiting for a client to call us back cancelling a given
* lock. We add it to the pending-callback chain, and schedule the lock-timeout
* timer to fire appropriately. (We round up to the next second, to avoid
return ret;
}
-/*
+/**
* Remove a lock from the pending list, likely because it had its cancellation
* callback arrive without incident. This adjusts the lock-timeout timer if
* needed. Returns 0 if the lock wasn't pending after all, 1 if it was.
}
EXPORT_SYMBOL(ldlm_del_waiting_lock);
-/*
- * Prolong the lock
+/**
+ * Prolong the contended lock waiting time.
*
* Called with namespace lock held.
*/
#ifdef HAVE_SERVER_SUPPORT
+/**
+ * Perform lock cleanup if AST sending failed.
+ */
static void ldlm_failed_ast(struct ldlm_lock *lock, int rc,
const char *ast_type)
{
#endif
}
+/**
+ * Perform lock cleanup if AST reply came with an error.
+ */
static int ldlm_handle_ast_error(struct ldlm_lock *lock,
struct ptlrpc_request *req, int rc,
const char *ast_type)
EXIT;
}
-/*
+/**
* ->l_blocking_ast() method for server-side locks. This is invoked when newly
* enqueued server lock conflicts with given one.
*
- * Sends blocking ast rpc to the client owning that lock; arms timeout timer
+ * Sends blocking AST RPC to the client owning that lock; arms timeout timer
* to wait for client response.
*/
int ldlm_server_blocking_ast(struct ldlm_lock *lock,
}
EXPORT_SYMBOL(ldlm_server_blocking_ast);
+/**
+ * ->l_completion_ast callback for a remote lock in server namespace.
+ *
+ * Sends an AST to the client notifying it of lock granting. If the
+ * initial lock response was not sent yet, instead of sending another
+ * RPC, just mark the lock as granted and the client will understand.
+ */
int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
{
struct ldlm_cb_set_arg *arg = data;
lock_res_and_lock(lock);
if (lock->l_flags & LDLM_FL_AST_SENT) {
body->lock_flags |= ldlm_flags_to_wire(LDLM_FL_AST_SENT);
- /* copy ast flags like LDLM_FL_DISCARD_DATA */
+ /* Copy AST flags like LDLM_FL_DISCARD_DATA. */
body->lock_flags |= ldlm_flags_to_wire(lock->l_flags &
LDLM_AST_FLAGS);
}
EXPORT_SYMBOL(ldlm_server_completion_ast);
+/**
+ * Server side ->l_glimpse_ast handler for client locks.
+ *
+ * Sends a glimpse AST to the client and waits for the reply. Then updates
+ * the LVB with the result.
+ */
int ldlm_server_glimpse_ast(struct ldlm_lock *lock, void *data)
{
struct ldlm_cb_set_arg *arg = data;
}
EXPORT_SYMBOL(ldlm_glimpse_locks);
-/* return ldlm lock associated with a lock callback request */
+/* return LDLM lock associated with a lock callback request */
struct ldlm_lock *ldlm_request_lock(struct ptlrpc_request *req)
{
struct ldlm_cb_async_args *ca;
return;
}
-/*
- * Main server-side entry point into LDLM. This is called by ptlrpc service
- * threads to carry out client lock enqueueing requests.
+/**
+ * Main server-side entry point into LDLM for enqueue. This is called by ptlrpc
+ * service threads to carry out client lock enqueueing requests.
*/
int ldlm_handle_enqueue0(struct ldlm_namespace *ns,
struct ptlrpc_request *req,
}
EXPORT_SYMBOL(ldlm_handle_enqueue0);
+/**
+ * Old-style main LDLM entry point for server code to process lock
+ * enqueue requests.
+ */
int ldlm_handle_enqueue(struct ptlrpc_request *req,
ldlm_completion_callback completion_callback,
ldlm_blocking_callback blocking_callback,
}
EXPORT_SYMBOL(ldlm_handle_enqueue);
+/**
+ * Main LDLM entry point for server code to process lock conversion requests.
+ */
int ldlm_handle_convert0(struct ptlrpc_request *req,
const struct ldlm_request *dlm_req)
{
}
EXPORT_SYMBOL(ldlm_handle_convert0);
+/**
+ * Old-style main LDLM entry point for server code to process lock conversion
+ * requests.
+ */
int ldlm_handle_convert(struct ptlrpc_request *req)
{
int rc;
}
EXPORT_SYMBOL(ldlm_handle_convert);
-/* Cancel all the locks whos handles are packed into ldlm_request */
+/**
+ * Cancel all the locks whose handles are packed into ldlm_request.
+ *
+ * Called by server code expecting such combined cancel activity
+ * requests.
+ */
int ldlm_request_cancel(struct ptlrpc_request *req,
const struct ldlm_request *dlm_req, int first)
{
res = lock->l_resource;
done++;
+		/* This code is an optimization to only attempt lock
+		 * granting on the resource (which could be CPU-expensive)
+		 * after we are done cancelling locks in that resource. */
if (res != pres) {
if (pres != NULL) {
ldlm_reprocess_all(pres);
}
EXPORT_SYMBOL(ldlm_request_cancel);
+/**
+ * Main LDLM entry point for server code to cancel locks.
+ *
+ * Typically gets called from service handler on LDLM_CANCEL opc.
+ */
int ldlm_handle_cancel(struct ptlrpc_request *req)
{
struct ldlm_request *dlm_req;
EXPORT_SYMBOL(ldlm_handle_cancel);
#endif /* HAVE_SERVER_SUPPORT */
+/**
+ * Callback handler for receiving incoming blocking ASTs.
+ *
+ * This can only happen on the client side.
+ */
void ldlm_handle_bl_callback(struct ldlm_namespace *ns,
struct ldlm_lock_desc *ld, struct ldlm_lock *lock)
{
EXIT;
}
+/**
+ * Callback handler for receiving incoming completion ASTs.
+ *
+ * This can only happen on the client side.
+ */
static void ldlm_handle_cp_callback(struct ptlrpc_request *req,
struct ldlm_namespace *ns,
struct ldlm_request *dlm_req,
}
if (dlm_req->lock_flags & LDLM_FL_AST_SENT) {
- /* BL_AST locks are not needed in lru.
- * let ldlm_cancel_lru() be fast. */
+ /* BL_AST locks are not needed in LRU.
+ * Let ldlm_cancel_lru() be fast. */
ldlm_lock_remove_from_lru(lock);
lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_BL_AST;
LDLM_DEBUG(lock, "completion AST includes blocking AST");
LDLM_LOCK_RELEASE(lock);
}
+/**
+ * Callback handler for receiving incoming glimpse ASTs.
+ *
+ * This can only happen on the client side. After handling the glimpse
+ * AST we also consider dropping the lock here if it has been unused
+ * locally for a long time.
+ */
static void ldlm_handle_gl_callback(struct ptlrpc_request *req,
struct ldlm_namespace *ns,
struct ldlm_request *dlm_req,
}
}
+/**
+ * Queues a list of locks \a cancels containing \a count locks
+ * for later processing by a blocking thread. If \a count is zero,
+ * then the lock referenced as \a lock is queued instead.
+ *
+ * The blocking thread will then call the ->l_blocking_ast callback of
+ * the lock. If list addition fails, an error is returned and the caller
+ * is supposed to call ->l_blocking_ast itself.
+ */
static int ldlm_bl_to_thread(struct ldlm_namespace *ns,
struct ldlm_lock_desc *ld, struct ldlm_lock *lock,
cfs_list_t *cancels, int count, int mode)
&dlm_req->lock_handle[0]);
RETURN(0);
}
- /* BL_AST locks are not needed in lru.
- * let ldlm_cancel_lru() be fast. */
+ /* BL_AST locks are not needed in LRU.
+ * Let ldlm_cancel_lru() be fast. */
ldlm_lock_remove_from_lru(lock);
lock->l_flags |= LDLM_FL_BL_AST;
}
}
#ifdef HAVE_SERVER_SUPPORT
+/**
+ * Main handler for canceld thread.
+ *
+ * Separated into its own thread to avoid deadlocks.
+ */
static int ldlm_cancel_handler(struct ptlrpc_request *req)
{
int rc;
return 0;
}
+/**
+ * Main blocking requests processing thread.
+ *
+ * Callers put locks into its queue by calling ldlm_bl_to_thread.
+ * This thread eventually makes the actual calls to ->l_blocking_ast
+ * for the queued locks.
+ */
static int ldlm_bl_thread_main(void *arg)
{
struct ldlm_bl_pool *blp;
if (blwi->blwi_count) {
int count;
- /* The special case when we cancel locks in lru
+ /* The special case when we cancel locks in LRU
* asynchronously, we pass the list of locks here.
* Thus locks are marked LDLM_FL_CANCELING, but NOT
* canceled locally yet. */
* Author: Phil Schwan <phil@clusterfs.com>
*/
+/**
+ * This file contains implementation of PLAIN lock type.
+ *
+ * PLAIN locks are the simplest form of LDLM locking, and are used when
+ * there only needs to be a single lock on a resource. This avoids some
+ * of the complexity of EXTENT and IBITS lock types, but doesn't allow
+ * different "parts" of a resource to be locked concurrently. Example
+ * use cases for PLAIN locks include locking of MGS configuration logs
+ * and (as of Lustre 2.4) quota records.
+ */
+
#define DEBUG_SUBSYSTEM S_LDLM
#ifdef __KERNEL__
#include "ldlm_internal.h"
#ifdef HAVE_SERVER_SUPPORT
+/**
+ * Determine if the lock is compatible with all locks on the queue.
+ *
+ * If \a work_list is provided, conflicting locks are linked there.
+ * If \a work_list is not provided, we exit this function on first conflict.
+ *
+ * \retval 0 if there are conflicting locks in the \a queue
+ * \retval 1 if the lock is compatible to all locks in \a queue
+ */
static inline int
ldlm_plain_compat_queue(cfs_list_t *queue, struct ldlm_lock *req,
cfs_list_t *work_list)
cfs_list_for_each(tmp, queue) {
lock = cfs_list_entry(tmp, struct ldlm_lock, l_res_link);
- if (req == lock)
- RETURN(compat);
+ /* We stop walking the queue if we hit ourselves so we don't
+ * take conflicting locks enqueued after us into account,
+ * or we'd wait forever. */
+ if (req == lock)
+ RETURN(compat);
- /* last lock in mode group */
- tmp = &cfs_list_entry(lock->l_sl_mode.prev,
- struct ldlm_lock,
- l_sl_mode)->l_res_link;
+ /* Advance loop cursor to last lock of mode group. */
+ tmp = &cfs_list_entry(lock->l_sl_mode.prev,
+ struct ldlm_lock,
+ l_sl_mode)->l_res_link;
- if (lockmode_compat(lock->l_req_mode, req_mode))
+ if (lockmode_compat(lock->l_req_mode, req_mode))
continue;
if (!work_list)
compat = 0;
- /* add locks of the mode group to @work_list as
- * blocking locks for @req */
+ /* Add locks of the mode group to \a work_list as
+ * blocking locks for \a req. */
if (lock->l_blocking_ast)
ldlm_add_ast_work_item(lock, req, work_list);
RETURN(compat);
}
-/* If first_enq is 0 (ie, called from ldlm_reprocess_queue):
+/**
+ * Process a granting attempt for plain lock.
+ * Must be called with ns lock held.
+ *
+ * This function looks for any conflicts for \a lock in the granted or
+ * waiting queues. The lock is granted if no conflicts are found in
+ * either queue.
+ *
+ * If \a first_enq is 0 (i.e., called from ldlm_reprocess_queue):
* - blocking ASTs have already been sent
- * - must call this function with the resource lock held
*
- * If first_enq is 1 (ie, called from ldlm_lock_enqueue):
- * - blocking ASTs have not been sent
- * - must call this function with the resource lock held */
+ * If \a first_enq is 1 (i.e., called from ldlm_lock_enqueue):
+ * - blocking ASTs have not been sent yet, so the list of conflicting
+ *   locks is collected and ASTs are sent to their holders.
+ */
int ldlm_process_plain_lock(struct ldlm_lock *lock, __u64 *flags,
int first_enq, ldlm_error_t *err,
cfs_list_t *work_list)
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
+/**
+ * This file contains Asynchronous System Trap (AST) handlers and related
+ * LDLM request-processing routines.
+ *
+ * An AST is a callback issued on a lock when its state is changed. There are
+ * several different types of ASTs (callbacks) registered for each lock:
+ *
+ * - completion AST: when a lock is enqueued by some process, but cannot be
+ * granted immediately due to other conflicting locks on the same resource,
+ * the completion AST is sent to notify the caller when the lock is
+ * eventually granted
+ *
+ * - blocking AST: when a lock is granted to some process, if another process
+ * enqueues a conflicting (blocking) lock on a resource, a blocking AST is
+ * sent to notify the holder(s) of the lock(s) of the conflicting lock
+ * request. The lock holder(s) must release their lock(s) on that resource in
+ * a timely manner or be evicted by the server.
+ *
+ * - glimpse AST: this is used when a process wants information about a lock
+ * (i.e. the lock value block (LVB)) but does not necessarily require holding
+ * the lock. If the resource is locked, the lock holder(s) are sent glimpse
+ * ASTs and the LVB is returned to the caller, and lock holder(s) may CANCEL
+ * their lock(s) if they are idle. If the resource is not locked, the server
+ * may grant the lock.
+ */
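+
+/*
+ * For reference, a client registers these callbacks at enqueue time via
+ * struct ldlm_enqueue_info; the sketch below is illustrative and the
+ * field names are assumptions based on the client enqueue path:
+ *
+ * \code
+ * struct ldlm_enqueue_info einfo = {
+ *         .ei_type  = LDLM_EXTENT,          // lock type
+ *         .ei_mode  = LCK_PR,               // requested mode
+ *         .ei_cb_bl = ldlm_blocking_ast,    // blocking AST handler
+ *         .ei_cb_cp = ldlm_completion_ast,  // completion AST handler
+ *         .ei_cb_gl = ldlm_glimpse_ast,     // glimpse AST handler
+ * };
+ * \endcode
+ */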
#define DEBUG_SUBSYSTEM S_LDLM
#ifndef __KERNEL__
EXPORT_SYMBOL(ldlm_completion_ast_async);
/**
- * Client side LDLM "completion" AST. This is called in several cases:
+ * Generic LDLM "completion" AST. This is called in several cases:
*
- * - when a reply to an ENQUEUE rpc is received from the server
+ * - when a reply to an ENQUEUE RPC is received from the server
* (ldlm_cli_enqueue_fini()). Lock might be granted or not granted at
* this point (determined by flags);
*
- * - when LDLM_CP_CALLBACK rpc comes to client to notify it that lock has
+ * - when LDLM_CP_CALLBACK RPC comes to client to notify it that lock has
* been granted;
*
* - when ldlm_lock_match(LDLM_FL_LVB_READY) is about to wait until lock
EXPORT_SYMBOL(ldlm_completion_ast);
/**
- * A helper to build a blocking ast function
+ * A helper to build a blocking AST function
*
- * Perform a common operation for blocking asts:
+ * Perform a common operation for blocking ASTs:
* deferred lock cancellation.
*
- * \param lock the lock blocking or canceling ast was called on
+ * \param lock the lock blocking or canceling AST was called on
* \retval 0
* \see mdt_blocking_ast
* \see ldlm_blocking_ast
}
EXPORT_SYMBOL(ldlm_blocking_ast);
-/*
+/**
* ->l_glimpse_ast() for DLM extent locks acquired on the server-side. See
* comment in filter_intent_policy() on why you may need this.
*/
}
EXPORT_SYMBOL(ldlm_glimpse_ast);
+/**
+ * Enqueue a local lock (typically on a server).
+ */
int ldlm_cli_enqueue_local(struct ldlm_namespace *ns,
const struct ldlm_res_id *res_id,
ldlm_type_t type, ldlm_policy_data_t *policy,
}
}
+/**
+ * Finishing portion of client lock enqueue code.
+ *
+ * Called after receiving reply from server.
+ */
int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
ldlm_type_t type, __u8 with_policy, ldlm_mode_t mode,
__u64 *flags, void *lvb, __u32 lvb_len,
}
EXPORT_SYMBOL(ldlm_cli_enqueue_fini);
-/* PAGE_SIZE-512 is to allow TCP/IP and LNET headers to fit into
- * a single page on the send/receive side. XXX: 512 should be changed
- * to more adequate value. */
+/**
+ * Estimate number of lock handles that would fit into request of given
+ * size. PAGE_SIZE-512 is to allow TCP/IP and LNET headers to fit into
+ * a single page on the send/receive side. XXX: 512 should be changed to
+ * a more adequate value.
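+ *
+ * Worked example (a sketch assuming 4096-byte pages and an 8-byte
+ * struct lustre_handle): the budget is 4096 - 512 = 3584 bytes, so a
+ * request already consuming \a off bytes has room for roughly
+ * (3584 - off) / 8 lock handles.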
+ */
static inline int ldlm_req_handles_avail(int req_size, int off)
{
int avail;
return ldlm_req_handles_avail(size, off);
}
-/* Cancel lru locks and pack them into the enqueue request. Pack there the given
- * @count locks in @cancels. */
+/**
+ * Cancel LRU locks and pack them into the enqueue request. Also pack the
+ * given \a count locks from the \a cancels list.
+ *
+ * This is to be called by functions preparing their own requests that
+ * might contain lists of locks to cancel in addition to actual operation
+ * that needs to be performed.
+ */
int ldlm_prep_elc_req(struct obd_export *exp, struct ptlrpc_request *req,
int version, int opc, int canceloff,
cfs_list_t *cancels, int count)
to_free = !ns_connect_lru_resize(ns) &&
opc == LDLM_ENQUEUE ? 1 : 0;
- /* Cancel lru locks here _only_ if the server supports
- * EARLY_CANCEL. Otherwise we have to send extra CANCEL
- * rpc, what will make us slower. */
+ /* Cancel LRU locks here _only_ if the server supports
+ * EARLY_CANCEL. Otherwise we have to send extra CANCEL
+ * RPC, which will make us slower. */
if (avail > count)
count += ldlm_cancel_lru_local(ns, cancels, to_free,
avail - count, 0, flags);
}
/* Pack @pack lock handles into the request. */
ldlm_cli_cancel_list(cancels, pack, req, 0);
- /* Prepare and send separate cancel rpc for others. */
+ /* Prepare and send separate cancel RPC for others. */
ldlm_cli_cancel_list(cancels, count - pack, NULL, 0);
} else {
ldlm_lock_list_put(cancels, l_bl_ast, count);
}
EXPORT_SYMBOL(ldlm_prep_enqueue_req);
-/* If a request has some specific initialisation it is passed in @reqp,
+/**
+ * Client-side lock enqueue.
+ *
+ * If a request has some specific initialisation it is passed in \a reqp,
* otherwise it is created in ldlm_cli_enqueue.
*
- * Supports sync and async requests, pass @async flag accordingly. If a
+ * Supports sync and async requests, pass \a async flag accordingly. If a
* request was created in ldlm_cli_enqueue and it is the async request,
- * pass it to the caller in @reqp. */
+ * pass it to the caller in \a reqp.
+ */
int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
struct ldlm_enqueue_info *einfo,
const struct ldlm_res_id *res_id,
}
EXPORT_SYMBOL(ldlm_cli_convert);
-/* Cancel locks locally.
+/**
+ * Cancel locks locally.
* Returns:
- * LDLM_FL_LOCAL_ONLY if tere is no need in a CANCEL rpc to the server;
- * LDLM_FL_CANCELING otherwise;
- * LDLM_FL_BL_AST if there is a need in a separate CANCEL rpc. */
+ * \retval LDLM_FL_LOCAL_ONLY if there is no need for a CANCEL RPC to the server
+ * \retval LDLM_FL_CANCELING otherwise;
+ * \retval LDLM_FL_BL_AST if there is a need for a separate CANCEL RPC.
+ */
static __u64 ldlm_cli_cancel_local(struct ldlm_lock *lock)
{
__u64 rc = LDLM_FL_LOCAL_ONLY;
RETURN(rc);
}
-/* Pack @count locks in @head into ldlm_request buffer at the offset @off,
- of the request @req. */
+/**
+ * Pack \a count locks in \a head into ldlm_request buffer of request \a req.
+ */
static void ldlm_cancel_pack(struct ptlrpc_request *req,
cfs_list_t *head, int count)
{
EXIT;
}
-/* Prepare and send a batched cancel rpc, it will include count lock handles
- * of locks given in @head. */
+/**
+ * Prepare and send a batched cancel RPC. It will include \a count lock
+ * handles of locks given in the \a cancels list.
+ */
int ldlm_cli_cancel_req(struct obd_export *exp, cfs_list_t *cancels,
int count, ldlm_cancel_flags_t flags)
{
}
/**
- * Update client's obd pool related fields with new SLV and Limit from \a req.
+ * Update client's OBD pool related fields with new SLV and Limit from \a req.
*/
int ldlm_cli_update_pool(struct ptlrpc_request *req)
{
RETURN(0);
}
- /*
- * In some cases RPC may contain slv and limit zeroed out. This is
- * the case when server does not support lru resize feature. This is
- * also possible in some recovery cases when server side reqs have no
- * ref to obd export and thus access to server side namespace is no
- * possible.
- */
+ /* In some cases an RPC may contain SLV and limit zeroed out. This
+  * is the case when the server does not support the LRU resize
+  * feature. This is also possible in some recovery cases when
+  * server-side reqs have no reference to the OBD export and thus
+  * access to the server-side namespace is not possible. */
if (lustre_msg_get_slv(req->rq_repmsg) == 0 ||
lustre_msg_get_limit(req->rq_repmsg) == 0) {
DEBUG_REQ(D_HA, req, "Zero SLV or Limit found "
new_slv = lustre_msg_get_slv(req->rq_repmsg);
obd = req->rq_import->imp_obd;
- /*
- * Set new SLV and Limit to obd fields to make accessible for pool
- * thread. We do not access obd_namespace and pool directly here
- * as there is no reliable way to make sure that they are still
- * alive in cleanup time. Evil races are possible which may cause
- * oops in that time.
- */
+ /* Set new SLV and limit in OBD fields to make them accessible
+ * to the pool thread. We do not access obd_namespace and pool
+ * directly here as there is no reliable way to make sure that
+ * they are still alive at cleanup time. Evil races are possible
+ * which may cause an Oops at that time. */
write_lock(&obd->obd_pool_lock);
obd->obd_pool_slv = new_slv;
obd->obd_pool_limit = new_limit;
}
EXPORT_SYMBOL(ldlm_cli_update_pool);
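+/*
+ * Illustrative reader side (a sketch; it assumes the pool thread reads
+ * these fields under the same rwlock they are written under above):
+ *
+ *	read_lock(&obd->obd_pool_lock);
+ *	slv   = obd->obd_pool_slv;
+ *	limit = obd->obd_pool_limit;
+ *	read_unlock(&obd->obd_pool_lock);
+ */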
+/**
+ * Client-side lock cancel.
+ *
+ * Lock must not have any readers or writers by this time.
+ */
int ldlm_cli_cancel(struct lustre_handle *lockh)
{
struct obd_export *exp;
LDLM_LOCK_RELEASE(lock);
RETURN(0);
}
- /* Even if the lock is marked as LDLM_FL_BL_AST, this is a LDLM_CANCEL
- * rpc which goes to canceld portal, so we can cancel other lru locks
- * here and send them all as one LDLM_CANCEL rpc. */
+ /* Even if the lock is marked as LDLM_FL_BL_AST, this is an LDLM_CANCEL
+ * RPC which goes to the canceld portal, so we can cancel other LRU locks
+ * here and send them all as one LDLM_CANCEL RPC. */
LASSERT(cfs_list_empty(&lock->l_bl_ast));
cfs_list_add(&lock->l_bl_ast, &cancels);
}
EXPORT_SYMBOL(ldlm_cli_cancel);
-/* XXX until we will have compound requests and can cut cancels from generic rpc
- * we need send cancels with LDLM_FL_BL_AST flag as separate rpc */
+/**
+ * Locally cancel up to \a count locks in list \a cancels.
+ * Return the number of cancelled locks.
+ */
int ldlm_cli_cancel_list_local(cfs_list_t *cancels, int count,
ldlm_cancel_flags_t flags)
{
} else {
rc = ldlm_cli_cancel_local(lock);
}
- if (!(flags & LCF_BL_AST) && (rc == LDLM_FL_BL_AST)) {
- LDLM_DEBUG(lock, "Cancel lock separately");
- cfs_list_del_init(&lock->l_bl_ast);
- cfs_list_add(&lock->l_bl_ast, &head);
- bl_ast ++;
+ /* Until we have compound requests and can send LDLM_CANCEL
+ * requests batched with generic RPCs, we need to send cancels
+ * with the LDLM_FL_BL_AST flag in a separate RPC from
+ * the one being generated now. */
+ if (!(flags & LCF_BL_AST) && (rc == LDLM_FL_BL_AST)) {
+ LDLM_DEBUG(lock, "Cancel lock separately");
+ cfs_list_del_init(&lock->l_bl_ast);
+ cfs_list_add(&lock->l_bl_ast, &head);
+ bl_ast++;
continue;
}
if (rc == LDLM_FL_LOCAL_ONLY) {
LDLM_LOCK_RELEASE(lock);
count--;
}
-
}
if (bl_ast > 0) {
count -= bl_ast;
EXPORT_SYMBOL(ldlm_cli_cancel_list_local);
/**
- * Cancel as many locks as possible w/o sending any rpcs (e.g. to write back
- * dirty data, to close a file, ...) or waiting for any rpcs in-flight (e.g.
+ * Cancel as many locks as possible w/o sending any RPCs (e.g. to write back
+ * dirty data, to close a file, ...) or waiting for any RPCs in-flight (e.g.
* readahead requests, ...)
*/
static ldlm_policy_res_t ldlm_cancel_no_wait_policy(struct ldlm_namespace *ns,
}
/**
- * Callback function for lru-resize policy. Makes decision whether to keep
- * \a lock in LRU for current \a LRU size \a unused, added in current scan
- * \a added and number of locks to be preferably canceled \a count.
+ * Callback function for LRU-resize policy. Decides whether to keep
+ * \a lock in LRU for current LRU size \a unused, added in current
+ * scan \a added and number of locks to be preferably canceled \a count.
*
- * \retval LDLM_POLICY_KEEP_LOCK keep lock in LRU in stop scanning
+ * \retval LDLM_POLICY_KEEP_LOCK keep lock in LRU and stop scanning
*
__u64 slv, lvf, lv;
cfs_time_t la;
- /*
- * Stop lru processing when we reached passed @count or checked all
- * locks in lru.
- */
+ /* Stop LRU processing when we have reached the passed \a count or
+ * have checked all locks in the LRU. */
if (count && added >= count)
return LDLM_POLICY_KEEP_LOCK;
lvf = ldlm_pool_get_lvf(pl);
la = cfs_duration_sec(cfs_time_sub(cur,
lock->l_last_used));
+ lv = lvf * la * unused;
- /*
- * Stop when slv is not yet come from server or lv is smaller than
- * it is.
- */
- lv = lvf * la * unused;
+ /* Inform the pool about the current CLV so that it can be seen
+ * via proc. */
+ ldlm_pool_set_clv(pl, lv);
- /*
- * Inform pool about current CLV to see it via proc.
- */
- ldlm_pool_set_clv(pl, lv);
- return (slv == 0 || lv < slv) ?
- LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK;
+ /* Keep the lock (and stop scanning) while SLV has not yet come
+ * from the server (slv == 0) or while lv is smaller than SLV. */
+ return (slv == 0 || lv < slv) ?
+ LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK;
}
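+/*
+ * Worked example for the lock volume formula above (all values
+ * assumed): with lvf = 100, la = 10 seconds since last use and
+ * unused = 50 locks in the LRU, lv = 100 * 10 * 50 = 50000.  With a
+ * server SLV of 60000, lv < slv and the lock is kept; with an SLV of
+ * 40000 it is canceled.
+ */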
/**
* Callback function for proc used policy. Makes decision whether to keep
- * \a lock in LRU for current \a LRU size \a unused, added in current scan
- * \a added and number of locks to be preferably canceled \a count.
+ * \a lock in LRU for current LRU size \a unused, added in current scan
+ * \a added and number of locks to be preferably canceled \a count.
*
- * \retval LDLM_POLICY_KEEP_LOCK keep lock in LRU in stop scanning
+ * \retval LDLM_POLICY_KEEP_LOCK keep lock in LRU and stop scanning
*
int unused, int added,
int count)
{
- /*
- * Stop lru processing when we reached passed @count or checked all
- * locks in lru.
- */
- return (added >= count) ?
- LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK;
+ /* Stop LRU processing when we have reached the passed \a count or
+ * have checked all locks in the LRU. */
+ return (added >= count) ?
+ LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK;
}
/**
- * Callback function for aged policy. Makes decision whether to keep
- * \a lock in LRU for current \a LRU size \a unused, added in current scan
- * \a added and number of locks to be preferably canceled \a count.
+ * Callback function for aged policy. Makes decision whether to keep \a lock in
+ * LRU for current LRU size \a unused, added in current scan \a added and
+ * number of locks to be preferably canceled \a count.
*
- * \retval LDLM_POLICY_KEEP_LOCK keep lock in LRU in stop scanning
+ * \retval LDLM_POLICY_KEEP_LOCK keep lock in LRU and stop scanning
*
* \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU
*/
static ldlm_policy_res_t ldlm_cancel_aged_policy(struct ldlm_namespace *ns,
- struct ldlm_lock *lock,
- int unused, int added,
- int count)
+ struct ldlm_lock *lock,
+ int unused, int added,
+ int count)
{
- /*
- * Stop lru processing if young lock is found and we reached passed
- * @count.
- */
+ /* Stop LRU processing if a young lock is found and we have reached
+ * the passed \a count. */
return ((added >= count) &&
cfs_time_before(cfs_time_current(),
cfs_time_add(lock->l_last_used,
}
/**
- * Callback function for default policy. Makes decision whether to keep
- * \a lock in LRU for current \a LRU size \a unused, added in current scan
- * \a added and number of locks to be preferably canceled \a count.
+ * Callback function for default policy. Makes decision whether to keep \a lock
+ * in LRU for current LRU size \a unused, added in current scan \a added and
+ * number of locks to be preferably canceled \a count.
*
- * \retval LDLM_POLICY_KEEP_LOCK keep lock in LRU in stop scanning
+ * \retval LDLM_POLICY_KEEP_LOCK keep lock in LRU and stop scanning
*
* \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU
*/
static ldlm_policy_res_t ldlm_cancel_default_policy(struct ldlm_namespace *ns,
- struct ldlm_lock *lock,
- int unused, int added,
- int count)
+ struct ldlm_lock *lock,
+ int unused, int added,
+ int count)
{
- /*
- * Stop lru processing when we reached passed @count or checked all
- * locks in lru.
- */
+ /* Stop LRU processing when we have reached the passed \a count or
+ * have checked all locks in the LRU. */
return (added >= count) ?
LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK;
}
return ldlm_cancel_default_policy;
}
-/* - Free space in lru for @count new locks,
+/**
+ * - Free space in LRU for \a count new locks,
* redundant unused locks are canceled locally;
* - also cancel locally unused aged locks;
- * - do not cancel more than @max locks;
- * - GET the found locks and add them into the @cancels list.
+ * - do not cancel more than \a max locks;
+ * - GET the found locks and add them into the \a cancels list.
*
* A client lock can be added to the l_bl_ast list only when it is
- * marked LDLM_FL_CANCELING. Otherwise, somebody is already doing CANCEL.
- * There are the following use cases: ldlm_cancel_resource_local(),
- * ldlm_cancel_lru_local() and ldlm_cli_cancel(), which check&set this
- * flag properly. As any attempt to cancel a lock rely on this flag,
- * l_bl_ast list is accessed later without any special locking.
+ * marked LDLM_FL_CANCELING. Otherwise, somebody is already doing
+ * CANCEL. There are the following use cases:
+ * ldlm_cancel_resource_local(), ldlm_cancel_lru_local() and
+ * ldlm_cli_cancel(), which check and set this flag properly. As any
+ * attempt to cancel a lock relies on this flag, the l_bl_ast list is
+ * accessed later without any special locking.
*
- * Calling policies for enabled lru resize:
+ * Calling policies for enabled LRU resize:
* ----------------------------------------
- * flags & LDLM_CANCEL_LRUR - use lru resize policy (SLV from server) to
- * cancel not more than @count locks;
+ * flags & LDLM_CANCEL_LRUR - use LRU resize policy (SLV from server) to
+ * cancel not more than \a count locks;
*
- * flags & LDLM_CANCEL_PASSED - cancel @count number of old locks (located at
- * the beginning of lru list);
+ * flags & LDLM_CANCEL_PASSED - cancel \a count number of old locks (located at
+ * the beginning of LRU list);
*
- * flags & LDLM_CANCEL_SHRINK - cancel not more than @count locks according to
+ * flags & LDLM_CANCEL_SHRINK - cancel not more than \a count locks according to
- * memory pressre policy function;
+ * memory pressure policy function;
*
- * flags & LDLM_CANCEL_AGED - cancel alocks according to "aged policy".
+ * flags & LDLM_CANCEL_AGED - cancel \a count locks according to "aged policy".
*
* flags & LDLM_CANCEL_NO_WAIT - cancel as many unused locks as possible
* (typically before replaying locks) w/o
- * sending any rpcs or waiting for any
- * outstanding rpc to complete.
+ * sending any RPCs or waiting for any
+ * outstanding RPC to complete.
*/
static int ldlm_prepare_lru_list(struct ldlm_namespace *ns, cfs_list_t *cancels,
int count, int max, int flags)
break;
cfs_list_for_each_entry_safe(lock, next, &ns->ns_unused_list,
- l_lru){
+ l_lru) {
/* No locks which got blocking requests. */
LASSERT(!(lock->l_flags & LDLM_FL_BL_AST));
/* already processed */
continue;
- /* Somebody is already doing CANCEL. No need in this
- * lock in lru, do not traverse it again. */
+ /* Somebody is already doing CANCEL. No need for this
+ * lock in LRU, do not traverse it again. */
if (!(lock->l_flags & LDLM_FL_CANCELING))
break;
spin_unlock(&ns->ns_lock);
lu_ref_add(&lock->l_reference, __FUNCTION__, cfs_current());
- /* Pass the lock through the policy filter and see if it
- * should stay in lru.
- *
- * Even for shrinker policy we stop scanning if
- * we find a lock that should stay in the cache.
- * We should take into account lock age anyway
- * as new lock even if it is small of weight is
- * valuable resource.
- *
- * That is, for shrinker policy we drop only
- * old locks, but additionally chose them by
- * their weight. Big extent locks will stay in
- * the cache. */
+ /* Pass the lock through the policy filter and see if it
+ * should stay in LRU.
+ *
+ * Even for shrinker policy we stop scanning if
+ * we find a lock that should stay in the cache.
+ * We should take into account lock age anyway
+ * as a new lock is a valuable resource even if
+ * it has a low weight.
+ *
+ * That is, for shrinker policy we drop only
+ * old locks, but additionally choose them by
+ * their weight. Big extent locks will stay in
+ * the cache. */
result = pf(ns, lock, unused, added, count);
if (result == LDLM_POLICY_KEEP_LOCK) {
lu_ref_del(&lock->l_reference,
/* Check flags again under the lock. */
if ((lock->l_flags & LDLM_FL_CANCELING) ||
(ldlm_lock_remove_from_lru(lock) == 0)) {
- /* other thread is removing lock from lru or
- * somebody is already doing CANCEL or
- * there is a blocking request which will send
- * cancel by itseft or the lock is matched
- * is already not unused. */
+ /* Another thread is removing lock from LRU, or
+ * somebody is already doing CANCEL, or there
+ * is a blocking request which will send cancel
+ * by itself, or the lock is no longer unused. */
unlock_res_and_lock(lock);
lu_ref_del(&lock->l_reference,
__FUNCTION__, cfs_current());
return ldlm_cli_cancel_list_local(cancels, added, cancel_flags);
}
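+/*
+ * Illustrative use of the policy flags above (a sketch; it assumes,
+ * as elsewhere in this code, that count = 0 and max = 0 mean "no
+ * limit"):
+ *
+ *	cfs_list_t cancels;
+ *	int added;
+ *
+ *	CFS_INIT_LIST_HEAD(&cancels);
+ *	// collect unused locks before replay without any RPC traffic
+ *	added = ldlm_prepare_lru_list(ns, &cancels, 0, 0,
+ *				      LDLM_CANCEL_NO_WAIT);
+ */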
-/* when called with LDLM_ASYNC the blocking callback will be handled
+/**
+ * Cancel at least \a nr locks from given namespace LRU.
+ *
+ * When called with LDLM_ASYNC the blocking callback will be handled
* in a thread and this function will return after the thread has been
- * asked to call the callback. when called with LDLM_SYNC the blocking
- * callback will be performed in this function. */
+ * asked to call the callback. When called with LDLM_SYNC the blocking
+ * callback will be performed in this function.
+ */
int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr, ldlm_sync_t mode,
int flags)
{
RETURN(0);
}
-/* Find and cancel locally unused locks found on resource, matched to the
- * given policy, mode. GET the found locks and add them into the @cancels
- * list. */
+/**
+ * Find and cancel locally unused locks found on the resource that match
+ * the given policy and mode. GET the found locks and add them into the
+ * \a cancels list.
+ */
int ldlm_cancel_resource_local(struct ldlm_resource *res,
cfs_list_t *cancels,
ldlm_policy_data_t *policy,
if (lock->l_readers || lock->l_writers)
continue;
- /* If somebody is already doing CANCEL, or blocking ast came,
- * skip this lock. */
+ /* If somebody is already doing CANCEL, or blocking AST came,
+ * skip this lock. */
if (lock->l_flags & LDLM_FL_BL_AST ||
lock->l_flags & LDLM_FL_CANCELING)
continue;
}
EXPORT_SYMBOL(ldlm_cancel_resource_local);
-/* If @req is NULL, send CANCEL request to server with handles of locks
- * in the @cancels. If EARLY_CANCEL is not supported, send CANCEL requests
+/**
+ * Cancel client-side locks from a list and send/prepare cancel RPCs to the
+ * server.
+ * If \a req is NULL, send a CANCEL request to the server with handles of
+ * locks in \a cancels. If EARLY_CANCEL is not supported, send CANCEL requests
* separately per lock.
- * If @req is not NULL, put handles of locks in @cancels into the request
- * buffer at the offset @off.
- * Destroy @cancels at the end. */
+ * If \a req is not NULL, put handles of locks in \a cancels into the
+ * request buffer.
+ * Destroy \a cancels at the end.
+ */
int ldlm_cli_cancel_list(cfs_list_t *cancels, int count,
struct ptlrpc_request *req, ldlm_cancel_flags_t flags)
{
/* XXX: requests (both batched and not) could be sent in parallel.
* Usually it is enough to have just 1 RPC, but it is possible that
- * there are to many locks to be cancelled in LRU or on a resource.
+ * there are too many locks to be cancelled in LRU or on a resource.
* It would also speed up the case when the server does not support
* the feature. */
while (count > 0) {
}
EXPORT_SYMBOL(ldlm_cli_cancel_list);
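+/*
+ * The two calling modes, as already used by ldlm_prep_elc_req() above:
+ *
+ *	// pack the first handles into an existing request
+ *	ldlm_cli_cancel_list(cancels, pack, req, 0);
+ *	// send the remaining ones in separate CANCEL RPC(s)
+ *	ldlm_cli_cancel_list(cancels, count - pack, NULL, 0);
+ */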
+/**
+ * Cancel all locks on a resource that have 0 readers/writers.
+ *
+ * If flags & LDLM_FL_LOCAL_ONLY, throw the locks away without trying
+ * to notify the server.
+ */
int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns,
const struct ldlm_res_id *res_id,
ldlm_policy_data_t *policy,
return 0;
}
-/* Cancel all locks on a namespace (or a specific resource, if given)
+/**
+ * Cancel all locks on a namespace (or a specific resource, if given)
* that have 0 readers/writers.
*
* If flags & LCF_LOCAL, throw the locks away without trying
ns->ns_max_unused = (unsigned int)tmp;
ldlm_cancel_lru(ns, 0, LDLM_ASYNC, LDLM_CANCEL_PASSED);
- /* Make sure that originally lru resize was supported before
- * turning it on here. */
+ /* Make sure that LRU resize was originally supported before
+ * turning it on here. */
if (lru_resize &&
(ns->ns_orig_connect_flags & OBD_CONNECT_LRU_RESIZE)) {
CDEBUG(D_DLMTRACE,
},
};
+/**
+ * Create and initialize new empty namespace.
+ */
struct ldlm_namespace *ldlm_namespace_new(struct obd_device *obd, char *name,
ldlm_side_t client,
ldlm_appetite_t apt,
extern struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock);
-/* If flags contains FL_LOCAL_ONLY, don't try to tell the server, just cleanup.
- * This is currently only used for recovery, and we make certain assumptions
- * as a result--notably, that we shouldn't cancel locks with refs. -phil */
+/**
+ * Cancel and destroy all locks on a resource.
+ *
+ * If flags contains FL_LOCAL_ONLY, don't try to tell the server, just
+ * clean up. This is currently only used for recovery, and we make
+ * certain assumptions as a result--notably, that we shouldn't cancel
+ * locks with refs.
+ */
static void cleanup_resource(struct ldlm_resource *res, cfs_list_t *q,
__u64 flags)
{
do {
struct ldlm_lock *lock = NULL;
- /* first, we look for non-cleaned-yet lock
- * all cleaned locks are marked by CLEANED flag */
+ /* First, we look for a lock that has not been cleaned yet;
+ * all cleaned locks are marked with the CLEANED flag. */
lock_res(res);
cfs_list_for_each(tmp, q) {
lock = cfs_list_entry(tmp, struct ldlm_lock,
}
/* Set CBPENDING so nothing in the cancellation path
- * can match this lock */
+ * can match this lock. */
lock->l_flags |= LDLM_FL_CBPENDING;
lock->l_flags |= LDLM_FL_FAILED;
lock->l_flags |= flags;
return 0;
}
+/**
+ * Cancel and destroy all locks in the namespace.
+ *
+ * Typically used during evictions, when the server notifies the client
+ * that it has been evicted and all of its state needs to be destroyed.
+ * Also used during shutdown.
+ */
int ldlm_namespace_cleanup(struct ldlm_namespace *ns, __u64 flags)
{
if (ns == NULL) {
}
EXPORT_SYMBOL(ldlm_namespace_cleanup);
+/**
+ * Attempts to free namespace.
+ *
+ * Only used when namespace goes away, like during an unmount.
+ */
static int __ldlm_namespace_free(struct ldlm_namespace *ns, int force)
{
ENTRY;
}
/**
- * Performs various cleanups for passed \a ns to make it drop refc and be ready
- * for freeing. Waits for refc == 0.
+ * Performs various cleanups for passed \a ns to make it drop refc and be
+ * ready for freeing. Waits for refc == 0.
*
* The following is done:
- * (0) Unregister \a ns from its list to make inaccessible for potential users
- * like pools thread and others;
+ * (0) Unregister \a ns from its list to make it inaccessible to potential
+ * users like the pools thread and others;
* (1) Clear all locks in \a ns.
*/
void ldlm_namespace_free_prior(struct ldlm_namespace *ns,
}
/**
- * Performs freeing memory structures related to \a ns. This is only done when
- * ldlm_namespce_free_prior() successfully removed all resources referencing
- * \a ns and its refc == 0.
+ * Performs freeing memory structures related to \a ns. This is only done
+ * when ldlm_namespace_free_prior() successfully removed all resources
+ * referencing \a ns and its refc == 0.
*/
void ldlm_namespace_free_post(struct ldlm_namespace *ns)
{
return;
}
-
- /*
- * Make sure that nobody can find this ns in its list.
- */
- ldlm_namespace_unregister(ns, ns->ns_client);
- /*
- * Fini pool _before_ parent proc dir is removed. This is important as
- * ldlm_pool_fini() removes own proc dir which is child to @dir. Removing
- * it after @dir may cause oops.
- */
- ldlm_pool_fini(&ns->ns_pool);
-
- ldlm_namespace_proc_unregister(ns);
- cfs_hash_putref(ns->ns_rs_hash);
- /*
- * Namespace \a ns should be not on list in this time, otherwise this
- * will cause issues realted to using freed \a ns in pools thread.
- */
- LASSERT(cfs_list_empty(&ns->ns_list_chain));
- OBD_FREE_PTR(ns);
- ldlm_put_ref();
- EXIT;
+ /* Make sure that nobody can find this ns in its list. */
+ ldlm_namespace_unregister(ns, ns->ns_client);
+ /* Fini the pool _before_ the parent proc dir is removed. This is
+ * important as ldlm_pool_fini() removes its own proc dir, which is a
+ * child of @dir. Removing it after @dir may cause an oops. */
+ ldlm_pool_fini(&ns->ns_pool);
+
+ ldlm_namespace_proc_unregister(ns);
+ cfs_hash_putref(ns->ns_rs_hash);
+ /* Namespace \a ns should not be on the list at this time, otherwise
+ * this will cause issues related to using freed \a ns in the pools
+ * thread. */
+ LASSERT(cfs_list_empty(&ns->ns_list_chain));
+ OBD_FREE_PTR(ns);
+ ldlm_put_ref();
+ EXIT;
}
-
-/* Cleanup the resource, and free namespace.
+/**
+ * Cleanup the resource and free the namespace.
* bug 12864:
* Deadlock issue:
* proc1: destroy import
}
EXPORT_SYMBOL(ldlm_namespace_put);
-/* Register @ns in the list of namespaces */
+/** Register \a ns in the list of namespaces */
void ldlm_namespace_register(struct ldlm_namespace *ns, ldlm_side_t client)
{
mutex_lock(ldlm_namespace_lock(client));
mutex_unlock(ldlm_namespace_lock(client));
}
-/* Unregister @ns from the list of namespaces */
+/** Unregister \a ns from the list of namespaces. */
void ldlm_namespace_unregister(struct ldlm_namespace *ns, ldlm_side_t client)
{
mutex_lock(ldlm_namespace_lock(client));
LASSERT(!cfs_list_empty(&ns->ns_list_chain));
- /*
- * Some asserts and possibly other parts of code still using
- * list_empty(&ns->ns_list_chain). This is why it is important
- * to use list_del_init() here.
- */
+ /* Some asserts and possibly other parts of the code are still
+ * using list_empty(&ns->ns_list_chain). This is why it is
+ * important to use list_del_init() here. */
cfs_list_del_init(&ns->ns_list_chain);
cfs_atomic_dec(ldlm_namespace_nr(client));
mutex_unlock(ldlm_namespace_lock(client));
}
-/* Should be called under ldlm_namespace_lock(client) taken */
+/** Should be called with ldlm_namespace_lock(client) taken. */
void ldlm_namespace_move_locked(struct ldlm_namespace *ns, ldlm_side_t client)
{
LASSERT(!cfs_list_empty(&ns->ns_list_chain));
cfs_list_move_tail(&ns->ns_list_chain, ldlm_namespace_list(client));
}
-/* Should be called under ldlm_namespace_lock(client) taken */
+/** Should be called with ldlm_namespace_lock(client) taken. */
struct ldlm_namespace *ldlm_namespace_first_locked(ldlm_side_t client)
{
LASSERT_MUTEX_LOCKED(ldlm_namespace_lock(client));
struct ldlm_namespace, ns_list_chain);
}
+/** Create and initialize new resource. */
static struct ldlm_resource *ldlm_resource_new(void)
{
struct ldlm_resource *res;
CFS_INIT_LIST_HEAD(&res->lr_converting);
CFS_INIT_LIST_HEAD(&res->lr_waiting);
- /* initialize interval trees for each lock mode*/
+ /* Initialize interval trees for each lock mode. */
for (idx = 0; idx < LCK_MODE_NUM; idx++) {
res->lr_itree[idx].lit_size = 0;
res->lr_itree[idx].lit_mode = 1 << idx;
spin_lock_init(&res->lr_lock);
lu_ref_init(&res->lr_reference);
- /* one who creates the resource must unlock
- * the mutex after lvb initialization */
+ /* The creator of the resource must unlock the mutex after LVB
+ * initialization. */
mutex_init(&res->lr_lvb_mutex);
mutex_lock(&res->lr_lvb_mutex);
return res;
}
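+/*
+ * Sketch of the intended lr_lvb_mutex pairing (illustrative): the
+ * creating thread returns from ldlm_resource_new() still holding the
+ * mutex and drops it once the LVB is initialized, while any other
+ * thread that finds the resource waits for that initialization by
+ * taking and releasing the mutex (see ldlm_resource_get() below):
+ *
+ *	mutex_lock(&res->lr_lvb_mutex);
+ *	mutex_unlock(&res->lr_lvb_mutex);
+ */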
-/* Args: unlocked namespace
- * * Locks: takes and releases NS hash-lock and res->lr_lock
- * * Returns: referenced, unlocked ldlm_resource or NULL */
+/**
+ * Return a reference to a resource with the given name, creating it
+ * if necessary.
+ * Args: namespace with ns_lock unlocked
+ * Locks: takes and releases NS hash-lock and res->lr_lock
+ * Returns: referenced, unlocked ldlm_resource or NULL
+ */
struct ldlm_resource *
ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent,
const struct ldlm_res_id *name, ldlm_type_t type, int create)
if (hnode != NULL) {
cfs_hash_bd_unlock(ns->ns_rs_hash, &bd, 0);
res = cfs_hlist_entry(hnode, struct ldlm_resource, lr_hash);
- /* synchronize WRT resource creation */
+ /* Synchronize with regard to resource creation. */
if (ns->ns_lvbo && ns->ns_lvbo->lvbo_init) {
mutex_lock(&res->lr_lvb_mutex);
mutex_unlock(&res->lr_lvb_mutex);
cfs_hash_bd_lookup_locked(ns->ns_rs_hash, &bd, (void *)name);
if (hnode != NULL) {
- /* someone won the race and added the resource before */
- cfs_hash_bd_unlock(ns->ns_rs_hash, &bd, 1);
- /* clean lu_ref for failed resource */
+ /* Someone won the race and already added the resource. */
+ cfs_hash_bd_unlock(ns->ns_rs_hash, &bd, 1);
+ /* Clean lu_ref for failed resource. */
lu_ref_fini(&res->lr_reference);
/* We have taken lr_lvb_mutex. Drop it. */
mutex_unlock(&res->lr_lvb_mutex);
OBD_SLAB_FREE(res, ldlm_resource_slab, sizeof *res);
- res = cfs_hlist_entry(hnode, struct ldlm_resource, lr_hash);
- /* synchronize WRT resource creation */
- if (ns->ns_lvbo && ns->ns_lvbo->lvbo_init) {
+ res = cfs_hlist_entry(hnode, struct ldlm_resource, lr_hash);
+ /* Synchronize with regard to resource creation. */
+ if (ns->ns_lvbo && ns->ns_lvbo->lvbo_init) {
mutex_lock(&res->lr_lvb_mutex);
mutex_unlock(&res->lr_lvb_mutex);
- }
- return res;
- }
- /* we won! let's add the resource */
+ }
+ return res;
+ }
+ /* We won! Let's add the resource. */
cfs_hash_bd_add_locked(ns->ns_rs_hash, &bd, &res->lr_hash);
if (cfs_hash_bd_count_get(&bd) == 1)
ldlm_namespace_get(ns);
LPU64": rc %d\n", name->name[0], rc);
}
- /* we create resource with locked lr_lvb_mutex */
+ /* We create resource with locked lr_lvb_mutex. */
mutex_unlock(&res->lr_lvb_mutex);
return res;
return 0;
}
+/**
+ * Add a lock to the specified lock list of the given resource.
+ */
void ldlm_resource_add_lock(struct ldlm_resource *res, cfs_list_t *head,
struct ldlm_lock *lock)
{
cfs_list_add_tail(&lock->l_res_link, head);
}
+/**
+ * Insert a lock into a resource after the specified lock.
+ *
+ * Obtain resource description from the lock we are inserting after.
+ */
void ldlm_resource_insert_lock_after(struct ldlm_lock *original,
struct ldlm_lock *new)
{
desc->lr_name = res->lr_name;
}
+/**
+ * Print information about all locks in all namespaces on this node to debug
+ * log.
+ */
void ldlm_dump_all_namespaces(ldlm_side_t client, int level)
{
cfs_list_t *tmp;
return 0;
}
+/**
+ * Print information about all locks in this namespace on this node to debug
+ * log.
+ */
void ldlm_namespace_dump(int level, struct ldlm_namespace *ns)
{
if (!((libcfs_debug | D_ERROR) & level))
}
EXPORT_SYMBOL(ldlm_namespace_dump);
+/**
+ * Print information about all locks in this resource to debug log.
+ */
void ldlm_resource_dump(int level, struct ldlm_resource *res)
{
struct ldlm_lock *lock;