*/
/*
* This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
*/
/** \defgroup LDLM Lustre Distributed Lock Manager
* client shows interest in that lock, e.g. glimpse is occured. */
#define LDLM_DIRTY_AGE_LIMIT (10)
#define LDLM_DEFAULT_PARALLEL_AST_LIMIT 1024
+#define LDLM_DEFAULT_LRU_SHRINK_BATCH (16)
+#define LDLM_DEFAULT_SLV_RECALC_PCT (10)
/**
* LDLM non-error return states
*
*/
+/* LRU cancel flags, used when cancelling aged locks from the LRU. */
+enum ldlm_lru_flags {
+ LDLM_LRU_FLAG_NO_WAIT = 0x1, /* Cancel locks w/o blocking (neither
+ * sending nor waiting for any RPCs) */
+ LDLM_LRU_FLAG_CLEANUP = 0x2, /* Used when clearing lru, tells
+ * prepare_lru_list to set discard flag
+ * on PR extent locks so we don't waste
+ * time saving pages that will be
+ * discarded momentarily */
+};
+
struct ldlm_pool;
struct ldlm_lock;
struct ldlm_resource;
*/
struct ldlm_pool_ops {
/** Recalculate pool \a pl usage */
- int (*po_recalc)(struct ldlm_pool *pl);
+ int (*po_recalc)(struct ldlm_pool *pl, bool force);
/** Cancel at least \a nr locks from pool \a pl */
int (*po_shrink)(struct ldlm_pool *pl, int nr, gfp_t gfp_mask);
int (*po_setup)(struct ldlm_pool *pl, int limit);
__u64 pl_server_lock_volume;
/** Current biggest client lock volume. Protected by pl_lock. */
__u64 pl_client_lock_volume;
- /** Lock volume factor. SLV on client is calculated as following:
- * server_slv * lock_volume_factor. */
+ /** Lock volume factor, shown as a percentage in procfs, but internally
+ * the client SLV is calculated as: server_slv * lock_volume_factor >> 8.
+ */
atomic_t pl_lock_volume_factor;
/** Time when last SLV from server was obtained. */
time64_t pl_recalc_time;
* of ldlm_[res_]lvbo_[init,update,fill]() functions.
*/
struct ldlm_valblock_ops {
- int (*lvbo_init)(const struct lu_env *env, struct ldlm_resource *res);
- int (*lvbo_update)(const struct lu_env *env, struct ldlm_resource *res,
- struct ldlm_lock *lock, struct ptlrpc_request *r,
- int increase);
+ int (*lvbo_init)(struct ldlm_resource *res);
+ int (*lvbo_update)(struct ldlm_resource *res, struct ldlm_lock *lock,
+ struct ptlrpc_request *r, int increase);
int (*lvbo_free)(struct ldlm_resource *res);
/* Return size of lvb data appropriate RPC size can be reserved */
int (*lvbo_size)(struct ldlm_lock *lock);
/* Called to fill in lvb data to RPC buffer @buf */
- int (*lvbo_fill)(const struct lu_env *env, struct ldlm_lock *lock,
- void *buf, int *buflen);
+ int (*lvbo_fill)(struct ldlm_lock *lock, void *buf, int *buflen);
};
/**
* Greedy means release cached locks aggressively
*/
enum ldlm_appetite {
- LDLM_NAMESPACE_GREEDY = 1 << 0,
- LDLM_NAMESPACE_MODEST = 1 << 1
+ LDLM_NAMESPACE_GREEDY = BIT(0),
+ LDLM_NAMESPACE_MODEST = BIT(1),
};
/**
* Which res in the bucket should we start with the reclaim.
*/
int nsb_reclaim_start;
+ /* counter of entries in this bucket */
+ atomic_t nsb_count;
};
enum {
LDLM_NS_TYPE_MGT, /**< MGT namespace */
};
+enum ldlm_namespace_flags {
+ /**
+ * Flag to indicate that an LRU cancel is in progress.
+ * Used to restrict the LRU cancel to one thread at a time.
+ * NOTE(review): the value 0 looks like a bit number for a bitops
+ * field (ns_flags) rather than a mask -- confirm against the users.
+ */
+ LDLM_LRU_CANCEL = 0
+};
+
/**
* LDLM Namespace.
*
/** Resource hash table for namespace. */
struct cfs_hash *ns_rs_hash;
+ struct ldlm_ns_bucket *ns_rs_buckets;
+ unsigned int ns_bucket_bits;
/** serialize */
spinlock_t ns_lock;
*/
unsigned int ns_max_unused;
- /** Maximum allowed age (last used time) for locks in the LRU */
+ /**
+ * Cancel batch, used if the unused lock count exceeds lru_size.
+ * Only used if LRUR is disabled.
+ */
+ unsigned int ns_cancel_batch;
+
+ /**
+ * How much the SLV should decrease, in percent, to trigger an urgent
+ * LRU cancel.
+ */
+ unsigned int ns_recalc_pct;
+
+ /** Maximum allowed age (last used time) for locks in the LRU. Set in
+ * seconds from userspace, but stored in nanoseconds to avoid repeated
+ * conversions.
+ */
ktime_t ns_max_age;
/**
*/
unsigned int ns_timeouts;
/**
- * Number of seconds since the file change time after which the
- * MDT will return an UPDATE lock along with a LOOKUP lock.
+ * Number of seconds since the file change time after which
+ * the MDT will return an UPDATE lock along with a LOOKUP lock.
* This allows the client to start caching negative dentries
* for a directory and may save an RPC for a later stat.
*/
- time64_t ns_ctime_age_limit;
+ timeout_t ns_ctime_age_limit;
/**
- * Number of seconds since the lock was last used. The client may
- * cancel the lock limited by this age and flush related data if
- * any other client shows interest in it doing glimpse request.
- * This allows to cache stat data locally for such files early.
+ * Number of (nano)seconds since the lock was last used. The client
+ * may cancel the lock older than this age and flush related data if
+ * another client shows interest in this lock by doing glimpse request.
+ * This allows to cache stat data locally for such files early. Set in
+ * seconds from userspace, but stored in nanoseconds to avoid repeated
+ * conversions.
*/
- time64_t ns_dirty_age_limit;
+ ktime_t ns_dirty_age_limit;
/**
* Used to rate-limit ldlm_namespace_dump calls.
* \see ldlm_namespace_dump. Increased by 10 seconds every time
* The resources in this namespace remember contended state during
* \a ns_contention_time, in seconds.
*/
- time64_t ns_contention_time;
+ timeout_t ns_contention_time;
/**
* Limit size of contended extent locks, in bytes.
* Flag to indicate namespace is being freed. Used to determine if
* recalculation of LDLM pool statistics should be skipped.
*/
- unsigned ns_stopping:1;
+ unsigned ns_stopping:1,
+
+ /**
+ * Flag to indicate the LRU recalc on RPC reply is in progress.
+ * Used to restrict the process to one thread at a time.
+ */
+ ns_rpc_recalc:1;
/**
* Which bucket should we start with the lock reclaim.
struct kobject ns_kobj; /* sysfs object */
struct completion ns_kobj_unregister;
+
+ /**
+ * To avoid another ns_lock usage, a separate bitops field.
+ */
+ unsigned long ns_flags;
};
/**
static inline int ns_is_client(struct ldlm_namespace *ns)
{
LASSERT(ns != NULL);
- LASSERT(!(ns->ns_client & ~(LDLM_NAMESPACE_CLIENT |
- LDLM_NAMESPACE_SERVER)));
LASSERT(ns->ns_client == LDLM_NAMESPACE_CLIENT ||
ns->ns_client == LDLM_NAMESPACE_SERVER);
return ns->ns_client == LDLM_NAMESPACE_CLIENT;
static inline int ns_is_server(struct ldlm_namespace *ns)
{
LASSERT(ns != NULL);
- LASSERT(!(ns->ns_client & ~(LDLM_NAMESPACE_CLIENT |
- LDLM_NAMESPACE_SERVER)));
LASSERT(ns->ns_client == LDLM_NAMESPACE_CLIENT ||
ns->ns_client == LDLM_NAMESPACE_SERVER);
return ns->ns_client == LDLM_NAMESPACE_SERVER;
/** Type for glimpse callback function of a lock. */
typedef int (*ldlm_glimpse_callback)(struct ldlm_lock *lock, void *data);
+/** Type for created callback function of a lock. */
+typedef void (*ldlm_created_callback)(struct ldlm_lock *lock);
+
/** Work list for sending GL ASTs to multiple locks. */
struct ldlm_glimpse_work {
struct ldlm_lock *gl_lock; /* lock to glimpse */
struct interval_node *lit_root; /* actual ldlm_interval */
};
+/**
+ * Lists of waiting locks for each inodebit type: one list head per inode
+ * bit, MDS_INODELOCK_NUMBITS in total.
+ * A lock can be in several liq_waiting lists and it remains in lr_waiting.
+ */
+struct ldlm_ibits_queues {
+ struct list_head liq_waiting[MDS_INODELOCK_NUMBITS];
+};
+
+struct ldlm_ibits_node {
+ struct list_head lin_link[MDS_INODELOCK_NUMBITS]; /* one linkage per
+ * inode bit; presumably hooks
+ * into the matching
+ * liq_waiting[] list --
+ * TODO confirm */
+ struct ldlm_lock *lock; /* presumably the lock that owns this node
+ * (see l_ibits_node) -- TODO confirm */
+};
+
/** Whether to track references to exports by LDLM locks. */
#define LUSTRE_TRACKS_LOCK_EXP_REFS (0)
LCF_ASYNC = 0x1, /* Cancel locks asynchronously. */
LCF_LOCAL = 0x2, /* Cancel locks locally, not notifing server */
LCF_BL_AST = 0x4, /* Cancel LDLM_FL_BL_AST locks in the same RPC */
- LCF_CONVERT = 0x8, /* Try to convert IBITS lock before cancel */
};
struct ldlm_flock {
*/
struct portals_handle l_handle;
/**
- * Lock reference count.
- * This is how many users have pointers to actual structure, so that
- * we do not accidentally free lock structure that is in use.
- */
- atomic_t l_refc;
- /**
- * Internal spinlock protects l_resource. We should hold this lock
- * first before taking res_lock.
- */
- spinlock_t l_lock;
- /**
* Pointer to actual resource this lock is in.
- * ldlm_lock_change_resource() can change this.
+ * ldlm_lock_change_resource() can change this on the client.
+ * When this is possible, rcu must be used to stabilise
+ * the resource while we lock and check it hasn't been changed.
*/
struct ldlm_resource *l_resource;
/**
*/
struct list_head l_res_link;
/**
- * Tree node for ldlm_extent.
+ * Internal structures per lock type.
*/
- struct ldlm_interval *l_tree_node;
+ union {
+ struct ldlm_interval *l_tree_node;
+ struct ldlm_ibits_node *l_ibits_node;
+ };
/**
* Per export hash of locks.
* Protected by per-bucket exp->exp_lock_hash locks.
* under this lock.
* \see ost_rw_prolong_locks
*/
- time64_t l_callback_timeout;
+ time64_t l_callback_timestamp;
/** Local PID of process which created this lock. */
__u32 l_pid;
struct list_head l_exp_list;
};
+enum ldlm_match_flags { /* bit flags; type of the match_flags argument of
+ * ldlm_lock_match_with_skip() and of the
+ * lmd_match member of struct ldlm_match_data */
+ LDLM_MATCH_UNREF = BIT(0),
+ LDLM_MATCH_AST = BIT(1),
+ LDLM_MATCH_AST_ANY = BIT(2),
+ LDLM_MATCH_RIGHT = BIT(3),
+};
+
+/**
+ * Describe the overlap between two locks. itree_overlap_cb data;
+ * also the argument type taken by search_itree().
+ */
+struct ldlm_match_data {
+ struct ldlm_lock *lmd_old;
+ struct ldlm_lock *lmd_lock;
+ enum ldlm_mode *lmd_mode;
+ union ldlm_policy_data *lmd_policy;
+ __u64 lmd_flags;
+ __u64 lmd_skip_flags;
+ enum ldlm_match_flags lmd_match; /* LDLM_MATCH_* bit flags */
+};
+
/** For uncommitted cross-MDT lock, store transno this lock belongs to */
#define l_transno l_client_cookie
/**
* List item for list in namespace hash.
- * protected by ns_lock
+ * protected by ns_lock.
+ * Shared with linkage for RCU-delayed free.
*/
- struct hlist_node lr_hash;
+ union {
+ struct hlist_node lr_hash;
+ struct rcu_head lr_rcu;
+ };
/** Reference count for this resource */
atomic_t lr_refcount;
/** Resource name */
struct ldlm_res_id lr_name;
- /**
- * Interval trees (only for extent locks) for all modes of this resource
- */
- struct ldlm_interval_tree *lr_itree;
+ union {
+ /**
+ * Interval trees (only for extent locks) for all modes of
+ * this resource
+ */
+ struct ldlm_interval_tree *lr_itree;
+ struct ldlm_ibits_queues *lr_ibits_queues;
+ };
union {
/**
struct lu_ref lr_reference;
};
+static inline int ldlm_is_granted(struct ldlm_lock *lock)
+{
+ return lock->l_req_mode == lock->l_granted_mode; /* non-zero when the
+ * granted mode equals the
+ * requested mode */
+}
+
static inline bool ldlm_has_layout(struct ldlm_lock *lock)
{
return lock->l_resource->lr_type == LDLM_IBITS &&
return &lock->l_resource->lr_ns_bucket->nsb_at_estimate;
}
-static inline int ldlm_lvbo_init(const struct lu_env *env,
- struct ldlm_resource *res)
+static inline int ldlm_lvbo_init(struct ldlm_resource *res)
{
struct ldlm_namespace *ns = ldlm_res_to_ns(res);
int rc = 0;
mutex_unlock(&res->lr_lvb_mutex);
return 0;
}
- rc = ns->ns_lvbo->lvbo_init(env, res);
+ rc = ns->ns_lvbo->lvbo_init(res);
if (rc < 0) {
CDEBUG(D_DLMTRACE, "lvbo_init failed for resource : rc = %d\n",
rc);
return 0;
}
-static inline int ldlm_lvbo_fill(const struct lu_env *env,
- struct ldlm_lock *lock, void *buf, int *len)
+static inline int ldlm_lvbo_fill(struct ldlm_lock *lock, void *buf, int *len)
{
struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);
int rc;
if (ns->ns_lvbo != NULL) {
LASSERT(ns->ns_lvbo->lvbo_fill != NULL);
/* init lvb now if not already */
- rc = ldlm_lvbo_init(env, lock->l_resource);
+ rc = ldlm_lvbo_init(lock->l_resource);
if (rc < 0) {
- CERROR("lock %p: delayed lvb init failed (rc %d)",
+ CERROR("lock %p: delayed lvb init failed (rc %d)\n",
lock, rc);
return rc;
}
- return ns->ns_lvbo->lvbo_fill(env, lock, buf, len);
+ return ns->ns_lvbo->lvbo_fill(lock, buf, len);
}
return 0;
}
void *ei_cb_local_bl; /** blocking local lock callback */
void *ei_cb_cp; /** lock completion callback */
void *ei_cb_gl; /** lock glimpse callback */
+ ldlm_created_callback ei_cb_created; /** lock created callback */
void *ei_cbdata; /** Data to be passed into callbacks. */
void *ei_namespace; /** lock namespace **/
u64 ei_inodebits; /** lock inode bits **/
unsigned int ei_enq_slave:1; /** whether enqueue slave stripes */
+ unsigned int ei_enq_slot:1; /** whether acquire rpc slot */
};
#define ei_res_id ei_cb_gl
-extern struct obd_ops ldlm_obd_ops;
-
extern char *ldlm_lockname[];
extern char *ldlm_typename[];
extern const char *ldlm_it2str(enum ldlm_intent_flags it);
enum ldlm_error *err,
struct list_head *work_list);
+typedef int (*ldlm_reprocessing_policy)(struct ldlm_resource *res,
+ struct list_head *queue,
+ struct list_head *work_list,
+ enum ldlm_process_intention intention,
+ __u64 hint);
+
/**
* Return values for lock iterators.
* Also used during deciding of lock grants and cancellations.
struct ldlm_res_id lpa_resid;
struct ldlm_extent lpa_extent;
enum ldlm_mode lpa_mode;
- time64_t lpa_timeout;
+ timeout_t lpa_timeout;
int lpa_locks_cnt;
int lpa_blocks_cnt;
};
/** @} ldlm_handlers */
void ldlm_revoke_export_locks(struct obd_export *exp);
-time64_t ldlm_bl_timeout(struct ldlm_lock *lock);
+timeout_t ldlm_bl_timeout(struct ldlm_lock *lock);
#endif
int ldlm_del_waiting_lock(struct ldlm_lock *lock);
-int ldlm_refresh_waiting_lock(struct ldlm_lock *lock, time64_t timeout);
+int ldlm_refresh_waiting_lock(struct ldlm_lock *lock, timeout_t timeout);
int ldlm_get_ref(void);
void ldlm_put_ref(void);
int ldlm_init_export(struct obd_export *exp);
/* ldlm_lock.c */
#ifdef HAVE_SERVER_SUPPORT
ldlm_processing_policy ldlm_get_processing_policy(struct ldlm_resource *res);
+ldlm_reprocessing_policy
+ldlm_get_reprocessing_policy(struct ldlm_resource *res);
#endif
void ldlm_register_intent(struct ldlm_namespace *ns, ldlm_res_policy arg);
void ldlm_lock2handle(const struct ldlm_lock *lock,
}
#define LDLM_LOCK_REF_DEL(lock) \
- lu_ref_del(&lock->l_reference, "handle", current)
+ lu_ref_del(&lock->l_reference, "handle", lock)
static inline struct ldlm_lock *
ldlm_handle2lock_long(const struct lustre_handle *h, __u64 flags)
* Update Lock Value Block Operations (LVBO) on a resource taking into account
* data from request \a r
*/
-static inline int ldlm_lvbo_update(const struct lu_env *env,
- struct ldlm_resource *res,
+static inline int ldlm_lvbo_update(struct ldlm_resource *res,
struct ldlm_lock *lock,
struct ptlrpc_request *req, int increase)
{
int rc;
/* delayed lvb init may be required */
- rc = ldlm_lvbo_init(env, res);
+ rc = ldlm_lvbo_init(res);
if (rc < 0) {
CERROR("delayed lvb init failed (rc %d)\n", rc);
return rc;
}
if (ns->ns_lvbo && ns->ns_lvbo->lvbo_update)
- return ns->ns_lvbo->lvbo_update(env, res, lock, req, increase);
+ return ns->ns_lvbo->lvbo_update(res, lock, req, increase);
return 0;
}
-static inline int ldlm_res_lvbo_update(const struct lu_env *env,
- struct ldlm_resource *res,
+static inline int ldlm_res_lvbo_update(struct ldlm_resource *res,
struct ptlrpc_request *req,
int increase)
{
- return ldlm_lvbo_update(env, res, NULL, req, increase);
+ return ldlm_lvbo_update(res, NULL, req, increase);
}
+int is_granted_or_cancelled_nolock(struct ldlm_lock *lock);
+
int ldlm_error2errno(enum ldlm_error error);
enum ldlm_error ldlm_errno2error(int err_no); /* don't call it `errno': this
* confuses user-space. */
void ldlm_lock_fail_match(struct ldlm_lock *lock);
void ldlm_lock_allow_match(struct ldlm_lock *lock);
void ldlm_lock_allow_match_locked(struct ldlm_lock *lock);
-enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags,
- const struct ldlm_res_id *, enum ldlm_type type,
- union ldlm_policy_data *, enum ldlm_mode mode,
- struct lustre_handle *, int unref);
+
+enum ldlm_mode ldlm_lock_match_with_skip(struct ldlm_namespace *ns,
+ __u64 flags, __u64 skip_flags,
+ const struct ldlm_res_id *res_id,
+ enum ldlm_type type,
+ union ldlm_policy_data *policy,
+ enum ldlm_mode mode,
+ struct lustre_handle *lh,
+ enum ldlm_match_flags match_flags);
+static inline enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns,
+ __u64 flags,
+ const struct ldlm_res_id *res_id,
+ enum ldlm_type type,
+ union ldlm_policy_data *policy,
+ enum ldlm_mode mode,
+ struct lustre_handle *lh)
+{
+ return ldlm_lock_match_with_skip(ns, flags, 0, res_id, type, policy,
+ mode, lh, 0); /* convenience wrapper:
+ * skip_flags and match_flags
+ * are both passed as 0 */
+}
+struct ldlm_lock *search_itree(struct ldlm_resource *res,
+ struct ldlm_match_data *data);
enum ldlm_mode ldlm_revalidate_lock_handle(const struct lustre_handle *lockh,
__u64 *bits);
void ldlm_lock_mode_downgrade(struct ldlm_lock *lock, enum ldlm_mode new_mode);
void ldlm_lock_cancel(struct ldlm_lock *lock);
-void ldlm_reprocess_all(struct ldlm_resource *res);
+void ldlm_reprocess_all(struct ldlm_resource *res, __u64 hint);
void ldlm_reprocess_recovery_done(struct ldlm_namespace *ns);
void ldlm_lock_dump_handle(int level, const struct lustre_handle *lockh);
void ldlm_unlink_lock_skiplist(struct ldlm_lock *req);
if (op != 0)
lprocfs_counter_incr(srv_stats, op);
-
- return;
}
/* resource.c - internal */
} while (0)
/* ldlm_request.c */
-int ldlm_expired_completion_wait(void *data);
/** \defgroup ldlm_local_ast Default AST handlers for local locks
* These AST handlers are typically used for server-side local locks and are
* also used by client-side lock handlers to perform minimum level base
const struct ldlm_request *dlm_req,
const struct ldlm_callback_suite *cbs);
int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
- enum ldlm_type type, __u8 with_policy,
- enum ldlm_mode mode, __u64 *flags, void *lvb,
- __u32 lvb_len,
+ struct ldlm_enqueue_info *einfo, __u8 with_policy,
+ __u64 *flags, void *lvb, __u32 lvb_len,
const struct lustre_handle *lockh, int rc);
int ldlm_cli_enqueue_local(const struct lu_env *env,
struct ldlm_namespace *ns,
void *data, __u32 lvb_len, enum lvb_type lvb_type,
const __u64 *client_cookie,
struct lustre_handle *lockh);
-int ldlm_cli_convert(struct ldlm_lock *lock, __u32 *flags);
+int ldlm_cli_convert_req(struct ldlm_lock *lock, __u32 *flags, __u64 new_bits);
+int ldlm_cli_convert(struct ldlm_lock *lock,
+ enum ldlm_cancel_flags cancel_flags);
int ldlm_cli_update_pool(struct ptlrpc_request *req);
int ldlm_cli_cancel(const struct lustre_handle *lockh,
enum ldlm_cancel_flags cancel_flags);
enum ldlm_cancel_flags flags);
int ldlm_inodebits_drop(struct ldlm_lock *lock, __u64 to_drop);
-int ldlm_cli_dropbits(struct ldlm_lock *lock, __u64 drop_bits);
-int ldlm_cli_dropbits_list(struct list_head *converts, __u64 drop_bits);
+int ldlm_cli_inodebits_convert(struct ldlm_lock *lock,
+ enum ldlm_cancel_flags cancel_flags);
/** @} ldlm_cli_api */
+extern unsigned int ldlm_enqueue_min;
+
/* mds/handler.c */
/* This has to be here because recursive inclusion sucks. */
int intent_disposition(struct ldlm_reply *rep, int flag);
int ldlm_pool_shrink(struct ldlm_pool *pl, int nr, gfp_t gfp_mask);
void ldlm_pool_fini(struct ldlm_pool *pl);
int ldlm_pool_setup(struct ldlm_pool *pl, int limit);
-time64_t ldlm_pool_recalc(struct ldlm_pool *pl);
+time64_t ldlm_pool_recalc(struct ldlm_pool *pl, bool force);
__u32 ldlm_pool_get_lvf(struct ldlm_pool *pl);
__u64 ldlm_pool_get_slv(struct ldlm_pool *pl);
__u64 ldlm_pool_get_clv(struct ldlm_pool *pl);