Rename variables of ldlm_watermark_low & ldlm_watermark_high to
ldlm_reclaim_threshold & ldlm_lock_limit, and introduce two
additional variables of ldlm_reclaim_threshold_mb and
ldlm_lock_limit_mb for the proc interface, so that we don't need
to convert MB to lock count each time;
Adjust the proc name from watermark_low_mb & watermark_high_mb to
lock_reclaim_threshold_mb & lock_limit_mb;
Remove the misleading comment at the top of ldlm_reclaim.c, add more
comments to functions in ldlm_reclaim.c;
Export the total granted locks via proc for debug purpose;
Signed-off-by: Niu Yawei <yawei.niu@intel.com>
Change-Id: I161e088513ba2319cfdfe0d703512b4d9eeeebf1
Reviewed-on: http://review.whamcloud.com/16123
Tested-by: Jenkins
Reviewed-by: John L. Hammond <john.hammond@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
union ldlm_wire_policy_data *wpolicy);
/* ldlm_reclaim.c */
union ldlm_wire_policy_data *wpolicy);
/* ldlm_reclaim.c */
-extern __u64 ldlm_watermark_low;
-extern __u64 ldlm_watermark_high;
+#ifdef HAVE_SERVER_SUPPORT
+extern __u64 ldlm_reclaim_threshold;
+extern __u64 ldlm_lock_limit;
+extern __u64 ldlm_reclaim_threshold_mb;
+extern __u64 ldlm_lock_limit_mb;
+extern struct percpu_counter ldlm_granted_total;
+#endif
int ldlm_reclaim_setup(void);
void ldlm_reclaim_cleanup(void);
void ldlm_reclaim_add(struct ldlm_lock *lock);
int ldlm_reclaim_setup(void);
void ldlm_reclaim_cleanup(void);
void ldlm_reclaim_add(struct ldlm_lock *lock);
/*
* To avoid ldlm lock exhausting server memory, two global parameters:
/*
* To avoid ldlm lock exhausting server memory, two global parameters:
- * ldlm_watermark_low & ldlm_watermark_high are used for reclaiming
+ * ldlm_reclaim_threshold & ldlm_lock_limit are used for reclaiming
* granted locks and rejecting incoming enqueue requests defensively.
*
* granted locks and rejecting incoming enqueue requests defensively.
*
- * ldlm_watermark_low: When the amount of granted locks reaching this
+ * ldlm_reclaim_threshold: When the amount of granted locks reaching this
* threshold, server start to revoke locks gradually.
*
* threshold, server start to revoke locks gradually.
*
- * ldlm_watermark_high: When the amount of granted locks reaching this
+ * ldlm_lock_limit: When the amount of granted locks reaching this
* threshold, server will return -EINPROGRESS to any incoming enqueue
* request until the lock count is shrunk below the threshold again.
*
* threshold, server will return -EINPROGRESS to any incoming enqueue
* request until the lock count is shrunk below the threshold again.
*
- * ldlm_watermark_low & ldlm_watermark_high is set to 20% & 30% of the
+ * ldlm_reclaim_threshold & ldlm_lock_limit is set to 20% & 30% of the
* total memory by default. It is tunable via proc entry, when it's set
* to 0, the feature is disabled.
*/
* total memory by default. It is tunable via proc entry, when it's set
* to 0, the feature is disabled.
*/
-/*
- * FIXME:
- *
- * In current implementation, server identifies which locks should be
- * revoked by choosing locks from namespace/resource in a roundrobin
- * manner, which isn't optimal. The ideal way should be server notifies
- * clients to cancel locks voluntarily, because only client knows exactly
- * when the lock is last used.
- *
- * However how to notify client immediately is a problem, one idea
- * is to leverage the glimplse callbacks on some artificial global
- * lock (like quota global lock does), but that requires protocol
- * changes, let's fix it in future long-term solution.
- */
+#ifdef HAVE_SERVER_SUPPORT
-__u64 ldlm_watermark_low;
-__u64 ldlm_watermark_high;
+/* Lock count is stored in ldlm_reclaim_threshold & ldlm_lock_limit */
+__u64 ldlm_reclaim_threshold;
+__u64 ldlm_lock_limit;
-#ifdef HAVE_SERVER_SUPPORT
+/* Represents ldlm_reclaim_threshold & ldlm_lock_limit in MB, used for
+ * proc interface. */
+__u64 ldlm_reclaim_threshold_mb;
+__u64 ldlm_lock_limit_mb;
-static struct percpu_counter ldlm_granted_total;
+struct percpu_counter ldlm_granted_total;
static atomic_t ldlm_nr_reclaimer;
static cfs_duration_t ldlm_last_reclaim_age;
static cfs_time_t ldlm_last_reclaim_time;
static atomic_t ldlm_nr_reclaimer;
static cfs_duration_t ldlm_last_reclaim_age;
static cfs_time_t ldlm_last_reclaim_time;
+/**
+ * Callback function for revoking locks from certain resource.
+ *
+ * \param [in] hs ns_rs_hash
+ * \param [in] bd	current bucket of ns_rs_hash
+ * \param [in] hnode hnode of the resource
+ * \param [in] arg opaque data
+ *
+ * \retval 0 continue the scan
+ * \retval 1 stop the iteration
+ */
static int ldlm_reclaim_lock_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
struct hlist_node *hnode, void *arg)
static int ldlm_reclaim_lock_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
struct hlist_node *hnode, void *arg)
+/**
+ * Revoke locks from the resources of a namespace in a roundrobin
+ * manner.
+ *
+ * \param[in] ns	namespace to do the lock revoke on
+ * \param[in,out] count count of locks to be revoked; on return it
+ *			holds the count of locks still to be revoked
+ * \param[in] age	only revoke locks older than 'age'
+ * \param[in] skip	scan from the first lock on a resource if
+ *			'skip' is false, otherwise continue the scan
+ *			from the last scanned position
+ */
static void ldlm_reclaim_res(struct ldlm_namespace *ns, int *count,
cfs_duration_t age, bool skip)
{
static void ldlm_reclaim_res(struct ldlm_namespace *ns, int *count,
cfs_duration_t age, bool skip)
{
+/**
+ * Revoke certain amount of locks from all the server namespaces
+ * in a roundrobin manner. Lock age is used to avoid reclaim on
+ * the non-aged locks.
+ */
static void ldlm_reclaim_ns(void)
{
struct ldlm_namespace *ns;
static void ldlm_reclaim_ns(void)
{
struct ldlm_namespace *ns;
percpu_counter_sub(&ldlm_granted_total, 1);
}
percpu_counter_sub(&ldlm_granted_total, 1);
}
+/**
+ * Check on the total granted locks: return true if it reaches the
+ * high watermark (ldlm_lock_limit), otherwise return false; It also
+ * triggers lock reclaim if the low watermark (ldlm_reclaim_threshold)
+ * is reached.
+ *
+ * \retval true high watermark reached.
+ * \retval false high watermark not reached.
+ */
bool ldlm_reclaim_full(void)
{
bool ldlm_reclaim_full(void)
{
- __u64 high = ldlm_watermark_high;
- __u64 low = ldlm_watermark_low;
+ __u64 high = ldlm_lock_limit;
+ __u64 low = ldlm_reclaim_threshold;
if (low != 0 && OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_LOW))
low = cfs_fail_val;
if (low != 0 &&
if (low != 0 && OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_LOW))
low = cfs_fail_val;
if (low != 0 &&
- percpu_counter_read_positive(&ldlm_granted_total) > low)
+ percpu_counter_sum_positive(&ldlm_granted_total) > low)
ldlm_reclaim_ns();
if (high != 0 && OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_HIGH))
high = cfs_fail_val;
if (high != 0 &&
ldlm_reclaim_ns();
if (high != 0 && OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_HIGH))
high = cfs_fail_val;
if (high != 0 &&
- percpu_counter_read_positive(&ldlm_granted_total) > high)
+ percpu_counter_sum_positive(&ldlm_granted_total) > high)
return true;
return false;
return true;
return false;
+/* Convert a lock count to the number of megabytes of memory consumed
+ * by that many ldlm_lock structures. The 512KB bias makes the right
+ * shift round to the nearest MB instead of truncating. */
+static inline __u64 ldlm_locknr2mb(__u64 locknr)
+{
+	return (locknr * sizeof(struct ldlm_lock) + 512 * 1024) >> 20;
+}
+
#define LDLM_WM_RATIO_LOW_DEFAULT 20
#define LDLM_WM_RATIO_HIGH_DEFAULT 30
int ldlm_reclaim_setup(void)
{
atomic_set(&ldlm_nr_reclaimer, 0);
#define LDLM_WM_RATIO_LOW_DEFAULT 20
#define LDLM_WM_RATIO_HIGH_DEFAULT 30
int ldlm_reclaim_setup(void)
{
atomic_set(&ldlm_nr_reclaimer, 0);
- ldlm_watermark_low = ldlm_ratio2locknr(LDLM_WM_RATIO_LOW_DEFAULT);
- ldlm_watermark_high = ldlm_ratio2locknr(LDLM_WM_RATIO_HIGH_DEFAULT);
+
+ ldlm_reclaim_threshold = ldlm_ratio2locknr(LDLM_WM_RATIO_LOW_DEFAULT);
+ ldlm_reclaim_threshold_mb = ldlm_locknr2mb(ldlm_reclaim_threshold);
+ ldlm_lock_limit = ldlm_ratio2locknr(LDLM_WM_RATIO_HIGH_DEFAULT);
+ ldlm_lock_limit_mb = ldlm_locknr2mb(ldlm_lock_limit);
+
ldlm_last_reclaim_age = LDLM_RECLAIM_AGE_MAX;
ldlm_last_reclaim_time = cfs_time_current();
ldlm_last_reclaim_age = LDLM_RECLAIM_AGE_MAX;
ldlm_last_reclaim_time = cfs_time_current();
LPROC_SEQ_FOPS_RW_TYPE(ldlm_rw, uint);
LPROC_SEQ_FOPS_RO_TYPE(ldlm, uint);
LPROC_SEQ_FOPS_RW_TYPE(ldlm_rw, uint);
LPROC_SEQ_FOPS_RO_TYPE(ldlm, uint);
-/* Lock count is stored in the watermark, and it's display as number of MB
- * memory consumed by the locks */
+#ifdef HAVE_SERVER_SUPPORT
+
static int seq_watermark_show(struct seq_file *m, void *data)
{
static int seq_watermark_show(struct seq_file *m, void *data)
{
- __u64 locknr = *(__u64 *)m->private;
- return seq_printf(m, LPU64"\n",
- (locknr * sizeof(struct ldlm_lock)) >> 20);
+ return seq_printf(m, LPU64"\n", *(__u64 *)m->private);
}
static ssize_t seq_watermark_write(struct file *file,
}
static ssize_t seq_watermark_write(struct file *file,
{
__u64 watermark;
__u64 *data = ((struct seq_file *)file->private_data)->private;
{
__u64 watermark;
__u64 *data = ((struct seq_file *)file->private_data)->private;
+ bool wm_low = (data == &ldlm_reclaim_threshold_mb) ? true : false;
int rc;
rc = lprocfs_write_frac_u64_helper(buffer, count, &watermark, 1 << 20);
if (rc) {
int rc;
rc = lprocfs_write_frac_u64_helper(buffer, count, &watermark, 1 << 20);
if (rc) {
- CERROR("Failed to set LDLM watermark, rc = %d.\n", rc);
+ CERROR("Failed to set %s, rc = %d.\n",
+ wm_low ? "lock_reclaim_threshold_mb" : "lock_limit_mb",
+ rc);
return rc;
} else if (watermark != 0 && watermark < (1 << 20)) {
return rc;
} else if (watermark != 0 && watermark < (1 << 20)) {
- CERROR("Watermark should be greater than 1MB.\n");
+ CERROR("%s should be greater than 1MB.\n",
+ wm_low ? "lock_reclaim_threshold_mb" : "lock_limit_mb");
+ watermark >>= 20;
+
+ if (wm_low) {
+ if (ldlm_lock_limit_mb != 0 && watermark > ldlm_lock_limit_mb) {
+ CERROR("lock_reclaim_threshold_mb must be smaller than "
+ "lock_limit_mb.\n");
+ return -EINVAL;
+ }
- do_div(watermark, sizeof(struct ldlm_lock));
- *data = watermark;
+ *data = watermark;
+ if (watermark != 0) {
+ watermark <<= 20;
+ do_div(watermark, sizeof(struct ldlm_lock));
+ }
+ ldlm_reclaim_threshold = watermark;
+ } else {
+ if (ldlm_reclaim_threshold_mb != 0 &&
+ watermark < ldlm_reclaim_threshold_mb) {
+ CERROR("lock_limit_mb must be greater than "
+ "lock_reclaim_threshold_mb.\n");
+ return -EINVAL;
+ }
+
+ *data = watermark;
+ if (watermark != 0) {
+ watermark <<= 20;
+ do_div(watermark, sizeof(struct ldlm_lock));
+ }
+ ldlm_lock_limit = watermark;
+ }
- if (ldlm_watermark_low != 0 && ldlm_watermark_high != 0 &&
- ldlm_watermark_low > ldlm_watermark_high)
- ldlm_watermark_low = ldlm_watermark_high;
.release = lprocfs_single_release,
};
.release = lprocfs_single_release,
};
+/* Show the total number of granted ldlm locks via the proc entry.
+ * percpu_counter_sum_positive() is used (instead of the cheaper
+ * percpu_counter_read_positive()) for an exact, non-approximated
+ * count, since this is a debug interface read infrequently. */
+static int seq_granted_show(struct seq_file *m, void *data)
+{
+	return seq_printf(m, LPU64"\n", percpu_counter_sum_positive(
+			  (struct percpu_counter *)m->private));
+}
+
+/* Open handler for the "lock_granted_count" proc file: binds the
+ * percpu counter stored as the proc entry's data (PDE_DATA) to
+ * seq_granted_show(). */
+static int seq_granted_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, seq_granted_show, PDE_DATA(inode));
+}
+
+/* File operations for the read-only "lock_granted_count" proc entry. */
+static const struct file_operations ldlm_granted_fops = {
+	.owner	= THIS_MODULE,
+	.open	= seq_granted_open,
+	.read	= seq_read,
+	.llseek	= seq_lseek,
+	/* The file is opened with single_open(), so it must be released
+	 * with single_release(); plain seq_release() leaks the
+	 * seq_operations that single_open() kmalloc'd on each close.
+	 * (The watermark fops above use lprocfs_single_release for the
+	 * same reason.) */
+	.release = single_release,
+};
+
+#endif /* HAVE_SERVER_SUPPORT */
+
int ldlm_proc_setup(void)
{
int rc;
int ldlm_proc_setup(void)
{
int rc;
{ .name = "cancel_unused_locks_before_replay",
.fops = &ldlm_rw_uint_fops,
.data = &ldlm_cancel_unused_locks_before_replay },
{ .name = "cancel_unused_locks_before_replay",
.fops = &ldlm_rw_uint_fops,
.data = &ldlm_cancel_unused_locks_before_replay },
- { .name = "watermark_mb_low",
+#ifdef HAVE_SERVER_SUPPORT
+ { .name = "lock_reclaim_threshold_mb",
.fops = &ldlm_watermark_fops,
.fops = &ldlm_watermark_fops,
- .data = &ldlm_watermark_low },
- { .name = "watermark_mb_high",
+ .data = &ldlm_reclaim_threshold_mb },
+ { .name = "lock_limit_mb",
.fops = &ldlm_watermark_fops,
.fops = &ldlm_watermark_fops,
- .data = &ldlm_watermark_high },
+ .data = &ldlm_lock_limit_mb },
+ { .name = "lock_granted_count",
+ .fops = &ldlm_granted_fops,
+ .data = &ldlm_granted_total },
+#endif
{ NULL }};
ENTRY;
LASSERT(ldlm_ns_proc_dir == NULL);
{ NULL }};
ENTRY;
LASSERT(ldlm_ns_proc_dir == NULL);
rm $DIR/$tdir/m
unlinkmany $DIR/$tdir/f $nr
}
rm $DIR/$tdir/m
unlinkmany $DIR/$tdir/f $nr
}
-run_test 134a "Server reclaims locks when reaching low watermark"
+run_test 134a "Server reclaims locks when reaching lock_reclaim_threshold"
test_134b() {
[[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.54) ]] &&
test_134b() {
[[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.54) ]] &&
mkdir -p $DIR/$tdir || error "failed to create $DIR/$tdir"
cancel_lru_locks mdc
mkdir -p $DIR/$tdir || error "failed to create $DIR/$tdir"
cancel_lru_locks mdc
- local low_wm=$(do_facet mds1 $LCTL get_param -n ldlm.watermark_mb_low)
+ local low_wm=$(do_facet mds1 $LCTL get_param -n \
+ ldlm.lock_reclaim_threshold_mb)
# disable reclaim temporarily
# disable reclaim temporarily
- do_facet mds1 $LCTL set_param ldlm.watermark_mb_low=0
+ do_facet mds1 $LCTL set_param ldlm.lock_reclaim_threshold_mb=0
#define OBD_FAIL_LDLM_WATERMARK_HIGH 0x328
do_facet mds1 $LCTL set_param fail_loc=0x328
#define OBD_FAIL_LDLM_WATERMARK_HIGH 0x328
do_facet mds1 $LCTL set_param fail_loc=0x328
if ! ps -p $create_pid > /dev/null 2>&1; then
do_facet mds1 $LCTL set_param fail_loc=0
do_facet mds1 $LCTL set_param fail_val=0
if ! ps -p $create_pid > /dev/null 2>&1; then
do_facet mds1 $LCTL set_param fail_loc=0
do_facet mds1 $LCTL set_param fail_val=0
- do_facet mds1 $LCTL set_param ldlm.watermark_mb_low=$low_wm
+ do_facet mds1 $LCTL set_param \
+ ldlm.lock_reclaim_threshold_mb=${low_wm}m
error "createmany finished incorrectly!"
fi
do_facet mds1 $LCTL set_param fail_loc=0
do_facet mds1 $LCTL set_param fail_val=0
error "createmany finished incorrectly!"
fi
do_facet mds1 $LCTL set_param fail_loc=0
do_facet mds1 $LCTL set_param fail_val=0
- do_facet mds1 $LCTL set_param ldlm.watermark_mb_low=$low_wm
+ do_facet mds1 $LCTL set_param ldlm.lock_reclaim_threshold_mb=${low_wm}m
wait $create_pid || return 1
unlinkmany $DIR/$tdir/f $nr
}
wait $create_pid || return 1
unlinkmany $DIR/$tdir/f $nr
}
-run_test 134b "Server rejects lock request when reaching high watermark"
+run_test 134b "Server rejects lock request when reaching lock_limit_mb"
test_140() { #bug-17379
[ $PARALLEL == "yes" ] && skip "skip parallel run" && return
test_140() { #bug-17379
[ $PARALLEL == "yes" ] && skip "skip parallel run" && return