From: Niu Yawei Date: Fri, 28 Aug 2015 07:52:32 +0000 (-0400) Subject: LU-6529 ldlm: improve proc interface of lock reclaim X-Git-Tag: 2.7.60~22 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=33b55f223a42f20916bc417f7e5a21f68b59cd02 LU-6529 ldlm: improve proc interface of lock reclaim Rename variables of ldlm_watermark_low & ldlm_watermark_high to ldlm_reclaim_threshold & ldlm_lock_limit, and introduce two additional variables of ldlm_reclaim_threshold_mb and ldlm_lock_limit_mb for proc interface, so that we needn't convert the MB to locknr each time; Adjust the proc name from watermark_low_mb & watermark_high_mb to lock_reclaim_threshold_mb & lock_limit_mb; Remove the misleading comment at the top of ldlm_reclaim.c, add more comments to functions in ldlm_reclaim.c; Export the total granted locks via proc for debug purpose; Signed-off-by: Niu Yawei Change-Id: I161e088513ba2319cfdfe0d703512b4d9eeeebf1 Reviewed-on: http://review.whamcloud.com/16123 Tested-by: Jenkins Reviewed-by: John L. 
Hammond Tested-by: Maloo Reviewed-by: Andreas Dilger --- diff --git a/lustre/ldlm/ldlm_internal.h b/lustre/ldlm/ldlm_internal.h index 5e26a40..e79dca3 100644 --- a/lustre/ldlm/ldlm_internal.h +++ b/lustre/ldlm/ldlm_internal.h @@ -344,8 +344,13 @@ void ldlm_flock_policy_local_to_wire(const union ldlm_policy_data *lpolicy, union ldlm_wire_policy_data *wpolicy); /* ldlm_reclaim.c */ -extern __u64 ldlm_watermark_low; -extern __u64 ldlm_watermark_high; +#ifdef HAVE_SERVER_SUPPORT +extern __u64 ldlm_reclaim_threshold; +extern __u64 ldlm_lock_limit; +extern __u64 ldlm_reclaim_threshold_mb; +extern __u64 ldlm_lock_limit_mb; +extern struct percpu_counter ldlm_granted_total; +#endif int ldlm_reclaim_setup(void); void ldlm_reclaim_cleanup(void); void ldlm_reclaim_add(struct ldlm_lock *lock); diff --git a/lustre/ldlm/ldlm_reclaim.c b/lustre/ldlm/ldlm_reclaim.c index 722dd9f..d256c64 100644 --- a/lustre/ldlm/ldlm_reclaim.c +++ b/lustre/ldlm/ldlm_reclaim.c @@ -35,42 +35,33 @@ /* * To avoid ldlm lock exhausting server memory, two global parameters: - * ldlm_watermark_low & ldlm_watermark_high are used for reclaiming + * ldlm_reclaim_threshold & ldlm_lock_limit are used for reclaiming * granted locks and rejecting incoming enqueue requests defensively. * - * ldlm_watermark_low: When the amount of granted locks reaching this + * ldlm_reclaim_threshold: When the amount of granted locks reaching this * threshold, server start to revoke locks gradually. * - * ldlm_watermark_high: When the amount of granted locks reaching this + * ldlm_lock_limit: When the amount of granted locks reaching this * threshold, server will return -EINPROGRESS to any incoming enqueue * request until the lock count is shrunk below the threshold again. * - * ldlm_watermark_low & ldlm_watermark_high is set to 20% & 30% of the + * ldlm_reclaim_threshold & ldlm_lock_limit is set to 20% & 30% of the * total memory by default. It is tunable via proc entry, when it's set * to 0, the feature is disabled. 
*/ -/* - * FIXME: - * - * In current implementation, server identifies which locks should be - * revoked by choosing locks from namespace/resource in a roundrobin - * manner, which isn't optimal. The ideal way should be server notifies - * clients to cancel locks voluntarily, because only client knows exactly - * when the lock is last used. - * - * However how to notify client immediately is a problem, one idea - * is to leverage the glimplse callbacks on some artificial global - * lock (like quota global lock does), but that requires protocol - * changes, let's fix it in future long-term solution. - */ +#ifdef HAVE_SERVER_SUPPORT -__u64 ldlm_watermark_low; -__u64 ldlm_watermark_high; +/* Lock count is stored in ldlm_reclaim_threshold & ldlm_lock_limit */ +__u64 ldlm_reclaim_threshold; +__u64 ldlm_lock_limit; -#ifdef HAVE_SERVER_SUPPORT +/* Represents ldlm_reclaim_threshold & ldlm_lock_limit in MB, used for + * proc interface. */ +__u64 ldlm_reclaim_threshold_mb; +__u64 ldlm_lock_limit_mb; -static struct percpu_counter ldlm_granted_total; +struct percpu_counter ldlm_granted_total; static atomic_t ldlm_nr_reclaimer; static cfs_duration_t ldlm_last_reclaim_age; static cfs_time_t ldlm_last_reclaim_time; @@ -101,6 +92,17 @@ static inline bool ldlm_lock_reclaimable(struct ldlm_lock *lock) return false; } +/** + * Callback function for revoking locks from certain resource. + * + * \param [in] hs ns_rs_hash + * \param [in] bd current bucket of ns_rsh_hash + * \param [in] hnode hnode of the resource + * \param [in] arg opaque data + * + * \retval 0 continue the scan + * \retval 1 stop the iteration + */ static int ldlm_reclaim_lock_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd, struct hlist_node *hnode, void *arg) @@ -162,6 +164,18 @@ static int ldlm_reclaim_lock_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd, return rc; } +/** + * Revoke locks from the resources of a namespace in a roundrobin + * manner. 
+ * + * \param[in] ns namespace to do the lock revoke on + * \param[in] count count of lock to be revoked + * \param[in] age only revoke locks older than the 'age' + * \param[in] skip scan from the first lock on resource if the + * 'skip' is false, otherwise, continue scan + * from the last scanned position + * \param[out] count count of lock still to be revoked + */ static void ldlm_reclaim_res(struct ldlm_namespace *ns, int *count, cfs_duration_t age, bool skip) { @@ -224,6 +238,11 @@ static inline cfs_duration_t ldlm_reclaim_age(void) return age; } +/** + * Revoke certain amount of locks from all the server namespaces + * in a roundrobin manner. Lock age is used to avoid reclaim on + * the non-aged locks. + */ static void ldlm_reclaim_ns(void) { struct ldlm_namespace *ns; @@ -290,23 +309,32 @@ void ldlm_reclaim_del(struct ldlm_lock *lock) percpu_counter_sub(&ldlm_granted_total, 1); } +/** + * Check on the total granted locks: return true if it reaches the + * high watermark (ldlm_lock_limit), otherwise return false; It also + * triggers lock reclaim if the low watermark (ldlm_reclaim_threshold) + * is reached. + * + * \retval true high watermark reached. + * \retval false high watermark not reached. 
+ */ bool ldlm_reclaim_full(void) { - __u64 high = ldlm_watermark_high; - __u64 low = ldlm_watermark_low; + __u64 high = ldlm_lock_limit; + __u64 low = ldlm_reclaim_threshold; if (low != 0 && OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_LOW)) low = cfs_fail_val; if (low != 0 && - percpu_counter_read_positive(&ldlm_granted_total) > low) + percpu_counter_sum_positive(&ldlm_granted_total) > low) ldlm_reclaim_ns(); if (high != 0 && OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_HIGH)) high = cfs_fail_val; if (high != 0 && - percpu_counter_read_positive(&ldlm_granted_total) > high) + percpu_counter_sum_positive(&ldlm_granted_total) > high) return true; return false; @@ -322,14 +350,23 @@ static inline __u64 ldlm_ratio2locknr(int ratio) return locknr; } +static inline __u64 ldlm_locknr2mb(__u64 locknr) +{ + return (locknr * sizeof(struct ldlm_lock) + 512 * 1024) >> 20; +} + #define LDLM_WM_RATIO_LOW_DEFAULT 20 #define LDLM_WM_RATIO_HIGH_DEFAULT 30 int ldlm_reclaim_setup(void) { atomic_set(&ldlm_nr_reclaimer, 0); - ldlm_watermark_low = ldlm_ratio2locknr(LDLM_WM_RATIO_LOW_DEFAULT); - ldlm_watermark_high = ldlm_ratio2locknr(LDLM_WM_RATIO_HIGH_DEFAULT); + + ldlm_reclaim_threshold = ldlm_ratio2locknr(LDLM_WM_RATIO_LOW_DEFAULT); + ldlm_reclaim_threshold_mb = ldlm_locknr2mb(ldlm_reclaim_threshold); + ldlm_lock_limit = ldlm_ratio2locknr(LDLM_WM_RATIO_HIGH_DEFAULT); + ldlm_lock_limit_mb = ldlm_locknr2mb(ldlm_lock_limit); + ldlm_last_reclaim_age = LDLM_RECLAIM_AGE_MAX; ldlm_last_reclaim_time = cfs_time_current(); diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c index bcc299e..0a81a87 100644 --- a/lustre/ldlm/ldlm_resource.c +++ b/lustre/ldlm/ldlm_resource.c @@ -84,13 +84,11 @@ LPROC_SEQ_FOPS_WO_TYPE(ldlm, dump_ns); LPROC_SEQ_FOPS_RW_TYPE(ldlm_rw, uint); LPROC_SEQ_FOPS_RO_TYPE(ldlm, uint); -/* Lock count is stored in the watermark, and it's display as number of MB - * memory consumed by the locks */ +#ifdef HAVE_SERVER_SUPPORT + static int seq_watermark_show(struct seq_file 
*m, void *data) { - __u64 locknr = *(__u64 *)m->private; - return seq_printf(m, LPU64"\n", - (locknr * sizeof(struct ldlm_lock)) >> 20); + return seq_printf(m, LPU64"\n", *(__u64 *)m->private); } static ssize_t seq_watermark_write(struct file *file, @@ -99,23 +97,51 @@ static ssize_t seq_watermark_write(struct file *file, { __u64 watermark; __u64 *data = ((struct seq_file *)file->private_data)->private; + bool wm_low = (data == &ldlm_reclaim_threshold_mb) ? true : false; int rc; rc = lprocfs_write_frac_u64_helper(buffer, count, &watermark, 1 << 20); if (rc) { - CERROR("Failed to set LDLM watermark, rc = %d.\n", rc); + CERROR("Failed to set %s, rc = %d.\n", + wm_low ? "lock_reclaim_threshold_mb" : "lock_limit_mb", + rc); return rc; } else if (watermark != 0 && watermark < (1 << 20)) { - CERROR("Watermark should be greater than 1MB.\n"); + CERROR("%s should be greater than 1MB.\n", + wm_low ? "lock_reclaim_threshold_mb" : "lock_limit_mb"); return -EINVAL; } + watermark >>= 20; + + if (wm_low) { + if (ldlm_lock_limit_mb != 0 && watermark > ldlm_lock_limit_mb) { + CERROR("lock_reclaim_threshold_mb must be smaller than " + "lock_limit_mb.\n"); + return -EINVAL; + } - do_div(watermark, sizeof(struct ldlm_lock)); - *data = watermark; + *data = watermark; + if (watermark != 0) { + watermark <<= 20; + do_div(watermark, sizeof(struct ldlm_lock)); + } + ldlm_reclaim_threshold = watermark; + } else { + if (ldlm_reclaim_threshold_mb != 0 && + watermark < ldlm_reclaim_threshold_mb) { + CERROR("lock_limit_mb must be greater than " + "lock_reclaim_threshold_mb.\n"); + return -EINVAL; + } + + *data = watermark; + if (watermark != 0) { + watermark <<= 20; + do_div(watermark, sizeof(struct ldlm_lock)); + } + ldlm_lock_limit = watermark; + } - if (ldlm_watermark_low != 0 && ldlm_watermark_high != 0 && - ldlm_watermark_low > ldlm_watermark_high) - ldlm_watermark_low = ldlm_watermark_high; return count; } @@ -133,6 +159,27 @@ static const struct file_operations ldlm_watermark_fops = { 
.release = lprocfs_single_release, }; +static int seq_granted_show(struct seq_file *m, void *data) +{ + return seq_printf(m, LPU64"\n", percpu_counter_sum_positive( + (struct percpu_counter *)m->private)); +} + +static int seq_granted_open(struct inode *inode, struct file *file) +{ + return single_open(file, seq_granted_show, PDE_DATA(inode)); +} + +static const struct file_operations ldlm_granted_fops = { + .owner = THIS_MODULE, + .open = seq_granted_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +#endif /* HAVE_SERVER_SUPPORT */ + int ldlm_proc_setup(void) { int rc; @@ -146,12 +193,17 @@ int ldlm_proc_setup(void) { .name = "cancel_unused_locks_before_replay", .fops = &ldlm_rw_uint_fops, .data = &ldlm_cancel_unused_locks_before_replay }, - { .name = "watermark_mb_low", +#ifdef HAVE_SERVER_SUPPORT + { .name = "lock_reclaim_threshold_mb", .fops = &ldlm_watermark_fops, - .data = &ldlm_watermark_low }, - { .name = "watermark_mb_high", + .data = &ldlm_reclaim_threshold_mb }, + { .name = "lock_limit_mb", .fops = &ldlm_watermark_fops, - .data = &ldlm_watermark_high }, + .data = &ldlm_lock_limit_mb }, + { .name = "lock_granted_count", + .fops = &ldlm_granted_fops, + .data = &ldlm_granted_total }, +#endif { NULL }}; ENTRY; LASSERT(ldlm_ns_proc_dir == NULL); diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index e5a0168..e546fa3 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -9492,7 +9492,7 @@ test_134a() { rm $DIR/$tdir/m unlinkmany $DIR/$tdir/f $nr } -run_test 134a "Server reclaims locks when reaching low watermark" +run_test 134a "Server reclaims locks when reaching lock_reclaim_threshold" test_134b() { [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.54) ]] && @@ -9501,9 +9501,10 @@ test_134b() { mkdir -p $DIR/$tdir || error "failed to create $DIR/$tdir" cancel_lru_locks mdc - local low_wm=$(do_facet mds1 $LCTL get_param -n ldlm.watermark_mb_low) + local low_wm=$(do_facet mds1 $LCTL get_param 
-n \ + ldlm.lock_reclaim_threshold_mb) # disable reclaim temporarily - do_facet mds1 $LCTL set_param ldlm.watermark_mb_low=0 + do_facet mds1 $LCTL set_param ldlm.lock_reclaim_threshold_mb=0 #define OBD_FAIL_LDLM_WATERMARK_HIGH 0x328 do_facet mds1 $LCTL set_param fail_loc=0x328 @@ -9520,17 +9521,18 @@ test_134b() { if ! ps -p $create_pid > /dev/null 2>&1; then do_facet mds1 $LCTL set_param fail_loc=0 do_facet mds1 $LCTL set_param fail_val=0 - do_facet mds1 $LCTL set_param ldlm.watermark_mb_low=$low_wm + do_facet mds1 $LCTL set_param \ + ldlm.lock_reclaim_threshold_mb=${low_wm}m error "createmany finished incorrectly!" fi do_facet mds1 $LCTL set_param fail_loc=0 do_facet mds1 $LCTL set_param fail_val=0 - do_facet mds1 $LCTL set_param ldlm.watermark_mb_low=$low_wm + do_facet mds1 $LCTL set_param ldlm.lock_reclaim_threshold_mb=${low_wm}m wait $create_pid || return 1 unlinkmany $DIR/$tdir/f $nr } -run_test 134b "Server rejects lock request when reaching high watermark" +run_test 134b "Server rejects lock request when reaching lock_limit_mb" test_140() { #bug-17379 [ $PARALLEL == "yes" ] && skip "skip parallel run" && return