/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.gnu.org/licenses/gpl-2.0.html
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2015, Intel Corporation.
 * Use is subject to license terms.
 *
 * Author: Niu Yawei <yawei.niu@intel.com>
 */
#define DEBUG_SUBSYSTEM S_LDLM

#include <linux/kthread.h>
#include <lustre_dlm.h>
#include <obd_class.h>
#include "ldlm_internal.h"
/*
 * To avoid ldlm locks exhausting server memory, two global parameters,
 * ldlm_reclaim_threshold & ldlm_lock_limit, are used for reclaiming
 * granted locks and rejecting incoming enqueue requests defensively.
 *
 * ldlm_reclaim_threshold: When the number of granted locks reaches this
 * threshold, the server starts to revoke locks gradually.
 *
 * ldlm_lock_limit: When the number of granted locks reaches this
 * threshold, the server will return -EINPROGRESS to any incoming enqueue
 * request until the lock count shrinks below the threshold again.
 *
 * ldlm_reclaim_threshold & ldlm_lock_limit are set to 20% & 30% of the
 * total memory by default. They are tunable via proc entries; when set
 * to 0, the feature is disabled.
 */
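/*
 * Note: the memory percentages above are converted into lock counts by
 * ldlm_ratio2locknr() below, and back into megabytes (for the proc files)
 * by ldlm_locknr2mb().
 */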
#ifdef HAVE_SERVER_SUPPORT

/* Lock count is stored in ldlm_reclaim_threshold & ldlm_lock_limit */
__u64 ldlm_reclaim_threshold;
__u64 ldlm_lock_limit;

/* Represents ldlm_reclaim_threshold & ldlm_lock_limit in MB, used for
 * the proc interface. */
__u64 ldlm_reclaim_threshold_mb;
__u64 ldlm_lock_limit_mb;

struct percpu_counter	ldlm_granted_total;
static atomic_t		ldlm_nr_reclaimer;
static s64		ldlm_last_reclaim_age_ns;
static ktime_t		ldlm_last_reclaim_time;
struct ldlm_reclaim_cb_data {
	struct list_head	 rcd_rpc_list;
	int			 rcd_added;
	int			 rcd_total;
	int			 rcd_cursor;
	int			 rcd_start;
	bool			 rcd_skip;
	s64			 rcd_age_ns;
	struct cfs_hash_bd	*rcd_prev_bd;
};
static inline bool ldlm_lock_reclaimable(struct ldlm_lock *lock)
{
	struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);

	/* FLOCK & PLAIN locks are not reclaimable. FLOCK is
	 * explicitly controlled by the application, PLAIN locks
	 * are used by the quota global lock and the config lock.
	 */
	if (ns->ns_client == LDLM_NAMESPACE_SERVER &&
	    (lock->l_resource->lr_type == LDLM_IBITS ||
	     lock->l_resource->lr_type == LDLM_EXTENT))
		return true;

	return false;
}
/**
 * Callback function for revoking locks from a certain resource.
 *
 * \param [in] hs	ns_rs_hash
 * \param [in] bd	current bucket of ns_rs_hash
 * \param [in] hnode	hnode of the resource
 * \param [in] arg	opaque data
 *
 * \retval 0		continue the scan
 * \retval 1		stop the iteration
 */
static int ldlm_reclaim_lock_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
				struct hlist_node *hnode, void *arg)
{
	struct ldlm_resource		*res;
	struct ldlm_reclaim_cb_data	*data;
	struct ldlm_lock		*lock;
	struct ldlm_ns_bucket		*nsb;
	int				 rc = 0;

	data = (struct ldlm_reclaim_cb_data *)arg;

	LASSERTF(data->rcd_added < data->rcd_total, "added:%d >= total:%d\n",
		 data->rcd_added, data->rcd_total);

	nsb = cfs_hash_bd_extra_get(hs, bd);
	res = cfs_hash_object(hs, hnode);

	if (data->rcd_prev_bd != bd) {
		/* Entered a new hash bucket: reset the cursor and compute
		 * where scanning should start in this bucket. */
		if (data->rcd_prev_bd != NULL)
			ldlm_res_to_ns(res)->ns_reclaim_start++;
		data->rcd_prev_bd = bd;
		data->rcd_cursor = 0;
		data->rcd_start = nsb->nsb_reclaim_start %
				  cfs_hash_bd_count_get(bd);
	}

	if (data->rcd_skip && data->rcd_cursor < data->rcd_start) {
		data->rcd_cursor++;
		return 0;
	}

	nsb->nsb_reclaim_start++;

	lock_res(res);
	list_for_each_entry(lock, &res->lr_granted, l_res_link) {
		if (!ldlm_lock_reclaimable(lock))
			continue;

		/* Skip locks used more recently than the reclaim age unless
		 * the low-watermark fault injection is active. */
		if (!OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_LOW) &&
		    ktime_before(ktime_get(),
				 ktime_add_ns(lock->l_last_used,
					      data->rcd_age_ns)))
			continue;

		if (!ldlm_is_ast_sent(lock)) {
			ldlm_set_ast_sent(lock);
			LASSERT(list_empty(&lock->l_rk_ast));
			list_add(&lock->l_rk_ast, &data->rcd_rpc_list);
			LDLM_LOCK_GET(lock);
			if (++data->rcd_added == data->rcd_total) {
				rc = 1; /* stop the iteration */
				break;
			}
		}
	}
	unlock_res(res);

	return rc;
}
/**
 * Revoke locks from the resources of a namespace in a roundrobin
 * manner.
 *
 * \param[in] ns	namespace to do the lock revoke on
 * \param[in] count	count of locks to be revoked
 * \param[in] age_ns	only revoke locks older than this age
 * \param[in] skip	scan from the first lock on the resource if
 *			'skip' is false, otherwise continue the scan
 *			from the last scanned position
 * \param[out] count	count of locks still to be revoked
 */
static void ldlm_reclaim_res(struct ldlm_namespace *ns, int *count,
			     s64 age_ns, bool skip)
{
	struct ldlm_reclaim_cb_data	data;
	int				idx, type, start;
	ENTRY;

	LASSERT(*count != 0);

	if (ns->ns_obd != NULL) {
		type = server_name2index(ns->ns_obd->obd_name, &idx, NULL);
		if (type != LDD_F_SV_TYPE_MDT && type != LDD_F_SV_TYPE_OST) {
			EXIT;
			return;
		}
	}

	if (atomic_read(&ns->ns_bref) == 0) {
		EXIT;
		return;
	}

	INIT_LIST_HEAD(&data.rcd_rpc_list);
	data.rcd_added = 0;
	data.rcd_total = *count;
	data.rcd_age_ns = age_ns;
	data.rcd_skip = skip;
	data.rcd_prev_bd = NULL;
	start = ns->ns_reclaim_start % CFS_HASH_NBKT(ns->ns_rs_hash);

	cfs_hash_for_each_nolock(ns->ns_rs_hash, ldlm_reclaim_lock_cb, &data,
				 start);

	CDEBUG(D_DLMTRACE, "NS(%s): %d locks to be reclaimed, found %d/%d "
	       "locks.\n", ldlm_ns_name(ns), *count, data.rcd_added,
	       data.rcd_total);

	LASSERTF(*count >= data.rcd_added, "count:%d, added:%d\n", *count,
		 data.rcd_added);

	ldlm_run_ast_work(ns, &data.rcd_rpc_list, LDLM_WORK_REVOKE_AST);
	*count -= data.rcd_added;
	EXIT;
}
#define LDLM_RECLAIM_BATCH	512
#define LDLM_RECLAIM_AGE_MIN	(300 * NSEC_PER_SEC)
#define LDLM_RECLAIM_AGE_MAX	(LDLM_DEFAULT_MAX_ALIVE * NSEC_PER_SEC * 3 / 4)
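/*
 * A reclaim pass (ldlm_reclaim_ns()) revokes at most LDLM_RECLAIM_BATCH
 * locks in total and only considers locks idle for longer than the current
 * reclaim age, which is adapted between LDLM_RECLAIM_AGE_MIN (five minutes)
 * and LDLM_RECLAIM_AGE_MAX (three quarters of the default lock max-alive
 * time).
 */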
static inline s64 ldlm_reclaim_age(void)
{
	s64	age_ns = ldlm_last_reclaim_age_ns;
	ktime_t	now = ktime_get();
	ktime_t	diff;

	diff = ktime_sub(now, ldlm_last_reclaim_time);
	age_ns += ktime_to_ns(diff);
	if (age_ns > LDLM_RECLAIM_AGE_MAX)
		age_ns = LDLM_RECLAIM_AGE_MAX;
	else if (age_ns < (LDLM_RECLAIM_AGE_MIN * 2))
		age_ns = LDLM_RECLAIM_AGE_MIN;

	return age_ns;
}
/**
 * Revoke a certain amount of locks from all the server namespaces
 * in a roundrobin manner. Lock age is used to avoid reclaiming
 * locks that are not old enough.
 */
static void ldlm_reclaim_ns(void)
{
	struct ldlm_namespace	*ns;
	int			 count = LDLM_RECLAIM_BATCH;
	int			 ns_nr, nr_processed;
	enum ldlm_side		 ns_cli = LDLM_NAMESPACE_SERVER;
	s64			 age_ns;
	bool			 skip = true;
	ENTRY;

	/* Only one reclaimer is allowed to run at a time. */
	if (!atomic_add_unless(&ldlm_nr_reclaimer, 1, 1)) {
		EXIT;
		return;
	}

	age_ns = ldlm_reclaim_age();
again:
	nr_processed = 0;
	ns_nr = ldlm_namespace_nr_read(ns_cli);
	while (count > 0 && nr_processed < ns_nr) {
		mutex_lock(ldlm_namespace_lock(ns_cli));

		if (list_empty(ldlm_namespace_list(ns_cli))) {
			mutex_unlock(ldlm_namespace_lock(ns_cli));
			goto out;
		}

		ns = ldlm_namespace_first_locked(ns_cli);
		ldlm_namespace_move_to_active_locked(ns, ns_cli);
		mutex_unlock(ldlm_namespace_lock(ns_cli));

		ldlm_reclaim_res(ns, &count, age_ns, skip);
		ldlm_namespace_put(ns);
		nr_processed++;
	}

	/* If the batch isn't filled yet, retry with a smaller age and
	 * rescan each resource from the beginning. */
	if (count > 0 && age_ns > LDLM_RECLAIM_AGE_MIN) {
		age_ns >>= 1;
		if (age_ns < (LDLM_RECLAIM_AGE_MIN * 2))
			age_ns = LDLM_RECLAIM_AGE_MIN;
		skip = false;
		goto again;
	}

	ldlm_last_reclaim_age_ns = age_ns;
	ldlm_last_reclaim_time = ktime_get();
out:
	atomic_add_unless(&ldlm_nr_reclaimer, -1, 0);
	EXIT;
}
void ldlm_reclaim_add(struct ldlm_lock *lock)
{
	if (!ldlm_lock_reclaimable(lock))
		return;
	percpu_counter_add(&ldlm_granted_total, 1);
	lock->l_last_used = ktime_get();
}

void ldlm_reclaim_del(struct ldlm_lock *lock)
{
	if (!ldlm_lock_reclaimable(lock))
		return;
	percpu_counter_sub(&ldlm_granted_total, 1);
}
/**
 * Check on the total granted locks: return true if the count reaches the
 * high watermark (ldlm_lock_limit), otherwise return false. It also
 * triggers lock reclaim if the low watermark (ldlm_reclaim_threshold)
 * is reached.
 *
 * \retval true		high watermark reached.
 * \retval false	high watermark not reached.
 */
bool ldlm_reclaim_full(void)
{
	__u64 high = ldlm_lock_limit;
	__u64 low = ldlm_reclaim_threshold;

	/* Fault injection overrides the watermarks for testing. */
	if (low != 0 && OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_LOW))
		low = cfs_fail_val;

	if (low != 0 &&
	    percpu_counter_sum_positive(&ldlm_granted_total) > low)
		ldlm_reclaim_ns();

	if (high != 0 && OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_HIGH))
		high = cfs_fail_val;

	if (high != 0 &&
	    percpu_counter_sum_positive(&ldlm_granted_total) > high)
		return true;

	return false;
}
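/*
 * Usage sketch (not part of this file): the server-side enqueue path is
 * expected to consult ldlm_reclaim_full() before granting a new lock and,
 * per the comment at the top of this file, fail the request while the high
 * watermark is exceeded, e.g.:
 *
 *	if (ldlm_reclaim_full())
 *		rc = -EINPROGRESS;	hypothetical caller: reject enqueue
 */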
static inline __u64 ldlm_ratio2locknr(int ratio)
{
	__u64 locknr;

	locknr = ((__u64)NUM_CACHEPAGES << PAGE_SHIFT) * ratio;
	do_div(locknr, 100 * sizeof(struct ldlm_lock));

	return locknr;
}
static inline __u64 ldlm_locknr2mb(__u64 locknr)
{
	return (locknr * sizeof(struct ldlm_lock) + 512 * 1024) >> 20;
}
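/*
 * Rough, illustrative numbers only (the real values depend on the platform
 * page size, NUM_CACHEPAGES and sizeof(struct ldlm_lock), assumed here to
 * be about 512 bytes): with 16 GiB of cacheable memory, ldlm_ratio2locknr(20)
 * is roughly 6.7 million locks, and ldlm_locknr2mb() maps that back to about
 * 3300 MB, i.e. ~20% of memory.
 */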
#define LDLM_WM_RATIO_LOW_DEFAULT	20
#define LDLM_WM_RATIO_HIGH_DEFAULT	30
int ldlm_reclaim_setup(void)
{
	atomic_set(&ldlm_nr_reclaimer, 0);

	ldlm_reclaim_threshold = ldlm_ratio2locknr(LDLM_WM_RATIO_LOW_DEFAULT);
	ldlm_reclaim_threshold_mb = ldlm_locknr2mb(ldlm_reclaim_threshold);
	ldlm_lock_limit = ldlm_ratio2locknr(LDLM_WM_RATIO_HIGH_DEFAULT);
	ldlm_lock_limit_mb = ldlm_locknr2mb(ldlm_lock_limit);

	ldlm_last_reclaim_age_ns = LDLM_RECLAIM_AGE_MAX;
	ldlm_last_reclaim_time = ktime_get();

#ifdef HAVE_PERCPU_COUNTER_INIT_GFP_FLAG
	return percpu_counter_init(&ldlm_granted_total, 0, GFP_KERNEL);
#else
	return percpu_counter_init(&ldlm_granted_total, 0);
#endif
}

void ldlm_reclaim_cleanup(void)
{
	percpu_counter_destroy(&ldlm_granted_total);
}
#else /* HAVE_SERVER_SUPPORT */

bool ldlm_reclaim_full(void)
{
	return false;
}

void ldlm_reclaim_add(struct ldlm_lock *lock)
{
}

void ldlm_reclaim_del(struct ldlm_lock *lock)
{
}

int ldlm_reclaim_setup(void)
{
	return 0;
}

void ldlm_reclaim_cleanup(void)
{
}

#endif /* HAVE_SERVER_SUPPORT */