4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2015, Intel Corporation.
24 * Use is subject to license terms.
26 * Author: Niu Yawei <yawei.niu@intel.com>
29 #define DEBUG_SUBSYSTEM S_LDLM
31 #include <linux/kthread.h>
32 #include <lustre_dlm.h>
33 #include <obd_class.h>
34 #include "ldlm_internal.h"
37 * To avoid ldlm locks exhausting server memory, two global parameters,
38 * ldlm_watermark_low & ldlm_watermark_high, are used for reclaiming
39 * granted locks and rejecting incoming enqueue requests defensively.
41 * ldlm_watermark_low: when the number of granted locks reaches this
42 * threshold, the server starts to revoke locks gradually.
44 * ldlm_watermark_high: when the number of granted locks reaches this
45 * threshold, the server will return -EINPROGRESS to any incoming enqueue
46 * request until the lock count shrinks below the threshold again.
48 * ldlm_watermark_low & ldlm_watermark_high are set to 20% & 30% of
49 * total memory by default. They are tunable via proc entries; when set
50 * to 0, the feature is disabled.
56 * In the current implementation, the server identifies which locks should
57 * be revoked by choosing locks from namespaces/resources in a round-robin
58 * manner, which isn't optimal. The ideal way would be for the server to
59 * notify clients to cancel locks voluntarily, because only the client
60 * knows exactly when a lock was last used.
62 * However, how to notify the client immediately is a problem. One idea
63 * is to leverage the glimpse callbacks on some artificial global
64 * lock (like the quota global lock does), but that requires protocol
65 * changes; let's address it in a future long-term solution.
/* Reclaim thresholds, expressed as a number of granted locks; a value of
 * 0 disables the corresponding check (see the overview comment above). */
68 __u64 ldlm_watermark_low;
69 __u64 ldlm_watermark_high;
71 #ifdef HAVE_SERVER_SUPPORT
/* Running count of reclaimable granted locks on this server, maintained
 * by ldlm_reclaim_add()/ldlm_reclaim_del(). */
73 static struct percpu_counter ldlm_granted_total;
/* Non-zero while a reclaim pass is in flight; atomic_add_unless() in
 * ldlm_reclaim_ns() uses it to allow only one reclaimer at a time. */
74 static atomic_t ldlm_nr_reclaimer;
/* Idle-age cutoff and timestamp of the last reclaim pass; used by
 * ldlm_reclaim_age() to derive the cutoff for the next pass. */
75 static cfs_duration_t ldlm_last_reclaim_age;
76 static cfs_time_t ldlm_last_reclaim_time;
/* Per-pass state handed to ldlm_reclaim_lock_cb() while walking the
 * resource hash.
 * NOTE(review): fields referenced by the callback (rcd_added, rcd_total,
 * rcd_cursor, rcd_start, rcd_skip) are not visible in this listing; the
 * definition appears truncated -- confirm against the full source. */
78 struct ldlm_reclaim_cb_data {
/* locks selected for revocation; later fed to ldlm_run_ast_work() */
79 struct list_head rcd_rpc_list;
/* only locks idle for at least this long are reclaimed */
85 cfs_duration_t rcd_age;
/* last hash bucket visited, to detect bucket boundaries */
86 struct cfs_hash_bd *rcd_prev_bd;
/* Return true when @lock is a candidate for server-side reclaim.
 * NOTE(review): the return statements and closing braces are not visible
 * in this listing; the body appears truncated. */
89 static inline bool ldlm_lock_reclaimable(struct ldlm_lock *lock)
91 struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);
93 /* FLOCK & PLAIN lock are not reclaimable. FLOCK is
94 * explicitly controlled by application, PLAIN lock
95 * is used by quota global lock and config lock.
/* Only server-side IBITS and EXTENT locks qualify. */
97 if (ns->ns_client == LDLM_NAMESPACE_SERVER &&
98 (lock->l_resource->lr_type == LDLM_IBITS ||
99 lock->l_resource->lr_type == LDLM_EXTENT))
/* cfs_hash iteration callback: scan one resource's granted-lock list and
 * queue reclaimable locks onto data->rcd_rpc_list until rcd_total locks
 * have been collected.  Returns 1 to stop the hash iteration, 0 to keep
 * going (per cfs_hash_for_each convention).
 * NOTE(review): several lines (continue statements, closing braces,
 * return paths) are missing from this listing; it appears truncated. */
104 static int ldlm_reclaim_lock_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
105 struct hlist_node *hnode, void *arg)
108 struct ldlm_resource *res;
109 struct ldlm_reclaim_cb_data *data;
110 struct ldlm_lock *lock;
111 struct ldlm_ns_bucket *nsb;
114 data = (struct ldlm_reclaim_cb_data *)arg;
116 LASSERTF(data->rcd_added < data->rcd_total, "added:%d >= total:%d\n",
117 data->rcd_added, data->rcd_total);
119 nsb = cfs_hash_bd_extra_get(hs, bd);
120 res = cfs_hash_object(hs, hnode);
/* Crossed into a new hash bucket: advance the round-robin start point
 * and reset the per-bucket cursor. */
122 if (data->rcd_prev_bd != bd) {
123 if (data->rcd_prev_bd != NULL)
/* NOTE(review): this bumps the namespace-level ns_reclaim_start while
 * the start offset below is taken from the bucket-level
 * nsb_reclaim_start -- the asymmetry looks suspicious; confirm against
 * upstream Lustre. */
124 ldlm_res_to_ns(res)->ns_reclaim_start++;
125 data->rcd_prev_bd = bd;
126 data->rcd_cursor = 0;
127 data->rcd_start = nsb->nsb_reclaim_start %
128 cfs_hash_bd_count_get(bd);
/* Skip resources before the round-robin start position. */
131 if (data->rcd_skip && data->rcd_cursor < data->rcd_start) {
136 nsb->nsb_reclaim_start++;
139 list_for_each_entry(lock, &res->lr_granted, l_res_link) {
140 if (!ldlm_lock_reclaimable(lock))
/* Only reclaim locks idle longer than the age cutoff, unless the
 * OBD_FAIL_LDLM_WATERMARK_LOW fault-injection point is armed. */
143 if (!OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_LOW) &&
144 cfs_time_before(cfs_time_current(),
145 cfs_time_add(lock->l_last_used,
/* Queue the lock for a revoke AST exactly once. */
149 if (!ldlm_is_ast_sent(lock)) {
150 ldlm_set_ast_sent(lock);
151 LASSERT(list_empty(&lock->l_rk_ast));
152 list_add(&lock->l_rk_ast, &data->rcd_rpc_list);
154 if (++data->rcd_added == data->rcd_total) {
155 rc = 1; /* stop the iteration */
/* Reclaim up to *count locks from namespace @ns: walk the resource hash
 * collecting victims with ldlm_reclaim_lock_cb(), then send revoke ASTs
 * for them.  On return, *count is decremented by the number of locks
 * actually queued.  @age is the idle-age cutoff; @skip enables the
 * round-robin skip of already-visited resources.
 * NOTE(review): early-return paths, closing braces and some data.rcd_*
 * initializers are missing from this listing; it appears truncated. */
165 static void ldlm_reclaim_res(struct ldlm_namespace *ns, int *count,
166 cfs_duration_t age, bool skip)
168 struct ldlm_reclaim_cb_data data;
169 int idx, type, start;
172 LASSERT(*count != 0);
/* Only MDT/OST namespaces are subject to reclaim. */
175 type = server_name2index(ns->ns_obd->obd_name, &idx, NULL);
176 if (type != LDD_F_SV_TYPE_MDT && type != LDD_F_SV_TYPE_OST) {
/* Nothing to do for a namespace with no active references. */
182 if (atomic_read(&ns->ns_bref) == 0) {
187 INIT_LIST_HEAD(&data.rcd_rpc_list);
189 data.rcd_total = *count;
191 data.rcd_skip = skip;
192 data.rcd_prev_bd = NULL;
193 start = ns->ns_reclaim_start % CFS_HASH_NBKT(ns->ns_rs_hash);
195 cfs_hash_for_each_nolock(ns->ns_rs_hash, ldlm_reclaim_lock_cb, &data,
198 CDEBUG(D_DLMTRACE, "NS(%s): %d locks to be reclaimed, found %d/%d "
199 "locks.\n", ldlm_ns_name(ns), *count, data.rcd_added,
202 LASSERTF(*count >= data.rcd_added, "count:%d, added:%d\n", *count,
/* Send the revoke ASTs for every lock collected above. */
205 ldlm_run_ast_work(ns, &data.rcd_rpc_list, LDLM_WORK_REVOKE_AST);
206 *count -= data.rcd_added;
/* Maximum number of locks revoked in a single reclaim pass. */
210 #define LDLM_RECLAIM_BATCH 512
/* Lower/upper bounds for the idle-age cutoff used to pick victims. */
211 #define LDLM_RECLAIM_AGE_MIN cfs_time_seconds(300)
212 #define LDLM_RECLAIM_AGE_MAX (LDLM_DEFAULT_MAX_ALIVE * 3 / 4)
/* Compute the idle-age cutoff for the next reclaim pass: start from the
 * last pass's cutoff plus the time elapsed since it ran, then clamp into
 * [LDLM_RECLAIM_AGE_MIN, LDLM_RECLAIM_AGE_MAX].
 * NOTE(review): the declaration of 'age', the return statement and the
 * closing brace are missing from this listing; it appears truncated. */
214 static inline cfs_duration_t ldlm_reclaim_age(void)
218 age = ldlm_last_reclaim_age +
219 cfs_time_sub(cfs_time_current(), ldlm_last_reclaim_time);
220 if (age > LDLM_RECLAIM_AGE_MAX)
221 age = LDLM_RECLAIM_AGE_MAX;
222 else if (age < (LDLM_RECLAIM_AGE_MIN * 2))
223 age = LDLM_RECLAIM_AGE_MIN;
/* Run one reclaim pass over all server-side namespaces, revoking up to
 * LDLM_RECLAIM_BATCH locks in round-robin order.  Only one reclaimer may
 * run at a time (guarded by ldlm_nr_reclaimer); concurrent callers bail
 * out immediately.
 * NOTE(review): declarations of 'age'/'skip', the nr_processed
 * initialization, several braces and the retry logic around the age
 * reduction are missing from this listing; it appears truncated. */
227 static void ldlm_reclaim_ns(void)
229 struct ldlm_namespace *ns;
230 int count = LDLM_RECLAIM_BATCH;
231 int ns_nr, nr_processed;
232 ldlm_side_t ns_cli = LDLM_NAMESPACE_SERVER;
/* Allow a single concurrent reclaimer: 0 -> 1 transition only. */
237 if (!atomic_add_unless(&ldlm_nr_reclaimer, 1, 1)) {
242 age = ldlm_reclaim_age();
245 ns_nr = ldlm_namespace_nr_read(ns_cli);
/* Visit namespaces round-robin until the batch quota is spent or every
 * namespace has been processed once. */
246 while (count > 0 && nr_processed < ns_nr) {
247 mutex_lock(ldlm_namespace_lock(ns_cli));
249 if (list_empty(ldlm_namespace_list(ns_cli))) {
250 mutex_unlock(ldlm_namespace_lock(ns_cli));
/* Take the first namespace and rotate it to the tail so the next pass
 * starts elsewhere. */
254 ns = ldlm_namespace_first_locked(ns_cli);
255 ldlm_namespace_move_to_active_locked(ns, ns_cli);
256 mutex_unlock(ldlm_namespace_lock(ns_cli));
258 ldlm_reclaim_res(ns, &count, age, skip);
259 ldlm_namespace_put(ns);
/* Quota not exhausted: lower the age cutoff (bounded below by
 * LDLM_RECLAIM_AGE_MIN) so a retry can find more victims. */
263 if (count > 0 && age > LDLM_RECLAIM_AGE_MIN) {
265 if (age < (LDLM_RECLAIM_AGE_MIN * 2))
266 age = LDLM_RECLAIM_AGE_MIN;
/* Record this pass's cutoff and completion time for the next
 * ldlm_reclaim_age() computation, then release the reclaimer slot. */
271 ldlm_last_reclaim_age = age;
272 ldlm_last_reclaim_time = cfs_time_current();
274 atomic_add_unless(&ldlm_nr_reclaimer, -1, 0);
/* Account a newly granted lock: bump the global granted counter and
 * stamp its last-used time.  Non-reclaimable locks are ignored.
 * NOTE(review): the early-return statement and closing brace are missing
 * from this listing; it appears truncated. */
278 void ldlm_reclaim_add(struct ldlm_lock *lock)
280 if (!ldlm_lock_reclaimable(lock))
282 percpu_counter_add(&ldlm_granted_total, 1);
283 lock->l_last_used = cfs_time_current();
/* Un-account a released lock: decrement the global granted counter.
 * Non-reclaimable locks are ignored, mirroring ldlm_reclaim_add().
 * NOTE(review): the early-return statement and closing brace are missing
 * from this listing; it appears truncated. */
286 void ldlm_reclaim_del(struct ldlm_lock *lock)
288 if (!ldlm_lock_reclaimable(lock))
290 percpu_counter_sub(&ldlm_granted_total, 1);
/* Check the watermarks against the current granted-lock count: trigger a
 * reclaim pass when the low watermark is crossed, and report "full"
 * (callers should reject enqueues with -EINPROGRESS) when the high
 * watermark is crossed.  The OBD_FAIL_* checks are fault-injection hooks
 * for testing.  A watermark of 0 disables its check.
 * NOTE(review): the return statements and surrounding conditional lines
 * are missing from this listing; it appears truncated. */
293 bool ldlm_reclaim_full(void)
295 __u64 high = ldlm_watermark_high;
296 __u64 low = ldlm_watermark_low;
298 if (low != 0 && OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_LOW))
302 percpu_counter_read_positive(&ldlm_granted_total) > low)
305 if (high != 0 && OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_HIGH))
309 percpu_counter_read_positive(&ldlm_granted_total) > high)
/* Convert a percentage of total memory into a lock count:
 * (total_pages * page_size * ratio) / (100 * sizeof(struct ldlm_lock)).
 * NOTE(review): the declaration of 'locknr', the return statement and
 * braces are missing from this listing; it appears truncated. */
315 static inline __u64 ldlm_ratio2locknr(int ratio)
319 locknr = ((__u64)NUM_CACHEPAGES << PAGE_CACHE_SHIFT) * ratio;
320 do_div(locknr, 100 * sizeof(struct ldlm_lock));
/* Default watermarks as a percentage of total memory (see the overview
 * comment at the top of the file). */
325 #define LDLM_WM_RATIO_LOW_DEFAULT 20
326 #define LDLM_WM_RATIO_HIGH_DEFAULT 30
/* Initialize the reclaim machinery at module load: reset the reclaimer
 * gate, derive the default watermarks from total memory, seed the
 * age/time state and set up the per-CPU granted counter.  Returns the
 * percpu_counter_init() result (0 on success, negative errno on
 * failure). */
328 int ldlm_reclaim_setup(void)
330 atomic_set(&ldlm_nr_reclaimer, 0);
331 ldlm_watermark_low = ldlm_ratio2locknr(LDLM_WM_RATIO_LOW_DEFAULT);
332 ldlm_watermark_high = ldlm_ratio2locknr(LDLM_WM_RATIO_HIGH_DEFAULT);
333 ldlm_last_reclaim_age = LDLM_RECLAIM_AGE_MAX;
334 ldlm_last_reclaim_time = cfs_time_current();
336 return percpu_counter_init(&ldlm_granted_total, 0);
/* Tear down the reclaim machinery at module unload: release the per-CPU
 * granted-lock counter allocated by ldlm_reclaim_setup(). */
339 void ldlm_reclaim_cleanup(void)
341 percpu_counter_destroy(&ldlm_granted_total);
344 #else /* HAVE_SERVER_SUPPORT */
/* Client-only build: lock reclaim is a server-side mechanism, so these
 * entry points compile to no-ops.
 * NOTE(review): the stub bodies and return statements are missing from
 * this listing; it appears truncated. */
346 bool ldlm_reclaim_full(void)
351 void ldlm_reclaim_add(struct ldlm_lock *lock)
355 void ldlm_reclaim_del(struct ldlm_lock *lock)
359 int ldlm_reclaim_setup(void)
364 void ldlm_reclaim_cleanup(void)
368 #endif /* HAVE_SERVER_SUPPORT */