/* lustre/ldlm/ldlm_reclaim.c */
/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.gnu.org/licenses/gpl-2.0.html
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2015, Intel Corporation.
 * Use is subject to license terms.
 *
 * Author: Niu    Yawei    <yawei.niu@intel.com>
 */

#define DEBUG_SUBSYSTEM S_LDLM

#include <linux/kthread.h>
#include <lustre_dlm.h>
#include <obd_class.h>
#include "ldlm_internal.h"

/*
 * To avoid ldlm locks exhausting server memory, two global parameters,
 * ldlm_watermark_low & ldlm_watermark_high, are used to reclaim granted
 * locks and reject incoming enqueue requests defensively.
 *
 * ldlm_watermark_low: When the number of granted locks reaches this
 * threshold, the server starts to revoke locks gradually.
 *
 * ldlm_watermark_high: When the number of granted locks reaches this
 * threshold, the server will return -EINPROGRESS to any incoming enqueue
 * request until the lock count shrinks below the threshold again.
 *
 * ldlm_watermark_low & ldlm_watermark_high are set to 20% & 30% of the
 * total memory by default. They are tunable via proc entries; when set
 * to 0, the feature is disabled.
 */
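
/*
 * A rough, hypothetical sketch of how a caller consumes these watermarks
 * (the handler name below is illustrative only; the real check sits in
 * the server-side enqueue path):
 *
 *     static int hypothetical_enqueue_handler(...)
 *     {
 *             if (ldlm_reclaim_full())
 *                     return -EINPROGRESS;  (over ldlm_watermark_high)
 *             ... grant the lock, then call ldlm_reclaim_add(lock) ...
 *     }
 *
 * ldlm_reclaim_full() also kicks off a reclaim pass once the granted-lock
 * count passes ldlm_watermark_low, so callers need only this one check.
 */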

/*
 * FIXME:
 *
 * In the current implementation, the server identifies which locks should
 * be revoked by choosing locks from namespaces/resources in a round-robin
 * manner, which isn't optimal. Ideally, the server would notify clients to
 * cancel locks voluntarily, because only the client knows exactly when a
 * lock was last used.
 *
 * However, how to notify the client promptly is a problem. One idea is to
 * leverage the glimpse callbacks on some artificial global lock (as the
 * quota global lock does), but that requires protocol changes; let's fix
 * it in a future long-term solution.
 */

__u64 ldlm_watermark_low;
__u64 ldlm_watermark_high;

#ifdef HAVE_SERVER_SUPPORT

static struct percpu_counter    ldlm_granted_total;
static atomic_t                 ldlm_nr_reclaimer;
static cfs_duration_t           ldlm_last_reclaim_age;
static cfs_time_t               ldlm_last_reclaim_time;

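/*
 * Per-iteration state for the reclaim hash walk: rcd_rpc_list collects the
 * locks whose revoke ASTs will be sent, rcd_added/rcd_total track progress
 * against the reclaim batch, rcd_cursor/rcd_start/rcd_skip implement the
 * round-robin starting point within a hash bucket, rcd_age is the minimum
 * idle time for a lock to be reclaimed, and rcd_prev_bd remembers the last
 * bucket visited.
 */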
struct ldlm_reclaim_cb_data {
        struct list_head         rcd_rpc_list;
        int                      rcd_added;
        int                      rcd_total;
        int                      rcd_cursor;
        int                      rcd_start;
        bool                     rcd_skip;
        cfs_duration_t           rcd_age;
        struct cfs_hash_bd      *rcd_prev_bd;
};

static inline bool ldlm_lock_reclaimable(struct ldlm_lock *lock)
{
        struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);

        /* FLOCK & PLAIN locks are not reclaimable. FLOCK is
         * explicitly controlled by the application; PLAIN locks
         * are used by the quota global lock and the config lock.
         */
        if (ns->ns_client == LDLM_NAMESPACE_SERVER &&
            (lock->l_resource->lr_type == LDLM_IBITS ||
             lock->l_resource->lr_type == LDLM_EXTENT))
                return true;
        return false;
}

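/*
 * Callback for cfs_hash_for_each_nolock(): scan the granted list of one
 * resource and collect reclaimable locks that have been idle longer than
 * rcd_age onto rcd_rpc_list, taking a reference and setting the AST-sent
 * flag on each. On the first pass (rcd_skip set), resources before the
 * remembered round-robin start position are skipped. Returns 1 to stop
 * the iteration once rcd_total locks have been gathered.
 */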
static int ldlm_reclaim_lock_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
                                struct hlist_node *hnode, void *arg)
{
        struct ldlm_resource            *res;
        struct ldlm_reclaim_cb_data     *data;
        struct ldlm_lock                *lock;
        struct ldlm_ns_bucket           *nsb;
        int                              rc = 0;

        data = (struct ldlm_reclaim_cb_data *)arg;

        LASSERTF(data->rcd_added < data->rcd_total, "added:%d >= total:%d\n",
                 data->rcd_added, data->rcd_total);

        nsb = cfs_hash_bd_extra_get(hs, bd);
        res = cfs_hash_object(hs, hnode);

        if (data->rcd_prev_bd != bd) {
                if (data->rcd_prev_bd != NULL)
                        ldlm_res_to_ns(res)->ns_reclaim_start++;
                data->rcd_prev_bd = bd;
                data->rcd_cursor = 0;
                data->rcd_start = nsb->nsb_reclaim_start %
                                  cfs_hash_bd_count_get(bd);
        }

        if (data->rcd_skip && data->rcd_cursor < data->rcd_start) {
                data->rcd_cursor++;
                return 0;
        }

        nsb->nsb_reclaim_start++;

        lock_res(res);
        list_for_each_entry(lock, &res->lr_granted, l_res_link) {
                if (!ldlm_lock_reclaimable(lock))
                        continue;

                if (!OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_LOW) &&
                    cfs_time_before(cfs_time_current(),
                                    cfs_time_add(lock->l_last_used,
                                                 data->rcd_age)))
                        continue;

                if (!ldlm_is_ast_sent(lock)) {
                        ldlm_set_ast_sent(lock);
                        LASSERT(list_empty(&lock->l_rk_ast));
                        list_add(&lock->l_rk_ast, &data->rcd_rpc_list);
                        LDLM_LOCK_GET(lock);
                        if (++data->rcd_added == data->rcd_total) {
                                rc = 1; /* stop the iteration */
                                break;
                        }
                }
        }
        unlock_res(res);

        return rc;
}

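/*
 * Try to reclaim at most *count locks from one server namespace: walk its
 * resource hash (starting from the namespace's round-robin position),
 * gather sufficiently idle locks via ldlm_reclaim_lock_cb() and send
 * revoke ASTs for them. *count is decremented by the number of locks
 * actually queued. Namespaces that are not MDT/OST targets, or that have
 * no active references, are skipped.
 */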
static void ldlm_reclaim_res(struct ldlm_namespace *ns, int *count,
                             cfs_duration_t age, bool skip)
{
        struct ldlm_reclaim_cb_data     data;
        int                             idx, type, start;
        ENTRY;

        LASSERT(*count != 0);

        if (ns->ns_obd) {
                type = server_name2index(ns->ns_obd->obd_name, &idx, NULL);
                if (type != LDD_F_SV_TYPE_MDT && type != LDD_F_SV_TYPE_OST) {
                        EXIT;
                        return;
                }
        }

        if (atomic_read(&ns->ns_bref) == 0) {
                EXIT;
                return;
        }

        INIT_LIST_HEAD(&data.rcd_rpc_list);
        data.rcd_added = 0;
        data.rcd_total = *count;
        data.rcd_age = age;
        data.rcd_skip = skip;
        data.rcd_prev_bd = NULL;
        start = ns->ns_reclaim_start % CFS_HASH_NBKT(ns->ns_rs_hash);

        cfs_hash_for_each_nolock(ns->ns_rs_hash, ldlm_reclaim_lock_cb, &data,
                                 start);

        CDEBUG(D_DLMTRACE, "NS(%s): %d locks to be reclaimed, found %d/%d "
               "locks.\n", ldlm_ns_name(ns), *count, data.rcd_added,
               data.rcd_total);

        LASSERTF(*count >= data.rcd_added, "count:%d, added:%d\n", *count,
                 data.rcd_added);

        ldlm_run_ast_work(ns, &data.rcd_rpc_list, LDLM_WORK_REVOKE_AST);
        *count -= data.rcd_added;
        EXIT;
}

#define LDLM_RECLAIM_BATCH      512
#define LDLM_RECLAIM_AGE_MIN    cfs_time_seconds(300)
#define LDLM_RECLAIM_AGE_MAX    (LDLM_DEFAULT_MAX_ALIVE * 3 / 4)

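/*
 * Pick the idle-time threshold for this reclaim run: start from the age
 * used by the previous run plus the time elapsed since then, then clamp
 * it to [LDLM_RECLAIM_AGE_MIN, LDLM_RECLAIM_AGE_MAX] (anything under twice
 * the minimum is pulled down to the minimum).
 */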
static inline cfs_duration_t ldlm_reclaim_age(void)
{
        cfs_duration_t  age;

        age = ldlm_last_reclaim_age +
                cfs_time_sub(cfs_time_current(), ldlm_last_reclaim_time);
        if (age > LDLM_RECLAIM_AGE_MAX)
                age = LDLM_RECLAIM_AGE_MAX;
        else if (age < (LDLM_RECLAIM_AGE_MIN * 2))
                age = LDLM_RECLAIM_AGE_MIN;
        return age;
}

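/*
 * Reclaim a batch of LDLM_RECLAIM_BATCH locks across the server-side
 * namespaces. Only one reclaimer runs at a time (guarded by
 * ldlm_nr_reclaimer). Namespaces are visited round-robin; if a full pass
 * does not find enough sufficiently idle locks, the age threshold is
 * halved (down to LDLM_RECLAIM_AGE_MIN) and the scan repeated without
 * skipping, until the batch is filled or the minimum age is reached. The
 * age and time of the last reclaim are recorded for the next run.
 */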
static void ldlm_reclaim_ns(void)
{
        struct ldlm_namespace   *ns;
        int                      count = LDLM_RECLAIM_BATCH;
        int                      ns_nr, nr_processed;
        ldlm_side_t              ns_cli = LDLM_NAMESPACE_SERVER;
        cfs_duration_t           age;
        bool                     skip = true;
        ENTRY;

        if (!atomic_add_unless(&ldlm_nr_reclaimer, 1, 1)) {
                EXIT;
                return;
        }

        age = ldlm_reclaim_age();
again:
        nr_processed = 0;
        ns_nr = ldlm_namespace_nr_read(ns_cli);
        while (count > 0 && nr_processed < ns_nr) {
                mutex_lock(ldlm_namespace_lock(ns_cli));

                if (list_empty(ldlm_namespace_list(ns_cli))) {
                        mutex_unlock(ldlm_namespace_lock(ns_cli));
                        goto out;
                }

                ns = ldlm_namespace_first_locked(ns_cli);
                ldlm_namespace_move_to_active_locked(ns, ns_cli);
                mutex_unlock(ldlm_namespace_lock(ns_cli));

                ldlm_reclaim_res(ns, &count, age, skip);
                ldlm_namespace_put(ns);
                nr_processed++;
        }

        if (count > 0 && age > LDLM_RECLAIM_AGE_MIN) {
                age >>= 1;
                if (age < (LDLM_RECLAIM_AGE_MIN * 2))
                        age = LDLM_RECLAIM_AGE_MIN;
                skip = false;
                goto again;
        }

        ldlm_last_reclaim_age = age;
        ldlm_last_reclaim_time = cfs_time_current();
out:
        atomic_add_unless(&ldlm_nr_reclaimer, -1, 0);
        EXIT;
}

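/*
 * Accounting hooks: ldlm_reclaim_add() is called when a reclaimable lock
 * is granted (it also stamps l_last_used for the idle-age check), and
 * ldlm_reclaim_del() when such a lock goes away; together they maintain
 * the ldlm_granted_total counter compared against the watermarks.
 */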
void ldlm_reclaim_add(struct ldlm_lock *lock)
{
        if (!ldlm_lock_reclaimable(lock))
                return;
        percpu_counter_add(&ldlm_granted_total, 1);
        lock->l_last_used = cfs_time_current();
}

void ldlm_reclaim_del(struct ldlm_lock *lock)
{
        if (!ldlm_lock_reclaimable(lock))
                return;
        percpu_counter_sub(&ldlm_granted_total, 1);
}

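/*
 * Intended to be checked on incoming enqueue requests (see the comment at
 * the top of this file): once the granted-lock total exceeds
 * ldlm_watermark_low, a reclaim pass is triggered; once it exceeds
 * ldlm_watermark_high, return true so the caller can reject the request
 * until the count drops again. A watermark of 0 disables the corresponding
 * check; the OBD_FAIL hooks let tests override the thresholds with
 * cfs_fail_val.
 */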
bool ldlm_reclaim_full(void)
{
        __u64 high = ldlm_watermark_high;
        __u64 low = ldlm_watermark_low;

        if (low != 0 && OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_LOW))
                low = cfs_fail_val;

        if (low != 0 &&
            percpu_counter_read_positive(&ldlm_granted_total) > low)
                ldlm_reclaim_ns();

        if (high != 0 && OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_HIGH))
                high = cfs_fail_val;

        if (high != 0 &&
            percpu_counter_read_positive(&ldlm_granted_total) > high)
                return true;

        return false;
}

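/*
 * Convert a percentage of total cacheable memory into a lock count:
 *
 *     locknr = (NUM_CACHEPAGES * PAGE_CACHE_SIZE * ratio) /
 *              (100 * sizeof(struct ldlm_lock))
 *
 * i.e. the number of ldlm_lock structures that would fit into that share
 * of memory. Used to derive the default watermarks below.
 */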
static inline __u64 ldlm_ratio2locknr(int ratio)
{
        __u64 locknr;

        locknr = ((__u64)NUM_CACHEPAGES << PAGE_CACHE_SHIFT) * ratio;
        do_div(locknr, 100 * sizeof(struct ldlm_lock));

        return locknr;
}

#define LDLM_WM_RATIO_LOW_DEFAULT       20
#define LDLM_WM_RATIO_HIGH_DEFAULT      30

int ldlm_reclaim_setup(void)
{
        atomic_set(&ldlm_nr_reclaimer, 0);
        ldlm_watermark_low = ldlm_ratio2locknr(LDLM_WM_RATIO_LOW_DEFAULT);
        ldlm_watermark_high = ldlm_ratio2locknr(LDLM_WM_RATIO_HIGH_DEFAULT);
        ldlm_last_reclaim_age = LDLM_RECLAIM_AGE_MAX;
        ldlm_last_reclaim_time = cfs_time_current();

        return percpu_counter_init(&ldlm_granted_total, 0);
}

void ldlm_reclaim_cleanup(void)
{
        percpu_counter_destroy(&ldlm_granted_total);
}

#else /* HAVE_SERVER_SUPPORT */

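/* Lock reclaim is a server-side mechanism; clients get no-op stubs. */
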
bool ldlm_reclaim_full(void)
{
        return false;
}

void ldlm_reclaim_add(struct ldlm_lock *lock)
{
}

void ldlm_reclaim_del(struct ldlm_lock *lock)
{
}

int ldlm_reclaim_setup(void)
{
        return 0;
}

void ldlm_reclaim_cleanup(void)
{
}

#endif /* HAVE_SERVER_SUPPORT */