/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.gnu.org/licenses/gpl-2.0.html
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2015, Intel Corporation.
 * Use is subject to license terms.
 *
 * Author: Niu Yawei <yawei.niu@intel.com>
 */
#define DEBUG_SUBSYSTEM S_LDLM

#include <linux/kthread.h>
#include <lustre_dlm.h>
#include <obd_class.h>
#include "ldlm_internal.h"
/*
 * To avoid ldlm locks exhausting server memory, two global parameters,
 * ldlm_reclaim_threshold & ldlm_lock_limit, are used for reclaiming
 * granted locks and rejecting incoming enqueue requests defensively.
 *
 * ldlm_reclaim_threshold: When the number of granted locks reaches this
 * threshold, the server starts to revoke locks gradually.
 *
 * ldlm_lock_limit: When the number of granted locks reaches this
 * threshold, the server will return -EINPROGRESS to any incoming enqueue
 * request until the lock count shrinks below the threshold again.
 *
 * ldlm_reclaim_threshold & ldlm_lock_limit are set to 20% & 30% of the
 * total memory by default. They are tunable via proc entries; when set
 * to 0, the feature is disabled.
 */
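/*
 * Note: the memory percentages above are converted into lock counts by
 * ldlm_ratio2locknr() below, and back into megabytes (for the proc files)
 * by ldlm_locknr2mb().
 */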
#ifdef HAVE_SERVER_SUPPORT

/* Lock count is stored in ldlm_reclaim_threshold & ldlm_lock_limit */
__u64 ldlm_reclaim_threshold;
__u64 ldlm_lock_limit;

/* Represents ldlm_reclaim_threshold & ldlm_lock_limit in MB, used for
 * the proc interface. */
__u64 ldlm_reclaim_threshold_mb;
__u64 ldlm_lock_limit_mb;

struct percpu_counter	ldlm_granted_total;
static atomic_t		ldlm_nr_reclaimer;
static s64		ldlm_last_reclaim_age_ns;
static ktime_t		ldlm_last_reclaim_time;
struct ldlm_reclaim_cb_data {
	struct list_head	 rcd_rpc_list;
	int			 rcd_added;
	int			 rcd_total;
	int			 rcd_cursor;
	int			 rcd_start;
	bool			 rcd_skip;
	s64			 rcd_age_ns;
	struct cfs_hash_bd	*rcd_prev_bd;
};
static inline bool ldlm_lock_reclaimable(struct ldlm_lock *lock)
{
	struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);

	/* FLOCK & PLAIN locks are not reclaimable. FLOCK is
	 * explicitly controlled by the application, PLAIN locks
	 * are used by the quota global lock and the config lock.
	 */
	if (ns->ns_client == LDLM_NAMESPACE_SERVER &&
	    (lock->l_resource->lr_type == LDLM_IBITS ||
	     lock->l_resource->lr_type == LDLM_EXTENT))
		return true;

	return false;
}
/**
 * Callback function for revoking locks from a certain resource.
 *
 * \param [in] hs	ns_rs_hash
 * \param [in] bd	current bucket of ns_rs_hash
 * \param [in] hnode	hnode of the resource
 * \param [in] arg	opaque data
 *
 * \retval 0		continue the scan
 * \retval 1		stop the iteration
 */
static int ldlm_reclaim_lock_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
				struct hlist_node *hnode, void *arg)
{
	struct ldlm_resource		*res;
	struct ldlm_reclaim_cb_data	*data;
	struct ldlm_lock		*lock;
	struct ldlm_ns_bucket		*nsb;
	int				 rc = 0;

	data = (struct ldlm_reclaim_cb_data *)arg;

	LASSERTF(data->rcd_added < data->rcd_total, "added:%d >= total:%d\n",
		 data->rcd_added, data->rcd_total);

	nsb = cfs_hash_bd_extra_get(hs, bd);
	res = cfs_hash_object(hs, hnode);

	if (data->rcd_prev_bd != bd) {
		/* Entered a new hash bucket: reset the cursor and compute
		 * where scanning should start in this bucket. */
		if (data->rcd_prev_bd != NULL)
			ldlm_res_to_ns(res)->ns_reclaim_start++;
		data->rcd_prev_bd = bd;
		data->rcd_cursor = 0;
		data->rcd_start = nsb->nsb_reclaim_start %
				  cfs_hash_bd_count_get(bd);
	}

	if (data->rcd_skip && data->rcd_cursor < data->rcd_start) {
		data->rcd_cursor++;
		return 0;
	}

	nsb->nsb_reclaim_start++;

	lock_res(res);
	list_for_each_entry(lock, &res->lr_granted, l_res_link) {
		if (!ldlm_lock_reclaimable(lock))
			continue;

		/* Skip locks used more recently than the reclaim age unless
		 * the low-watermark fault injection is active. */
		if (!OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_LOW) &&
		    ktime_before(ktime_get(),
				 ktime_add_ns(lock->l_last_used,
					      data->rcd_age_ns)))
			continue;

		if (!ldlm_is_ast_sent(lock)) {
			ldlm_set_ast_sent(lock);
			LASSERT(list_empty(&lock->l_rk_ast));
			list_add(&lock->l_rk_ast, &data->rcd_rpc_list);
			LDLM_LOCK_GET(lock);
			if (++data->rcd_added == data->rcd_total) {
				rc = 1; /* stop the iteration */
				break;
			}
		}
	}
	unlock_res(res);

	return rc;
}
/**
 * Revoke locks from the resources of a namespace in a roundrobin
 * manner.
 *
 * \param[in] ns	namespace to do the lock revoke on
 * \param[in] count	count of locks to be revoked
 * \param[in] age_ns	only revoke locks older than this age
 * \param[in] skip	scan from the first lock on the resource if
 *			'skip' is false, otherwise continue the scan
 *			from the last scanned position
 * \param[out] count	count of locks still to be revoked
 */
static void ldlm_reclaim_res(struct ldlm_namespace *ns, int *count,
			     s64 age_ns, bool skip)
{
	struct ldlm_reclaim_cb_data	data;
	int				idx, type, start;
	ENTRY;

	LASSERT(*count != 0);

	if (ns->ns_obd != NULL) {
		type = server_name2index(ns->ns_obd->obd_name, &idx, NULL);
		if (type != LDD_F_SV_TYPE_MDT && type != LDD_F_SV_TYPE_OST) {
			EXIT;
			return;
		}
	}

	if (atomic_read(&ns->ns_bref) == 0) {
		EXIT;
		return;
	}

	INIT_LIST_HEAD(&data.rcd_rpc_list);
	data.rcd_added = 0;
	data.rcd_total = *count;
	data.rcd_age_ns = age_ns;
	data.rcd_skip = skip;
	data.rcd_prev_bd = NULL;
	start = ns->ns_reclaim_start % CFS_HASH_NBKT(ns->ns_rs_hash);

	cfs_hash_for_each_nolock(ns->ns_rs_hash, ldlm_reclaim_lock_cb, &data,
				 start);

	CDEBUG(D_DLMTRACE, "NS(%s): %d locks to be reclaimed, found %d/%d "
	       "locks.\n", ldlm_ns_name(ns), *count, data.rcd_added,
	       data.rcd_total);

	LASSERTF(*count >= data.rcd_added, "count:%d, added:%d\n", *count,
		 data.rcd_added);

	ldlm_run_ast_work(ns, &data.rcd_rpc_list, LDLM_WORK_REVOKE_AST);
	*count -= data.rcd_added;
	EXIT;
}
#define LDLM_RECLAIM_BATCH	512
#define LDLM_RECLAIM_AGE_MIN	(300 * NSEC_PER_SEC)
#define LDLM_RECLAIM_AGE_MAX	(LDLM_DEFAULT_MAX_ALIVE * NSEC_PER_SEC * 3 / 4)
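/*
 * A reclaim pass (ldlm_reclaim_ns()) revokes at most LDLM_RECLAIM_BATCH
 * locks in total and only considers locks idle for longer than the current
 * reclaim age, which is adapted between LDLM_RECLAIM_AGE_MIN (five minutes)
 * and LDLM_RECLAIM_AGE_MAX (three quarters of the default lock max-alive
 * time).
 */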
static inline s64 ldlm_reclaim_age(void)
{
	s64	age_ns = ldlm_last_reclaim_age_ns;
	ktime_t	now = ktime_get();
	ktime_t	diff;

	diff = ktime_sub(now, ldlm_last_reclaim_time);
	age_ns += ktime_to_ns(diff);
	if (age_ns > LDLM_RECLAIM_AGE_MAX)
		age_ns = LDLM_RECLAIM_AGE_MAX;
	else if (age_ns < (LDLM_RECLAIM_AGE_MIN * 2))
		age_ns = LDLM_RECLAIM_AGE_MIN;

	return age_ns;
}
/**
 * Revoke a certain amount of locks from all the server namespaces
 * in a roundrobin manner. Lock age is used to avoid reclaiming
 * locks that are not old enough.
 */
static void ldlm_reclaim_ns(void)
{
	struct ldlm_namespace	*ns;
	int			 count = LDLM_RECLAIM_BATCH;
	int			 ns_nr, nr_processed;
	enum ldlm_side		 ns_cli = LDLM_NAMESPACE_SERVER;
	s64			 age_ns;
	bool			 skip = true;
	ENTRY;

	/* Only one reclaimer is allowed to run at a time. */
	if (!atomic_add_unless(&ldlm_nr_reclaimer, 1, 1)) {
		EXIT;
		return;
	}

	age_ns = ldlm_reclaim_age();
again:
	nr_processed = 0;
	ns_nr = ldlm_namespace_nr_read(ns_cli);
	while (count > 0 && nr_processed < ns_nr) {
		mutex_lock(ldlm_namespace_lock(ns_cli));

		if (list_empty(ldlm_namespace_list(ns_cli))) {
			mutex_unlock(ldlm_namespace_lock(ns_cli));
			goto out;
		}

		ns = ldlm_namespace_first_locked(ns_cli);
		ldlm_namespace_move_to_active_locked(ns, ns_cli);
		mutex_unlock(ldlm_namespace_lock(ns_cli));

		ldlm_reclaim_res(ns, &count, age_ns, skip);
		ldlm_namespace_put(ns);
		nr_processed++;
	}

	/* If the batch isn't filled yet, retry with a smaller age and
	 * rescan each resource from the beginning. */
	if (count > 0 && age_ns > LDLM_RECLAIM_AGE_MIN) {
		age_ns >>= 1;
		if (age_ns < (LDLM_RECLAIM_AGE_MIN * 2))
			age_ns = LDLM_RECLAIM_AGE_MIN;
		skip = false;
		goto again;
	}

	ldlm_last_reclaim_age_ns = age_ns;
	ldlm_last_reclaim_time = ktime_get();
out:
	atomic_add_unless(&ldlm_nr_reclaimer, -1, 0);
	EXIT;
}
void ldlm_reclaim_add(struct ldlm_lock *lock)
{
	if (!ldlm_lock_reclaimable(lock))
		return;
	percpu_counter_add(&ldlm_granted_total, 1);
	lock->l_last_used = ktime_get();
}

void ldlm_reclaim_del(struct ldlm_lock *lock)
{
	if (!ldlm_lock_reclaimable(lock))
		return;
	percpu_counter_sub(&ldlm_granted_total, 1);
}
/**
 * Check on the total granted locks: return true if the count reaches the
 * high watermark (ldlm_lock_limit), otherwise return false. It also
 * triggers lock reclaim if the low watermark (ldlm_reclaim_threshold)
 * is reached.
 *
 * \retval true		high watermark reached.
 * \retval false	high watermark not reached.
 */
bool ldlm_reclaim_full(void)
{
	__u64 high = ldlm_lock_limit;
	__u64 low = ldlm_reclaim_threshold;

	/* Fault injection overrides the watermarks for testing. */
	if (low != 0 && OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_LOW))
		low = cfs_fail_val;

	if (low != 0 &&
	    percpu_counter_sum_positive(&ldlm_granted_total) > low)
		ldlm_reclaim_ns();

	if (high != 0 && OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_HIGH))
		high = cfs_fail_val;

	if (high != 0 &&
	    percpu_counter_sum_positive(&ldlm_granted_total) > high)
		return true;

	return false;
}
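/*
 * Usage sketch (not part of this file): the server-side enqueue path is
 * expected to consult ldlm_reclaim_full() before granting a new lock and,
 * per the comment at the top of this file, fail the request while the high
 * watermark is exceeded, e.g.:
 *
 *	if (ldlm_reclaim_full())
 *		rc = -EINPROGRESS;	hypothetical caller: reject enqueue
 */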
static inline __u64 ldlm_ratio2locknr(int ratio)
{
	__u64 locknr;

	locknr = ((__u64)NUM_CACHEPAGES << PAGE_SHIFT) * ratio;
	do_div(locknr, 100 * sizeof(struct ldlm_lock));

	return locknr;
}
static inline __u64 ldlm_locknr2mb(__u64 locknr)
{
	return (locknr * sizeof(struct ldlm_lock) + 512 * 1024) >> 20;
}
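/*
 * Rough, illustrative numbers only (the real values depend on the platform
 * page size, NUM_CACHEPAGES and sizeof(struct ldlm_lock), assumed here to
 * be about 512 bytes): with 16 GiB of cacheable memory, ldlm_ratio2locknr(20)
 * is roughly 6.7 million locks, and ldlm_locknr2mb() maps that back to about
 * 3300 MB, i.e. ~20% of memory.
 */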
#define LDLM_WM_RATIO_LOW_DEFAULT	20
#define LDLM_WM_RATIO_HIGH_DEFAULT	30
int ldlm_reclaim_setup(void)
{
	atomic_set(&ldlm_nr_reclaimer, 0);

	ldlm_reclaim_threshold = ldlm_ratio2locknr(LDLM_WM_RATIO_LOW_DEFAULT);
	ldlm_reclaim_threshold_mb = ldlm_locknr2mb(ldlm_reclaim_threshold);
	ldlm_lock_limit = ldlm_ratio2locknr(LDLM_WM_RATIO_HIGH_DEFAULT);
	ldlm_lock_limit_mb = ldlm_locknr2mb(ldlm_lock_limit);

	ldlm_last_reclaim_age_ns = LDLM_RECLAIM_AGE_MAX;
	ldlm_last_reclaim_time = ktime_get();

#ifdef HAVE_PERCPU_COUNTER_INIT_GFP_FLAG
	return percpu_counter_init(&ldlm_granted_total, 0, GFP_KERNEL);
#else
	return percpu_counter_init(&ldlm_granted_total, 0);
#endif
}

void ldlm_reclaim_cleanup(void)
{
	percpu_counter_destroy(&ldlm_granted_total);
}
#else /* HAVE_SERVER_SUPPORT */

bool ldlm_reclaim_full(void)
{
	return false;
}

void ldlm_reclaim_add(struct ldlm_lock *lock)
{
}

void ldlm_reclaim_del(struct ldlm_lock *lock)
{
}

int ldlm_reclaim_setup(void)
{
	return 0;
}

void ldlm_reclaim_cleanup(void)
{
}

#endif /* HAVE_SERVER_SUPPORT */