/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.gnu.org/licenses/gpl-2.0.html
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2015, Intel Corporation.
 * Use is subject to license terms.
 *
 * Author: Niu Yawei
 */

#define DEBUG_SUBSYSTEM S_LDLM

/*
 * NOTE(review): the three bare #include directives below have lost their
 * header names in this copy of the file; restore them from the project
 * source before building.
 */
#include
#include
#include
#include "ldlm_internal.h"

/*
 * To keep ldlm locks from exhausting server memory, two global parameters,
 * ldlm_reclaim_threshold and ldlm_lock_limit, are used to reclaim granted
 * locks and to reject incoming enqueue requests defensively.
 *
 * ldlm_reclaim_threshold: when the number of granted locks exceeds this
 * threshold, the server starts to revoke locks gradually.
 *
 * ldlm_lock_limit: when the number of granted locks exceeds this
 * threshold, the server returns -EINPROGRESS to any incoming enqueue
 * request until the lock count has shrunk below the threshold again.
 *
 * By default ldlm_reclaim_threshold and ldlm_lock_limit are set to 20% and
 * 30% of the total memory respectively. Both are tunable via proc entries;
 * setting one of them to 0 disables the corresponding check.
 */
#ifdef HAVE_SERVER_SUPPORT

/* Lock count is stored in ldlm_reclaim_threshold & ldlm_lock_limit */
__u64 ldlm_reclaim_threshold;
__u64 ldlm_lock_limit;

/* Represents ldlm_reclaim_threshold & ldlm_lock_limit in MB, used for
 * proc interface.
 */
__u64 ldlm_reclaim_threshold_mb;
__u64 ldlm_lock_limit_mb;

/* Number of granted reclaimable locks, see ldlm_reclaim_add()/_del(). */
struct percpu_counter ldlm_granted_total;
/* 0 or 1: set while one thread is running ldlm_reclaim_ns(). */
static atomic_t ldlm_nr_reclaimer;
/* Age limit (ns) and timestamp recorded at the end of the last reclaim. */
static s64 ldlm_last_reclaim_age_ns;
static ktime_t ldlm_last_reclaim_time;

/* State shared between ldlm_reclaim_res() and ldlm_reclaim_lock_cb(). */
struct ldlm_reclaim_cb_data {
	/* locks picked for revocation, linked through l_rk_ast */
	struct list_head	 rcd_rpc_list;
	/* number of locks put on rcd_rpc_list so far */
	int			 rcd_added;
	/* maximum number of locks to put on rcd_rpc_list */
	int			 rcd_total;
	/* resources skipped so far in the current bucket */
	int			 rcd_cursor;
	/* resources to skip at the head of the current bucket */
	int			 rcd_start;
	/* whether the first rcd_start resources of a bucket are skipped */
	bool			 rcd_skip;
	/* a lock is only picked once this many ns passed since l_last_used */
	s64			 rcd_age_ns;
	/* bucket seen by the previous callback invocation, NULL at start */
	struct cfs_hash_bd	*rcd_prev_bd;
};

/**
 * Tell whether a lock is subject to reclaim accounting and revocation.
 *
 * \param[in] lock	lock to check
 *
 * \retval true		lock lives in a server namespace and its resource
 *			type is LDLM_IBITS or LDLM_EXTENT
 * \retval false	otherwise
 */
static inline bool ldlm_lock_reclaimable(struct ldlm_lock *lock)
{
	struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);

	/* FLOCK & PLAIN lock are not reclaimable. FLOCK is
	 * explicitly controlled by application, PLAIN lock
	 * is used by quota global lock and config lock.
	 */
	if (ns->ns_client == LDLM_NAMESPACE_SERVER &&
	    (lock->l_resource->lr_type == LDLM_IBITS ||
	     lock->l_resource->lr_type == LDLM_EXTENT))
		return true;
	return false;
}

/**
 * Callback function for revoking locks from certain resource.
 *
 * Collects reclaimable, sufficiently aged granted locks of one resource
 * onto rcd_rpc_list until rcd_total locks have been gathered.
 *
 * \param [in] hs	ns_rs_hash
 * \param [in] bd	current bucket of ns_rs_hash
 * \param [in] hnode	hnode of the resource
 * \param [in] arg	opaque data, a struct ldlm_reclaim_cb_data
 *
 * \retval 0	continue the scan
 * \retval 1	stop the iteration
 */
static int ldlm_reclaim_lock_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
				struct hlist_node *hnode, void *arg)
{
	struct ldlm_resource		*res;
	struct ldlm_reclaim_cb_data	*data;
	struct ldlm_lock		*lock;
	struct ldlm_ns_bucket		*nsb;
	int				 rc = 0;

	data = (struct ldlm_reclaim_cb_data *)arg;

	/* The iteration must have been stopped once the quota was reached. */
	LASSERTF(data->rcd_added < data->rcd_total, "added:%d >= total:%d\n",
		 data->rcd_added, data->rcd_total);

	nsb = cfs_hash_bd_extra_get(hs, bd);
	res = cfs_hash_object(hs, hnode);

	/* First resource seen in a new bucket: reset the per-bucket state. */
	if (data->rcd_prev_bd != bd) {
		/* Moving on from a previous bucket advances the bucket
		 * index at which the next scan of this namespace starts.
		 */
		if (data->rcd_prev_bd != NULL)
			ldlm_res_to_ns(res)->ns_reclaim_start++;
		data->rcd_prev_bd = bd;
		data->rcd_cursor = 0;
		data->rcd_start = nsb->nsb_reclaim_start %
				  cfs_hash_bd_count_get(bd);
	}

	/* In skip mode, pass over the first rcd_start resources of the
	 * bucket without looking at their locks.
	 */
	if (data->rcd_skip && data->rcd_cursor < data->rcd_start) {
		data->rcd_cursor++;
		return 0;
	}

	/* This resource is being scanned; advance the bucket's start hint. */
	nsb->nsb_reclaim_start++;

	lock_res(res);
	list_for_each_entry(lock, &res->lr_granted, l_res_link) {
		if (!ldlm_lock_reclaimable(lock))
			continue;

		/* Skip locks used within the last rcd_age_ns nanoseconds,
		 * unless the OBD_FAIL_LDLM_WATERMARK_LOW fail check fires.
		 */
		if (!OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_LOW) &&
		    ktime_before(ktime_get(),
				 ktime_add_ns(lock->l_last_used,
					      data->rcd_age_ns)))
			continue;

		/* Only pick locks not already flagged as AST-sent. */
		if (!ldlm_is_ast_sent(lock)) {
			ldlm_set_ast_sent(lock);
			LASSERT(list_empty(&lock->l_rk_ast));
			list_add(&lock->l_rk_ast, &data->rcd_rpc_list);
			/* presumably takes a reference held while the lock
			 * sits on rcd_rpc_list - confirm against LDLM_LOCK_GET
			 */
			LDLM_LOCK_GET(lock);
			if (++data->rcd_added == data->rcd_total) {
				rc = 1; /* stop the iteration */
				break;
			}
		}
	}
	unlock_res(res);

	return rc;
}

/**
 * Revoke locks from the resources of a namespace in a round-robin
 * manner.
 *
 * Returns without doing anything if the namespace has an obd whose server
 * type is neither MDT nor OST, or if ns_bref is 0.
 *
 * \param[in]     ns		namespace to do the lock revoke on
 * \param[in,out] count		on entry, number of locks to be revoked (must
 *				not be 0); on return, reduced by the number
 *				of locks handed to ldlm_run_ast_work()
 * \param[in]     age_ns	only revoke locks whose l_last_used is at
 *				least this many nanoseconds in the past
 * \param[in]     skip		if true, skip the resources at the head of
 *				each bucket as given by the bucket's
 *				nsb_reclaim_start; if false, scan every
 *				bucket from its first resource
 */
static void ldlm_reclaim_res(struct ldlm_namespace *ns, int *count,
			     s64 age_ns, bool skip)
{
	struct ldlm_reclaim_cb_data	data;
	/* NOTE(review): idx is only written by server_name2index() and is
	 * not read afterwards in this function.
	 */
	int				idx, type, start;
	ENTRY;

	LASSERT(*count != 0);

	if (ns->ns_obd) {
		type = server_name2index(ns->ns_obd->obd_name, &idx, NULL);
		if (type != LDD_F_SV_TYPE_MDT && type != LDD_F_SV_TYPE_OST) {
			EXIT;
			return;
		}
	}

	if (atomic_read(&ns->ns_bref) == 0) {
		EXIT;
		return;
	}

	INIT_LIST_HEAD(&data.rcd_rpc_list);
	data.rcd_added = 0;
	data.rcd_total = *count;
	data.rcd_age_ns = age_ns;
	data.rcd_skip = skip;
	data.rcd_prev_bd = NULL;
	/* Start from the bucket index derived from ns_reclaim_start. */
	start = ns->ns_reclaim_start % CFS_HASH_NBKT(ns->ns_rs_hash);

	cfs_hash_for_each_nolock(ns->ns_rs_hash, ldlm_reclaim_lock_cb, &data,
				 start);

	CDEBUG(D_DLMTRACE, "NS(%s): %d locks to be reclaimed, found %d/%d "
	       "locks.\n", ldlm_ns_name(ns), *count, data.rcd_added,
	       data.rcd_total);

	LASSERTF(*count >= data.rcd_added, "count:%d, added:%d\n", *count,
		 data.rcd_added);

	ldlm_run_ast_work(ns, &data.rcd_rpc_list, LDLM_WORK_REVOKE_AST);
	*count -= data.rcd_added;
	EXIT;
}

/* Maximum number of locks gathered by one ldlm_reclaim_ns() run. */
#define LDLM_RECLAIM_BATCH	512
/* Lower and upper bounds, in nanoseconds, of the lock age limit. */
#define LDLM_RECLAIM_AGE_MIN	(300 * NSEC_PER_SEC)
#define LDLM_RECLAIM_AGE_MAX	(LDLM_DEFAULT_MAX_ALIVE * NSEC_PER_SEC * 3 / 4)

/**
 * Compute the lock age limit to start the next reclaim with.
 *
 * The age limit saved by the last reclaim is increased by the time elapsed
 * since then. The result is capped at LDLM_RECLAIM_AGE_MAX, and any value
 * below twice LDLM_RECLAIM_AGE_MIN is replaced by LDLM_RECLAIM_AGE_MIN.
 *
 * \retval age limit in nanoseconds
 */
static inline s64 ldlm_reclaim_age(void)
{
	s64 age_ns = ldlm_last_reclaim_age_ns;
	ktime_t now = ktime_get();
	ktime_t diff;

	diff = ktime_sub(now, ldlm_last_reclaim_time);
	age_ns += ktime_to_ns(diff);
	if (age_ns > LDLM_RECLAIM_AGE_MAX)
		age_ns = LDLM_RECLAIM_AGE_MAX;
	else if (age_ns < (LDLM_RECLAIM_AGE_MIN * 2))
		age_ns = LDLM_RECLAIM_AGE_MIN;
	return age_ns;
}

/**
 * Revoke certain amount of locks from all the server namespaces
 * in a roundrobin manner. Lock age is used to avoid reclaim on
 * the non-aged locks.
 *
 * At most LDLM_RECLAIM_BATCH locks are gathered per call. If a pass over
 * the namespaces does not find that many, the age limit is halved (but not
 * below LDLM_RECLAIM_AGE_MIN) and the pass is repeated with skip disabled.
 */
static void ldlm_reclaim_ns(void)
{
	struct ldlm_namespace	*ns;
	int			 count = LDLM_RECLAIM_BATCH;
	int			 ns_nr, nr_processed;
	enum ldlm_side		 ns_cli = LDLM_NAMESPACE_SERVER;
	s64			 age_ns;
	bool			 skip = true;
	ENTRY;

	/* Only one reclaimer at a time: the counter goes 0 -> 1 here and
	 * the add is refused when it is already 1.
	 */
	if (!atomic_add_unless(&ldlm_nr_reclaimer, 1, 1)) {
		EXIT;
		return;
	}

	age_ns = ldlm_reclaim_age();
again:
	nr_processed = 0;
	ns_nr = ldlm_namespace_nr_read(ns_cli);
	while (count > 0 && nr_processed < ns_nr) {
		mutex_lock(ldlm_namespace_lock(ns_cli));

		/* Leaves without updating ldlm_last_reclaim_age_ns/_time. */
		if (list_empty(ldlm_namespace_list(ns_cli))) {
			mutex_unlock(ldlm_namespace_lock(ns_cli));
			goto out;
		}

		ns = ldlm_namespace_first_locked(ns_cli);
		ldlm_namespace_move_to_active_locked(ns, ns_cli);
		mutex_unlock(ldlm_namespace_lock(ns_cli));

		ldlm_reclaim_res(ns, &count, age_ns, skip);
		/* NOTE(review): no matching ldlm_namespace_get() is visible
		 * in this file - confirm where the reference dropped here
		 * is taken.
		 */
		ldlm_namespace_put(ns);
		nr_processed++;
	}

	/* Batch not filled: relax the age limit and rescan without skip. */
	if (count > 0 && age_ns > LDLM_RECLAIM_AGE_MIN) {
		age_ns >>= 1;
		if (age_ns < (LDLM_RECLAIM_AGE_MIN * 2))
			age_ns = LDLM_RECLAIM_AGE_MIN;
		skip = false;
		goto again;
	}

	/* Remember where this run ended up for ldlm_reclaim_age(). */
	ldlm_last_reclaim_age_ns = age_ns;
	ldlm_last_reclaim_time = ktime_get();
out:
	/* Release the reclaimer slot (1 -> 0). */
	atomic_add_unless(&ldlm_nr_reclaimer, -1, 0);
	EXIT;
}

/**
 * Account a lock in ldlm_granted_total and stamp its l_last_used with the
 * current time. Does nothing for locks that are not reclaimable.
 *
 * \param[in] lock	lock to account
 */
void ldlm_reclaim_add(struct ldlm_lock *lock)
{
	if (!ldlm_lock_reclaimable(lock))
		return;
	percpu_counter_add(&ldlm_granted_total, 1);
	lock->l_last_used = ktime_get();
}

/**
 * Remove a lock from the ldlm_granted_total accounting. Does nothing for
 * locks that are not reclaimable.
 *
 * \param[in] lock	lock to stop accounting
 */
void ldlm_reclaim_del(struct ldlm_lock *lock)
{
	if (!ldlm_lock_reclaimable(lock))
		return;
	percpu_counter_sub(&ldlm_granted_total, 1);
}

/**
 * Check on the total granted locks: return true if it exceeds the
 * high watermark (ldlm_lock_limit), otherwise return false; It also
 * triggers lock reclaim if the low watermark (ldlm_reclaim_threshold)
 * is exceeded. A watermark of 0 disables the corresponding check.
 *
 * \retval true		high watermark exceeded.
 * \retval false	high watermark not exceeded.
 */
bool ldlm_reclaim_full(void)
{
	__u64 high = ldlm_lock_limit;
	__u64 low = ldlm_reclaim_threshold;

	/* Fault injection: take the low watermark from cfs_fail_val. */
	if (low != 0 && OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_LOW))
		low = cfs_fail_val;

	if (low != 0 &&
	    percpu_counter_sum_positive(&ldlm_granted_total) > low)
		ldlm_reclaim_ns();

	/* Fault injection: take the high watermark from cfs_fail_val. */
	if (high != 0 && OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_HIGH))
		high = cfs_fail_val;

	/* The counter is summed again, after the reclaim attempt above. */
	if (high != 0 &&
	    percpu_counter_sum_positive(&ldlm_granted_total) > high)
		return true;

	return false;
}

/**
 * Convert a percentage of memory into a number of locks.
 *
 * \param[in] ratio	percentage of (NUM_CACHEPAGES << PAGE_SHIFT) bytes
 *
 * \retval number of struct ldlm_lock that fit into that many bytes
 */
static inline __u64 ldlm_ratio2locknr(int ratio)
{
	__u64 locknr;

	locknr = ((__u64)NUM_CACHEPAGES << PAGE_SHIFT) * ratio;
	do_div(locknr, 100 * sizeof(struct ldlm_lock));

	return locknr;
}

/**
 * Convert a number of locks into the size of that many struct ldlm_lock,
 * in MB, rounded to the nearest MB (512KB is added before the shift).
 *
 * \param[in] locknr	number of locks
 *
 * \retval size in MB
 */
static inline __u64 ldlm_locknr2mb(__u64 locknr)
{
	return (locknr * sizeof(struct ldlm_lock) + 512 * 1024) >> 20;
}

/* Default low/high watermarks, as a percentage passed to ldlm_ratio2locknr */
#define LDLM_WM_RATIO_LOW_DEFAULT	20
#define LDLM_WM_RATIO_HIGH_DEFAULT	30

/**
 * Initialize the reclaim state: reset the reclaimer guard, set both
 * watermarks (and their MB counterparts) to the defaults, seed the last
 * reclaim age/time and initialize ldlm_granted_total.
 *
 * \retval return value of percpu_counter_init()
 */
int ldlm_reclaim_setup(void)
{
	atomic_set(&ldlm_nr_reclaimer, 0);

	ldlm_reclaim_threshold = ldlm_ratio2locknr(LDLM_WM_RATIO_LOW_DEFAULT);
	ldlm_reclaim_threshold_mb = ldlm_locknr2mb(ldlm_reclaim_threshold);
	ldlm_lock_limit = ldlm_ratio2locknr(LDLM_WM_RATIO_HIGH_DEFAULT);
	ldlm_lock_limit_mb = ldlm_locknr2mb(ldlm_lock_limit);

	ldlm_last_reclaim_age_ns = LDLM_RECLAIM_AGE_MAX;
	ldlm_last_reclaim_time = ktime_get();

	/* percpu_counter_init() takes a gfp argument only on some kernels. */
#ifdef HAVE_PERCPU_COUNTER_INIT_GFP_FLAG
	return percpu_counter_init(&ldlm_granted_total, 0, GFP_KERNEL);
#else
	return percpu_counter_init(&ldlm_granted_total, 0);
#endif
}

/** Release the ldlm_granted_total counter set up by ldlm_reclaim_setup(). */
void ldlm_reclaim_cleanup(void)
{
	percpu_counter_destroy(&ldlm_granted_total);
}

#else /* HAVE_SERVER_SUPPORT */

/* Without server support the reclaim interface is reduced to no-op stubs. */

bool ldlm_reclaim_full(void)
{
	return false;
}

void ldlm_reclaim_add(struct ldlm_lock *lock)
{
}

void ldlm_reclaim_del(struct ldlm_lock *lock)
{
}

int ldlm_reclaim_setup(void)
{
	return 0;
}

void ldlm_reclaim_cleanup(void)
{
}

#endif /* HAVE_SERVER_SUPPORT */