cfs_hash_for_each_safe(struct cfs_hash *hs, cfs_hash_for_each_cb_t, void *data);
int
cfs_hash_for_each_nolock(struct cfs_hash *hs, cfs_hash_for_each_cb_t,
- void *data);
+ void *data, int start);
int
cfs_hash_for_each_empty(struct cfs_hash *hs, cfs_hash_for_each_cb_t,
void *data);
*/
static int
cfs_hash_for_each_relax(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
- void *data)
+ void *data, int start)
{
struct hlist_node *hnode;
struct hlist_node *tmp;
__u32 version;
int count = 0;
int stop_on_change;
- int rc;
- int i;
+ int rc = 0;
+ int i, end = -1;
ENTRY;
stop_on_change = cfs_hash_with_rehash_key(hs) ||
!cfs_hash_with_no_itemref(hs) ||
hs->hs_ops->hs_put_locked == NULL;
cfs_hash_lock(hs, 0);
+again:
LASSERT(!cfs_hash_is_rehashing(hs));
cfs_hash_for_each_bucket(hs, &bd, i) {
struct hlist_head *hhead;
+ if (i < start)
+ continue;
+ else if (end > 0 && i >= end)
+ break;
+
cfs_hash_bd_lock(hs, &bd, 0);
version = cfs_hash_bd_version_get(&bd);
if (rc) /* callback wants to break iteration */
break;
}
- cfs_hash_unlock(hs, 0);
- return count;
+ if (start > 0 && rc != 0) {
+ end = start;
+ start = 0;
+ goto again;
+ }
+
+ cfs_hash_unlock(hs, 0);
+ return count;
}
int
cfs_hash_for_each_nolock(struct cfs_hash *hs,
- cfs_hash_for_each_cb_t func, void *data)
+ cfs_hash_for_each_cb_t func, void *data, int start)
{
ENTRY;
hs->hs_ops->hs_put_locked == NULL))
RETURN(-EOPNOTSUPP);
- cfs_hash_for_each_enter(hs);
- cfs_hash_for_each_relax(hs, func, data);
- cfs_hash_for_each_exit(hs);
+ cfs_hash_for_each_enter(hs);
+ cfs_hash_for_each_relax(hs, func, data, start);
+ cfs_hash_for_each_exit(hs);
- RETURN(0);
+ RETURN(0);
}
EXPORT_SYMBOL(cfs_hash_for_each_nolock);
hs->hs_ops->hs_put_locked == NULL))
return -EOPNOTSUPP;
- cfs_hash_for_each_enter(hs);
- while (cfs_hash_for_each_relax(hs, func, data)) {
- CDEBUG(D_INFO, "Try to empty hash: %s, loop: %u\n",
- hs->hs_name, i++);
- }
- cfs_hash_for_each_exit(hs);
- RETURN(0);
+ cfs_hash_for_each_enter(hs);
+ while (cfs_hash_for_each_relax(hs, func, data, 0)) {
+ CDEBUG(D_INFO, "Try to empty hash: %s, loop: %u\n",
+ hs->hs_name, i++);
+ }
+ cfs_hash_for_each_exit(hs);
+ RETURN(0);
}
EXPORT_SYMBOL(cfs_hash_for_each_empty);
* fact the network or overall system load is at fault
*/
struct adaptive_timeout nsb_at_estimate;
+ /**
+	 * Index of the resource in this bucket at which the next lock
+	 * reclaim scan should start.
+ */
+ int nsb_reclaim_start;
};
enum {
* recalculation of LDLM pool statistics should be skipped.
*/
unsigned ns_stopping:1;
+
+ /**
+	 * Index of the hash bucket at which the next lock reclaim scan
+	 * should start.
+ */
+ int ns_reclaim_start;
};
/**
#define OBD_FAIL_LDLM_CP_CB_WAIT3 0x321
#define OBD_FAIL_LDLM_CP_CB_WAIT4 0x322
#define OBD_FAIL_LDLM_CP_CB_WAIT5 0x323
-
#define OBD_FAIL_LDLM_SRV_BL_AST 0x324
#define OBD_FAIL_LDLM_SRV_CP_AST 0x325
#define OBD_FAIL_LDLM_SRV_GL_AST 0x326
+#define OBD_FAIL_LDLM_WATERMARK_LOW 0x327
+#define OBD_FAIL_LDLM_WATERMARK_HIGH 0x328
/* LOCKLESS IO */
#define OBD_FAIL_LDLM_SET_CONTENTION 0x385
EXTRA_DIST = ldlm_extent.c ldlm_flock.c ldlm_internal.h ldlm_lib.c \
ldlm_lock.c ldlm_lockd.c ldlm_plain.c ldlm_request.c \
ldlm_resource.c l_lock.c ldlm_inodebits.c ldlm_pool.c \
- interval_tree.c
+ interval_tree.c ldlm_reclaim.c
void ldlm_flock_policy_local_to_wire(const ldlm_policy_data_t *lpolicy,
ldlm_wire_policy_data_t *wpolicy);
+
+/* ldlm_reclaim.c */
+extern __u64 ldlm_watermark_low;
+extern __u64 ldlm_watermark_high;
+int ldlm_reclaim_setup(void);
+void ldlm_reclaim_cleanup(void);
+void ldlm_reclaim_add(struct ldlm_lock *lock);
+void ldlm_reclaim_del(struct ldlm_lock *lock);
+bool ldlm_reclaim_full(void);
*/
void ldlm_reprocess_all_ns(struct ldlm_namespace *ns)
{
- ENTRY;
+ ENTRY;
- if (ns != NULL) {
- cfs_hash_for_each_nolock(ns->ns_rs_hash,
- ldlm_reprocess_res, NULL);
- }
- EXIT;
+ if (ns != NULL) {
+ cfs_hash_for_each_nolock(ns->ns_rs_hash,
+ ldlm_reprocess_res, NULL, 0);
+ }
+ EXIT;
}
/**
flags |= LDLM_FL_RESENT;
GOTO(existing_lock, rc = 0);
}
- }
+ } else {
+ if (ldlm_reclaim_full()) {
+ DEBUG_REQ(D_DLMTRACE, req, "Too many granted locks, "
+ "reject current enqueue request and let the "
+ "client retry later.\n");
+ GOTO(out, rc = -EINPROGRESS);
+ }
+ }
/* The lock's callback data might be set in the policy function */
lock = ldlm_lock_create(ns, &dlm_req->lock_desc.l_resource.lr_name,
CERROR("Failed to initialize LDLM pools: %d\n", rc);
GOTO(out, rc);
}
+
+ rc = ldlm_reclaim_setup();
+ if (rc) {
+ CERROR("Failed to setup reclaim thread: rc = %d\n", rc);
+ GOTO(out, rc);
+ }
RETURN(0);
out:
RETURN(-EBUSY);
}
- ldlm_pools_fini();
+ ldlm_reclaim_cleanup();
+ ldlm_pools_fini();
if (ldlm_state->ldlm_bl_pool != NULL) {
struct ldlm_bl_pool *blp = ldlm_state->ldlm_bl_pool;
* cancelled, instead special kind of lock is used to drop them.
* also there is no LRU for flock locks, so no point in tracking
* them anyway.
+ *
+ * PLAIN locks are used by config and quota, the quantity is small
+ * and usually they are not in LRU.
*/
- if (lock->l_resource->lr_type == LDLM_FLOCK)
+ if (lock->l_resource->lr_type == LDLM_FLOCK ||
+ lock->l_resource->lr_type == LDLM_PLAIN)
return;
+ ldlm_reclaim_add(lock);
+
atomic_inc(&pl->pl_granted);
atomic_inc(&pl->pl_grant_rate);
lprocfs_counter_incr(pl->pl_stats, LDLM_POOL_GRANT_STAT);
void ldlm_pool_del(struct ldlm_pool *pl, struct ldlm_lock *lock)
{
/*
- * Filter out FLOCK locks. Read above comment in ldlm_pool_add().
+ * Filter out FLOCK & PLAIN locks. Read above comment in
+ * ldlm_pool_add().
*/
- if (lock->l_resource->lr_type == LDLM_FLOCK)
+ if (lock->l_resource->lr_type == LDLM_FLOCK ||
+ lock->l_resource->lr_type == LDLM_PLAIN)
return;
+ ldlm_reclaim_del(lock);
+
LASSERT(atomic_read(&pl->pl_granted) > 0);
atomic_dec(&pl->pl_granted);
atomic_inc(&pl->pl_cancel_rate);
--- /dev/null
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2015, Intel Corporation.
+ * Use is subject to license terms.
+ *
+ * Author: Niu Yawei <yawei.niu@intel.com>
+ */
+
+#define DEBUG_SUBSYSTEM S_LDLM
+
+#include <linux/kthread.h>
+#include <lustre_dlm.h>
+#include <obd_class.h>
+#include "ldlm_internal.h"
+
+/*
+ * To avoid ldlm lock exhausting server memory, two global parameters:
+ * ldlm_watermark_low & ldlm_watermark_high are used for reclaiming
+ * granted locks and rejecting incoming enqueue requests defensively.
+ *
+ * ldlm_watermark_low: When the number of granted locks reaches this
+ * threshold, the server starts to revoke locks gradually.
+ *
+ * ldlm_watermark_high: When the number of granted locks reaches this
+ * threshold, the server returns -EINPROGRESS to any incoming enqueue
+ * request until the lock count shrinks below the threshold again.
+ *
+ * ldlm_watermark_low & ldlm_watermark_high are set to 20% & 30% of the
+ * total memory by default. Both are tunable via proc entries; setting
+ * one to 0 disables the corresponding feature.
+ */
+
+/*
+ * FIXME:
+ *
+ * In current implementation, server identifies which locks should be
+ * revoked by choosing locks from namespace/resource in a roundrobin
+ * manner, which isn't optimal. The ideal way should be server notifies
+ * clients to cancel locks voluntarily, because only client knows exactly
+ * when the lock is last used.
+ *
+ * However, how to notify the client immediately is a problem; one idea
+ * is to leverage the glimpse callbacks on some artificial global
+ * lock (like the quota global lock does), but that requires protocol
+ * changes, so let's fix it in a future long-term solution.
+ */
+
+__u64 ldlm_watermark_low;
+__u64 ldlm_watermark_high;
+
+#ifdef HAVE_SERVER_SUPPORT
+
+static struct percpu_counter ldlm_granted_total;
+static atomic_t ldlm_nr_reclaimer;
+static cfs_duration_t ldlm_last_reclaim_age;
+static cfs_time_t ldlm_last_reclaim_time;
+
+struct ldlm_reclaim_cb_data {
+ struct list_head rcd_rpc_list;
+ int rcd_added;
+ int rcd_total;
+ int rcd_cursor;
+ int rcd_start;
+ bool rcd_skip;
+ cfs_duration_t rcd_age;
+ struct cfs_hash_bd *rcd_prev_bd;
+};
+
+/* Return true if @lock is a candidate for lock reclaim.
+ * Only server-side namespace locks of IBITS or EXTENT type qualify. */
+static inline bool ldlm_lock_reclaimable(struct ldlm_lock *lock)
+{
+	struct ldlm_namespace *ns = ldlm_lock_to_ns(lock);
+
+	/* FLOCK & PLAIN lock are not reclaimable. FLOCK is
+	 * explicitly controlled by application, PLAIN lock
+	 * is used by quota global lock and config lock.
+	 */
+	if (ns->ns_client == LDLM_NAMESPACE_SERVER &&
+	    (lock->l_resource->lr_type == LDLM_IBITS ||
+	     lock->l_resource->lr_type == LDLM_EXTENT))
+		return true;
+	return false;
+}
+
+/* cfs_hash iteration callback: scan one resource for reclaim candidates.
+ *
+ * Walks @res's granted lock list and queues reclaimable locks that have
+ * been idle longer than data->rcd_age (the LOW watermark fail_loc
+ * bypasses the age check for testing) onto data->rcd_rpc_list, taking a
+ * reference and setting the AST_SENT flag so each lock is queued at
+ * most once.  Returns 1 to stop the hash iteration once rcd_total
+ * locks have been collected, 0 to continue.
+ */
+static int ldlm_reclaim_lock_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
+				struct hlist_node *hnode, void *arg)
+
+{
+	struct ldlm_resource		*res;
+	struct ldlm_reclaim_cb_data	*data;
+	struct ldlm_lock		*lock;
+	struct ldlm_ns_bucket		*nsb;
+	int				 rc = 0;
+
+	data = (struct ldlm_reclaim_cb_data *)arg;
+
+	LASSERTF(data->rcd_added < data->rcd_total, "added:%d >= total:%d\n",
+		 data->rcd_added, data->rcd_total);
+
+	nsb = cfs_hash_bd_extra_get(hs, bd);
+	res = cfs_hash_object(hs, hnode);
+
+	/* Entered a new bucket: advance the namespace-level cursor and
+	 * compute where in this bucket the scan should resume from the
+	 * per-bucket nsb_reclaim_start cursor. */
+	if (data->rcd_prev_bd != bd) {
+		if (data->rcd_prev_bd != NULL)
+			ldlm_res_to_ns(res)->ns_reclaim_start++;
+		data->rcd_prev_bd = bd;
+		data->rcd_cursor = 0;
+		data->rcd_start = nsb->nsb_reclaim_start %
+				  cfs_hash_bd_count_get(bd);
+	}
+
+	/* On the first (skipping) pass, step over resources already
+	 * visited by previous reclaim runs. */
+	if (data->rcd_skip && data->rcd_cursor < data->rcd_start) {
+		data->rcd_cursor++;
+		return 0;
+	}
+
+	nsb->nsb_reclaim_start++;
+
+	lock_res(res);
+	list_for_each_entry(lock, &res->lr_granted, l_res_link) {
+		if (!ldlm_lock_reclaimable(lock))
+			continue;
+
+		/* Too recently used: leave it alone, unless the LOW
+		 * watermark fail_loc forces immediate reclaim. */
+		if (!OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_LOW) &&
+		    cfs_time_before(cfs_time_current(),
+				    cfs_time_add(lock->l_last_used,
+						 data->rcd_age)))
+			continue;
+
+		if (!ldlm_is_ast_sent(lock)) {
+			ldlm_set_ast_sent(lock);
+			LASSERT(list_empty(&lock->l_rk_ast));
+			list_add(&lock->l_rk_ast, &data->rcd_rpc_list);
+			/* extra reference for the pending AST work */
+			LDLM_LOCK_GET(lock);
+			if (++data->rcd_added == data->rcd_total) {
+				rc = 1; /* stop the iteration */
+				break;
+			}
+		}
+	}
+	unlock_res(res);
+
+	return rc;
+}
+
+/* Reclaim up to *count locks from namespace @ns.
+ *
+ * Only MDT/OST namespaces with active references are scanned.  Locks
+ * idle for longer than @age are queued and revoked via
+ * LDLM_WORK_REVOKE_AST; *count is decremented by the number of locks
+ * actually queued.  When @skip is true the hash scan resumes from the
+ * namespace's saved cursor (ns_reclaim_start) rather than bucket 0.
+ */
+static void ldlm_reclaim_res(struct ldlm_namespace *ns, int *count,
+			     cfs_duration_t age, bool skip)
+{
+	struct ldlm_reclaim_cb_data	data;
+	int				idx, type, start;
+	ENTRY;
+
+	LASSERT(*count != 0);
+
+	/* only server targets (MDT/OST) participate in reclaim */
+	if (ns->ns_obd) {
+		type = server_name2index(ns->ns_obd->obd_name, &idx, NULL);
+		if (type != LDD_F_SV_TYPE_MDT && type != LDD_F_SV_TYPE_OST) {
+			EXIT;
+			return;
+		}
+	}
+
+	/* skip namespaces with no active references */
+	if (atomic_read(&ns->ns_bref) == 0) {
+		EXIT;
+		return;
+	}
+
+	INIT_LIST_HEAD(&data.rcd_rpc_list);
+	data.rcd_added = 0;
+	data.rcd_total = *count;
+	data.rcd_age = age;
+	data.rcd_skip = skip;
+	data.rcd_prev_bd = NULL;
+	/* resume the round-robin scan from the saved bucket cursor */
+	start = ns->ns_reclaim_start % CFS_HASH_NBKT(ns->ns_rs_hash);
+
+	cfs_hash_for_each_nolock(ns->ns_rs_hash, ldlm_reclaim_lock_cb, &data,
+				 start);
+
+	CDEBUG(D_DLMTRACE, "NS(%s): %d locks to be reclaimed, found %d/%d "
+	       "locks.\n", ldlm_ns_name(ns), *count, data.rcd_added,
+	       data.rcd_total);
+
+	LASSERTF(*count >= data.rcd_added, "count:%d, added:%d\n", *count,
+		 data.rcd_added);
+
+	/* issue revoke ASTs for the collected locks */
+	ldlm_run_ast_work(ns, &data.rcd_rpc_list, LDLM_WORK_REVOKE_AST);
+	*count -= data.rcd_added;
+	EXIT;
+}
+
+#define LDLM_RECLAIM_BATCH	512
+#define LDLM_RECLAIM_AGE_MIN	cfs_time_seconds(300)
+#define LDLM_RECLAIM_AGE_MAX	(LDLM_DEFAULT_MAX_ALIVE * 3 / 4)
+
+/* Estimate the idle-age cutoff for the next reclaim run.
+ *
+ * The age grows with the time elapsed since the last reclaim, and is
+ * clamped into [LDLM_RECLAIM_AGE_MIN, LDLM_RECLAIM_AGE_MAX].  Note any
+ * value below MIN*2 is snapped down to MIN. */
+static inline cfs_duration_t ldlm_reclaim_age(void)
+{
+	cfs_duration_t	age;
+
+	age = ldlm_last_reclaim_age +
+	      cfs_time_sub(cfs_time_current(), ldlm_last_reclaim_time);
+	if (age > LDLM_RECLAIM_AGE_MAX)
+		age = LDLM_RECLAIM_AGE_MAX;
+	else if (age < (LDLM_RECLAIM_AGE_MIN * 2))
+		age = LDLM_RECLAIM_AGE_MIN;
+	return age;
+}
+
+/* Reclaim a batch (LDLM_RECLAIM_BATCH) of granted locks, iterating
+ * the server-side namespaces in a round-robin manner.
+ *
+ * Only one reclaimer runs at a time (gated by ldlm_nr_reclaimer).  If
+ * a full pass with the initial idle-age cutoff does not collect enough
+ * locks, the cutoff is halved and the scan restarted with skip=false,
+ * until the batch is filled or age bottoms out at LDLM_RECLAIM_AGE_MIN.
+ */
+static void ldlm_reclaim_ns(void)
+{
+	struct ldlm_namespace	*ns;
+	int			 count = LDLM_RECLAIM_BATCH;
+	int			 ns_nr, nr_processed;
+	ldlm_side_t		 ns_cli = LDLM_NAMESPACE_SERVER;
+	cfs_duration_t		 age;
+	bool			 skip = true;
+	ENTRY;
+
+	/* allow only one reclaimer; bail out if one is already running */
+	if (!atomic_add_unless(&ldlm_nr_reclaimer, 1, 1)) {
+		EXIT;
+		return;
+	}
+
+	age = ldlm_reclaim_age();
+again:
+	nr_processed = 0;
+	ns_nr = ldlm_namespace_nr_read(ns_cli);
+	while (count > 0 && nr_processed < ns_nr) {
+		mutex_lock(ldlm_namespace_lock(ns_cli));
+
+		if (list_empty(ldlm_namespace_list(ns_cli))) {
+			mutex_unlock(ldlm_namespace_lock(ns_cli));
+			goto out;
+		}
+
+		/* rotate through the namespace list round-robin */
+		ns = ldlm_namespace_first_locked(ns_cli);
+		ldlm_namespace_move_to_active_locked(ns, ns_cli);
+		mutex_unlock(ldlm_namespace_lock(ns_cli));
+
+		ldlm_reclaim_res(ns, &count, age, skip);
+		ldlm_namespace_put(ns);
+		nr_processed++;
+	}
+
+	/* not enough locks collected: relax the age cutoff and rescan */
+	if (count > 0 && age > LDLM_RECLAIM_AGE_MIN) {
+		age >>= 1;
+		if (age < (LDLM_RECLAIM_AGE_MIN * 2))
+			age = LDLM_RECLAIM_AGE_MIN;
+		skip = false;
+		goto again;
+	}
+
+	ldlm_last_reclaim_age = age;
+	ldlm_last_reclaim_time = cfs_time_current();
+out:
+	atomic_add_unless(&ldlm_nr_reclaimer, -1, 0);
+	EXIT;
+}
+
+/* Account a newly granted lock in the reclaim counter and stamp its
+ * last-used time.  No-op for unreclaimable (FLOCK/PLAIN/client) locks. */
+void ldlm_reclaim_add(struct ldlm_lock *lock)
+{
+	if (!ldlm_lock_reclaimable(lock))
+		return;
+	percpu_counter_add(&ldlm_granted_total, 1);
+	lock->l_last_used = cfs_time_current();
+}
+
+/* Remove a cancelled/destroyed lock from the reclaim accounting;
+ * the counterpart of ldlm_reclaim_add(). */
+void ldlm_reclaim_del(struct ldlm_lock *lock)
+{
+	if (!ldlm_lock_reclaimable(lock))
+		return;
+	percpu_counter_sub(&ldlm_granted_total, 1);
+}
+
+/* Check the granted-lock total against the watermarks on enqueue.
+ *
+ * Triggers in-context reclaim once the total passes the low watermark;
+ * returns true (caller should reject the enqueue) once it passes the
+ * high watermark.  A watermark of 0 disables the corresponding check;
+ * the OBD_FAIL knobs substitute cfs_fail_val for testing. */
+bool ldlm_reclaim_full(void)
+{
+	__u64 high = ldlm_watermark_high;
+	__u64 low = ldlm_watermark_low;
+
+	if (low != 0 && OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_LOW))
+		low = cfs_fail_val;
+
+	if (low != 0 &&
+	    percpu_counter_read_positive(&ldlm_granted_total) > low)
+		ldlm_reclaim_ns();
+
+	if (high != 0 && OBD_FAIL_CHECK(OBD_FAIL_LDLM_WATERMARK_HIGH))
+		high = cfs_fail_val;
+
+	if (high != 0 &&
+	    percpu_counter_read_positive(&ldlm_granted_total) > high)
+		return true;
+
+	return false;
+}
+
+/* Convert @ratio (a percentage of total system memory) into the number
+ * of ldlm locks that would consume that much memory. */
+static inline __u64 ldlm_ratio2locknr(int ratio)
+{
+	__u64 locknr;
+
+	locknr = ((__u64)NUM_CACHEPAGES << PAGE_CACHE_SHIFT) * ratio;
+	do_div(locknr, 100 * sizeof(struct ldlm_lock));
+
+	return locknr;
+}
+
+#define LDLM_WM_RATIO_LOW_DEFAULT	20
+#define LDLM_WM_RATIO_HIGH_DEFAULT	30
+
+/* Initialize the reclaim machinery: default watermarks (20%/30% of
+ * total memory), the single-reclaimer gate and the per-cpu granted
+ * lock counter.  Returns 0 or the percpu_counter_init() error code. */
+int ldlm_reclaim_setup(void)
+{
+	atomic_set(&ldlm_nr_reclaimer, 0);
+	ldlm_watermark_low = ldlm_ratio2locknr(LDLM_WM_RATIO_LOW_DEFAULT);
+	ldlm_watermark_high = ldlm_ratio2locknr(LDLM_WM_RATIO_HIGH_DEFAULT);
+	ldlm_last_reclaim_age = LDLM_RECLAIM_AGE_MAX;
+	ldlm_last_reclaim_time = cfs_time_current();
+
+	return percpu_counter_init(&ldlm_granted_total, 0);
+}
+
+/* Release the resources allocated by ldlm_reclaim_setup(). */
+void ldlm_reclaim_cleanup(void)
+{
+	percpu_counter_destroy(&ldlm_granted_total);
+}
+
+#else /* HAVE_SERVER_SUPPORT */
+
+/* Client-only build: lock reclaim is a server-side feature, so every
+ * entry point degenerates to a no-op here. */
+bool ldlm_reclaim_full(void)
+{
+	return false;
+}
+
+void ldlm_reclaim_add(struct ldlm_lock *lock)
+{
+}
+
+void ldlm_reclaim_del(struct ldlm_lock *lock)
+{
+}
+
+int ldlm_reclaim_setup(void)
+{
+	return 0;
+}
+
+void ldlm_reclaim_cleanup(void)
+{
+}
+
+#endif /* HAVE_SERVER_SUPPORT */
RETURN(ldlm_cli_cancel_unused_resource(ns, res_id, NULL,
LCK_MINMODE, flags,
opaque));
- } else {
- cfs_hash_for_each_nolock(ns->ns_rs_hash,
- ldlm_cli_hash_cancel_unused, &arg);
- RETURN(ELDLM_OK);
- }
+ } else {
+ cfs_hash_for_each_nolock(ns->ns_rs_hash,
+ ldlm_cli_hash_cancel_unused, &arg, 0);
+ RETURN(ELDLM_OK);
+ }
}
/* Lock iterators. */
{
struct iter_helper_data helper = { .iter = iter, .closure = closure };
- cfs_hash_for_each_nolock(ns->ns_rs_hash,
- ldlm_res_iter_helper, &helper);
+ cfs_hash_for_each_nolock(ns->ns_rs_hash,
+ ldlm_res_iter_helper, &helper, 0);
}
LPROC_SEQ_FOPS_RW_TYPE(ldlm_rw, uint);
LPROC_SEQ_FOPS_RO_TYPE(ldlm, uint);
+/* The lock count is stored in the watermark, and it is displayed as the
+ * number of MB of memory consumed by the locks */
+/* proc read hook: m->private points at the watermark (a lock count). */
+static int seq_watermark_show(struct seq_file *m, void *data)
+{
+	__u64 locknr = *(__u64 *)m->private;
+	/* convert the stored lock count back into MB for display */
+	return seq_printf(m, LPU64"\n",
+			  (locknr * sizeof(struct ldlm_lock)) >> 20);
+}
+
+/* proc write hook: parse a value written in MB, convert it to a lock
+ * count and store it in the entry's backing watermark variable.
+ *
+ * A value of 0 disables the watermark; non-zero values below 1MB are
+ * rejected.  If the update leaves low > high, low is clamped down to
+ * high to keep the pair consistent. */
+static ssize_t seq_watermark_write(struct file *file,
+				   const char __user *buffer, size_t count,
+				   loff_t *off)
+{
+	__u64 watermark;
+	__u64 *data = ((struct seq_file *)file->private_data)->private;
+	int rc;
+
+	rc = lprocfs_write_frac_u64_helper(buffer, count, &watermark, 1 << 20);
+	if (rc) {
+		CERROR("Failed to set LDLM watermark, rc = %d.\n", rc);
+		return rc;
+	} else if (watermark != 0 && watermark < (1 << 20)) {
+		CERROR("Watermark should be greater than 1MB.\n");
+		return -EINVAL;
+	}
+
+	/* bytes -> number of locks */
+	do_div(watermark, sizeof(struct ldlm_lock));
+	*data = watermark;
+
+	if (ldlm_watermark_low != 0 && ldlm_watermark_high != 0 &&
+	    ldlm_watermark_low > ldlm_watermark_high)
+		ldlm_watermark_low = ldlm_watermark_high;
+	return count;
+}
+
+/* proc open hook: PDE_DATA() carries the pointer to the watermark. */
+static int seq_watermark_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, seq_watermark_show, PDE_DATA(inode));
+}
+
+static const struct file_operations ldlm_watermark_fops = {
+ .owner = THIS_MODULE,
+ .open = seq_watermark_open,
+ .read = seq_read,
+ .write = seq_watermark_write,
+ .llseek = seq_lseek,
+ .release = lprocfs_single_release,
+};
+
int ldlm_proc_setup(void)
{
int rc;
{ .name = "cancel_unused_locks_before_replay",
.fops = &ldlm_rw_uint_fops,
.data = &ldlm_cancel_unused_locks_before_replay },
+ { .name = "watermark_mb_low",
+ .fops = &ldlm_watermark_fops,
+ .data = &ldlm_watermark_low },
+ { .name = "watermark_mb_high",
+ .fops = &ldlm_watermark_fops,
+ .data = &ldlm_watermark_high },
{ NULL }};
ENTRY;
LASSERT(ldlm_ns_proc_dir == NULL);
nsb = cfs_hash_bd_extra_get(ns->ns_rs_hash, &bd);
at_init(&nsb->nsb_at_estimate, ldlm_enqueue_min, 0);
nsb->nsb_namespace = ns;
+ nsb->nsb_reclaim_start = 0;
}
ns->ns_obd = obd;
ns->ns_orig_connect_flags = 0;
ns->ns_connect_flags = 0;
ns->ns_stopping = 0;
+ ns->ns_reclaim_start = 0;
rc = ldlm_namespace_proc_register(ns);
if (rc != 0) {
CERROR("Can't initialize ns proc, rc %d\n", rc);
return ELDLM_OK;
}
- cfs_hash_for_each_nolock(ns->ns_rs_hash, ldlm_resource_clean, &flags);
- cfs_hash_for_each_nolock(ns->ns_rs_hash, ldlm_resource_complain, NULL);
- return ELDLM_OK;
+ cfs_hash_for_each_nolock(ns->ns_rs_hash, ldlm_resource_clean,
+ &flags, 0);
+ cfs_hash_for_each_nolock(ns->ns_rs_hash, ldlm_resource_complain,
+ NULL, 0);
+ return ELDLM_OK;
}
EXPORT_SYMBOL(ldlm_namespace_cleanup);
cfs_hash_for_each_nolock(ns->ns_rs_hash,
ldlm_res_hash_dump,
- (void *)(unsigned long)level);
+ (void *)(unsigned long)level, 0);
spin_lock(&ns->ns_lock);
ns->ns_next_dump = cfs_time_shift(10);
spin_unlock(&ns->ns_lock);
if (IS_ERR(req))
RETURN(PTR_ERR(req));
- if (req != NULL && it && it->it_op & IT_CREAT)
- /* ask ptlrpc not to resend on EINPROGRESS since we have our own
- * retry logic */
- req->rq_no_retry_einprogress = 1;
-
if (resends) {
req->rq_generation_set = 1;
req->rq_import_generation = generation;
lockrep->lock_policy_res2 =
ptlrpc_status_ntoh(lockrep->lock_policy_res2);
- /* Retry the create infinitely when we get -EINPROGRESS from
- * server. This is required by the new quota design. */
- if (it && it->it_op & IT_CREAT &&
- (int)lockrep->lock_policy_res2 == -EINPROGRESS) {
+ /* Retry infinitely when the server returns -EINPROGRESS for the
+ * intent operation, when server returns -EINPROGRESS for acquiring
+ * intent lock, we'll retry in after_reply(). */
+ if (it && (int)lockrep->lock_policy_res2 == -EINPROGRESS) {
mdc_clear_replay_flag(req, rc);
ptlrpc_req_finished(req);
resends++;
LDLM_LOCK_PUT(lock);
RETURN(lock_count);
}
+ lock->l_last_used = cfs_time_current();
LDLM_LOCK_PUT(lock);
}
}
ldlm_objs += $(LDLM)ldlm_request.o $(LDLM)ldlm_lockd.o
ldlm_objs += $(LDLM)ldlm_flock.o $(LDLM)ldlm_inodebits.o
ldlm_objs += $(LDLM)ldlm_pool.o $(LDLM)interval_tree.o
+ldlm_objs += $(LDLM)ldlm_reclaim.o
target_objs := $(TARGET)tgt_main.o $(TARGET)tgt_lastrcvd.o
target_objs += $(TARGET)tgt_handler.o $(TARGET)out_handler.o
}
run_test 133g "Check for Oopses on bad io area writes/reads in /proc"
+test_134a() {
+ [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.54) ]] &&
+ skip "Need MDS version at least 2.7.54" && return
+
+ mkdir -p $DIR/$tdir || error "failed to create $DIR/$tdir"
+ cancel_lru_locks mdc
+
+ local nsdir="ldlm.namespaces.*-MDT0000-mdc-*"
+ local unused=$($LCTL get_param -n $nsdir.lock_unused_count)
+ [ $unused -eq 0 ] || "$unused locks are not cleared"
+
+ local nr=1000
+ createmany -o $DIR/$tdir/f $nr ||
+ error "failed to create $nr files in $DIR/$tdir"
+ unused=$($LCTL get_param -n $nsdir.lock_unused_count)
+
+ #define OBD_FAIL_LDLM_WATERMARK_LOW 0x327
+ do_facet mds1 $LCTL set_param fail_loc=0x327
+ do_facet mds1 $LCTL set_param fail_val=500
+ touch $DIR/$tdir/m
+
+ echo "sleep 10 seconds ..."
+ sleep 10
+ local lck_cnt=$($LCTL get_param -n $nsdir.lock_unused_count)
+
+ do_facet mds1 $LCTL set_param fail_loc=0
+ do_facet mds1 $LCTL set_param fail_val=0
+ [ $lck_cnt -lt $unused ] ||
+ error "No locks reclaimed, before:$unused, after:$lck_cnt"
+
+ rm $DIR/$tdir/m
+ unlinkmany $DIR/$tdir/f $nr
+}
+run_test 134a "Server reclaims locks when reaching low watermark"
+
+# Verify the server rejects enqueue requests once the HIGH watermark is
+# hit (forced down to 500 locks via fail_loc 0x328 / fail_val), so a
+# createmany run over the limit must stall instead of finishing.
+test_134b() {
+	[[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.54) ]] &&
+		skip "Need MDS version at least 2.7.54" && return
+
+	mkdir -p $DIR/$tdir || error "failed to create $DIR/$tdir"
+	cancel_lru_locks mdc
+
+	local low_wm=$(do_facet mds1 $LCTL get_param -n ldlm.watermark_mb_low)
+	# disable reclaim temporarily
+	do_facet mds1 $LCTL set_param ldlm.watermark_mb_low=0
+
+	#define OBD_FAIL_LDLM_WATERMARK_HIGH		 0x328
+	do_facet mds1 $LCTL set_param fail_loc=0x328
+	do_facet mds1 $LCTL set_param fail_val=500
+
+	$LCTL set_param debug=+trace
+
+	# more creates than the forced high watermark allows
+	local nr=600
+	createmany -o $DIR/$tdir/f $nr &
+	local create_pid=$!
+
+	echo "Sleep $TIMEOUT seconds ..."
+	sleep $TIMEOUT
+	# createmany must still be running (blocked by rejected enqueues)
+	if ! ps -p $create_pid > /dev/null 2>&1; then
+		do_facet mds1 $LCTL set_param fail_loc=0
+		do_facet mds1 $LCTL set_param fail_val=0
+		do_facet mds1 $LCTL set_param ldlm.watermark_mb_low=$low_wm
+		error "createmany finished incorrectly!"
+	fi
+	# restore settings so the pending creates can complete
+	do_facet mds1 $LCTL set_param fail_loc=0
+	do_facet mds1 $LCTL set_param fail_val=0
+	do_facet mds1 $LCTL set_param ldlm.watermark_mb_low=$low_wm
+	wait $create_pid || return 1
+
+	unlinkmany $DIR/$tdir/f $nr
+}
+run_test 134b "Server rejects lock request when reaching high watermark"
+
test_140() { #bug-17379
[ $PARALLEL == "yes" ] && skip "skip parallel run" && return
test_mkdir -p $DIR/$tdir || error "Creating dir $DIR/$tdir"