Add a new 'obd_eviction_count' counter to obd_device which
is incremented every time a client is evicted: whenever
`class_fail_export()` fails an export, and whenever an export
is marked as failed and moved onto a work list for disconnection.
Expose this counter through `lctl get_param *.*.eviction_count`
for every target.
recovery-small test 146 is only run against an MDS at version
2.14.0.133 or newer; it is skipped on older servers.
Lustre-change: https://review.whamcloud.com/40528
Lustre-commit:
3c69d46e1766480c0ffd1bef840b4e167b4cf88e
Lustre-change: https://review.whamcloud.com/52098
Lustre-commit:
b034dd27dd39483e40f91ea82d3f5c62b514ec54
Signed-off-by: Aurelien Degremont <degremoa@amazon.com>
Change-Id: I83b691662285cf2cd937187bffa54de6bd1f694c
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Shaun Tancheff <shaun.tancheff@hpe.com>
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/53897
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Andreas Dilger <adilger@whamcloud.com>
#ifdef HAVE_SERVER_SUPPORT
ssize_t num_exports_show(struct kobject *kobj, struct attribute *attr,
char *buf);
+ssize_t eviction_count_show(struct kobject *kobj, struct attribute *attr,
+ char *buf);
#endif
struct adaptive_timeout;
extern int lprocfs_at_hist_helper(struct seq_file *m,
atomic_t obd_evict_inprogress;
wait_queue_head_t obd_evict_inprogress_waitq;
struct list_head obd_evict_list; /* protected with pet_lock */
+ atomic_t obd_eviction_count;
/**
* LDLM pool part. Save last calculated SLV and Limit.
LUSTRE_RO_ATTR(instance);
LUSTRE_RO_ATTR(num_exports);
+LUSTRE_RO_ATTR(eviction_count);
static struct attribute *mdt_attrs[] = {
&lustre_attr_tot_dirty.attr,
&lustre_attr_recovery_time_soft.attr,
&lustre_attr_ir_factor.attr,
&lustre_attr_num_exports.attr,
+ &lustre_attr_eviction_count.attr,
&lustre_attr_identity_expire.attr,
&lustre_attr_identity_acquire_expire.attr,
&lustre_attr_identity_upcall.attr,
};
LUSTRE_RO_ATTR(num_exports);
+LUSTRE_RO_ATTR(eviction_count);
static ssize_t fstype_show(struct kobject *kobj, struct attribute *attr,
char *buf)
+/* NULL-terminated list of the attributes declared above for the MGS */
static struct attribute *mgs_attrs[] = {
&lustre_attr_fstype.attr,
&lustre_attr_mntdev.attr,
&lustre_attr_num_exports.attr,
+ &lustre_attr_eviction_count.attr,
NULL,
};
continue;
}
exp->exp_failed = 1;
+ atomic_inc(&exp->exp_obd->obd_eviction_count);
spin_unlock(&exp->exp_lock);
list_move(&exp->exp_obd_chain, &work_list);
return;
}
+ atomic_inc(&exp->exp_obd->obd_eviction_count);
+
CDEBUG(D_HA, "disconnecting export %p/%s\n",
exp, exp->exp_client_uuid.uuid);
#undef BUFLEN
+/**
+ * eviction_count_show() - show handler for the "eviction_count" attribute
+ * @kobj: kobject embedded in an obd_device as obd_kset.kobj
+ * @attr: attribute being read (not used here)
+ * @buf: output buffer; at most PAGE_SIZE bytes are written
+ *
+ * Reads the device's obd_eviction_count and prints it into @buf as a
+ * decimal number followed by a newline.
+ *
+ * Return: the value returned by scnprintf() for the formatted output.
+ */
+ssize_t eviction_count_show(struct kobject *kobj, struct attribute *attr,
+			    char *buf)
+{
+	struct obd_device *obd;
+	int evictions;
+
+	/* @kobj is a member of the device, so step back to the container. */
+	obd = container_of(kobj, struct obd_device, obd_kset.kobj);
+	evictions = atomic_read(&obd->obd_eviction_count);
+
+	return scnprintf(buf, PAGE_SIZE, "%u\n", evictions);
+}
+EXPORT_SYMBOL(eviction_count_show);
+
ssize_t num_exports_show(struct kobject *kobj, struct attribute *attr,
char *buf)
{
LUSTRE_RO_ATTR(instance);
LUSTRE_RO_ATTR(num_exports);
+LUSTRE_RO_ATTR(eviction_count);
struct lprocfs_vars lprocfs_ofd_obd_vars[] = {
{ .name = "last_id",
&lustre_attr_no_precreate.attr,
#endif
&lustre_attr_num_exports.attr,
+ &lustre_attr_eviction_count.attr,
&lustre_attr_precreate_batch.attr,
&lustre_attr_recovery_time_hard.attr,
&lustre_attr_recovery_time_soft.attr,
}
run_test 145 "connect mdtlovs and process update logs after recovery expire"
+# Check that evicting a client raises the MDT eviction_count parameter.
+test_146() {
+	# Skip when the MDS is older than the release named in the message.
+	(( $MDS1_VERSION >= $(version_code 2.14.0.133) )) ||
+		skip "Need MDS >= v2.14.0.133 for eviction_count"
+
+	local param="mdt.${mds1_svc}.eviction_count"
+	local before=$(do_facet $SINGLEMDS $LCTL get_param -n "$param")
+
+	# Evict this client, then reconnect before sampling the counter again.
+	mds_evict_client
+	client_reconnect
+
+	local after=$(do_facet $SINGLEMDS $LCTL get_param -n "$param")
+
+	# The counter must be strictly greater after the eviction.
+	[ "$before" -lt "$after" ] ||
+		error "wrong eviction count ($before >= $after)"
+}
+run_test 146 "test eviction is counted properly"
+
test_147() {
local obd_timeout=200
local old=$($LCTL get_param -n timeout)