From 3c69d46e1766480c0ffd1bef840b4e167b4cf88e Mon Sep 17 00:00:00 2001
From: Aurelien Degremont
Date: Tue, 13 Oct 2020 14:12:23 +0000
Subject: [PATCH] LU-14111 obdclass: count eviction per obd_device

Add a new 'obd_eviction_count' counter to obd_device which is
increased every time a client is evicted, which means every
time we call `class_fail_export()`.

Expose this counter through `lctl get_param *.*.eviction_count`
for every target.

Signed-off-by: Aurelien Degremont
Change-Id: I83b691662285cf2cd937187bffa54de6bd1f694c
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/40528
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Andreas Dilger
Reviewed-by: Oleg Drokin
Reviewed-by: Shaun Tancheff
---
 lustre/include/lprocfs_status.h         |  2 ++
 lustre/include/obd.h                    |  1 +
 lustre/mdt/mdt_lproc.c                  |  2 ++
 lustre/mgs/lproc_mgs.c                  |  2 ++
 lustre/obdclass/genops.c                |  3 +++
 lustre/obdclass/lprocfs_status_server.c | 11 +++++++++++
 lustre/ofd/lproc_ofd.c                  |  2 ++
 lustre/tests/recovery-small.sh          | 16 ++++++++++++++++
 8 files changed, 39 insertions(+)

diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h
index 9d2d092..6ea80ee 100644
--- a/lustre/include/lprocfs_status.h
+++ b/lustre/include/lprocfs_status.h
@@ -616,6 +616,8 @@ ssize_t grant_check_threshold_show(struct kobject *kobj,
 ssize_t grant_check_threshold_store(struct kobject *kobj,
 				    struct attribute *attr,
 				    const char *buffer, size_t count);
+ssize_t eviction_count_show(struct kobject *kobj, struct attribute *attr,
+			    char *buf);
 #endif
 struct adaptive_timeout;
 extern int lprocfs_at_hist_helper(struct seq_file *m,
diff --git a/lustre/include/obd.h b/lustre/include/obd.h
index 5c899dd..d224762 100644
--- a/lustre/include/obd.h
+++ b/lustre/include/obd.h
@@ -732,6 +732,7 @@ struct obd_device {
 	atomic_t obd_evict_inprogress;
 	wait_queue_head_t obd_evict_inprogress_waitq;
 	struct list_head obd_evict_list; /* protected with pet_lock */
+	atomic_t obd_eviction_count;
 
 	/**
 	 * LDLM pool part. Save last calculated SLV and Limit.
diff --git a/lustre/mdt/mdt_lproc.c b/lustre/mdt/mdt_lproc.c
index 5e71039..ca88be4 100644
--- a/lustre/mdt/mdt_lproc.c
+++ b/lustre/mdt/mdt_lproc.c
@@ -1532,6 +1532,7 @@ LUSTRE_RO_ATTR(instance);
 
 LUSTRE_RO_ATTR(num_exports);
 LUSTRE_RW_ATTR(grant_check_threshold);
+LUSTRE_RO_ATTR(eviction_count);
 
 static struct attribute *mdt_attrs[] = {
 	&lustre_attr_tot_dirty.attr,
@@ -1544,6 +1545,7 @@ static struct attribute *mdt_attrs[] = {
 	&lustre_attr_ir_factor.attr,
 	&lustre_attr_num_exports.attr,
 	&lustre_attr_grant_check_threshold.attr,
+	&lustre_attr_eviction_count.attr,
 	&lustre_attr_identity_expire.attr,
 	&lustre_attr_identity_acquire_expire.attr,
 	&lustre_attr_identity_upcall.attr,
diff --git a/lustre/mgs/lproc_mgs.c b/lustre/mgs/lproc_mgs.c
index 41cf93d..b36cef0 100644
--- a/lustre/mgs/lproc_mgs.c
+++ b/lustre/mgs/lproc_mgs.c
@@ -216,6 +216,7 @@ static struct lprocfs_vars lprocfs_mgs_obd_vars[] = {
 };
 
 LUSTRE_RO_ATTR(num_exports);
+LUSTRE_RO_ATTR(eviction_count);
 
 static ssize_t fstype_show(struct kobject *kobj, struct attribute *attr,
 			   char *buf)
@@ -252,6 +253,7 @@ LUSTRE_RO_ATTR(mntdev);
 static struct attribute *mgs_attrs[] = {
 	&lustre_attr_fstype.attr,
 	&lustre_attr_mntdev.attr,
+	&lustre_attr_eviction_count.attr,
 	&lustre_attr_num_exports.attr,
 	NULL,
 };
diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c
index 296bc28..abb6d3d 100644
--- a/lustre/obdclass/genops.c
+++ b/lustre/obdclass/genops.c
@@ -1564,6 +1564,7 @@ void class_disconnect_stale_exports(struct obd_device *obd,
 			continue;
 		}
 		exp->exp_failed = 1;
+		atomic_inc(&exp->exp_obd->obd_eviction_count);
 		spin_unlock(&exp->exp_lock);
 
 		list_move(&exp->exp_obd_chain, &work_list);
@@ -1600,6 +1601,8 @@ void class_fail_export(struct obd_export *exp)
 		return;
 	}
 
+	atomic_inc(&exp->exp_obd->obd_eviction_count);
+
 	CDEBUG(D_HA, "disconnecting export %p/%s\n",
 	       exp, exp->exp_client_uuid.uuid);
 
diff --git a/lustre/obdclass/lprocfs_status_server.c b/lustre/obdclass/lprocfs_status_server.c
index 423971e..02c3b85 100644
--- a/lustre/obdclass/lprocfs_status_server.c
+++ b/lustre/obdclass/lprocfs_status_server.c
@@ -144,6 +144,17 @@ EXPORT_SYMBOL(lprocfs_evict_client_seq_write);
 
 #undef BUFLEN
 
+ssize_t eviction_count_show(struct kobject *kobj, struct attribute *attr,
+			 char *buf)
+{
+	struct obd_device *obd = container_of(kobj, struct obd_device,
+					      obd_kset.kobj);
+
+	return scnprintf(buf, PAGE_SIZE, "%u\n",
+			 atomic_read(&obd->obd_eviction_count));
+}
+EXPORT_SYMBOL(eviction_count_show);
+
 ssize_t num_exports_show(struct kobject *kobj, struct attribute *attr,
 			 char *buf)
 {
diff --git a/lustre/ofd/lproc_ofd.c b/lustre/ofd/lproc_ofd.c
index 0098d1a..ae1ca11 100644
--- a/lustre/ofd/lproc_ofd.c
+++ b/lustre/ofd/lproc_ofd.c
@@ -910,6 +910,7 @@ LUSTRE_RO_ATTR(instance);
 
 LUSTRE_RO_ATTR(num_exports);
 LUSTRE_RW_ATTR(grant_check_threshold);
+LUSTRE_RO_ATTR(eviction_count);
 
 struct lprocfs_vars lprocfs_ofd_obd_vars[] = {
 	{ .name = "last_id",
@@ -990,6 +991,7 @@ static struct attribute *ofd_attrs[] = {
 	&lustre_attr_ir_factor.attr,
 	&lustre_attr_num_exports.attr,
 	&lustre_attr_grant_check_threshold.attr,
+	&lustre_attr_eviction_count.attr,
 	&lustre_attr_seqs_allocated.attr,
 	&lustre_attr_grant_precreate.attr,
 	&lustre_attr_precreate_batch.attr,
diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh
index 6d405db..3356fa8 100755
--- a/lustre/tests/recovery-small.sh
+++ b/lustre/tests/recovery-small.sh
@@ -3212,6 +3212,22 @@ $(do_facet mds1 $LCTL get_param -n mdt.$FSNAME-MDT0000.recovery_time_hard)
 }
 run_test 145 "connect mdtlovs and process update logs after recovery expire"
 
+test_146() {
+	local prev_count=$(do_facet $SINGLEMDS \
+		$LCTL get_param -n "mdt.${mds1_svc}.eviction_count")
+
+	mds_evict_client
+
+	client_reconnect
+
+	local next_count=$(do_facet $SINGLEMDS \
+		$LCTL get_param -n "mdt.${mds1_svc}.eviction_count")
+
+	[ "$prev_count" -lt "$next_count" ] ||
+		error "wrong eviction count ($prev_count >= $next_count)"
+}
+run_test 146 "test eviction is counted properly"
+
 test_147() {
 	local obd_timeout=200
 	local old=$($LCTL get_param -n timeout)
-- 
1.8.3.1