From f570595e05a65045e6a2786861f7c9d013854112 Mon Sep 17 00:00:00 2001
From: Aurelien Degremont
Date: Tue, 13 Oct 2020 14:12:23 +0000
Subject: [PATCH] LU-14111 obdclass: count eviction per obd_device

Add a new 'obd_eviction_count' counter to obd_device which is
increased every time a client is evicted, which means every time
we call `class_fail_export()`.
Expose this counter through `lctl get_param *.*.eviction_count`
for every target.

Only support recovery-small test 146 for 2.14.0.133+.

Lustre-change: https://review.whamcloud.com/40528
Lustre-commit: 3c69d46e1766480c0ffd1bef840b4e167b4cf88e
Lustre-change: https://review.whamcloud.com/52098
Lustre-commit: b034dd27dd39483e40f91ea82d3f5c62b514ec54

Signed-off-by: Aurelien Degremont
Change-Id: I83b691662285cf2cd937187bffa54de6bd1f694c
Reviewed-by: Andreas Dilger
Reviewed-by: Shaun Tancheff
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/53897
Tested-by: jenkins
Tested-by: Andreas Dilger
---
Reviewer notes (below the three-dash marker, so not part of the commit):
 * obd_eviction_count is incremented in two places in genops.c:
   class_fail_export() and class_disconnect_stale_exports(). The
   description above names only the former.
 * The counter is exposed read-only (LUSTRE_RO_ATTR) on the MDT, MGS
   and OFD attribute lists via the shared eviction_count_show() helper.
 * The line structure of this patch was restored from a copy whose
   newlines had been collapsed. Blank context lines follow from the
   hunk line counts; leading indentation was reconstructed and should
   be checked against the target tree before applying.

 lustre/include/lprocfs_status.h         |  2 ++
 lustre/include/obd.h                    |  1 +
 lustre/mdt/mdt_lproc.c                  |  2 ++
 lustre/mgs/lproc_mgs.c                  |  2 ++
 lustre/obdclass/genops.c                |  3 +++
 lustre/obdclass/lprocfs_status_server.c | 11 +++++++++++
 lustre/ofd/lproc_ofd.c                  |  2 ++
 lustre/tests/recovery-small.sh          | 19 +++++++++++++++++++
 8 files changed, 42 insertions(+)

diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h
index 4d7397f..c587b05 100644
--- a/lustre/include/lprocfs_status.h
+++ b/lustre/include/lprocfs_status.h
@@ -599,6 +599,8 @@ extern int lprocfs_connect_flags_seq_show(struct seq_file *m, void *data);
 #ifdef HAVE_SERVER_SUPPORT
 ssize_t num_exports_show(struct kobject *kobj, struct attribute *attr,
 			 char *buf);
+ssize_t eviction_count_show(struct kobject *kobj, struct attribute *attr,
+			    char *buf);
 #endif
 struct adaptive_timeout;
 extern int lprocfs_at_hist_helper(struct seq_file *m,
diff --git a/lustre/include/obd.h b/lustre/include/obd.h
index d0b4a27..8031bf8 100644
--- a/lustre/include/obd.h
+++ b/lustre/include/obd.h
@@ -769,6 +769,7 @@ struct obd_device {
 	atomic_t obd_evict_inprogress;
 	wait_queue_head_t obd_evict_inprogress_waitq;
 	struct list_head obd_evict_list; /* protected with pet_lock */
+	atomic_t obd_eviction_count;
 
 	/**
 	 * LDLM pool part. Save last calculated SLV and Limit.
diff --git a/lustre/mdt/mdt_lproc.c b/lustre/mdt/mdt_lproc.c
index 675abd7..12f9b18 100644
--- a/lustre/mdt/mdt_lproc.c
+++ b/lustre/mdt/mdt_lproc.c
@@ -1379,6 +1379,7 @@ LUSTRE_RW_ATTR(grant_compat_disable);
 LUSTRE_RO_ATTR(instance);
 
 LUSTRE_RO_ATTR(num_exports);
+LUSTRE_RO_ATTR(eviction_count);
 
 static struct attribute *mdt_attrs[] = {
 	&lustre_attr_tot_dirty.attr,
@@ -1390,6 +1391,7 @@ static struct attribute *mdt_attrs[] = {
 	&lustre_attr_recovery_time_soft.attr,
 	&lustre_attr_ir_factor.attr,
 	&lustre_attr_num_exports.attr,
+	&lustre_attr_eviction_count.attr,
 	&lustre_attr_identity_expire.attr,
 	&lustre_attr_identity_acquire_expire.attr,
 	&lustre_attr_identity_upcall.attr,
diff --git a/lustre/mgs/lproc_mgs.c b/lustre/mgs/lproc_mgs.c
index 4ea2973..d645ac4 100644
--- a/lustre/mgs/lproc_mgs.c
+++ b/lustre/mgs/lproc_mgs.c
@@ -217,6 +217,7 @@ static struct lprocfs_vars lprocfs_mgs_obd_vars[] = {
 };
 
 LUSTRE_RO_ATTR(num_exports);
+LUSTRE_RO_ATTR(eviction_count);
 
 static ssize_t fstype_show(struct kobject *kobj, struct attribute *attr,
 			   char *buf)
@@ -253,6 +254,7 @@ LUSTRE_RO_ATTR(mntdev);
 static struct attribute *mgs_attrs[] = {
 	&lustre_attr_fstype.attr,
 	&lustre_attr_mntdev.attr,
+	&lustre_attr_eviction_count.attr,
 	&lustre_attr_num_exports.attr,
 	NULL,
 };
diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c
index b6ebc82..245235b 100644
--- a/lustre/obdclass/genops.c
+++ b/lustre/obdclass/genops.c
@@ -1566,6 +1566,7 @@ void class_disconnect_stale_exports(struct obd_device *obd,
 			continue;
 		}
 		exp->exp_failed = 1;
+		atomic_inc(&exp->exp_obd->obd_eviction_count);
 		spin_unlock(&exp->exp_lock);
 
 		list_move(&exp->exp_obd_chain, &work_list);
@@ -1602,6 +1603,8 @@ void class_fail_export(struct obd_export *exp)
 		return;
 	}
 
+	atomic_inc(&exp->exp_obd->obd_eviction_count);
+
 	CDEBUG(D_HA, "disconnecting export %p/%s\n",
 	       exp, exp->exp_client_uuid.uuid);
 
diff --git a/lustre/obdclass/lprocfs_status_server.c b/lustre/obdclass/lprocfs_status_server.c
index 186e359..5cef66d 100644
--- a/lustre/obdclass/lprocfs_status_server.c
+++ b/lustre/obdclass/lprocfs_status_server.c
@@ -144,6 +144,17 @@ EXPORT_SYMBOL(lprocfs_evict_client_seq_write);
 
 #undef BUFLEN
 
+ssize_t eviction_count_show(struct kobject *kobj, struct attribute *attr,
+			    char *buf)
+{
+	struct obd_device *obd = container_of(kobj, struct obd_device,
+					      obd_kset.kobj);
+
+	return scnprintf(buf, PAGE_SIZE, "%u\n",
+			 atomic_read(&obd->obd_eviction_count));
+}
+EXPORT_SYMBOL(eviction_count_show);
+
 ssize_t num_exports_show(struct kobject *kobj, struct attribute *attr,
 			 char *buf)
 {
diff --git a/lustre/ofd/lproc_ofd.c b/lustre/ofd/lproc_ofd.c
index f9fafc0..7dd9eb7 100644
--- a/lustre/ofd/lproc_ofd.c
+++ b/lustre/ofd/lproc_ofd.c
@@ -1087,6 +1087,7 @@ LUSTRE_RW_ATTR(grant_compat_disable);
 LUSTRE_RO_ATTR(instance);
 
 LUSTRE_RO_ATTR(num_exports);
+LUSTRE_RO_ATTR(eviction_count);
 
 struct lprocfs_vars lprocfs_ofd_obd_vars[] = {
 	{ .name = "last_id",
@@ -1176,6 +1177,7 @@ static struct attribute *ofd_attrs[] = {
 	&lustre_attr_no_precreate.attr,
 #endif
 	&lustre_attr_num_exports.attr,
+	&lustre_attr_eviction_count.attr,
 	&lustre_attr_precreate_batch.attr,
 	&lustre_attr_recovery_time_hard.attr,
 	&lustre_attr_recovery_time_soft.attr,
diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh
index a858dd9..5ab0e26 100755
--- a/lustre/tests/recovery-small.sh
+++ b/lustre/tests/recovery-small.sh
@@ -3204,6 +3204,25 @@ $(do_facet mds1 $LCTL get_param -n mdt.$FSNAME-MDT0000.recovery_time_hard)
 }
 run_test 145 "connect mdtlovs and process update logs after recovery expire"
 
+test_146() {
+	(( $MDS1_VERSION >= $(version_code 2.14.0.133) )) ||
+		skip "Need MDS >= v2.14.0.133 for eviction_count"
+
+	local prev_count=$(do_facet $SINGLEMDS \
+		$LCTL get_param -n "mdt.${mds1_svc}.eviction_count")
+
+	mds_evict_client
+
+	client_reconnect
+
+	local next_count=$(do_facet $SINGLEMDS \
+		$LCTL get_param -n "mdt.${mds1_svc}.eviction_count")
+
+	[ "$prev_count" -lt "$next_count" ] ||
+		error "wrong eviction count ($prev_count >= $next_count)"
+}
+run_test 146 "test eviction is counted properly"
+
 test_147() {
 	local obd_timeout=200
 	local old=$($LCTL get_param -n timeout)
-- 
1.8.3.1