Whamcloud - gitweb
LU-14111 obdclass: count eviction per obd_device 28/40528/15
author Aurelien Degremont <degremoa@amazon.com>
Tue, 13 Oct 2020 14:12:23 +0000 (14:12 +0000)
committer Oleg Drokin <green@whamcloud.com>
Tue, 14 Feb 2023 06:03:39 +0000 (06:03 +0000)
Add a new 'obd_eviction_count' counter to obd_device which
is incremented every time a client is evicted, i.e. every time
`class_fail_export()` fails an export and every time
`class_disconnect_stale_exports()` marks a stale export as failed.

Expose this counter through `lctl get_param *.*.eviction_count`
for every target.

Signed-off-by: Aurelien Degremont <degremoa@amazon.com>
Change-Id: I83b691662285cf2cd937187bffa54de6bd1f694c
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/40528
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-by: Shaun Tancheff <shaun.tancheff@hpe.com>
lustre/include/lprocfs_status.h
lustre/include/obd.h
lustre/mdt/mdt_lproc.c
lustre/mgs/lproc_mgs.c
lustre/obdclass/genops.c
lustre/obdclass/lprocfs_status_server.c
lustre/ofd/lproc_ofd.c
lustre/tests/recovery-small.sh

index 9d2d092..6ea80ee 100644 (file)
@@ -616,6 +616,8 @@ ssize_t grant_check_threshold_show(struct kobject *kobj,
 ssize_t grant_check_threshold_store(struct kobject *kobj,
                                    struct attribute *attr,
                                    const char *buffer, size_t count);
+ssize_t eviction_count_show(struct kobject *kobj, struct attribute *attr,
+                           char *buf);
 #endif
 struct adaptive_timeout;
 extern int lprocfs_at_hist_helper(struct seq_file *m,
index 5c899dd..d224762 100644 (file)
@@ -732,6 +732,7 @@ struct obd_device {
        atomic_t                obd_evict_inprogress;
        wait_queue_head_t       obd_evict_inprogress_waitq;
        struct list_head        obd_evict_list; /* protected with pet_lock */
+       atomic_t                obd_eviction_count;
 
        /**
         * LDLM pool part. Save last calculated SLV and Limit.
index 5e71039..ca88be4 100644 (file)
@@ -1532,6 +1532,7 @@ LUSTRE_RO_ATTR(instance);
 
 LUSTRE_RO_ATTR(num_exports);
 LUSTRE_RW_ATTR(grant_check_threshold);
+LUSTRE_RO_ATTR(eviction_count);
 
 static struct attribute *mdt_attrs[] = {
        &lustre_attr_tot_dirty.attr,
@@ -1544,6 +1545,7 @@ static struct attribute *mdt_attrs[] = {
        &lustre_attr_ir_factor.attr,
        &lustre_attr_num_exports.attr,
        &lustre_attr_grant_check_threshold.attr,
+       &lustre_attr_eviction_count.attr,
        &lustre_attr_identity_expire.attr,
        &lustre_attr_identity_acquire_expire.attr,
        &lustre_attr_identity_upcall.attr,
index 41cf93d..b36cef0 100644 (file)
@@ -216,6 +216,7 @@ static struct lprocfs_vars lprocfs_mgs_obd_vars[] = {
 };
 
 LUSTRE_RO_ATTR(num_exports);
+LUSTRE_RO_ATTR(eviction_count);
 
 static ssize_t fstype_show(struct kobject *kobj, struct attribute *attr,
                           char *buf)
@@ -252,6 +253,7 @@ LUSTRE_RO_ATTR(mntdev);
 static struct attribute *mgs_attrs[] = {
        &lustre_attr_fstype.attr,
        &lustre_attr_mntdev.attr,
+       &lustre_attr_eviction_count.attr,
        &lustre_attr_num_exports.attr,
        NULL,
 };
index 296bc28..abb6d3d 100644 (file)
@@ -1564,6 +1564,7 @@ void class_disconnect_stale_exports(struct obd_device *obd,
                        continue;
                }
                exp->exp_failed = 1;
+               atomic_inc(&exp->exp_obd->obd_eviction_count);
                spin_unlock(&exp->exp_lock);
 
                list_move(&exp->exp_obd_chain, &work_list);
@@ -1600,6 +1601,8 @@ void class_fail_export(struct obd_export *exp)
                 return;
         }
 
+       atomic_inc(&exp->exp_obd->obd_eviction_count);
+
         CDEBUG(D_HA, "disconnecting export %p/%s\n",
                exp, exp->exp_client_uuid.uuid);
 
index 423971e..02c3b85 100644 (file)
@@ -144,6 +144,17 @@ EXPORT_SYMBOL(lprocfs_evict_client_seq_write);
 
 #undef BUFLEN
 
+/*
+ * Attribute "show" handler for eviction_count.
+ *
+ * @kobj is the kobject embedded in an obd_device as obd_kset.kobj; the
+ * owning device is recovered from it and its obd_eviction_count value is
+ * formatted into @buf as a decimal number followed by a newline.
+ * @attr is not used.
+ *
+ * Output is bounded by PAGE_SIZE; the return value is whatever
+ * scnprintf() reports as written.
+ */
+ssize_t eviction_count_show(struct kobject *kobj, struct attribute *attr,
+                           char *buf)
+{
+       struct obd_device *obd;
+       int evictions;
+
+       obd = container_of(kobj, struct obd_device, obd_kset.kobj);
+       evictions = atomic_read(&obd->obd_eviction_count);
+
+       return scnprintf(buf, PAGE_SIZE, "%u\n", evictions);
+}
+EXPORT_SYMBOL(eviction_count_show);
+
 ssize_t num_exports_show(struct kobject *kobj, struct attribute *attr,
                         char *buf)
 {
index 0098d1a..ae1ca11 100644 (file)
@@ -910,6 +910,7 @@ LUSTRE_RO_ATTR(instance);
 
 LUSTRE_RO_ATTR(num_exports);
 LUSTRE_RW_ATTR(grant_check_threshold);
+LUSTRE_RO_ATTR(eviction_count);
 
 struct lprocfs_vars lprocfs_ofd_obd_vars[] = {
        { .name =       "last_id",
@@ -990,6 +991,7 @@ static struct attribute *ofd_attrs[] = {
        &lustre_attr_ir_factor.attr,
        &lustre_attr_num_exports.attr,
        &lustre_attr_grant_check_threshold.attr,
+       &lustre_attr_eviction_count.attr,
        &lustre_attr_seqs_allocated.attr,
        &lustre_attr_grant_precreate.attr,
        &lustre_attr_precreate_batch.attr,
index 6d405db..3356fa8 100755 (executable)
@@ -3212,6 +3212,22 @@ $(do_facet mds1 $LCTL get_param -n mdt.$FSNAME-MDT0000.recovery_time_hard)
 }
 run_test 145 "connect mdtlovs and process update logs after recovery expire"
 
+test_146() {
+       local prev_count=$(do_facet $SINGLEMDS \
+               $LCTL get_param -n "mdt.${mds1_svc}.eviction_count")
+
+       mds_evict_client
+
+       client_reconnect
+
+       local next_count=$(do_facet $SINGLEMDS \
+               $LCTL get_param -n "mdt.${mds1_svc}.eviction_count")
+
+       [ "$prev_count" -lt "$next_count" ] ||
+               error "wrong eviction count ($prev_count >= $next_count)"
+}
+run_test 146 "test eviction is counted properly"
+
 test_147() {
        local obd_timeout=200
        local old=$($LCTL get_param -n timeout)