From a7688cfd0f8dc2071d6708dd9a3aef71ab7b0a03 Mon Sep 17 00:00:00 2001
From: Courrier Guillaume
Date: Thu, 1 Jul 2021 11:35:19 +0200
Subject: [PATCH] LU-14834: list the UUIDs of stale clients during recovery

Add a new entry in debugfs for MDT and OFD to display the UUIDs of
clients yet to be reconnected during a recovery.

For example:

$ lctl get_param obdfilter.lustre-OST0000.recovery_stale_clients
obdfilter.lustre-OST0000.recovery_stale_clients=
9a7ab21d-207c-4680-b9bf-b5873fd05540

This will display, during the recovery, the UUIDs of clients that
are expected to reconnect but have not done so yet.

Signed-off-by: Courrier Guillaume
Change-Id: Ib8c0b500adc9098e3cfb1998df06757a7d31b7b9
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/44196
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Etienne AUJAMES
Reviewed-by: Andreas Dilger
Reviewed-by: Oleg Drokin
---
 lustre/include/lprocfs_status.h         |  3 ++
 lustre/mdt/mdt_lproc.c                  |  9 ++++++
 lustre/obdclass/lprocfs_status_server.c | 49 +++++++++++++++++++++++++++++++++
 lustre/ofd/lproc_ofd.c                  |  9 ++++++
 4 files changed, 70 insertions(+)

diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h
index 6ea80ee..e62cfd1 100644
--- a/lustre/include/lprocfs_status.h
+++ b/lustre/include/lprocfs_status.h
@@ -678,6 +678,9 @@ void lprocfs_stats_collect(struct lprocfs_stats *stats, int idx,
 /* lprocfs_status.c: recovery status */
 int lprocfs_recovery_status_seq_show(struct seq_file *m, void *data);
 
+/* lprocfs: display the uuid of stale clients */
+int lprocfs_recovery_stale_clients_seq_show(struct seq_file *m, void *data);
+
 /* lprocfs_status.c: hash statistics */
 int lprocfs_hash_seq_show(struct seq_file *m, void *data);
 
diff --git a/lustre/mdt/mdt_lproc.c b/lustre/mdt/mdt_lproc.c
index ccc17a2..b5634ef 100644
--- a/lustre/mdt/mdt_lproc.c
+++ b/lustre/mdt/mdt_lproc.c
@@ -1605,6 +1605,14 @@ static struct lprocfs_vars lprocfs_mdt_obd_vars[] = {
 	{ NULL }
 };
 
+LDEBUGFS_SEQ_FOPS_RO_TYPE(mdt, recovery_stale_clients);
+
+static struct ldebugfs_vars ldebugfs_mdt_obd_vars[] = {
+	{ .name =	"recovery_stale_clients",
+	  .fops =	&mdt_recovery_stale_clients_fops	},
+	{ NULL }
+};
+
 static int
 lprocfs_mdt_print_open_files(struct obd_export *exp, void *v)
 {
@@ -1734,6 +1742,7 @@ int mdt_tunables_init(struct mdt_device *mdt, const char *name)
 		       mdt_obd_name(mdt), rc);
 		return rc;
 	}
+	ldebugfs_add_vars(obd->obd_debugfs_entry, ldebugfs_mdt_obd_vars, obd);
 
 	rc = tgt_tunables_init(&mdt->mdt_lut);
 	if (rc) {
diff --git a/lustre/obdclass/lprocfs_status_server.c b/lustre/obdclass/lprocfs_status_server.c
index 02c3b85..13cd886 100644
--- a/lustre/obdclass/lprocfs_status_server.c
+++ b/lustre/obdclass/lprocfs_status_server.c
@@ -79,6 +79,55 @@ no_entry:
 }
 EXPORT_SYMBOL(ldebugfs_add_symlink);
 
+/**
+ * Show the UUIDs of clients that have not reconnected during recovery.
+ *
+ * Prints nothing unless the target is recovering and fewer clients are
+ * connected than the maximum number of recoverable clients. Otherwise
+ * walks the export list under obd_dev_lock and prints, one per line, the
+ * client UUID of every export that is failed or has no connection yet.
+ *
+ * \param[in] m		seq_file handle; m->private is the obd_device
+ * \param[in] data	unused
+ *
+ * \retval		0 always
+ */
+int lprocfs_recovery_stale_clients_seq_show(struct seq_file *m, void *data)
+{
+	struct obd_device *obd = m->private;
+	struct obd_export *exp;
+	bool connected;
+
+	/* nothing to list unless recovery is still waiting for clients */
+	if (!obd->obd_recovering ||
+	    atomic_read(&obd->obd_connected_clients) >=
+	    atomic_read(&obd->obd_max_recoverable_clients))
+		return 0;
+
+	spin_lock(&obd->obd_dev_lock);
+	list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) {
+		/* don't count self-export as client */
+		if (obd_uuid_equals(&exp->exp_client_uuid,
+				    &exp->exp_obd->obd_uuid))
+			continue;
+
+		/* don't count clients which have no slot in last_rcvd
+		 * (e.g. lightweight connection)
+		 */
+		if (exp->exp_target_data.ted_lr_idx == -1)
+			continue;
+
+		connected = !exp->exp_failed && (exp->exp_conn_cnt > 0);
+
+		if (!connected)
+			seq_printf(m, "%s\n", exp->exp_client_uuid.uuid);
+	}
+	spin_unlock(&obd->obd_dev_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(lprocfs_recovery_stale_clients_seq_show);
+
 #ifdef CONFIG_PROC_FS
 
 int lprocfs_evict_client_open(struct inode *inode, struct file *f)
diff --git a/lustre/ofd/lproc_ofd.c b/lustre/ofd/lproc_ofd.c
index 74020a6..8ddb93f 100644
--- a/lustre/ofd/lproc_ofd.c
+++ b/lustre/ofd/lproc_ofd.c
@@ -934,6 +934,14 @@ struct lprocfs_vars lprocfs_ofd_obd_vars[] = {
 	{ NULL }
 };
 
+LDEBUGFS_SEQ_FOPS_RO_TYPE(ofd, recovery_stale_clients);
+
+static struct ldebugfs_vars ldebugfs_ofd_obd_vars[] = {
+	{ .name =	"recovery_stale_clients",
+	  .fops =	&ofd_recovery_stale_clients_fops	},
+	{ NULL }
+};
+
 /**
  * Initialize OFD statistics counters
  *
@@ -1040,6 +1048,7 @@ int ofd_tunables_init(struct ofd_device *ofd)
 		       obd->obd_name, rc);
 		RETURN(rc);
 	}
+	ldebugfs_add_vars(obd->obd_debugfs_entry, ldebugfs_ofd_obd_vars, obd);
 
 	rc = tgt_tunables_init(&ofd->ofd_lut);
 	if (rc) {
-- 
1.8.3.1