From: Mr NeilBrown Date: Thu, 3 Dec 2020 00:48:41 +0000 (+1100) Subject: LU-14161 obdclass: fix some problems with obd_nid_hash X-Git-Tag: 2.14.0-RC1~59 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=160ff3c144a37c7379613190ec752da3bd176fa3 LU-14161 obdclass: fix some problems with obd_nid_hash There are a few of problems with the handling of the obd_nid_hash rhl-table. - obd_export_evict_by_nid() drops out of rcu_readlock() while holding a reference into the table. This is theoretically unsafe as changed to the table might cause entries to be missed. - nid_keycmp() ignores entries with exp_failed set. This is a problem because rhltable_lookup() only compares the key for the *first* object in a list of objects with the same key - on a match it returns the whole list. So if exp_failed was set on the first in the list, all would become invisible. These can be fixed by moving the test on ->exp_failed to after the call to rhltabke_lookup(), and by using obd_nid_export_for_each() in obd_export_evict_by_nid(), and repeatedly dealing with the first exp returned. Fixes: 580ef453d1d6 ("LU-8130 obd: convert obd_nid_hash to rhashtable") Signed-off-by: Mr NeilBrown Change-Id: I65861c179e93c26cf39c5db8a58e3e4c9b962fe3 Reviewed-on: https://review.whamcloud.com/40846 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Alex Zhuravlev Reviewed-by: James Simmons Reviewed-by: Oleg Drokin --- diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 5771e95..f85dfd3 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -1622,11 +1622,28 @@ void class_fail_export(struct obd_export *exp) EXPORT_SYMBOL(class_fail_export); #ifdef HAVE_SERVER_SUPPORT + +static int take_first(struct obd_export *exp, void *data) +{ + struct obd_export **expp = data; + + if (*expp) + /* already have one */ + return 0; + if (exp->exp_failed) + /* Don't want this one */ + return 0; + if (!refcount_inc_not_zero(&exp->exp_handle.h_ref)) + /* Cannot get a ref on this one */ + return 0; + *expp = exp; + return 1; +} + int obd_export_evict_by_nid(struct obd_device *obd, const char *nid) { lnet_nid_t nid_key = libcfs_str2nid((char *)nid); struct obd_export *doomed_exp; - struct rhashtable_iter iter; int exports_evicted = 0; spin_lock(&obd->obd_dev_lock); @@ -1638,20 +1655,9 @@ int obd_export_evict_by_nid(struct obd_device *obd, const char *nid) } spin_unlock(&obd->obd_dev_lock); - rhltable_walk_enter(&obd->obd_nid_hash, &iter); - rhashtable_walk_start(&iter); - while ((doomed_exp = rhashtable_walk_next(&iter)) != NULL) { - if (IS_ERR(doomed_exp)) - continue; - - if (!doomed_exp->exp_connection || - doomed_exp->exp_connection->c_peer.nid != nid_key) - continue; - - if (!refcount_inc_not_zero(&doomed_exp->exp_handle.h_ref)) - continue; - - rhashtable_walk_stop(&iter); + doomed_exp = NULL; + while (obd_nid_export_for_each(obd, nid_key, + take_first, &doomed_exp) > 0) { LASSERTF(doomed_exp != obd->obd_self_export, "self-export is hashed by NID?\n"); @@ -1664,16 +1670,14 @@ int obd_export_evict_by_nid(struct obd_device *obd, const char *nid) class_fail_export(doomed_exp); class_export_put(doomed_exp); exports_evicted++; - - rhashtable_walk_start(&iter); + doomed_exp = NULL; } - rhashtable_walk_stop(&iter); - rhashtable_walk_exit(&iter); - if (!exports_evicted) - CDEBUG(D_HA,"%s: can't disconnect NID '%s': no exports found\n", - obd->obd_name, nid); - return exports_evicted; + if (!exports_evicted) + CDEBUG(D_HA, + "%s: can't disconnect NID '%s': no exports found\n", + obd->obd_name, nid); + return exports_evicted; } EXPORT_SYMBOL(obd_export_evict_by_nid); diff --git a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c index 10000ce..bf86016 100644 --- a/lustre/obdclass/obd_config.c +++ b/lustre/obdclass/obd_config.c @@ -173,7 +173,7 @@ nid_keycmp(struct rhashtable_compare_arg *arg, const void *obj) const lnet_nid_t *nid = arg->key; const struct obd_export *exp = obj; - if (exp->exp_connection->c_peer.nid == *nid && !exp->exp_failed) + if (exp->exp_connection->c_peer.nid == *nid) return 0; return -ESRCH; @@ -250,7 +250,7 @@ int obd_nid_export_for_each(struct obd_device *obd, lnet_nid_t nid, } rhl_for_each_entry_rcu(exp, tmp, exports, exp_nid_hash) { - if (cb(exp, data)) + if (!exp->exp_failed && cb(exp, data)) ret++; }