Whamcloud - gitweb
LU-14161 obdclass: fix some problems with obd_nid_hash 46/40846/5
authorMr NeilBrown <neilb@suse.de>
Thu, 3 Dec 2020 00:48:41 +0000 (11:48 +1100)
committerOleg Drokin <green@whamcloud.com>
Thu, 17 Dec 2020 17:00:53 +0000 (17:00 +0000)
There are a few of problems with the handling of the obd_nid_hash
rhl-table.

- obd_export_evict_by_nid() drops out of rcu_readlock() while holding
  a reference into the table.  This is theoretically unsafe as changed
  to the table might cause entries to be missed.
- nid_keycmp() ignores entries with exp_failed set.  This is a problem
  because rhltable_lookup() only compares the key for the *first*
  object in a list of objects with the same key - on a match it
  returns the whole list.
  So if exp_failed was set on the first in the list, all would become
  invisible.

These can be fixed by moving the test on ->exp_failed to after the
call to rhltabke_lookup(), and by using obd_nid_export_for_each() in
obd_export_evict_by_nid(), and repeatedly dealing with the first exp
returned.

Fixes: 580ef453d1d6 ("LU-8130 obd: convert obd_nid_hash to rhashtable")
Signed-off-by: Mr NeilBrown <neilb@suse.de>
Change-Id: I65861c179e93c26cf39c5db8a58e3e4c9b962fe3
Reviewed-on: https://review.whamcloud.com/40846
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/obdclass/genops.c
lustre/obdclass/obd_config.c

index 5771e95..f85dfd3 100644 (file)
@@ -1622,11 +1622,28 @@ void class_fail_export(struct obd_export *exp)
 EXPORT_SYMBOL(class_fail_export);
 
 #ifdef HAVE_SERVER_SUPPORT
+
+static int take_first(struct obd_export *exp, void *data)
+{
+       struct obd_export **expp = data;
+
+       if (*expp)
+               /* already have one */
+               return 0;
+       if (exp->exp_failed)
+               /* Don't want this one */
+               return 0;
+       if (!refcount_inc_not_zero(&exp->exp_handle.h_ref))
+               /* Cannot get a ref on this one */
+               return 0;
+       *expp = exp;
+       return 1;
+}
+
 int obd_export_evict_by_nid(struct obd_device *obd, const char *nid)
 {
        lnet_nid_t nid_key = libcfs_str2nid((char *)nid);
        struct obd_export *doomed_exp;
-       struct rhashtable_iter iter;
        int exports_evicted = 0;
 
        spin_lock(&obd->obd_dev_lock);
@@ -1638,20 +1655,9 @@ int obd_export_evict_by_nid(struct obd_device *obd, const char *nid)
        }
        spin_unlock(&obd->obd_dev_lock);
 
-       rhltable_walk_enter(&obd->obd_nid_hash, &iter);
-       rhashtable_walk_start(&iter);
-       while ((doomed_exp = rhashtable_walk_next(&iter)) != NULL) {
-               if (IS_ERR(doomed_exp))
-                       continue;
-
-               if (!doomed_exp->exp_connection ||
-                   doomed_exp->exp_connection->c_peer.nid != nid_key)
-                       continue;
-
-               if (!refcount_inc_not_zero(&doomed_exp->exp_handle.h_ref))
-                       continue;
-
-               rhashtable_walk_stop(&iter);
+       doomed_exp = NULL;
+       while (obd_nid_export_for_each(obd, nid_key,
+                                      take_first, &doomed_exp) > 0) {
 
                LASSERTF(doomed_exp != obd->obd_self_export,
                         "self-export is hashed by NID?\n");
@@ -1664,16 +1670,14 @@ int obd_export_evict_by_nid(struct obd_device *obd, const char *nid)
                class_fail_export(doomed_exp);
                class_export_put(doomed_exp);
                exports_evicted++;
-
-               rhashtable_walk_start(&iter);
+               doomed_exp = NULL;
        }
-       rhashtable_walk_stop(&iter);
-       rhashtable_walk_exit(&iter);
 
-        if (!exports_evicted)
-                CDEBUG(D_HA,"%s: can't disconnect NID '%s': no exports found\n",
-                       obd->obd_name, nid);
-        return exports_evicted;
+       if (!exports_evicted)
+               CDEBUG(D_HA,
+                      "%s: can't disconnect NID '%s': no exports found\n",
+                      obd->obd_name, nid);
+       return exports_evicted;
 }
 EXPORT_SYMBOL(obd_export_evict_by_nid);
 
index 10000ce..bf86016 100644 (file)
@@ -173,7 +173,7 @@ nid_keycmp(struct rhashtable_compare_arg *arg, const void *obj)
        const lnet_nid_t *nid = arg->key;
        const struct obd_export *exp = obj;
 
-       if (exp->exp_connection->c_peer.nid == *nid && !exp->exp_failed)
+       if (exp->exp_connection->c_peer.nid == *nid)
                return 0;
 
        return -ESRCH;
@@ -250,7 +250,7 @@ int obd_nid_export_for_each(struct obd_device *obd, lnet_nid_t nid,
        }
 
        rhl_for_each_entry_rcu(exp, tmp, exports, exp_nid_hash) {
-               if (cb(exp, data))
+               if (!exp->exp_failed && cb(exp, data))
                        ret++;
        }