From ec6aae2e3aa4174e3c8fdd8e75a9a4186e3abdc4 Mon Sep 17 00:00:00 2001 From: Kit Westneat Date: Wed, 5 Nov 2014 09:23:10 -0500 Subject: [PATCH] LU-4647 nodemap: fix problem with node reclassification nodemap_add_member can't be used to move an already hashed member to a new nodemap, so this patch copies the needed functionality to nm_member_reclassif_cb. This also adds a mutex lock for reclassifying so that there is only one nodemap reclassifying at a time. Reclassifying takes a lock on a nodemap's nm_member_hash, so a deadlock could arise if one nodemap is trying to add a member to another nodemap, and that second nodemap is also reclassifying and eventually tries to add a member to the first nodemap. Signed-off-by: Kit Westneat Change-Id: Icc93a8e6d8384afa90e45cc04f1422512974ce4a Reviewed-on: http://review.whamcloud.com/12575 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: John L. Hammond Reviewed-by: Nathaniel Clark Reviewed-by: Oleg Drokin --- lustre/ptlrpc/nodemap_member.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/lustre/ptlrpc/nodemap_member.c b/lustre/ptlrpc/nodemap_member.c index 00459f1..a4e5063 100644 --- a/lustre/ptlrpc/nodemap_member.c +++ b/lustre/ptlrpc/nodemap_member.c @@ -220,7 +220,7 @@ int nm_member_add(struct lu_nodemap *nodemap, struct obd_export *exp) rc = cfs_hash_add_unique(nodemap->nm_member_hash, exp, &exp->exp_target_data.ted_nodemap_member); - if (rc == 0 ) + if (rc == 0) class_export_get(exp); /* else -EALREADY - exp already in nodemap hash */ @@ -247,25 +247,33 @@ static int nm_member_reclassify_cb(cfs_hash_t *hs, cfs_hash_bd_t *bd, struct hlist_node *hnode, void *data) { struct obd_export *exp; + struct lu_nodemap *nodemap; exp = hlist_entry(hnode, struct obd_export, exp_target_data.ted_nodemap_member); if (exp == NULL) goto out; - /* Call member add before del so exp->nodemap is never NULL. Must use - * bd_del_locked inside a cfs_hash callback. For those reasons, can't - * use member_del. + /* Must use bd_del_locked inside a cfs_hash callback, and exp->nodemap + * should never be NULL. For those reasons, can't use member_del. */ - nodemap_add_member(exp->exp_connection->c_peer.nid, exp); - cfs_hash_bd_del_locked(hs, bd, hnode); - class_export_put(exp); + read_lock(&nm_range_tree_lock); + nodemap = nodemap_classify_nid(exp->exp_connection->c_peer.nid); + if (exp->exp_target_data.ted_nodemap != nodemap) { + cfs_hash_bd_del_locked(hs, bd, hnode); + exp->exp_target_data.ted_nodemap = nodemap; + cfs_hash_add_unique(nodemap->nm_member_hash, exp, + &exp->exp_target_data.ted_nodemap_member); + } + read_unlock(&nm_range_tree_lock); nm_member_exp_revoke(exp); out: return 0; } +DEFINE_MUTEX(reclassify_nodemap_lock); + /** * Reclassify the members of a nodemap after range changes or activation, * based on the member export's NID and the nodemap's new NID ranges. Exports @@ -277,9 +285,12 @@ out: */ void nm_member_reclassify_nodemap(struct lu_nodemap *nodemap) { + /* reclassify only one nodemap at a time to avoid deadlock */ + mutex_lock(&reclassify_nodemap_lock); cfs_hash_for_each_safe(nodemap->nm_member_hash, nm_member_reclassify_cb, NULL); + mutex_unlock(&reclassify_nodemap_lock); } static int nm_member_revoke_locks_cb(cfs_hash_t *hs, cfs_hash_bd_t *bd, -- 1.8.3.1