X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fptlrpc%2Fnodemap_member.c;h=ce97c47ee95cdb70585118b543f25c012934c6bd;hp=81a09802ce900fe58ae3f24526f3ad83bd0fa4e3;hb=743f742874bfdf845a2f64c3d65081ea23740138;hpb=1c7f9caa99df3082a3e506673721d359147843d3 diff --git a/lustre/ptlrpc/nodemap_member.c b/lustre/ptlrpc/nodemap_member.c index 81a0980..ce97c47 100644 --- a/lustre/ptlrpc/nodemap_member.c +++ b/lustre/ptlrpc/nodemap_member.c @@ -32,25 +32,44 @@ #define HASH_NODEMAP_MEMBER_CUR_BITS 3 #define HASH_NODEMAP_MEMBER_MAX_BITS 7 + /** - * Delete a member from a member list + * Delete an export from a nodemap's member list. Called after client + * disconnects, or during system shutdown. + * + * Requires active_config_lock and nodemap's nm_member_list_lock. * * \param nodemap nodemap containing list * \param exp export member to delete */ void nm_member_del(struct lu_nodemap *nodemap, struct obd_export *exp) { - mutex_lock(&nodemap->nm_member_list_lock); + ENTRY; + + /* because all changes to ted_nodemap are with active_config_lock */ + LASSERT(exp->exp_target_data.ted_nodemap == nodemap); + + /* protected by nm_member_list_lock */ list_del_init(&exp->exp_target_data.ted_nodemap_member); - mutex_unlock(&nodemap->nm_member_list_lock); + spin_lock(&exp->exp_target_data.ted_nodemap_lock); exp->exp_target_data.ted_nodemap = NULL; + spin_unlock(&exp->exp_target_data.ted_nodemap_lock); + + /* ref formerly held by ted_nodemap */ + nodemap_putref(nodemap); + + /* ref formerly held by ted_nodemap_member */ class_export_put(exp); + + EXIT; } /** * Delete a member list from a nodemap * + * Requires active config lock. + * * \param nodemap nodemap to remove the list from */ void nm_member_delete_list(struct lu_nodemap *nodemap) @@ -60,17 +79,16 @@ void nm_member_delete_list(struct lu_nodemap *nodemap) mutex_lock(&nodemap->nm_member_list_lock); list_for_each_entry_safe(exp, tmp, &nodemap->nm_member_list, - exp_target_data.ted_nodemap_member) { - exp->exp_target_data.ted_nodemap = NULL; - list_del_init(&exp->exp_target_data.ted_nodemap_member); - class_export_put(exp); - } + exp_target_data.ted_nodemap_member) + nm_member_del(nodemap, exp); mutex_unlock(&nodemap->nm_member_list_lock); } /** * Add a member export to a nodemap * + * Must be called under active_config_lock. + * * \param nodemap nodemap to add to * \param exp obd_export to add * \retval -EEXIST export is already part of a different nodemap @@ -78,17 +96,22 @@ void nm_member_delete_list(struct lu_nodemap *nodemap) */ int nm_member_add(struct lu_nodemap *nodemap, struct obd_export *exp) { + ENTRY; + if (exp == NULL) { CWARN("attempted to add null export to nodemap %s\n", nodemap->nm_name); - return -EINVAL; + RETURN(-EINVAL); } + mutex_lock(&nodemap->nm_member_list_lock); if (exp->exp_target_data.ted_nodemap != NULL && !list_empty(&exp->exp_target_data.ted_nodemap_member)) { + mutex_unlock(&nodemap->nm_member_list_lock); + /* export is already member of nodemap */ if (exp->exp_target_data.ted_nodemap == nodemap) - return 0; + RETURN(0); /* possibly reconnecting while about to be reclassified */ CWARN("export %p %s already hashed, failed to add to " @@ -97,17 +120,20 @@ int nm_member_add(struct lu_nodemap *nodemap, struct obd_export *exp) nodemap->nm_name, (exp->exp_target_data.ted_nodemap == NULL) ? "unknown" : exp->exp_target_data.ted_nodemap->nm_name); - return -EEXIST; + RETURN(-EEXIST); } class_export_get(exp); + nodemap_getref(nodemap); + /* ted_nodemap changes also require ac lock, member_list_lock */ + spin_lock(&exp->exp_target_data.ted_nodemap_lock); exp->exp_target_data.ted_nodemap = nodemap; - mutex_lock(&nodemap->nm_member_list_lock); + spin_unlock(&exp->exp_target_data.ted_nodemap_lock); list_add(&exp->exp_target_data.ted_nodemap_member, &nodemap->nm_member_list); mutex_unlock(&nodemap->nm_member_list_lock); - return 0; + RETURN(0); } /** @@ -126,9 +152,6 @@ static void nm_member_exp_revoke(struct obd_export *exp) ldlm_revoke_export_locks(exp); } -/* Mutex used to serialize calls to reclassify_nodemap_lock */ -DEFINE_MUTEX(reclassify_nodemap_lock); - /** * Reclassify the members of a nodemap after range changes or activation. * This function reclassifies the members of a nodemap based on the member @@ -136,15 +159,8 @@ DEFINE_MUTEX(reclassify_nodemap_lock); * classified as being part of this nodemap are moved to the nodemap whose * NID ranges contain the export's NID, and their locks are revoked. * - * Calls to this function are serialized due to a potential deadlock: Say there - * is a nodemap A and a nodemap B that both need to reclassify their members. - * If there is a member in nodemap A that should be in nodemap B, reclassify - * will attempt to add the member to nodemap B. If nodemap B is also - * reclassifying its members, then its hash is locked and nodemap A's attempt - * to add will block and wait for nodemap B's reclassify to finish. If - * nodemap B's reclassify then attempts to reclassify a member that should be - * in nodemap A, it will also try add the member to nodemap A's locked hash, - * causing a deadlock. + * Callers should hold the active_config_lock and active_config + * nmc_range_tree_lock. * * \param nodemap nodemap with members to reclassify */ @@ -154,35 +170,55 @@ void nm_member_reclassify_nodemap(struct lu_nodemap *nodemap) struct obd_export *tmp; struct lu_nodemap *new_nodemap; - /* reclassify only one nodemap at a time to avoid deadlock */ - mutex_lock(&reclassify_nodemap_lock); + ENTRY; + mutex_lock(&nodemap->nm_member_list_lock); + list_for_each_entry_safe(exp, tmp, &nodemap->nm_member_list, exp_target_data.ted_nodemap_member) { - lnet_nid_t nid = exp->exp_connection->c_peer.nid; + lnet_nid_t nid; + + /* if no conn assigned to this exp, reconnect will reclassify */ + spin_lock(&exp->exp_lock); + if (exp->exp_connection) { + nid = exp->exp_connection->c_peer.nid; + } else { + spin_unlock(&exp->exp_lock); + continue; + } + spin_unlock(&exp->exp_lock); - /* nodemap_classify_nid requires range tree lock */ - read_lock(&nm_range_tree_lock); + /* nodemap_classify_nid requires nmc_range_tree_lock */ new_nodemap = nodemap_classify_nid(nid); - read_unlock(&nm_range_tree_lock); + if (new_nodemap != nodemap) { + /* could deadlock if new_nodemap also reclassifying, + * active_config_lock serializes reclassifies + */ + mutex_lock(&new_nodemap->nm_member_list_lock); + /* don't use member_del because ted_nodemap - * should never be null + * should never be NULL with a live export */ list_del_init(&exp->exp_target_data.ted_nodemap_member); + + /* keep the new_nodemap ref from classify */ + spin_lock(&exp->exp_target_data.ted_nodemap_lock); exp->exp_target_data.ted_nodemap = new_nodemap; + spin_unlock(&exp->exp_target_data.ted_nodemap_lock); + nodemap_putref(nodemap); - /* could deadlock if new_nodemap also reclassifying */ - mutex_lock(&new_nodemap->nm_member_list_lock); list_add(&exp->exp_target_data.ted_nodemap_member, &new_nodemap->nm_member_list); mutex_unlock(&new_nodemap->nm_member_list_lock); nm_member_exp_revoke(exp); + } else { + nodemap_putref(new_nodemap); } - nodemap_putref(new_nodemap); } mutex_unlock(&nodemap->nm_member_list_lock); - mutex_unlock(&reclassify_nodemap_lock); + + EXIT; } /**