Whamcloud - gitweb
LU-13004 ptlrpc: Allow BULK_BUF_KIOV to accept a kvec
[fs/lustre-release.git] / lustre / ptlrpc / nodemap_member.c
index a4e5063..275aaae 100644 (file)
 #define HASH_NODEMAP_MEMBER_CUR_BITS 3
 #define HASH_NODEMAP_MEMBER_MAX_BITS 7
 
-/**
- * member hash functions
- *
- * The purpose of this hash is to maintain the list of
- * exports that are connected and associated with a
- * particular nodemap
- */
-static void nm_member_getref(struct obd_export *exp)
-{
-}
-
-void nm_member_putref(struct obd_export *exp)
-{
-}
-
-static __u32 nm_member_hashfn(cfs_hash_t *hash_body,
-                          const void *key, unsigned mask)
-{
-       return hash_long((unsigned long)key, hash_body->hs_bkt_bits) & mask;
-}
-
-static void *nm_member_hs_key(struct hlist_node *hnode)
-{
-       struct obd_export       *exp;
-
-       exp = hlist_entry(hnode, struct obd_export,
-                         exp_target_data.ted_nodemap_member);
-
-       return exp;
-}
-
-static int nm_member_hs_keycmp(const void *key, struct hlist_node *hnode)
-{
-       struct obd_export       *exp;
-
-       exp = hlist_entry(hnode, struct obd_export,
-                         exp_target_data.ted_nodemap_member);
-
-       return key == exp;
-}
-
-static void *nm_member_hs_hashobject(struct hlist_node *hnode)
-{
-       return hlist_entry(hnode, struct obd_export,
-                          exp_target_data.ted_nodemap_member);
-}
-
-static void nm_member_hs_get(cfs_hash_t *hs, struct hlist_node *hnode)
-{
-       struct obd_export       *exp;
-
-       exp = hlist_entry(hnode, struct obd_export,
-                         exp_target_data.ted_nodemap_member);
-       nm_member_getref(exp);
-}
-
-static void nm_member_hs_put_locked(cfs_hash_t *hs,
-                                struct hlist_node *hnode)
-{
-       struct obd_export       *exp;
-
-       exp = hlist_entry(hnode, struct obd_export,
-                         exp_target_data.ted_nodemap_member);
-       nm_member_putref(exp);
-}
 
 /**
- * Delete a member from a member hash
+ * Delete an export from a nodemap's member list. Called after client
+ * disconnects, or during system shutdown.
+ *
+ * Requires active_config_lock and nodemap's nm_member_list_lock.
  *
- * \param      nodemap         nodemap containing hash
- * \paraa      nid             nid of member to delete
+ * \param      nodemap         nodemap containing list
+ * \param      exp             export member to delete
  */
 void nm_member_del(struct lu_nodemap *nodemap, struct obd_export *exp)
 {
-       struct obd_export *exp1;
-
-       exp1 = cfs_hash_del_key(nodemap->nm_member_hash, exp);
-       if (exp1 != NULL)
-               class_export_put(exp1);
-
-       LASSERT(hlist_unhashed(&exp->exp_target_data.ted_nodemap_member));
-       exp->exp_target_data.ted_nodemap = NULL;
-}
+       ENTRY;
 
-static cfs_hash_ops_t nm_member_hash_operations = {
-       .hs_hash        = nm_member_hashfn,
-       .hs_key         = nm_member_hs_key,
-       .hs_keycmp      = nm_member_hs_keycmp,
-       .hs_object      = nm_member_hs_hashobject,
-       .hs_get         = nm_member_hs_get,
-       .hs_put_locked  = nm_member_hs_put_locked,
-};
+       /* all changes to ted_nodemap are made under active_config_lock */
+       LASSERT(exp->exp_target_data.ted_nodemap == nodemap);
 
-/**
- * Init a member hash of a nodemap
- *
- * \param      nodemap         nodemap containing the member hash
- */
-int nm_member_init_hash(struct lu_nodemap *nodemap)
-{
-       char nodemap_hashname[LUSTRE_NODEMAP_NAME_LENGTH + 3];
-
-
-       snprintf(nodemap_hashname, sizeof(nodemap_hashname),
-                "nm-%s", nodemap->nm_name);
-       nodemap->nm_member_hash = cfs_hash_create(nodemap_hashname,
-                                         HASH_NODEMAP_MEMBER_CUR_BITS,
-                                         HASH_NODEMAP_MEMBER_MAX_BITS,
-                                         HASH_NODEMAP_MEMBER_BKT_BITS, 0,
-                                         CFS_HASH_MIN_THETA,
-                                         CFS_HASH_MAX_THETA,
-                                         &nm_member_hash_operations,
-                                         CFS_HASH_DEFAULT);
-       if (nodemap->nm_member_hash == NULL)
-               return -ENOMEM;
-
-       return 0;
-}
+       /* protected by nm_member_list_lock */
+       list_del_init(&exp->exp_target_data.ted_nodemap_member);
 
-/**
- * Callback from deleting a hash member
- */
-static int nm_member_delete_hash_cb(cfs_hash_t *hs, cfs_hash_bd_t *bd,
-                                struct hlist_node *hnode, void *data)
-{
-       struct obd_export       *exp;
+       spin_lock(&exp->exp_target_data.ted_nodemap_lock);
+       exp->exp_target_data.ted_nodemap = NULL;
+       spin_unlock(&exp->exp_target_data.ted_nodemap_lock);
 
-       exp = hlist_entry(hnode, struct obd_export,
-                         exp_target_data.ted_nodemap_member);
+       /* ref formerly held by ted_nodemap */
+       nodemap_putref(nodemap);
 
-       exp->exp_target_data.ted_nodemap = NULL;
-       cfs_hash_bd_del_locked(hs, bd, hnode);
+       /* ref formerly held by ted_nodemap_member */
        class_export_put(exp);
 
-       return 0;
+       EXIT;
 }
 
 /**
- * Delete a member hash from a nodemap
+ * Delete a member list from a nodemap
  *
- * \param      nodemap         nodemap to remove the hash from
+ * Requires active_config_lock.
+ *
+ * \param      nodemap         nodemap to remove the list from
  */
-void nm_member_delete_hash(struct lu_nodemap *nodemap)
+void nm_member_delete_list(struct lu_nodemap *nodemap)
 {
-       cfs_hash_for_each_safe(nodemap->nm_member_hash,
-                              nm_member_delete_hash_cb,
-                              nodemap);
-       cfs_hash_putref(nodemap->nm_member_hash);
+       struct obd_export *exp;
+       struct obd_export *tmp;
+
+       mutex_lock(&nodemap->nm_member_list_lock);
+       list_for_each_entry_safe(exp, tmp, &nodemap->nm_member_list,
+                                exp_target_data.ted_nodemap_member)
+               nm_member_del(nodemap, exp);
+       mutex_unlock(&nodemap->nm_member_list_lock);
 }
 
 /**
  * Add a member export to a nodemap
  *
- * \param      nodemap         nodemap to search
- * \param      exp             obd_export to search
- * \retval     -EEXIST         export is already hashed to a different nodemap
+ * Must be called under active_config_lock.
+ *
+ * \param      nodemap         nodemap to add to
+ * \param      exp             obd_export to add
+ * \retval     -EEXIST         export is already part of a different nodemap
  * \retval     -EINVAL         export is NULL
  */
 int nm_member_add(struct lu_nodemap *nodemap, struct obd_export *exp)
 {
-       int     rc = 0;
+       ENTRY;
 
        if (exp == NULL) {
                CWARN("attempted to add null export to nodemap %s\n",
                      nodemap->nm_name);
-               return -EINVAL;
+               RETURN(-EINVAL);
        }
 
-       if (hlist_unhashed(&exp->exp_target_data.ted_nodemap_member) == 0) {
+       mutex_lock(&nodemap->nm_member_list_lock);
+       if (exp->exp_target_data.ted_nodemap != NULL &&
+           !list_empty(&exp->exp_target_data.ted_nodemap_member)) {
+               mutex_unlock(&nodemap->nm_member_list_lock);
+
                /* export is already member of nodemap */
                if (exp->exp_target_data.ted_nodemap == nodemap)
-                       return 0;
+                       RETURN(0);
 
                /* possibly reconnecting while about to be reclassified */
                CWARN("export %p %s already hashed, failed to add to "
@@ -212,19 +120,20 @@ int nm_member_add(struct lu_nodemap *nodemap, struct obd_export *exp)
                      nodemap->nm_name,
                      (exp->exp_target_data.ted_nodemap == NULL) ? "unknown" :
                                exp->exp_target_data.ted_nodemap->nm_name);
-               return -EEXIST;
+               RETURN(-EEXIST);
        }
 
+       class_export_get(exp);
+       nodemap_getref(nodemap);
+       /* ted_nodemap changes need active_config_lock, nm_member_list_lock */
+       spin_lock(&exp->exp_target_data.ted_nodemap_lock);
        exp->exp_target_data.ted_nodemap = nodemap;
+       spin_unlock(&exp->exp_target_data.ted_nodemap_lock);
+       list_add(&exp->exp_target_data.ted_nodemap_member,
+                &nodemap->nm_member_list);
+       mutex_unlock(&nodemap->nm_member_list_lock);
 
-       rc = cfs_hash_add_unique(nodemap->nm_member_hash, exp,
-                                &exp->exp_target_data.ted_nodemap_member);
-
-       if (rc == 0)
-               class_export_get(exp);
-       /* else -EALREADY - exp already in nodemap hash */
-
-       return rc;
+       RETURN(0);
 }
 
 /**
@@ -243,79 +152,104 @@ static void nm_member_exp_revoke(struct obd_export *exp)
        ldlm_revoke_export_locks(exp);
 }
 
-static int nm_member_reclassify_cb(cfs_hash_t *hs, cfs_hash_bd_t *bd,
-                                  struct hlist_node *hnode, void *data)
-{
-       struct obd_export       *exp;
-       struct lu_nodemap       *nodemap;
-
-       exp = hlist_entry(hnode, struct obd_export,
-                         exp_target_data.ted_nodemap_member);
-       if (exp == NULL)
-               goto out;
-
-       /* Must use bd_del_locked inside a cfs_hash callback, and exp->nodemap
-        * should never be NULL. For those reasons, can't use member_del.
-        */
-       read_lock(&nm_range_tree_lock);
-       nodemap = nodemap_classify_nid(exp->exp_connection->c_peer.nid);
-       if (exp->exp_target_data.ted_nodemap != nodemap) {
-               cfs_hash_bd_del_locked(hs, bd, hnode);
-               exp->exp_target_data.ted_nodemap = nodemap;
-               cfs_hash_add_unique(nodemap->nm_member_hash, exp,
-                               &exp->exp_target_data.ted_nodemap_member);
-       }
-       read_unlock(&nm_range_tree_lock);
-
-       nm_member_exp_revoke(exp);
-out:
-       return 0;
-}
-
-DEFINE_MUTEX(reclassify_nodemap_lock);
-
 /**
- * Reclassify the members of a nodemap after range changes or activation,
- * based on the member export's NID and the nodemap's new NID ranges. Exports
- * that are no longer classified as being part of this nodemap are moved to the
- * nodemap whose NID ranges contain the export's NID, and their locks are
- * revoked.
+ * Reclassify the members of a nodemap after range changes or activation.
+ * This function reclassifies the members of a nodemap based on the member
+ * export's NID and the nodemap's new NID ranges. Exports that are no longer
+ * classified as being part of this nodemap are moved to the nodemap whose
+ * NID ranges contain the export's NID, and their locks are revoked.
+ *
+ * Callers should hold the active_config_lock and active_config
+ * nmc_range_tree_lock.
  *
  * \param      nodemap         nodemap with members to reclassify
  */
 void nm_member_reclassify_nodemap(struct lu_nodemap *nodemap)
 {
-       /* reclassify only one nodemap at a time to avoid deadlock */
-       mutex_lock(&reclassify_nodemap_lock);
-       cfs_hash_for_each_safe(nodemap->nm_member_hash,
-                              nm_member_reclassify_cb,
-                              NULL);
-       mutex_unlock(&reclassify_nodemap_lock);
-}
+       struct obd_export *exp;
+       struct obd_export *tmp;
+       struct lu_nodemap *new_nodemap;
+
+       ENTRY;
+
+       mutex_lock(&nodemap->nm_member_list_lock);
+
+       list_for_each_entry_safe(exp, tmp, &nodemap->nm_member_list,
+                                exp_target_data.ted_nodemap_member) {
+               lnet_nid_t nid;
+
+               /* if no conn assigned to this exp, reconnect will reclassify */
+               spin_lock(&exp->exp_lock);
+               if (exp->exp_connection) {
+                       nid = exp->exp_connection->c_peer.nid;
+               } else {
+                       spin_unlock(&exp->exp_lock);
+                       continue;
+               }
+               spin_unlock(&exp->exp_lock);
+
+               /* nodemap_classify_nid requires nmc_range_tree_lock */
+               new_nodemap = nodemap_classify_nid(nid);
+               if (IS_ERR(new_nodemap))
+                       continue;
+
+               if (new_nodemap != nodemap) {
+                       /* could deadlock if new_nodemap is also reclassifying,
+                        * but active_config_lock serializes reclassification
+                        */
+                       mutex_lock(&new_nodemap->nm_member_list_lock);
+
+                       /* don't use member_del because ted_nodemap
+                        * should never be NULL with a live export
+                        */
+                       list_del_init(&exp->exp_target_data.ted_nodemap_member);
+
+                       /* keep the new_nodemap ref from classify */
+                       spin_lock(&exp->exp_target_data.ted_nodemap_lock);
+                       exp->exp_target_data.ted_nodemap = new_nodemap;
+                       spin_unlock(&exp->exp_target_data.ted_nodemap_lock);
+                       nodemap_putref(nodemap);
+
+                       list_add(&exp->exp_target_data.ted_nodemap_member,
+                                &new_nodemap->nm_member_list);
+                       mutex_unlock(&new_nodemap->nm_member_list_lock);
+
+                       if (nodemap_active)
+                               nm_member_exp_revoke(exp);
+               } else {
+                       nodemap_putref(new_nodemap);
+               }
+       }
+       mutex_unlock(&nodemap->nm_member_list_lock);
 
-static int nm_member_revoke_locks_cb(cfs_hash_t *hs, cfs_hash_bd_t *bd,
-                                    struct hlist_node *hnode, void *data)
-{
-       struct obd_export       *exp;
-       exp = hlist_entry(hnode, struct obd_export,
-                         exp_target_data.ted_nodemap_member);
-       if (exp == NULL)
-               return 0;
-
-       nm_member_exp_revoke(exp);
-       return 0;
+       EXIT;
 }
 
 /**
- * Revoke the locks for member exports. Changing the idmap is
- * akin to deleting the security context. If the locks are not
- * canceled, the client could cache permissions that are no
- * longer correct with the map.
+ * Revoke the locks for member exports if nodemap system is active.
+ *
+ * Changing the idmap is akin to deleting the security context. If the locks
+ * are not canceled, the client could cache permissions that are no longer
+ * correct with the map.
  *
  * \param      nodemap         nodemap that has been altered
  */
 void nm_member_revoke_locks(struct lu_nodemap *nodemap)
 {
-       cfs_hash_for_each(nodemap->nm_member_hash, nm_member_revoke_locks_cb,
-                         NULL);
+       if (!nodemap_active)
+               return;
+
+       nm_member_revoke_locks_always(nodemap);
+}
+
+void nm_member_revoke_locks_always(struct lu_nodemap *nodemap)
+{
+       struct obd_export *exp;
+       struct obd_export *tmp;
+
+       mutex_lock(&nodemap->nm_member_list_lock);
+       list_for_each_entry_safe(exp, tmp, &nodemap->nm_member_list,
+                           exp_target_data.ted_nodemap_member)
+               nm_member_exp_revoke(exp);
+       mutex_unlock(&nodemap->nm_member_list_lock);
 }