Whamcloud - gitweb
LU-2456 lnet: Dynamic LNet Configuration (DLC) 30/9830/5
author Amir Shehata <amir.shehata@intel.com>
Thu, 27 Mar 2014 23:51:21 +0000 (16:51 -0700)
committer Oleg Drokin <oleg.drokin@intel.com>
Fri, 25 Jul 2014 03:31:12 +0000 (03:31 +0000)
This is the first patch of a set of patches that enables DLC.

This patch cleans up config.c and makes some preparatory
changes in peer.c to enable dynamic network
configuration.

Signed-off-by: Amir Shehata <amir.shehata@intel.com>
Change-Id: I8c8bbf3b55acf4d76f22a8be587b553a70d31889
Reviewed-on: http://review.whamcloud.com/9830
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Liang Zhen <liang.zhen@intel.com>
Reviewed-by: James Simmons <uja.ornl@gmail.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lnet/include/lnet/lib-lnet.h
lnet/include/lnet/lib-types.h
lnet/lnet/api-ni.c
lnet/lnet/config.c
lnet/lnet/peer.c

index 13aa91b..abd8dd7 100644 (file)
@@ -959,7 +959,7 @@ int lnet_parse_networks(struct list_head *nilist, char *networks);
 int lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt);
 lnet_peer_t *lnet_find_peer_locked(struct lnet_peer_table *ptable,
                                   lnet_nid_t nid);
 int lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt);
 lnet_peer_t *lnet_find_peer_locked(struct lnet_peer_table *ptable,
                                   lnet_nid_t nid);
-void lnet_peer_tables_cleanup(void);
+void lnet_peer_tables_cleanup(lnet_ni_t *ni);
 void lnet_peer_tables_destroy(void);
 int lnet_peer_tables_create(void);
 void lnet_debug_peer(lnet_nid_t nid);
 void lnet_peer_tables_destroy(void);
 int lnet_peer_tables_create(void);
 void lnet_debug_peer(lnet_nid_t nid);
index b9b9f45..41fa32b 100644 (file)
@@ -539,6 +539,8 @@ typedef struct lnet_peer {
 struct lnet_peer_table {
        int                     pt_version;     /* /proc validity stamp */
        int                     pt_number;      /* # peers extant */
 struct lnet_peer_table {
        int                     pt_version;     /* /proc validity stamp */
        int                     pt_number;      /* # peers extant */
+       int                     pt_zombies;     /* # zombies to go to deathrow
+                                                * (and not there yet) */
        struct list_head        pt_deathrow;    /* zombie peers */
        struct list_head        *pt_hash;       /* NID->peer hash */
 };
        struct list_head        pt_deathrow;    /* zombie peers */
        struct list_head        *pt_hash;       /* NID->peer hash */
 };
@@ -843,9 +845,6 @@ typedef struct
        /* registered LNDs */
        struct list_head                ln_lnds;
 
        /* registered LNDs */
        struct list_head                ln_lnds;
 
-       /* space for network names */
-       char                            *ln_network_tokens;
-       int                             ln_network_tokens_nob;
        /* test protocol compatibility flags */
        int                             ln_testprotocompat;
 
        /* test protocol compatibility flags */
        int                             ln_testprotocompat;
 
index 1a2294f..3eb2ee7 100644 (file)
@@ -1086,7 +1086,7 @@ lnet_shutdown_lndnis (void)
 
         /* Clear the peer table and wait for all peers to go (they hold refs on
          * their NIs) */
 
         /* Clear the peer table and wait for all peers to go (they hold refs on
          * their NIs) */
-       lnet_peer_tables_cleanup();
+       lnet_peer_tables_cleanup(NULL);
 
        lnet_net_lock(LNET_LOCK_EX);
        /* Now wait for the NI's I just nuked to show up on ln_zombie_nis
 
        lnet_net_lock(LNET_LOCK_EX);
        /* Now wait for the NI's I just nuked to show up on ln_zombie_nis
@@ -1142,12 +1142,6 @@ lnet_shutdown_lndnis (void)
 
        the_lnet.ln_shutdown = 0;
        lnet_net_unlock(LNET_LOCK_EX);
 
        the_lnet.ln_shutdown = 0;
        lnet_net_unlock(LNET_LOCK_EX);
-
-       if (the_lnet.ln_network_tokens != NULL) {
-               LIBCFS_FREE(the_lnet.ln_network_tokens,
-                           the_lnet.ln_network_tokens_nob);
-               the_lnet.ln_network_tokens = NULL;
-       }
 }
 
 int
 }
 
 int
@@ -1168,9 +1162,9 @@ lnet_startup_lndnis (void)
        if (nets == NULL)
                goto failed;
 
        if (nets == NULL)
                goto failed;
 
-        rc = lnet_parse_networks(&nilist, nets);
-        if (rc != 0)
-                goto failed;
+       rc = lnet_parse_networks(&nilist, nets);
+       if (rc != 0)
+               goto failed;
 
        while (!list_empty(&nilist)) {
                ni = list_entry(nilist.next, lnet_ni_t, ni_list);
 
        while (!list_empty(&nilist)) {
                ni = list_entry(nilist.next, lnet_ni_t, ni_list);
index 345dd26..7dd588e 100644 (file)
@@ -97,6 +97,8 @@ lnet_net_unique(__u32 net, struct list_head *nilist)
 void
 lnet_ni_free(struct lnet_ni *ni)
 {
 void
 lnet_ni_free(struct lnet_ni *ni)
 {
+       int i;
+
        if (ni->ni_refs != NULL)
                cfs_percpt_free(ni->ni_refs);
 
        if (ni->ni_refs != NULL)
                cfs_percpt_free(ni->ni_refs);
 
@@ -111,6 +113,11 @@ lnet_ni_free(struct lnet_ni *ni)
        pthread_mutex_destroy(&ni->ni_lock);
 # endif
 #endif
        pthread_mutex_destroy(&ni->ni_lock);
 # endif
 #endif
+       for (i = 0; i < LNET_MAX_INTERFACES &&
+                   ni->ni_interfaces[i] != NULL; i++) {
+               LIBCFS_FREE(ni->ni_interfaces[i],
+                           strlen(ni->ni_interfaces[i]) + 1);
+       }
        LIBCFS_FREE(ni, sizeof(*ni));
 }
 
        LIBCFS_FREE(ni, sizeof(*ni));
 }
 
@@ -205,15 +212,13 @@ lnet_parse_networks(struct list_head *nilist, char *networks)
                return -EINVAL;
        }
 
                return -EINVAL;
        }
 
-        LIBCFS_ALLOC(tokens, tokensize);
-        if (tokens == NULL) {
-                CERROR("Can't allocate net tokens\n");
+       LIBCFS_ALLOC(tokens, tokensize);
+       if (tokens == NULL) {
+               CERROR("Can't allocate net tokens\n");
                return -ENOMEM;
                return -ENOMEM;
-        }
+       }
 
 
-        the_lnet.ln_network_tokens = tokens;
-        the_lnet.ln_network_tokens_nob = tokensize;
-        memcpy (tokens, networks, tokensize);
+       memcpy(tokens, networks, tokensize);
        str = tmp = tokens;
 
        /* Add in the loopback network */
        str = tmp = tokens;
 
        /* Add in the loopback network */
@@ -324,14 +329,28 @@ lnet_parse_networks(struct list_head *nilist, char *networks)
                                goto failed_syntax;
                         }
 
                                goto failed_syntax;
                         }
 
-                        if (niface == LNET_MAX_INTERFACES) {
-                                LCONSOLE_ERROR_MSG(0x115, "Too many interfaces "
-                                                   "for net %s\n",
-                                                   libcfs_net2str(net));
-                                goto failed;
-                        }
+                       if (niface == LNET_MAX_INTERFACES) {
+                               LCONSOLE_ERROR_MSG(0x115, "Too many interfaces "
+                                                  "for net %s\n",
+                                                  libcfs_net2str(net));
+                               goto failed;
+                       }
 
 
-                        ni->ni_interfaces[niface++] = iface;
+                       /* Allocate a separate piece of memory and copy
+                        * into it the string, so we don't have
+                        * a dependency on the tokens string.  This way we
+                        * can free the tokens at the end of the function.
+                        * The newly allocated ni_interfaces[] can be
+                        * freed when freeing the NI */
+                       LIBCFS_ALLOC(ni->ni_interfaces[niface],
+                                    strlen(iface) + 1);
+                       if (ni->ni_interfaces[niface] == NULL) {
+                               CERROR("Can't allocate net interface name\n");
+                               goto failed;
+                       }
+                       strncpy(ni->ni_interfaces[niface], iface,
+                               strlen(iface));
+                       niface++;
                        iface = comma;
                } while (iface != NULL);
 
                        iface = comma;
                } while (iface != NULL);
 
@@ -356,6 +375,8 @@ lnet_parse_networks(struct list_head *nilist, char *networks)
        }
 
        LASSERT(!list_empty(nilist));
        }
 
        LASSERT(!list_empty(nilist));
+
+       LIBCFS_FREE(tokens, tokensize);
        return 0;
 
  failed_syntax:
        return 0;
 
  failed_syntax:
@@ -372,7 +393,6 @@ lnet_parse_networks(struct list_head *nilist, char *networks)
                cfs_expr_list_free(el);
 
        LIBCFS_FREE(tokens, tokensize);
                cfs_expr_list_free(el);
 
        LIBCFS_FREE(tokens, tokensize);
-       the_lnet.ln_network_tokens = NULL;
 
        return -EINVAL;
 }
 
        return -EINVAL;
 }
@@ -663,9 +683,9 @@ lnet_parse_route (char *str, int *im_a_router)
        char             *sep;
        char             *token = str;
        int               ntokens = 0;
        char             *sep;
        char             *token = str;
        int               ntokens = 0;
-        int               myrc = -1;
-        unsigned int      hops;
-        int               got_hops = 0;
+       int               myrc = -1;
+       unsigned int      hops;
+       int               got_hops = 0;
        unsigned int      priority = 0;
 
        INIT_LIST_HEAD(&gateways);
        unsigned int      priority = 0;
 
        INIT_LIST_HEAD(&gateways);
@@ -747,8 +767,8 @@ lnet_parse_route (char *str, int *im_a_router)
                }
        }
 
                }
        }
 
-        if (!got_hops)
-                hops = 1;
+       if (!got_hops)
+               hops = 1;
 
        LASSERT(!list_empty(&nets));
        LASSERT(!list_empty(&gateways));
 
        LASSERT(!list_empty(&nets));
        LASSERT(!list_empty(&gateways));
@@ -763,28 +783,28 @@ lnet_parse_route (char *str, int *im_a_router)
                        nid = libcfs_str2nid(ltb->ltb_text);
                        LASSERT(nid != LNET_NID_ANY);
 
                        nid = libcfs_str2nid(ltb->ltb_text);
                        LASSERT(nid != LNET_NID_ANY);
 
-                        if (lnet_islocalnid(nid)) {
-                                *im_a_router = 1;
-                                continue;
-                        }
+                       if (lnet_islocalnid(nid)) {
+                               *im_a_router = 1;
+                               continue;
+                       }
 
                        rc = lnet_add_route(net, hops, nid, priority);
 
                        rc = lnet_add_route(net, hops, nid, priority);
-                        if (rc != 0) {
-                                CERROR("Can't create route "
-                                       "to %s via %s\n",
-                                       libcfs_net2str(net),
-                                       libcfs_nid2str(nid));
-                                goto out;
-                        }
+                       if (rc != 0) {
+                               CERROR("Can't create route "
+                                      "to %s via %s\n",
+                                      libcfs_net2str(net),
+                                      libcfs_nid2str(nid));
+                               goto out;
+                       }
                }
        }
 
                }
        }
 
-        myrc = 0;
-        goto out;
+       myrc = 0;
+       goto out;
 
 
- token_error:
+token_error:
        lnet_syntax("routes", cmd, (int)(token - str), strlen(token));
        lnet_syntax("routes", cmd, (int)(token - str), strlen(token));
- out:
+out:
        lnet_free_text_bufs(&nets);
        lnet_free_text_bufs(&gateways);
        return myrc;
        lnet_free_text_bufs(&nets);
        lnet_free_text_bufs(&gateways);
        return myrc;
index 182f44d..5b4b09e 100644 (file)
@@ -103,61 +103,111 @@ lnet_peer_tables_destroy(void)
        the_lnet.ln_peer_tables = NULL;
 }
 
        the_lnet.ln_peer_tables = NULL;
 }
 
+static void
+lnet_peer_table_cleanup_locked(lnet_ni_t *ni, struct lnet_peer_table *ptable)
+{
+       int              i;
+       lnet_peer_t     *lp;
+       lnet_peer_t     *tmp;
+
+       for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
+               list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i],
+                                        lp_hashlist) {
+                       if (ni != NULL && ni != lp->lp_ni)
+                               continue;
+                       list_del_init(&lp->lp_hashlist);
+                       /* Lose hash table's ref */
+                       ptable->pt_zombies++;
+                       lnet_peer_decref_locked(lp);
+               }
+       }
+}
+
+static void
+lnet_peer_table_deathrow_wait_locked(struct lnet_peer_table *ptable,
+                                    int cpt_locked)
+{
+       int     i;
+
+       for (i = 3; ptable->pt_zombies != 0; i++) {
+               lnet_net_unlock(cpt_locked);
+
+               if (IS_PO2(i)) {
+                       CDEBUG(D_WARNING,
+                              "Waiting for %d zombies on peer table\n",
+                              ptable->pt_zombies);
+               }
+               cfs_pause(cfs_time_seconds(1) >> 1);
+               lnet_net_lock(cpt_locked);
+       }
+}
+
+static void
+lnet_peer_table_del_rtrs_locked(lnet_ni_t *ni, struct lnet_peer_table *ptable,
+                               int cpt_locked)
+{
+       lnet_peer_t     *lp;
+       lnet_peer_t     *tmp;
+       lnet_nid_t       lp_nid;
+       int              i;
+
+       for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
+               list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i],
+                                        lp_hashlist) {
+                       if (ni != lp->lp_ni)
+                               continue;
+
+                       if (lp->lp_rtr_refcount == 0)
+                               continue;
+
+                       lp_nid = lp->lp_nid;
+
+                       lnet_net_unlock(cpt_locked);
+                       lnet_del_route(LNET_NIDNET(LNET_NID_ANY), lp_nid);
+                       lnet_net_lock(cpt_locked);
+               }
+       }
+}
+
 void
 void
-lnet_peer_tables_cleanup(void)
+lnet_peer_tables_cleanup(lnet_ni_t *ni)
 {
 {
-       struct lnet_peer_table  *ptable;
        int                     i;
        int                     i;
-       int                     j;
+       struct lnet_peer_table  *ptable;
+       lnet_peer_t             *lp;
+       struct list_head        deathrow;
 
 
-       LASSERT(the_lnet.ln_shutdown);  /* i.e. no new peers */
+       INIT_LIST_HEAD(&deathrow);
 
 
+       LASSERT(the_lnet.ln_shutdown || ni != NULL);
+       /* If just deleting the peers for a NI, get rid of any routes these
+        * peers are gateways for. */
        cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
                lnet_net_lock(i);
        cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
                lnet_net_lock(i);
-
-               for (j = 0; j < LNET_PEER_HASH_SIZE; j++) {
-                       struct list_head *peers = &ptable->pt_hash[j];
-
-                       while (!list_empty(peers)) {
-                               lnet_peer_t *lp = list_entry(peers->next,
-                                                                lnet_peer_t,
-                                                                lp_hashlist);
-                               list_del_init(&lp->lp_hashlist);
-                               /* lose hash table's ref */
-                               lnet_peer_decref_locked(lp);
-                       }
-               }
-
+               lnet_peer_table_del_rtrs_locked(ni, ptable, i);
                lnet_net_unlock(i);
        }
 
                lnet_net_unlock(i);
        }
 
+       /* Start the process of moving the applicable peers to
+        * deathrow. */
        cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
        cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
-               struct list_head deathrow = LIST_HEAD_INIT(deathrow);
-               lnet_peer_t     *lp;
-
                lnet_net_lock(i);
                lnet_net_lock(i);
+               lnet_peer_table_cleanup_locked(ni, ptable);
+               lnet_net_unlock(i);
+       }
 
 
-               for (j = 3; ptable->pt_number != 0; j++) {
-                       lnet_net_unlock(i);
-
-                       if ((j & (j - 1)) == 0) {
-                               CDEBUG(D_WARNING,
-                                      "Waiting for %d peers on peer table\n",
-                                      ptable->pt_number);
-                       }
-                       cfs_pause(cfs_time_seconds(1) / 2);
-                       lnet_net_lock(i);
-               }
+       /* Cleanup all entries on deathrow. */
+       cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
+               lnet_net_lock(i);
+               lnet_peer_table_deathrow_wait_locked(ptable, i);
                list_splice_init(&ptable->pt_deathrow, &deathrow);
                list_splice_init(&ptable->pt_deathrow, &deathrow);
-
                lnet_net_unlock(i);
                lnet_net_unlock(i);
+       }
 
 
-               while (!list_empty(&deathrow)) {
-                       lp = list_entry(deathrow.next,
-                                       lnet_peer_t, lp_hashlist);
-                       list_del(&lp->lp_hashlist);
-                       LIBCFS_FREE(lp, sizeof(*lp));
-               }
+       while (!list_empty(&deathrow)) {
+               lp = list_entry(deathrow.next, lnet_peer_t, lp_hashlist);
+               list_del(&lp->lp_hashlist);
+               LIBCFS_FREE(lp, sizeof(*lp));
        }
 }
 
        }
 }
 
@@ -180,6 +230,8 @@ lnet_destroy_peer_locked(lnet_peer_t *lp)
        lp->lp_ni = NULL;
 
        list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
        lp->lp_ni = NULL;
 
        list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
+       LASSERT(ptable->pt_zombies > 0);
+       ptable->pt_zombies--;
 }
 
 lnet_peer_t *
 }
 
 lnet_peer_t *