Whamcloud - gitweb
LU-2456 lnet: Dynamic LNet Configuration (DLC)
[fs/lustre-release.git] / lnet / lnet / peer.c
index 06b09b2..5b4b09e 100644 (file)
@@ -1,25 +1,39 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
+/*
+ * GPL HEADER START
  *
- * lib/lib-move.c
- * Data movement routines
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
- *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
  *
- *   This file is part of Lustre, http://www.lustre.org
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
  *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
  *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
  *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/lnet/peer.c
  */
 
 #define DEBUG_SUBSYSTEM S_LNET
 #include <lnet/lib-lnet.h>
 
 int
-lnet_create_peer_table(void)
+lnet_peer_tables_create(void)
 {
-       struct list_head *hash;
-       int               i;
-
-       LASSERT (the_lnet.ln_peer_hash == NULL);
-       LIBCFS_ALLOC(hash, LNET_PEER_HASHSIZE * sizeof(struct list_head));
-       
-       if (hash == NULL) {
-               CERROR("Can't allocate peer hash table\n");
+       struct lnet_peer_table  *ptable;
+       struct list_head        *hash;
+       int                     i;
+       int                     j;
+
+       the_lnet.ln_peer_tables = cfs_percpt_alloc(lnet_cpt_table(),
+                                                  sizeof(*ptable));
+       if (the_lnet.ln_peer_tables == NULL) {
+               CERROR("Failed to allocate cpu-partition peer tables\n");
                return -ENOMEM;
        }
 
-       for (i = 0; i < LNET_PEER_HASHSIZE; i++)
-               CFS_INIT_LIST_HEAD(&hash[i]);
+       cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
+               INIT_LIST_HEAD(&ptable->pt_deathrow);
+
+               LIBCFS_CPT_ALLOC(hash, lnet_cpt_table(), i,
+                                LNET_PEER_HASH_SIZE * sizeof(*hash));
+               if (hash == NULL) {
+                       CERROR("Failed to create peer hash table\n");
+                       lnet_peer_tables_destroy();
+                       return -ENOMEM;
+               }
+
+               for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
+                       INIT_LIST_HEAD(&hash[j]);
+               ptable->pt_hash = hash; /* sign of initialization */
+       }
 
-       the_lnet.ln_peer_hash = hash;
        return 0;
 }
 
 void
-lnet_destroy_peer_table(void)
+lnet_peer_tables_destroy(void)
 {
-       int         i;
+       struct lnet_peer_table  *ptable;
+       struct list_head        *hash;
+       int                     i;
+       int                     j;
 
-        if (the_lnet.ln_peer_hash == NULL)
-                return;
+       if (the_lnet.ln_peer_tables == NULL)
+               return;
+
+       cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
+               hash = ptable->pt_hash;
+               if (hash == NULL) /* not initialized */
+                       break;
+
+               LASSERT(list_empty(&ptable->pt_deathrow));
+
+               ptable->pt_hash = NULL;
+               for (j = 0; j < LNET_PEER_HASH_SIZE; j++)
+                       LASSERT(list_empty(&hash[j]));
 
-       for (i = 0; i < LNET_PEER_HASHSIZE; i++)
-               LASSERT (list_empty(&the_lnet.ln_peer_hash[i]));
-       
-       LIBCFS_FREE(the_lnet.ln_peer_hash,
-                   LNET_PEER_HASHSIZE * sizeof (struct list_head));
-        the_lnet.ln_peer_hash = NULL;
+               LIBCFS_FREE(hash, LNET_PEER_HASH_SIZE * sizeof(*hash));
+       }
+
+       cfs_percpt_free(the_lnet.ln_peer_tables);
+       the_lnet.ln_peer_tables = NULL;
 }
 
-void
-lnet_clear_peer_table(void)
+static void
+lnet_peer_table_cleanup_locked(lnet_ni_t *ni, struct lnet_peer_table *ptable)
 {
-       int         i;
-
-        LASSERT (the_lnet.ln_shutdown);         /* i.e. no new peers */
-       
-       for (i = 0; i < LNET_PEER_HASHSIZE; i++) {
-               struct list_head *peers = &the_lnet.ln_peer_hash[i];
-
-               LNET_LOCK();
-               while (!list_empty(peers)) {
-                       lnet_peer_t *lp = list_entry(peers->next,
-                                                    lnet_peer_t, lp_hashlist);
-                       
-                       list_del(&lp->lp_hashlist);
-                        lnet_peer_decref_locked(lp);   /* lose hash table's ref */
+       int              i;
+       lnet_peer_t     *lp;
+       lnet_peer_t     *tmp;
+
+       for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
+               list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i],
+                                        lp_hashlist) {
+                       if (ni != NULL && ni != lp->lp_ni)
+                               continue;
+                       list_del_init(&lp->lp_hashlist);
+                       /* Lose hash table's ref */
+                       ptable->pt_zombies++;
+                       lnet_peer_decref_locked(lp);
                }
-               LNET_UNLOCK();
        }
+}
 
-        LNET_LOCK();
-        for (i = 3; the_lnet.ln_npeers != 0;i++) {
-                LNET_UNLOCK();
+static void
+lnet_peer_table_deathrow_wait_locked(struct lnet_peer_table *ptable,
+                                    int cpt_locked)
+{
+       int     i;
 
-                if ((i & (i-1)) == 0)
-                        CDEBUG(D_WARNING,"Waiting for %d peers\n", 
-                               the_lnet.ln_npeers);
-                cfs_pause(cfs_time_seconds(1));
+       for (i = 3; ptable->pt_zombies != 0; i++) {
+               lnet_net_unlock(cpt_locked);
 
-                LNET_LOCK();
-        }
-        LNET_UNLOCK();
+               if (IS_PO2(i)) {
+                       CDEBUG(D_WARNING,
+                              "Waiting for %d zombies on peer table\n",
+                              ptable->pt_zombies);
+               }
+               cfs_pause(cfs_time_seconds(1) >> 1);
+               lnet_net_lock(cpt_locked);
+       }
+}
+
+static void
+lnet_peer_table_del_rtrs_locked(lnet_ni_t *ni, struct lnet_peer_table *ptable,
+                               int cpt_locked)
+{
+       lnet_peer_t     *lp;
+       lnet_peer_t     *tmp;
+       lnet_nid_t       lp_nid;
+       int              i;
+
+       for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
+               list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i],
+                                        lp_hashlist) {
+                       if (ni != lp->lp_ni)
+                               continue;
+
+                       if (lp->lp_rtr_refcount == 0)
+                               continue;
+
+                       lp_nid = lp->lp_nid;
+
+                       lnet_net_unlock(cpt_locked);
+                       lnet_del_route(LNET_NIDNET(LNET_NID_ANY), lp_nid);
+                       lnet_net_lock(cpt_locked);
+               }
+       }
+}
+
+void
+lnet_peer_tables_cleanup(lnet_ni_t *ni)
+{
+       int                     i;
+       struct lnet_peer_table  *ptable;
+       lnet_peer_t             *lp;
+       struct list_head        deathrow;
+
+       INIT_LIST_HEAD(&deathrow);
+
+       LASSERT(the_lnet.ln_shutdown || ni != NULL);
+       /* If just deleting the peers for a NI, get rid of any routes these
+        * peers are gateways for. */
+       cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
+               lnet_net_lock(i);
+               lnet_peer_table_del_rtrs_locked(ni, ptable, i);
+               lnet_net_unlock(i);
+       }
+
+       /* Start the process of moving the applicable peers to
+        * deathrow. */
+       cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
+               lnet_net_lock(i);
+               lnet_peer_table_cleanup_locked(ni, ptable);
+               lnet_net_unlock(i);
+       }
+
+       /* Cleanup all entries on deathrow. */
+       cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) {
+               lnet_net_lock(i);
+               lnet_peer_table_deathrow_wait_locked(ptable, i);
+               list_splice_init(&ptable->pt_deathrow, &deathrow);
+               lnet_net_unlock(i);
+       }
+
+       while (!list_empty(&deathrow)) {
+               lp = list_entry(deathrow.next, lnet_peer_t, lp_hashlist);
+               list_del(&lp->lp_hashlist);
+               LIBCFS_FREE(lp, sizeof(*lp));
+       }
 }
 
 void
-lnet_destroy_peer_locked (lnet_peer_t *lp) 
+lnet_destroy_peer_locked(lnet_peer_t *lp)
 {
-        lnet_ni_decref_locked(lp->lp_ni);
-        LNET_UNLOCK();
+       struct lnet_peer_table *ptable;
 
-        LASSERT (lp->lp_refcount == 0);
-        LASSERT (lp->lp_rtr_refcount == 0);
-       LASSERT (list_empty(&lp->lp_txq));
-        LASSERT (lp->lp_txqnob == 0);
+       LASSERT(lp->lp_refcount == 0);
+       LASSERT(lp->lp_rtr_refcount == 0);
+       LASSERT(list_empty(&lp->lp_txq));
+       LASSERT(list_empty(&lp->lp_hashlist));
+       LASSERT(lp->lp_txqnob == 0);
 
-       LIBCFS_FREE(lp, sizeof(*lp));
+       ptable = the_lnet.ln_peer_tables[lp->lp_cpt];
+       LASSERT(ptable->pt_number > 0);
+       ptable->pt_number--;
 
-        LNET_LOCK();
+       lnet_ni_decref_locked(lp->lp_ni, lp->lp_cpt);
+       lp->lp_ni = NULL;
 
-        LASSERT(the_lnet.ln_npeers > 0);
-        the_lnet.ln_npeers--;
+       list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
+       LASSERT(ptable->pt_zombies > 0);
+       ptable->pt_zombies--;
 }
 
 lnet_peer_t *
-lnet_find_peer_locked (lnet_nid_t nid)
+lnet_find_peer_locked(struct lnet_peer_table *ptable, lnet_nid_t nid)
 {
-       unsigned int      idx = LNET_NIDADDR(nid) % LNET_PEER_HASHSIZE;
-       struct list_head *peers = &the_lnet.ln_peer_hash[idx];
-       struct list_head *tmp;
-        lnet_peer_t      *lp;
+       struct list_head *peers;
+       lnet_peer_t      *lp;
 
-       if (the_lnet.ln_shutdown)
-                return NULL;
+       LASSERT(!the_lnet.ln_shutdown);
 
-       list_for_each (tmp, peers) {
-               lp = list_entry(tmp, lnet_peer_t, lp_hashlist);
-               
+       peers = &ptable->pt_hash[lnet_nid2peerhash(nid)];
+       list_for_each_entry(lp, peers, lp_hashlist) {
                if (lp->lp_nid == nid) {
-                        lnet_peer_addref_locked(lp);
+                       lnet_peer_addref_locked(lp);
                        return lp;
-                }
+               }
        }
-        
+
        return NULL;
 }
 
 int
-lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid)
+lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt)
 {
-       lnet_peer_t    *lp;
-       lnet_peer_t    *lp2;
+       struct lnet_peer_table  *ptable;
+       lnet_peer_t             *lp = NULL;
+       lnet_peer_t             *lp2;
+       int                     cpt2;
+       int                     rc = 0;
+
+       *lpp = NULL;
+       if (the_lnet.ln_shutdown) /* it's shutting down */
+               return -ESHUTDOWN;
+
+       /* cpt can be LNET_LOCK_EX if it's called from router functions */
+       cpt2 = cpt != LNET_LOCK_EX ? cpt : lnet_cpt_of_nid_locked(nid);
+
+       ptable = the_lnet.ln_peer_tables[cpt2];
+       lp = lnet_find_peer_locked(ptable, nid);
+       if (lp != NULL) {
+               *lpp = lp;
+               return 0;
+       }
+
+       if (!list_empty(&ptable->pt_deathrow)) {
+               lp = list_entry(ptable->pt_deathrow.next,
+                               lnet_peer_t, lp_hashlist);
+               list_del(&lp->lp_hashlist);
+       }
+
+       /*
+        * take extra refcount in case another thread has shut down LNet
+        * and destroyed locks and peer-table before I finish the allocation
+        */
+       ptable->pt_number++;
+       lnet_net_unlock(cpt);
+
+       if (lp != NULL)
+               memset(lp, 0, sizeof(*lp));
+       else
+               LIBCFS_CPT_ALLOC(lp, lnet_cpt_table(), cpt2, sizeof(*lp));
 
-        lp = lnet_find_peer_locked(nid);
-        if (lp != NULL) {
-                *lpp = lp;
-                return 0;
-        }
-        
-        LNET_UNLOCK();
-       
-       LIBCFS_ALLOC(lp, sizeof(*lp));
        if (lp == NULL) {
-                *lpp = NULL;
-                LNET_LOCK();
-                return -ENOMEM;
-        }
+               rc = -ENOMEM;
+               lnet_net_lock(cpt);
+               goto out;
+       }
+
+       INIT_LIST_HEAD(&lp->lp_txq);
+       INIT_LIST_HEAD(&lp->lp_rtrq);
+       INIT_LIST_HEAD(&lp->lp_routes);
 
-        memset(lp, 0, sizeof(*lp));             /* zero counters etc */
-        
-       CFS_INIT_LIST_HEAD(&lp->lp_txq);
-        CFS_INIT_LIST_HEAD(&lp->lp_rtrq);
-       
-       lp->lp_alive = !lnet_peers_start_down(); /* 1 bit!! */
         lp->lp_notify = 0;
         lp->lp_notifylnd = 0;
         lp->lp_notifying = 0;
         lp->lp_alive_count = 0;
-       lp->lp_timestamp = 0;
+        lp->lp_timestamp = 0;
+        lp->lp_alive = !lnet_peers_start_down(); /* 1 bit!! */
+        lp->lp_last_alive = cfs_time_current(); /* assumes alive */
+        lp->lp_last_query = 0; /* haven't asked NI yet */
         lp->lp_ping_timestamp = 0;
+       lp->lp_ping_feats = LNET_PING_FEAT_INVAL;
        lp->lp_nid = nid;
-        lp->lp_refcount = 2;                    /* 1 for caller; 1 for hash */
-        lp->lp_rtr_refcount = 0;
+       lp->lp_cpt = cpt2;
+       lp->lp_refcount = 2;    /* 1 for caller; 1 for hash */
+       lp->lp_rtr_refcount = 0;
 
-        LNET_LOCK();
+       lnet_net_lock(cpt);
 
-        lp2 = lnet_find_peer_locked(nid);
-        if (lp2 != NULL) {
-                LNET_UNLOCK();
-                LIBCFS_FREE(lp, sizeof(*lp));
-                LNET_LOCK();
+       if (the_lnet.ln_shutdown) {
+               rc = -ESHUTDOWN;
+               goto out;
+       }
 
-                *lpp = lp2;
-                return 0;
-        }
-                
-        lp->lp_ni = lnet_net2ni_locked(LNET_NIDNET(nid));
-        if (lp->lp_ni == NULL) {
-                LNET_UNLOCK();
-                LIBCFS_FREE(lp, sizeof(*lp));
-                LNET_LOCK();
-
-                *lpp = NULL;
-                return the_lnet.ln_shutdown ? -ESHUTDOWN : -EHOSTUNREACH;
-        }
+       lp2 = lnet_find_peer_locked(ptable, nid);
+       if (lp2 != NULL) {
+               *lpp = lp2;
+               goto out;
+       }
 
-       lp->lp_txcredits = 
-                lp->lp_mintxcredits = lp->lp_ni->ni_peertxcredits;
+       lp->lp_ni = lnet_net2ni_locked(LNET_NIDNET(nid), cpt2);
+       if (lp->lp_ni == NULL) {
+               rc = -EHOSTUNREACH;
+               goto out;
+       }
 
-        /* As a first approximation; allow this peer the same number of router
-         * buffers as it is allowed outstanding sends */
-        lp->lp_rtrcredits = lp->lp_minrtrcredits = lp->lp_txcredits;
+       lp->lp_txcredits    =
+       lp->lp_mintxcredits = lp->lp_ni->ni_peertxcredits;
+       lp->lp_rtrcredits    =
+       lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_ni);
 
-        LASSERT (!the_lnet.ln_shutdown);
-        /* can't add peers after shutdown starts */
+       list_add_tail(&lp->lp_hashlist,
+                     &ptable->pt_hash[lnet_nid2peerhash(nid)]);
+       ptable->pt_version++;
+       *lpp = lp;
 
-        list_add_tail(&lp->lp_hashlist, lnet_nid2peerhash(nid));
-        the_lnet.ln_npeers++;
-        the_lnet.ln_peertable_version++;
-        *lpp = lp;
-        return 0;
+       return 0;
+out:
+       if (lp != NULL)
+               list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
+       ptable->pt_number--;
+       return rc;
 }
 
 void
 lnet_debug_peer(lnet_nid_t nid)
 {
-        int          rc;
-        lnet_peer_t *lp;
-
-        LNET_LOCK();
-        
-        rc = lnet_nid2peer_locked(&lp, nid);
-        if (rc != 0) {
-                LNET_UNLOCK();
+       char            *aliveness = "NA";
+       lnet_peer_t     *lp;
+       int             rc;
+       int             cpt;
+
+       cpt = lnet_cpt_of_nid(nid);
+       lnet_net_lock(cpt);
+
+       rc = lnet_nid2peer_locked(&lp, nid, cpt);
+       if (rc != 0) {
+               lnet_net_unlock(cpt);
                 CDEBUG(D_WARNING, "No peer %s\n", libcfs_nid2str(nid));
                 return;
         }
 
+        if (lnet_isrouter(lp) || lnet_peer_aliveness_enabled(lp))
+                aliveness = lp->lp_alive ? "up" : "down";
+
         CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n",
-               libcfs_nid2str(lp->lp_nid), lp->lp_refcount, 
-               !lnet_isrouter(lp) ? "~rtr" : (lp->lp_alive ? "up" : "down"),
-               lp->lp_ni->ni_peertxcredits, 
-               lp->lp_rtrcredits, lp->lp_minrtrcredits, 
+               libcfs_nid2str(lp->lp_nid), lp->lp_refcount,
+               aliveness, lp->lp_ni->ni_peertxcredits,
+               lp->lp_rtrcredits, lp->lp_minrtrcredits,
                lp->lp_txcredits, lp->lp_mintxcredits, lp->lp_txqnob);
 
         lnet_peer_decref_locked(lp);
 
-        LNET_UNLOCK();
+       lnet_net_unlock(cpt);
 }