Whamcloud - gitweb
LU-6128 lnet: handle lnet_check_routes() errors
[fs/lustre-release.git] / lnet / lnet / api-ni.c
index 7a3c8a3..3ec8962 100644 (file)
@@ -68,7 +68,6 @@ static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
 CFS_MODULE_PARM(rnet_htable_size, "i", int, 0444,
                "size of remote network hash table");
 
-static void lnet_ping_target_fini(void);
 static int lnet_ping(lnet_process_id_t id, int timeout_ms,
                     lnet_process_id_t *ids, int n_ids);
 
@@ -729,26 +728,25 @@ lnet_prepare(lnet_pid_t requested_pid)
                return -ENETDOWN;
        }
 
-        LASSERT (the_lnet.ln_refcount == 0);
+       LASSERT(the_lnet.ln_refcount == 0);
 
-        the_lnet.ln_routing = 0;
+       the_lnet.ln_routing = 0;
 
 #ifdef __KERNEL__
-        LASSERT ((requested_pid & LNET_PID_USERFLAG) == 0);
-        the_lnet.ln_pid = requested_pid;
+       LASSERT((requested_pid & LNET_PID_USERFLAG) == 0);
+       the_lnet.ln_pid = requested_pid;
 #else
-        if (the_lnet.ln_server_mode_flag) {/* server case (uOSS) */
+       if (the_lnet.ln_server_mode_flag) {/* server case (uOSS) */
                LASSERT ((requested_pid & LNET_PID_USERFLAG) == 0);
-
-               if (current_uid() != 0) /* Only root can run user-space server */
+               /* Only root can run user-space server */
+               if (current_uid() != 0)
                        return -EPERM;
                the_lnet.ln_pid = requested_pid;
 
-        } else {/* client case (liblustre) */
-
-                /* My PID must be unique on this node and flag I'm userspace */
-                the_lnet.ln_pid = getpid() | LNET_PID_USERFLAG;
-        }
+       } else {/* client case (liblustre) */
+               /* My PID must be unique on this node and flag I'm userspace */
+               the_lnet.ln_pid = getpid() | LNET_PID_USERFLAG;
+       }
 #endif
 
        INIT_LIST_HEAD(&the_lnet.ln_test_peers);
@@ -789,15 +787,19 @@ lnet_prepare(lnet_pid_t requested_pid)
 
        recs = lnet_res_containers_create(LNET_COOKIE_TYPE_ME, LNET_FL_MAX_MES,
                                          sizeof(lnet_me_t));
-       if (recs == NULL)
+       if (recs == NULL) {
+               rc = -ENOMEM;
                goto failed;
+       }
 
        the_lnet.ln_me_containers = recs;
 
        recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD, LNET_FL_MAX_MDS,
                                          sizeof(lnet_libmd_t));
-       if (recs == NULL)
+       if (recs == NULL) {
+               rc = -ENOMEM;
                goto failed;
+       }
 
        the_lnet.ln_md_containers = recs;
 
@@ -1108,7 +1110,7 @@ lnet_ping_info_setup(lnet_ping_info_t **ppinfo, lnet_handle_md_t *md_handle,
 {
        lnet_handle_me_t  me_handle;
        lnet_process_id_t id = {LNET_NID_ANY, LNET_PID_ANY};
-       lnet_md_t         md = {0};
+       lnet_md_t         md = {NULL};
        int               rc, rc2;
 
        if (set_eq) {
@@ -1415,7 +1417,7 @@ static int
 lnet_startup_lndni(struct lnet_ni *ni, __s32 peer_timeout,
                   __s32 peer_cr, __s32 peer_buf_cr, __s32 credits)
 {
-       int                     rc = 0;
+       int                     rc = -EINVAL;
        int                     lnd_type;
        lnd_t                   *lnd;
        struct lnet_tx_queue    *tq;
@@ -1433,19 +1435,21 @@ lnet_startup_lndni(struct lnet_ni *ni, __s32 peer_timeout,
 
        /* Make sure this new NI is unique. */
        lnet_net_lock(LNET_LOCK_EX);
-       if (!lnet_net_unique(LNET_NIDNET(ni->ni_nid), &the_lnet.ln_nis)) {
+       rc = lnet_net_unique(LNET_NIDNET(ni->ni_nid), &the_lnet.ln_nis);
+       lnet_net_unlock(LNET_LOCK_EX);
+
+       if (!rc) {
                if (lnd_type == LOLND) {
-                       lnet_net_unlock(LNET_LOCK_EX);
                        lnet_ni_free(ni);
                        return 0;
                }
-               lnet_net_unlock(LNET_LOCK_EX);
 
                CERROR("Net %s is not unique\n",
                       libcfs_net2str(LNET_NIDNET(ni->ni_nid)));
+
+               rc = -EEXIST;
                goto failed0;
        }
-       lnet_net_unlock(LNET_LOCK_EX);
 
        LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
        lnd = lnet_find_lnd_by_type(lnd_type);
@@ -1453,8 +1457,7 @@ lnet_startup_lndni(struct lnet_ni *ni, __s32 peer_timeout,
 #ifdef __KERNEL__
        if (lnd == NULL) {
                LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
-               rc = request_module("%s",
-                                   libcfs_lnd2modname(lnd_type));
+               rc = request_module("%s", libcfs_lnd2modname(lnd_type));
                LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
 
                lnd = lnet_find_lnd_by_type(lnd_type);
@@ -1468,6 +1471,7 @@ lnet_startup_lndni(struct lnet_ni *ni, __s32 peer_timeout,
                                           "compiled with kernel module "
                                           "loading support.");
 #endif
+                       rc = -EINVAL;
                        goto failed0;
                }
        }
@@ -1558,7 +1562,7 @@ lnet_startup_lndni(struct lnet_ni *ni, __s32 peer_timeout,
        if (ni->ni_peertxcredits == 0 || ni->ni_maxtxcredits == 0) {
                LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
                                   libcfs_lnd2str(lnd->lnd_type),
-                                  ni->ni_peertxcredits == 0 ? 
+                                  ni->ni_peertxcredits == 0 ?
                                        "" : "per-peer ");
                /* shutdown the NI since if we get here then it must've already
                 * been started
@@ -1581,7 +1585,7 @@ lnet_startup_lndni(struct lnet_ni *ni, __s32 peer_timeout,
        return 0;
 failed0:
        lnet_ni_free(ni);
-       return -EINVAL;
+       return rc;
 }
 
 static int
@@ -1740,7 +1744,7 @@ int
 LNetNIInit(lnet_pid_t requested_pid)
 {
        int                     im_a_router = 0;
-       int                     rc, rc2;
+       int                     rc;
        int                     ni_count;
        lnet_ping_info_t        *pinfo;
        lnet_handle_md_t        md_handle;
@@ -1828,10 +1832,7 @@ LNetNIInit(lnet_pid_t requested_pid)
        return 0;
 
 failed4:
-       lnet_ping_md_unlink(pinfo, &md_handle);
-       lnet_ping_info_free(pinfo);
-       rc2 = LNetEQFree(the_lnet.ln_ping_target_eq);
-       LASSERT(rc2 == 0);
+       lnet_ping_target_fini();
 failed3:
        the_lnet.ln_refcount = 0;
        lnet_acceptor_stop();
@@ -1993,6 +1994,7 @@ lnet_dyn_add_ni(lnet_pid_t requested_pid, char *nets,
        struct lnet_ni          *ni;
        struct list_head        net_head;
        int                     rc;
+       lnet_remotenet_t        *rnet;
 
        INIT_LIST_HEAD(&net_head);
 
@@ -2008,12 +2010,25 @@ lnet_dyn_add_ni(lnet_pid_t requested_pid, char *nets,
                goto failed0;
        }
 
+       ni = list_entry(net_head.next, struct lnet_ni, ni_list);
+
+       lnet_net_lock(LNET_LOCK_EX);
+       rnet = lnet_find_net_locked(LNET_NIDNET(ni->ni_nid));
+       lnet_net_unlock(LNET_LOCK_EX);
+       /* make sure that the net added doesn't invalidate the current
+        * configuration LNet is keeping */
+       if (rnet != NULL) {
+               CERROR("Adding net %s will invalidate routing configuration\n",
+                      nets);
+               rc = -EUSERS;
+               goto failed0;
+       }
+
        rc = lnet_ping_info_setup(&pinfo, &md_handle, 1 + lnet_get_ni_count(),
                                  false);
        if (rc != 0)
                goto failed0;
 
-       ni = list_entry(net_head.next, struct lnet_ni, ni_list);
        list_del_init(&ni->ni_list);
 
        rc = lnet_startup_lndni(ni, peer_timeout, peer_cr,
@@ -2021,6 +2036,16 @@ lnet_dyn_add_ni(lnet_pid_t requested_pid, char *nets,
        if (rc != 0)
                goto failed1;
 
+       if (ni->ni_lnd->lnd_accept != NULL) {
+               rc = lnet_acceptor_start();
+               if (rc < 0) {
+                       /* shutdown the ni that we just started */
+                       CERROR("Failed to start up acceptor thread\n");
+                       lnet_shutdown_lndni(ni);
+                       goto failed1;
+               }
+       }
+
        lnet_ping_target_update(pinfo, md_handle);
        LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
 
@@ -2068,6 +2093,10 @@ lnet_dyn_del_ni(__u32 net)
        lnet_ni_decref_locked(ni, 0);
 
        lnet_shutdown_lndni(ni);
+
+       if (lnet_count_acceptor_nis() == 0)
+               lnet_acceptor_stop();
+
        lnet_ping_target_update(pinfo, md_handle);
        goto out;
 failed:
@@ -2126,8 +2155,14 @@ LNetCtl(unsigned int cmd, void *arg)
                                    config->cfg_nid,
                                    config->cfg_config_u.cfg_route.
                                        rtr_priority);
+               if (rc == 0) {
+                       rc = lnet_check_routes();
+                       if (rc != 0)
+                               lnet_del_route(config->cfg_net,
+                                              config->cfg_nid);
+               }
                LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
-               return (rc != 0) ? rc : lnet_check_routes();
+               return rc;
 
        case IOC_LIBCFS_DEL_ROUTE:
                config = arg;
@@ -2396,7 +2431,7 @@ lnet_ping(lnet_process_id_t id, int timeout_ms, lnet_process_id_t __user *ids,
        lnet_handle_eq_t     eqh;
        lnet_handle_md_t     mdh;
        lnet_event_t         event;
-       lnet_md_t            md = {0};
+       lnet_md_t            md = { NULL };
        int                  which;
        int                  unlinked = 0;
        int                  replied = 0;