LU-13276 lnet: Update nnis to avoid infinite loop
[fs/lustre-release.git] / lnet/lnet/api-ni.c
index 545685a..f8310e4 100644
@@ -37,6 +37,9 @@
 #include <linux/ktime.h>
 #include <linux/moduleparam.h>
 #include <linux/uaccess.h>
+#ifdef HAVE_SCHED_HEADERS
+#include <linux/sched/signal.h>
+#endif
 
 #include <lnet/lib-lnet.h>
 
@@ -180,7 +183,7 @@ MODULE_PARM_DESC(lnet_drop_asym_route,
                 "Set to 1 to drop asymmetrical route messages.");
 
 #define LNET_TRANSACTION_TIMEOUT_NO_HEALTH_DEFAULT 50
-#define LNET_TRANSACTION_TIMEOUT_HEALTH_DEFAULT 10
+#define LNET_TRANSACTION_TIMEOUT_HEALTH_DEFAULT 50
 
 unsigned lnet_transaction_timeout = LNET_TRANSACTION_TIMEOUT_HEALTH_DEFAULT;
 static int transaction_to_set(const char *val, cfs_kernel_param_arg_t *kp);
@@ -200,7 +203,7 @@ module_param_call(lnet_transaction_timeout, transaction_to_set, param_get_int,
 MODULE_PARM_DESC(lnet_transaction_timeout,
                "Maximum number of seconds to wait for a peer response.");
 
-#define LNET_RETRY_COUNT_HEALTH_DEFAULT 3
+#define LNET_RETRY_COUNT_HEALTH_DEFAULT 2
 unsigned lnet_retry_count = LNET_RETRY_COUNT_HEALTH_DEFAULT;
 static int retry_count_set(const char *val, cfs_kernel_param_arg_t *kp);
 #ifdef HAVE_KERNEL_PARAM_OPS
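
Note: with health enabled, the defaults move to a 50s transaction timeout (now matching the no-health default above) and 2 retransmits instead of 3. Assuming the per-attempt LND timeout is still derived as roughly transaction_timeout / (retry_count + 1) (an assumption about code not shown in this hunk), the per-send budget changes as sketched below.

	/* Illustration only: assumed relationship between the tunables above
	 * and the per-send (LND) timeout; not copied from the LNet source.
	 *   old defaults: 10 / (3 + 1) = 2s per attempt (integer division)
	 *   new defaults: 50 / (2 + 1) = 16s per attempt
	 */
	static unsigned int assumed_lnd_timeout(unsigned int transaction_timeout,
						unsigned int retry_count)
	{
		return transaction_timeout / (retry_count + 1);
	}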
@@ -545,7 +548,6 @@ lnet_init_locks(void)
 {
        spin_lock_init(&the_lnet.ln_eq_wait_lock);
        spin_lock_init(&the_lnet.ln_msg_resend_lock);
-       init_waitqueue_head(&the_lnet.ln_eq_waitq);
        init_completion(&the_lnet.ln_mt_wait_complete);
        mutex_init(&the_lnet.ln_lnd_mutex);
 }
@@ -619,7 +621,7 @@ lnet_create_remote_nets_table(void)
 
        LASSERT(the_lnet.ln_remote_nets_hash == NULL);
        LASSERT(the_lnet.ln_remote_nets_hbits > 0);
-       LIBCFS_ALLOC(hash, LNET_REMOTE_NETS_HASH_SIZE * sizeof(*hash));
+       CFS_ALLOC_PTR_ARRAY(hash, LNET_REMOTE_NETS_HASH_SIZE);
        if (hash == NULL) {
                CERROR("Failed to create remote nets hash table\n");
                return -ENOMEM;
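
Note: CFS_ALLOC_PTR_ARRAY()/CFS_FREE_PTR_ARRAY() replace the open-coded LIBCFS_ALLOC()/LIBCFS_FREE() size arithmetic with an explicit element count, so the byte size can no longer drift out of sync with the pointer type. A plausible shape of the libcfs helper pair, given as an assumption rather than the verbatim definition:

	/* Assumed definition of the helpers used in this patch (not copied
	 * from libcfs); the point is that sizeof(*(ptr)) is computed once,
	 * next to the pointer, instead of at every call site. */
	#define CFS_ALLOC_PTR_ARRAY(ptr, count) \
		LIBCFS_ALLOC(ptr, (count) * sizeof(*(ptr)))
	#define CFS_FREE_PTR_ARRAY(ptr, count) \
		LIBCFS_FREE(ptr, (count) * sizeof(*(ptr)))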
@@ -642,9 +644,8 @@ lnet_destroy_remote_nets_table(void)
        for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
                LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i]));
 
-       LIBCFS_FREE(the_lnet.ln_remote_nets_hash,
-                   LNET_REMOTE_NETS_HASH_SIZE *
-                   sizeof(the_lnet.ln_remote_nets_hash[0]));
+       CFS_FREE_PTR_ARRAY(the_lnet.ln_remote_nets_hash,
+                          LNET_REMOTE_NETS_HASH_SIZE);
        the_lnet.ln_remote_nets_hash = NULL;
 }
 
@@ -976,10 +977,7 @@ lnet_res_container_cleanup(struct lnet_res_container *rec)
                struct list_head *e = rec->rec_active.next;
 
                list_del_init(e);
-               if (rec->rec_type == LNET_COOKIE_TYPE_EQ) {
-                       lnet_eq_free(list_entry(e, struct lnet_eq, eq_list));
-
-               } else if (rec->rec_type == LNET_COOKIE_TYPE_MD) {
+               if (rec->rec_type == LNET_COOKIE_TYPE_MD) {
                        lnet_md_free(list_entry(e, struct lnet_libmd, md_list));
 
                } else { /* NB: Active MEs should be attached on portals */
@@ -997,8 +995,7 @@ lnet_res_container_cleanup(struct lnet_res_container *rec)
        }
 
        if (rec->rec_lh_hash != NULL) {
-               LIBCFS_FREE(rec->rec_lh_hash,
-                           LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
+               CFS_FREE_PTR_ARRAY(rec->rec_lh_hash, LNET_LH_HASH_SIZE);
                rec->rec_lh_hash = NULL;
        }
 
@@ -1167,7 +1164,7 @@ lnet_prepare(lnet_pid_t requested_pid)
        INIT_LIST_HEAD(&the_lnet.ln_mt_localNIRecovq);
        INIT_LIST_HEAD(&the_lnet.ln_mt_peerNIRecovq);
        init_waitqueue_head(&the_lnet.ln_dc_waitq);
-       LNetInvalidateEQHandle(&the_lnet.ln_mt_eqh);
+       the_lnet.ln_mt_eq = NULL;
        init_completion(&the_lnet.ln_started);
 
        rc = lnet_slab_setup();
@@ -1253,9 +1250,9 @@ lnet_unprepare (void)
                the_lnet.ln_mt_zombie_rstqs = NULL;
        }
 
-       if (!LNetEQHandleIsInvalid(the_lnet.ln_mt_eqh)) {
-               rc = LNetEQFree(the_lnet.ln_mt_eqh);
-               LNetInvalidateEQHandle(&the_lnet.ln_mt_eqh);
+       if (the_lnet.ln_mt_eq) {
+               rc = LNetEQFree(the_lnet.ln_mt_eq);
+               the_lnet.ln_mt_eq = NULL;
                LASSERT(rc == 0);
        }
 
@@ -1513,7 +1510,7 @@ lnet_ping_buffer_alloc(int nnis, gfp_t gfp)
 void
 lnet_ping_buffer_free(struct lnet_ping_buffer *pbuf)
 {
-       LASSERT(lnet_ping_buffer_numref(pbuf) == 0);
+       LASSERT(atomic_read(&pbuf->pb_refcnt) == 0);
        LIBCFS_FREE(pbuf, LNET_PING_BUFFER_SIZE(pbuf->pb_nnis));
 }
 
@@ -1680,9 +1677,10 @@ lnet_ping_target_setup(struct lnet_ping_buffer **ppbuf,
        int rc, rc2;
 
        if (set_eq) {
-               rc = LNetEQAlloc(0, lnet_ping_target_event_handler,
-                                &the_lnet.ln_ping_target_eq);
-               if (rc != 0) {
+               the_lnet.ln_ping_target_eq =
+                       LNetEQAlloc(lnet_ping_target_event_handler);
+               if (IS_ERR(the_lnet.ln_ping_target_eq)) {
+                       rc = PTR_ERR(the_lnet.ln_ping_target_eq);
                        CERROR("Can't allocate ping buffer EQ: %d\n", rc);
                        return rc;
                }
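
Note: LNetEQAlloc() no longer takes a queue size and an out-handle; it takes just the callback and returns the EQ pointer directly, with failures reported through ERR_PTR(). Every converted caller in this patch follows the same pattern; a minimal sketch of the new calling convention (my_setup() and my_event_handler() are illustrative names, not part of the patch):

	static void my_event_handler(struct lnet_event *ev);

	static int my_setup(void)
	{
		struct lnet_eq *eq;

		eq = LNetEQAlloc(my_event_handler);
		if (IS_ERR(eq))
			return PTR_ERR(eq);	/* negative errno, no out-handle */

		/* ... eq is used directly and later released with LNetEQFree(eq) ... */
		return 0;
	}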
@@ -1726,7 +1724,7 @@ lnet_ping_target_setup(struct lnet_ping_buffer **ppbuf,
 fail_unlink_ping_me:
        LNetMEUnlink(me);
 fail_decref_ping_buffer:
-       LASSERT(lnet_ping_buffer_numref(*ppbuf) == 1);
+       LASSERT(atomic_read(&(*ppbuf)->pb_refcnt) == 1);
        lnet_ping_buffer_decref(*ppbuf);
        *ppbuf = NULL;
 fail_free_eq:
@@ -1741,19 +1739,14 @@ static void
 lnet_ping_md_unlink(struct lnet_ping_buffer *pbuf,
                    struct lnet_handle_md *ping_mdh)
 {
-       sigset_t        blocked = cfs_block_allsigs();
-
        LNetMDUnlink(*ping_mdh);
        LNetInvalidateMDHandle(ping_mdh);
 
        /* NB the MD could be busy; this just starts the unlink */
-       while (lnet_ping_buffer_numref(pbuf) > 1) {
+       while (atomic_read(&pbuf->pb_refcnt) > 1) {
                CDEBUG(D_NET, "Still waiting for ping data MD to unlink\n");
-               set_current_state(TASK_UNINTERRUPTIBLE);
-               schedule_timeout(cfs_time_seconds(1));
+               schedule_timeout_uninterruptible(cfs_time_seconds(1));
        }
-
-       cfs_restore_sigs(blocked);
 }
 
 static void
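
Note: the open-coded set_current_state(TASK_UNINTERRUPTIBLE) + schedule_timeout() pairs, and the cfs_block_allsigs()/cfs_restore_sigs() bracketing they needed, are collapsed into schedule_timeout_uninterruptible(). The kernel helper is essentially the same two-step sequence, shown here only to make the equivalence explicit:

	/* Rough equivalent of schedule_timeout_uninterruptible(timeout):
	 * the task state guarantees signals cannot wake the waiter, which is
	 * why the explicit signal blocking above can be dropped. */
	static signed long equivalent_wait(signed long timeout)
	{
		set_current_state(TASK_UNINTERRUPTIBLE);
		return schedule_timeout(timeout);
	}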
@@ -1861,14 +1854,16 @@ int lnet_push_target_resize(void)
        struct lnet_handle_md old_mdh;
        struct lnet_ping_buffer *pbuf;
        struct lnet_ping_buffer *old_pbuf;
-       int nnis = the_lnet.ln_push_target_nnis;
+       int nnis;
        int rc;
 
+again:
+       nnis = the_lnet.ln_push_target_nnis;
        if (nnis <= 0) {
                rc = -EINVAL;
                goto fail_return;
        }
-again:
+
        pbuf = lnet_ping_buffer_alloc(nnis, GFP_NOFS);
        if (!pbuf) {
                rc = -ENOMEM;
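
Note: this hunk is the actual LU-13276 fix. Previously nnis was sampled from the_lnet.ln_push_target_nnis once, before the again: label, so when the retry path was taken because ln_push_target_nnis had grown in the meantime, every pass re-allocated a buffer of the same stale size and the function could spin forever. Moving the label above the read makes each retry pick up the current value. A condensed sketch of the intended control flow, assuming the later "did it grow while we worked?" check that triggers the retry:

	again:
		nnis = the_lnet.ln_push_target_nnis;	/* re-read on every retry */
		if (nnis <= 0)
			return -EINVAL;

		pbuf = lnet_ping_buffer_alloc(nnis, GFP_NOFS);
		/* ... install the new buffer, unlink the old MD ... */

		if (nnis < the_lnet.ln_push_target_nnis)	/* grew while resizing */
			goto again;				/* now sees the new size */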
@@ -1950,9 +1945,10 @@ static int lnet_push_target_init(void)
        if (the_lnet.ln_push_target)
                return -EALREADY;
 
-       rc = LNetEQAlloc(0, lnet_push_target_event_handler,
-                        &the_lnet.ln_push_target_eq);
-       if (rc) {
+       the_lnet.ln_push_target_eq =
+               LNetEQAlloc(lnet_push_target_event_handler);
+       if (IS_ERR(the_lnet.ln_push_target_eq)) {
+               rc = PTR_ERR(the_lnet.ln_push_target_eq);
                CERROR("Can't allocated push target EQ: %d\n", rc);
                return rc;
        }
@@ -1964,7 +1960,7 @@ static int lnet_push_target_init(void)
 
        if (rc) {
                LNetEQFree(the_lnet.ln_push_target_eq);
-               LNetInvalidateEQHandle(&the_lnet.ln_push_target_eq);
+               the_lnet.ln_push_target_eq = NULL;
        }
 
        return rc;
@@ -1981,10 +1977,9 @@ static void lnet_push_target_fini(void)
        LNetInvalidateMDHandle(&the_lnet.ln_push_target_md);
 
        /* Wait for the unlink to complete. */
-       while (lnet_ping_buffer_numref(the_lnet.ln_push_target) > 1) {
+       while (atomic_read(&the_lnet.ln_push_target->pb_refcnt) > 1) {
                CDEBUG(D_NET, "Still waiting for ping data MD to unlink\n");
-               set_current_state(TASK_UNINTERRUPTIBLE);
-               schedule_timeout(cfs_time_seconds(1));
+               schedule_timeout_uninterruptible(cfs_time_seconds(1));
        }
 
        lnet_ping_buffer_decref(the_lnet.ln_push_target);
@@ -1992,7 +1987,7 @@ static void lnet_push_target_fini(void)
        the_lnet.ln_push_target_nnis = 0;
 
        LNetEQFree(the_lnet.ln_push_target_eq);
-       LNetInvalidateEQHandle(&the_lnet.ln_push_target_eq);
+       the_lnet.ln_push_target_eq = NULL;
 }
 
 static int
@@ -2060,8 +2055,7 @@ lnet_clear_zombies_nis_locked(struct lnet_net *net)
                                       "Waiting for zombie LNI %s\n",
                                       libcfs_nid2str(ni->ni_nid));
                        }
-                       set_current_state(TASK_UNINTERRUPTIBLE);
-                       schedule_timeout(cfs_time_seconds(1));
+                       schedule_timeout_uninterruptible(cfs_time_seconds(1));
                        lnet_net_lock(LNET_LOCK_EX);
                        continue;
                }
@@ -2155,16 +2149,12 @@ lnet_shutdown_lndnets(void)
        lnet_net_lock(LNET_LOCK_EX);
        the_lnet.ln_state = LNET_STATE_STOPPING;
 
-       while (!list_empty(&the_lnet.ln_nets)) {
-               /*
-                * move the nets to the zombie list to avoid them being
-                * picked up for new work. LONET is also included in the
-                * Nets that will be moved to the zombie list
-                */
-               net = list_entry(the_lnet.ln_nets.next,
-                                struct lnet_net, net_list);
-               list_move(&net->net_list, &the_lnet.ln_net_zombie);
-       }
+       /*
+        * move the nets to the zombie list to avoid them being
+        * picked up for new work. LONET is also included in the
+        * Nets that will be moved to the zombie list
+        */
+       list_splice_init(&the_lnet.ln_nets, &the_lnet.ln_net_zombie);
 
        /* Drop the cached loopback Net. */
        if (the_lnet.ln_loni != NULL) {
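
Note: the per-entry list_move() loop is replaced by a single list_splice_init(), which transplants the whole ln_nets list onto ln_net_zombie in constant time and leaves ln_nets empty and reinitialized, the same end state with no iteration. Minimal illustration of the primitive, independent of LNet:

	/* list_splice_init(src, dst): move every entry from src onto dst,
	 * then reinitialize src so it remains a valid empty list. */
	LIST_HEAD(src);
	LIST_HEAD(dst);
	/* ... populate src ... */
	list_splice_init(&src, &dst);	/* src now empty, dst holds all entries */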
@@ -2498,8 +2488,8 @@ int lnet_lib_init(void)
        lnet_assert_wire_constants();
 
        /* refer to global cfs_cpt_table for now */
-       the_lnet.ln_cpt_table   = cfs_cpt_table;
-       the_lnet.ln_cpt_number  = cfs_cpt_number(cfs_cpt_table);
+       the_lnet.ln_cpt_table = cfs_cpt_tab;
+       the_lnet.ln_cpt_number = cfs_cpt_number(cfs_cpt_tab);
 
        LASSERT(the_lnet.ln_cpt_number > 0);
        if (the_lnet.ln_cpt_number > LNET_CPT_MAX) {
@@ -2657,8 +2647,9 @@ LNetNIInit(lnet_pid_t requested_pid)
 
        lnet_ping_target_update(pbuf, ping_mdh);
 
-       rc = LNetEQAlloc(0, lnet_mt_event_handler, &the_lnet.ln_mt_eqh);
-       if (rc != 0) {
+       the_lnet.ln_mt_eq = LNetEQAlloc(lnet_mt_event_handler);
+       if (IS_ERR(the_lnet.ln_mt_eq)) {
+               rc = PTR_ERR(the_lnet.ln_mt_eq);
                CERROR("Can't allocate monitor thread EQ: %d\n", rc);
                goto err_stop_ping;
        }
@@ -4087,24 +4078,45 @@ LNetGetId(unsigned int index, struct lnet_process_id *id)
 }
 EXPORT_SYMBOL(LNetGetId);
 
+struct ping_data {
+       int rc;
+       int replied;
+       struct lnet_handle_md mdh;
+       struct completion completion;
+};
+
+static void
+lnet_ping_event_handler(struct lnet_event *event)
+{
+       struct ping_data *pd = event->md.user_ptr;
+
+       CDEBUG(D_NET, "ping event (%d %d)%s\n",
+              event->type, event->status,
+              event->unlinked ? " unlinked" : "");
+
+       if (event->status) {
+               if (!pd->rc)
+                       pd->rc = event->status;
+       } else if (event->type == LNET_EVENT_REPLY) {
+               pd->replied = 1;
+               pd->rc = event->mlength;
+       }
+       if (event->unlinked)
+               complete(&pd->completion);
+}
+
 static int lnet_ping(struct lnet_process_id id, signed long timeout,
                     struct lnet_process_id __user *ids, int n_ids)
 {
-       struct lnet_handle_eq eqh;
-       struct lnet_handle_md mdh;
-       struct lnet_event event;
+       struct lnet_eq *eq;
        struct lnet_md md = { NULL };
-       int which;
-       int unlinked = 0;
-       int replied = 0;
-       const signed long a_long_time = cfs_time_seconds(60);
+       struct ping_data pd = { 0 };
        struct lnet_ping_buffer *pbuf;
        struct lnet_process_id tmpid;
        int i;
        int nob;
        int rc;
        int rc2;
-       sigset_t blocked;
 
        /* n_ids limit is arbitrary */
        if (n_ids <= 0 || id.nid == LNET_NID_ANY)
@@ -4124,9 +4136,9 @@ static int lnet_ping(struct lnet_process_id id, signed long timeout,
        if (!pbuf)
                return -ENOMEM;
 
-       /* NB 2 events max (including any unlink event) */
-       rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh);
-       if (rc != 0) {
+       eq = LNetEQAlloc(lnet_ping_event_handler);
+       if (IS_ERR(eq)) {
+               rc = PTR_ERR(eq);
                CERROR("Can't allocate EQ: %d\n", rc);
                goto fail_ping_buffer_decref;
        }
@@ -4137,79 +4149,40 @@ static int lnet_ping(struct lnet_process_id id, signed long timeout,
        md.threshold = 2; /* GET/REPLY */
        md.max_size  = 0;
        md.options   = LNET_MD_TRUNCATE;
-       md.user_ptr  = NULL;
-       md.eq_handle = eqh;
+       md.user_ptr  = &pd;
+       md.eq_handle = eq;
 
-       rc = LNetMDBind(md, LNET_UNLINK, &mdh);
+       init_completion(&pd.completion);
+
+       rc = LNetMDBind(md, LNET_UNLINK, &pd.mdh);
        if (rc != 0) {
                CERROR("Can't bind MD: %d\n", rc);
                goto fail_free_eq;
        }
 
-       rc = LNetGet(LNET_NID_ANY, mdh, id,
+       rc = LNetGet(LNET_NID_ANY, pd.mdh, id,
                     LNET_RESERVED_PORTAL,
                     LNET_PROTO_PING_MATCHBITS, 0, false);
 
        if (rc != 0) {
                /* Don't CERROR; this could be deliberate! */
-               rc2 = LNetMDUnlink(mdh);
+               rc2 = LNetMDUnlink(pd.mdh);
                LASSERT(rc2 == 0);
 
                /* NB must wait for the UNLINK event below... */
-               unlinked = 1;
-               timeout = a_long_time;
-       }
-
-       do {
-               /* MUST block for unlink to complete */
-               if (unlinked)
-                       blocked = cfs_block_allsigs();
-
-               rc2 = LNetEQPoll(&eqh, 1, timeout, &event, &which);
-
-               if (unlinked)
-                       cfs_restore_sigs(blocked);
-
-               CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2,
-                      (rc2 <= 0) ? -1 : event.type,
-                      (rc2 <= 0) ? -1 : event.status,
-                      (rc2 > 0 && event.unlinked) ? " unlinked" : "");
-
-               LASSERT(rc2 != -EOVERFLOW);     /* can't miss anything */
-
-               if (rc2 <= 0 || event.status != 0) {
-                       /* timeout or error */
-                       if (!replied && rc == 0)
-                               rc = (rc2 < 0) ? rc2 :
-                                    (rc2 == 0) ? -ETIMEDOUT :
-                                    event.status;
-
-                       if (!unlinked) {
-                               /* Ensure completion in finite time... */
-                               LNetMDUnlink(mdh);
-                               /* No assertion (racing with network) */
-                               unlinked = 1;
-                               timeout = a_long_time;
-                       } else if (rc2 == 0) {
-                               /* timed out waiting for unlink */
-                               CWARN("ping %s: late network completion\n",
-                                     libcfs_id2str(id));
-                       }
-               } else if (event.type == LNET_EVENT_REPLY) {
-                       replied = 1;
-                       rc = event.mlength;
-               }
-       } while (rc2 <= 0 || !event.unlinked);
+       }
 
-       if (!replied) {
-               if (rc >= 0)
-                       CWARN("%s: Unexpected rc >= 0 but no reply!\n",
-                             libcfs_id2str(id));
+       if (wait_for_completion_timeout(&pd.completion, timeout) == 0) {
+               /* Ensure completion in finite time... */
+               LNetMDUnlink(pd.mdh);
+               wait_for_completion(&pd.completion);
+       }
+       if (!pd.replied) {
                rc = -EIO;
                goto fail_free_eq;
        }
 
-       nob = rc;
+       nob = pd.rc;
        LASSERT(nob >= 0 && nob <= LNET_PING_INFO_SIZE(n_ids));
 
        rc = -EPROTO;           /* if I can't parse... */
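
Note: the LNetEQPoll()/event-copy loop is gone. lnet_ping() now keeps its result in a stack-allocated struct ping_data, lets lnet_ping_event_handler() fill it in from callback context, and waits on a completion that fires when the MD is unlinked. If the timed wait expires, the caller forces the unlink and then waits unconditionally, so the function still cannot return while the network might touch pd. The generic shape of that pattern, stripped of the LNet specifics (submit_request()/cancel_request() are stand-ins, not real APIs; cancel_request() plays the role of LNetMDUnlink()):

	struct completion done;

	init_completion(&done);
	submit_request();		/* handler calls complete(&done) on the unlink event */

	if (wait_for_completion_timeout(&done, timeout) == 0) {
		cancel_request();	/* start cancellation in finite time */
		wait_for_completion(&done);	/* handler still completes exactly once */
	}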
@@ -4263,7 +4236,7 @@ static int lnet_ping(struct lnet_process_id id, signed long timeout,
        rc = pbuf->pb_info.pi_nnis;
 
  fail_free_eq:
-       rc2 = LNetEQFree(eqh);
+       rc2 = LNetEQFree(eq);
        if (rc2 != 0)
                CERROR("rc2 %d\n", rc2);
        LASSERT(rc2 == 0);
@@ -4285,7 +4258,6 @@ lnet_discover(struct lnet_process_id id, __u32 force,
        int i;
        int rc;
        int max_intf = lnet_interfaces_max;
-       size_t buf_size;
 
        if (n_ids <= 0 ||
            id.nid == LNET_NID_ANY)
@@ -4301,9 +4273,7 @@ lnet_discover(struct lnet_process_id id, __u32 force,
        if (n_ids > max_intf)
                n_ids = max_intf;
 
-       buf_size = n_ids * sizeof(*buf);
-
-       LIBCFS_ALLOC(buf, buf_size);
+       CFS_ALLOC_PTR_ARRAY(buf, n_ids);
        if (!buf)
                return -ENOMEM;
 
@@ -4356,7 +4326,7 @@ out_decref:
 out:
        lnet_net_unlock(cpt);
 
-       LIBCFS_FREE(buf, buf_size);
+       CFS_FREE_PTR_ARRAY(buf, n_ids);
 
        return rc;
 }