From: eeb Date: Fri, 11 Feb 2005 20:22:44 +0000 (+0000) Subject: * ghastly hack to serialise RapkSetRiParams with comms scheduler thread X-Git-Tag: v1_7_100~1625 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=0284022e44588fb4c0525351b0d20b4453683eb3 * ghastly hack to serialise RapkSetRiParams with comms scheduler thread * Ranal "working" @ 1.2GBytes/sec single channel, 2.4GBytes/sec double --- diff --git a/lnet/klnds/ralnd/ralnd.c b/lnet/klnds/ralnd/ralnd.c index 35f436e..014b4c6 100644 --- a/lnet/klnds/ralnd/ralnd.c +++ b/lnet/klnds/ralnd/ralnd.c @@ -22,6 +22,8 @@ */ #include "ranal.h" +static int kranal_devids[] = {RAPK_MAIN_DEVICE_ID, + RAPK_EXPANSION_DEVICE_ID}; nal_t kranal_api; ptl_handle_ni_t kranal_ni; @@ -533,7 +535,17 @@ int kranal_set_conn_params(kra_conn_t *conn, kra_connreq_t *connreq, __u32 peer_ip, int peer_port) { - RAP_RETURN rrc; + kra_device_t *dev = conn->rac_device; + unsigned long flags; + RAP_RETURN rrc; + + /* tell scheduler to release the setri_mutex... */ + spin_lock_irqsave(&dev->rad_lock, flags); + dev->rad_setri_please++; + wake_up(&dev->rad_waitq); + spin_unlock_irqrestore(&dev->rad_lock, flags); + /* ...and grab it */ + down(&dev->rad_setri_mutex); rrc = RapkSetRiParams(conn->rac_rihandle, &connreq->racr_riparams); if (rrc != RAP_SUCCESS) { @@ -542,6 +554,14 @@ kranal_set_conn_params(kra_conn_t *conn, kra_connreq_t *connreq, return -EPROTO; } + /* release the setri_mutex... */ + up(&dev->rad_setri_mutex); + /* ...and tell scheduler we're all done */ + spin_lock_irqsave(&dev->rad_lock, flags); + dev->rad_setri_please--; + wake_up(&dev->rad_waitq); + spin_unlock_irqrestore(&dev->rad_lock, flags); + conn->rac_peerstamp = connreq->racr_peerstamp; conn->rac_peer_connstamp = connreq->racr_connstamp; conn->rac_keepalive = RANAL_TIMEOUT2KEEPALIVE(connreq->racr_timeout); @@ -894,6 +914,9 @@ kranal_conn_handshake (struct socket *sock, kra_peer_t *peer) if (nstale != 0) CWARN("Closed %d stale conns to "LPX64"\n", nstale, peer_nid); + CDEBUG(D_WARNING, "New connection to "LPX64" on devid[%d] = %d\n", + peer_nid, conn->rac_device->rad_idx, conn->rac_device->rad_id); + /* Ensure conn gets checked. Transmits may have been queued and an * FMA event may have happened before it got in the cq hash table */ kranal_schedule_conn(conn); @@ -1720,6 +1743,11 @@ kranal_device_init(int id, kra_device_t *dev) const int total_ntx = RANAL_NTX + RANAL_NTX_NBLK; RAP_RETURN rrc; + /* The awful serialise RapkSetRiParams with the device scheduler + * work-around! */ + dev->rad_setri_please = 0; + init_MUTEX(&dev->rad_setri_mutex); + dev->rad_id = id; rrc = RapkGetDeviceByIndex(id, kranal_device_callback, &dev->rad_handle); @@ -1893,8 +1921,6 @@ kranal_api_startup (nal_t *nal, ptl_pid_t requested_pid, ptl_ni_limits_t *requested_limits, ptl_ni_limits_t *actual_limits) { - static int device_ids[] = {RAPK_MAIN_DEVICE_ID, - RAPK_EXPANSION_DEVICE_ID}; struct timeval tv; ptl_process_id_t process_id; int pkmem = atomic_read(&portal_kmemory); @@ -2012,11 +2038,14 @@ kranal_api_startup (nal_t *nal, ptl_pid_t requested_pid, } } - LASSERT(kranal_data.kra_ndevs == 0); - for (i = 0; i < sizeof(device_ids)/sizeof(device_ids[0]); i++) { + LASSERT (kranal_data.kra_ndevs == 0); + + for (i = 0; i < sizeof(kranal_devids)/sizeof(kranal_devids[0]); i++) { + LASSERT (i < RANAL_MAXDEVS); + dev = &kranal_data.kra_devices[kranal_data.kra_ndevs]; - rc = kranal_device_init(device_ids[i], dev); + rc = kranal_device_init(kranal_devids[i], dev); if (rc == 0) kranal_data.kra_ndevs++; diff --git a/lnet/klnds/ralnd/ralnd.h b/lnet/klnds/ralnd/ralnd.h index 0843058..5cb1640 100644 --- a/lnet/klnds/ralnd/ralnd.h +++ b/lnet/klnds/ralnd/ralnd.h @@ -111,10 +111,11 @@ typedef struct int rad_idx; /* index in kra_devices */ int rad_ready; /* set by device callback */ struct list_head rad_connq; /* connections requiring attention */ - struct list_head rad_zombies; /* connections to free */ wait_queue_head_t rad_waitq; /* scheduler waits here */ spinlock_t rad_lock; /* serialise */ void *rad_scheduler; /* scheduling thread */ + int rad_setri_please; /* ++ when connd wants to setri */ + struct semaphore rad_setri_mutex; /* serialise setri */ } kra_device_t; typedef struct diff --git a/lnet/klnds/ralnd/ralnd_cb.c b/lnet/klnds/ralnd/ralnd_cb.c index 38f1b77..fc5ed3f 100644 --- a/lnet/klnds/ralnd/ralnd_cb.c +++ b/lnet/klnds/ralnd/ralnd_cb.c @@ -1397,6 +1397,10 @@ kranal_sendmsg(kra_conn_t *conn, kra_msg_t *msg, return 0; case RAP_NOT_DONE: + if (time_after_eq(jiffies, + conn->rac_last_tx + conn->rac_keepalive*HZ)) + CDEBUG(D_WARNING, "EAGAIN sending %02x (idle %lu secs)\n", + msg->ram_type, (jiffies - conn->rac_last_tx)/HZ); return -EAGAIN; } } @@ -1466,7 +1470,9 @@ kranal_process_fmaq (kra_conn_t *conn) if (time_after_eq(jiffies, conn->rac_last_tx + conn->rac_keepalive * HZ)) { - CDEBUG(D_NET, "sending NOOP (idle)\n"); + CDEBUG(D_NET, "sending NOOP -> "LPX64" (%p idle %lu(%ld))\n", + conn->rac_peer->rap_nid, conn, + (jiffies - conn->rac_last_tx)/HZ, conn->rac_keepalive); kranal_init_msg(&conn->rac_msg, RANAL_MSG_NOOP); kranal_sendmsg(conn, &conn->rac_msg, NULL, 0); } @@ -1829,12 +1835,14 @@ void kranal_complete_closed_conn (kra_conn_t *conn) { kra_tx_t *tx; + int nfma; + int nreplies; LASSERT (conn->rac_state == RANAL_CONN_CLOSED); LASSERT (list_empty(&conn->rac_list)); LASSERT (list_empty(&conn->rac_hashlist)); - while (!list_empty(&conn->rac_fmaq)) { + for (nfma = 0; !list_empty(&conn->rac_fmaq); nfma++) { tx = list_entry(conn->rac_fmaq.next, kra_tx_t, tx_list); list_del(&tx->tx_list); @@ -1843,12 +1851,15 @@ kranal_complete_closed_conn (kra_conn_t *conn) LASSERT (list_empty(&conn->rac_rdmaq)); - while (!list_empty(&conn->rac_replyq)) { + for (nreplies = 0; !list_empty(&conn->rac_replyq); nreplies++) { tx = list_entry(conn->rac_replyq.next, kra_tx_t, tx_list); list_del(&tx->tx_list); kranal_tx_done(tx, -ECONNABORTED); } + + CDEBUG(D_WARNING, "Closed conn %p -> "LPX64": nmsg %d nreplies %d\n", + conn, conn->rac_peer->rap_nid, nfma, nreplies); } int @@ -1868,6 +1879,9 @@ kranal_scheduler (void *arg) dev->rad_scheduler = current; init_waitqueue_entry(&wait, current); + /* prevent connd from doing setri until requested */ + down(&dev->rad_setri_mutex); + spin_lock_irqsave(&dev->rad_lock, flags); while (!kranal_data.kra_shutdown) { @@ -1882,6 +1896,19 @@ kranal_scheduler (void *arg) spin_lock_irqsave(&dev->rad_lock, flags); } + /* Ghastly hack to ensure RapkSetRiParams() serialises with + * other comms */ + if (dev->rad_setri_please != 0) { + spin_unlock_irqrestore(&dev->rad_lock, flags); + up(&dev->rad_setri_mutex); + + wait_event_interruptible(dev->rad_waitq, + dev->rad_setri_please == 0); + + down(&dev->rad_setri_mutex); + spin_lock_irqsave(&dev->rad_lock, flags); + } + if (dev->rad_ready) { /* Device callback fired since I last checked it */ dev->rad_ready = 0; @@ -1933,6 +1960,7 @@ kranal_scheduler (void *arg) } spin_unlock_irqrestore(&dev->rad_lock, flags); + up(&dev->rad_setri_mutex); dev->rad_scheduler = NULL; kranal_thread_fini();