From: eeb Date: Mon, 31 Jan 2005 12:30:58 +0000 (+0000) Subject: * 5602 fix improves checks that NID is set correctly and causes incorrect X-Git-Tag: v1_7_100~1648 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=4ab1d51e7bbd98006a21a1655f7e5bffec3cf0d4 * 5602 fix improves checks that NID is set correctly and causes incorrect configurations to fail rather than trip a server assertion. * 5606 fix ensures both peers agree on maximum "immediate" get. * ranal updates --- diff --git a/lnet/autoconf/lustre-lnet.m4 b/lnet/autoconf/lustre-lnet.m4 index 7023714..06bdf39 100644 --- a/lnet/autoconf/lustre-lnet.m4 +++ b/lnet/autoconf/lustre-lnet.m4 @@ -278,7 +278,7 @@ AC_DEFUN([LP_CONFIG_RANAL], [#### Rapid Array AC_MSG_CHECKING([if RapidArray kernel headers are present]) # placeholder -RACPPFLAGS="-I/tmp" +RACPPFLAGS="-I${LINUX}/drivers/xd1/include" EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS" EXTRA_KCFLAGS="$EXTRA_KCFLAGS $RACPPFLAGS" LB_LINUX_TRY_COMPILE([ diff --git a/lnet/include/lnet/lnetctl.h b/lnet/include/lnet/lnetctl.h index b771690..6a7df9a 100644 --- a/lnet/include/lnet/lnetctl.h +++ b/lnet/include/lnet/lnetctl.h @@ -34,6 +34,7 @@ int ptl_name2nal(char *str); int ptl_parse_ipaddr (__u32 *ipaddrp, char *str); +int ptl_parse_anynid (ptl_nid_t *nidp, char *str); int ptl_parse_nid (ptl_nid_t *nidp, char *str); char * ptl_nid2str (char *buffer, ptl_nid_t nid); diff --git a/lnet/include/lnet/ptlctl.h b/lnet/include/lnet/ptlctl.h index b771690..6a7df9a 100644 --- a/lnet/include/lnet/ptlctl.h +++ b/lnet/include/lnet/ptlctl.h @@ -34,6 +34,7 @@ int ptl_name2nal(char *str); int ptl_parse_ipaddr (__u32 *ipaddrp, char *str); +int ptl_parse_anynid (ptl_nid_t *nidp, char *str); int ptl_parse_nid (ptl_nid_t *nidp, char *str); char * ptl_nid2str (char *buffer, ptl_nid_t nid); diff --git a/lnet/klnds/openiblnd/openiblnd.c b/lnet/klnds/openiblnd/openiblnd.c index 4289210..adc5ba8 100644 --- a/lnet/klnds/openiblnd/openiblnd.c +++ b/lnet/klnds/openiblnd/openiblnd.c @@ -161,6 +161,11 @@ kibnal_unpack_msg(kib_msg_t *msg, int nob) __swab64s(&msg->ibm_dststamp); } + if (msg->ibm_srcnid == PTL_NID_ANY) { + CERROR("Bad src nid: "LPX64"\n", msg->ibm_srcnid); + return -EPROTO; + } + switch (msg->ibm_type) { default: CERROR("Unknown message type %x\n", msg->ibm_type); diff --git a/lnet/klnds/openiblnd/openiblnd_cb.c b/lnet/klnds/openiblnd/openiblnd_cb.c index 34d465f..62b64a4 100644 --- a/lnet/klnds/openiblnd/openiblnd_cb.c +++ b/lnet/klnds/openiblnd/openiblnd_cb.c @@ -1272,7 +1272,7 @@ kibnal_sendmsg(lib_nal_t *nal, /* Will it fit in a message? */ nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]); - if (nob >= IBNAL_MSG_SIZE) { + if (nob > IBNAL_MSG_SIZE) { CERROR("REPLY for "LPX64" too big (RDMA not requested): %d\n", nid, payload_nob); return (PTL_FAIL); diff --git a/lnet/klnds/ralnd/ralnd.c b/lnet/klnds/ralnd/ralnd.c index 6333bcf..f272c1e 100644 --- a/lnet/klnds/ralnd/ralnd.c +++ b/lnet/klnds/ralnd/ralnd.c @@ -877,7 +877,7 @@ kranal_conn_handshake (struct socket *sock, kra_peer_t *peer) /* Schedule all packets blocking for a connection */ while (!list_empty(&peer->rap_tx_queue)) { - tx = list_entry(&peer->rap_tx_queue.next, + tx = list_entry(peer->rap_tx_queue.next, kra_tx_t, tx_list); list_del(&tx->tx_list); @@ -929,14 +929,14 @@ kranal_connect (kra_peer_t *peer) /* reset reconnection timeouts */ peer->rap_reconnect_interval = RANAL_MIN_RECONNECT_INTERVAL; - peer->rap_reconnect_time = CURRENT_TIME; + peer->rap_reconnect_time = CURRENT_SECONDS; write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); return; } LASSERT (peer->rap_reconnect_interval != 0); - peer->rap_reconnect_time = CURRENT_TIME + peer->rap_reconnect_interval; + peer->rap_reconnect_time = CURRENT_SECONDS + peer->rap_reconnect_interval; peer->rap_reconnect_interval = MAX(RANAL_MAX_RECONNECT_INTERVAL, 1 * peer->rap_reconnect_interval); @@ -1268,7 +1268,7 @@ kranal_create_peer (ptl_nid_t nid) INIT_LIST_HEAD(&peer->rap_conns); INIT_LIST_HEAD(&peer->rap_tx_queue); - peer->rap_reconnect_time = CURRENT_TIME; + peer->rap_reconnect_time = CURRENT_SECONDS; peer->rap_reconnect_interval = RANAL_MIN_RECONNECT_INTERVAL; atomic_inc(&kranal_data.kra_npeers); @@ -1998,7 +1998,7 @@ kranal_api_startup (nal_t *nal, ptl_pid_t requested_pid, } for (i = 0; i < RANAL_N_CONND; i++) { - rc = kranal_thread_start(kranal_connd, (void *)i); + rc = kranal_thread_start(kranal_connd, (void *)(unsigned long)i); if (rc != 0) { CERROR("Can't spawn ranal connd[%d]: %d\n", i, rc); diff --git a/lnet/klnds/ralnd/ralnd.h b/lnet/klnds/ralnd/ralnd.h index d904c72..7e43705 100644 --- a/lnet/klnds/ralnd/ralnd.h +++ b/lnet/klnds/ralnd/ralnd.h @@ -49,10 +49,12 @@ #include #include +#include #define DEBUG_SUBSYSTEM S_NAL #include +#include #include #include #include @@ -350,7 +352,7 @@ typedef struct kra_peer atomic_t rap_refcount; /* # users */ int rap_persistence; /* "known" peer refs */ int rap_connecting; /* connection forming */ - unsigned long rap_reconnect_time; /* CURRENT_TIME when reconnect OK */ + unsigned long rap_reconnect_time; /* CURRENT_SECONDS when reconnect OK */ unsigned long rap_reconnect_interval; /* exponential backoff */ } kra_peer_t; @@ -455,17 +457,11 @@ kranal_tx_mapped (kra_tx_t *tx) tx->tx_buftype == RANAL_BUF_PHYS_MAPPED); } -#if CONFIG_X86 static inline __u64 kranal_page2phys (struct page *p) { - __u64 page_number = p - mem_map; - - return (page_number << PAGE_SHIFT); + return page_to_phys(p); } -#else -# error "no page->phys" -#endif extern void kranal_free_acceptsock (kra_acceptsock_t *ras); extern int kranal_listener_procint (ctl_table *table, @@ -479,7 +475,7 @@ extern kra_peer_t *kranal_create_peer (ptl_nid_t nid); extern kra_peer_t *kranal_find_peer_locked (ptl_nid_t nid); extern void kranal_post_fma (kra_conn_t *conn, kra_tx_t *tx); extern int kranal_del_peer (ptl_nid_t nid, int single_share); -extern void kranal_device_callback (RAP_INT32 devid); +extern void kranal_device_callback (RAP_INT32 devid, RAP_PVOID arg); extern int kranal_thread_start (int(*fn)(void *arg), void *arg); extern int kranal_connd (void *arg); extern int kranal_reaper (void *arg); diff --git a/lnet/klnds/ralnd/ralnd_cb.c b/lnet/klnds/ralnd/ralnd_cb.c index d4bacdf..ff080f4 100644 --- a/lnet/klnds/ralnd/ralnd_cb.c +++ b/lnet/klnds/ralnd/ralnd_cb.c @@ -38,7 +38,7 @@ kranal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist) } void -kranal_device_callback(RAP_INT32 devid) +kranal_device_callback(RAP_INT32 devid, RAP_PVOID arg) { kra_device_t *dev; int i; @@ -255,13 +255,13 @@ kranal_setup_phys_buffer (kra_tx_t *tx, int nkiov, ptl_kiov_t *kiov, /* Can't have gaps */ CERROR("Can't make payload contiguous in I/O VM:" "page %d, offset %d, len %d \n", - phys - tx->tx_phys, + (int)(phys - tx->tx_phys), kiov->kiov_offset, kiov->kiov_len); return -EINVAL; } if ((phys - tx->tx_phys) == PTL_MD_MAX_IOV) { - CERROR ("payload too big (%d)\n", phys - tx->tx_phys); + CERROR ("payload too big (%d)\n", (int)(phys - tx->tx_phys)); return -EMSGSIZE; } @@ -481,7 +481,7 @@ kranal_launch_tx (kra_tx_t *tx, ptl_nid_t nid) if (!peer->rap_connecting) { LASSERT (list_empty(&peer->rap_tx_queue)); - now = CURRENT_TIME; + now = CURRENT_SECONDS; if (now < peer->rap_reconnect_time) { write_unlock_irqrestore(g_lock, flags); kranal_tx_done(tx, -EHOSTUNREACH); @@ -583,8 +583,8 @@ kranal_do_send (lib_nal_t *nal, unsigned int niov, struct iovec *iov, ptl_kiov_t *kiov, - size_t offset, - size_t nob) + int offset, + int nob) { kra_conn_t *conn; kra_tx_t *tx; @@ -592,8 +592,8 @@ kranal_do_send (lib_nal_t *nal, /* NB 'private' is different depending on what we're sending.... */ - CDEBUG(D_NET, "sending "LPSZ" bytes in %d frags to nid:"LPX64 - " pid %d\n", nob, niov, nid , pid); + CDEBUG(D_NET, "sending %d bytes in %d frags to nid:"LPX64" pid %d\n", + nob, niov, nid, pid); LASSERT (nob == 0 || niov > 0); LASSERT (niov <= PTL_MD_MAX_IOV); @@ -765,9 +765,9 @@ kranal_send_pages (lib_nal_t *nal, void *private, lib_msg_t *cookie, } ptl_err_t -kranal_recvmsg (lib_nal_t *nal, void *private, lib_msg_t *libmsg, +kranal_do_recv (lib_nal_t *nal, void *private, lib_msg_t *libmsg, unsigned int niov, struct iovec *iov, ptl_kiov_t *kiov, - size_t offset, size_t mlen, size_t rlen) + int offset, int mlen, int rlen) { kra_conn_t *conn = private; kra_msg_t *rxmsg = conn->rac_rxmsg; @@ -860,7 +860,7 @@ kranal_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, unsigned int niov, struct iovec *iov, size_t offset, size_t mlen, size_t rlen) { - return kranal_recvmsg(nal, private, msg, niov, iov, NULL, + return kranal_do_recv(nal, private, msg, niov, iov, NULL, offset, mlen, rlen); } @@ -869,7 +869,7 @@ kranal_recv_pages (lib_nal_t *nal, void *private, lib_msg_t *msg, unsigned int niov, ptl_kiov_t *kiov, size_t offset, size_t mlen, size_t rlen) { - return kranal_recvmsg(nal, private, msg, niov, NULL, kiov, + return kranal_do_recv(nal, private, msg, niov, NULL, kiov, offset, mlen, rlen); } diff --git a/lnet/utils/portals.c b/lnet/utils/portals.c index 70440e0..4efca98 100644 --- a/lnet/utils/portals.c +++ b/lnet/utils/portals.c @@ -369,11 +369,6 @@ ptl_parse_nid (ptl_nid_t *nidp, char *str) char *end; unsigned long long ullval; - if (!strcmp (str, "_all_")) { - *nidp = PTL_NID_ANY; - return (0); - } - if (ptl_parse_ipaddr (&ipaddr, str) == 0) { #if !CRAY_PORTALS *nidp = (ptl_nid_t)ipaddr; @@ -384,8 +379,8 @@ ptl_parse_nid (ptl_nid_t *nidp, char *str) } ullval = strtoull(str, &end, 0); - if (*end == 0) { - /* parsed whole string */ + if (end != str && *end == 0) { + /* parsed whole non-empty string */ *nidp = (ptl_nid_t)ullval; return (0); } @@ -393,6 +388,17 @@ ptl_parse_nid (ptl_nid_t *nidp, char *str) return (-1); } +int +ptl_parse_anynid (ptl_nid_t *nidp, char *str) +{ + if (!strcmp (str, "_all_")) { + *nidp = PTL_NID_ANY; + return 0; + } + + return ptl_parse_nid(nidp, str); +} + __u64 ptl_nid2u64(ptl_nid_t nid) { switch (sizeof (nid)) { @@ -791,7 +797,7 @@ jt_ptl_del_peer (int argc, char **argv) } if (argc > 1 && - ptl_parse_nid (&nid, argv[1]) != 0) { + ptl_parse_anynid (&nid, argv[1]) != 0) { fprintf (stderr, "Can't parse nid: %s\n", argv[1]); return -1; } @@ -1040,7 +1046,7 @@ int jt_ptl_disconnect(int argc, char **argv) return 0; if (argc >= 2 && - ptl_parse_nid (&nid, argv[1]) != 0) { + ptl_parse_anynid (&nid, argv[1]) != 0) { fprintf (stderr, "Can't parse nid %s\n", argv[1]); return -1; } @@ -1082,7 +1088,7 @@ int jt_ptl_push_connection (int argc, char **argv) return -1; if (argc > 1 && - ptl_parse_nid (&nid, argv[1]) != 0) { + ptl_parse_anynid (&nid, argv[1]) != 0) { fprintf(stderr, "Can't parse nid: %s\n", argv[1]); return -1; } @@ -1289,7 +1295,7 @@ jt_ptl_fail_nid (int argc, char **argv) if (!strcmp (argv[1], "_all_")) nid = PTL_NID_ANY; - else if (ptl_parse_nid (&nid, argv[1]) != 0) + else if (ptl_parse_anynid (&nid, argv[1]) != 0) { fprintf (stderr, "Can't parse nid \"%s\"\n", argv[1]); return (-1);