From: eeb Date: Thu, 5 Jun 2003 17:39:43 +0000 (+0000) Subject: * Added (cluster-wide) NID offset capability to qswnal X-Git-Tag: v1_7_100~1^91~147 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=9d5eb8fcd44a4325777b606f05d44f2281e9aab4;p=fs%2Flustre-release.git * Added (cluster-wide) NID offset capability to qswnal * Made lctl::mynid accept NIDs specified as a hostname string, a.b.c.d IP address, %i and %x (just like the routing commands). This command now works with the qswnal, to tell it its cluster-wide NID offset. Temp hack in lconf to actually do this function when network is 'elan' as well as 'tcp' or 'toe'. * Added lctl::shownid (no args) to print the NID --- diff --git a/lnet/include/lnet/lnetctl.h b/lnet/include/lnet/lnetctl.h index fdaae69..dc02780 100644 --- a/lnet/include/lnet/lnetctl.h +++ b/lnet/include/lnet/lnetctl.h @@ -38,6 +38,7 @@ int jt_ptl_connect(int argc, char **argv); int jt_ptl_disconnect(int argc, char **argv); int jt_ptl_push_connection(int argc, char **argv); int jt_ptl_ping(int argc, char **argv); +int jt_ptl_shownid(int argc, char **argv); int jt_ptl_mynid(int argc, char **argv); int jt_ptl_add_uuid(int argc, char **argv); int jt_ptl_add_uuid_old(int argc, char **argv); /* backwards compatibility */ diff --git a/lnet/include/lnet/ptlctl.h b/lnet/include/lnet/ptlctl.h index fdaae69..dc02780 100644 --- a/lnet/include/lnet/ptlctl.h +++ b/lnet/include/lnet/ptlctl.h @@ -38,6 +38,7 @@ int jt_ptl_connect(int argc, char **argv); int jt_ptl_disconnect(int argc, char **argv); int jt_ptl_push_connection(int argc, char **argv); int jt_ptl_ping(int argc, char **argv); +int jt_ptl_shownid(int argc, char **argv); int jt_ptl_mynid(int argc, char **argv); int jt_ptl_add_uuid(int argc, char **argv); int jt_ptl_add_uuid_old(int argc, char **argv); /* backwards compatibility */ diff --git a/lnet/klnds/qswlnd/qswlnd.c b/lnet/klnds/qswlnd/qswlnd.c index d64b7ad..f13e811 100644 --- a/lnet/klnds/qswlnd/qswlnd.c +++ b/lnet/klnds/qswlnd/qswlnd.c @@ -101,16 +101,36 @@ static nal_t * kqswnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, ptl_pid_t requested_pid) { - ptl_nid_t mynid = ep_nodeid (kqswnal_data.kqn_epdev); - int nnids = ep_numnodes (kqswnal_data.kqn_epdev); + ptl_nid_t mynid = kqswnal_elanid2nid (kqswnal_data.kqn_elanid); + int nnids = kqswnal_data.kqn_nnodes; - CDEBUG(D_NET, "calling lib_init with nid "LPX64" of %d\n", mynid,nnids); + CDEBUG(D_NET, "calling lib_init with nid "LPX64" of %d\n", mynid, nnids); lib_init(&kqswnal_lib, mynid, 0, nnids, ptl_size, ac_size); return (&kqswnal_api); } +int +kqswnal_cmd (struct portal_ioctl_data *data, void *private) +{ + LASSERT (data != NULL); + + switch (data->ioc_nal_cmd) { + case NAL_CMD_REGISTER_MYNID: + CDEBUG (D_IOCTL, "setting NID offset to "LPX64" (was "LPX64")\n", + data->ioc_nid - kqswnal_data.kqn_elanid, + kqswnal_data.kqn_nid_offset); + kqswnal_data.kqn_nid_offset = + data->ioc_nid - kqswnal_data.kqn_elanid; + kqswnal_lib.ni.nid = data->ioc_nid; + return (0); + + default: + return (-EINVAL); + } +} + void __exit kqswnal_finalise (void) { @@ -324,6 +344,10 @@ kqswnal_initialise (void) return (-ENOMEM); } + kqswnal_data.kqn_nid_offset = 0; + kqswnal_data.kqn_nnodes = ep_numnodes (kqswnal_data.kqn_epdev); + kqswnal_data.kqn_elanid = ep_nodeid (kqswnal_data.kqn_epdev); + /**********************************************************************/ /* Get the transmitter */ @@ -554,13 +578,19 @@ kqswnal_initialise (void) rc = kpr_register (&kqswnal_data.kqn_router, &kqswnal_router_interface); CDEBUG(D_NET, "Can't initialise routing interface (rc = %d): not routing\n",rc); + rc = kportal_nal_register (QSWNAL, &kqswnal_cmd, NULL); + if (rc != 0) { + CERROR ("Can't initialise command interface (rc = %d)\n", rc); + kqswnal_finalise (); + return (rc); + } + PORTAL_SYMBOL_REGISTER(kqswnal_ni); kqswnal_data.kqn_init = KQN_INIT_ALL; printk(KERN_INFO "Routing QSW NAL loaded on node %d of %d " "(Routing %s, initial mem %d)\n", - ep_nodeid (kqswnal_data.kqn_epdev), - ep_numnodes (kqswnal_data.kqn_epdev), + kqswnal_data.kqn_elanid, kqswnal_data.kqn_nnodes, kpr_routing (&kqswnal_data.kqn_router) ? "enabled" : "disabled", pkmem); diff --git a/lnet/klnds/qswlnd/qswlnd.h b/lnet/klnds/qswlnd/qswlnd.h index 657b02b..88ab74f 100644 --- a/lnet/klnds/qswlnd/qswlnd.h +++ b/lnet/klnds/qswlnd/qswlnd.h @@ -200,6 +200,10 @@ typedef struct ELAN3_DMA_HANDLE *kqn_eptxdmahandle; /* elan reserved tx vaddrs */ ELAN3_DMA_HANDLE *kqn_eprxdmahandle; /* elan reserved rx vaddrs */ kpr_router_t kqn_router; /* connection to Kernel Portals Router module */ + + ptl_nid_t kqn_nid_offset; /* this cluster's NID offset */ + int kqn_nnodes; /* this cluster's size */ + int kqn_elanid; /* this nodes's elan ID */ } kqswnal_data_t; /* kqn_init state */ @@ -217,6 +221,23 @@ extern void kqswnal_rxhandler(EP_RXD *rxd); extern int kqswnal_scheduler (void *); extern void kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd); +static inline ptl_nid_t +kqswnal_elanid2nid (int elanid) +{ + return (kqswnal_data.kqn_nid_offset + elanid); +} + +static inline int +kqswnal_nid2elanid (ptl_nid_t nid) +{ + /* not in this cluster? */ + if (nid < kqswnal_data.kqn_nid_offset || + nid >= kqswnal_data.kqn_nid_offset + kqswnal_data.kqn_nnodes) + return (-1); + + return (nid - kqswnal_data.kqn_nid_offset); +} + static inline void kqswnal_requeue_rx (kqswnal_rx_t *krx) { diff --git a/lnet/klnds/qswlnd/qswlnd_cb.c b/lnet/klnds/qswlnd/qswlnd_cb.c index 5979885..3b47a25 100644 --- a/lnet/klnds/qswlnd/qswlnd_cb.c +++ b/lnet/klnds/qswlnd/qswlnd_cb.c @@ -109,21 +109,15 @@ kqswnal_sti(nal_cb_t *nal, unsigned long *flags) static int kqswnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist) { - /* network distance doesn't mean much for this nal */ - *dist = (nid == nal->ni.nid) ? 0 : 1; + if (nid == nal->ni.nid) + *dist = 0; /* it's me */ + else if (kqswnal_nid2elanid (nid) >= 0) + *dist = 1; /* it's my peer */ + else + *dist = 2; /* via router */ return (0); } -int -kqswnal_ispeer (ptl_nid_t nid) -{ - unsigned int elanid = (unsigned int)nid; - - /* didn't lose high bits on conversion and it's in this machine? */ - return ((ptl_nid_t)elanid == nid && - elanid < ep_numnodes (kqswnal_data.kqn_epdev)); -} - void kqswnal_unmap_tx (kqswnal_tx_t *ktx) { @@ -453,11 +447,14 @@ kqswnal_launch (kqswnal_tx_t *ktx) { /* Don't block for transmit descriptor if we're in interrupt context */ int attr = in_interrupt() ? (EP_NO_SLEEP | EP_NO_ALLOC) : 0; - int rc = ep_transmit_large(kqswnal_data.kqn_eptx, ktx->ktx_nid, - ktx->ktx_port, attr, kqswnal_txhandler, - ktx, ktx->ktx_iov, ktx->ktx_niov); + int dest = kqswnal_nid2elanid (ktx->ktx_nid); long flags; - + int rc; + + LASSERT (dest >= 0); /* must be a peer */ + rc = ep_transmit_large(kqswnal_data.kqn_eptx, dest, + ktx->ktx_port, attr, kqswnal_txhandler, + ktx, ktx->ktx_iov, ktx->ktx_niov); if (rc == 0) atomic_inc (&kqswnal_packets_launched); @@ -595,7 +592,7 @@ kqswnal_sendmsg (nal_cb_t *nal, return (-1); } - if (!kqswnal_ispeer (nid)) { /* Can't send direct: find gateway? */ + if (kqswnal_nid2elanid (nid) < 0) { /* Can't send direct: find gateway? */ rc = kpr_lookup (&kqswnal_data.kqn_router, nid, &gatewaynid); if (rc != 0) { CERROR("Can't route to "LPX64": router error %d\n", @@ -603,7 +600,7 @@ kqswnal_sendmsg (nal_cb_t *nal, lib_finalize (&kqswnal_lib, private, cookie); return (-1); } - if (!kqswnal_ispeer (gatewaynid)) { + if (kqswnal_nid2elanid (gatewaynid) < 0) { CERROR("Bad gateway "LPX64" for "LPX64"\n", gatewaynid, nid); lib_finalize (&kqswnal_lib, private, cookie); @@ -757,7 +754,7 @@ kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) if (nid == kqswnal_lib.ni.nid) /* gateway is me */ nid = fwd->kprfd_target_nid; /* target is final dest */ - if (!kqswnal_ispeer (nid)) { + if (kqswnal_nid2elanid (nid) < 0) { CERROR("Can't forward [%p] to "LPX64": not a peer\n", fwd, nid); rc = -EHOSTUNREACH; goto failed; @@ -844,7 +841,7 @@ kqswnal_rx (kqswnal_rx_t *krx) CERROR ("checksums for forwarded packets not implemented\n"); LBUG (); #endif - if (kqswnal_ispeer (dest_nid)) /* should have gone direct to peer */ + if (kqswnal_nid2elanid (dest_nid) >= 0) /* should have gone direct to peer */ { CERROR("dropping packet from "LPX64" for "LPX64 ": target is peer\n", NTOH__u64(hdr->src_nid), dest_nid); diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index d15d8c8..5bf61aa 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -95,10 +95,8 @@ nal_t * ksocknal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, ptl_pid_t requested_pid) { - CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n", - ksocknal_data.ksnd_mynid); - lib_init(&ksocknal_lib, ksocknal_data.ksnd_mynid, 0, 10, ptl_size, - ac_size); + CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n", (ptl_nid_t)0); + lib_init(&ksocknal_lib, (ptl_nid_t)0, 0, 10, ptl_size, ac_size); return (&ksocknal_api); } @@ -129,7 +127,6 @@ ksocknal_set_mynid(ptl_nid_t nid) CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n", nid, ni->nid); - ksocknal_data.ksnd_mynid = nid; ni->nid = nid; return (0); } diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h index 0bf60ea..86cdeb0 100644 --- a/lnet/klnds/socklnd/socklnd.h +++ b/lnet/klnds/socklnd/socklnd.h @@ -106,7 +106,6 @@ typedef struct { struct list_head ksnd_socklist; /* all my connections */ rwlock_t ksnd_socklist_lock; /* stabilise add/find/remove */ - ptl_nid_t ksnd_mynid; nal_cb_t *ksnd_nal_cb; spinlock_t ksnd_nal_cb_lock; /* lib cli/sti lock */ diff --git a/lnet/utils/portals.c b/lnet/utils/portals.c index 8235271..940c675 100644 --- a/lnet/utils/portals.c +++ b/lnet/utils/portals.c @@ -704,18 +704,43 @@ int jt_ptl_ping(int argc, char **argv) return 0; } +int jt_ptl_shownid(int argc, char **argv) +{ + struct portal_ioctl_data data; + int rc; + + if (argc > 1) { + fprintf(stderr, "usage: %s\n", argv[0]); + return 0; + } + + if (g_nal == 0) { + fprintf(stderr, "Error: you must run the 'network' command first\n"); + return -1; + } + + PORTAL_IOC_INIT (data); + data.ioc_nal = g_nal; + rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_GET_NID, &data); + if (rc < 0) + fprintf(stderr, "getting my NID failed: %s\n", + strerror (errno)); + else + printf(LPX64"\n", data.ioc_nid); + return 0; +} + int jt_ptl_mynid(int argc, char **argv) { int rc; - struct hostent *h; - char buf[1024], *hostname; + char hostname[1024]; + char *nidstr; struct portal_ioctl_data data; ptl_nid_t mynid; if (argc > 2) { - fprintf(stderr, "usage: %s [hostname]\n", argv[0]); - fprintf(stderr, "hostname defaults to the hostname of the " - "machine.\n"); + fprintf(stderr, "usage: %s [NID]\n", argv[0]); + fprintf(stderr, "NID defaults to the primary IP address of the machine.\n"); return 0; } @@ -725,42 +750,21 @@ int jt_ptl_mynid(int argc, char **argv) return -1; } - if (g_nal == QSWNAL) { - fprintf(stderr, "'mynid' doesn't make any sense for elan.\n"); - return -1; - } else if (g_nal == GMNAL) { - fprintf(stderr, "'mynid' doesn't make any sense for GM.\n"); - return -1; - } else if (g_nal == SCIMACNAL) { - fprintf(stderr, "'mynid' doesn't make any sense for SCI.\n"); - return -1; - } - - if (g_nal != SOCKNAL && g_nal != TOENAL) { - fprintf(stderr, "This should never happen. Also it is very " - "bad.\n"); + if (argc >= 2) + nidstr = argv[1]; + else if (gethostname(hostname, sizeof(hostname)) != 0) { + fprintf(stderr, "gethostname failed: %s\n", + strerror(errno)); return -1; } + else + nidstr = hostname; - if (argc == 1) { - if (gethostname(buf, sizeof(buf)) != 0) { - fprintf(stderr, "gethostname failed: %s\n", - strerror(errno)); - return -1; - } - hostname = buf; - } else { - hostname = argv[1]; - } - - h = gethostbyname(hostname); - - if (!h) { - fprintf(stderr, "cannot get address for host '%s': %d\n", - hostname, h_errno); + rc = ptl_parse_nid (&mynid, nidstr); + if (rc != 0) { + fprintf (stderr, "Can't convert '%s' into a NID\n", nidstr); return -1; } - mynid = (ptl_nid_t)ntohl (*(__u32 *)h->h_addr); /* HOST byte order */ PORTAL_IOC_INIT(data); data.ioc_nid = mynid; @@ -769,7 +773,7 @@ int jt_ptl_mynid(int argc, char **argv) rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_NAL_CMD, &data); if (rc < 0) - fprintf(stderr, "IOC_PORTAL_REGISTER_MYNID failed: %s\n", + fprintf(stderr, "setting my NID failed: %s\n", strerror(errno)); else printf("registered my nid "LPX64" (%s)\n", mynid, hostname); diff --git a/lnet/utils/ptlctl.c b/lnet/utils/ptlctl.c index d38bd4a..8c56d93 100644 --- a/lnet/utils/ptlctl.c +++ b/lnet/utils/ptlctl.c @@ -34,6 +34,7 @@ command_t list[] = { {"disconnect", jt_ptl_disconnect, 0, "disconnect from a remote nid (args: [hostname]"}, {"push", jt_ptl_push_connection, 0, "flush connection to a remote nid (args: [hostname]"}, {"ping", jt_ptl_ping, 0, "do a ping test (args: nid [count] [size] [timeout])"}, + {"shownid", jt_ptl_shownid, 0, "print the local NID"}, {"mynid", jt_ptl_mynid, 0, "inform the socknal of the local NID (args: [hostname])"}, {"add_route", jt_ptl_add_route, 0, "add an entry to the routing table (args: gatewayNID targetNID [targetNID])"}, {"del_route", jt_ptl_del_route, 0, "delete an entry from the routing table (args: targetNID"}, diff --git a/lustre/portals/include/portals/ptlctl.h b/lustre/portals/include/portals/ptlctl.h index fdaae69..dc02780 100644 --- a/lustre/portals/include/portals/ptlctl.h +++ b/lustre/portals/include/portals/ptlctl.h @@ -38,6 +38,7 @@ int jt_ptl_connect(int argc, char **argv); int jt_ptl_disconnect(int argc, char **argv); int jt_ptl_push_connection(int argc, char **argv); int jt_ptl_ping(int argc, char **argv); +int jt_ptl_shownid(int argc, char **argv); int jt_ptl_mynid(int argc, char **argv); int jt_ptl_add_uuid(int argc, char **argv); int jt_ptl_add_uuid_old(int argc, char **argv); /* backwards compatibility */ diff --git a/lustre/portals/knals/qswnal/qswnal.c b/lustre/portals/knals/qswnal/qswnal.c index d64b7ad..f13e811 100644 --- a/lustre/portals/knals/qswnal/qswnal.c +++ b/lustre/portals/knals/qswnal/qswnal.c @@ -101,16 +101,36 @@ static nal_t * kqswnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, ptl_pid_t requested_pid) { - ptl_nid_t mynid = ep_nodeid (kqswnal_data.kqn_epdev); - int nnids = ep_numnodes (kqswnal_data.kqn_epdev); + ptl_nid_t mynid = kqswnal_elanid2nid (kqswnal_data.kqn_elanid); + int nnids = kqswnal_data.kqn_nnodes; - CDEBUG(D_NET, "calling lib_init with nid "LPX64" of %d\n", mynid,nnids); + CDEBUG(D_NET, "calling lib_init with nid "LPX64" of %d\n", mynid, nnids); lib_init(&kqswnal_lib, mynid, 0, nnids, ptl_size, ac_size); return (&kqswnal_api); } +int +kqswnal_cmd (struct portal_ioctl_data *data, void *private) +{ + LASSERT (data != NULL); + + switch (data->ioc_nal_cmd) { + case NAL_CMD_REGISTER_MYNID: + CDEBUG (D_IOCTL, "setting NID offset to "LPX64" (was "LPX64")\n", + data->ioc_nid - kqswnal_data.kqn_elanid, + kqswnal_data.kqn_nid_offset); + kqswnal_data.kqn_nid_offset = + data->ioc_nid - kqswnal_data.kqn_elanid; + kqswnal_lib.ni.nid = data->ioc_nid; + return (0); + + default: + return (-EINVAL); + } +} + void __exit kqswnal_finalise (void) { @@ -324,6 +344,10 @@ kqswnal_initialise (void) return (-ENOMEM); } + kqswnal_data.kqn_nid_offset = 0; + kqswnal_data.kqn_nnodes = ep_numnodes (kqswnal_data.kqn_epdev); + kqswnal_data.kqn_elanid = ep_nodeid (kqswnal_data.kqn_epdev); + /**********************************************************************/ /* Get the transmitter */ @@ -554,13 +578,19 @@ kqswnal_initialise (void) rc = kpr_register (&kqswnal_data.kqn_router, &kqswnal_router_interface); CDEBUG(D_NET, "Can't initialise routing interface (rc = %d): not routing\n",rc); + rc = kportal_nal_register (QSWNAL, &kqswnal_cmd, NULL); + if (rc != 0) { + CERROR ("Can't initialise command interface (rc = %d)\n", rc); + kqswnal_finalise (); + return (rc); + } + PORTAL_SYMBOL_REGISTER(kqswnal_ni); kqswnal_data.kqn_init = KQN_INIT_ALL; printk(KERN_INFO "Routing QSW NAL loaded on node %d of %d " "(Routing %s, initial mem %d)\n", - ep_nodeid (kqswnal_data.kqn_epdev), - ep_numnodes (kqswnal_data.kqn_epdev), + kqswnal_data.kqn_elanid, kqswnal_data.kqn_nnodes, kpr_routing (&kqswnal_data.kqn_router) ? "enabled" : "disabled", pkmem); diff --git a/lustre/portals/knals/qswnal/qswnal.h b/lustre/portals/knals/qswnal/qswnal.h index 657b02b..88ab74f 100644 --- a/lustre/portals/knals/qswnal/qswnal.h +++ b/lustre/portals/knals/qswnal/qswnal.h @@ -200,6 +200,10 @@ typedef struct ELAN3_DMA_HANDLE *kqn_eptxdmahandle; /* elan reserved tx vaddrs */ ELAN3_DMA_HANDLE *kqn_eprxdmahandle; /* elan reserved rx vaddrs */ kpr_router_t kqn_router; /* connection to Kernel Portals Router module */ + + ptl_nid_t kqn_nid_offset; /* this cluster's NID offset */ + int kqn_nnodes; /* this cluster's size */ + int kqn_elanid; /* this nodes's elan ID */ } kqswnal_data_t; /* kqn_init state */ @@ -217,6 +221,23 @@ extern void kqswnal_rxhandler(EP_RXD *rxd); extern int kqswnal_scheduler (void *); extern void kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd); +static inline ptl_nid_t +kqswnal_elanid2nid (int elanid) +{ + return (kqswnal_data.kqn_nid_offset + elanid); +} + +static inline int +kqswnal_nid2elanid (ptl_nid_t nid) +{ + /* not in this cluster? */ + if (nid < kqswnal_data.kqn_nid_offset || + nid >= kqswnal_data.kqn_nid_offset + kqswnal_data.kqn_nnodes) + return (-1); + + return (nid - kqswnal_data.kqn_nid_offset); +} + static inline void kqswnal_requeue_rx (kqswnal_rx_t *krx) { diff --git a/lustre/portals/knals/qswnal/qswnal_cb.c b/lustre/portals/knals/qswnal/qswnal_cb.c index 5979885..3b47a25 100644 --- a/lustre/portals/knals/qswnal/qswnal_cb.c +++ b/lustre/portals/knals/qswnal/qswnal_cb.c @@ -109,21 +109,15 @@ kqswnal_sti(nal_cb_t *nal, unsigned long *flags) static int kqswnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist) { - /* network distance doesn't mean much for this nal */ - *dist = (nid == nal->ni.nid) ? 0 : 1; + if (nid == nal->ni.nid) + *dist = 0; /* it's me */ + else if (kqswnal_nid2elanid (nid) >= 0) + *dist = 1; /* it's my peer */ + else + *dist = 2; /* via router */ return (0); } -int -kqswnal_ispeer (ptl_nid_t nid) -{ - unsigned int elanid = (unsigned int)nid; - - /* didn't lose high bits on conversion and it's in this machine? */ - return ((ptl_nid_t)elanid == nid && - elanid < ep_numnodes (kqswnal_data.kqn_epdev)); -} - void kqswnal_unmap_tx (kqswnal_tx_t *ktx) { @@ -453,11 +447,14 @@ kqswnal_launch (kqswnal_tx_t *ktx) { /* Don't block for transmit descriptor if we're in interrupt context */ int attr = in_interrupt() ? (EP_NO_SLEEP | EP_NO_ALLOC) : 0; - int rc = ep_transmit_large(kqswnal_data.kqn_eptx, ktx->ktx_nid, - ktx->ktx_port, attr, kqswnal_txhandler, - ktx, ktx->ktx_iov, ktx->ktx_niov); + int dest = kqswnal_nid2elanid (ktx->ktx_nid); long flags; - + int rc; + + LASSERT (dest >= 0); /* must be a peer */ + rc = ep_transmit_large(kqswnal_data.kqn_eptx, dest, + ktx->ktx_port, attr, kqswnal_txhandler, + ktx, ktx->ktx_iov, ktx->ktx_niov); if (rc == 0) atomic_inc (&kqswnal_packets_launched); @@ -595,7 +592,7 @@ kqswnal_sendmsg (nal_cb_t *nal, return (-1); } - if (!kqswnal_ispeer (nid)) { /* Can't send direct: find gateway? */ + if (kqswnal_nid2elanid (nid) < 0) { /* Can't send direct: find gateway? */ rc = kpr_lookup (&kqswnal_data.kqn_router, nid, &gatewaynid); if (rc != 0) { CERROR("Can't route to "LPX64": router error %d\n", @@ -603,7 +600,7 @@ kqswnal_sendmsg (nal_cb_t *nal, lib_finalize (&kqswnal_lib, private, cookie); return (-1); } - if (!kqswnal_ispeer (gatewaynid)) { + if (kqswnal_nid2elanid (gatewaynid) < 0) { CERROR("Bad gateway "LPX64" for "LPX64"\n", gatewaynid, nid); lib_finalize (&kqswnal_lib, private, cookie); @@ -757,7 +754,7 @@ kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) if (nid == kqswnal_lib.ni.nid) /* gateway is me */ nid = fwd->kprfd_target_nid; /* target is final dest */ - if (!kqswnal_ispeer (nid)) { + if (kqswnal_nid2elanid (nid) < 0) { CERROR("Can't forward [%p] to "LPX64": not a peer\n", fwd, nid); rc = -EHOSTUNREACH; goto failed; @@ -844,7 +841,7 @@ kqswnal_rx (kqswnal_rx_t *krx) CERROR ("checksums for forwarded packets not implemented\n"); LBUG (); #endif - if (kqswnal_ispeer (dest_nid)) /* should have gone direct to peer */ + if (kqswnal_nid2elanid (dest_nid) >= 0) /* should have gone direct to peer */ { CERROR("dropping packet from "LPX64" for "LPX64 ": target is peer\n", NTOH__u64(hdr->src_nid), dest_nid); diff --git a/lustre/portals/knals/socknal/socknal.c b/lustre/portals/knals/socknal/socknal.c index d15d8c8..5bf61aa 100644 --- a/lustre/portals/knals/socknal/socknal.c +++ b/lustre/portals/knals/socknal/socknal.c @@ -95,10 +95,8 @@ nal_t * ksocknal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, ptl_pid_t requested_pid) { - CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n", - ksocknal_data.ksnd_mynid); - lib_init(&ksocknal_lib, ksocknal_data.ksnd_mynid, 0, 10, ptl_size, - ac_size); + CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n", (ptl_nid_t)0); + lib_init(&ksocknal_lib, (ptl_nid_t)0, 0, 10, ptl_size, ac_size); return (&ksocknal_api); } @@ -129,7 +127,6 @@ ksocknal_set_mynid(ptl_nid_t nid) CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n", nid, ni->nid); - ksocknal_data.ksnd_mynid = nid; ni->nid = nid; return (0); } diff --git a/lustre/portals/knals/socknal/socknal.h b/lustre/portals/knals/socknal/socknal.h index 0bf60ea..86cdeb0 100644 --- a/lustre/portals/knals/socknal/socknal.h +++ b/lustre/portals/knals/socknal/socknal.h @@ -106,7 +106,6 @@ typedef struct { struct list_head ksnd_socklist; /* all my connections */ rwlock_t ksnd_socklist_lock; /* stabilise add/find/remove */ - ptl_nid_t ksnd_mynid; nal_cb_t *ksnd_nal_cb; spinlock_t ksnd_nal_cb_lock; /* lib cli/sti lock */ diff --git a/lustre/portals/utils/portals.c b/lustre/portals/utils/portals.c index 8235271..940c675 100644 --- a/lustre/portals/utils/portals.c +++ b/lustre/portals/utils/portals.c @@ -704,18 +704,43 @@ int jt_ptl_ping(int argc, char **argv) return 0; } +int jt_ptl_shownid(int argc, char **argv) +{ + struct portal_ioctl_data data; + int rc; + + if (argc > 1) { + fprintf(stderr, "usage: %s\n", argv[0]); + return 0; + } + + if (g_nal == 0) { + fprintf(stderr, "Error: you must run the 'network' command first\n"); + return -1; + } + + PORTAL_IOC_INIT (data); + data.ioc_nal = g_nal; + rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_GET_NID, &data); + if (rc < 0) + fprintf(stderr, "getting my NID failed: %s\n", + strerror (errno)); + else + printf(LPX64"\n", data.ioc_nid); + return 0; +} + int jt_ptl_mynid(int argc, char **argv) { int rc; - struct hostent *h; - char buf[1024], *hostname; + char hostname[1024]; + char *nidstr; struct portal_ioctl_data data; ptl_nid_t mynid; if (argc > 2) { - fprintf(stderr, "usage: %s [hostname]\n", argv[0]); - fprintf(stderr, "hostname defaults to the hostname of the " - "machine.\n"); + fprintf(stderr, "usage: %s [NID]\n", argv[0]); + fprintf(stderr, "NID defaults to the primary IP address of the machine.\n"); return 0; } @@ -725,42 +750,21 @@ int jt_ptl_mynid(int argc, char **argv) return -1; } - if (g_nal == QSWNAL) { - fprintf(stderr, "'mynid' doesn't make any sense for elan.\n"); - return -1; - } else if (g_nal == GMNAL) { - fprintf(stderr, "'mynid' doesn't make any sense for GM.\n"); - return -1; - } else if (g_nal == SCIMACNAL) { - fprintf(stderr, "'mynid' doesn't make any sense for SCI.\n"); - return -1; - } - - if (g_nal != SOCKNAL && g_nal != TOENAL) { - fprintf(stderr, "This should never happen. Also it is very " - "bad.\n"); + if (argc >= 2) + nidstr = argv[1]; + else if (gethostname(hostname, sizeof(hostname)) != 0) { + fprintf(stderr, "gethostname failed: %s\n", + strerror(errno)); return -1; } + else + nidstr = hostname; - if (argc == 1) { - if (gethostname(buf, sizeof(buf)) != 0) { - fprintf(stderr, "gethostname failed: %s\n", - strerror(errno)); - return -1; - } - hostname = buf; - } else { - hostname = argv[1]; - } - - h = gethostbyname(hostname); - - if (!h) { - fprintf(stderr, "cannot get address for host '%s': %d\n", - hostname, h_errno); + rc = ptl_parse_nid (&mynid, nidstr); + if (rc != 0) { + fprintf (stderr, "Can't convert '%s' into a NID\n", nidstr); return -1; } - mynid = (ptl_nid_t)ntohl (*(__u32 *)h->h_addr); /* HOST byte order */ PORTAL_IOC_INIT(data); data.ioc_nid = mynid; @@ -769,7 +773,7 @@ int jt_ptl_mynid(int argc, char **argv) rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_NAL_CMD, &data); if (rc < 0) - fprintf(stderr, "IOC_PORTAL_REGISTER_MYNID failed: %s\n", + fprintf(stderr, "setting my NID failed: %s\n", strerror(errno)); else printf("registered my nid "LPX64" (%s)\n", mynid, hostname); diff --git a/lustre/portals/utils/ptlctl.c b/lustre/portals/utils/ptlctl.c index d38bd4a..8c56d93 100644 --- a/lustre/portals/utils/ptlctl.c +++ b/lustre/portals/utils/ptlctl.c @@ -34,6 +34,7 @@ command_t list[] = { {"disconnect", jt_ptl_disconnect, 0, "disconnect from a remote nid (args: [hostname]"}, {"push", jt_ptl_push_connection, 0, "flush connection to a remote nid (args: [hostname]"}, {"ping", jt_ptl_ping, 0, "do a ping test (args: nid [count] [size] [timeout])"}, + {"shownid", jt_ptl_shownid, 0, "print the local NID"}, {"mynid", jt_ptl_mynid, 0, "inform the socknal of the local NID (args: [hostname])"}, {"add_route", jt_ptl_add_route, 0, "add an entry to the routing table (args: gatewayNID targetNID [targetNID])"}, {"del_route", jt_ptl_del_route, 0, "delete an entry from the routing table (args: targetNID"},