From 44b908e3e76866ce60ac87b22a7f19683610cc7e Mon Sep 17 00:00:00 2001 From: eeb Date: Thu, 21 Apr 2005 00:35:17 +0000 Subject: [PATCH] * new network config snapshot half-way through lustre uses single NI --- lnet/include/libcfs/kp30.h | 57 +-- lnet/include/libcfs/libcfs.h | 17 +- lnet/include/libcfs/linux/libcfs.h | 2 +- lnet/include/lnet/Makefile.am | 8 +- lnet/include/lnet/api-support.h | 8 - lnet/include/lnet/api.h | 168 +++--- lnet/include/lnet/internal.h | 16 - lnet/include/lnet/lib-lnet.h | 332 +++++------- lnet/include/lnet/lib-p30.h | 332 +++++------- lnet/include/lnet/lib-types.h | 266 +++++----- lnet/include/lnet/lnetctl.h | 1 - lnet/include/lnet/myrnal.h | 23 - lnet/include/lnet/nal.h | 88 ---- lnet/include/lnet/nalids.h | 2 - lnet/include/lnet/ptlctl.h | 1 - lnet/include/lnet/stringtab.h | 3 - lnet/include/lnet/types.h | 7 +- lnet/klnds/gmlnd/gmlnd.h | 65 +-- lnet/klnds/gmlnd/gmlnd_api.c | 138 ++--- lnet/klnds/gmlnd/gmlnd_cb.c | 64 +-- lnet/klnds/gmlnd/gmlnd_comm.c | 95 ++-- lnet/klnds/iiblnd/iiblnd.c | 93 ++-- lnet/klnds/iiblnd/iiblnd.h | 24 +- lnet/klnds/iiblnd/iiblnd_cb.c | 180 +++---- lnet/klnds/lolnd/lolnd.c | 136 ++--- lnet/klnds/lolnd/lolnd.h | 36 +- lnet/klnds/lolnd/lolnd_cb.c | 67 +-- lnet/klnds/openiblnd/openiblnd.c | 109 ++-- lnet/klnds/openiblnd/openiblnd.h | 30 +- lnet/klnds/openiblnd/openiblnd_cb.c | 184 +++---- lnet/klnds/qswlnd/qswlnd.c | 139 ++--- lnet/klnds/qswlnd/qswlnd.h | 30 +- lnet/klnds/qswlnd/qswlnd_cb.c | 140 +++-- lnet/klnds/ralnd/ralnd.c | 119 ++--- lnet/klnds/ralnd/ralnd.h | 33 +- lnet/klnds/ralnd/ralnd_cb.c | 115 ++--- lnet/klnds/socklnd/socklnd.c | 151 +++--- lnet/klnds/socklnd/socklnd.h | 37 +- lnet/klnds/socklnd/socklnd_cb.c | 122 ++--- lnet/klnds/viblnd/viblnd.c | 103 ++-- lnet/klnds/viblnd/viblnd.h | 28 +- lnet/klnds/viblnd/viblnd_cb.c | 150 +++--- lnet/libcfs/Makefile.in | 2 +- lnet/libcfs/autoMakefile.am | 2 +- lnet/libcfs/debug.c | 58 +-- lnet/libcfs/linux/linux-proc.c | 11 +- lnet/libcfs/module.c | 14 +- lnet/libcfs/nidstrings.c | 107 ++++ lnet/libcfs/tracefile.c | 18 +- lnet/lnet/Makefile.in | 6 +- lnet/lnet/api-ni.c | 839 +++++++++++++++++++++++++----- lnet/lnet/api-wrap.c | 379 -------------- lnet/lnet/autoMakefile.am | 12 +- lnet/lnet/lib-eq.c | 151 +++--- lnet/lnet/lib-init.c | 433 ---------------- lnet/lnet/lib-md.c | 290 +++++------ lnet/lnet/lib-me.c | 156 +++--- lnet/lnet/lib-move.c | 989 +++++++++++++++++------------------- lnet/lnet/lib-msg.c | 74 ++- lnet/lnet/lib-ni.c | 29 -- lnet/lnet/lib-pid.c | 20 - lnet/lnet/module.c | 68 +-- lnet/router/router.c | 9 +- lnet/tests/ping_cli.c | 13 +- lnet/tests/ping_srv.c | 9 +- lnet/tests/sping_cli.c | 16 +- lnet/tests/sping_srv.c | 10 +- lnet/ulnds/address.c | 12 +- lnet/ulnds/bridge.h | 13 +- lnet/ulnds/debug.c | 14 +- lnet/ulnds/procapi.c | 86 ++-- lnet/ulnds/procbridge.h | 11 - lnet/ulnds/proclib.c | 44 +- lnet/ulnds/socklnd/address.c | 12 +- lnet/ulnds/socklnd/bridge.h | 13 +- lnet/ulnds/socklnd/debug.c | 14 +- lnet/ulnds/socklnd/procapi.c | 86 ++-- lnet/ulnds/socklnd/procbridge.h | 11 - lnet/ulnds/socklnd/proclib.c | 44 +- lnet/ulnds/socklnd/tcplnd.c | 41 +- lnet/ulnds/tcplnd.c | 41 +- lnet/utils/portals.c | 38 -- lnet/utils/ptlctl.c | 1 - lnet/utils/wirecheck.c | 6 +- 84 files changed, 3339 insertions(+), 4582 deletions(-) delete mode 100644 lnet/include/lnet/internal.h delete mode 100644 lnet/include/lnet/myrnal.h delete mode 100644 lnet/include/lnet/nal.h delete mode 100644 lnet/include/lnet/nalids.h delete mode 100644 lnet/include/lnet/stringtab.h create mode 100644 lnet/libcfs/nidstrings.c delete mode 100644 lnet/lnet/api-wrap.c delete mode 100644 lnet/lnet/lib-init.c delete mode 100644 lnet/lnet/lib-ni.c delete mode 100644 lnet/lnet/lib-pid.c diff --git a/lnet/include/libcfs/kp30.h b/lnet/include/libcfs/kp30.h index 4c3f225..0636aa1 100644 --- a/lnet/include/libcfs/kp30.h +++ b/lnet/include/libcfs/kp30.h @@ -15,21 +15,19 @@ #error Unsupported operating system #endif -#ifdef __KERNEL__ +#ifndef DEBUG_SUBSYSTEM +# define DEBUG_SUBSYSTEM S_UNDEFINED +#endif -# ifndef DEBUG_SUBSYSTEM -# define DEBUG_SUBSYSTEM S_UNDEFINED -# endif +#ifdef __KERNEL__ #ifdef PORTAL_DEBUG -extern void kportal_assertion_failed(char *expr, char *file, const char *func, - const int line); -#define LASSERT(e) ((e) ? 0 : kportal_assertion_failed( #e , __FILE__, \ - __FUNCTION__, __LINE__)) +#define LASSERT(e) ((e) ? 0 : libcfs_assertion_failed( #e , __FILE__, \ + __FUNCTION__, __LINE__)) #define LASSERTF(cond, fmt...) \ do { \ if (unlikely(!(cond))) { \ - portals_debug_msg(DEBUG_SUBSYSTEM, D_EMERG, __FILE__,\ + libcfs_debug_msg(DEBUG_SUBSYSTEM, D_EMERG, __FILE__,\ __FUNCTION__,__LINE__, CDEBUG_STACK,\ "ASSERTION(" #cond ") failed:" fmt);\ LBUG(); \ @@ -116,16 +114,7 @@ do { \ #ifdef PORTALS_PROFILING #define prof_enum(FOO) PROF__##FOO enum { - prof_enum(our_recvmsg), - prof_enum(our_sendmsg), - prof_enum(socknal_recv), - prof_enum(lib_parse), - prof_enum(conn_list_walk), - prof_enum(memcpy), - prof_enum(lib_finalize), - prof_enum(pingcli_time), - prof_enum(gmnal_send), - prof_enum(gmnal_recv), + prof_enum(placeholder), MAX_PROFS }; @@ -174,23 +163,13 @@ int portals_debug_mark_buffer(char *text); int portals_debug_set_daemon(unsigned int cmd, unsigned int length, char *file, unsigned int size); __s32 portals_debug_copy_to_user(char *buf, unsigned long len); -/* Use the special GNU C __attribute__ hack to have the compiler check the - * printf style argument string against the actual argument count and - * types. - */ -void portals_debug_msg(int subsys, int mask, char *file, const char *fn, - const int line, unsigned long stack, - char *format, ...) - __attribute__ ((format (printf, 7, 8))); + void portals_debug_set_level(unsigned int debug_level); extern void kportal_daemonize (char *name); extern void kportal_blockallsigs (void); #else /* !__KERNEL__ */ -# ifndef DEBUG_SUBSYSTEM -# define DEBUG_SUBSYSTEM S_UNDEFINED -# endif # ifdef PORTAL_DEBUG # undef NDEBUG # include @@ -205,19 +184,11 @@ do { \ # define LASSERT(e) # define LASSERTF(cond, args...) do { } while (0) # endif +# define LBUG() assert(0) # define printk(format, args...) printf (format, ## args) # define PORTAL_ALLOC(ptr, size) do { (ptr) = malloc(size); } while (0); # define PORTAL_FREE(a, b) do { free(a); } while (0); void portals_debug_dumplog(void); -# define portals_debug_msg(subsys, mask, file, fn, line, stack, format, a...) \ - printf("%02x:%06x (@%lu %s:%s,l. %d %d %lu): " format, \ - (subsys), (mask), (long)time(0), file, fn, line, \ - getpid(), (unsigned long)stack, ## a); - -#undef CWARN -#undef CERROR -#define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a) -#define CERROR(format, a...) CDEBUG(D_ERROR, format, ## a) #endif /* @@ -237,8 +208,8 @@ void portals_debug_dumplog(void); #define CLASSERT(cond) ({ switch(42) { case (cond): case 0: break; } }) /* support decl needed both by kernel and liblustre */ -char *portals_nid2str(int nal, ptl_nid_t nid, char *str); -char *portals_id2str(int nal, ptl_process_id_t nid, char *str); +char *libcfs_nid2str(ptl_nid_t nid); +char *libcfs_id2str(ptl_process_id_t id); #ifndef CURRENT_TIME # define CURRENT_TIME time(0) @@ -400,7 +371,7 @@ extern int portal_ioctl_getdata(char *buf, char *end, void *arg); #define IOC_PORTAL_NAL_CMD _IOWR('e', 35, IOCTL_PORTAL_TYPE) #define IOC_PORTAL_GET_NID _IOWR('e', 36, IOCTL_PORTAL_TYPE) #define IOC_PORTAL_FAIL_NID _IOWR('e', 37, IOCTL_PORTAL_TYPE) -#define IOC_PORTAL_LOOPBACK _IOWR('e', 38, IOCTL_PORTAL_TYPE) +/* gap: use me! */ #define IOC_PORTAL_LWT_CONTROL _IOWR('e', 39, IOCTL_PORTAL_TYPE) #define IOC_PORTAL_LWT_SNAPSHOT _IOWR('e', 40, IOCTL_PORTAL_TYPE) #define IOC_PORTAL_LWT_LOOKUP_STRING _IOWR('e', 41, IOCTL_PORTAL_TYPE) @@ -412,7 +383,7 @@ enum { SOCKNAL = 2, GMNAL = 3, /* 4 unused */ - TCPNAL = 5, + /* 5 unused */ ROUTER = 6, OPENIBNAL = 7, IIBNAL = 8, diff --git a/lnet/include/libcfs/libcfs.h b/lnet/include/libcfs/libcfs.h index f928fcf..ac8ecab 100644 --- a/lnet/include/libcfs/libcfs.h +++ b/lnet/include/libcfs/libcfs.h @@ -122,7 +122,7 @@ do { \ if (((mask) & (D_ERROR | D_EMERG | D_WARNING | D_CONSOLE)) || \ (portal_debug & (mask) && \ portal_subsystem_debug & DEBUG_SUBSYSTEM)) \ - portals_debug_msg(DEBUG_SUBSYSTEM, mask, \ + libcfs_debug_msg(DEBUG_SUBSYSTEM, mask, \ __FILE__, __FUNCTION__, __LINE__, \ CDEBUG_STACK, format, ## a); \ } while (0) @@ -136,11 +136,11 @@ do { \ \ CHECK_STACK(CDEBUG_STACK); \ if (cfs_time_after(cfs_time_current(), cdebug_next)) { \ - portals_debug_msg(DEBUG_SUBSYSTEM, cdebug_mask, __FILE__, \ + libcfs_debug_msg(DEBUG_SUBSYSTEM, cdebug_mask, __FILE__, \ __FUNCTION__, __LINE__, CDEBUG_STACK, \ cdebug_format, ## a); \ if (cdebug_count) { \ - portals_debug_msg(DEBUG_SUBSYSTEM, cdebug_mask, \ + libcfs_debug_msg(DEBUG_SUBSYSTEM, cdebug_mask, \ __FILE__, __FUNCTION__, __LINE__, \ 0, "skipped %d similar messages\n", \ cdebug_count); \ @@ -157,7 +157,7 @@ do { \ cdebug_delay*2; \ cdebug_next = cfs_time_current() + cdebug_delay; \ } else { \ - portals_debug_msg(DEBUG_SUBSYSTEM, \ + libcfs_debug_msg(DEBUG_SUBSYSTEM, \ portal_debug & ~(D_EMERG|D_ERROR|D_WARNING),\ __FILE__, __FUNCTION__, __LINE__, \ CDEBUG_STACK, cdebug_format, ## a); \ @@ -474,11 +474,14 @@ static inline time_t cfs_unix_seconds(void) result; \ }) -extern void portals_debug_msg(int subsys, int mask, char *file, const char *fn, - const int line, unsigned long stack, - char *format, ...) +extern void libcfs_debug_msg(int subsys, int mask, char *file, const char *fn, + const int line, unsigned long stack, + char *format, ...) __attribute__ ((format (printf, 7, 8))); +extern void libcfs_assertion_failed(char *expr, char *file, + const char *fn, const int line); + static inline void cfs_slow_warning(cfs_time_t now, int seconds, char *msg) { if (cfs_time_after(cfs_time_current(), diff --git a/lnet/include/libcfs/linux/libcfs.h b/lnet/include/libcfs/linux/libcfs.h index 208b89e..cd48871c 100644 --- a/lnet/include/libcfs/linux/libcfs.h +++ b/lnet/include/libcfs/linux/libcfs.h @@ -101,7 +101,7 @@ struct ptldebug_header { #define CHECK_STACK(stack) \ do { \ if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) { \ - portals_debug_msg(DEBUG_SUBSYSTEM, D_WARNING, \ + libcfs_debug_msg(DEBUG_SUBSYSTEM, D_WARNING, \ __FILE__, __FUNCTION__, __LINE__, \ (stack),"maximum lustre stack %u\n",\ portal_stack = (stack)); \ diff --git a/lnet/include/lnet/Makefile.am b/lnet/include/lnet/Makefile.am index 3df0f2b..c9dc994 100644 --- a/lnet/include/lnet/Makefile.am +++ b/lnet/include/lnet/Makefile.am @@ -6,7 +6,7 @@ SUBDIRS += darwin endif DIST_SUBDIRS := $(SUBDIRS) -EXTRA_DIST = api.h api-support.h build_check.h errno.h \ - internal.h kpr.h lib-p30.h lib-types.h \ - myrnal.h nal.h nalids.h p30.h ptlctl.h \ - socknal.h stringtab.h types.h +EXTRA_DIST = api.h api-support.h build_check.h errno.h \ + kpr.h lib-p30.h lib-types.h \ + p30.h ptlctl.h \ + socknal.h types.h diff --git a/lnet/include/lnet/api-support.h b/lnet/include/lnet/api-support.h index 848cf40..549f8cb 100644 --- a/lnet/include/lnet/api-support.h +++ b/lnet/include/lnet/api-support.h @@ -7,18 +7,10 @@ # include # include # include - -/* Lots of POSIX dependencies to support PtlEQWait_timeout */ -# include -# include -# include #endif #include #include #include -#include -#include - #endif diff --git a/lnet/include/lnet/api.h b/lnet/include/lnet/api.h index 2d3a8f6..5a6d32a 100644 --- a/lnet/include/lnet/api.h +++ b/lnet/include/lnet/api.h @@ -5,33 +5,39 @@ #include -int PtlInit(int *); -void PtlFini(void); +ptl_err_t PtlInit(int *); -int PtlNIInit(ptl_interface_t interface, ptl_pid_t requested_pid, - ptl_ni_limits_t *desired_limits, ptl_ni_limits_t *actual_limits, - ptl_handle_ni_t *interface_out); +void PtlFini(void); -int PtlNIInitialized(ptl_interface_t); +ptl_err_t PtlNIInit(ptl_interface_t interface, + ptl_pid_t requested_pid, + ptl_ni_limits_t *desired_limits, + ptl_ni_limits_t *actual_limits, + ptl_handle_ni_t *interface_out); -int PtlNIFini(ptl_handle_ni_t interface_in); +ptl_err_t PtlNIInitialized(ptl_interface_t); -int PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id); +ptl_err_t PtlNIFini(ptl_handle_ni_t interface_in); -int PtlGetUid(ptl_handle_ni_t ni_handle, ptl_uid_t *uid); +ptl_err_t PtlGetId(ptl_handle_ni_t ni_handle, + ptl_process_id_t *id); +ptl_err_t PtlGetUid(ptl_handle_ni_t ni_handle, + ptl_uid_t *uid); /* * Network interfaces */ +ptl_err_t PtlNIStatus(ptl_handle_ni_t interface_in, + ptl_sr_index_t register_in, + ptl_sr_value_t *status_out); -int PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in, - ptl_sr_value_t * status_out); +ptl_err_t PtlNIDist(ptl_handle_ni_t interface_in, + ptl_process_id_t process_in, + unsigned long *distance_out); -int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in, - unsigned long *distance_out); - -int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t * interface_out); +ptl_err_t PtlNIHandle(ptl_handle_any_t handle_in, + ptl_handle_ni_t *interface_out); /* @@ -43,15 +49,9 @@ int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t * interface_out); * for a specific number of messages. Passing a threshold of zero, "heals" * the given peer. */ -int PtlFailNid (ptl_handle_ni_t ni, ptl_nid_t nid, unsigned int threshold); - -/* - * PtlLoopback - * - * Not an official Portals 3 API call. It provides a way of enabling or - * disabling loopback optimisation, or getting its current state. - */ -int PtlLoopback (ptl_handle_ni_t ni, int set, int *enabled); +ptl_err_t PtlFailNid(ptl_handle_ni_t ni, + ptl_nid_t nid, + unsigned int threshold); /* * PtlSnprintHandle: @@ -64,83 +64,93 @@ void PtlSnprintHandle (char *str, int str_len, ptl_handle_any_t handle); /* * Match entries */ - -int PtlMEAttach(ptl_handle_ni_t interface_in, ptl_pt_index_t index_in, - ptl_process_id_t match_id_in, ptl_match_bits_t match_bits_in, - ptl_match_bits_t ignore_bits_in, ptl_unlink_t unlink_in, - ptl_ins_pos_t pos_in, ptl_handle_me_t * handle_out); - -int PtlMEInsert(ptl_handle_me_t current_in, ptl_process_id_t match_id_in, - ptl_match_bits_t match_bits_in, ptl_match_bits_t ignore_bits_in, - ptl_unlink_t unlink_in, ptl_ins_pos_t position_in, - ptl_handle_me_t * handle_out); - -int PtlMEUnlink(ptl_handle_me_t current_in); - -int PtlMEUnlinkList(ptl_handle_me_t current_in); - - +ptl_err_t PtlMEAttach(ptl_handle_ni_t interface_in, + ptl_pt_index_t index_in, + ptl_process_id_t match_id_in, + ptl_match_bits_t match_bits_in, + ptl_match_bits_t ignore_bits_in, + ptl_unlink_t unlink_in, + ptl_ins_pos_t pos_in, + ptl_handle_me_t *handle_out); + +ptl_err_t PtlMEInsert(ptl_handle_me_t current_in, + ptl_process_id_t match_id_in, + ptl_match_bits_t match_bits_in, + ptl_match_bits_t ignore_bits_in, + ptl_unlink_t unlink_in, + ptl_ins_pos_t position_in, + ptl_handle_me_t *handle_out); + +ptl_err_t PtlMEUnlink(ptl_handle_me_t current_in); /* * Memory descriptors */ +ptl_err_t PtlMDAttach(ptl_handle_me_t current_in, + ptl_md_t md_in, + ptl_unlink_t unlink_in, + ptl_handle_md_t *handle_out); -int PtlMDAttach(ptl_handle_me_t current_in, ptl_md_t md_in, - ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out); - -int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in, - ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out); - -int PtlMDUnlink(ptl_handle_md_t md_in); - -int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t * old_inout, - ptl_md_t * new_inout, ptl_handle_eq_t testq_in); - - -/* These should not be called by users */ -int PtlMDUpdate_internal(ptl_handle_md_t md_in, ptl_md_t * old_inout, - ptl_md_t * new_inout, ptl_handle_eq_t testq_in, - ptl_seq_t sequence_in); - +ptl_err_t PtlMDBind(ptl_handle_ni_t ni_in, + ptl_md_t md_in, + ptl_unlink_t unlink_in, + ptl_handle_md_t *handle_out); +ptl_err_t PtlMDUnlink(ptl_handle_md_t md_in); +ptl_err_t PtlMDUpdate(ptl_handle_md_t md_in, + ptl_md_t *old_inout, + ptl_md_t *new_inout, + ptl_handle_eq_t testq_in); /* * Event queues */ -int PtlEQAlloc(ptl_handle_ni_t ni_in, ptl_size_t count_in, - ptl_eq_handler_t handler, - ptl_handle_eq_t *handle_out); -int PtlEQFree(ptl_handle_eq_t eventq_in); +ptl_err_t PtlEQAlloc(ptl_handle_ni_t ni_in, + ptl_size_t count_in, + ptl_eq_handler_t handler, + ptl_handle_eq_t *handle_out); -int PtlEQGet(ptl_handle_eq_t eventq_in, ptl_event_t * event_out); +ptl_err_t PtlEQFree(ptl_handle_eq_t eventq_in); +ptl_err_t PtlEQGet(ptl_handle_eq_t eventq_in, + ptl_event_t *event_out); -int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t * event_out); -int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout, - ptl_event_t *event_out, int *which_out); +ptl_err_t PtlEQWait(ptl_handle_eq_t eventq_in, + ptl_event_t *event_out); + +ptl_err_t PtlEQPoll(ptl_handle_eq_t *eventqs_in, + int neq_in, + int timeout_ms, + ptl_event_t *event_out, + int *which_eq_out); /* * Access Control Table */ -int PtlACEntry(ptl_handle_ni_t ni_in, ptl_ac_index_t index_in, - ptl_process_id_t match_id_in, ptl_pt_index_t portal_in); +ptl_err_t PtlACEntry(ptl_handle_ni_t ni_in, + ptl_ac_index_t index_in, + ptl_process_id_t match_id_in, + ptl_pt_index_t portal_in); /* * Data movement */ - -int PtlPut(ptl_handle_md_t md_in, ptl_ack_req_t ack_req_in, - ptl_process_id_t target_in, ptl_pt_index_t portal_in, - ptl_ac_index_t cookie_in, ptl_match_bits_t match_bits_in, - ptl_size_t offset_in, ptl_hdr_data_t hdr_data_in); - -int PtlGet(ptl_handle_md_t md_in, ptl_process_id_t target_in, - ptl_pt_index_t portal_in, ptl_ac_index_t cookie_in, - ptl_match_bits_t match_bits_in, ptl_size_t offset_in); - - - +ptl_err_t PtlPut(ptl_handle_md_t md_in, + ptl_ack_req_t ack_req_in, + ptl_process_id_t target_in, + ptl_pt_index_t portal_in, + ptl_ac_index_t cookie_in, + ptl_match_bits_t match_bits_in, + ptl_size_t offset_in, + ptl_hdr_data_t hdr_data_in); + +ptl_err_t PtlGet(ptl_handle_md_t md_in, + ptl_process_id_t target_in, + ptl_pt_index_t portal_in, + ptl_ac_index_t cookie_in, + ptl_match_bits_t match_bits_in, + ptl_size_t offset_in); #endif diff --git a/lnet/include/lnet/internal.h b/lnet/include/lnet/internal.h deleted file mode 100644 index eae00a0..0000000 --- a/lnet/include/lnet/internal.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef _P30_INTERNAL_H_ -#define _P30_INTERNAL_H_ - -#include "build_check.h" -/* - * p30/internal.h - * - * Internals for the API level library that are not needed - * by the user application - */ - -#include - -extern int ptl_init; /* Has the library been initialized */ - -#endif diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index f56206b..b50c1b9 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -22,28 +22,36 @@ #include #include #include -#include #include +extern int ptl_init; /* PtlInit()/PtlFini() counter */ +extern ptl_apini_t ptl_apini; /* THE network interface (at the API) */ + static inline int ptl_is_wire_handle_none (ptl_handle_wire_t *wh) { return (wh->wh_interface_cookie == PTL_WIRE_HANDLE_NONE.wh_interface_cookie && wh->wh_object_cookie == PTL_WIRE_HANDLE_NONE.wh_object_cookie); } +static inline int ptl_md_exhausted (ptl_libmd_t *md) +{ + return (md->md_threshold == 0 || + ((md->md_options & PTL_MD_MAX_SIZE) != 0 && + md->md_offset + md->md_max_size > md->md_length)); +} + #ifdef __KERNEL__ -#define LIB_LOCK(nal,flags) \ - spin_lock_irqsave(&(nal)->libnal_ni.ni_lock, flags) -#define LIB_UNLOCK(nal,flags) \ - spin_unlock_irqrestore(&(nal)->libnal_ni.ni_lock, flags) -#else -#define LIB_LOCK(nal,flags) \ - (pthread_mutex_lock(&(nal)->libnal_ni.ni_mutex), (flags) = 0) -#define LIB_UNLOCK(nal,flags) \ - pthread_mutex_unlock(&(nal)->libnal_ni.ni_mutex) +#define PTL_LOCK(flags) \ + spin_lock_irqsave(&ptl_apini.apini_lock, flags) +#define PTL_UNLOCK(flags) \ + spin_unlock_irqrestore(&ptl_apini.apini_lock, flags) +#else +#define PTL_LOCK(flags) \ + (pthread_mutex_lock(&ptl_apini.apini_mutex), (flags) = 0) +#define PTL_UNLOCK(flags) \ + pthread_mutex_unlock(&ptl_apini.apini_mutex) #endif - #ifdef PTL_USE_LIB_FREELIST #define MAX_MES 2048 @@ -51,163 +59,160 @@ static inline int ptl_is_wire_handle_none (ptl_handle_wire_t *wh) #define MAX_MSGS 2048 /* Outstanding messages */ #define MAX_EQS 512 -extern int lib_freelist_init (lib_nal_t *nal, lib_freelist_t *fl, int nobj, int objsize); -extern void lib_freelist_fini (lib_nal_t *nal, lib_freelist_t *fl); - static inline void * -lib_freelist_alloc (lib_freelist_t *fl) +ptl_freelist_alloc (ptl_freelist_t *fl) { /* ALWAYS called with liblock held */ - lib_freeobj_t *o; + ptl_freeobj_t *o; if (list_empty (&fl->fl_list)) return (NULL); - o = list_entry (fl->fl_list.next, lib_freeobj_t, fo_list); + o = list_entry (fl->fl_list.next, ptl_freeobj_t, fo_list); list_del (&o->fo_list); return ((void *)&o->fo_contents); } static inline void -lib_freelist_free (lib_freelist_t *fl, void *obj) +ptl_freelist_free (ptl_freelist_t *fl, void *obj) { /* ALWAYS called with liblock held */ - lib_freeobj_t *o = list_entry (obj, lib_freeobj_t, fo_contents); + ptl_freeobj_t *o = list_entry (obj, ptl_freeobj_t, fo_contents); list_add (&o->fo_list, &fl->fl_list); } -static inline lib_eq_t * -lib_eq_alloc (lib_nal_t *nal) +static inline ptl_eq_t * +ptl_eq_alloc (void) { /* NEVER called with liblock held */ unsigned long flags; - lib_eq_t *eq; + ptl_eq_t *eq; - LIB_LOCK (nal, flags); - eq = (lib_eq_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_eqs); - LIB_UNLOCK (nal, flags); + PTL_LOCK(flags); + eq = (ptl_eq_t *)ptl_freelist_alloc(&ptl_apini.apini_free_eqs); + PTL_UNLOCK(flags); return (eq); } static inline void -lib_eq_free (lib_nal_t *nal, lib_eq_t *eq) +ptl_eq_free (ptl_eq_t *eq) { /* ALWAYS called with liblock held */ - lib_freelist_free (&nal->libnal_ni.ni_free_eqs, eq); + ptl_freelist_free(&ptl_apini.apini_free_eqs, eq); } -static inline lib_md_t * -lib_md_alloc (lib_nal_t *nal, ptl_md_t *umd) +static inline ptl_libmd_t * +ptl_md_alloc (ptl_md_t *umd) { /* NEVER called with liblock held */ unsigned long flags; - lib_md_t *md; + ptl_libmd_t *md; - LIB_LOCK (nal, flags); - md = (lib_md_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_mds); - LIB_UNLOCK (nal, flags); + PTL_LOCK(flags); + md = (ptl_libmd_t *)ptl_freelist_alloc(&ptl_apini.apini_free_mds); + PTL_UNLOCK(flags); return (md); } static inline void -lib_md_free (lib_nal_t *nal, lib_md_t *md) +ptl_md_free (ptl_libmd_t *md) { /* ALWAYS called with liblock held */ - lib_freelist_free (&nal->libnal_ni.ni_free_mds, md); + ptl_freelist_free (&ptl_apini.apini_free_mds, md); } -static inline lib_me_t * -lib_me_alloc (lib_nal_t *nal) +static inline ptl_me_t * +ptl_me_alloc (void) { /* NEVER called with liblock held */ unsigned long flags; - lib_me_t *me; + ptl_me_t *me; - LIB_LOCK (nal, flags); - me = (lib_me_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_mes); - LIB_UNLOCK (nal, flags); + PTL_LOCK(flags); + me = (ptl_me_t *)ptl_freelist_alloc(&ptl_apini.apini_free_mes); + PTL_UNLOCK(flags); return (me); } static inline void -lib_me_free (lib_nal_t *nal, lib_me_t *me) +ptl_me_free (ptl_me_t *me) { /* ALWAYS called with liblock held */ - lib_freelist_free (&nal->libnal_ni.ni_free_mes, me); + ptl_freelist_free (&ptl_apini.apini_free_mes, me); } -static inline lib_msg_t * -lib_msg_alloc (lib_nal_t *nal) +static inline ptl_msg_t * +ptl_msg_alloc (void) { /* NEVER called with liblock held */ unsigned long flags; - lib_msg_t *msg; + ptl_msg_t *msg; - LIB_LOCK (nal, flags); - msg = (lib_msg_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_msgs); - LIB_UNLOCK (nal, flags); + PTL_LOCK(flags); + msg = (ptl_msg_t *)ptl_freelist_alloc(&ptl_apini.apini_free_msgs); + PTL_UNLOCK(flags); if (msg != NULL) { /* NULL pointers, clear flags etc */ memset (msg, 0, sizeof (*msg)); - msg->ack_wmd = PTL_WIRE_HANDLE_NONE; + msg->msg_ack_wmd = PTL_WIRE_HANDLE_NONE; } return(msg); } static inline void -lib_msg_free (lib_nal_t *nal, lib_msg_t *msg) +ptl_msg_free (ptl_msg_t *msg) { /* ALWAYS called with liblock held */ - lib_freelist_free (&nal->libnal_ni.ni_free_msgs, msg); + ptl_freelist_free(&ptl_apini.apini_free_msgs, msg); } #else -static inline lib_eq_t * -lib_eq_alloc (lib_nal_t *nal) +static inline ptl_eq_t * +ptl_eq_alloc (void) { /* NEVER called with liblock held */ - lib_eq_t *eq; + ptl_eq_t *eq; PORTAL_ALLOC(eq, sizeof(*eq)); return (eq); } static inline void -lib_eq_free (lib_nal_t *nal, lib_eq_t *eq) +ptl_eq_free (ptl_eq_t *eq) { /* ALWAYS called with liblock held */ PORTAL_FREE(eq, sizeof(*eq)); } -static inline lib_md_t * -lib_md_alloc (lib_nal_t *nal, ptl_md_t *umd) +static inline ptl_libmd_t * +ptl_md_alloc (ptl_md_t *umd) { /* NEVER called with liblock held */ - lib_md_t *md; - int size; - int niov; + ptl_libmd_t *md; + int size; + int niov; if ((umd->options & PTL_MD_KIOV) != 0) { niov = umd->length; - size = offsetof(lib_md_t, md_iov.kiov[niov]); + size = offsetof(ptl_libmd_t, md_iov.kiov[niov]); } else { niov = ((umd->options & PTL_MD_IOVEC) != 0) ? umd->length : 1; - size = offsetof(lib_md_t, md_iov.iov[niov]); + size = offsetof(ptl_libmd_t, md_iov.iov[niov]); } PORTAL_ALLOC(md, size); if (md != NULL) { /* Set here in case of early free */ - md->options = umd->options; + md->md_options = umd->options; md->md_niov = niov; } @@ -215,41 +220,41 @@ lib_md_alloc (lib_nal_t *nal, ptl_md_t *umd) } static inline void -lib_md_free (lib_nal_t *nal, lib_md_t *md) +ptl_md_free (ptl_libmd_t *md) { /* ALWAYS called with liblock held */ int size; - if ((md->options & PTL_MD_KIOV) != 0) - size = offsetof(lib_md_t, md_iov.kiov[md->md_niov]); + if ((md->md_options & PTL_MD_KIOV) != 0) + size = offsetof(ptl_libmd_t, md_iov.kiov[md->md_niov]); else - size = offsetof(lib_md_t, md_iov.iov[md->md_niov]); + size = offsetof(ptl_libmd_t, md_iov.iov[md->md_niov]); PORTAL_FREE(md, size); } -static inline lib_me_t * -lib_me_alloc (lib_nal_t *nal) +static inline ptl_me_t * +ptl_me_alloc (void) { /* NEVER called with liblock held */ - lib_me_t *me; + ptl_me_t *me; PORTAL_ALLOC(me, sizeof(*me)); return (me); } static inline void -lib_me_free(lib_nal_t *nal, lib_me_t *me) +ptl_me_free(ptl_me_t *me) { /* ALWAYS called with liblock held */ PORTAL_FREE(me, sizeof(*me)); } -static inline lib_msg_t * -lib_msg_alloc(lib_nal_t *nal) +static inline ptl_msg_t * +ptl_msg_alloc(void) { /* NEVER called with liblock held; may be in interrupt... */ - lib_msg_t *msg; + ptl_msg_t *msg; if (in_interrupt()) PORTAL_ALLOC_ATOMIC(msg, sizeof(*msg)); @@ -259,208 +264,151 @@ lib_msg_alloc(lib_nal_t *nal) if (msg != NULL) { /* NULL pointers, clear flags etc */ memset (msg, 0, sizeof (*msg)); - msg->ack_wmd = PTL_WIRE_HANDLE_NONE; + msg->msg_ack_wmd = PTL_WIRE_HANDLE_NONE; } return (msg); } static inline void -lib_msg_free(lib_nal_t *nal, lib_msg_t *msg) +ptl_msg_free(ptl_msg_t *msg) { /* ALWAYS called with liblock held */ PORTAL_FREE(msg, sizeof(*msg)); } #endif -extern lib_handle_t *lib_lookup_cookie (lib_nal_t *nal, __u64 cookie, int type); -extern void lib_initialise_handle (lib_nal_t *nal, lib_handle_t *lh, int type); -extern void lib_invalidate_handle (lib_nal_t *nal, lib_handle_t *lh); +extern ptl_libhandle_t *ptl_lookup_cookie (__u64 cookie, int type); +extern void ptl_initialise_handle (ptl_libhandle_t *lh, int type); +extern void ptl_invalidate_handle (ptl_libhandle_t *lh); static inline void -ptl_eq2handle (ptl_handle_eq_t *handle, lib_nal_t *nal, lib_eq_t *eq) +ptl_eq2handle (ptl_handle_eq_t *handle, ptl_eq_t *eq) { if (eq == NULL) { *handle = PTL_EQ_NONE; return; } - handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx; handle->cookie = eq->eq_lh.lh_cookie; } -static inline lib_eq_t * -ptl_handle2eq (ptl_handle_eq_t *handle, lib_nal_t *nal) +static inline ptl_eq_t * +ptl_handle2eq (ptl_handle_eq_t *handle) { /* ALWAYS called with liblock held */ - lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie, - PTL_COOKIE_TYPE_EQ); + ptl_libhandle_t *lh = ptl_lookup_cookie (handle->cookie, + PTL_COOKIE_TYPE_EQ); if (lh == NULL) return (NULL); - return (lh_entry (lh, lib_eq_t, eq_lh)); + return (lh_entry (lh, ptl_eq_t, eq_lh)); } static inline void -ptl_md2handle (ptl_handle_md_t *handle, lib_nal_t *nal, lib_md_t *md) +ptl_md2handle (ptl_handle_md_t *handle, ptl_libmd_t *md) { - handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx; handle->cookie = md->md_lh.lh_cookie; } -static inline lib_md_t * -ptl_handle2md (ptl_handle_md_t *handle, lib_nal_t *nal) +static inline ptl_libmd_t * +ptl_handle2md (ptl_handle_md_t *handle) { /* ALWAYS called with liblock held */ - lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie, - PTL_COOKIE_TYPE_MD); + ptl_libhandle_t *lh = ptl_lookup_cookie (handle->cookie, + PTL_COOKIE_TYPE_MD); if (lh == NULL) return (NULL); - return (lh_entry (lh, lib_md_t, md_lh)); + return (lh_entry (lh, ptl_libmd_t, md_lh)); } -static inline lib_md_t * -ptl_wire_handle2md (ptl_handle_wire_t *wh, lib_nal_t *nal) +static inline ptl_libmd_t * +ptl_wire_handle2md (ptl_handle_wire_t *wh) { /* ALWAYS called with liblock held */ - lib_handle_t *lh; + ptl_libhandle_t *lh; - if (wh->wh_interface_cookie != nal->libnal_ni.ni_interface_cookie) + if (wh->wh_interface_cookie != ptl_apini.apini_interface_cookie) return (NULL); - lh = lib_lookup_cookie (nal, wh->wh_object_cookie, + lh = ptl_lookup_cookie (wh->wh_object_cookie, PTL_COOKIE_TYPE_MD); if (lh == NULL) return (NULL); - return (lh_entry (lh, lib_md_t, md_lh)); + return (lh_entry (lh, ptl_libmd_t, md_lh)); } static inline void -ptl_me2handle (ptl_handle_me_t *handle, lib_nal_t *nal, lib_me_t *me) +ptl_me2handle (ptl_handle_me_t *handle, ptl_me_t *me) { - handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx; handle->cookie = me->me_lh.lh_cookie; } -static inline lib_me_t * -ptl_handle2me (ptl_handle_me_t *handle, lib_nal_t *nal) +static inline ptl_me_t * +ptl_handle2me (ptl_handle_me_t *handle) { /* ALWAYS called with liblock held */ - lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie, - PTL_COOKIE_TYPE_ME); + ptl_libhandle_t *lh = ptl_lookup_cookie (handle->cookie, + PTL_COOKIE_TYPE_ME); if (lh == NULL) return (NULL); - return (lh_entry (lh, lib_me_t, me_lh)); + return (lh_entry (lh, ptl_me_t, me_lh)); } -extern int lib_init(lib_nal_t *libnal, nal_t *apinal, - ptl_process_id_t pid, - ptl_ni_limits_t *desired_limits, - ptl_ni_limits_t *actual_limits); -extern int lib_fini(lib_nal_t *libnal); - /* * When the NAL detects an incoming message header, it should call - * lib_parse() decode it. If the message header is garbage, lib_parse() + * ptl_parse() decode it. If the message header is garbage, ptl_parse() * returns immediately with failure, otherwise the NAL callbacks will be * called to receive the message body. They are handed the private cookie * as a way for the NAL to maintain state about which transaction is being - * processed. An extra parameter, lib_msg contains the lib-level message - * state for passing to lib_finalize() when the message body has been + * processed. An extra parameter, ptl_msg contains the lib-level message + * state for passing to ptl_finalize() when the message body has been * received. */ -extern void lib_enq_event_locked (lib_nal_t *nal, void *private, - lib_eq_t *eq, ptl_event_t *ev); -extern void lib_finalize (lib_nal_t *nal, void *private, lib_msg_t *msg, +extern void ptl_enq_event_locked (void *private, + ptl_eq_t *eq, ptl_event_t *ev); +extern void ptl_finalize (ptl_ni_t *ni, void *private, ptl_msg_t *msg, ptl_ni_fail_t ni_fail_type); -extern ptl_err_t lib_parse (lib_nal_t *nal, ptl_hdr_t *hdr, void *private); -extern lib_msg_t *lib_create_reply_msg (lib_nal_t *nal, ptl_nid_t peer_nid, - lib_msg_t *get_msg); -extern void print_hdr (lib_nal_t * nal, ptl_hdr_t * hdr); +extern ptl_err_t ptl_parse (ptl_ni_t *ni, ptl_hdr_t *hdr, void *private); +extern ptl_msg_t *ptl_create_reply_msg (ptl_ni_t *ni, ptl_nid_t peer_nid, + ptl_msg_t *get_msg); +extern void ptl_print_hdr (ptl_hdr_t * hdr); -extern ptl_size_t lib_iov_nob (int niov, struct iovec *iov); -extern void lib_copy_iov2buf (char *dest, int niov, struct iovec *iov, +extern ptl_size_t ptl_iov_nob (int niov, struct iovec *iov); +extern void ptl_copy_iov2buf (char *dest, int niov, struct iovec *iov, ptl_size_t offset, ptl_size_t len); -extern void lib_copy_buf2iov (int niov, struct iovec *iov, ptl_size_t offset, +extern void ptl_copy_buf2iov (int niov, struct iovec *iov, ptl_size_t offset, char *src, ptl_size_t len); -extern int lib_extract_iov (int dst_niov, struct iovec *dst, +extern int ptl_extract_iov (int dst_niov, struct iovec *dst, int src_niov, struct iovec *src, ptl_size_t offset, ptl_size_t len); -extern ptl_size_t lib_kiov_nob (int niov, ptl_kiov_t *iov); -extern void lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov, +extern ptl_size_t ptl_kiov_nob (int niov, ptl_kiov_t *iov); +extern void ptl_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov, ptl_size_t offset, ptl_size_t len); -extern void lib_copy_buf2kiov (int niov, ptl_kiov_t *kiov, ptl_size_t offset, +extern void ptl_copy_buf2kiov (int niov, ptl_kiov_t *kiov, ptl_size_t offset, char *src, ptl_size_t len); -extern int lib_extract_kiov (int dst_niov, ptl_kiov_t *dst, +extern int ptl_extract_kiov (int dst_niov, ptl_kiov_t *dst, int src_niov, ptl_kiov_t *src, ptl_size_t offset, ptl_size_t len); -extern void lib_assert_wire_constants (void); - -extern ptl_err_t lib_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, lib_md_t *md, +extern ptl_err_t ptl_recv (ptl_ni_t *ni, void *private, ptl_msg_t *msg, ptl_libmd_t *md, ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen); -extern ptl_err_t lib_send (lib_nal_t *nal, void *private, lib_msg_t *msg, +extern ptl_err_t ptl_send (ptl_ni_t *ni, void *private, ptl_msg_t *msg, ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - lib_md_t *md, ptl_size_t offset, ptl_size_t len); - -extern int lib_api_ni_status (nal_t *nal, ptl_sr_index_t sr_idx, - ptl_sr_value_t *status); -extern int lib_api_ni_dist (nal_t *nal, ptl_process_id_t *pid, - unsigned long *dist); - -extern int lib_api_eq_alloc (nal_t *nal, ptl_size_t count, - ptl_eq_handler_t callback, - ptl_handle_eq_t *handle); -extern int lib_api_eq_free(nal_t *nal, ptl_handle_eq_t *eqh); -extern int lib_api_eq_poll (nal_t *nal, - ptl_handle_eq_t *eventqs, int neq, int timeout_ms, - ptl_event_t *event, int *which); - -extern int lib_api_me_attach(nal_t *nal, - ptl_pt_index_t portal, - ptl_process_id_t match_id, - ptl_match_bits_t match_bits, - ptl_match_bits_t ignore_bits, - ptl_unlink_t unlink, ptl_ins_pos_t pos, - ptl_handle_me_t *handle); -extern int lib_api_me_insert(nal_t *nal, - ptl_handle_me_t *current_meh, - ptl_process_id_t match_id, - ptl_match_bits_t match_bits, - ptl_match_bits_t ignore_bits, - ptl_unlink_t unlink, ptl_ins_pos_t pos, - ptl_handle_me_t *handle); -extern int lib_api_me_unlink (nal_t *nal, ptl_handle_me_t *meh); -extern void lib_me_unlink(lib_nal_t *nal, lib_me_t *me); - -extern int lib_api_get_id(nal_t *nal, ptl_process_id_t *pid); - -extern void lib_md_unlink(lib_nal_t *nal, lib_md_t *md); -extern void lib_md_deconstruct(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd); -extern int lib_api_md_attach(nal_t *nal, ptl_handle_me_t *meh, - ptl_md_t *umd, ptl_unlink_t unlink, - ptl_handle_md_t *handle); -extern int lib_api_md_bind(nal_t *nal, ptl_md_t *umd, ptl_unlink_t unlink, - ptl_handle_md_t *handle); -extern int lib_api_md_unlink (nal_t *nal, ptl_handle_md_t *mdh); -extern int lib_api_md_update (nal_t *nal, ptl_handle_md_t *mdh, - ptl_md_t *oldumd, ptl_md_t *newumd, - ptl_handle_eq_t *testqh); - -extern int lib_api_get(nal_t *apinal, ptl_handle_md_t *mdh, - ptl_process_id_t *id, - ptl_pt_index_t portal, ptl_ac_index_t ac, - ptl_match_bits_t match_bits, ptl_size_t offset); -extern int lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh, - ptl_ack_req_t ack, ptl_process_id_t *id, - ptl_pt_index_t portal, ptl_ac_index_t ac, - ptl_match_bits_t match_bits, - ptl_size_t offset, ptl_hdr_data_t hdr_data); -extern int lib_api_fail_nid(nal_t *apinal, ptl_nid_t nid, unsigned int threshold); -extern int lib_api_loopback(nal_t *apinal, int set, int *enabled); + ptl_libmd_t *md, ptl_size_t offset, ptl_size_t len); + +extern void ptl_me_unlink(ptl_me_t *me); + +extern void ptl_md_unlink(ptl_libmd_t *md); +extern void ptl_md_deconstruct(ptl_libmd_t *lmd, ptl_md_t *umd); + +#ifdef __KERNEL__ +extern int ptl_register_nal(ptl_nal_t *nal); +extern void ptl_unregister_nal(ptl_nal_t *nal); +#endif #endif diff --git a/lnet/include/lnet/lib-p30.h b/lnet/include/lnet/lib-p30.h index f56206b..b50c1b9 100644 --- a/lnet/include/lnet/lib-p30.h +++ b/lnet/include/lnet/lib-p30.h @@ -22,28 +22,36 @@ #include #include #include -#include #include +extern int ptl_init; /* PtlInit()/PtlFini() counter */ +extern ptl_apini_t ptl_apini; /* THE network interface (at the API) */ + static inline int ptl_is_wire_handle_none (ptl_handle_wire_t *wh) { return (wh->wh_interface_cookie == PTL_WIRE_HANDLE_NONE.wh_interface_cookie && wh->wh_object_cookie == PTL_WIRE_HANDLE_NONE.wh_object_cookie); } +static inline int ptl_md_exhausted (ptl_libmd_t *md) +{ + return (md->md_threshold == 0 || + ((md->md_options & PTL_MD_MAX_SIZE) != 0 && + md->md_offset + md->md_max_size > md->md_length)); +} + #ifdef __KERNEL__ -#define LIB_LOCK(nal,flags) \ - spin_lock_irqsave(&(nal)->libnal_ni.ni_lock, flags) -#define LIB_UNLOCK(nal,flags) \ - spin_unlock_irqrestore(&(nal)->libnal_ni.ni_lock, flags) -#else -#define LIB_LOCK(nal,flags) \ - (pthread_mutex_lock(&(nal)->libnal_ni.ni_mutex), (flags) = 0) -#define LIB_UNLOCK(nal,flags) \ - pthread_mutex_unlock(&(nal)->libnal_ni.ni_mutex) +#define PTL_LOCK(flags) \ + spin_lock_irqsave(&ptl_apini.apini_lock, flags) +#define PTL_UNLOCK(flags) \ + spin_unlock_irqrestore(&ptl_apini.apini_lock, flags) +#else +#define PTL_LOCK(flags) \ + (pthread_mutex_lock(&ptl_apini.apini_mutex), (flags) = 0) +#define PTL_UNLOCK(flags) \ + pthread_mutex_unlock(&ptl_apini.apini_mutex) #endif - #ifdef PTL_USE_LIB_FREELIST #define MAX_MES 2048 @@ -51,163 +59,160 @@ static inline int ptl_is_wire_handle_none (ptl_handle_wire_t *wh) #define MAX_MSGS 2048 /* Outstanding messages */ #define MAX_EQS 512 -extern int lib_freelist_init (lib_nal_t *nal, lib_freelist_t *fl, int nobj, int objsize); -extern void lib_freelist_fini (lib_nal_t *nal, lib_freelist_t *fl); - static inline void * -lib_freelist_alloc (lib_freelist_t *fl) +ptl_freelist_alloc (ptl_freelist_t *fl) { /* ALWAYS called with liblock held */ - lib_freeobj_t *o; + ptl_freeobj_t *o; if (list_empty (&fl->fl_list)) return (NULL); - o = list_entry (fl->fl_list.next, lib_freeobj_t, fo_list); + o = list_entry (fl->fl_list.next, ptl_freeobj_t, fo_list); list_del (&o->fo_list); return ((void *)&o->fo_contents); } static inline void -lib_freelist_free (lib_freelist_t *fl, void *obj) +ptl_freelist_free (ptl_freelist_t *fl, void *obj) { /* ALWAYS called with liblock held */ - lib_freeobj_t *o = list_entry (obj, lib_freeobj_t, fo_contents); + ptl_freeobj_t *o = list_entry (obj, ptl_freeobj_t, fo_contents); list_add (&o->fo_list, &fl->fl_list); } -static inline lib_eq_t * -lib_eq_alloc (lib_nal_t *nal) +static inline ptl_eq_t * +ptl_eq_alloc (void) { /* NEVER called with liblock held */ unsigned long flags; - lib_eq_t *eq; + ptl_eq_t *eq; - LIB_LOCK (nal, flags); - eq = (lib_eq_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_eqs); - LIB_UNLOCK (nal, flags); + PTL_LOCK(flags); + eq = (ptl_eq_t *)ptl_freelist_alloc(&ptl_apini.apini_free_eqs); + PTL_UNLOCK(flags); return (eq); } static inline void -lib_eq_free (lib_nal_t *nal, lib_eq_t *eq) +ptl_eq_free (ptl_eq_t *eq) { /* ALWAYS called with liblock held */ - lib_freelist_free (&nal->libnal_ni.ni_free_eqs, eq); + ptl_freelist_free(&ptl_apini.apini_free_eqs, eq); } -static inline lib_md_t * -lib_md_alloc (lib_nal_t *nal, ptl_md_t *umd) +static inline ptl_libmd_t * +ptl_md_alloc (ptl_md_t *umd) { /* NEVER called with liblock held */ unsigned long flags; - lib_md_t *md; + ptl_libmd_t *md; - LIB_LOCK (nal, flags); - md = (lib_md_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_mds); - LIB_UNLOCK (nal, flags); + PTL_LOCK(flags); + md = (ptl_libmd_t *)ptl_freelist_alloc(&ptl_apini.apini_free_mds); + PTL_UNLOCK(flags); return (md); } static inline void -lib_md_free (lib_nal_t *nal, lib_md_t *md) +ptl_md_free (ptl_libmd_t *md) { /* ALWAYS called with liblock held */ - lib_freelist_free (&nal->libnal_ni.ni_free_mds, md); + ptl_freelist_free (&ptl_apini.apini_free_mds, md); } -static inline lib_me_t * -lib_me_alloc (lib_nal_t *nal) +static inline ptl_me_t * +ptl_me_alloc (void) { /* NEVER called with liblock held */ unsigned long flags; - lib_me_t *me; + ptl_me_t *me; - LIB_LOCK (nal, flags); - me = (lib_me_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_mes); - LIB_UNLOCK (nal, flags); + PTL_LOCK(flags); + me = (ptl_me_t *)ptl_freelist_alloc(&ptl_apini.apini_free_mes); + PTL_UNLOCK(flags); return (me); } static inline void -lib_me_free (lib_nal_t *nal, lib_me_t *me) +ptl_me_free (ptl_me_t *me) { /* ALWAYS called with liblock held */ - lib_freelist_free (&nal->libnal_ni.ni_free_mes, me); + ptl_freelist_free (&ptl_apini.apini_free_mes, me); } -static inline lib_msg_t * -lib_msg_alloc (lib_nal_t *nal) +static inline ptl_msg_t * +ptl_msg_alloc (void) { /* NEVER called with liblock held */ unsigned long flags; - lib_msg_t *msg; + ptl_msg_t *msg; - LIB_LOCK (nal, flags); - msg = (lib_msg_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_msgs); - LIB_UNLOCK (nal, flags); + PTL_LOCK(flags); + msg = (ptl_msg_t *)ptl_freelist_alloc(&ptl_apini.apini_free_msgs); + PTL_UNLOCK(flags); if (msg != NULL) { /* NULL pointers, clear flags etc */ memset (msg, 0, sizeof (*msg)); - msg->ack_wmd = PTL_WIRE_HANDLE_NONE; + msg->msg_ack_wmd = PTL_WIRE_HANDLE_NONE; } return(msg); } static inline void -lib_msg_free (lib_nal_t *nal, lib_msg_t *msg) +ptl_msg_free (ptl_msg_t *msg) { /* ALWAYS called with liblock held */ - lib_freelist_free (&nal->libnal_ni.ni_free_msgs, msg); + ptl_freelist_free(&ptl_apini.apini_free_msgs, msg); } #else -static inline lib_eq_t * -lib_eq_alloc (lib_nal_t *nal) +static inline ptl_eq_t * +ptl_eq_alloc (void) { /* NEVER called with liblock held */ - lib_eq_t *eq; + ptl_eq_t *eq; PORTAL_ALLOC(eq, sizeof(*eq)); return (eq); } static inline void -lib_eq_free (lib_nal_t *nal, lib_eq_t *eq) +ptl_eq_free (ptl_eq_t *eq) { /* ALWAYS called with liblock held */ PORTAL_FREE(eq, sizeof(*eq)); } -static inline lib_md_t * -lib_md_alloc (lib_nal_t *nal, ptl_md_t *umd) +static inline ptl_libmd_t * +ptl_md_alloc (ptl_md_t *umd) { /* NEVER called with liblock held */ - lib_md_t *md; - int size; - int niov; + ptl_libmd_t *md; + int size; + int niov; if ((umd->options & PTL_MD_KIOV) != 0) { niov = umd->length; - size = offsetof(lib_md_t, md_iov.kiov[niov]); + size = offsetof(ptl_libmd_t, md_iov.kiov[niov]); } else { niov = ((umd->options & PTL_MD_IOVEC) != 0) ? umd->length : 1; - size = offsetof(lib_md_t, md_iov.iov[niov]); + size = offsetof(ptl_libmd_t, md_iov.iov[niov]); } PORTAL_ALLOC(md, size); if (md != NULL) { /* Set here in case of early free */ - md->options = umd->options; + md->md_options = umd->options; md->md_niov = niov; } @@ -215,41 +220,41 @@ lib_md_alloc (lib_nal_t *nal, ptl_md_t *umd) } static inline void -lib_md_free (lib_nal_t *nal, lib_md_t *md) +ptl_md_free (ptl_libmd_t *md) { /* ALWAYS called with liblock held */ int size; - if ((md->options & PTL_MD_KIOV) != 0) - size = offsetof(lib_md_t, md_iov.kiov[md->md_niov]); + if ((md->md_options & PTL_MD_KIOV) != 0) + size = offsetof(ptl_libmd_t, md_iov.kiov[md->md_niov]); else - size = offsetof(lib_md_t, md_iov.iov[md->md_niov]); + size = offsetof(ptl_libmd_t, md_iov.iov[md->md_niov]); PORTAL_FREE(md, size); } -static inline lib_me_t * -lib_me_alloc (lib_nal_t *nal) +static inline ptl_me_t * +ptl_me_alloc (void) { /* NEVER called with liblock held */ - lib_me_t *me; + ptl_me_t *me; PORTAL_ALLOC(me, sizeof(*me)); return (me); } static inline void -lib_me_free(lib_nal_t *nal, lib_me_t *me) +ptl_me_free(ptl_me_t *me) { /* ALWAYS called with liblock held */ PORTAL_FREE(me, sizeof(*me)); } -static inline lib_msg_t * -lib_msg_alloc(lib_nal_t *nal) +static inline ptl_msg_t * +ptl_msg_alloc(void) { /* NEVER called with liblock held; may be in interrupt... */ - lib_msg_t *msg; + ptl_msg_t *msg; if (in_interrupt()) PORTAL_ALLOC_ATOMIC(msg, sizeof(*msg)); @@ -259,208 +264,151 @@ lib_msg_alloc(lib_nal_t *nal) if (msg != NULL) { /* NULL pointers, clear flags etc */ memset (msg, 0, sizeof (*msg)); - msg->ack_wmd = PTL_WIRE_HANDLE_NONE; + msg->msg_ack_wmd = PTL_WIRE_HANDLE_NONE; } return (msg); } static inline void -lib_msg_free(lib_nal_t *nal, lib_msg_t *msg) +ptl_msg_free(ptl_msg_t *msg) { /* ALWAYS called with liblock held */ PORTAL_FREE(msg, sizeof(*msg)); } #endif -extern lib_handle_t *lib_lookup_cookie (lib_nal_t *nal, __u64 cookie, int type); -extern void lib_initialise_handle (lib_nal_t *nal, lib_handle_t *lh, int type); -extern void lib_invalidate_handle (lib_nal_t *nal, lib_handle_t *lh); +extern ptl_libhandle_t *ptl_lookup_cookie (__u64 cookie, int type); +extern void ptl_initialise_handle (ptl_libhandle_t *lh, int type); +extern void ptl_invalidate_handle (ptl_libhandle_t *lh); static inline void -ptl_eq2handle (ptl_handle_eq_t *handle, lib_nal_t *nal, lib_eq_t *eq) +ptl_eq2handle (ptl_handle_eq_t *handle, ptl_eq_t *eq) { if (eq == NULL) { *handle = PTL_EQ_NONE; return; } - handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx; handle->cookie = eq->eq_lh.lh_cookie; } -static inline lib_eq_t * -ptl_handle2eq (ptl_handle_eq_t *handle, lib_nal_t *nal) +static inline ptl_eq_t * +ptl_handle2eq (ptl_handle_eq_t *handle) { /* ALWAYS called with liblock held */ - lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie, - PTL_COOKIE_TYPE_EQ); + ptl_libhandle_t *lh = ptl_lookup_cookie (handle->cookie, + PTL_COOKIE_TYPE_EQ); if (lh == NULL) return (NULL); - return (lh_entry (lh, lib_eq_t, eq_lh)); + return (lh_entry (lh, ptl_eq_t, eq_lh)); } static inline void -ptl_md2handle (ptl_handle_md_t *handle, lib_nal_t *nal, lib_md_t *md) +ptl_md2handle (ptl_handle_md_t *handle, ptl_libmd_t *md) { - handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx; handle->cookie = md->md_lh.lh_cookie; } -static inline lib_md_t * -ptl_handle2md (ptl_handle_md_t *handle, lib_nal_t *nal) +static inline ptl_libmd_t * +ptl_handle2md (ptl_handle_md_t *handle) { /* ALWAYS called with liblock held */ - lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie, - PTL_COOKIE_TYPE_MD); + ptl_libhandle_t *lh = ptl_lookup_cookie (handle->cookie, + PTL_COOKIE_TYPE_MD); if (lh == NULL) return (NULL); - return (lh_entry (lh, lib_md_t, md_lh)); + return (lh_entry (lh, ptl_libmd_t, md_lh)); } -static inline lib_md_t * -ptl_wire_handle2md (ptl_handle_wire_t *wh, lib_nal_t *nal) +static inline ptl_libmd_t * +ptl_wire_handle2md (ptl_handle_wire_t *wh) { /* ALWAYS called with liblock held */ - lib_handle_t *lh; + ptl_libhandle_t *lh; - if (wh->wh_interface_cookie != nal->libnal_ni.ni_interface_cookie) + if (wh->wh_interface_cookie != ptl_apini.apini_interface_cookie) return (NULL); - lh = lib_lookup_cookie (nal, wh->wh_object_cookie, + lh = ptl_lookup_cookie (wh->wh_object_cookie, PTL_COOKIE_TYPE_MD); if (lh == NULL) return (NULL); - return (lh_entry (lh, lib_md_t, md_lh)); + return (lh_entry (lh, ptl_libmd_t, md_lh)); } static inline void -ptl_me2handle (ptl_handle_me_t *handle, lib_nal_t *nal, lib_me_t *me) +ptl_me2handle (ptl_handle_me_t *handle, ptl_me_t *me) { - handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx; handle->cookie = me->me_lh.lh_cookie; } -static inline lib_me_t * -ptl_handle2me (ptl_handle_me_t *handle, lib_nal_t *nal) +static inline ptl_me_t * +ptl_handle2me (ptl_handle_me_t *handle) { /* ALWAYS called with liblock held */ - lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie, - PTL_COOKIE_TYPE_ME); + ptl_libhandle_t *lh = ptl_lookup_cookie (handle->cookie, + PTL_COOKIE_TYPE_ME); if (lh == NULL) return (NULL); - return (lh_entry (lh, lib_me_t, me_lh)); + return (lh_entry (lh, ptl_me_t, me_lh)); } -extern int lib_init(lib_nal_t *libnal, nal_t *apinal, - ptl_process_id_t pid, - ptl_ni_limits_t *desired_limits, - ptl_ni_limits_t *actual_limits); -extern int lib_fini(lib_nal_t *libnal); - /* * When the NAL detects an incoming message header, it should call - * lib_parse() decode it. If the message header is garbage, lib_parse() + * ptl_parse() decode it. If the message header is garbage, ptl_parse() * returns immediately with failure, otherwise the NAL callbacks will be * called to receive the message body. They are handed the private cookie * as a way for the NAL to maintain state about which transaction is being - * processed. An extra parameter, lib_msg contains the lib-level message - * state for passing to lib_finalize() when the message body has been + * processed. An extra parameter, ptl_msg contains the lib-level message + * state for passing to ptl_finalize() when the message body has been * received. */ -extern void lib_enq_event_locked (lib_nal_t *nal, void *private, - lib_eq_t *eq, ptl_event_t *ev); -extern void lib_finalize (lib_nal_t *nal, void *private, lib_msg_t *msg, +extern void ptl_enq_event_locked (void *private, + ptl_eq_t *eq, ptl_event_t *ev); +extern void ptl_finalize (ptl_ni_t *ni, void *private, ptl_msg_t *msg, ptl_ni_fail_t ni_fail_type); -extern ptl_err_t lib_parse (lib_nal_t *nal, ptl_hdr_t *hdr, void *private); -extern lib_msg_t *lib_create_reply_msg (lib_nal_t *nal, ptl_nid_t peer_nid, - lib_msg_t *get_msg); -extern void print_hdr (lib_nal_t * nal, ptl_hdr_t * hdr); +extern ptl_err_t ptl_parse (ptl_ni_t *ni, ptl_hdr_t *hdr, void *private); +extern ptl_msg_t *ptl_create_reply_msg (ptl_ni_t *ni, ptl_nid_t peer_nid, + ptl_msg_t *get_msg); +extern void ptl_print_hdr (ptl_hdr_t * hdr); -extern ptl_size_t lib_iov_nob (int niov, struct iovec *iov); -extern void lib_copy_iov2buf (char *dest, int niov, struct iovec *iov, +extern ptl_size_t ptl_iov_nob (int niov, struct iovec *iov); +extern void ptl_copy_iov2buf (char *dest, int niov, struct iovec *iov, ptl_size_t offset, ptl_size_t len); -extern void lib_copy_buf2iov (int niov, struct iovec *iov, ptl_size_t offset, +extern void ptl_copy_buf2iov (int niov, struct iovec *iov, ptl_size_t offset, char *src, ptl_size_t len); -extern int lib_extract_iov (int dst_niov, struct iovec *dst, +extern int ptl_extract_iov (int dst_niov, struct iovec *dst, int src_niov, struct iovec *src, ptl_size_t offset, ptl_size_t len); -extern ptl_size_t lib_kiov_nob (int niov, ptl_kiov_t *iov); -extern void lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov, +extern ptl_size_t ptl_kiov_nob (int niov, ptl_kiov_t *iov); +extern void ptl_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov, ptl_size_t offset, ptl_size_t len); -extern void lib_copy_buf2kiov (int niov, ptl_kiov_t *kiov, ptl_size_t offset, +extern void ptl_copy_buf2kiov (int niov, ptl_kiov_t *kiov, ptl_size_t offset, char *src, ptl_size_t len); -extern int lib_extract_kiov (int dst_niov, ptl_kiov_t *dst, +extern int ptl_extract_kiov (int dst_niov, ptl_kiov_t *dst, int src_niov, ptl_kiov_t *src, ptl_size_t offset, ptl_size_t len); -extern void lib_assert_wire_constants (void); - -extern ptl_err_t lib_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, lib_md_t *md, +extern ptl_err_t ptl_recv (ptl_ni_t *ni, void *private, ptl_msg_t *msg, ptl_libmd_t *md, ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen); -extern ptl_err_t lib_send (lib_nal_t *nal, void *private, lib_msg_t *msg, +extern ptl_err_t ptl_send (ptl_ni_t *ni, void *private, ptl_msg_t *msg, ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - lib_md_t *md, ptl_size_t offset, ptl_size_t len); - -extern int lib_api_ni_status (nal_t *nal, ptl_sr_index_t sr_idx, - ptl_sr_value_t *status); -extern int lib_api_ni_dist (nal_t *nal, ptl_process_id_t *pid, - unsigned long *dist); - -extern int lib_api_eq_alloc (nal_t *nal, ptl_size_t count, - ptl_eq_handler_t callback, - ptl_handle_eq_t *handle); -extern int lib_api_eq_free(nal_t *nal, ptl_handle_eq_t *eqh); -extern int lib_api_eq_poll (nal_t *nal, - ptl_handle_eq_t *eventqs, int neq, int timeout_ms, - ptl_event_t *event, int *which); - -extern int lib_api_me_attach(nal_t *nal, - ptl_pt_index_t portal, - ptl_process_id_t match_id, - ptl_match_bits_t match_bits, - ptl_match_bits_t ignore_bits, - ptl_unlink_t unlink, ptl_ins_pos_t pos, - ptl_handle_me_t *handle); -extern int lib_api_me_insert(nal_t *nal, - ptl_handle_me_t *current_meh, - ptl_process_id_t match_id, - ptl_match_bits_t match_bits, - ptl_match_bits_t ignore_bits, - ptl_unlink_t unlink, ptl_ins_pos_t pos, - ptl_handle_me_t *handle); -extern int lib_api_me_unlink (nal_t *nal, ptl_handle_me_t *meh); -extern void lib_me_unlink(lib_nal_t *nal, lib_me_t *me); - -extern int lib_api_get_id(nal_t *nal, ptl_process_id_t *pid); - -extern void lib_md_unlink(lib_nal_t *nal, lib_md_t *md); -extern void lib_md_deconstruct(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd); -extern int lib_api_md_attach(nal_t *nal, ptl_handle_me_t *meh, - ptl_md_t *umd, ptl_unlink_t unlink, - ptl_handle_md_t *handle); -extern int lib_api_md_bind(nal_t *nal, ptl_md_t *umd, ptl_unlink_t unlink, - ptl_handle_md_t *handle); -extern int lib_api_md_unlink (nal_t *nal, ptl_handle_md_t *mdh); -extern int lib_api_md_update (nal_t *nal, ptl_handle_md_t *mdh, - ptl_md_t *oldumd, ptl_md_t *newumd, - ptl_handle_eq_t *testqh); - -extern int lib_api_get(nal_t *apinal, ptl_handle_md_t *mdh, - ptl_process_id_t *id, - ptl_pt_index_t portal, ptl_ac_index_t ac, - ptl_match_bits_t match_bits, ptl_size_t offset); -extern int lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh, - ptl_ack_req_t ack, ptl_process_id_t *id, - ptl_pt_index_t portal, ptl_ac_index_t ac, - ptl_match_bits_t match_bits, - ptl_size_t offset, ptl_hdr_data_t hdr_data); -extern int lib_api_fail_nid(nal_t *apinal, ptl_nid_t nid, unsigned int threshold); -extern int lib_api_loopback(nal_t *apinal, int set, int *enabled); + ptl_libmd_t *md, ptl_size_t offset, ptl_size_t len); + +extern void ptl_me_unlink(ptl_me_t *me); + +extern void ptl_md_unlink(ptl_libmd_t *md); +extern void ptl_md_deconstruct(ptl_libmd_t *lmd, ptl_md_t *umd); + +#ifdef __KERNEL__ +extern int ptl_register_nal(ptl_nal_t *nal); +extern void ptl_unregister_nal(ptl_nal_t *nal); +#endif #endif diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index 608b1e2..2ae92ed 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -23,15 +23,6 @@ #include #include #include -#include - -typedef char *user_ptr; -typedef struct lib_msg_t lib_msg_t; -typedef struct lib_ptl_t lib_ptl_t; -typedef struct lib_ac_t lib_ac_t; -typedef struct lib_me_t lib_me_t; -typedef struct lib_md_t lib_md_t; -typedef struct lib_eq_t lib_eq_t; #define WIRE_ATTR __attribute__((packed)) @@ -129,92 +120,74 @@ typedef struct { #define PORTALS_PROTO_VERSION_MAJOR 1 #define PORTALS_PROTO_VERSION_MINOR 0 -typedef struct { - long recv_count, recv_length, send_count, send_length, drop_count, - drop_length, msgs_alloc, msgs_max; -} lib_counters_t; - -/* temporary expedient: limit number of entries in discontiguous MDs */ +/* limit on the number of entries in discontiguous MDs */ #define PTL_MTU (1<<20) #define PTL_MD_MAX_IOV 256 -struct lib_msg_t { - struct list_head msg_list; - lib_md_t *md; - ptl_handle_wire_t ack_wmd; - ptl_event_t ev; -}; +/* forward refs */ +struct ptl_libmd; -struct lib_ptl_t { - ptl_pt_index_t size; - struct list_head *tbl; -}; +typedef struct ptl_msg { + struct list_head msg_list; + struct ptl_libmd *msg_md; + ptl_handle_wire_t msg_ack_wmd; + ptl_event_t msg_ev; +} ptl_msg_t; -struct lib_ac_t { - int next_free; -}; - -typedef struct { +typedef struct ptl_libhandle { struct list_head lh_hash_chain; __u64 lh_cookie; -} lib_handle_t; +} ptl_libhandle_t; #define lh_entry(ptr, type, member) \ ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) -struct lib_eq_t { +typedef struct ptl_eq { struct list_head eq_list; - lib_handle_t eq_lh; + ptl_libhandle_t eq_lh; ptl_seq_t eq_enq_seq; ptl_seq_t eq_deq_seq; ptl_size_t eq_size; ptl_event_t *eq_events; int eq_refcount; ptl_eq_handler_t eq_callback; - void *eq_addrkey; -}; +} ptl_eq_t; -struct lib_me_t { +typedef struct ptl_me { struct list_head me_list; - lib_handle_t me_lh; - ptl_process_id_t match_id; - ptl_match_bits_t match_bits, ignore_bits; - ptl_unlink_t unlink; - lib_md_t *md; -}; - -struct lib_md_t { + ptl_libhandle_t me_lh; + ptl_process_id_t me_match_id; + ptl_match_bits_t me_match_bits; + ptl_match_bits_t me_ignore_bits; + ptl_unlink_t me_unlink; + struct ptl_libmd *me_md; +} ptl_me_t; + +typedef struct ptl_libmd { struct list_head md_list; - lib_handle_t md_lh; - lib_me_t *me; - user_ptr start; - ptl_size_t offset; - ptl_size_t length; - ptl_size_t max_size; - int threshold; - int pending; - unsigned int options; + ptl_libhandle_t md_lh; + ptl_me_t *md_me; + char *md_start; + ptl_size_t md_offset; + ptl_size_t md_length; + ptl_size_t md_max_size; + int md_threshold; + int md_pending; + unsigned int md_options; unsigned int md_flags; - void *user_ptr; - lib_eq_t *eq; + void *md_user_ptr; + ptl_eq_t *md_eq; void *md_addrkey; unsigned int md_niov; /* # frags */ union { struct iovec iov[PTL_MD_MAX_IOV]; ptl_kiov_t kiov[PTL_MD_MAX_IOV]; } md_iov; -}; +} ptl_libmd_t; #define PTL_MD_FLAG_ZOMBIE (1 << 0) #define PTL_MD_FLAG_AUTO_UNLINK (1 << 1) -static inline int lib_md_exhausted (lib_md_t *md) -{ - return (md->threshold == 0 || - ((md->options & PTL_MD_MAX_SIZE) != 0 && - md->offset + md->max_size > md->length)); -} - #ifdef PTL_USE_LIB_FREELIST typedef struct { @@ -222,21 +195,21 @@ typedef struct int fl_nobjs; /* the number of them */ int fl_objsize; /* the size (including overhead) of each of them */ struct list_head fl_list; /* where they are enqueued */ -} lib_freelist_t; +} ptl_freelist_t; typedef struct { struct list_head fo_list; /* enqueue on fl_list */ void *fo_contents; /* aligned contents */ -} lib_freeobj_t; +} ptl_freeobj_t; #endif typedef struct { /* info about peers we are trying to fail */ - struct list_head tp_list; /* stash in ni.ni_test_peers */ + struct list_head tp_list; /* apini_test_peers */ ptl_nid_t tp_nid; /* matching nid */ unsigned int tp_threshold; /* # failures to simulate */ -} lib_test_peer_t; +} ptl_test_peer_t; #define PTL_COOKIE_TYPE_MD 1 #define PTL_COOKIE_TYPE_ME 2 @@ -245,51 +218,21 @@ typedef struct { /* PTL_COOKIE_TYPES must be a power of 2, so the cookie type can be * extracted by masking with (PTL_COOKIE_TYPES - 1) */ -typedef struct lib_ni -{ - nal_t *ni_api; - ptl_process_id_t ni_pid; - lib_ptl_t ni_portals; - lib_counters_t ni_counters; - ptl_ni_limits_t ni_actual_limits; - - int ni_lh_hash_size; /* size of lib handle hash table */ - struct list_head *ni_lh_hash_table; /* all extant lib handles, this interface */ - __u64 ni_next_object_cookie; /* cookie generator */ - __u64 ni_interface_cookie; /* uniquely identifies this ni in this epoch */ - - struct list_head ni_test_peers; - int ni_loopback; /* loopback shortcircuits NAL */ - -#ifdef PTL_USE_LIB_FREELIST - lib_freelist_t ni_free_mes; - lib_freelist_t ni_free_msgs; - lib_freelist_t ni_free_mds; - lib_freelist_t ni_free_eqs; -#endif - - struct list_head ni_active_msgs; - struct list_head ni_active_mds; - struct list_head ni_active_eqs; - -#ifdef __KERNEL__ - spinlock_t ni_lock; - cfs_waitq_t ni_waitq; -#else - pthread_mutex_t ni_mutex; - pthread_cond_t ni_cond; -#endif -} lib_ni_t; +struct ptl_ni; /* forward ref */ - -typedef struct lib_nal +typedef struct ptl_nal { - /* lib-level interface state */ - lib_ni_t libnal_ni; - - /* NAL-private data */ - void *libnal_data; + /* fields managed by portals */ + struct list_head nal_list; /* stash in the NAL table */ + int nal_refcount; /* # active instances */ + /* fields initialised by the NAL */ + char *nal_name; /* NAL's type-name */ + int nal_type; + + ptl_err_t (*nal_startup) (struct ptl_ni *ni, char **interfaces); + void (*nal_shutdown) (struct ptl_ni *ni); + /* * send: Sends a preformatted header and payload data to a * specified remote process. The payload is scattered over 'niov' @@ -297,18 +240,18 @@ typedef struct lib_nal * bytes. * NB the NAL may NOT overwrite iov. * PTL_OK on success => NAL has committed to send and will call - * lib_finalize on completion + * ptl_finalize on completion */ - ptl_err_t (*libnal_send) - (struct lib_nal *nal, void *private, lib_msg_t *cookie, + ptl_err_t (*nal_send) + (struct ptl_ni *ni, void *private, ptl_msg_t *cookie, ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, unsigned int niov, struct iovec *iov, size_t offset, size_t mlen); /* as send, but with a set of page fragments (NULL if not supported) */ - ptl_err_t (*libnal_send_pages) - (struct lib_nal *nal, void *private, lib_msg_t * cookie, - ptl_hdr_t * hdr, int type, ptl_nid_t nid, ptl_pid_t pid, + ptl_err_t (*nal_send_pages) + (struct ptl_ni *ni, void *private, ptl_msg_t *cookie, + ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, unsigned int niov, ptl_kiov_t *iov, size_t offset, size_t mlen); /* @@ -319,43 +262,29 @@ typedef struct lib_nal * discarded. * NB the NAL may NOT overwrite iov. * PTL_OK on success => NAL has committed to receive and will call - * lib_finalize on completion + * ptl_finalize on completion */ - ptl_err_t (*libnal_recv) - (struct lib_nal *nal, void *private, lib_msg_t * cookie, + ptl_err_t (*nal_recv) + (struct ptl_ni *ni, void *private, ptl_msg_t * cookie, unsigned int niov, struct iovec *iov, size_t offset, size_t mlen, size_t rlen); /* as recv, but with a set of page fragments (NULL if not supported) */ - ptl_err_t (*libnal_recv_pages) - (struct lib_nal *nal, void *private, lib_msg_t * cookie, + ptl_err_t (*nal_recv_pages) + (struct ptl_ni *ni, void *private, ptl_msg_t * cookie, unsigned int niov, ptl_kiov_t *iov, size_t offset, size_t mlen, size_t rlen); - /* - * (un)map: Tell the NAL about some memory it will access. - * *addrkey passed to libnal_unmap() is what libnal_map() set it to. - * type of *iov depends on options. - * Set to NULL if not required. - */ - ptl_err_t (*libnal_map) - (struct lib_nal *nal, unsigned int niov, struct iovec *iov, - void **addrkey); - void (*libnal_unmap) - (struct lib_nal *nal, unsigned int niov, struct iovec *iov, - void **addrkey); - - /* as (un)map, but with a set of page fragments */ - ptl_err_t (*libnal_map_pages) - (struct lib_nal *nal, unsigned int niov, ptl_kiov_t *iov, - void **addrkey); - void (*libnal_unmap_pages) - (struct lib_nal *nal, unsigned int niov, ptl_kiov_t *iov, - void **addrkey); - /* Calculate a network "distance" to given node */ - int (*libnal_dist) (struct lib_nal *nal, ptl_nid_t nid, unsigned long *dist); -} lib_nal_t; + int (*ptl_dist) (struct ptl_ni *ni, ptl_nid_t nid, unsigned long *dist); +} ptl_nal_t; + +typedef struct ptl_ni { + struct list_head ni_list; /* chain on apini_nis */ + ptl_nid_t ni_nid; /* interface's NID */ + void *ni_data; /* instance-specific data */ + ptl_nal_t *ni_nal; /* procedural interface */ +} ptl_ni_t; typedef struct /* loopback descriptor */ { @@ -372,4 +301,55 @@ typedef struct /* loopback descriptor */ #define LOD_IOV 0xeb105 #define LOD_KIOV 0xeb106 +typedef struct +{ + int apini_refcount; /* PtlNIInit/PtlNIFini counter */ + + int apini_nportals; /* # portals */ + struct list_head *apini_portals; /* the vector of portals */ + + ptl_pid_t apini_pid; /* requested pid */ + ptl_ni_limits_t apini_actual_limits; + + struct list_head apini_nis; /* NAL instances */ + + int apini_lh_hash_size; /* size of lib handle hash table */ + struct list_head *apini_lh_hash_table; /* all extant lib handles, this interface */ + __u64 apini_next_object_cookie; /* cookie generator */ + __u64 apini_interface_cookie; /* uniquely identifies this ni in this epoch */ + + struct list_head apini_test_peers; + +#ifdef PTL_USE_LIB_FREELIST + ptl_freelist_t apini_free_mes; + ptl_freelist_t apini_free_msgs; + ptl_freelist_t apini_free_mds; + ptl_freelist_t apini_free_eqs; +#endif + + struct list_head apini_active_msgs; + struct list_head apini_active_mds; + struct list_head apini_active_eqs; + +#ifdef __KERNEL__ + spinlock_t apini_lock; + cfs_waitq_t apini_waitq; +#else + pthread_mutex_t apini_mutex; + pthread_cond_t apini_cond; +#endif + + struct { + long recv_count; + long recv_length; + long send_count; + long send_length; + long drop_count; + long drop_length; + long msgs_alloc; + long msgs_max; + } apini_counters; + +} ptl_apini_t; + #endif diff --git a/lnet/include/lnet/lnetctl.h b/lnet/include/lnet/lnetctl.h index cce160e..99da747 100644 --- a/lnet/include/lnet/lnetctl.h +++ b/lnet/include/lnet/lnetctl.h @@ -63,7 +63,6 @@ int jt_ptl_del_route (int argc, char **argv); int jt_ptl_notify_router (int argc, char **argv); int jt_ptl_print_routes (int argc, char **argv); int jt_ptl_fail_nid (int argc, char **argv); -int jt_ptl_loopback (int argc, char **argv); int jt_ptl_lwt(int argc, char **argv); int jt_ptl_memhog(int argc, char **argv); diff --git a/lnet/include/lnet/myrnal.h b/lnet/include/lnet/myrnal.h deleted file mode 100644 index 13790f7..0000000 --- a/lnet/include/lnet/myrnal.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef MYRNAL_H -#define MYRNAL_H - -#define MAX_ARGS_LEN (256) -#define MAX_RET_LEN (128) -#define MYRNAL_MAX_ACL_SIZE (64) -#define MYRNAL_MAX_PTL_SIZE (64) - -#define P3CMD (100) -#define P3SYSCALL (200) -#define P3REGISTER (300) - -enum { PTL_MLOCKALL }; - -typedef struct { - void *args; - size_t args_len; - void *ret; - size_t ret_len; - int p3cmd; -} myrnal_forward_t; - -#endif /* MYRNAL_H */ diff --git a/lnet/include/lnet/nal.h b/lnet/include/lnet/nal.h deleted file mode 100644 index aad611d..0000000 --- a/lnet/include/lnet/nal.h +++ /dev/null @@ -1,88 +0,0 @@ -#ifndef _NAL_H_ -#define _NAL_H_ - -#include "build_check.h" - -/* - * p30/nal.h - * - * The API side NAL declarations - */ - -#include - -typedef struct nal_t nal_t; - -struct nal_t { - /* common interface state */ - int nal_refct; - ptl_handle_ni_t nal_handle; - - /* NAL-private data */ - void *nal_data; - - /* NAL API implementation - * NB only nal_ni_init needs to be set when the NAL registers itself */ - int (*nal_ni_init) (nal_t *nal, ptl_pid_t requested_pid, - ptl_ni_limits_t *req, ptl_ni_limits_t *actual); - - void (*nal_ni_fini) (nal_t *nal); - - int (*nal_get_id) (nal_t *nal, ptl_process_id_t *id); - int (*nal_ni_status) (nal_t *nal, ptl_sr_index_t register, ptl_sr_value_t *status); - int (*nal_ni_dist) (nal_t *nal, ptl_process_id_t *id, unsigned long *distance); - int (*nal_fail_nid) (nal_t *nal, ptl_nid_t nid, unsigned int threshold); - int (*nal_loopback) (nal_t *nal, int set, int *enabled); - - int (*nal_me_attach) (nal_t *nal, ptl_pt_index_t portal, - ptl_process_id_t match_id, - ptl_match_bits_t match_bits, ptl_match_bits_t ignore_bits, - ptl_unlink_t unlink, ptl_ins_pos_t pos, - ptl_handle_me_t *handle); - int (*nal_me_insert) (nal_t *nal, ptl_handle_me_t *me, - ptl_process_id_t match_id, - ptl_match_bits_t match_bits, ptl_match_bits_t ignore_bits, - ptl_unlink_t unlink, ptl_ins_pos_t pos, - ptl_handle_me_t *handle); - int (*nal_me_unlink) (nal_t *nal, ptl_handle_me_t *me); - - int (*nal_md_attach) (nal_t *nal, ptl_handle_me_t *me, - ptl_md_t *md, ptl_unlink_t unlink, - ptl_handle_md_t *handle); - int (*nal_md_bind) (nal_t *nal, - ptl_md_t *md, ptl_unlink_t unlink, - ptl_handle_md_t *handle); - int (*nal_md_unlink) (nal_t *nal, ptl_handle_md_t *md); - int (*nal_md_update) (nal_t *nal, ptl_handle_md_t *md, - ptl_md_t *old_md, ptl_md_t *new_md, - ptl_handle_eq_t *testq); - - int (*nal_eq_alloc) (nal_t *nal, ptl_size_t count, - ptl_eq_handler_t handler, - ptl_handle_eq_t *handle); - int (*nal_eq_free) (nal_t *nal, ptl_handle_eq_t *eq); - int (*nal_eq_poll) (nal_t *nal, - ptl_handle_eq_t *eqs, int neqs, int timeout, - ptl_event_t *event, int *which); - - int (*nal_ace_entry) (nal_t *nal, ptl_ac_index_t index, - ptl_process_id_t match_id, ptl_pt_index_t portal); - - int (*nal_put) (nal_t *nal, ptl_handle_md_t *md, ptl_ack_req_t ack, - ptl_process_id_t *target, ptl_pt_index_t portal, - ptl_ac_index_t ac, ptl_match_bits_t match, - ptl_size_t offset, ptl_hdr_data_t hdr_data); - int (*nal_get) (nal_t *nal, ptl_handle_md_t *md, - ptl_process_id_t *target, ptl_pt_index_t portal, - ptl_ac_index_t ac, ptl_match_bits_t match, - ptl_size_t offset); -}; - -extern nal_t *ptl_hndl2nal(ptl_handle_any_t *any); - -#ifdef __KERNEL__ -extern int ptl_register_nal(ptl_interface_t interface, nal_t *nal); -extern void ptl_unregister_nal(ptl_interface_t interface); -#endif - -#endif diff --git a/lnet/include/lnet/nalids.h b/lnet/include/lnet/nalids.h deleted file mode 100644 index 55a991b..0000000 --- a/lnet/include/lnet/nalids.h +++ /dev/null @@ -1,2 +0,0 @@ -#include "build_check.h" - diff --git a/lnet/include/lnet/ptlctl.h b/lnet/include/lnet/ptlctl.h index cce160e..99da747 100644 --- a/lnet/include/lnet/ptlctl.h +++ b/lnet/include/lnet/ptlctl.h @@ -63,7 +63,6 @@ int jt_ptl_del_route (int argc, char **argv); int jt_ptl_notify_router (int argc, char **argv); int jt_ptl_print_routes (int argc, char **argv); int jt_ptl_fail_nid (int argc, char **argv); -int jt_ptl_loopback (int argc, char **argv); int jt_ptl_lwt(int argc, char **argv); int jt_ptl_memhog(int argc, char **argv); diff --git a/lnet/include/lnet/stringtab.h b/lnet/include/lnet/stringtab.h deleted file mode 100644 index 33e4375..0000000 --- a/lnet/include/lnet/stringtab.h +++ /dev/null @@ -1,3 +0,0 @@ -/* - * stringtab.h - */ diff --git a/lnet/include/lnet/types.h b/lnet/include/lnet/types.h index f07534b..f1284c7 100644 --- a/lnet/include/lnet/types.h +++ b/lnet/include/lnet/types.h @@ -25,8 +25,7 @@ typedef __u32 ptl_size_t; #define PTL_TIME_FOREVER (-1) typedef struct { - unsigned long nal_idx; /* which network interface */ - __u64 cookie; /* which thing on that interface */ + __u64 cookie; } ptl_handle_any_t; typedef ptl_handle_any_t ptl_handle_ni_t; @@ -35,12 +34,12 @@ typedef ptl_handle_any_t ptl_handle_md_t; typedef ptl_handle_any_t ptl_handle_me_t; #define PTL_INVALID_HANDLE \ - ((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1}) + ((const ptl_handle_any_t){.cookie = -1}) #define PTL_EQ_NONE PTL_INVALID_HANDLE static inline int PtlHandleIsEqual (ptl_handle_any_t h1, ptl_handle_any_t h2) { - return (h1.nal_idx == h2.nal_idx && h1.cookie == h2.cookie); + return (h1.cookie == h2.cookie); } #define PTL_UID_ANY ((ptl_uid_t) -1) diff --git a/lnet/klnds/gmlnd/gmlnd.h b/lnet/klnds/gmlnd/gmlnd.h index bffbb0b..826e455 100644 --- a/lnet/klnds/gmlnd/gmlnd.h +++ b/lnet/klnds/gmlnd/gmlnd.h @@ -119,7 +119,7 @@ typedef struct _gmnal_stxd_t { int gm_priority; int type; struct _gmnal_data_t *nal_data; - lib_msg_t *cookie; + ptl_msg_t *cookie; int niov; struct iovec iov[PTL_MD_MAX_IOV]; struct _gmnal_stxd_t *next; @@ -156,7 +156,7 @@ typedef struct _gmnal_srxd_t { int ncallbacks; spinlock_t callback_lock; int callback_status; - lib_msg_t *cookie; + ptl_msg_t *cookie; struct _gmnal_srxd_t *next; struct _gmnal_data_t *nal_data; } gmnal_srxd_t; @@ -216,8 +216,7 @@ typedef struct _gmnal_data_t { struct semaphore srxd_token; gmnal_srxd_t *srxd; struct gm_hash *srxd_hash; - nal_t *nal; - lib_nal_t *libnal; + ptl_ni_t *ni; struct gm_port *gm_port; unsigned int gm_local_nid; unsigned int gm_global_nid; @@ -318,67 +317,25 @@ extern gmnal_data_t *global_nal_data; */ /* - * API NAL - */ -int gmnal_api_startup(nal_t *, ptl_pid_t, - ptl_ni_limits_t *, ptl_ni_limits_t *); - -int gmnal_api_forward(nal_t *, int, void *, size_t, void *, size_t); - -void gmnal_api_shutdown(nal_t *); - -int gmnal_api_validate(nal_t *, void *, size_t); - -void gmnal_api_yield(nal_t *, unsigned long *, int); - -void gmnal_api_lock(nal_t *, unsigned long *); - -void gmnal_api_unlock(nal_t *, unsigned long *); - - -#define GMNAL_INIT_NAL(a) do { \ - (a)->nal_ni_init = gmnal_api_startup; \ - (a)->nal_ni_fini = gmnal_api_shutdown; \ - (a)->nal_data = NULL; \ - } while (0) - - -/* * CB NAL */ -ptl_err_t gmnal_cb_send(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *, +ptl_err_t gmnal_cb_send(ptl_ni_t *, void *, ptl_msg_t *, ptl_hdr_t *, int, ptl_nid_t, ptl_pid_t, unsigned int, struct iovec *, size_t, size_t); -ptl_err_t gmnal_cb_send_pages(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *, +ptl_err_t gmnal_cb_send_pages(ptl_ni_t *, void *, ptl_msg_t *, ptl_hdr_t *, int, ptl_nid_t, ptl_pid_t, unsigned int, ptl_kiov_t *, size_t, size_t); -ptl_err_t gmnal_cb_recv(lib_nal_t *, void *, lib_msg_t *, +ptl_err_t gmnal_cb_recv(ptl_ni_t *, void *, ptl_msg_t *, unsigned int, struct iovec *, size_t, size_t, size_t); -ptl_err_t gmnal_cb_recv_pages(lib_nal_t *, void *, lib_msg_t *, +ptl_err_t gmnal_cb_recv_pages(ptl_ni_t *, void *, ptl_msg_t *, unsigned int, ptl_kiov_t *, size_t, size_t, size_t); -int gmnal_cb_dist(lib_nal_t *, ptl_nid_t, unsigned long *); - int gmnal_init(void); void gmnal_fini(void); - - -#define GMNAL_INIT_NAL_CB(a) do { \ - a->libnal_send = gmnal_cb_send; \ - a->libnal_send_pages = gmnal_cb_send_pages; \ - a->libnal_recv = gmnal_cb_recv; \ - a->libnal_recv_pages = gmnal_cb_recv_pages; \ - a->libnal_map = NULL; \ - a->libnal_unmap = NULL; \ - a->libnal_dist = gmnal_cb_dist; \ - a->libnal_data = NULL; \ - } while (0) - - /* * Small and Large Transmit and Receive Descriptor Functions */ @@ -429,8 +386,8 @@ void gmnal_remove_rxtwe(gmnal_data_t *); /* * Small messages */ -ptl_err_t gmnal_small_rx(lib_nal_t *, void *, lib_msg_t *); -ptl_err_t gmnal_small_tx(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *, +ptl_err_t gmnal_small_rx(ptl_ni_t *, void *, ptl_msg_t *); +ptl_err_t gmnal_small_tx(ptl_ni_t *, void *, ptl_msg_t *, ptl_hdr_t *, int, ptl_nid_t, ptl_pid_t, gmnal_stxd_t*, int); void gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t); @@ -440,10 +397,10 @@ void gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t); /* * Large messages */ -int gmnal_large_rx(lib_nal_t *, void *, lib_msg_t *, unsigned int, +int gmnal_large_rx(ptl_ni_t *, void *, ptl_msg_t *, unsigned int, struct iovec *, size_t, size_t, size_t); -int gmnal_large_tx(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *, +int gmnal_large_tx(ptl_ni_t *, void *, ptl_msg_t *, ptl_hdr_t *, int, ptl_nid_t, ptl_pid_t, unsigned int, struct iovec*, size_t, int); diff --git a/lnet/klnds/gmlnd/gmlnd_api.c b/lnet/klnds/gmlnd/gmlnd_api.c index a65272a..4e7ace5 100644 --- a/lnet/klnds/gmlnd/gmlnd_api.c +++ b/lnet/klnds/gmlnd/gmlnd_api.c @@ -30,7 +30,6 @@ gmnal_data_t *global_nal_data = NULL; #define GLOBAL_NID_STR_LEN 16 char global_nid_str[GLOBAL_NID_STR_LEN] = {0}; -ptl_handle_ni_t kgmnal_ni; extern int gmnal_cmd(struct portals_cfg *pcfg, void *private); @@ -54,37 +53,28 @@ static ctl_table gmnalnal_top_sysctl_table[] = { }; /* - * gmnal_api_shutdown - * nal_refct == 0 => called on last matching PtlNIFini() + * gmnal_shutdown * Close down this interface and free any resources associated with it * nal_t nal our nal to shutdown */ void -gmnal_api_shutdown(nal_t *nal) +gmnal_shutdown(ptl_ni_t *ni) { gmnal_data_t *nal_data; - lib_nal_t *libnal; - if (nal->nal_refct != 0) { - /* This module got the first ref */ - PORTAL_MODULE_UNUSE; - return; - } + LASSERT(ni->ni_data == global_nal_data); - LASSERT(nal == global_nal_data->nal); - libnal = (lib_nal_t *)nal->nal_data; - nal_data = (gmnal_data_t *)libnal->libnal_data; + nal_data = (gmnal_data_t *)ni->ni_data; LASSERT(nal_data == global_nal_data); - CDEBUG(D_TRACE, "gmnal_api_shutdown: nal_data [%p]\n", nal_data); + CDEBUG(D_TRACE, "gmnal_shutdown: nal_data [%p]\n", nal_data); /* Stop portals calling our ioctl handler */ libcfs_nal_cmd_unregister(GMNAL); /* XXX for shutdown "under fire" we probably need to set a shutdown - * flag so when lib calls us we fail immediately and dont queue any - * more work but our threads can still call into lib OK. THEN - * shutdown our threads, THEN lib_fini() */ - lib_fini(libnal); + * flag so when portals calls us we fail immediately and dont queue any + * more work but our threads can still call into portals OK. THEN + * shutdown our threads, THEN ptl_fini() */ gmnal_stop_rxthread(nal_data); gmnal_stop_ctthread(nal_data); @@ -98,39 +88,30 @@ gmnal_api_shutdown(nal_t *nal) unregister_sysctl_table (nal_data->sysctl); /* Don't free 'nal'; it's a static struct */ PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); - PORTAL_FREE(libnal, sizeof(lib_nal_t)); global_nal_data = NULL; + + PORTAL_MODULE_UNUSE; } int -gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid, - ptl_ni_limits_t *requested_limits, - ptl_ni_limits_t *actual_limits) +gmnal_startup(ptl_ni_t *ni, char **interfaces) { - - lib_nal_t *libnal = NULL; gmnal_data_t *nal_data = NULL; gmnal_srxd_t *srxd = NULL; gm_status_t gm_status; unsigned int local_nid = 0, global_nid = 0; - ptl_process_id_t process_id; - - if (nal->nal_refct != 0) { - if (actual_limits != NULL) { - libnal = (lib_nal_t *)nal->nal_data; - *actual_limits = libnal->libnal_ni.ni_actual_limits; - } - PORTAL_MODULE_USE; - return (PTL_OK); - } - - /* Called on first PtlNIInit() */ CDEBUG(D_TRACE, "startup\n"); - LASSERT(global_nal_data == NULL); + LASSERT(ni->ni_nal == &gmnal_nal); + + if (global_nal_data != NULL) { + /* Already got 1 instance */ + CERROR("Can't support > 1 instance of this NAL\n"); + return PTL_FAIL; + } PORTAL_ALLOC(nal_data, sizeof(gmnal_data_t)); if (!nal_data) { @@ -145,21 +126,11 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid, CDEBUG(D_INFO, "Allocd and reset nal_data[%p]\n", nal_data); CDEBUG(D_INFO, "small_msg_size is [%d]\n", nal_data->small_msg_size); - PORTAL_ALLOC(libnal, sizeof(lib_nal_t)); - if (!libnal) { - PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); - return(PTL_NO_SPACE); - } - memset(libnal, 0, sizeof(lib_nal_t)); - CDEBUG(D_INFO, "Allocd and reset libnal[%p]\n", libnal); - - GMNAL_INIT_NAL_CB(libnal); /* * String them all together */ - libnal->libnal_data = (void*)nal_data; - nal_data->nal = nal; - nal_data->libnal = libnal; + ni->ni_data = nal_data; + nal_data->ni = ni; GMNAL_GM_LOCK_INIT(nal_data); @@ -171,7 +142,6 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid, if (gm_init() != GM_SUCCESS) { CDEBUG(D_ERROR, "call to gm_init failed\n"); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); - PORTAL_FREE(libnal, sizeof(lib_nal_t)); return(PTL_FAIL); } @@ -216,7 +186,6 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid, gm_finalize(); GMNAL_GM_UNLOCK(nal_data); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); - PORTAL_FREE(libnal, sizeof(lib_nal_t)); return(PTL_FAIL); } @@ -232,7 +201,6 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid, gm_finalize(); GMNAL_GM_UNLOCK(nal_data); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); - PORTAL_FREE(libnal, sizeof(lib_nal_t)); return(PTL_FAIL); } @@ -261,7 +229,6 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid, gm_finalize(); GMNAL_GM_UNLOCK(nal_data); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); - PORTAL_FREE(libnal, sizeof(lib_nal_t)); return(PTL_FAIL); } @@ -275,9 +242,6 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid, CDEBUG(D_INFO, "receive thread seems to have started\n"); - /* - * Initialise the portals library - */ CDEBUG(D_NET, "Getting node id\n"); GMNAL_GM_LOCK(nal_data); gm_status = gm_get_node_id(nal_data->gm_port, &local_nid); @@ -293,7 +257,6 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid, gm_finalize(); GMNAL_GM_UNLOCK(nal_data); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); - PORTAL_FREE(libnal, sizeof(lib_nal_t)); return(PTL_FAIL); } @@ -315,7 +278,6 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid, gm_finalize(); GMNAL_GM_UNLOCK(nal_data); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); - PORTAL_FREE(libnal, sizeof(lib_nal_t)); return(PTL_FAIL); } CDEBUG(D_INFO, "Global node id is [%u]\n", global_nid); @@ -325,37 +287,16 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid, /* pid = gm_getpid(); */ - process_id.pid = requested_pid; - process_id.nid = global_nid; - - CDEBUG(D_INFO, "portals_pid is [%u]\n", process_id.pid); - CDEBUG(D_INFO, "portals_nid is ["LPU64"]\n", process_id.nid); - - CDEBUG(D_PORTALS, "calling lib_init\n"); - if (lib_init(libnal, nal, process_id, - requested_limits, actual_limits) != PTL_OK) { - CDEBUG(D_ERROR, "lib_init failed\n"); - gmnal_stop_rxthread(nal_data); - gmnal_stop_ctthread(nal_data); - gmnal_free_txd(nal_data); - gmnal_free_srxd(nal_data); - GMNAL_GM_LOCK(nal_data); - gm_close(nal_data->gm_port); - gm_finalize(); - GMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); - PORTAL_FREE(libnal, sizeof(lib_nal_t)); - return(PTL_FAIL); - - } + ni->ni_nid = global_nid; - if (libcfs_nal_cmd_register(GMNAL, &gmnal_cmd, libnal->libnal_data) != 0) { + CDEBUG(D_INFO, "portals_pid is [%u]\n", ni->ni_pid); + CDEBUG(D_INFO, "portals_nid is ["LPU64"]\n", ni->ni_nid); + + if (libcfs_nal_cmd_register(GMNAL, &gmnal_cmd, nal_data) != 0) { CDEBUG(D_INFO, "libcfs_nal_cmd_register failed\n"); /* XXX these cleanup cases should be restructured to * minimise duplication... */ - lib_fini(libnal); - gmnal_stop_rxthread(nal_data); gmnal_stop_ctthread(nal_data); gmnal_free_txd(nal_data); @@ -365,7 +306,6 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid, gm_finalize(); GMNAL_GM_UNLOCK(nal_data); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); - PORTAL_FREE(libnal, sizeof(lib_nal_t)); return(PTL_FAIL); } @@ -376,12 +316,21 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid, CDEBUG(D_INFO, "gmnal_init finished\n"); - global_nal_data = libnal->libnal_data; + global_nal_data = nal_data; return(PTL_OK); } -nal_t the_gm_nal; +ptl_nal_t the_gm_nal = { + .nal_name = "gm", + .nal_type = GMNAL, + .nal_startup = gmnal_startup, + .nal_shutdown = gmnal_shutdown, + .nal_send = gmnal_cb_send, + .nal_send_pages = gmnal_cb_send_pages, + .nal_recv = gmnal_cb_recv, + .nal_recv_pages = gmnal_cb_recv_pages, +}; /* * Called when module loaded @@ -390,18 +339,9 @@ int gmnal_init(void) { int rc; - memset(&the_gm_nal, 0, sizeof(nal_t)); - CDEBUG(D_INFO, "reset nal[%p]\n", &the_gm_nal); - GMNAL_INIT_NAL(&the_gm_nal); - - rc = ptl_register_nal(GMNAL, &the_gm_nal); + rc = ptl_register_nal(&the_gm_nal); if (rc != PTL_OK) CERROR("Can't register GMNAL: %d\n", rc); - rc = PtlNIInit(GMNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &kgmnal_ni); - if (rc != PTL_OK && rc != PTL_IFACE_DUP) { - ptl_unregister_nal(GMNAL); - return (-ENODEV); - } return (rc); } @@ -414,8 +354,6 @@ void gmnal_fini() { CDEBUG(D_TRACE, "gmnal_fini\n"); - PtlNIFini(kgmnal_ni); - - ptl_unregister_nal(GMNAL); + ptl_unregister_nal(&the_gm_nal); LASSERT(global_nal_data == NULL); } diff --git a/lnet/klnds/gmlnd/gmlnd_cb.c b/lnet/klnds/gmlnd/gmlnd_cb.c index 6394c37..5ee7e98 100644 --- a/lnet/klnds/gmlnd/gmlnd_cb.c +++ b/lnet/klnds/gmlnd/gmlnd_cb.c @@ -27,7 +27,7 @@ #include "gmnal.h" -ptl_err_t gmnal_cb_recv(lib_nal_t *libnal, void *private, lib_msg_t *cookie, +ptl_err_t gmnal_cb_recv(ptl_ni_t *ni, void *private, ptl_msg_t *cookie, unsigned int niov, struct iovec *iov, size_t offset, size_t mlen, size_t rlen) { @@ -35,9 +35,9 @@ ptl_err_t gmnal_cb_recv(lib_nal_t *libnal, void *private, lib_msg_t *cookie, gmnal_srxd_t *srxd = (gmnal_srxd_t*)private; int status = PTL_OK; - CDEBUG(D_TRACE, "gmnal_cb_recv libnal [%p], private[%p], cookie[%p], " + CDEBUG(D_TRACE, "gmnal_cb_recv ni [%p], private[%p], cookie[%p], " "niov[%d], iov [%p], offset["LPSZ"], mlen["LPSZ"], rlen["LPSZ"]\n", - libnal, private, cookie, niov, iov, offset, mlen, rlen); + ni, private, cookie, niov, iov, offset, mlen, rlen); switch(srxd->type) { case(GMNAL_SMALL_MESSAGE): @@ -69,11 +69,11 @@ ptl_err_t gmnal_cb_recv(lib_nal_t *libnal, void *private, lib_msg_t *cookie, } iov++; } - status = gmnal_small_rx(libnal, private, cookie); + status = gmnal_small_rx(ni, private, cookie); break; case(GMNAL_LARGE_MESSAGE_INIT): CDEBUG(D_INFO, "gmnal_cb_recv got large message init\n"); - status = gmnal_large_rx(libnal, private, cookie, niov, + status = gmnal_large_rx(ni, private, cookie, niov, iov, offset, mlen, rlen); } @@ -81,8 +81,8 @@ ptl_err_t gmnal_cb_recv(lib_nal_t *libnal, void *private, lib_msg_t *cookie, return(status); } -ptl_err_t gmnal_cb_recv_pages(lib_nal_t *libnal, void *private, - lib_msg_t *cookie, unsigned int kniov, +ptl_err_t gmnal_cb_recv_pages(ptl_ni_t *ni, void *private, + ptl_msg_t *cookie, unsigned int kniov, ptl_kiov_t *kiov, size_t offset, size_t mlen, size_t rlen) { @@ -92,9 +92,9 @@ ptl_err_t gmnal_cb_recv_pages(lib_nal_t *libnal, void *private, void *buffer = NULL; - CDEBUG(D_TRACE, "gmnal_cb_recv_pages libnal [%p],private[%p], " + CDEBUG(D_TRACE, "gmnal_cb_recv_pages ni [%p],private[%p], " "cookie[%p], kniov[%d], kiov [%p], offset["LPSZ"], mlen["LPSZ"], rlen["LPSZ"]\n", - libnal, private, cookie, kniov, kiov, offset, mlen, rlen); + ni, private, cookie, kniov, kiov, offset, mlen, rlen); if (srxd->type == GMNAL_SMALL_MESSAGE) { buffer = srxd->buffer; @@ -143,7 +143,7 @@ ptl_err_t gmnal_cb_recv_pages(lib_nal_t *libnal, void *private, kiov++; } CDEBUG(D_INFO, "calling gmnal_small_rx\n"); - status = gmnal_small_rx(libnal, private, cookie); + status = gmnal_small_rx(ni, private, cookie); } CDEBUG(D_INFO, "gmnal_return status [%d]\n", status); @@ -151,7 +151,7 @@ ptl_err_t gmnal_cb_recv_pages(lib_nal_t *libnal, void *private, } -ptl_err_t gmnal_cb_send(lib_nal_t *libnal, void *private, lib_msg_t *cookie, +ptl_err_t gmnal_cb_send(ptl_ni_t *ni, void *private, ptl_msg_t *cookie, ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, unsigned int niov, struct iovec *iov, size_t offset, size_t len) @@ -164,13 +164,9 @@ ptl_err_t gmnal_cb_send(lib_nal_t *libnal, void *private, lib_msg_t *cookie, CDEBUG(D_TRACE, "gmnal_cb_send niov[%d] offset["LPSZ"] len["LPSZ "] nid["LPU64"]\n", niov, offset, len, nid); - nal_data = libnal->libnal_data; - if (!nal_data) { - CDEBUG(D_ERROR, "no nal_data\n"); - return(PTL_FAIL); - } else { - CDEBUG(D_INFO, "nal_data [%p]\n", nal_data); - } + nal_data = ni->ni_data; + CDEBUG(D_INFO, "nal_data [%p]\n", nal_data); + LASSERT (nal_data != NULL); if (GMNAL_IS_SMALL_MESSAGE(nal_data, niov, iov, len)) { CDEBUG(D_INFO, "This is a small message send\n"); @@ -202,20 +198,20 @@ ptl_err_t gmnal_cb_send(lib_nal_t *libnal, void *private, lib_msg_t *cookie, } iov++; } - gmnal_small_tx(libnal, private, cookie, hdr, type, nid, pid, + gmnal_small_tx(ni, private, cookie, hdr, type, nid, pid, stxd, len); } else { CDEBUG(D_ERROR, "Large message send is not supported\n"); - lib_finalize(libnal, private, cookie, PTL_FAIL); + ptl_finalize(ni, private, cookie, PTL_FAIL); return(PTL_FAIL); - gmnal_large_tx(libnal, private, cookie, hdr, type, nid, pid, + gmnal_large_tx(ni, private, cookie, hdr, type, nid, pid, niov, iov, offset, len); } return(PTL_OK); } -ptl_err_t gmnal_cb_send_pages(lib_nal_t *libnal, void *private, - lib_msg_t *cookie, ptl_hdr_t *hdr, int type, +ptl_err_t gmnal_cb_send_pages(ptl_ni_t *ni, void *private, + ptl_msg_t *cookie, ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, unsigned int kniov, ptl_kiov_t *kiov, size_t offset, size_t len) { @@ -228,13 +224,9 @@ ptl_err_t gmnal_cb_send_pages(lib_nal_t *libnal, void *private, CDEBUG(D_TRACE, "gmnal_cb_send_pages nid ["LPU64"] niov[%d] offset[" LPSZ"] len["LPSZ"]\n", nid, kniov, offset, len); - nal_data = libnal->libnal_data; - if (!nal_data) { - CDEBUG(D_ERROR, "no nal_data\n"); - return(PTL_FAIL); - } else { - CDEBUG(D_INFO, "nal_data [%p]\n", nal_data); - } + nal_data = ni->ni_data; + CDEBUG(D_INFO, "nal_data [%p]\n", nal_data); + LASSERT (nal_data != NULL); /* HP SFS 1380: Need to do the gm_bcopy after the kmap so we can kunmap * more aggressively. This is the fix for a livelock situation under @@ -283,7 +275,7 @@ ptl_err_t gmnal_cb_send_pages(lib_nal_t *libnal, void *private, } kiov++; } - status = gmnal_small_tx(libnal, private, cookie, hdr, type, nid, + status = gmnal_small_tx(ni, private, cookie, hdr, type, nid, pid, stxd, len); } else { int i = 0; @@ -307,7 +299,7 @@ ptl_err_t gmnal_cb_send_pages(lib_nal_t *libnal, void *private, iovec++; kiov++; } - gmnal_large_tx(libnal, private, cookie, hdr, type, nid, + gmnal_large_tx(ni, private, cookie, hdr, type, nid, pid, kniov, iovec, offset, len); for (i=0; ikiov_page); @@ -317,11 +309,3 @@ ptl_err_t gmnal_cb_send_pages(lib_nal_t *libnal, void *private, } return(status); } - -int gmnal_cb_dist(lib_nal_t *libnal, ptl_nid_t nid, unsigned long *dist) -{ - CDEBUG(D_TRACE, "gmnal_cb_dist\n"); - if (dist) - *dist = 27; - return(PTL_OK); -} diff --git a/lnet/klnds/gmlnd/gmlnd_comm.c b/lnet/klnds/gmlnd/gmlnd_comm.c index 206d86b..f77f57f 100644 --- a/lnet/klnds/gmlnd/gmlnd_comm.c +++ b/lnet/klnds/gmlnd/gmlnd_comm.c @@ -186,7 +186,7 @@ int gmnal_rx_thread(void *arg) /* * Start processing a small message receive * Get here from gmnal_receive_thread - * Hand off to lib_parse, which calls cb_recv + * Hand off to ptl_parse, which calls cb_recv * which hands back to gmnal_small_receive * Deal with all endian stuff here. */ @@ -231,15 +231,15 @@ gmnal_pre_receive(gmnal_data_t *nal_data, gmnal_rxtwe_t *we, int gmnal_type) CDEBUG(D_INFO, "Back from gmnal_rxbuffer_to_srxd\n"); if (!srxd) { CDEBUG(D_ERROR, "Failed to get receive descriptor\n"); - /* I think passing a NULL srxd to lib_parse will crash + /* I think passing a NULL srxd to ptl_parse will crash * gmnal_recv() */ LBUG(); - lib_parse(nal_data->libnal, portals_hdr, srxd); + ptl_parse(nal_data->ni, portals_hdr, srxd); return(GMNAL_STATUS_FAIL); } /* - * no need to bother portals library with this + * no need to bother portals with this */ if (gmnal_type == GMNAL_LARGE_MESSAGE_ACK) { gmnal_large_tx_ack_received(nal_data, srxd); @@ -251,14 +251,14 @@ gmnal_pre_receive(gmnal_data_t *nal_data, gmnal_rxtwe_t *we, int gmnal_type) srxd->nsiov = gmnal_msghdr->niov; srxd->gm_source_node = gmnal_msghdr->sender_node_id; - CDEBUG(D_PORTALS, "Calling lib_parse buffer is [%p]\n", + CDEBUG(D_PORTALS, "Calling ptl_parse buffer is [%p]\n", buffer+GMNAL_MSGHDR_SIZE); /* - * control passes to lib, which calls cb_recv - * cb_recv is responsible for returning the buffer + * control passes to portals, which calls nal_recv + * nal_recv is responsible for returning the buffer * for future receive */ - rc = lib_parse(nal_data->libnal, portals_hdr, srxd); + rc = ptl_parse(nal_data->ni, portals_hdr, srxd); if (rc != PTL_OK) { /* I just received garbage; take appropriate action... */ @@ -321,31 +321,31 @@ gmnal_rx_bad(gmnal_data_t *nal_data, gmnal_rxtwe_t *we, gmnal_srxd_t *srxd) /* * Process a small message receive. * Get here from gmnal_receive_thread, gmnal_pre_receive - * lib_parse, cb_recv + * ptl_parse, cb_recv * Put data from prewired receive buffer into users buffer(s) * Hang out the receive buffer again for another receive - * Call lib_finalize + * Call ptl_finalize */ ptl_err_t -gmnal_small_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie) +gmnal_small_rx(ptl_ni_t *ni, void *private, ptl_msg_t *cookie) { gmnal_srxd_t *srxd = NULL; - gmnal_data_t *nal_data = (gmnal_data_t*)libnal->libnal_data; + gmnal_data_t *nal_data = (gmnal_data_t*)ni->ni_data; if (!private) { CDEBUG(D_ERROR, "gmnal_small_rx no context\n"); - lib_finalize(libnal, private, cookie, PTL_FAIL); + ptl_finalize(ni, private, cookie, PTL_FAIL); return(PTL_FAIL); } srxd = (gmnal_srxd_t*)private; /* - * let portals library know receive is complete + * let portals know receive is complete */ - CDEBUG(D_PORTALS, "calling lib_finalize\n"); - lib_finalize(libnal, private, cookie, PTL_OK); + CDEBUG(D_PORTALS, "calling ptl_finalize\n"); + ptl_finalize(ni, private, cookie, PTL_OK); /* * return buffer so it can be used again */ @@ -366,31 +366,27 @@ gmnal_small_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie) * The callback function informs when the send is complete. */ ptl_err_t -gmnal_small_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie, +gmnal_small_tx(ptl_ni_t *ni, void *private, ptl_msg_t *cookie, ptl_hdr_t *hdr, int type, ptl_nid_t global_nid, ptl_pid_t pid, gmnal_stxd_t *stxd, int size) { - gmnal_data_t *nal_data = (gmnal_data_t*)libnal->libnal_data; + gmnal_data_t *nal_data = (gmnal_data_t*)ni->ni_data; void *buffer = NULL; gmnal_msghdr_t *msghdr = NULL; int tot_size = 0; unsigned int local_nid; gm_status_t gm_status = GM_SUCCESS; - CDEBUG(D_TRACE, "gmnal_small_tx libnal [%p] private [%p] cookie [%p] " + CDEBUG(D_TRACE, "gmnal_small_tx ni [%p] private [%p] cookie [%p] " "hdr [%p] type [%d] global_nid ["LPU64"] pid [%d] stxd [%p] " - "size [%d]\n", libnal, private, cookie, hdr, type, + "size [%d]\n", ni, private, cookie, hdr, type, global_nid, pid, stxd, size); CDEBUG(D_INFO, "portals_hdr:: dest_nid ["LPU64"], src_nid ["LPU64"]\n", hdr->dest_nid, hdr->src_nid); - if (!nal_data) { - CDEBUG(D_ERROR, "no nal_data\n"); - return(PTL_FAIL); - } else { - CDEBUG(D_INFO, "nal_data [%p]\n", nal_data); - } + CDEBUG(D_INFO, "nal_data [%p]\n", nal_data); + LASSERT(nal_data != NULL); GMNAL_GM_LOCK(nal_data); gm_status = gm_global_id_to_node_id(nal_data->gm_port, global_nid, @@ -452,7 +448,7 @@ gmnal_small_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie, /* * A callback to indicate the small transmit operation is compete * Check for erros and try to deal with them. - * Call lib_finalise to inform the client application that the send + * Call ptl_finalise to inform the client application that the send * is complete and the memory can be reused. * Return the stxd when finished with it (returns a send token) */ @@ -460,9 +456,9 @@ void gmnal_small_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status) { gmnal_stxd_t *stxd = (gmnal_stxd_t*)context; - lib_msg_t *cookie = stxd->cookie; + ptl_msg_t *cookie = stxd->cookie; gmnal_data_t *nal_data = (gmnal_data_t*)stxd->nal_data; - lib_nal_t *libnal = nal_data->libnal; + ptl_ni_t *ni = nal_data->ni; unsigned gnid = 0; gm_status_t gm_status = 0; @@ -580,7 +576,7 @@ gmnal_small_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status) * TO DO * If this is a large message init, * we're not finished with the data yet, - * so can't call lib_finalise. + * so can't call ptl_finalise. * However, we're also holding on to a * stxd here (to keep track of the source * iovec only). Should use another structure @@ -592,7 +588,7 @@ gmnal_small_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status) return; } gmnal_return_stxd(nal_data, stxd); - lib_finalize(libnal, stxd, cookie, PTL_OK); + ptl_finalize(ni, stxd, cookie, PTL_OK); return; } @@ -645,7 +641,7 @@ void gmnal_drop_sends_callback(struct gm_port *gm_port, void *context, * this ack, deregister the memory. Only 1 send token is required here. */ int -gmnal_large_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie, +gmnal_large_tx(ptl_ni_t *ni, void *private, ptl_msg_t *cookie, ptl_hdr_t *hdr, int type, ptl_nid_t global_nid, ptl_pid_t pid, unsigned int niov, struct iovec *iov, size_t offset, int size) { @@ -661,18 +657,13 @@ gmnal_large_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie, int niov_dup; - CDEBUG(D_TRACE, "gmnal_large_tx libnal [%p] private [%p], cookie [%p] " + CDEBUG(D_TRACE, "gmnal_large_tx ni [%p] private [%p], cookie [%p] " "hdr [%p], type [%d] global_nid ["LPU64"], pid [%d], niov [%d], " - "iov [%p], size [%d]\n", libnal, private, cookie, hdr, type, + "iov [%p], size [%d]\n", ni, private, cookie, hdr, type, global_nid, pid, niov, iov, size); - if (libnal) - nal_data = (gmnal_data_t*)libnal->libnal_data; - else { - CDEBUG(D_ERROR, "no libnal.\n"); - return(GMNAL_STATUS_FAIL); - } - + LASSERT (ni != NULL); + nal_data = (gmnal_data_t*)ni->ni_data; /* * Get stxd and buffer. Put local address of data in buffer, @@ -820,11 +811,11 @@ gmnal_large_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status) * data from the sender. */ int -gmnal_large_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie, +gmnal_large_rx(ptl_ni_t *ni, void *private, ptl_msg_t *cookie, unsigned int nriov, struct iovec *riov, size_t offset, size_t mlen, size_t rlen) { - gmnal_data_t *nal_data = libnal->libnal_data; + gmnal_data_t *nal_data = ni->ni_data; gmnal_srxd_t *srxd = (gmnal_srxd_t*)private; void *buffer = NULL; struct iovec *riov_dup; @@ -832,13 +823,13 @@ gmnal_large_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie, gmnal_msghdr_t *msghdr = NULL; gm_status_t gm_status; - CDEBUG(D_TRACE, "gmnal_large_rx :: libnal[%p], private[%p], " + CDEBUG(D_TRACE, "gmnal_large_rx :: ni[%p], private[%p], " "cookie[%p], niov[%d], iov[%p], mlen["LPSZ"], rlen["LPSZ"]\n", - libnal, private, cookie, nriov, riov, mlen, rlen); + ni, private, cookie, nriov, riov, mlen, rlen); if (!srxd) { CDEBUG(D_ERROR, "gmnal_large_rx no context\n"); - lib_finalize(libnal, private, cookie, PTL_FAIL); + ptl_finalize(ni, private, cookie, PTL_FAIL); return(PTL_FAIL); } @@ -1115,7 +1106,7 @@ gmnal_remote_get_callback(gm_port_t *gm_port, void *context, gmnal_ltxd_t *ltxd = (gmnal_ltxd_t*)context; gmnal_srxd_t *srxd = ltxd->srxd; - lib_nal_t *libnal = srxd->nal_data->libnal; + ptl_ni_t *ni = srxd->nal_data->ni; int lastone; struct iovec *riov; int nriov; @@ -1149,7 +1140,7 @@ gmnal_remote_get_callback(gm_port_t *gm_port, void *context, * Let our client application proceed */ CDEBUG(D_ERROR, "final callback context[%p]\n", srxd); - lib_finalize(libnal, srxd, srxd->cookie, PTL_OK); + ptl_finalize(ni, srxd, srxd->cookie, PTL_OK); /* * send an ack to the sender to let him know we got the data @@ -1265,7 +1256,7 @@ gmnal_large_tx_ack(gmnal_data_t *nal_data, gmnal_srxd_t *srxd) /* * A callback to indicate the small transmit operation is compete * Check for errors and try to deal with them. - * Call lib_finalise to inform the client application that the + * Call ptl_finalise to inform the client application that the * send is complete and the memory can be reused. * Return the stxd when finished with it (returns a send token) */ @@ -1292,14 +1283,14 @@ gmnal_large_tx_ack_callback(gm_port_t *gm_port, void *context, * Indicates the large transmit operation is compete. * Called on transmit side (means data has been pulled by receiver * or failed). - * Call lib_finalise to inform the client application that the send + * Call ptl_finalise to inform the client application that the send * is complete, deregister the memory and return the stxd. * Finally, report the rx buffer that the ack message was delivered in. */ void gmnal_large_tx_ack_received(gmnal_data_t *nal_data, gmnal_srxd_t *srxd) { - lib_nal_t *libnal = nal_data->libnal; + ptl_ni_t *ni = nal_data->ni; gmnal_stxd_t *stxd = NULL; gmnal_msghdr_t *msghdr = NULL; void *buffer = NULL; @@ -1314,7 +1305,7 @@ gmnal_large_tx_ack_received(gmnal_data_t *nal_data, gmnal_srxd_t *srxd) CDEBUG(D_INFO, "gmnal_large_tx_ack_received stxd [%p]\n", stxd); - lib_finalize(libnal, stxd, stxd->cookie, PTL_OK); + ptl_finalize(ni, stxd, stxd->cookie, PTL_OK); /* * extract the iovec from the stxd, deregister the memory. diff --git a/lnet/klnds/iiblnd/iiblnd.c b/lnet/klnds/iiblnd/iiblnd.c index 1ecd32d..fcbd1c1 100644 --- a/lnet/klnds/iiblnd/iiblnd.c +++ b/lnet/klnds/iiblnd/iiblnd.c @@ -23,7 +23,17 @@ #include "iibnal.h" -nal_t kibnal_api; +ptl_nal_t kibnal_nal = { + .nal_name = "iib", + .nal_type = IIBNAL, + .nal_startup = kibnal_startup, + .nal_shutdown = kibnal_shutdown, + .nal_send = kibnal_send, + .nal_send_pages = kibnal_send_pages, + .nal_recv = kibnal_recv, + .nal_recv_pages = kibnal_recv_pages, +}; + ptl_handle_ni_t kibnal_ni; kib_tunables_t kibnal_tunables; @@ -245,12 +255,12 @@ static int kibnal_set_mynid(ptl_nid_t nid) { struct timeval tv; - lib_ni_t *ni = &kibnal_lib.libnal_ni; + ptl_ni_t *ni = kibnal_data.kib_ni; int rc; FSTATUS frc; CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n", - nid, ni->ni_pid.nid); + nid, ni->ni_nid); do_gettimeofday(&tv); @@ -280,7 +290,7 @@ kibnal_set_mynid(ptl_nid_t nid) kibnal_data.kib_cep = NULL; } - kibnal_data.kib_nid = ni->ni_pid.nid = nid; + kibnal_data.kib_nid = ni->ni_nid = nid; kibnal_data.kib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; /* Delete all existing peers and their connections after new @@ -1149,23 +1159,18 @@ kibnal_setup_tx_descs (void) return (0); } -static void -kibnal_api_shutdown (nal_t *nal) +void +kibnal_shutdown (ptl_ni_t *ni) { int i; int rc; - if (nal->nal_refct != 0) { - /* This module got the first ref */ - PORTAL_MODULE_UNUSE; - return; - } - + LASSERT (ni->ni_data == &kibnal_data); + LASSERT (ni == kibnal_data.kib_ni); + CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", atomic_read (&portal_kmemory)); - LASSERT(nal == &kibnal_api); - switch (kibnal_data.kib_init) { default: CERROR ("Unexpected state %d\n", kibnal_data.kib_init); @@ -1244,10 +1249,6 @@ kibnal_api_shutdown (nal_t *nal) CERROR ("Close HCA error: %d\n", rc); /* fall through */ - case IBNAL_INIT_LIB: - lib_fini(&kibnal_lib); - /* fall through */ - case IBNAL_INIT_DATA: /* Module refcount only gets to zero when all peers * have been closed so all lists must be empty */ @@ -1318,12 +1319,9 @@ static __u64 max_phys_mem(IB_CA_ATTRIBUTES *ca_attr) } #undef roundup_power -static int -kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid, - ptl_ni_limits_t *requested_limits, - ptl_ni_limits_t *actual_limits) +ptl_err_t +kibnal_startup (ptl_ni_t *ni, char **interfaces) { - ptl_process_id_t process_id; int pkmem = atomic_read(&portal_kmemory); IB_PORT_ATTRIBUTES *pattr; FSTATUS frc; @@ -1331,17 +1329,16 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid, int n; int i; - LASSERT (nal == &kibnal_api); + LASSERT (ni->ni_nal == &kibnal_nal); - if (nal->nal_refct != 0) { - if (actual_limits != NULL) - *actual_limits = kibnal_lib.libnal_ni.ni_actual_limits; - /* This module got the first ref */ - PORTAL_MODULE_USE; - return (PTL_OK); + /* Only 1 instance supported */ + if (kibnal_data.kib_init != IBNAL_INIT_NOTHING) { + CERROR ("Only 1 instance supported\n"); + return PTL_FAIL; } - - LASSERT (kibnal_data.kib_init == IBNAL_INIT_NOTHING); + + ni->ni_data = &kibnal_data; + kibnal_data.kib_ni = ni; frc = IbtGetInterfaceByVersion(IBT_INTERFACE_VERSION_2, &kibnal_data.kib_interfaces); @@ -1392,20 +1389,6 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid, kibnal_data.kib_init = IBNAL_INIT_DATA; /*****************************************************/ - process_id.pid = requested_pid; - process_id.nid = kibnal_data.kib_nid; - - rc = lib_init(&kibnal_lib, nal, process_id, - requested_limits, actual_limits); - if (rc != PTL_OK) { - CERROR("lib_init failed: error %d\n", rc); - goto failed; - } - - /* lib interface initialised */ - kibnal_data.kib_init = IBNAL_INIT_LIB; - /*****************************************************/ - for (i = 0; i < IBNAL_N_SCHED; i++) { rc = kibnal_thread_start (kibnal_scheduler, (void *)i); if (rc != 0) { @@ -1656,7 +1639,7 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid, return (PTL_OK); failed: - kibnal_api_shutdown (&kibnal_api); + kibnal_shutdown (ni); return (PTL_FAIL); } @@ -1667,9 +1650,7 @@ kibnal_module_fini (void) if (kibnal_tunables.kib_sysctl != NULL) unregister_sysctl_table (kibnal_tunables.kib_sysctl); #endif - PtlNIFini(kibnal_ni); - - ptl_unregister_nal(IIBNAL); + ptl_unregister_nal(&kibnal_nal); } int __init @@ -1688,24 +1669,14 @@ kibnal_module_init (void) return -EINVAL; } - kibnal_api.nal_ni_init = kibnal_api_startup; - kibnal_api.nal_ni_fini = kibnal_api_shutdown; - /* Initialise dynamic tunables to defaults once only */ kibnal_tunables.kib_io_timeout = IBNAL_IO_TIMEOUT; - rc = ptl_register_nal(IIBNAL, &kibnal_api); + rc = ptl_register_nal(&kibnal_nal); if (rc != PTL_OK) { CERROR("Can't register IBNAL: %d\n", rc); return (-ENOMEM); /* or something... */ } - - /* Pure gateways want the NAL started up at module load time... */ - rc = PtlNIInit(IIBNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &kibnal_ni); - if (rc != PTL_OK && rc != PTL_IFACE_DUP) { - ptl_unregister_nal(IIBNAL); - return (-ENODEV); - } #ifdef CONFIG_SYSCTL /* Press on regardless even if registering sysctl doesn't work */ diff --git a/lnet/klnds/iiblnd/iiblnd.h b/lnet/klnds/iiblnd/iiblnd.h index 82cdd3c..05b36fc 100644 --- a/lnet/klnds/iiblnd/iiblnd.h +++ b/lnet/klnds/iiblnd/iiblnd.h @@ -53,7 +53,6 @@ #include #include #include -#include #include @@ -170,6 +169,7 @@ typedef struct __u64 kib_incarnation; /* which one am I */ int kib_shutdown; /* shut down? */ atomic_t kib_nthreads; /* # live threads */ + ptl_ni_t *kib_ni; /* _the_ nal instance */ __u64 kib_service_id; /* service number I listen on */ __u64 kib_port_guid; /* my GUID (lo 64 of GID)*/ @@ -332,7 +332,7 @@ typedef struct kib_tx /* transmit message */ int tx_passive_rdma; /* peer sucks/blows */ int tx_passive_rdma_wait; /* waiting for peer to complete */ __u64 tx_passive_rdma_cookie; /* completion cookie */ - lib_msg_t *tx_libmsg[2]; /* lib msgs to finalize on completion */ + ptl_msg_t *tx_ptlmsg[2]; /* ptl msgs to finalize on completion */ kib_md_t tx_md; /* RDMA mapping (active/passive) */ __u64 tx_vaddr; /* pre-mapped buffer (hca vaddr) */ kib_msg_t *tx_msg; /* pre-mapped buffer (host vaddr) */ @@ -430,7 +430,6 @@ typedef struct kib_peer } kib_peer_t; -extern lib_nal_t kibnal_lib; extern kib_data_t kibnal_data; extern kib_tunables_t kibnal_tunables; @@ -858,6 +857,23 @@ kibnal_whole_mem(void) return kibnal_data.kib_md.md_handle != NULL; } +extern ptl_err_t kibnal_startup (ptl_ni_t *ni, char **interfaces); +extern void kibnal_shutdown (ptl_ni_t *ni); +extern ptl_err_t kibnal_send (ptl_ni_t *ni, void *private, ptl_msg_t *cookie, + ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int payload_niov, struct iovec *payload_iov, + size_t payload_offset, size_t payload_len); +extern ptl_err_t kibnal_send_pages (ptl_ni_t *ni, void *private, ptl_msg_t *cookie, + ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int payload_niov, ptl_kiov_t *payload_kiov, + size_t payload_offset, size_t payload_len); +extern ptl_err_t kibnal_recv (ptl_ni_t *ni, void *private, ptl_msg_t *msg, + unsigned int niov, struct iovec *iov, + size_t offset, size_t mlen, size_t rlen); +extern ptl_err_t kibnal_recv_pages (ptl_ni_t *ni, void *private, ptl_msg_t *msg, + unsigned int niov, ptl_kiov_t *kiov, + size_t offset, size_t mlen, size_t rlen); + extern kib_peer_t *kibnal_create_peer (ptl_nid_t nid); extern void kibnal_destroy_peer (kib_peer_t *peer); extern int kibnal_del_peer (ptl_nid_t nid, int single_share); @@ -882,7 +898,7 @@ extern int kibnal_connd (void *arg); extern void kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob); extern void kibnal_close_conn (kib_conn_t *conn, int why); extern void kibnal_start_active_rdma (int type, int status, - kib_rx_t *rx, lib_msg_t *libmsg, + kib_rx_t *rx, ptl_msg_t *ptlmsg, unsigned int niov, struct iovec *iov, ptl_kiov_t *kiov, size_t offset, size_t nob); diff --git a/lnet/klnds/iiblnd/iiblnd_cb.c b/lnet/klnds/iiblnd/iiblnd_cb.c index b9ca677..2026e4c 100644 --- a/lnet/klnds/iiblnd/iiblnd_cb.c +++ b/lnet/klnds/iiblnd/iiblnd_cb.c @@ -88,12 +88,12 @@ kibnal_tx_done (kib_tx_t *tx) } for (i = 0; i < 2; i++) { - /* tx may have up to 2 libmsgs to finalise */ - if (tx->tx_libmsg[i] == NULL) + /* tx may have up to 2 ptlmsgs to finalise */ + if (tx->tx_ptlmsg[i] == NULL) continue; - lib_finalize (&kibnal_lib, NULL, tx->tx_libmsg[i], ptlrc); - tx->tx_libmsg[i] = NULL; + ptl_finalize (kibnal_data.kib_ni, NULL, tx->tx_ptlmsg[i], ptlrc); + tx->tx_ptlmsg[i] = NULL; } if (tx->tx_conn != NULL) { @@ -169,8 +169,8 @@ kibnal_get_idle_tx (int may_block) LASSERT (tx->tx_conn == NULL); LASSERT (!tx->tx_passive_rdma); LASSERT (!tx->tx_passive_rdma_wait); - LASSERT (tx->tx_libmsg[0] == NULL); - LASSERT (tx->tx_libmsg[1] == NULL); + LASSERT (tx->tx_ptlmsg[0] == NULL); + LASSERT (tx->tx_ptlmsg[1] == NULL); } spin_unlock_irqrestore (&kibnal_data.kib_tx_lock, flags); @@ -178,20 +178,6 @@ kibnal_get_idle_tx (int may_block) RETURN(tx); } -static int -kibnal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist) -{ - /* I would guess that if kibnal_get_peer (nid) == NULL, - and we're not routing, then 'nid' is very distant :) */ - if ( nal->libnal_ni.ni_pid.nid == nid ) { - *dist = 0; - } else { - *dist = 1; - } - - return 0; -} - static void kibnal_complete_passive_rdma(kib_conn_t *conn, __u64 cookie, int status) { @@ -555,7 +541,7 @@ kibnal_rx (kib_rx_t *rx) switch (msg->ibm_type) { case IBNAL_MSG_GET_RDMA: - lib_parse(&kibnal_lib, &msg->ibm_u.rdma.ibrm_hdr, rx); + ptl_parse(kibnal_data.kib_ni, &msg->ibm_u.rdma.ibrm_hdr, rx); /* If the incoming get was matched, I'll have initiated the * RDMA and the completion message... */ if (rx->rx_rdma) @@ -570,10 +556,10 @@ kibnal_rx (kib_rx_t *rx) break; case IBNAL_MSG_PUT_RDMA: - lib_parse(&kibnal_lib, &msg->ibm_u.rdma.ibrm_hdr, rx); + ptl_parse(kibnal_data.kib_ni, &msg->ibm_u.rdma.ibrm_hdr, rx); if (rx->rx_rdma) break; - /* This is most unusual, since even if lib_parse() didn't + /* This is most unusual, since even if ptl_parse() didn't * match anything, it should have asked us to read (and * discard) the payload. The portals header must be * inconsistent with this message type, so it's the @@ -584,7 +570,7 @@ kibnal_rx (kib_rx_t *rx) break; case IBNAL_MSG_IMMEDIATE: - lib_parse(&kibnal_lib, &msg->ibm_u.immediate.ibim_hdr, rx); + ptl_parse(kibnal_data.kib_ni, &msg->ibm_u.immediate.ibim_hdr, rx); LASSERT (!rx->rx_rdma); break; @@ -1257,9 +1243,9 @@ kibnal_launch_tx (kib_tx_t *tx, ptl_nid_t nid) static ptl_err_t kibnal_start_passive_rdma (int type, ptl_nid_t nid, - lib_msg_t *libmsg, ptl_hdr_t *hdr) + ptl_msg_t *ptlmsg, ptl_hdr_t *hdr) { - int nob = libmsg->md->length; + int nob = ptlmsg->msg_md->md_length; kib_tx_t *tx; kib_msg_t *ibmsg; int rc; @@ -1277,15 +1263,15 @@ kibnal_start_passive_rdma (int type, ptl_nid_t nid, tx = kibnal_get_idle_tx (1); /* May block; caller is an app thread */ LASSERT (tx != NULL); - if ((libmsg->md->options & PTL_MD_KIOV) == 0) + if ((ptlmsg->msg_md->md_options & PTL_MD_KIOV) == 0) rc = kibnal_map_iov (tx, access, - libmsg->md->md_niov, - libmsg->md->md_iov.iov, + ptlmsg->msg_md->md_niov, + ptlmsg->msg_md->md_iov.iov, 0, nob, 0); else rc = kibnal_map_kiov (tx, access, - libmsg->md->md_niov, - libmsg->md->md_iov.kiov, + ptlmsg->msg_md->md_niov, + ptlmsg->msg_md->md_iov.kiov, 0, nob, 0); if (rc != 0) { @@ -1295,9 +1281,9 @@ kibnal_start_passive_rdma (int type, ptl_nid_t nid, if (type == IBNAL_MSG_GET_RDMA) { /* reply gets finalized when tx completes */ - tx->tx_libmsg[1] = lib_create_reply_msg(&kibnal_lib, - nid, libmsg); - if (tx->tx_libmsg[1] == NULL) { + tx->tx_ptlmsg[1] = ptl_create_reply_msg(kibnal_data.kib_ni, + nid, ptlmsg); + if (tx->tx_ptlmsg[1] == NULL) { CERROR ("Can't create reply for GET -> "LPX64"\n", nid); rc = -ENOMEM; @@ -1327,8 +1313,8 @@ kibnal_start_passive_rdma (int type, ptl_nid_t nid, tx, tx->tx_passive_rdma_cookie, tx->tx_md.md_rkey, tx->tx_md.md_addr, nob); - /* libmsg gets finalized when tx completes. */ - tx->tx_libmsg[0] = libmsg; + /* ptlmsg gets finalized when tx completes. */ + tx->tx_ptlmsg[0] = ptlmsg; kibnal_launch_tx(tx, nid); return (PTL_OK); @@ -1341,7 +1327,7 @@ kibnal_start_passive_rdma (int type, ptl_nid_t nid, void kibnal_start_active_rdma (int type, int status, - kib_rx_t *rx, lib_msg_t *libmsg, + kib_rx_t *rx, ptl_msg_t *ptlmsg, unsigned int niov, struct iovec *iov, ptl_kiov_t *kiov, size_t offset, size_t nob) @@ -1389,7 +1375,7 @@ kibnal_start_active_rdma (int type, int status, CERROR ("tx descs exhausted on RDMA from "LPX64 " completing locally with failure\n", rx->rx_conn->ibc_peer->ibp_nid); - lib_finalize (&kibnal_lib, NULL, libmsg, PTL_NO_SPACE); + ptl_finalize (kibnal_data.kib_ni, NULL, ptlmsg, PTL_NO_SPACE); return; } LASSERT (tx->tx_nsp == 0); @@ -1479,15 +1465,15 @@ init_tx: if (status == 0 && nob != 0) { LASSERT (tx->tx_nsp > 1); - /* RDMA: libmsg gets finalized when the tx completes. This + /* RDMA: ptlmsg gets finalized when the tx completes. This * is after the completion message has been sent, which in * turn is after the RDMA has finished. */ - tx->tx_libmsg[0] = libmsg; + tx->tx_ptlmsg[0] = ptlmsg; } else { LASSERT (tx->tx_nsp == 1); /* No RDMA: local completion happens now! */ CDEBUG(D_WARNING,"No data: immediate completion\n"); - lib_finalize (&kibnal_lib, NULL, libmsg, + ptl_finalize (kibnal_data.kib_ni, NULL, ptlmsg, status == 0 ? PTL_OK : PTL_FAIL); } @@ -1502,18 +1488,18 @@ init_tx: } static ptl_err_t -kibnal_sendmsg(lib_nal_t *nal, - void *private, - lib_msg_t *libmsg, - ptl_hdr_t *hdr, - int type, - ptl_nid_t nid, - ptl_pid_t pid, - unsigned int payload_niov, - struct iovec *payload_iov, - ptl_kiov_t *payload_kiov, - size_t payload_offset, - size_t payload_nob) +kibnal_sendmsg(ptl_ni_t *ni, + void *private, + ptl_msg_t *ptlmsg, + ptl_hdr_t *hdr, + int type, + ptl_nid_t nid, + ptl_pid_t pid, + unsigned int payload_niov, + struct iovec *payload_iov, + ptl_kiov_t *payload_kiov, + size_t payload_offset, + size_t payload_nob) { kib_msg_t *ibmsg; kib_tx_t *tx; @@ -1544,7 +1530,7 @@ kibnal_sendmsg(lib_nal_t *nal, /* RDMA reply expected? */ if (rx->rx_msg->ibm_type == IBNAL_MSG_GET_RDMA) { kibnal_start_active_rdma(IBNAL_MSG_GET_DONE, 0, - rx, libmsg, payload_niov, + rx, ptlmsg, payload_niov, payload_iov, payload_kiov, payload_offset, payload_nob); return (PTL_OK); @@ -1569,10 +1555,10 @@ kibnal_sendmsg(lib_nal_t *nal, case PTL_MSG_GET: /* might the REPLY message be big enough to need RDMA? */ - nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[libmsg->md->length]); + nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[ptlmsg->msg_md->md_length]); if (nob > IBNAL_MSG_SIZE) return (kibnal_start_passive_rdma(IBNAL_MSG_GET_RDMA, - nid, libmsg, hdr)); + nid, ptlmsg, hdr)); break; case PTL_MSG_ACK: @@ -1584,7 +1570,7 @@ kibnal_sendmsg(lib_nal_t *nal, nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]); if (nob > IBNAL_MSG_SIZE) return (kibnal_start_passive_rdma(IBNAL_MSG_PUT_RDMA, - nid, libmsg, hdr)); + nid, ptlmsg, hdr)); break; } @@ -1603,11 +1589,11 @@ kibnal_sendmsg(lib_nal_t *nal, if (payload_nob > 0) { if (payload_kiov != NULL) - lib_copy_kiov2buf(ibmsg->ibm_u.immediate.ibim_payload, + ptl_copy_kiov2buf(ibmsg->ibm_u.immediate.ibim_payload, payload_niov, payload_kiov, payload_offset, payload_nob); else - lib_copy_iov2buf(ibmsg->ibm_u.immediate.ibim_payload, + ptl_copy_iov2buf(ibmsg->ibm_u.immediate.ibim_payload, payload_niov, payload_iov, payload_offset, payload_nob); } @@ -1616,41 +1602,41 @@ kibnal_sendmsg(lib_nal_t *nal, offsetof(kib_immediate_msg_t, ibim_payload[payload_nob])); - /* libmsg gets finalized when tx completes */ - tx->tx_libmsg[0] = libmsg; + /* ptlmsg gets finalized when tx completes */ + tx->tx_ptlmsg[0] = ptlmsg; kibnal_launch_tx(tx, nid); return (PTL_OK); } -static ptl_err_t -kibnal_send (lib_nal_t *nal, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int payload_niov, struct iovec *payload_iov, - size_t payload_offset, size_t payload_len) +ptl_err_t +kibnal_send (ptl_ni_t *ni, void *private, ptl_msg_t *cookie, + ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int payload_niov, struct iovec *payload_iov, + size_t payload_offset, size_t payload_len) { - return (kibnal_sendmsg(nal, private, cookie, + return (kibnal_sendmsg(ni, private, cookie, hdr, type, nid, pid, payload_niov, payload_iov, NULL, payload_offset, payload_len)); } -static ptl_err_t -kibnal_send_pages (lib_nal_t *nal, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int payload_niov, ptl_kiov_t *payload_kiov, - size_t payload_offset, size_t payload_len) +ptl_err_t +kibnal_send_pages (ptl_ni_t *ni, void *private, ptl_msg_t *cookie, + ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int payload_niov, ptl_kiov_t *payload_kiov, + size_t payload_offset, size_t payload_len) { - return (kibnal_sendmsg(nal, private, cookie, + return (kibnal_sendmsg(ni, private, cookie, hdr, type, nid, pid, payload_niov, NULL, payload_kiov, payload_offset, payload_len)); } static ptl_err_t -kibnal_recvmsg (lib_nal_t *nal, void *private, lib_msg_t *libmsg, - unsigned int niov, struct iovec *iov, ptl_kiov_t *kiov, - size_t offset, size_t mlen, size_t rlen) +kibnal_recvmsg (ptl_ni_t *ni, void *private, ptl_msg_t *ptlmsg, + unsigned int niov, struct iovec *iov, ptl_kiov_t *kiov, + size_t offset, size_t mlen, size_t rlen) { kib_rx_t *rx = private; kib_msg_t *rxmsg = rx->rx_msg; @@ -1675,47 +1661,47 @@ kibnal_recvmsg (lib_nal_t *nal, void *private, lib_msg_t *libmsg, } if (kiov != NULL) - lib_copy_buf2kiov(niov, kiov, offset, + ptl_copy_buf2kiov(niov, kiov, offset, rxmsg->ibm_u.immediate.ibim_payload, mlen); else - lib_copy_buf2iov(niov, iov, offset, + ptl_copy_buf2iov(niov, iov, offset, rxmsg->ibm_u.immediate.ibim_payload, mlen); - lib_finalize (nal, NULL, libmsg, PTL_OK); + ptl_finalize (ni, NULL, ptlmsg, PTL_OK); return (PTL_OK); case IBNAL_MSG_GET_RDMA: /* We get called here just to discard any junk after the * GET hdr. */ - LASSERT (libmsg == NULL); - lib_finalize (nal, NULL, libmsg, PTL_OK); + LASSERT (ptlmsg == NULL); + ptl_finalize (ni, NULL, ptlmsg, PTL_OK); return (PTL_OK); case IBNAL_MSG_PUT_RDMA: kibnal_start_active_rdma (IBNAL_MSG_PUT_DONE, 0, - rx, libmsg, + rx, ptlmsg, niov, iov, kiov, offset, mlen); return (PTL_OK); } } -static ptl_err_t -kibnal_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, - unsigned int niov, struct iovec *iov, - size_t offset, size_t mlen, size_t rlen) +ptl_err_t +kibnal_recv (ptl_ni_t *ni, void *private, ptl_msg_t *msg, + unsigned int niov, struct iovec *iov, + size_t offset, size_t mlen, size_t rlen) { - return (kibnal_recvmsg (nal, private, msg, niov, iov, NULL, + return (kibnal_recvmsg (ni, private, msg, niov, iov, NULL, offset, mlen, rlen)); } -static ptl_err_t -kibnal_recv_pages (lib_nal_t *nal, void *private, lib_msg_t *msg, - unsigned int niov, ptl_kiov_t *kiov, - size_t offset, size_t mlen, size_t rlen) +ptl_err_t +kibnal_recv_pages (ptl_ni_t *ni, void *private, ptl_msg_t *msg, + unsigned int niov, ptl_kiov_t *kiov, + size_t offset, size_t mlen, size_t rlen) { - return (kibnal_recvmsg (nal, private, msg, niov, NULL, kiov, + return (kibnal_recvmsg (ni, private, msg, niov, NULL, kiov, offset, mlen, rlen)); } @@ -3017,13 +3003,3 @@ kibnal_scheduler(void *arg) kibnal_thread_fini(); return (0); } - - -lib_nal_t kibnal_lib = { - libnal_data: &kibnal_data, /* NAL private data */ - libnal_send: kibnal_send, - libnal_send_pages: kibnal_send_pages, - libnal_recv: kibnal_recv, - libnal_recv_pages: kibnal_recv_pages, - libnal_dist: kibnal_dist -}; diff --git a/lnet/klnds/lolnd/lolnd.c b/lnet/klnds/lolnd/lolnd.c index 03c2742..815d9f5 100644 --- a/lnet/klnds/lolnd/lolnd.c +++ b/lnet/klnds/lolnd/lolnd.c @@ -21,122 +21,55 @@ #include "lonal.h" -nal_t klonal_api; -klonal_data_t klonal_data; -ptl_handle_ni_t klonal_ni; - - -int -klonal_cmd (struct portals_cfg *pcfg, void *private) +ptl_nal_t klonal_nal = { + .nal_name = "lo", + .nal_type = LONAL, + .nal_startup = klonal_startup, + .nal_shutdown = klonal_shutdown, + .nal_send = klonal_send, + .nal_send_pages = klonal_send_pages, + .nal_recv = klonal_recv, + .nal_recv_pages = klonal_recv_pages, +}; + +int klonal_instanced; + +void +klonal_shutdown(ptl_ni_t *ni) { - LASSERT (pcfg != NULL); - - switch (pcfg->pcfg_command) { - case NAL_CMD_REGISTER_MYNID: - CDEBUG (D_IOCTL, "setting NID to "LPX64" (was "LPX64")\n", - pcfg->pcfg_nid, klonal_lib.libnal_ni.ni_pid.nid); - klonal_lib.libnal_ni.ni_pid.nid = pcfg->pcfg_nid; - return (0); - - default: - return (-EINVAL); - } -} - -static void -klonal_shutdown(nal_t *nal) -{ - /* NB The first ref was this module! */ - if (nal->nal_refct != 0) - return; - CDEBUG (D_NET, "shutdown\n"); - LASSERT (nal == &klonal_api); - - switch (klonal_data.klo_init) - { - default: - LASSERT (0); - - case KLO_INIT_ALL: - libcfs_nal_cmd_unregister(LONAL); - /* fall through */ - - case KLO_INIT_LIB: - lib_fini (&klonal_lib); - break; - - case KLO_INIT_NOTHING: - return; - } - - memset(&klonal_data, 0, sizeof (klonal_data)); - - CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read(&portal_kmemory)); - - printk (KERN_INFO "Lustre: LO NAL unloaded (final mem %d)\n", - atomic_read(&portal_kmemory)); + LASSERT (ni->ni_nal == &klonal_nal); + LASSERT (klonal_instanced); + + klonal_instanced = 0; PORTAL_MODULE_UNUSE; } -static int -klonal_startup (nal_t *nal, ptl_pid_t requested_pid, - ptl_ni_limits_t *requested_limits, - ptl_ni_limits_t *actual_limits) +ptl_err_t +klonal_startup (ptl_ni_t *ni, char **interfaces) { - int rc; - ptl_process_id_t my_process_id; - int pkmem = atomic_read(&portal_kmemory); - - LASSERT (nal == &klonal_api); - - if (nal->nal_refct != 0) { - if (actual_limits != NULL) - *actual_limits = klonal_lib.libnal_ni.ni_actual_limits; - return (PTL_OK); - } - - LASSERT (klonal_data.klo_init == KLO_INIT_NOTHING); - - CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read(&portal_kmemory)); - - /* ensure all pointers NULL etc */ - memset (&klonal_data, 0, sizeof (klonal_data)); + LASSERT (ni->ni_nal == &klonal_nal); - my_process_id.nid = 0; - my_process_id.pid = requested_pid; + if (klonal_instanced) { + /* Multiple instances of the loopback NI is never right */ + CERROR ("Only 1 instance supported\n"); + return PTL_FAIL; + } - rc = lib_init(&klonal_lib, nal, my_process_id, - requested_limits, actual_limits); - if (rc != PTL_OK) { - CERROR ("lib_init failed %d\n", rc); - klonal_shutdown (nal); - return (rc); - } - - klonal_data.klo_init = KLO_INIT_LIB; - - rc = libcfs_nal_cmd_register (LONAL, &klonal_cmd, NULL); - if (rc != 0) { - CERROR ("Can't initialise command interface (rc = %d)\n", rc); - klonal_shutdown (nal); - return (PTL_FAIL); - } + CDEBUG (D_NET, "start\n"); - klonal_data.klo_init = KLO_INIT_ALL; +#warning fixme + ni->ni_nid = 0; + klonal_instanced = 1; - printk(KERN_INFO "Lustre: LO NAL (initial mem %d)\n", pkmem); PORTAL_MODULE_USE; - return (PTL_OK); } void __exit klonal_finalise (void) { - PtlNIFini(klonal_ni); - - ptl_unregister_nal(LONAL); + ptl_unregister_nal(&klonal_nal); } static int __init @@ -144,10 +77,7 @@ klonal_initialise (void) { int rc; - klonal_api.nal_ni_init = klonal_startup; - klonal_api.nal_ni_fini = klonal_shutdown; - - rc = ptl_register_nal(LONAL, &klonal_api); + rc = ptl_register_nal(&klonal_nal); if (rc != PTL_OK) { CERROR("Can't register LONAL: %d\n", rc); return (-ENOMEM); /* or something... */ diff --git a/lnet/klnds/lolnd/lolnd.h b/lnet/klnds/lolnd/lolnd.h index 6d8d77d..db5a2c5 100644 --- a/lnet/klnds/lolnd/lolnd.h +++ b/lnet/klnds/lolnd/lolnd.h @@ -38,7 +38,6 @@ #include #include #include -#include #define KLOD_IOV 153401 #define KLOD_KIOV 153402 @@ -55,18 +54,27 @@ typedef struct } klod_iov; } klo_desc_t; -typedef struct -{ - char klo_init; /* what's been initialised */ -} klonal_data_t; - -/* kqn_init state */ -#define KLO_INIT_NOTHING 0 /* MUST BE ZERO so zeroed state is initialised OK */ -#define KLO_INIT_LIB 1 -#define KLO_INIT_ALL 2 - -extern lib_nal_t klonal_lib; -extern nal_t klonal_api; -extern klonal_data_t klonal_data; +ptl_err_t klonal_startup (ptl_ni_t *ni, char **interfaces); +void klonal_shutdown (ptl_ni_t *ni); +ptl_err_t klonal_send (ptl_ni_t *ni, void *private, + ptl_msg_t *ptlmsg, ptl_hdr_t *hdr, + int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int payload_niov, + struct iovec *payload_iov, + size_t payload_offset, size_t payload_nob); +ptl_err_t klonal_send_pages (ptl_ni_t *ni, void *private, + ptl_msg_t *ptlmsg, ptl_hdr_t *hdr, + int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int payload_niov, + ptl_kiov_t *payload_kiov, + size_t payload_offset, size_t payload_nob); +ptl_err_t klonal_recv(ptl_ni_t *ni, void *private, + ptl_msg_t *ptlmsg, unsigned int niov, + struct iovec *iov, size_t offset, + size_t mlen, size_t rlen); +ptl_err_t klonal_recv_pages(ptl_ni_t *ni, void *private, + ptl_msg_t *ptlmsg, unsigned int niov, + ptl_kiov_t *kiov, size_t offset, + size_t mlen, size_t rlen); #endif /* _LONAL_H */ diff --git a/lnet/klnds/lolnd/lolnd_cb.c b/lnet/klnds/lolnd/lolnd_cb.c index cf5df0d..5ff0528 100644 --- a/lnet/klnds/lolnd/lolnd_cb.c +++ b/lnet/klnds/lolnd/lolnd_cb.c @@ -21,21 +21,10 @@ #include "lonal.h" -/* - * LIB functions follow - * - */ -static int -klonal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist) -{ - *dist = 0; /* it's me */ - return (0); -} - -static ptl_err_t -klonal_send (lib_nal_t *nal, +ptl_err_t +klonal_send (ptl_ni_t *ni, void *private, - lib_msg_t *libmsg, + ptl_msg_t *ptlmsg, ptl_hdr_t *hdr, int type, ptl_nid_t nid, @@ -53,19 +42,17 @@ klonal_send (lib_nal_t *nal, .klod_iov = { .iov = payload_iov } }; ptl_err_t rc; - LASSERT(nid == klonal_lib.libnal_ni.ni_pid.nid); - - rc = lib_parse(&klonal_lib, hdr, &klod); + rc = ptl_parse(ni, hdr, &klod); if (rc == PTL_OK) - lib_finalize(&klonal_lib, private, libmsg, PTL_OK); + ptl_finalize(ni, private, ptlmsg, PTL_OK); return rc; } -static ptl_err_t -klonal_send_pages (lib_nal_t *nal, +ptl_err_t +klonal_send_pages (ptl_ni_t *ni, void *private, - lib_msg_t *libmsg, + ptl_msg_t *ptlmsg, ptl_hdr_t *hdr, int type, ptl_nid_t nid, @@ -83,19 +70,17 @@ klonal_send_pages (lib_nal_t *nal, .klod_iov = { .kiov = payload_kiov } }; ptl_err_t rc; - LASSERT(nid == klonal_lib.libnal_ni.ni_pid.nid); - - rc = lib_parse(&klonal_lib, hdr, &klod); + rc = ptl_parse(ni, hdr, &klod); if (rc == PTL_OK) - lib_finalize(&klonal_lib, private, libmsg, PTL_OK); + ptl_finalize(ni, private, ptlmsg, PTL_OK); return rc; } -static ptl_err_t -klonal_recv(lib_nal_t *nal, +ptl_err_t +klonal_recv(ptl_ni_t *ni, void *private, - lib_msg_t *libmsg, + ptl_msg_t *ptlmsg, unsigned int niov, struct iovec *iov, size_t offset, @@ -108,7 +93,7 @@ klonal_recv(lib_nal_t *nal, LASSERT(klod->klod_type == KLOD_IOV); if (mlen == 0) - return PTL_OK; + goto out; while (offset >= iov->iov_len) { offset -= iov->iov_len; @@ -158,14 +143,15 @@ klonal_recv(lib_nal_t *nal, mlen -= fraglen; } while (mlen > 0); - lib_finalize(&klonal_lib, private, libmsg, PTL_OK); + out: + ptl_finalize(ni, private, ptlmsg, PTL_OK); return PTL_OK; } -static ptl_err_t -klonal_recv_pages(lib_nal_t *nal, +ptl_err_t +klonal_recv_pages(ptl_ni_t *ni, void *private, - lib_msg_t *libmsg, + ptl_msg_t *ptlmsg, unsigned int niov, ptl_kiov_t *kiov, size_t offset, @@ -183,7 +169,7 @@ klonal_recv_pages(lib_nal_t *nal, LASSERT(klod->klod_type == KLOD_KIOV); if (mlen == 0) - return PTL_OK; + goto out; while (offset >= kiov->kiov_len) { offset -= kiov->kiov_len; @@ -252,16 +238,7 @@ klonal_recv_pages(lib_nal_t *nal, if (srcaddr != NULL) kunmap(klod->klod_iov.kiov->kiov_page); - lib_finalize(&klonal_lib, private, libmsg, PTL_OK); + out: + ptl_finalize(ni, private, ptlmsg, PTL_OK); return PTL_OK; } - -lib_nal_t klonal_lib = -{ - libnal_data: &klonal_data, /* NAL private data */ - libnal_send: klonal_send, - libnal_send_pages: klonal_send_pages, - libnal_recv: klonal_recv, - libnal_recv_pages: klonal_recv_pages, - libnal_dist: klonal_dist -}; diff --git a/lnet/klnds/openiblnd/openiblnd.c b/lnet/klnds/openiblnd/openiblnd.c index 480c5aa..7076f4d 100644 --- a/lnet/klnds/openiblnd/openiblnd.c +++ b/lnet/klnds/openiblnd/openiblnd.c @@ -23,8 +23,17 @@ #include "openibnal.h" -nal_t kibnal_api; -ptl_handle_ni_t kibnal_ni; +ptl_nal_t kibnal_nal = { + .nal_name = "openib", + .nal_type = OPENIBNAL, + .nal_startup = kibnal_startup, + .nal_shutdown = kibnal_shutdown, + .nal_send = kibnal_send, + .nal_send_pages = kibnal_send_pages, + .nal_recv = kibnal_recv, + .nal_recv_pages = kibnal_recv_pages, +}; + kib_data_t kibnal_data; kib_tunables_t kibnal_tunables; @@ -89,7 +98,7 @@ kibnal_pack_msg(kib_msg_t *msg, int credits, ptl_nid_t dstnid, __u64 dststamp) msg->ibm_credits = credits; /* ibm_nob */ msg->ibm_cksum = 0; - msg->ibm_srcnid = kibnal_lib.libnal_ni.ni_pid.nid; + msg->ibm_srcnid = kibnal_data.kib_ni->ni_nid; msg->ibm_srcstamp = kibnal_data.kib_incarnation; msg->ibm_dstnid = dstnid; msg->ibm_dststamp = dststamp; @@ -500,7 +509,7 @@ kibnal_make_svcqry (kib_conn_t *conn) goto out; } - if (msg->ibm_dstnid != kibnal_lib.libnal_ni.ni_pid.nid || + if (msg->ibm_dstnid != kibnal_data.kib_ni->ni_nid || msg->ibm_dststamp != kibnal_data.kib_incarnation) { CERROR("Unexpected dst NID/stamp "LPX64"/"LPX64" from " LPX64"@%u.%u.%u.%u/%d\n", @@ -581,10 +590,10 @@ kibnal_handle_svcqry (struct socket *sock) goto out; } - if (msg->ibm_dstnid != kibnal_lib.libnal_ni.ni_pid.nid) { + if (msg->ibm_dstnid != kibnal_data.kib_ni->ni_nid) { CERROR("Unexpected dstnid "LPX64"(expected "LPX64" " "from %u.%u.%u.%u/%d\n", msg->ibm_dstnid, - kibnal_lib.libnal_ni.ni_pid.nid, + kibnal_data.kib_ni->ni_nid, HIPQUAD(peer_ip), peer_port); goto out; } @@ -922,11 +931,11 @@ kibnal_stop_ib_listener (void) int kibnal_set_mynid (ptl_nid_t nid) { - lib_ni_t *ni = &kibnal_lib.libnal_ni; + ptl_ni_t *ni = kibnal_data.kib_ni; int rc; CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n", - nid, ni->ni_pid.nid); + nid, ni->ni_nid); down (&kibnal_data.kib_nid_mutex); @@ -945,7 +954,7 @@ kibnal_set_mynid (ptl_nid_t nid) if (kibnal_data.kib_listen_handle != NULL) kibnal_stop_ib_listener(); - ni->ni_pid.nid = nid; + ni->ni_nid = nid; kibnal_data.kib_incarnation++; mb(); /* Delete all existing peers and their connections after new @@ -953,7 +962,7 @@ kibnal_set_mynid (ptl_nid_t nid) * world. */ kibnal_del_peer (PTL_NID_ANY, 0); - if (ni->ni_pid.nid != PTL_NID_ANY) { + if (ni->ni_nid != PTL_NID_ANY) { /* got a new NID to install */ rc = kibnal_start_ib_listener(); if (rc != 0) { @@ -974,7 +983,7 @@ kibnal_set_mynid (ptl_nid_t nid) failed_1: kibnal_stop_ib_listener(); failed_0: - ni->ni_pid.nid = PTL_NID_ANY; + ni->ni_nid = PTL_NID_ANY; kibnal_data.kib_incarnation++; mb(); kibnal_del_peer (PTL_NID_ANY, 0); @@ -1781,21 +1790,16 @@ kibnal_setup_tx_descs (void) } void -kibnal_api_shutdown (nal_t *nal) +kibnal_shutdown (ptl_ni_t *ni) { int i; int rc; - if (nal->nal_refct != 0) { - /* This module got the first ref */ - PORTAL_MODULE_UNUSE; - return; - } - CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", atomic_read (&portal_kmemory)); - LASSERT(nal == &kibnal_api); + LASSERT(ni == kibnal_data.kib_ni); + LASSERT(ni->ni_data == &kibnal_data); switch (kibnal_data.kib_init) { default: @@ -1845,10 +1849,6 @@ kibnal_api_shutdown (nal_t *nal) CERROR ("Destroy PD error: %d\n", rc); /* fall through */ - case IBNAL_INIT_LIB: - lib_fini(&kibnal_lib); - /* fall through */ - case IBNAL_INIT_DATA: /* Module refcount only gets to zero when all peers * have been closed so all lists must be empty */ @@ -1902,31 +1902,27 @@ kibnal_api_shutdown (nal_t *nal) kibnal_data.kib_init = IBNAL_INIT_NOTHING; } -int -kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid, - ptl_ni_limits_t *requested_limits, - ptl_ni_limits_t *actual_limits) +ptl_err_t +kibnal_startup (ptl_ni_t *ni, char **interfaces) { struct timeval tv; - ptl_process_id_t process_id; int pkmem = atomic_read(&portal_kmemory); int rc; int i; - LASSERT (nal == &kibnal_api); + LASSERT (ni->ni_nal == &kibnal_nal); - if (nal->nal_refct != 0) { - if (actual_limits != NULL) - *actual_limits = kibnal_lib.libnal_ni.ni_actual_limits; - /* This module got the first ref */ - PORTAL_MODULE_USE; - return (PTL_OK); + /* Only 1 instance supported */ + if (kibnal_data.kib_init != IBNAL_INIT_NOTHING) { + CERROR ("Only 1 instance supported\n"); + return PTL_FAIL; } - LASSERT (kibnal_data.kib_init == IBNAL_INIT_NOTHING); - memset (&kibnal_data, 0, sizeof (kibnal_data)); /* zero pointers, flags etc */ + kibnal_data.kib_ni = ni; + ni->ni_data = &kibnal_data; + do_gettimeofday(&tv); kibnal_data.kib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; @@ -1974,21 +1970,6 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid, kibnal_data.kib_init = IBNAL_INIT_DATA; /*****************************************************/ - - process_id.pid = requested_pid; - process_id.nid = PTL_NID_ANY; /* don't know my NID yet */ - - rc = lib_init(&kibnal_lib, nal, process_id, - requested_limits, actual_limits); - if (rc != PTL_OK) { - CERROR("lib_init failed: error %d\n", rc); - goto failed; - } - - /* lib interface initialised */ - kibnal_data.kib_init = IBNAL_INIT_LIB; - /*****************************************************/ - for (i = 0; i < IBNAL_N_SCHED; i++) { rc = kibnal_thread_start (kibnal_scheduler, (void *)((unsigned long)i)); @@ -2135,11 +2116,11 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid, printk(KERN_INFO "Lustre: OpenIB NAL loaded " "(initial mem %d)\n", pkmem); - return (PTL_OK); + return PTL_OK; failed: - kibnal_api_shutdown (&kibnal_api); - return (PTL_FAIL); + kibnal_shutdown(ni); + return PTL_FAIL; } void __exit @@ -2147,9 +2128,8 @@ kibnal_module_fini (void) { if (kibnal_tunables.kib_sysctl != NULL) unregister_sysctl_table (kibnal_tunables.kib_sysctl); - PtlNIFini(kibnal_ni); - ptl_unregister_nal(OPENIBNAL); + ptl_unregister_nal(&kibnal_nal); } int __init @@ -2163,34 +2143,23 @@ kibnal_module_init (void) LASSERT (sizeof(kibnal_tunables.kib_backlog) == sizeof(int)); LASSERT (sizeof(kibnal_tunables.kib_port) == sizeof(int)); - kibnal_api.nal_ni_init = kibnal_api_startup; - kibnal_api.nal_ni_fini = kibnal_api_shutdown; - /* Initialise dynamic tunables to defaults once only */ kibnal_tunables.kib_io_timeout = IBNAL_IO_TIMEOUT; kibnal_tunables.kib_listener_timeout = IBNAL_LISTENER_TIMEOUT; kibnal_tunables.kib_backlog = IBNAL_BACKLOG; kibnal_tunables.kib_port = IBNAL_PORT; - rc = ptl_register_nal(OPENIBNAL, &kibnal_api); + rc = ptl_register_nal(&kibnal_nal); if (rc != PTL_OK) { CERROR("Can't register IBNAL: %d\n", rc); return (-ENOMEM); /* or something... */ } - /* Pure gateways want the NAL started up at module load time... */ - rc = PtlNIInit(OPENIBNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &kibnal_ni); - if (rc != PTL_OK && rc != PTL_IFACE_DUP) { - ptl_unregister_nal(OPENIBNAL); - return (-ENODEV); - } - kibnal_tunables.kib_sysctl = register_sysctl_table (kibnal_top_ctl_table, 0); if (kibnal_tunables.kib_sysctl == NULL) { CERROR("Can't register sysctl table\n"); - PtlNIFini(kibnal_ni); - ptl_unregister_nal(OPENIBNAL); + ptl_unregister_nal(&kibnal_nal); return (-ENOMEM); } diff --git a/lnet/klnds/openiblnd/openiblnd.h b/lnet/klnds/openiblnd/openiblnd.h index de52aab..1b02744 100644 --- a/lnet/klnds/openiblnd/openiblnd.h +++ b/lnet/klnds/openiblnd/openiblnd.h @@ -56,7 +56,6 @@ #include #include #include -#include #include #include @@ -147,6 +146,7 @@ typedef struct __u64 kib_incarnation; /* which one am I */ int kib_shutdown; /* shut down? */ atomic_t kib_nthreads; /* # live threads */ + ptl_ni_t *kib_ni; /* _the_ openib interface */ __u64 kib_svc_id; /* service number I listen on */ tTS_IB_GID kib_svc_gid; /* device/port GID */ @@ -336,7 +336,7 @@ typedef struct kib_tx /* transmit message */ int tx_passive_rdma; /* peer sucks/blows */ int tx_passive_rdma_wait; /* waiting for peer to complete */ __u64 tx_passive_rdma_cookie; /* completion cookie */ - lib_msg_t *tx_libmsg[2]; /* lib msgs to finalize on completion */ + ptl_msg_t *tx_ptlmsg[2]; /* ptl msgs to finalize on completion */ kib_md_t tx_md; /* RDMA mapping (active/passive) */ __u64 tx_vaddr; /* pre-mapped buffer (hca vaddr) */ kib_msg_t *tx_msg; /* pre-mapped buffer (host vaddr) */ @@ -407,7 +407,6 @@ typedef struct kib_peer unsigned long ibp_reconnect_interval; /* exponential backoff */ } kib_peer_t; -extern lib_nal_t kibnal_lib; extern kib_data_t kibnal_data; extern kib_tunables_t kibnal_tunables; @@ -507,6 +506,29 @@ kibnal_wreqid_is_rx (__u64 wreqid) # define sk_sleep sleep #endif +ptl_err_t kibnal_startup (ptl_ni_t *ni, char **interfaces); +void kibnal_shutdown (ptl_ni_t *ni); +ptl_err_t kibnal_send (ptl_ni_t *ni, void *private, + ptl_msg_t *ptlmsg, ptl_hdr_t *hdr, + int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int payload_niov, + struct iovec *payload_iov, + size_t payload_offset, size_t payload_nob); +ptl_err_t kibnal_send_pages (ptl_ni_t *ni, void *private, + ptl_msg_t *ptlmsg, ptl_hdr_t *hdr, + int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int payload_niov, + ptl_kiov_t *payload_kiov, + size_t payload_offset, size_t payload_nob); +ptl_err_t kibnal_recv(ptl_ni_t *ni, void *private, + ptl_msg_t *ptlmsg, unsigned int niov, + struct iovec *iov, size_t offset, + size_t mlen, size_t rlen); +ptl_err_t kibnal_recv_pages(ptl_ni_t *ni, void *private, + ptl_msg_t *ptlmsg, unsigned int niov, + ptl_kiov_t *kiov, size_t offset, + size_t mlen, size_t rlen); + extern void kibnal_init_msg(kib_msg_t *msg, int type, int body_nob); extern void kibnal_pack_msg(kib_msg_t *msg, int credits, ptl_nid_t dstnid, __u64 dststamp); @@ -552,7 +574,7 @@ extern void kibnal_callback (struct ib_cq *cq, struct ib_cq_entry *e, void *arg) extern void kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob); extern int kibnal_close_conn (kib_conn_t *conn, int why); extern void kibnal_start_active_rdma (int type, int status, - kib_rx_t *rx, lib_msg_t *libmsg, + kib_rx_t *rx, ptl_msg_t *ptlmsg, unsigned int niov, struct iovec *iov, ptl_kiov_t *kiov, int offset, int nob); diff --git a/lnet/klnds/openiblnd/openiblnd_cb.c b/lnet/klnds/openiblnd/openiblnd_cb.c index dee5bd9..6d2492b 100644 --- a/lnet/klnds/openiblnd/openiblnd_cb.c +++ b/lnet/klnds/openiblnd/openiblnd_cb.c @@ -88,12 +88,12 @@ kibnal_tx_done (kib_tx_t *tx) } for (i = 0; i < 2; i++) { - /* tx may have up to 2 libmsgs to finalise */ - if (tx->tx_libmsg[i] == NULL) + /* tx may have up to 2 ptlmsgs to finalise */ + if (tx->tx_ptlmsg[i] == NULL) continue; - lib_finalize (&kibnal_lib, NULL, tx->tx_libmsg[i], ptlrc); - tx->tx_libmsg[i] = NULL; + ptl_finalize (kibnal_data.kib_ni, NULL, tx->tx_ptlmsg[i], ptlrc); + tx->tx_ptlmsg[i] = NULL; } if (tx->tx_conn != NULL) { @@ -168,8 +168,8 @@ kibnal_get_idle_tx (int may_block) LASSERT (tx->tx_conn == NULL); LASSERT (!tx->tx_passive_rdma); LASSERT (!tx->tx_passive_rdma_wait); - LASSERT (tx->tx_libmsg[0] == NULL); - LASSERT (tx->tx_libmsg[1] == NULL); + LASSERT (tx->tx_ptlmsg[0] == NULL); + LASSERT (tx->tx_ptlmsg[1] == NULL); } spin_unlock_irqrestore (&kibnal_data.kib_tx_lock, flags); @@ -177,20 +177,6 @@ kibnal_get_idle_tx (int may_block) return (tx); } -int -kibnal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist) -{ - /* I would guess that if kibnal_get_peer (nid) == NULL, - and we're not routing, then 'nid' is very distant :) */ - if ( nal->libnal_ni.ni_pid.nid == nid ) { - *dist = 0; - } else { - *dist = 1; - } - - return 0; -} - void kibnal_complete_passive_rdma(kib_conn_t *conn, __u64 cookie, int status) { @@ -330,7 +316,7 @@ kibnal_rx_callback (struct ib_cq_entry *e) if (msg->ibm_srcnid != conn->ibc_peer->ibp_nid || msg->ibm_srcstamp != conn->ibc_incarnation || - msg->ibm_dstnid != kibnal_lib.libnal_ni.ni_pid.nid || + msg->ibm_dstnid != kibnal_data.kib_ni->ni_nid || msg->ibm_dststamp != kibnal_data.kib_incarnation) { CERROR ("Stale rx from "LPX64"\n", conn->ibc_peer->ibp_nid); @@ -409,7 +395,7 @@ kibnal_rx (kib_rx_t *rx) switch (msg->ibm_type) { case IBNAL_MSG_GET_RDMA: - lib_parse(&kibnal_lib, &msg->ibm_u.rdma.ibrm_hdr, rx); + ptl_parse(kibnal_data.kib_ni, &msg->ibm_u.rdma.ibrm_hdr, rx); /* If the incoming get was matched, I'll have initiated the * RDMA and the completion message... */ if (rx->rx_rdma) @@ -424,10 +410,10 @@ kibnal_rx (kib_rx_t *rx) break; case IBNAL_MSG_PUT_RDMA: - lib_parse(&kibnal_lib, &msg->ibm_u.rdma.ibrm_hdr, rx); + ptl_parse(kibnal_data.kib_ni, &msg->ibm_u.rdma.ibrm_hdr, rx); if (rx->rx_rdma) break; - /* This is most unusual, since even if lib_parse() didn't + /* This is most unusual, since even if ptl_parse() didn't * match anything, it should have asked us to read (and * discard) the payload. The portals header must be * inconsistent with this message type, so it's the @@ -438,7 +424,7 @@ kibnal_rx (kib_rx_t *rx) break; case IBNAL_MSG_IMMEDIATE: - lib_parse(&kibnal_lib, &msg->ibm_u.immediate.ibim_hdr, rx); + ptl_parse(kibnal_data.kib_ni, &msg->ibm_u.immediate.ibim_hdr, rx); LASSERT (!rx->rx_rdma); break; @@ -1013,9 +999,9 @@ kibnal_launch_tx (kib_tx_t *tx, ptl_nid_t nid) ptl_err_t kibnal_start_passive_rdma (int type, ptl_nid_t nid, - lib_msg_t *libmsg, ptl_hdr_t *hdr) + ptl_msg_t *ptlmsg, ptl_hdr_t *hdr) { - int nob = libmsg->md->length; + int nob = ptlmsg->msg_md->md_length; kib_tx_t *tx; kib_msg_t *ibmsg; int rc; @@ -1036,15 +1022,15 @@ kibnal_start_passive_rdma (int type, ptl_nid_t nid, tx = kibnal_get_idle_tx (1); /* May block; caller is an app thread */ LASSERT (tx != NULL); - if ((libmsg->md->options & PTL_MD_KIOV) == 0) + if ((ptlmsg->msg_md->md_options & PTL_MD_KIOV) == 0) rc = kibnal_map_iov (tx, access, - libmsg->md->md_niov, - libmsg->md->md_iov.iov, + ptlmsg->msg_md->md_niov, + ptlmsg->msg_md->md_iov.iov, 0, nob); else rc = kibnal_map_kiov (tx, access, - libmsg->md->md_niov, - libmsg->md->md_iov.kiov, + ptlmsg->msg_md->md_niov, + ptlmsg->msg_md->md_iov.kiov, 0, nob); if (rc != 0) { @@ -1054,9 +1040,9 @@ kibnal_start_passive_rdma (int type, ptl_nid_t nid, if (type == IBNAL_MSG_GET_RDMA) { /* reply gets finalized when tx completes */ - tx->tx_libmsg[1] = lib_create_reply_msg(&kibnal_lib, - nid, libmsg); - if (tx->tx_libmsg[1] == NULL) { + tx->tx_ptlmsg[1] = ptl_create_reply_msg(kibnal_data.kib_ni, + nid, ptlmsg); + if (tx->tx_ptlmsg[1] == NULL) { CERROR ("Can't create reply for GET -> "LPX64"\n", nid); rc = -ENOMEM; @@ -1081,8 +1067,8 @@ kibnal_start_passive_rdma (int type, ptl_nid_t nid, tx, tx->tx_passive_rdma_cookie, tx->tx_md.md_rkey, tx->tx_md.md_addr, nob); - /* libmsg gets finalized when tx completes. */ - tx->tx_libmsg[0] = libmsg; + /* ptlmsg gets finalized when tx completes. */ + tx->tx_ptlmsg[0] = ptlmsg; kibnal_launch_tx(tx, nid); return (PTL_OK); @@ -1095,7 +1081,7 @@ kibnal_start_passive_rdma (int type, ptl_nid_t nid, void kibnal_start_active_rdma (int type, int status, - kib_rx_t *rx, lib_msg_t *libmsg, + kib_rx_t *rx, ptl_msg_t *ptlmsg, unsigned int niov, struct iovec *iov, ptl_kiov_t *kiov, int offset, int nob) @@ -1143,7 +1129,7 @@ kibnal_start_active_rdma (int type, int status, CERROR ("tx descs exhausted on RDMA from "LPX64 " completing locally with failure\n", rx->rx_conn->ibc_peer->ibp_nid); - lib_finalize (&kibnal_lib, NULL, libmsg, PTL_NO_SPACE); + ptl_finalize (kibnal_data.kib_ni, NULL, ptlmsg, PTL_NO_SPACE); return; } LASSERT (tx->tx_nsp == 0); @@ -1201,15 +1187,15 @@ kibnal_start_active_rdma (int type, int status, if (status == 0 && nob != 0) { LASSERT (tx->tx_nsp > 1); - /* RDMA: libmsg gets finalized when the tx completes. This + /* RDMA: ptlmsg gets finalized when the tx completes. This * is after the completion message has been sent, which in * turn is after the RDMA has finished. */ - tx->tx_libmsg[0] = libmsg; + tx->tx_ptlmsg[0] = ptlmsg; } else { LASSERT (tx->tx_nsp == 1); /* No RDMA: local completion happens now! */ CDEBUG(D_NET, "No data: immediate completion\n"); - lib_finalize (&kibnal_lib, NULL, libmsg, + ptl_finalize (kibnal_data.kib_ni, NULL, ptlmsg, status == 0 ? PTL_OK : PTL_FAIL); } @@ -1224,18 +1210,18 @@ kibnal_start_active_rdma (int type, int status, } ptl_err_t -kibnal_sendmsg(lib_nal_t *nal, - void *private, - lib_msg_t *libmsg, - ptl_hdr_t *hdr, - int type, - ptl_nid_t nid, - ptl_pid_t pid, - unsigned int payload_niov, - struct iovec *payload_iov, - ptl_kiov_t *payload_kiov, - int payload_offset, - int payload_nob) +kibnal_sendmsg(ptl_ni_t *ni, + void *private, + ptl_msg_t *ptlmsg, + ptl_hdr_t *hdr, + int type, + ptl_nid_t nid, + ptl_pid_t pid, + unsigned int payload_niov, + struct iovec *payload_iov, + ptl_kiov_t *payload_kiov, + int payload_offset, + int payload_nob) { kib_msg_t *ibmsg; kib_tx_t *tx; @@ -1266,7 +1252,7 @@ kibnal_sendmsg(lib_nal_t *nal, /* RDMA reply expected? */ if (rx->rx_msg->ibm_type == IBNAL_MSG_GET_RDMA) { kibnal_start_active_rdma(IBNAL_MSG_GET_DONE, 0, - rx, libmsg, payload_niov, + rx, ptlmsg, payload_niov, payload_iov, payload_kiov, payload_offset, payload_nob); return (PTL_OK); @@ -1291,10 +1277,10 @@ kibnal_sendmsg(lib_nal_t *nal, case PTL_MSG_GET: /* might the REPLY message be big enough to need RDMA? */ - nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[libmsg->md->length]); + nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[ptlmsg->msg_md->md_length]); if (nob > IBNAL_MSG_SIZE) return (kibnal_start_passive_rdma(IBNAL_MSG_GET_RDMA, - nid, libmsg, hdr)); + nid, ptlmsg, hdr)); break; case PTL_MSG_ACK: @@ -1306,7 +1292,7 @@ kibnal_sendmsg(lib_nal_t *nal, nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]); if (nob > IBNAL_MSG_SIZE) return (kibnal_start_passive_rdma(IBNAL_MSG_PUT_RDMA, - nid, libmsg, hdr)); + nid, ptlmsg, hdr)); break; } @@ -1325,11 +1311,11 @@ kibnal_sendmsg(lib_nal_t *nal, if (payload_nob > 0) { if (payload_kiov != NULL) - lib_copy_kiov2buf(ibmsg->ibm_u.immediate.ibim_payload, + ptl_copy_kiov2buf(ibmsg->ibm_u.immediate.ibim_payload, payload_niov, payload_kiov, payload_offset, payload_nob); else - lib_copy_iov2buf(ibmsg->ibm_u.immediate.ibim_payload, + ptl_copy_iov2buf(ibmsg->ibm_u.immediate.ibim_payload, payload_niov, payload_iov, payload_offset, payload_nob); } @@ -1338,41 +1324,41 @@ kibnal_sendmsg(lib_nal_t *nal, offsetof(kib_immediate_msg_t, ibim_payload[payload_nob])); - /* libmsg gets finalized when tx completes */ - tx->tx_libmsg[0] = libmsg; + /* ptlmsg gets finalized when tx completes */ + tx->tx_ptlmsg[0] = ptlmsg; kibnal_launch_tx(tx, nid); return (PTL_OK); } ptl_err_t -kibnal_send (lib_nal_t *nal, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int payload_niov, struct iovec *payload_iov, - size_t payload_offset, size_t payload_len) +kibnal_send (ptl_ni_t *ni, void *private, ptl_msg_t *cookie, + ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int payload_niov, struct iovec *payload_iov, + size_t payload_offset, size_t payload_len) { - return (kibnal_sendmsg(nal, private, cookie, + return (kibnal_sendmsg(ni, private, cookie, hdr, type, nid, pid, payload_niov, payload_iov, NULL, payload_offset, payload_len)); } ptl_err_t -kibnal_send_pages (lib_nal_t *nal, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int payload_niov, ptl_kiov_t *payload_kiov, - size_t payload_offset, size_t payload_len) +kibnal_send_pages (ptl_ni_t *ni, void *private, ptl_msg_t *cookie, + ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int payload_niov, ptl_kiov_t *payload_kiov, + size_t payload_offset, size_t payload_len) { - return (kibnal_sendmsg(nal, private, cookie, + return (kibnal_sendmsg(ni, private, cookie, hdr, type, nid, pid, payload_niov, NULL, payload_kiov, payload_offset, payload_len)); } ptl_err_t -kibnal_recvmsg (lib_nal_t *nal, void *private, lib_msg_t *libmsg, - unsigned int niov, struct iovec *iov, ptl_kiov_t *kiov, - int offset, int mlen, int rlen) +kibnal_recvmsg (ptl_ni_t *ni, void *private, ptl_msg_t *ptlmsg, + unsigned int niov, struct iovec *iov, ptl_kiov_t *kiov, + int offset, int mlen, int rlen) { kib_rx_t *rx = private; kib_msg_t *rxmsg = rx->rx_msg; @@ -1397,47 +1383,47 @@ kibnal_recvmsg (lib_nal_t *nal, void *private, lib_msg_t *libmsg, } if (kiov != NULL) - lib_copy_buf2kiov(niov, kiov, offset, + ptl_copy_buf2kiov(niov, kiov, offset, rxmsg->ibm_u.immediate.ibim_payload, mlen); else - lib_copy_buf2iov(niov, iov, offset, + ptl_copy_buf2iov(niov, iov, offset, rxmsg->ibm_u.immediate.ibim_payload, mlen); - lib_finalize (nal, NULL, libmsg, PTL_OK); + ptl_finalize (ni, NULL, ptlmsg, PTL_OK); return (PTL_OK); case IBNAL_MSG_GET_RDMA: /* We get called here just to discard any junk after the * GET hdr. */ - LASSERT (libmsg == NULL); - lib_finalize (nal, NULL, libmsg, PTL_OK); + LASSERT (ptlmsg == NULL); + ptl_finalize (ni, NULL, ptlmsg, PTL_OK); return (PTL_OK); case IBNAL_MSG_PUT_RDMA: kibnal_start_active_rdma (IBNAL_MSG_PUT_DONE, 0, - rx, libmsg, + rx, ptlmsg, niov, iov, kiov, offset, mlen); return (PTL_OK); } } ptl_err_t -kibnal_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, - unsigned int niov, struct iovec *iov, - size_t offset, size_t mlen, size_t rlen) +kibnal_recv (ptl_ni_t *ni, void *private, ptl_msg_t *msg, + unsigned int niov, struct iovec *iov, + size_t offset, size_t mlen, size_t rlen) { - return (kibnal_recvmsg (nal, private, msg, niov, iov, NULL, + return (kibnal_recvmsg (ni, private, msg, niov, iov, NULL, offset, mlen, rlen)); } ptl_err_t -kibnal_recv_pages (lib_nal_t *nal, void *private, lib_msg_t *msg, - unsigned int niov, ptl_kiov_t *kiov, - size_t offset, size_t mlen, size_t rlen) +kibnal_recv_pages (ptl_ni_t *ni, void *private, ptl_msg_t *msg, + unsigned int niov, ptl_kiov_t *kiov, + size_t offset, size_t mlen, size_t rlen) { - return (kibnal_recvmsg (nal, private, msg, niov, NULL, kiov, + return (kibnal_recvmsg (ni, private, msg, niov, NULL, kiov, offset, mlen, rlen)); } @@ -1758,7 +1744,7 @@ kibnal_accept (kib_conn_t **connp, tTS_IB_CM_COMM_ID cid, * NB If my incarnation changes after this, the peer will get nuked and * we'll spot that when the connection is finally added into the peer's * connlist */ - if (msg->ibm_dstnid != kibnal_lib.libnal_ni.ni_pid.nid || + if (msg->ibm_dstnid != kibnal_data.kib_ni->ni_nid || msg->ibm_dststamp != kibnal_data.kib_incarnation) { write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); @@ -1978,9 +1964,9 @@ kibnal_passive_conn_callback (tTS_IB_CM_EVENT event, tTS_IB_CM_CALLBACK_RETURN kibnal_active_conn_callback (tTS_IB_CM_EVENT event, - tTS_IB_CM_COMM_ID cid, - void *param, - void *arg) + tTS_IB_CM_COMM_ID cid, + void *param, + void *arg) { kib_conn_t *conn = arg; unsigned long flags; @@ -2011,7 +1997,7 @@ kibnal_active_conn_callback (tTS_IB_CM_EVENT event, if (msg->ibm_srcnid != conn->ibc_peer->ibp_nid || msg->ibm_srcstamp != conn->ibc_incarnation || - msg->ibm_dstnid != kibnal_lib.libnal_ni.ni_pid.nid || + msg->ibm_dstnid != kibnal_data.kib_ni->ni_nid || msg->ibm_dststamp != kibnal_data.kib_incarnation) { CERROR("Stale conn ack from "LPX64"\n", conn->ibc_peer->ibp_nid); @@ -2543,13 +2529,3 @@ kibnal_scheduler(void *arg) kibnal_thread_fini(); return (0); } - - -lib_nal_t kibnal_lib = { - libnal_data: &kibnal_data, /* NAL private data */ - libnal_send: kibnal_send, - libnal_send_pages: kibnal_send_pages, - libnal_recv: kibnal_recv, - libnal_recv_pages: kibnal_recv_pages, - libnal_dist: kibnal_dist -}; diff --git a/lnet/klnds/qswlnd/qswlnd.c b/lnet/klnds/qswlnd/qswlnd.c index be01f5d..9560950 100644 --- a/lnet/klnds/qswlnd/qswlnd.c +++ b/lnet/klnds/qswlnd/qswlnd.c @@ -21,9 +21,20 @@ #include "qswnal.h" -nal_t kqswnal_api; + +ptl_nal_t kqswnal_nal = +{ + .nal_name = "elan", + .nal_type = QSWNAL, + .nal_startup = kqswnal_startup, + .nal_shutdown = kqswnal_shutdown, + .nal_send = kqswnal_send, + .nal_send_pages = kqswnal_send_pages, + .nal_recv = kqswnal_recv, + .nal_recv_pages = kqswnal_recv_pages, +}; + kqswnal_data_t kqswnal_data; -ptl_handle_ni_t kqswnal_ni; kqswnal_tunables_t kqswnal_tunables; kpr_nal_interface_t kqswnal_router_interface = { @@ -107,7 +118,7 @@ kqswnal_cmd (struct portals_cfg *pcfg, void *private) kqswnal_data.kqn_nid_offset); kqswnal_data.kqn_nid_offset = pcfg->pcfg_nid - kqswnal_data.kqn_elanid; - kqswnal_lib.libnal_ni.ni_pid.nid = pcfg->pcfg_nid; + kqswnal_data.kqn_ni->ni_nid = pcfg->pcfg_nid; return (0); default: @@ -115,22 +126,16 @@ kqswnal_cmd (struct portals_cfg *pcfg, void *private) } } -static void -kqswnal_shutdown(nal_t *nal) +void +kqswnal_shutdown(ptl_ni_t *ni) { unsigned long flags; kqswnal_tx_t *ktx; kqswnal_rx_t *krx; - int do_lib_fini = 0; - - /* NB The first ref was this module! */ - if (nal->nal_refct != 0) { - PORTAL_MODULE_UNUSE; - return; - } - + CDEBUG (D_NET, "shutdown\n"); - LASSERT (nal == &kqswnal_api); + LASSERT (ni->ni_data == &kqswnal_data); + LASSERT (ni == kqswnal_data.kqn_ni); switch (kqswnal_data.kqn_init) { @@ -141,10 +146,6 @@ kqswnal_shutdown(nal_t *nal) libcfs_nal_cmd_unregister(QSWNAL); /* fall through */ - case KQN_INIT_LIB: - do_lib_fini = 1; - /* fall through */ - case KQN_INIT_DATA: break; @@ -188,7 +189,7 @@ kqswnal_shutdown(nal_t *nal) /* NB ep_free_rcvr() returns only after we've freed off all receive * buffers (see shutdown handling in kqswnal_requeue_rx()). This * means we must have completed any messages we passed to - * lib_parse() or kpr_fwd_start(). */ + * ptl_parse() or kpr_fwd_start(). */ if (kqswnal_data.kqn_eptx != NULL) ep_free_xmtr (kqswnal_data.kqn_eptx); @@ -251,13 +252,10 @@ kqswnal_shutdown(nal_t *nal) } /**********************************************************************/ - /* finalise router and portals lib */ + /* finalise router */ kpr_deregister (&kqswnal_data.kqn_router); - if (do_lib_fini) - lib_fini (&kqswnal_lib); - /**********************************************************************/ /* Unmap message buffers and free all descriptors and buffers */ @@ -349,10 +347,8 @@ kqswnal_shutdown(nal_t *nal) atomic_read(&portal_kmemory)); } -static int -kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid, - ptl_ni_limits_t *requested_limits, - ptl_ni_limits_t *actual_limits) +ptl_err_t +kqswnal_startup (ptl_ni_t *ni, char **interfaces) { #if MULTIRAIL_EKC EP_RAILMASK all_rails = EP_RAILMASK_ALL; @@ -364,26 +360,22 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid, kqswnal_rx_t *krx; kqswnal_tx_t *ktx; int elan_page_idx; - ptl_process_id_t my_process_id; int pkmem = atomic_read(&portal_kmemory); - LASSERT (nal == &kqswnal_api); - - if (nal->nal_refct != 0) { - if (actual_limits != NULL) - *actual_limits = kqswnal_lib.libnal_ni.ni_actual_limits; - /* This module got the first ref */ - PORTAL_MODULE_USE; - return (PTL_OK); - } - - LASSERT (kqswnal_data.kqn_init == KQN_INIT_NOTHING); + /* Only 1 instance supported */ + if (kqswnal_data.kqn_init != KQN_INIT_NOTHING) { + CERROR ("Only 1 instance supported\n"); + return PTL_FAIL; + } CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read(&portal_kmemory)); /* ensure all pointers NULL etc */ memset (&kqswnal_data, 0, sizeof (kqswnal_data)); + kqswnal_data.kqn_ni = ni; + ni->ni_data = &kqswnal_data; + INIT_LIST_HEAD (&kqswnal_data.kqn_idletxds); INIT_LIST_HEAD (&kqswnal_data.kqn_nblk_idletxds); INIT_LIST_HEAD (&kqswnal_data.kqn_activetxds); @@ -412,13 +404,13 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid, kqswnal_data.kqn_ep = ep_system(); if (kqswnal_data.kqn_ep == NULL) { CERROR("Can't initialise EKC\n"); - kqswnal_shutdown(nal); + kqswnal_shutdown(ni); return (PTL_IFACE_INVALID); } if (ep_waitfor_nodeid(kqswnal_data.kqn_ep) == ELAN_INVALID_NODE) { CERROR("Can't get elan ID\n"); - kqswnal_shutdown(nal); + kqswnal_shutdown(ni); return (PTL_IFACE_INVALID); } #else @@ -429,7 +421,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid, if (kqswnal_data.kqn_ep == NULL) { CERROR ("Can't get elan device 0\n"); - kqswnal_shutdown(nal); + kqswnal_shutdown(ni); return (PTL_IFACE_INVALID); } #endif @@ -445,7 +437,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid, if (kqswnal_data.kqn_eptx == NULL) { CERROR ("Can't allocate transmitter\n"); - kqswnal_shutdown (nal); + kqswnal_shutdown (ni); return (PTL_NO_SPACE); } @@ -458,7 +450,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid, if (kqswnal_data.kqn_eprx_small == NULL) { CERROR ("Can't install small msg receiver\n"); - kqswnal_shutdown (nal); + kqswnal_shutdown (ni); return (PTL_NO_SPACE); } @@ -468,7 +460,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid, if (kqswnal_data.kqn_eprx_large == NULL) { CERROR ("Can't install large msg receiver\n"); - kqswnal_shutdown (nal); + kqswnal_shutdown (ni); return (PTL_NO_SPACE); } @@ -483,7 +475,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid, EP_PERM_WRITE); if (kqswnal_data.kqn_ep_tx_nmh == NULL) { CERROR("Can't reserve tx dma space\n"); - kqswnal_shutdown(nal); + kqswnal_shutdown(ni); return (PTL_NO_SPACE); } #else @@ -498,7 +490,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid, if (rc != DDI_SUCCESS) { CERROR ("Can't reserve rx dma space\n"); - kqswnal_shutdown (nal); + kqswnal_shutdown (ni); return (PTL_NO_SPACE); } #endif @@ -512,7 +504,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid, EP_PERM_WRITE); if (kqswnal_data.kqn_ep_tx_nmh == NULL) { CERROR("Can't reserve rx dma space\n"); - kqswnal_shutdown(nal); + kqswnal_shutdown(ni); return (PTL_NO_SPACE); } #else @@ -528,7 +520,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid, if (rc != DDI_SUCCESS) { CERROR ("Can't reserve rx dma space\n"); - kqswnal_shutdown (nal); + kqswnal_shutdown (ni); return (PTL_NO_SPACE); } #endif @@ -543,7 +535,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid, PORTAL_ALLOC (ktx, sizeof(*ktx)); if (ktx == NULL) { - kqswnal_shutdown (nal); + kqswnal_shutdown (ni); return (PTL_NO_SPACE); } @@ -554,7 +546,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid, PORTAL_ALLOC (ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE); if (ktx->ktx_buffer == NULL) { - kqswnal_shutdown (nal); + kqswnal_shutdown (ni); return (PTL_NO_SPACE); } @@ -602,7 +594,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid, PORTAL_ALLOC(krx, sizeof(*krx)); if (krx == NULL) { - kqswnal_shutdown(nal); + kqswnal_shutdown(ni); return (PTL_NO_SPACE); } @@ -627,7 +619,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid, struct page *page = alloc_page(GFP_KERNEL); if (page == NULL) { - kqswnal_shutdown (nal); + kqswnal_shutdown (ni); return (PTL_NO_SPACE); } @@ -670,23 +662,6 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid, (KQSW_NRXMSGS_LARGE * KQSW_NRXMSGPAGES_LARGE)); /**********************************************************************/ - /* Network interface ready to initialise */ - - my_process_id.nid = kqswnal_elanid2nid(kqswnal_data.kqn_elanid); - my_process_id.pid = requested_pid; - - rc = lib_init(&kqswnal_lib, nal, my_process_id, - requested_limits, actual_limits); - if (rc != PTL_OK) - { - CERROR ("lib_init failed %d\n", rc); - kqswnal_shutdown (nal); - return (rc); - } - - kqswnal_data.kqn_init = KQN_INIT_LIB; - - /**********************************************************************/ /* Queue receives, now that it's OK to run their completion callbacks */ for (krx = kqswnal_data.kqn_rxds; krx != NULL; krx = krx->krx_alloclist) { @@ -703,7 +678,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid, if (rc != EP_SUCCESS) { CERROR ("failed ep_queue_receive %d\n", rc); - kqswnal_shutdown (nal); + kqswnal_shutdown (ni); return (PTL_FAIL); } } @@ -715,7 +690,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid, if (rc != 0) { CERROR ("failed to spawn scheduling thread: %d\n", rc); - kqswnal_shutdown (nal); + kqswnal_shutdown (ni); return (PTL_FAIL); } } @@ -728,7 +703,7 @@ kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid, rc = libcfs_nal_cmd_register (QSWNAL, &kqswnal_cmd, NULL); if (rc != 0) { CERROR ("Can't initialise command interface (rc = %d)\n", rc); - kqswnal_shutdown (nal); + kqswnal_shutdown (ni); return (PTL_FAIL); } @@ -750,9 +725,7 @@ kqswnal_finalise (void) if (kqswnal_tunables.kqn_sysctl != NULL) unregister_sysctl_table (kqswnal_tunables.kqn_sysctl); #endif - PtlNIFini(kqswnal_ni); - - ptl_unregister_nal(QSWNAL); + ptl_unregister_nal(&kqswnal_nal); } static int __init @@ -760,28 +733,16 @@ kqswnal_initialise (void) { int rc; - kqswnal_api.nal_ni_init = kqswnal_startup; - kqswnal_api.nal_ni_fini = kqswnal_shutdown; - /* Initialise dynamic tunables to defaults once only */ kqswnal_tunables.kqn_optimized_puts = KQSW_OPTIMIZED_PUTS; kqswnal_tunables.kqn_optimized_gets = KQSW_OPTIMIZED_GETS; - rc = ptl_register_nal(QSWNAL, &kqswnal_api); + rc = ptl_register_nal(&kqswnal_nal); if (rc != PTL_OK) { CERROR("Can't register QSWNAL: %d\n", rc); return (-ENOMEM); /* or something... */ } - /* Pure gateways, and the workaround for 'EKC blocks forever until - * the service is active' want the NAL started up at module load - * time... */ - rc = PtlNIInit(QSWNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &kqswnal_ni); - if (rc != PTL_OK && rc != PTL_IFACE_DUP) { - ptl_unregister_nal(QSWNAL); - return (-ENODEV); - } - #if CONFIG_SYSCTL /* Press on regardless even if registering sysctl doesn't work */ kqswnal_tunables.kqn_sysctl = diff --git a/lnet/klnds/qswlnd/qswlnd.h b/lnet/klnds/qswlnd/qswlnd.h index c138be4..ca226ea 100644 --- a/lnet/klnds/qswlnd/qswlnd.h +++ b/lnet/klnds/qswlnd/qswlnd.h @@ -78,7 +78,6 @@ #include #include #include -#include #define KQSW_CHECKSUM 0 #if KQSW_CHECKSUM @@ -223,6 +222,7 @@ typedef struct char kqn_init; /* what's been initialised */ char kqn_shuttingdown; /* I'm trying to shut down */ atomic_t kqn_nthreads; /* # threads running */ + ptl_ni_t *kqn_ni; /* _the_ instance of me */ kqswnal_rx_t *kqn_rxds; /* stack of all the receive descriptors */ kqswnal_tx_t *kqn_txds; /* stack of all the transmit descriptors */ @@ -267,11 +267,8 @@ typedef struct /* kqn_init state */ #define KQN_INIT_NOTHING 0 /* MUST BE ZERO so zeroed state is initialised OK */ #define KQN_INIT_DATA 1 -#define KQN_INIT_LIB 2 -#define KQN_INIT_ALL 3 +#define KQN_INIT_ALL 2 -extern lib_nal_t kqswnal_lib; -extern nal_t kqswnal_api; extern kqswnal_tunables_t kqswnal_tunables; extern kqswnal_data_t kqswnal_data; @@ -373,4 +370,27 @@ ep_free_rcvr(EP_RCVR *r) } #endif +ptl_err_t kqswnal_startup (ptl_ni_t *ni, char **interfaces); +void kqswnal_shutdown (ptl_ni_t *ni); +ptl_err_t kqswnal_send (ptl_ni_t *ni, void *private, + ptl_msg_t *ptlmsg, ptl_hdr_t *hdr, + int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int payload_niov, + struct iovec *payload_iov, + size_t payload_offset, size_t payload_nob); +ptl_err_t kqswnal_send_pages (ptl_ni_t *ni, void *private, + ptl_msg_t *ptlmsg, ptl_hdr_t *hdr, + int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int payload_niov, + ptl_kiov_t *payload_kiov, + size_t payload_offset, size_t payload_nob); +ptl_err_t kqswnal_recv(ptl_ni_t *ni, void *private, + ptl_msg_t *ptlmsg, unsigned int niov, + struct iovec *iov, size_t offset, + size_t mlen, size_t rlen); +ptl_err_t kqswnal_recv_pages(ptl_ni_t *ni, void *private, + ptl_msg_t *ptlmsg, unsigned int niov, + ptl_kiov_t *kiov, size_t offset, + size_t mlen, size_t rlen); + #endif /* _QSWNAL_H */ diff --git a/lnet/klnds/qswlnd/qswlnd_cb.c b/lnet/klnds/qswlnd/qswlnd_cb.c index 94c671d..a000c5f 100644 --- a/lnet/klnds/qswlnd/qswlnd_cb.c +++ b/lnet/klnds/qswlnd/qswlnd_cb.c @@ -23,22 +23,6 @@ #include "qswnal.h" -/* - * LIB functions follow - * - */ -static int -kqswnal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist) -{ - if (nid == nal->libnal_ni.ni_pid.nid) - *dist = 0; /* it's me */ - else if (kqswnal_nid2elanid (nid) >= 0) - *dist = 1; /* it's my peer */ - else - *dist = 2; /* via router */ - return (0); -} - void kqswnal_notify_peer_down(kqswnal_tx_t *ktx) { @@ -447,8 +431,8 @@ kqswnal_tx_done (kqswnal_tx_t *ktx, int error) case KTX_RDMAING: /* optimized GET/PUT handled */ case KTX_PUTTING: /* optimized PUT sent */ case KTX_SENDING: /* normal send */ - lib_finalize (&kqswnal_lib, NULL, - (lib_msg_t *)ktx->ktx_args[1], + ptl_finalize (kqswnal_data.kqn_ni, NULL, + (ptl_msg_t *)ktx->ktx_args[1], (error == 0) ? PTL_OK : PTL_FAIL); break; @@ -456,10 +440,10 @@ kqswnal_tx_done (kqswnal_tx_t *ktx, int error) /* Complete the GET with success since we can't avoid * delivering a REPLY event; we committed to it when we * launched the GET */ - lib_finalize (&kqswnal_lib, NULL, - (lib_msg_t *)ktx->ktx_args[1], PTL_OK); - lib_finalize (&kqswnal_lib, NULL, - (lib_msg_t *)ktx->ktx_args[2], + ptl_finalize (kqswnal_data.kqn_ni, NULL, + (ptl_msg_t *)ktx->ktx_args[1], PTL_OK); + ptl_finalize (kqswnal_data.kqn_ni, NULL, + (ptl_msg_t *)ktx->ktx_args[2], (error == 0) ? PTL_OK : PTL_FAIL); break; @@ -751,7 +735,7 @@ kqswnal_parse_rmd (kqswnal_rx_t *krx, int type, ptl_nid_t expected_nid) kqswnal_remotemd_t *rmd = (kqswnal_remotemd_t *)(buffer + KQSW_HDR_SIZE); ptl_nid_t nid = kqswnal_rx_nid(krx); - /* Note (1) lib_parse has already flipped hdr. + /* Note (1) ptl_parse has already flipped hdr. * (2) RDMA addresses are sent in native endian-ness. When * EKC copes with different endian nodes, I'll fix this (and * eat my hat :) */ @@ -807,7 +791,7 @@ kqswnal_rdma_store_complete (EP_RXD *rxd) krx->krx_rpc_reply_needed = 0; kqswnal_rx_decref (krx); - /* free ktx & finalize() its lib_msg_t */ + /* free ktx & finalize() its ptl_msg_t */ kqswnal_tx_done(ktx, (status == EP_SUCCESS) ? 0 : -ECONNABORTED); } @@ -837,7 +821,7 @@ kqswnal_rdma_fetch_complete (EP_RXD *rxd) status = -ECONNABORTED; } - /* free ktx & finalize() its lib_msg_t */ + /* free ktx & finalize() its ptl_msg_t */ kqswnal_tx_done(ktx, status); if (!in_interrupt()) { @@ -859,7 +843,7 @@ kqswnal_rdma_fetch_complete (EP_RXD *rxd) } int -kqswnal_rdma (kqswnal_rx_t *krx, lib_msg_t *libmsg, int type, +kqswnal_rdma (kqswnal_rx_t *krx, ptl_msg_t *ptlmsg, int type, int niov, struct iovec *iov, ptl_kiov_t *kiov, size_t offset, size_t len) { @@ -879,13 +863,13 @@ kqswnal_rdma (kqswnal_rx_t *krx, lib_msg_t *libmsg, int type, LASSERT (krx->krx_rpc_reply_needed); LASSERT (krx->krx_rpc_reply_status != 0); - rmd = kqswnal_parse_rmd(krx, type, libmsg->ev.initiator.nid); + rmd = kqswnal_parse_rmd(krx, type, ptlmsg->msg_ev.initiator.nid); if (rmd == NULL) return (-EPROTO); if (len == 0) { /* data got truncated to nothing. */ - lib_finalize(&kqswnal_lib, krx, libmsg, PTL_OK); + ptl_finalize(kqswnal_data.kqn_ni, krx, ptlmsg, PTL_OK); /* Let kqswnal_rx_done() complete the RPC with success */ krx->krx_rpc_reply_status = 0; return (0); @@ -896,14 +880,14 @@ kqswnal_rdma (kqswnal_rx_t *krx, lib_msg_t *libmsg, int type, ktx = kqswnal_get_idle_tx(NULL, 0); if (ktx == NULL) { CERROR ("Can't get txd for RDMA with "LPX64"\n", - libmsg->ev.initiator.nid); + ptlmsg->msg_ev.initiator.nid); return (-ENOMEM); } ktx->ktx_state = KTX_RDMAING; - ktx->ktx_nid = libmsg->ev.initiator.nid; + ktx->ktx_nid = ptlmsg->msg_ev.initiator.nid; ktx->ktx_args[0] = krx; - ktx->ktx_args[1] = libmsg; + ktx->ktx_args[1] = ptlmsg; #if MULTIRAIL_EKC /* Map on the rail the RPC prefers */ @@ -1017,9 +1001,9 @@ kqswnal_rdma (kqswnal_rx_t *krx, lib_msg_t *libmsg, int type, } static ptl_err_t -kqswnal_sendmsg (lib_nal_t *nal, +kqswnal_sendmsg (ptl_ni_t *ni, void *private, - lib_msg_t *libmsg, + ptl_msg_t *ptlmsg, ptl_hdr_t *hdr, int type, ptl_nid_t nid, @@ -1062,7 +1046,7 @@ kqswnal_sendmsg (lib_nal_t *nal, if (type == PTL_MSG_REPLY && /* can I look in 'private' */ ((kqswnal_rx_t *)private)->krx_rpc_reply_needed) { /* is it an RPC */ /* Must be a REPLY for an optimized GET */ - rc = kqswnal_rdma ((kqswnal_rx_t *)private, libmsg, PTL_MSG_GET, + rc = kqswnal_rdma ((kqswnal_rx_t *)private, ptlmsg, PTL_MSG_GET, payload_niov, payload_iov, payload_kiov, payload_offset, payload_nob); return ((rc == 0) ? PTL_OK : PTL_FAIL); @@ -1091,14 +1075,14 @@ kqswnal_sendmsg (lib_nal_t *nal, in_interrupt())); if (ktx == NULL) { CERROR ("Can't get txd for msg type %d for "LPX64"\n", - type, libmsg->ev.initiator.nid); + type, ptlmsg->msg_ev.initiator.nid); return (PTL_NO_SPACE); } ktx->ktx_state = KTX_SENDING; ktx->ktx_nid = targetnid; ktx->ktx_args[0] = private; - ktx->ktx_args[1] = libmsg; + ktx->ktx_args[1] = ptlmsg; ktx->ktx_args[2] = NULL; /* set when a GET commits to REPLY */ memcpy (ktx->ktx_buffer, hdr, sizeof (*hdr)); /* copy hdr from caller's stack */ @@ -1152,7 +1136,7 @@ kqswnal_sendmsg (lib_nal_t *nal, (type == PTL_MSG_PUT && /* optimize PUT? */ kqswnal_tunables.kqn_optimized_puts != 0 && payload_nob >= kqswnal_tunables.kqn_optimized_puts))) { - lib_md_t *md = libmsg->md; + ptl_libmd_t *md = ptlmsg->msg_md; kqswnal_remotemd_t *rmd = (kqswnal_remotemd_t *)(ktx->ktx_buffer + KQSW_HDR_SIZE); /* Optimised path: I send over the Elan vaddrs of the local @@ -1167,11 +1151,11 @@ kqswnal_sendmsg (lib_nal_t *nal, ktx->ktx_state = (type == PTL_MSG_PUT) ? KTX_PUTTING : KTX_GETTING; - if ((libmsg->md->options & PTL_MD_KIOV) != 0) - rc = kqswnal_map_tx_kiov (ktx, 0, md->length, + if ((ptlmsg->msg_md->md_options & PTL_MD_KIOV) != 0) + rc = kqswnal_map_tx_kiov (ktx, 0, md->md_length, md->md_niov, md->md_iov.kiov); else - rc = kqswnal_map_tx_iov (ktx, 0, md->length, + rc = kqswnal_map_tx_iov (ktx, 0, md->md_length, md->md_niov, md->md_iov.iov); if (rc != 0) goto out; @@ -1197,8 +1181,8 @@ kqswnal_sendmsg (lib_nal_t *nal, #endif if (type == PTL_MSG_GET) { /* Allocate reply message now while I'm in thread context */ - ktx->ktx_args[2] = lib_create_reply_msg (&kqswnal_lib, - nid, libmsg); + ktx->ktx_args[2] = ptl_create_reply_msg ( + kqswnal_data.kqn_ni, nid, ptlmsg); if (ktx->ktx_args[2] == NULL) goto out; @@ -1219,11 +1203,11 @@ kqswnal_sendmsg (lib_nal_t *nal, #endif if (payload_nob > 0) { if (payload_kiov != NULL) - lib_copy_kiov2buf (ktx->ktx_buffer + KQSW_HDR_SIZE, + ptl_copy_kiov2buf (ktx->ktx_buffer + KQSW_HDR_SIZE, payload_niov, payload_kiov, payload_offset, payload_nob); else - lib_copy_iov2buf (ktx->ktx_buffer + KQSW_HDR_SIZE, + ptl_copy_iov2buf (ktx->ktx_buffer + KQSW_HDR_SIZE, payload_niov, payload_iov, payload_offset, payload_nob); } @@ -1268,9 +1252,9 @@ kqswnal_sendmsg (lib_nal_t *nal, * pretend the GET succeeded but the REPLY * failed. */ rc = 0; - lib_finalize (&kqswnal_lib, private, libmsg, PTL_OK); - lib_finalize (&kqswnal_lib, private, - (lib_msg_t *)ktx->ktx_args[2], PTL_FAIL); + ptl_finalize (kqswnal_data.kqn_ni, private, ptlmsg, PTL_OK); + ptl_finalize (kqswnal_data.kqn_ni, private, + (ptl_msg_t *)ktx->ktx_args[2], PTL_FAIL); } kqswnal_put_idle_tx (ktx); @@ -1280,10 +1264,10 @@ kqswnal_sendmsg (lib_nal_t *nal, return (rc == 0 ? PTL_OK : PTL_FAIL); } -static ptl_err_t -kqswnal_send (lib_nal_t *nal, +ptl_err_t +kqswnal_send (ptl_ni_t *ni, void *private, - lib_msg_t *libmsg, + ptl_msg_t *ptlmsg, ptl_hdr_t *hdr, int type, ptl_nid_t nid, @@ -1293,15 +1277,15 @@ kqswnal_send (lib_nal_t *nal, size_t payload_offset, size_t payload_nob) { - return (kqswnal_sendmsg (nal, private, libmsg, hdr, type, nid, pid, + return (kqswnal_sendmsg (ni, private, ptlmsg, hdr, type, nid, pid, payload_niov, payload_iov, NULL, payload_offset, payload_nob)); } -static ptl_err_t -kqswnal_send_pages (lib_nal_t *nal, +ptl_err_t +kqswnal_send_pages (ptl_ni_t *ni, void *private, - lib_msg_t *libmsg, + ptl_msg_t *ptlmsg, ptl_hdr_t *hdr, int type, ptl_nid_t nid, @@ -1311,7 +1295,7 @@ kqswnal_send_pages (lib_nal_t *nal, size_t payload_offset, size_t payload_nob) { - return (kqswnal_sendmsg (nal, private, libmsg, hdr, type, nid, pid, + return (kqswnal_sendmsg (ni, private, ptlmsg, hdr, type, nid, pid, payload_niov, NULL, payload_kiov, payload_offset, payload_nob)); } @@ -1338,7 +1322,7 @@ kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) if (ktx == NULL) /* can't get txd right now */ return; /* fwd will be scheduled when tx desc freed */ - if (nid == kqswnal_lib.libnal_ni.ni_pid.nid) /* gateway is me */ + if (nid == kqswnal_data.kqn_ni->ni_nid) /* gateway is me */ nid = fwd->kprfd_target_nid; /* target is final dest */ if (kqswnal_nid2elanid (nid) < 0) { @@ -1368,7 +1352,7 @@ kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) ktx->ktx_frags[0].Len = KQSW_HDR_SIZE + nob; #endif if (nob > 0) - lib_copy_kiov2buf(ktx->ktx_buffer + KQSW_HDR_SIZE, + ptl_copy_kiov2buf(ktx->ktx_buffer + KQSW_HDR_SIZE, niov, kiov, 0, nob); } else @@ -1522,10 +1506,10 @@ kqswnal_parse (kqswnal_rx_t *krx) LASSERT (atomic_read(&krx->krx_refcount) == 1); - if (dest_nid == kqswnal_lib.libnal_ni.ni_pid.nid) { /* It's for me :) */ + if (dest_nid == kqswnal_data.kqn_ni->ni_nid) { /* It's for me :) */ /* I ignore parse errors since I'm not consuming a byte * stream */ - (void)lib_parse (&kqswnal_lib, hdr, krx); + (void)ptl_parse (kqswnal_data.kqn_ni, hdr, krx); /* Drop my ref; any RDMA activity takes an additional ref */ kqswnal_rx_decref(krx); @@ -1678,9 +1662,9 @@ kqswnal_csum_error (kqswnal_rx_t *krx, int ishdr) #endif static ptl_err_t -kqswnal_recvmsg (lib_nal_t *nal, +kqswnal_recvmsg (ptl_ni_t *ni, void *private, - lib_msg_t *libmsg, + ptl_msg_t *ptlmsg, unsigned int niov, struct iovec *iov, ptl_kiov_t *kiov, @@ -1714,11 +1698,11 @@ kqswnal_recvmsg (lib_nal_t *nal, if (senders_csum != hdr_csum) kqswnal_csum_error (krx, 1); #endif - /* NB lib_parse() has already flipped *hdr */ + /* NB ptl_parse() has already flipped *hdr */ CDEBUG(D_NET,"kqswnal_recv, mlen="LPSZ", rlen="LPSZ"\n", mlen, rlen); - if (libmsg == NULL) { /* portals is discarding. */ + if (ptlmsg == NULL) { /* portals is discarding. */ LASSERT (mlen == 0); return PTL_OK; /* ignored by caller! */ } @@ -1726,7 +1710,7 @@ kqswnal_recvmsg (lib_nal_t *nal, if (krx->krx_rpc_reply_needed && hdr->type == PTL_MSG_PUT) { /* This must be an optimized PUT */ - rc = kqswnal_rdma (krx, libmsg, PTL_MSG_PUT, + rc = kqswnal_rdma (krx, ptlmsg, PTL_MSG_PUT, niov, iov, kiov, offset, mlen); return (rc == 0 ? PTL_OK : PTL_FAIL); } @@ -1840,37 +1824,37 @@ kqswnal_recvmsg (lib_nal_t *nal, "csum_nob %d\n", hdr_csum, payload_csum, csum_frags, csum_nob); #endif - lib_finalize(nal, private, libmsg, PTL_OK); + ptl_finalize(ni, private, ptlmsg, PTL_OK); return (PTL_OK); } -static ptl_err_t -kqswnal_recv(lib_nal_t *nal, +ptl_err_t +kqswnal_recv(ptl_ni_t *ni, void *private, - lib_msg_t *libmsg, + ptl_msg_t *ptlmsg, unsigned int niov, struct iovec *iov, size_t offset, size_t mlen, size_t rlen) { - return (kqswnal_recvmsg(nal, private, libmsg, + return (kqswnal_recvmsg(ni, private, ptlmsg, niov, iov, NULL, offset, mlen, rlen)); } -static ptl_err_t -kqswnal_recv_pages (lib_nal_t *nal, +ptl_err_t +kqswnal_recv_pages (ptl_ni_t *ni, void *private, - lib_msg_t *libmsg, + ptl_msg_t *ptlmsg, unsigned int niov, ptl_kiov_t *kiov, size_t offset, size_t mlen, size_t rlen) { - return (kqswnal_recvmsg(nal, private, libmsg, + return (kqswnal_recvmsg(ni, private, ptlmsg, niov, NULL, kiov, offset, mlen, rlen)); } @@ -1998,13 +1982,3 @@ kqswnal_scheduler (void *arg) kqswnal_thread_fini (); return (0); } - -lib_nal_t kqswnal_lib = -{ - libnal_data: &kqswnal_data, /* NAL private data */ - libnal_send: kqswnal_send, - libnal_send_pages: kqswnal_send_pages, - libnal_recv: kqswnal_recv, - libnal_recv_pages: kqswnal_recv_pages, - libnal_dist: kqswnal_dist -}; diff --git a/lnet/klnds/ralnd/ralnd.c b/lnet/klnds/ralnd/ralnd.c index e532ea3..28aaa85 100644 --- a/lnet/klnds/ralnd/ralnd.c +++ b/lnet/klnds/ralnd/ralnd.c @@ -25,8 +25,17 @@ static int kranal_devids[] = {RAPK_MAIN_DEVICE_ID, RAPK_EXPANSION_DEVICE_ID}; -nal_t kranal_api; -ptl_handle_ni_t kranal_ni; +ptl_nal_t kranal_nal = { + .nal_name = "ra", + .nal_type = RANAL, + .nal_startup = kranal_startup, + .nal_shutdown = kranal_shutdown, + .nal_send = kranal_send, + .nal_send_pages = kranal_send_pages, + .nal_recv = kranal_recv, + .nal_recv_pages = kranal_recv_pages, +}; + kra_data_t kranal_data; kra_tunables_t kranal_tunables; @@ -223,7 +232,7 @@ kranal_pack_connreq(kra_connreq_t *connreq, kra_conn_t *conn, ptl_nid_t dstnid) connreq->racr_magic = RANAL_MSG_MAGIC; connreq->racr_version = RANAL_MSG_VERSION; connreq->racr_devid = conn->rac_device->rad_id; - connreq->racr_srcnid = kranal_lib.libnal_ni.ni_pid.nid; + connreq->racr_srcnid = kranal_data.kra_ni->ni_nid; connreq->racr_dstnid = dstnid; connreq->racr_peerstamp = kranal_data.kra_peerstamp; connreq->racr_connstamp = conn->rac_my_connstamp; @@ -294,7 +303,7 @@ kranal_close_stale_conns_locked (kra_peer_t *peer, kra_conn_t *newconn) int loopback; int count = 0; - loopback = peer->rap_nid == kranal_lib.libnal_ni.ni_pid.nid; + loopback = peer->rap_nid == kranal_data.kra_ni->ni_nid; list_for_each_safe (ctmp, cnxt, &peer->rap_conns) { conn = list_entry(ctmp, kra_conn_t, rac_list); @@ -340,7 +349,7 @@ kranal_conn_isdup_locked(kra_peer_t *peer, kra_conn_t *newconn) struct list_head *tmp; int loopback; - loopback = peer->rap_nid == kranal_lib.libnal_ni.ni_pid.nid; + loopback = peer->rap_nid == kranal_data.kra_ni->ni_nid; list_for_each(tmp, &peer->rap_conns) { conn = list_entry(tmp, kra_conn_t, rac_list); @@ -728,7 +737,7 @@ kranal_active_conn_handshake(kra_peer_t *peer, /* spread connections over all devices using both peer NIDs to ensure * all nids use all devices */ - idx = peer->rap_nid + kranal_lib.libnal_ni.ni_pid.nid; + idx = peer->rap_nid + kranal_data.kra_ni->ni_nid; dev = &kranal_data.kra_devices[idx % kranal_data.kra_ndevs]; rc = kranal_create_conn(&conn, dev); @@ -861,12 +870,12 @@ kranal_conn_handshake (struct socket *sock, kra_peer_t *peer) /* Refuse connection if peer thinks we are a different NID. We check * this while holding the global lock, to synch with connection * destruction on NID change. */ - if (dst_nid != kranal_lib.libnal_ni.ni_pid.nid) { + if (dst_nid != kranal_data.kra_ni->ni_nid) { write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); CERROR("Stale/bad connection with "LPX64 ": dst_nid "LPX64", expected "LPX64"\n", - peer_nid, dst_nid, kranal_lib.libnal_ni.ni_pid.nid); + peer_nid, dst_nid, kranal_data.kra_ni->ni_nid); rc = -ESTALE; goto failed; } @@ -1247,15 +1256,15 @@ int kranal_set_mynid(ptl_nid_t nid) { unsigned long flags; - lib_ni_t *ni = &kranal_lib.libnal_ni; + ptl_ni_t *ni = kranal_data.kra_ni; int rc = 0; CDEBUG(D_NET, "setting mynid to "LPX64" (old nid="LPX64")\n", - nid, ni->ni_pid.nid); + nid, ni->ni_nid); down(&kranal_data.kra_nid_mutex); - if (nid == ni->ni_pid.nid) { + if (nid == ni->ni_nid) { /* no change of NID */ up(&kranal_data.kra_nid_mutex); return 0; @@ -1266,7 +1275,7 @@ kranal_set_mynid(ptl_nid_t nid) write_lock_irqsave(&kranal_data.kra_global_lock, flags); kranal_data.kra_peerstamp++; - ni->ni_pid.nid = nid; + ni->ni_nid = nid; write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); /* Delete all existing peers and their connections after new @@ -1800,21 +1809,16 @@ kranal_device_fini(kra_device_t *dev) } void -kranal_api_shutdown (nal_t *nal) +kranal_shutdown (ptl_ni_t *ni) { int i; unsigned long flags; - if (nal->nal_refct != 0) { - /* This module got the first ref */ - PORTAL_MODULE_UNUSE; - return; - } - CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", atomic_read(&portal_kmemory)); - LASSERT (nal == &kranal_api); + LASSERT (ni == kranal_data.kra_ni); + LASSERT (ni->ni_data == &kranal_data); switch (kranal_data.kra_init) { default: @@ -1844,10 +1848,6 @@ kranal_api_shutdown (nal_t *nal) } /* fall through */ - case RANAL_INIT_LIB: - lib_fini(&kranal_lib); - /* fall through */ - case RANAL_INIT_DATA: break; } @@ -1923,32 +1923,28 @@ kranal_api_shutdown (nal_t *nal) kranal_data.kra_init = RANAL_INIT_NOTHING; } -int -kranal_api_startup (nal_t *nal, ptl_pid_t requested_pid, - ptl_ni_limits_t *requested_limits, - ptl_ni_limits_t *actual_limits) +ptl_err_t +kranal_startup (ptl_ni_t *ni, char **interfaces) { struct timeval tv; - ptl_process_id_t process_id; int pkmem = atomic_read(&portal_kmemory); int rc; int i; kra_device_t *dev; - LASSERT (nal == &kranal_api); + LASSERT (ni->ni_nal == &kranal_nal); - if (nal->nal_refct != 0) { - if (actual_limits != NULL) - *actual_limits = kranal_lib.libnal_ni.ni_actual_limits; - /* This module got the first ref */ - PORTAL_MODULE_USE; - return PTL_OK; + /* Only 1 instance supported */ + if (kranal_data.kra_init != RANAL_INIT_NOTHING) { + CERROR ("Only 1 instance supported\n"); + return PTL_FAIL; } - - LASSERT (kranal_data.kra_init == RANAL_INIT_NOTHING); - + memset(&kranal_data, 0, sizeof(kranal_data)); /* zero pointers, flags etc */ + ni->ni_data = &kranal_data; + kranal_data.kra_ni = ni; + /* CAVEAT EMPTOR: Every 'Fma' message includes the sender's NID and * a unique (for all time) connstamp so we can uniquely identify * the sender. The connstamp is an incrementing counter @@ -2017,20 +2013,6 @@ kranal_api_startup (nal_t *nal, ptl_pid_t requested_pid, if (rc != 0) goto failed; - process_id.pid = requested_pid; - process_id.nid = PTL_NID_ANY; /* don't know my NID yet */ - - rc = lib_init(&kranal_lib, nal, process_id, - requested_limits, actual_limits); - if (rc != PTL_OK) { - CERROR("lib_init failed: error %d\n", rc); - goto failed; - } - - /* lib interface initialised */ - kranal_data.kra_init = RANAL_INIT_LIB; - /*****************************************************/ - rc = kranal_thread_start(kranal_reaper, NULL); if (rc != 0) { CERROR("Can't spawn ranal reaper: %d\n", rc); @@ -2090,7 +2072,7 @@ kranal_api_startup (nal_t *nal, ptl_pid_t requested_pid, return PTL_OK; failed: - kranal_api_shutdown(&kranal_api); + kranal_shutdown(ni); return PTL_FAIL; } @@ -2100,9 +2082,7 @@ kranal_module_fini (void) if (kranal_tunables.kra_sysctl != NULL) unregister_sysctl_table(kranal_tunables.kra_sysctl); - PtlNIFini(kranal_ni); - - ptl_unregister_nal(RANAL); + ptl_unregister_nal(&kranal_nal); } int __init @@ -2112,14 +2092,11 @@ kranal_module_init (void) /* the following must be sizeof(int) for * proc_dointvec/kranal_listener_procint() */ - LASSERT (sizeof(kranal_tunables.kra_timeout) == sizeof(int)); - LASSERT (sizeof(kranal_tunables.kra_listener_timeout) == sizeof(int)); - LASSERT (sizeof(kranal_tunables.kra_backlog) == sizeof(int)); - LASSERT (sizeof(kranal_tunables.kra_port) == sizeof(int)); - LASSERT (sizeof(kranal_tunables.kra_max_immediate) == sizeof(int)); - - kranal_api.nal_ni_init = kranal_api_startup; - kranal_api.nal_ni_fini = kranal_api_shutdown; + CLASSERT (sizeof(kranal_tunables.kra_timeout) == sizeof(int)); + CLASSERT (sizeof(kranal_tunables.kra_listener_timeout) == sizeof(int)); + CLASSERT (sizeof(kranal_tunables.kra_backlog) == sizeof(int)); + CLASSERT (sizeof(kranal_tunables.kra_port) == sizeof(int)); + CLASSERT (sizeof(kranal_tunables.kra_max_immediate) == sizeof(int)); /* Initialise dynamic tunables to defaults once only */ kranal_tunables.kra_timeout = RANAL_TIMEOUT; @@ -2128,25 +2105,17 @@ kranal_module_init (void) kranal_tunables.kra_port = RANAL_PORT; kranal_tunables.kra_max_immediate = RANAL_MAX_IMMEDIATE; - rc = ptl_register_nal(RANAL, &kranal_api); + rc = ptl_register_nal(&kranal_nal); if (rc != PTL_OK) { CERROR("Can't register RANAL: %d\n", rc); return -ENOMEM; /* or something... */ } - /* Pure gateways want the NAL started up at module load time... */ - rc = PtlNIInit(RANAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &kranal_ni); - if (rc != PTL_OK && rc != PTL_IFACE_DUP) { - ptl_unregister_nal(RANAL); - return -ENODEV; - } - kranal_tunables.kra_sysctl = register_sysctl_table(kranal_top_ctl_table, 0); if (kranal_tunables.kra_sysctl == NULL) { CERROR("Can't register sysctl table\n"); - PtlNIFini(kranal_ni); - ptl_unregister_nal(RANAL); + ptl_unregister_nal(&kranal_nal); return -ENOMEM; } diff --git a/lnet/klnds/ralnd/ralnd.h b/lnet/klnds/ralnd/ralnd.h index 0252cff..532daec 100644 --- a/lnet/klnds/ralnd/ralnd.h +++ b/lnet/klnds/ralnd/ralnd.h @@ -56,7 +56,6 @@ #include #include #include -#include #include @@ -122,7 +121,8 @@ typedef struct int kra_init; /* initialisation state */ int kra_shutdown; /* shut down? */ atomic_t kra_nthreads; /* # live threads */ - + ptl_ni_t *kra_ni; /* _the_ nal instance */ + struct semaphore kra_nid_mutex; /* serialise NID/listener ops */ struct semaphore kra_listener_signal; /* block for listener startup/shutdown */ struct socket *kra_listener_sock; /* listener's socket */ @@ -162,8 +162,7 @@ typedef struct #define RANAL_INIT_NOTHING 0 #define RANAL_INIT_DATA 1 -#define RANAL_INIT_LIB 2 -#define RANAL_INIT_ALL 3 +#define RANAL_INIT_ALL 2 typedef struct kra_acceptsock /* accepted socket queued for connd */ { @@ -267,7 +266,7 @@ typedef struct kra_tx /* message descriptor */ { struct list_head tx_list; /* queue on idle_txs/rac_sendq/rac_waitq */ struct kra_conn *tx_conn; /* owning conn */ - lib_msg_t *tx_libmsg[2]; /* lib msgs to finalize on completion */ + ptl_msg_t *tx_ptlmsg[2]; /* ptl msgs to finalize on completion */ unsigned long tx_qtime; /* when tx started to wait for something (jiffies) */ int tx_isnblk; /* I'm reserved for non-blocking sends */ int tx_nob; /* # bytes of payload */ @@ -353,7 +352,6 @@ typedef struct kra_peer # define sk_sleep sleep #endif -extern lib_nal_t kranal_lib; extern kra_data_t kranal_data; extern kra_tunables_t kranal_tunables; @@ -447,6 +445,29 @@ kranal_page2phys (struct page *p) return page_to_phys(p); } +ptl_err_t kranal_startup (ptl_ni_t *ni, char **interfaces); +void kranal_shutdown (ptl_ni_t *ni); +ptl_err_t kranal_send (ptl_ni_t *ni, void *private, + ptl_msg_t *ptlmsg, ptl_hdr_t *hdr, + int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int payload_niov, + struct iovec *payload_iov, + size_t payload_offset, size_t payload_nob); +ptl_err_t kranal_send_pages (ptl_ni_t *ni, void *private, + ptl_msg_t *ptlmsg, ptl_hdr_t *hdr, + int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int payload_niov, + ptl_kiov_t *payload_kiov, + size_t payload_offset, size_t payload_nob); +ptl_err_t kranal_recv(ptl_ni_t *ni, void *private, + ptl_msg_t *ptlmsg, unsigned int niov, + struct iovec *iov, size_t offset, + size_t mlen, size_t rlen); +ptl_err_t kranal_recv_pages(ptl_ni_t *ni, void *private, + ptl_msg_t *ptlmsg, unsigned int niov, + ptl_kiov_t *kiov, size_t offset, + size_t mlen, size_t rlen); + extern void kranal_free_acceptsock (kra_acceptsock_t *ras); extern int kranal_listener_procint (ctl_table *table, int write, struct file *filp, diff --git a/lnet/klnds/ralnd/ralnd_cb.c b/lnet/klnds/ralnd/ralnd_cb.c index d5165f4..67ccbbe 100644 --- a/lnet/klnds/ralnd/ralnd_cb.c +++ b/lnet/klnds/ralnd/ralnd_cb.c @@ -23,20 +23,6 @@ #include "ranal.h" -int -kranal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist) -{ - /* I would guess that if kranal_get_peer (nid) == NULL, - and we're not routing, then 'nid' is very distant :) */ - if ( nal->libnal_ni.ni_pid.nid == nid ) { - *dist = 0; - } else { - *dist = 1; - } - - return 0; -} - void kranal_device_callback(RAP_INT32 devid, RAP_PVOID arg) { @@ -129,8 +115,8 @@ kranal_get_idle_tx (int may_block) LASSERT (tx->tx_buftype == RANAL_BUF_NONE); LASSERT (tx->tx_msg.ram_type == RANAL_MSG_NONE); LASSERT (tx->tx_conn == NULL); - LASSERT (tx->tx_libmsg[0] == NULL); - LASSERT (tx->tx_libmsg[1] == NULL); + LASSERT (tx->tx_ptlmsg[0] == NULL); + LASSERT (tx->tx_ptlmsg[1] == NULL); } spin_unlock_irqrestore(&kranal_data.kra_tx_lock, flags); @@ -144,7 +130,7 @@ kranal_init_msg(kra_msg_t *msg, int type) msg->ram_magic = RANAL_MSG_MAGIC; msg->ram_version = RANAL_MSG_VERSION; msg->ram_type = type; - msg->ram_srcnid = kranal_lib.libnal_ni.ni_pid.nid; + msg->ram_srcnid = kranal_data.kra_ni->ni_nid; /* ram_connstamp gets set when FMA is sent */ } @@ -383,12 +369,12 @@ kranal_tx_done (kra_tx_t *tx, int completion) kranal_unmap_buffer(tx); for (i = 0; i < 2; i++) { - /* tx may have up to 2 libmsgs to finalise */ - if (tx->tx_libmsg[i] == NULL) + /* tx may have up to 2 ptlmsgs to finalise */ + if (tx->tx_ptlmsg[i] == NULL) continue; - lib_finalize(&kranal_lib, NULL, tx->tx_libmsg[i], ptlrc); - tx->tx_libmsg[i] = NULL; + ptl_finalize(kranal_data.kra_ni, NULL, tx->tx_ptlmsg[i], ptlrc); + tx->tx_ptlmsg[i] = NULL; } tx->tx_buftype = RANAL_BUF_NONE; @@ -583,9 +569,9 @@ kranal_consume_rxmsg (kra_conn_t *conn, void *buffer, int nob) } ptl_err_t -kranal_do_send (lib_nal_t *nal, +kranal_do_send (ptl_ni_t *ni, void *private, - lib_msg_t *libmsg, + ptl_msg_t *ptlmsg, ptl_hdr_t *hdr, int type, ptl_nid_t nid, @@ -648,7 +634,7 @@ kranal_do_send (lib_nal_t *nal, } tx->tx_conn = conn; - tx->tx_libmsg[0] = libmsg; + tx->tx_ptlmsg[0] = ptlmsg; kranal_map_buffer(tx); kranal_rdma(tx, RANAL_MSG_GET_DONE, @@ -665,40 +651,41 @@ kranal_do_send (lib_nal_t *nal, LASSERT (nob == 0); /* We have to consider the eventual sink buffer rather than any * payload passed here (there isn't any, and strictly, looking - * inside libmsg is a layering violation). We send a simple + * inside ptlmsg is a layering violation). We send a simple * IMMEDIATE GET if the sink buffer is mapped already and small * enough for FMA */ - if ((libmsg->md->options & PTL_MD_KIOV) == 0 && - libmsg->md->length <= RANAL_FMA_MAX_DATA && - libmsg->md->length <= kranal_tunables.kra_max_immediate) + if ((ptlmsg->msg_md->md_options & PTL_MD_KIOV) == 0 && + ptlmsg->msg_md->md_length <= RANAL_FMA_MAX_DATA && + ptlmsg->msg_md->md_length <= kranal_tunables.kra_max_immediate) break; tx = kranal_new_tx_msg(!in_interrupt(), RANAL_MSG_GET_REQ); if (tx == NULL) return PTL_NO_SPACE; - if ((libmsg->md->options & PTL_MD_KIOV) == 0) - rc = kranal_setup_virt_buffer(tx, libmsg->md->md_niov, - libmsg->md->md_iov.iov, - 0, libmsg->md->length); + if ((ptlmsg->msg_md->md_options & PTL_MD_KIOV) == 0) + rc = kranal_setup_virt_buffer(tx, ptlmsg->msg_md->md_niov, + ptlmsg->msg_md->md_iov.iov, + 0, ptlmsg->msg_md->md_length); else - rc = kranal_setup_phys_buffer(tx, libmsg->md->md_niov, - libmsg->md->md_iov.kiov, - 0, libmsg->md->length); + rc = kranal_setup_phys_buffer(tx, ptlmsg->msg_md->md_niov, + ptlmsg->msg_md->md_iov.kiov, + 0, ptlmsg->msg_md->md_length); if (rc != 0) { kranal_tx_done(tx, rc); return PTL_FAIL; } - tx->tx_libmsg[1] = lib_create_reply_msg(&kranal_lib, nid, libmsg); - if (tx->tx_libmsg[1] == NULL) { + tx->tx_ptlmsg[1] = ptl_create_reply_msg(kranal_data.kra_ni, + nid, ptlmsg); + if (tx->tx_ptlmsg[1] == NULL) { CERROR("Can't create reply for GET to "LPX64"\n", nid); kranal_tx_done(tx, rc); return PTL_FAIL; } - tx->tx_libmsg[0] = libmsg; + tx->tx_ptlmsg[0] = ptlmsg; tx->tx_msg.ram_u.get.ragm_hdr = *hdr; /* rest of tx_msg is setup just before it is sent */ kranal_launch_tx(tx, nid); @@ -724,7 +711,7 @@ kranal_do_send (lib_nal_t *nal, return PTL_FAIL; } - tx->tx_libmsg[0] = libmsg; + tx->tx_ptlmsg[0] = ptlmsg; tx->tx_msg.ram_u.putreq.raprm_hdr = *hdr; /* rest of tx_msg is setup just before it is sent */ kranal_launch_tx(tx, nid); @@ -748,37 +735,37 @@ kranal_do_send (lib_nal_t *nal, } tx->tx_msg.ram_u.immediate.raim_hdr = *hdr; - tx->tx_libmsg[0] = libmsg; + tx->tx_ptlmsg[0] = ptlmsg; kranal_launch_tx(tx, nid); return PTL_OK; } ptl_err_t -kranal_send (lib_nal_t *nal, void *private, lib_msg_t *cookie, +kranal_send (ptl_ni_t *ni, void *private, ptl_msg_t *cookie, ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, unsigned int niov, struct iovec *iov, size_t offset, size_t len) { - return kranal_do_send(nal, private, cookie, + return kranal_do_send(ni, private, cookie, hdr, type, nid, pid, niov, iov, NULL, offset, len); } ptl_err_t -kranal_send_pages (lib_nal_t *nal, void *private, lib_msg_t *cookie, +kranal_send_pages (ptl_ni_t *ni, void *private, ptl_msg_t *cookie, ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, unsigned int niov, ptl_kiov_t *kiov, size_t offset, size_t len) { - return kranal_do_send(nal, private, cookie, + return kranal_do_send(ni, private, cookie, hdr, type, nid, pid, niov, NULL, kiov, offset, len); } ptl_err_t -kranal_do_recv (lib_nal_t *nal, void *private, lib_msg_t *libmsg, +kranal_do_recv (ptl_ni_t *ni, void *private, ptl_msg_t *ptlmsg, unsigned int niov, struct iovec *iov, ptl_kiov_t *kiov, int offset, int mlen, int rlen) { @@ -793,12 +780,12 @@ kranal_do_recv (lib_nal_t *nal, void *private, lib_msg_t *libmsg, /* Either all pages or all vaddrs */ LASSERT (!(kiov != NULL && iov != NULL)); - CDEBUG(D_NET, "conn %p, rxmsg %p, libmsg %p\n", conn, rxmsg, libmsg); + CDEBUG(D_NET, "conn %p, rxmsg %p, ptlmsg %p\n", conn, rxmsg, ptlmsg); - if (libmsg == NULL) { + if (ptlmsg == NULL) { /* GET or ACK or portals is discarding */ LASSERT (mlen == 0); - lib_finalize(nal, NULL, libmsg, PTL_OK); + ptl_finalize(ni, NULL, ptlmsg, PTL_OK); return PTL_OK; } @@ -828,7 +815,7 @@ kranal_do_recv (lib_nal_t *nal, void *private, lib_msg_t *libmsg, buffer = ((char *)iov->iov_base) + offset; } rc = kranal_consume_rxmsg(conn, buffer, mlen); - lib_finalize(nal, NULL, libmsg, (rc == 0) ? PTL_OK : PTL_FAIL); + ptl_finalize(ni, NULL, ptlmsg, (rc == 0) ? PTL_OK : PTL_FAIL); return PTL_OK; case RANAL_MSG_PUT_REQ: @@ -853,7 +840,7 @@ kranal_do_recv (lib_nal_t *nal, void *private, lib_msg_t *libmsg, (__u64)((unsigned long)tx->tx_buffer); tx->tx_msg.ram_u.putack.rapam_desc.rard_nob = mlen; - tx->tx_libmsg[0] = libmsg; /* finalize this on RDMA_DONE */ + tx->tx_ptlmsg[0] = ptlmsg; /* finalize this on RDMA_DONE */ kranal_post_fma(conn, tx); @@ -864,20 +851,20 @@ kranal_do_recv (lib_nal_t *nal, void *private, lib_msg_t *libmsg, } ptl_err_t -kranal_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, +kranal_recv (ptl_ni_t *ni, void *private, ptl_msg_t *msg, unsigned int niov, struct iovec *iov, size_t offset, size_t mlen, size_t rlen) { - return kranal_do_recv(nal, private, msg, niov, iov, NULL, + return kranal_do_recv(ni, private, msg, niov, iov, NULL, offset, mlen, rlen); } ptl_err_t -kranal_recv_pages (lib_nal_t *nal, void *private, lib_msg_t *msg, +kranal_recv_pages (ptl_ni_t *ni, void *private, ptl_msg_t *msg, unsigned int niov, ptl_kiov_t *kiov, size_t offset, size_t mlen, size_t rlen) { - return kranal_do_recv(nal, private, msg, niov, NULL, kiov, + return kranal_do_recv(ni, private, msg, niov, NULL, kiov, offset, mlen, rlen); } @@ -1727,14 +1714,14 @@ kranal_check_fma_rx (kra_conn_t *conn) case RANAL_MSG_IMMEDIATE: CDEBUG(D_NET, "RX IMMEDIATE on %p\n", conn); - lib_parse(&kranal_lib, &msg->ram_u.immediate.raim_hdr, conn); + ptl_parse(kranal_data.kra_ni, &msg->ram_u.immediate.raim_hdr, conn); break; case RANAL_MSG_PUT_REQ: CDEBUG(D_NET, "RX PUT_REQ on %p\n", conn); - lib_parse(&kranal_lib, &msg->ram_u.putreq.raprm_hdr, conn); + ptl_parse(kranal_data.kra_ni, &msg->ram_u.putreq.raprm_hdr, conn); - if (conn->rac_rxmsg == NULL) /* lib_parse matched something */ + if (conn->rac_rxmsg == NULL) /* ptl_parse matched something */ break; tx = kranal_new_tx_msg(0, RANAL_MSG_PUT_NAK); @@ -1785,9 +1772,9 @@ kranal_check_fma_rx (kra_conn_t *conn) case RANAL_MSG_GET_REQ: CDEBUG(D_NET, "RX GET_REQ on %p\n", conn); - lib_parse(&kranal_lib, &msg->ram_u.get.ragm_hdr, conn); + ptl_parse(kranal_data.kra_ni, &msg->ram_u.get.ragm_hdr, conn); - if (conn->rac_rxmsg == NULL) /* lib_parse matched something */ + if (conn->rac_rxmsg == NULL) /* ptl_parse matched something */ break; tx = kranal_new_tx_msg(0, RANAL_MSG_GET_NAK); @@ -2025,13 +2012,3 @@ kranal_scheduler (void *arg) kranal_thread_fini(); return 0; } - - -lib_nal_t kranal_lib = { - libnal_data: &kranal_data, /* NAL private data */ - libnal_send: kranal_send, - libnal_send_pages: kranal_send_pages, - libnal_recv: kranal_recv, - libnal_recv_pages: kranal_recv_pages, - libnal_dist: kranal_dist -}; diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index 448871e..540ff8d 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -25,9 +25,17 @@ #include "socknal.h" -nal_t ksocknal_api; +ptl_nal_t ksocknal_nal = { + .nal_name = "tcp", + .nal_type = SOCKNAL, + .nal_startup = ksocknal_startup, + .nal_shutdown = ksocknal_shutdown, + .nal_send = ksocknal_send, + .nal_send_pages = ksocknal_send_pages, + .nal_recv = ksocknal_recv, + .nal_recv_pages = ksocknal_recv_pages, +}; ksock_nal_data_t ksocknal_data; -ptl_handle_ni_t ksocknal_ni; ksock_tunables_t ksocknal_tunables; kpr_nal_interface_t ksocknal_router_interface = { @@ -40,7 +48,7 @@ kpr_nal_interface_t ksocknal_router_interface = { int ksocknal_set_mynid(ptl_nid_t nid) { - lib_ni_t *ni = &ksocknal_lib.libnal_ni; + ptl_ni_t *ni = ksocknal_data.ksnd_ni; /* FIXME: we have to do this because we call lib_init() at module * insertion time, which is before we have 'mynid' available. lib_init @@ -49,9 +57,9 @@ ksocknal_set_mynid(ptl_nid_t nid) * problem. */ CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n", - nid, ni->ni_pid.nid); + nid, ni->ni_nid); - ni->ni_pid.nid = nid; + ni->ni_nid = nid; return (0); } @@ -1326,7 +1334,8 @@ ksocknal_destroy_conn (ksock_conn_t *conn) ", ip %d.%d.%d.%d:%d, with error\n", conn->ksnc_peer->ksnp_nid, HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port); - lib_finalize (&ksocknal_lib, NULL, conn->ksnc_cookie, PTL_FAIL); + ptl_finalize (ksocknal_data.ksnd_ni, NULL, + conn->ksnc_cookie, PTL_FAIL); break; case SOCKNAL_RX_BODY_FWD: ksocknal_fmb_callback (conn->ksnc_cookie, -ECONNABORTED); @@ -1895,21 +1904,15 @@ ksocknal_free_buffers (void) } void -ksocknal_api_shutdown (nal_t *nal) +ksocknal_shutdown (ptl_ni_t *ni) { ksock_sched_t *sched; int i; - if (nal->nal_refct != 0) { - /* This module got the first ref */ - PORTAL_MODULE_UNUSE; - return; - } - CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", atomic_read (&portal_kmemory)); - LASSERT(nal == &ksocknal_api); + LASSERT(ni->ni_nal == &ksocknal_nal); switch (ksocknal_data.ksnd_init) { default: @@ -1917,13 +1920,11 @@ ksocknal_api_shutdown (nal_t *nal) case SOCKNAL_INIT_ALL: libcfs_nal_cmd_unregister(SOCKNAL); - - ksocknal_data.ksnd_init = SOCKNAL_INIT_LIB; /* fall through */ - case SOCKNAL_INIT_LIB: + case SOCKNAL_INIT_DATA: /* No more calls to ksocknal_cmd() to create new - * autoroutes/connections since we're being unloaded. */ + * peers/connections since we're being unloaded. */ /* Delete all peers */ ksocknal_del_peer(PTL_NID_ANY, 0, 0); @@ -1939,13 +1940,6 @@ ksocknal_api_shutdown (nal_t *nal) schedule_timeout (cfs_time_seconds(1)); } - /* Tell lib we've stopped calling into her. */ - lib_fini(&ksocknal_lib); - - ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA; - /* fall through */ - - case SOCKNAL_INIT_DATA: LASSERT (atomic_read (&ksocknal_data.ksnd_npeers) == 0); LASSERT (ksocknal_data.ksnd_peers != NULL); for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { @@ -1975,10 +1969,11 @@ ksocknal_api_shutdown (nal_t *nal) cfs_waitq_broadcast (&ksocknal_data.ksnd_autoconnectd_waitq); cfs_waitq_broadcast (&ksocknal_data.ksnd_reaper_waitq); - for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) { - sched = &ksocknal_data.ksnd_schedulers[i]; - cfs_waitq_broadcast(&sched->kss_waitq); - } + if (ksocknal_data.ksnd_schedulers != NULL) + for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) { + sched = &ksocknal_data.ksnd_schedulers[i]; + cfs_waitq_broadcast(&sched->kss_waitq); + } i = 4; read_lock(&ksocknal_data.ksnd_global_lock); @@ -2010,6 +2005,8 @@ ksocknal_api_shutdown (nal_t *nal) printk(KERN_INFO "Lustre: Routing socket NAL unloaded (final mem %d)\n", atomic_read(&portal_kmemory)); + + PORTAL_MODULE_UNUSE; } @@ -2029,31 +2026,26 @@ ksocknal_init_incarnation (void) (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; } -int -ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid, - ptl_ni_limits_t *requested_limits, - ptl_ni_limits_t *actual_limits) +ptl_err_t +ksocknal_startup (ptl_ni_t *ni, char **interfaces) { - ptl_process_id_t process_id; int pkmem = atomic_read(&portal_kmemory); int rc; int i; int j; - LASSERT (nal == &ksocknal_api); + LASSERT (ni->ni_nal == &ksocknal_nal); - if (nal->nal_refct != 0) { - if (actual_limits != NULL) - *actual_limits = ksocknal_lib.libnal_ni.ni_actual_limits; - /* This module got the first ref */ - PORTAL_MODULE_USE; - return (PTL_OK); + if (ksocknal_data.ksnd_init != SOCKNAL_INIT_NOTHING) { + CERROR ("Only 1 instance supported\n"); + return PTL_FAIL; } - LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING); - memset (&ksocknal_data, 0, sizeof (ksocknal_data)); /* zero pointers */ + ksocknal_data.ksnd_ni = ni; /* temp hack */ + ni->ni_data = &ksocknal_data; + ksocknal_init_incarnation(); ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE; @@ -2092,12 +2084,13 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid, /* flag lists/ptrs/locks initialised */ ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA; + PORTAL_MODULE_USE; ksocknal_data.ksnd_nschedulers = ksocknal_nsched(); PORTAL_ALLOC(ksocknal_data.ksnd_schedulers, sizeof(ksock_sched_t) * ksocknal_data.ksnd_nschedulers); if (ksocknal_data.ksnd_schedulers == NULL) { - ksocknal_api_shutdown (nal); + ksocknal_shutdown (ni); return (-ENOMEM); } @@ -2113,27 +2106,13 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid, cfs_waitq_init (&kss->kss_waitq); } - /* NB we have to wait to be told our true NID... */ - process_id.pid = requested_pid; - process_id.nid = 0; - - rc = lib_init(&ksocknal_lib, nal, process_id, - requested_limits, actual_limits); - if (rc != PTL_OK) { - CERROR("lib_init failed: error %d\n", rc); - ksocknal_api_shutdown (nal); - return (rc); - } - - ksocknal_data.ksnd_init = SOCKNAL_INIT_LIB; // flag lib_init() called - for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) { rc = ksocknal_thread_start (ksocknal_scheduler, &ksocknal_data.ksnd_schedulers[i]); if (rc != 0) { CERROR("Can't spawn socknal scheduler[%d]: %d\n", i, rc); - ksocknal_api_shutdown (nal); + ksocknal_shutdown (ni); return (rc); } } @@ -2142,7 +2121,7 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid, rc = ksocknal_thread_start (ksocknal_autoconnectd, (void *)((long)i)); if (rc != 0) { CERROR("Can't spawn socknal autoconnectd: %d\n", rc); - ksocknal_api_shutdown (nal); + ksocknal_shutdown (ni); return (rc); } } @@ -2150,7 +2129,7 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid, rc = ksocknal_thread_start (ksocknal_reaper, NULL); if (rc != 0) { CERROR ("Can't spawn socknal reaper: %d\n", rc); - ksocknal_api_shutdown (nal); + ksocknal_shutdown (ni); return (rc); } @@ -2176,7 +2155,7 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid, PORTAL_ALLOC(fmb, offsetof(ksock_fmb_t, fmb_kiov[pool->fmp_buff_pages])); if (fmb == NULL) { - ksocknal_api_shutdown(nal); + ksocknal_shutdown(ni); return (-ENOMEM); } @@ -2186,7 +2165,7 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid, fmb->fmb_kiov[j].kiov_page = cfs_alloc_page(CFS_ALLOC_STD); if (fmb->fmb_kiov[j].kiov_page == NULL) { - ksocknal_api_shutdown (nal); + ksocknal_shutdown (ni); return (-ENOMEM); } @@ -2200,7 +2179,7 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid, rc = libcfs_nal_cmd_register(SOCKNAL, &ksocknal_cmd, NULL); if (rc != 0) { CERROR ("Can't initialise command interface (rc = %d)\n", rc); - ksocknal_api_shutdown (nal); + ksocknal_shutdown (ni); return (rc); } @@ -2211,7 +2190,7 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid, "(Routing %s, initial mem %d, incarnation "LPX64")\n", kpr_routing (&ksocknal_data.ksnd_router) ? "enabled" : "disabled", pkmem, ksocknal_data.ksnd_incarnation); - + return (0); } @@ -2222,9 +2201,7 @@ ksocknal_module_fini (void) if (ksocknal_tunables.ksnd_sysctl != NULL) unregister_sysctl_table (ksocknal_tunables.ksnd_sysctl); #endif - PtlNIFini(ksocknal_ni); - - ptl_unregister_nal(SOCKNAL); + ptl_unregister_nal(&ksocknal_nal); } extern cfs_sysctl_table_t ksocknal_top_ctl_table[]; @@ -2235,28 +2212,25 @@ ksocknal_module_init (void) int rc; /* packet descriptor must fit in a router descriptor's scratchpad */ - LASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t)); + CLASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t)); /* the following must be sizeof(int) for proc_dointvec() */ - LASSERT(sizeof (ksocknal_tunables.ksnd_io_timeout) == sizeof (int)); - LASSERT(sizeof (ksocknal_tunables.ksnd_eager_ack) == sizeof (int)); - LASSERT(sizeof (ksocknal_tunables.ksnd_typed_conns) == sizeof (int)); - LASSERT(sizeof (ksocknal_tunables.ksnd_min_bulk) == sizeof (int)); - LASSERT(sizeof (ksocknal_tunables.ksnd_buffer_size) == sizeof (int)); - LASSERT(sizeof (ksocknal_tunables.ksnd_nagle) == sizeof (int)); - LASSERT(sizeof (ksocknal_tunables.ksnd_keepalive_idle) == sizeof (int)); - LASSERT(sizeof (ksocknal_tunables.ksnd_keepalive_count) == sizeof (int)); - LASSERT(sizeof (ksocknal_tunables.ksnd_keepalive_intvl) == sizeof (int)); + CLASSERT(sizeof (ksocknal_tunables.ksnd_io_timeout) == sizeof (int)); + CLASSERT(sizeof (ksocknal_tunables.ksnd_eager_ack) == sizeof (int)); + CLASSERT(sizeof (ksocknal_tunables.ksnd_typed_conns) == sizeof (int)); + CLASSERT(sizeof (ksocknal_tunables.ksnd_min_bulk) == sizeof (int)); + CLASSERT(sizeof (ksocknal_tunables.ksnd_buffer_size) == sizeof (int)); + CLASSERT(sizeof (ksocknal_tunables.ksnd_nagle) == sizeof (int)); + CLASSERT(sizeof (ksocknal_tunables.ksnd_keepalive_idle) == sizeof (int)); + CLASSERT(sizeof (ksocknal_tunables.ksnd_keepalive_count) == sizeof (int)); + CLASSERT(sizeof (ksocknal_tunables.ksnd_keepalive_intvl) == sizeof (int)); #if CPU_AFFINITY - LASSERT(sizeof (ksocknal_tunables.ksnd_irq_affinity) == sizeof (int)); + CLASSERT(sizeof (ksocknal_tunables.ksnd_irq_affinity) == sizeof (int)); #endif #if SOCKNAL_ZC - LASSERT(sizeof (ksocknal_tunables.ksnd_zc_min_frag) == sizeof (int)); + CLASSERT(sizeof (ksocknal_tunables.ksnd_zc_min_frag) == sizeof (int)); #endif /* check ksnr_connected/connecting field large enough */ - LASSERT(SOCKNAL_CONN_NTYPES <= 4); - - ksocknal_api.nal_ni_init = ksocknal_api_startup; - ksocknal_api.nal_ni_fini = ksocknal_api_shutdown; + CLASSERT(SOCKNAL_CONN_NTYPES <= 4); /* Initialise dynamic tunables to defaults once only */ ksocknal_tunables.ksnd_io_timeout = SOCKNAL_IO_TIMEOUT; @@ -2275,19 +2249,12 @@ ksocknal_module_init (void) ksocknal_tunables.ksnd_zc_min_frag = SOCKNAL_ZC_MIN_FRAG; #endif - rc = ptl_register_nal(SOCKNAL, &ksocknal_api); + rc = ptl_register_nal(&ksocknal_nal); if (rc != PTL_OK) { CERROR("Can't register SOCKNAL: %d\n", rc); return (-ENOMEM); /* or something... */ } - /* Pure gateways want the NAL started up at module load time... */ - rc = PtlNIInit(SOCKNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &ksocknal_ni); - if (rc != PTL_OK && rc != PTL_IFACE_DUP) { - ptl_unregister_nal(SOCKNAL); - return (-ENODEV); - } - #ifdef CONFIG_SYSCTL /* Press on regardless even if registering sysctl doesn't work */ ksocknal_tunables.ksnd_sysctl = diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h index da7014e..e6fc03b 100644 --- a/lnet/klnds/socklnd/socklnd.h +++ b/lnet/klnds/socklnd/socklnd.h @@ -42,7 +42,6 @@ #include #include #include -#include #include #define SOCKNAL_N_AUTOCONNECTD 4 /* # socknal autoconnect daemons */ @@ -187,14 +186,15 @@ typedef struct ksock_irqinfo_t ksnd_irqinfo[NR_IRQS];/* irq->scheduler lookup */ + ptl_ni_t *ksnd_ni; /* NI instance (tmp hack) */ + int ksnd_ninterfaces; ksock_interface_t ksnd_interfaces[SOCKNAL_MAX_INTERFACES]; /* published interfaces */ } ksock_nal_data_t; #define SOCKNAL_INIT_NOTHING 0 #define SOCKNAL_INIT_DATA 1 -#define SOCKNAL_INIT_LIB 2 -#define SOCKNAL_INIT_ALL 3 +#define SOCKNAL_INIT_ALL 2 /* A packet just assembled for transmission is represented by 1 or more * struct iovec fragments (the first frag contains the portals header), @@ -238,8 +238,8 @@ typedef struct /* forwarded packet */ typedef struct /* locally transmitted packet */ { ksock_tx_t ltx_tx; /* send info */ - void *ltx_private; /* lib_finalize() callback arg */ - void *ltx_cookie; /* lib_finalize() callback arg */ + void *ltx_private; /* ptl_finalize() callback arg */ + void *ltx_cookie; /* ptl_finalize() callback arg */ ptl_hdr_t ltx_hdr; /* buffer for packet header */ int ltx_desc_size; /* bytes allocated for this desc */ struct iovec ltx_iov[1]; /* iov for hdr + payload */ @@ -310,7 +310,7 @@ typedef struct ksock_conn int ksnc_rx_nkiov; /* # page frags */ ptl_kiov_t *ksnc_rx_kiov; /* the page frags */ ksock_rxiovspace_t ksnc_rx_iov_space; /* space for frag descriptors */ - void *ksnc_cookie; /* rx lib_finalize passthru arg */ + void *ksnc_cookie; /* rx ptl_finalize passthru arg */ ptl_hdr_t ksnc_hdr; /* where I read headers into */ /* WRITER */ @@ -369,7 +369,6 @@ typedef struct ksock_peer } ksock_peer_t; -extern lib_nal_t ksocknal_lib; extern ksock_nal_data_t ksocknal_data; extern ksock_tunables_t ksocknal_tunables; @@ -402,6 +401,30 @@ ksocknal_putconnsock (ksock_conn_t *conn) cfs_put_file (KSN_CONN2FILE(conn)); } +ptl_err_t ksocknal_startup (ptl_ni_t *ni, char **interfaces); +void ksocknal_shutdown (ptl_ni_t *ni); +ptl_err_t ksocknal_send (ptl_ni_t *ni, void *private, + ptl_msg_t *ptlmsg, ptl_hdr_t *hdr, + int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int payload_niov, + struct iovec *payload_iov, + size_t payload_offset, size_t payload_nob); +ptl_err_t ksocknal_send_pages (ptl_ni_t *ni, void *private, + ptl_msg_t *ptlmsg, ptl_hdr_t *hdr, + int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int payload_niov, + ptl_kiov_t *payload_kiov, + size_t payload_offset, size_t payload_nob); +ptl_err_t ksocknal_recv(ptl_ni_t *ni, void *private, + ptl_msg_t *ptlmsg, unsigned int niov, + struct iovec *iov, size_t offset, + size_t mlen, size_t rlen); +ptl_err_t ksocknal_recv_pages(ptl_ni_t *ni, void *private, + ptl_msg_t *ptlmsg, unsigned int niov, + ptl_kiov_t *kiov, size_t offset, + size_t mlen, size_t rlen); + + extern void ksocknal_put_route (ksock_route_t *route); extern void ksocknal_put_peer (ksock_peer_t *peer); extern ksock_peer_t *ksocknal_find_peer_locked (ptl_nid_t nid); diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c index ef8ca0f..643da78 100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ b/lnet/klnds/socklnd/socklnd_cb.c @@ -25,24 +25,6 @@ #include "socknal.h" -/* - * LIB functions follow - * - */ -int -ksocknal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist) -{ - /* I would guess that if ksocknal_get_peer (nid) == NULL, - and we're not routing, then 'nid' is very distant :) */ - if (nal->libnal_ni.ni_pid.nid == nid) { - *dist = 0; - } else { - *dist = 1; - } - - return 0; -} - void ksocknal_free_ltx (ksock_ltx_t *ltx) { @@ -403,7 +385,8 @@ ksocknal_tx_done (ksock_tx_t *tx, int asynch) /* local send */ ltx = KSOCK_TX_2_KSOCK_LTX (tx); - lib_finalize (&ksocknal_lib, ltx->ltx_private, ltx->ltx_cookie, + ptl_finalize (ksocknal_data.ksnd_ni, + ltx->ltx_private, ltx->ltx_cookie, (tx->tx_resid == 0) ? PTL_OK : PTL_FAIL); ksocknal_free_ltx (ltx); @@ -530,7 +513,6 @@ ksocknal_launch_autoconnect_locked (ksock_route_t *route) ksock_peer_t * ksocknal_find_target_peer_locked (ksock_tx_t *tx, ptl_nid_t nid) { - char ipbuf[PTL_NALFMT_SIZE]; ptl_nid_t target_nid; int rc; ksock_peer_t *peer = ksocknal_find_peer_locked (nid); @@ -541,7 +523,7 @@ ksocknal_find_target_peer_locked (ksock_tx_t *tx, ptl_nid_t nid) if (tx->tx_isfwd) { CERROR ("Can't send packet to "LPX64 " %s: routed target is not a peer\n", - nid, portals_nid2str(SOCKNAL, nid, ipbuf)); + nid, libcfs_nid2str(nid)); return (NULL); } @@ -549,7 +531,7 @@ ksocknal_find_target_peer_locked (ksock_tx_t *tx, ptl_nid_t nid) &target_nid); if (rc != 0) { CERROR ("Can't route to "LPX64" %s: router error %d\n", - nid, portals_nid2str(SOCKNAL, nid, ipbuf), rc); + nid, libcfs_nid2str(nid), rc); return (NULL); } @@ -558,7 +540,7 @@ ksocknal_find_target_peer_locked (ksock_tx_t *tx, ptl_nid_t nid) return (peer); CERROR ("Can't send packet to "LPX64" %s: no peer entry\n", - target_nid, portals_nid2str(SOCKNAL, target_nid, ipbuf)); + target_nid, libcfs_nid2str(target_nid)); return (NULL); } @@ -744,8 +726,8 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid) * * We always expect at least 1 mapped fragment containing the * complete portals header. */ - LASSERT (lib_iov_nob (tx->tx_niov, tx->tx_iov) + - lib_kiov_nob (tx->tx_nkiov, tx->tx_kiov) == tx->tx_nob); + LASSERT (ptl_iov_nob (tx->tx_niov, tx->tx_iov) + + ptl_kiov_nob (tx->tx_nkiov, tx->tx_kiov) == tx->tx_nob); LASSERT (tx->tx_niov >= 1); LASSERT (tx->tx_iov[0].iov_len >= sizeof (ptl_hdr_t)); @@ -820,9 +802,9 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid) } ptl_err_t -ksocknal_sendmsg(lib_nal_t *nal, +ksocknal_sendmsg(ptl_ni_t *ni, void *private, - lib_msg_t *cookie, + ptl_msg_t *cookie, ptl_hdr_t *hdr, int type, ptl_nid_t nid, @@ -894,7 +876,7 @@ ksocknal_sendmsg(lib_nal_t *nal, ltx->ltx_tx.tx_nkiov = 0; ltx->ltx_tx.tx_niov = - 1 + lib_extract_iov(payload_niov, <x->ltx_iov[1], + 1 + ptl_extract_iov(payload_niov, <x->ltx_iov[1], payload_niov, payload_iov, payload_offset, payload_nob); } else { @@ -903,7 +885,7 @@ ksocknal_sendmsg(lib_nal_t *nal, ltx->ltx_tx.tx_kiov = ltx->ltx_kiov; ltx->ltx_tx.tx_nkiov = - lib_extract_kiov(payload_niov, ltx->ltx_kiov, + ptl_extract_kiov(payload_niov, ltx->ltx_kiov, payload_niov, payload_kiov, payload_offset, payload_nob); } @@ -917,24 +899,24 @@ ksocknal_sendmsg(lib_nal_t *nal, } ptl_err_t -ksocknal_send (lib_nal_t *nal, void *private, lib_msg_t *cookie, +ksocknal_send (ptl_ni_t *ni, void *private, ptl_msg_t *cookie, ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, unsigned int payload_niov, struct iovec *payload_iov, size_t payload_offset, size_t payload_len) { - return (ksocknal_sendmsg(nal, private, cookie, + return (ksocknal_sendmsg(ni, private, cookie, hdr, type, nid, pid, payload_niov, payload_iov, NULL, payload_offset, payload_len)); } ptl_err_t -ksocknal_send_pages (lib_nal_t *nal, void *private, lib_msg_t *cookie, +ksocknal_send_pages (ptl_ni_t *ni, void *private, ptl_msg_t *cookie, ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, unsigned int payload_niov, ptl_kiov_t *payload_kiov, size_t payload_offset, size_t payload_len) { - return (ksocknal_sendmsg(nal, private, cookie, + return (ksocknal_sendmsg(ni, private, cookie, hdr, type, nid, pid, payload_niov, NULL, payload_kiov, payload_offset, payload_len)); @@ -951,7 +933,7 @@ ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) fwd->kprfd_gateway_nid, fwd->kprfd_target_nid); /* I'm the gateway; must be the last hop */ - if (nid == ksocknal_lib.libnal_ni.ni_pid.nid) + if (nid == ksocknal_data.ksnd_ni->ni_nid) nid = fwd->kprfd_target_nid; /* setup iov for hdr */ @@ -1004,16 +986,14 @@ ksocknal_fmb_callback (void *arg, int error) ksock_conn_t *conn = NULL; ksock_sched_t *sched; unsigned long flags; - char ipbuf[PTL_NALFMT_SIZE]; - char ipbuf2[PTL_NALFMT_SIZE]; if (error != 0) CERROR("Failed to route packet from " LPX64" %s to "LPX64" %s: %d\n", le64_to_cpu(hdr->src_nid), - portals_nid2str(SOCKNAL, le64_to_cpu(hdr->src_nid), ipbuf), + libcfs_nid2str(le64_to_cpu(hdr->src_nid)), le64_to_cpu(hdr->dest_nid), - portals_nid2str(SOCKNAL, le64_to_cpu(hdr->dest_nid), ipbuf2), + libcfs_nid2str(le64_to_cpu(hdr->dest_nid)), error); else CDEBUG (D_NET, "routed packet from "LPX64" to "LPX64": OK\n", @@ -1166,8 +1146,6 @@ ksocknal_fwd_parse (ksock_conn_t *conn) ptl_nid_t dest_nid = le64_to_cpu(conn->ksnc_hdr.dest_nid); ptl_nid_t src_nid = le64_to_cpu(conn->ksnc_hdr.src_nid); int body_len = le32_to_cpu(conn->ksnc_hdr.payload_length); - char str[PTL_NALFMT_SIZE]; - char str2[PTL_NALFMT_SIZE]; CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d parsing header\n", conn, src_nid, dest_nid, conn->ksnc_rx_nob_left); @@ -1178,8 +1156,8 @@ ksocknal_fwd_parse (ksock_conn_t *conn) if (body_len < 0) { /* length corrupt (overflow) */ CERROR("dropping packet from "LPX64" (%s) for "LPX64" (%s): " "packet size %d illegal\n", - src_nid, portals_nid2str(TCPNAL, src_nid, str), - dest_nid, portals_nid2str(TCPNAL, dest_nid, str2), + src_nid, libcfs_nid2str(src_nid), + dest_nid, libcfs_nid2str(dest_nid), body_len); ksocknal_new_packet (conn, 0); /* on to new packet */ @@ -1189,8 +1167,8 @@ ksocknal_fwd_parse (ksock_conn_t *conn) if (!kpr_routing(&ksocknal_data.ksnd_router)) { /* not forwarding */ CERROR("dropping packet from "LPX64" (%s) for "LPX64 " (%s): not forwarding\n", - src_nid, portals_nid2str(TCPNAL, src_nid, str), - dest_nid, portals_nid2str(TCPNAL, dest_nid, str2)); + src_nid, libcfs_nid2str(src_nid), + dest_nid, libcfs_nid2str(dest_nid)); /* on to new packet (skip this one's body) */ ksocknal_new_packet (conn, body_len); return; @@ -1199,8 +1177,8 @@ ksocknal_fwd_parse (ksock_conn_t *conn) if (body_len > PTL_MTU) { /* too big to forward */ CERROR ("dropping packet from "LPX64" (%s) for "LPX64 "(%s): packet size %d too big\n", - src_nid, portals_nid2str(TCPNAL, src_nid, str), - dest_nid, portals_nid2str(TCPNAL, dest_nid, str2), + src_nid, libcfs_nid2str(src_nid), + dest_nid, libcfs_nid2str(dest_nid), body_len); /* on to new packet (skip this one's body) */ ksocknal_new_packet (conn, body_len); @@ -1212,8 +1190,8 @@ ksocknal_fwd_parse (ksock_conn_t *conn) if (peer != NULL) { CERROR ("dropping packet from "LPX64" (%s) for "LPX64 "(%s): target is a peer\n", - src_nid, portals_nid2str(TCPNAL, src_nid, str), - dest_nid, portals_nid2str(TCPNAL, dest_nid, str2)); + src_nid, libcfs_nid2str(src_nid), + dest_nid, libcfs_nid2str(dest_nid)); ksocknal_put_peer (peer); /* drop ref from get above */ /* on to next packet (skip this one's body) */ @@ -1344,7 +1322,7 @@ ksocknal_process_receive (ksock_conn_t *conn) case SOCKNAL_RX_HEADER: if (conn->ksnc_hdr.type != cpu_to_le32(PTL_MSG_HELLO) && le64_to_cpu(conn->ksnc_hdr.dest_nid) != - ksocknal_lib.libnal_ni.ni_pid.nid) { + ksocknal_data.ksnd_ni->ni_nid) { /* This packet isn't for me */ ksocknal_fwd_parse (conn); switch (conn->ksnc_rx_state) { @@ -1361,7 +1339,7 @@ ksocknal_process_receive (ksock_conn_t *conn) } /* sets wanted_len, iovs etc */ - rc = lib_parse(&ksocknal_lib, &conn->ksnc_hdr, conn); + rc = ptl_parse(ksocknal_data.ksnd_ni, &conn->ksnc_hdr, conn); if (rc != PTL_OK) { /* I just received garbage: give up on this conn */ @@ -1377,7 +1355,7 @@ ksocknal_process_receive (ksock_conn_t *conn) case SOCKNAL_RX_BODY: /* payload all received */ - lib_finalize(&ksocknal_lib, NULL, conn->ksnc_cookie, PTL_OK); + ptl_finalize(ksocknal_data.ksnd_ni, NULL, conn->ksnc_cookie, PTL_OK); /* Fall through */ case SOCKNAL_RX_SLOP: @@ -1414,7 +1392,7 @@ ksocknal_process_receive (ksock_conn_t *conn) } ptl_err_t -ksocknal_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, +ksocknal_recv (ptl_ni_t *ni, void *private, ptl_msg_t *msg, unsigned int niov, struct iovec *iov, size_t offset, size_t mlen, size_t rlen) { @@ -1431,18 +1409,18 @@ ksocknal_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, conn->ksnc_rx_kiov = NULL; conn->ksnc_rx_iov = conn->ksnc_rx_iov_space.iov; conn->ksnc_rx_niov = - lib_extract_iov(PTL_MD_MAX_IOV, conn->ksnc_rx_iov, + ptl_extract_iov(PTL_MD_MAX_IOV, conn->ksnc_rx_iov, niov, iov, offset, mlen); LASSERT (mlen == - lib_iov_nob (conn->ksnc_rx_niov, conn->ksnc_rx_iov) + - lib_kiov_nob (conn->ksnc_rx_nkiov, conn->ksnc_rx_kiov)); + ptl_iov_nob (conn->ksnc_rx_niov, conn->ksnc_rx_iov) + + ptl_kiov_nob (conn->ksnc_rx_nkiov, conn->ksnc_rx_kiov)); return (PTL_OK); } ptl_err_t -ksocknal_recv_pages (lib_nal_t *nal, void *private, lib_msg_t *msg, +ksocknal_recv_pages (ptl_ni_t *ni, void *private, ptl_msg_t *msg, unsigned int niov, ptl_kiov_t *kiov, size_t offset, size_t mlen, size_t rlen) { @@ -1459,12 +1437,12 @@ ksocknal_recv_pages (lib_nal_t *nal, void *private, lib_msg_t *msg, conn->ksnc_rx_iov = NULL; conn->ksnc_rx_kiov = conn->ksnc_rx_iov_space.kiov; conn->ksnc_rx_nkiov = - lib_extract_kiov(PTL_MD_MAX_IOV, conn->ksnc_rx_kiov, + ptl_extract_kiov(PTL_MD_MAX_IOV, conn->ksnc_rx_kiov, niov, kiov, offset, mlen); LASSERT (mlen == - lib_iov_nob (conn->ksnc_rx_niov, conn->ksnc_rx_iov) + - lib_kiov_nob (conn->ksnc_rx_nkiov, conn->ksnc_rx_kiov)); + ptl_iov_nob (conn->ksnc_rx_niov, conn->ksnc_rx_iov) + + ptl_kiov_nob (conn->ksnc_rx_nkiov, conn->ksnc_rx_kiov)); return (PTL_OK); } @@ -1748,7 +1726,7 @@ ksocknal_send_hello (ksock_conn_t *conn, __u32 *ipaddrs, int nipaddrs) hmv->version_major = cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR); hmv->version_minor = cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR); - hdr.src_nid = cpu_to_le64 (ksocknal_lib.libnal_ni.ni_pid.nid); + hdr.src_nid = cpu_to_le64 (ksocknal_data.ksnd_ni->ni_nid); hdr.type = cpu_to_le32 (PTL_MSG_HELLO); hdr.payload_length = cpu_to_le32 (nipaddrs * sizeof(*ipaddrs)); @@ -1807,7 +1785,6 @@ ksocknal_recv_hello (ksock_conn_t *conn, ptl_nid_t *nid, int type; ptl_hdr_t hdr; ptl_magicversion_t *hmv; - char ipbuf[PTL_NALFMT_SIZE]; hmv = (ptl_magicversion_t *)&hdr.dest_nid; LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid)); @@ -1876,9 +1853,7 @@ ksocknal_recv_hello (ksock_conn_t *conn, ptl_nid_t *nid, "configuration.\n", *nid, HIPQUAD(conn->ksnc_ipaddr), le64_to_cpu(hdr.src_nid), - portals_nid2str(SOCKNAL, - le64_to_cpu(hdr.src_nid), - ipbuf)); + libcfs_nid2str(le64_to_cpu(hdr.src_nid))); CERROR ("Connected to nid "LPX64"@%u.%u.%u.%u " "but expecting "LPX64"\n", @@ -2090,21 +2065,15 @@ ksocknal_autoconnect (ksock_route_t *route) write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags); while (!list_empty (&zombies)) { - char ipbuf[PTL_NALFMT_SIZE]; - char ipbuf2[PTL_NALFMT_SIZE]; tx = list_entry (zombies.next, ksock_tx_t, tx_list); CERROR ("Deleting packet type %d len %d ("LPX64" %s->"LPX64" %s)\n", le32_to_cpu (tx->tx_hdr->type), le32_to_cpu (tx->tx_hdr->payload_length), le64_to_cpu (tx->tx_hdr->src_nid), - portals_nid2str(SOCKNAL, - le64_to_cpu(tx->tx_hdr->src_nid), - ipbuf), + libcfs_nid2str(le64_to_cpu(tx->tx_hdr->src_nid)), le64_to_cpu (tx->tx_hdr->dest_nid), - portals_nid2str(SOCKNAL, - le64_to_cpu(tx->tx_hdr->src_nid), - ipbuf2)); + libcfs_nid2str(le64_to_cpu(tx->tx_hdr->src_nid))); list_del (&tx->tx_list); /* complete now */ @@ -2415,12 +2384,3 @@ ksocknal_reaper (void *arg) ksocknal_thread_fini (); return (0); } - -lib_nal_t ksocknal_lib = { - libnal_data: &ksocknal_data, /* NAL private data */ - libnal_send: ksocknal_send, - libnal_send_pages: ksocknal_send_pages, - libnal_recv: ksocknal_recv, - libnal_recv_pages: ksocknal_recv_pages, - libnal_dist: ksocknal_dist -}; diff --git a/lnet/klnds/viblnd/viblnd.c b/lnet/klnds/viblnd/viblnd.c index 7995610e..69558c6 100644 --- a/lnet/klnds/viblnd/viblnd.c +++ b/lnet/klnds/viblnd/viblnd.c @@ -24,8 +24,17 @@ #include "vibnal.h" -nal_t kibnal_api; -ptl_handle_ni_t kibnal_ni; +ptl_nal_t kibnal_nal = { + .nal_name = "vib", + .nal_type = VIBNAL, + .nal_startup = kibnal_startup, + .nal_shutdown = kibnal_shutdown, + .nal_send = kibnal_send, + .nal_send_pages = kibnal_send_pages, + .nal_recv = kibnal_recv, + .nal_recv_pages = kibnal_recv_pages, +}; + kib_data_t kibnal_data; kib_tunables_t kibnal_tunables; @@ -211,7 +220,7 @@ kibnal_pack_msg(kib_msg_t *msg, int credits, ptl_nid_t dstnid, msg->ibm_credits = credits; /* ibm_nob */ msg->ibm_cksum = 0; - msg->ibm_srcnid = kibnal_lib.libnal_ni.ni_pid.nid; + msg->ibm_srcnid = kibnal_data.kib_ni->ni_nid; msg->ibm_srcstamp = kibnal_data.kib_incarnation; msg->ibm_dstnid = dstnid; msg->ibm_dststamp = dststamp; @@ -416,22 +425,22 @@ kibnal_set_mynid(ptl_nid_t nid) { static cm_listen_data_t info; /* protected by kib_nid_mutex */ - lib_ni_t *ni = &kibnal_lib.libnal_ni; + ptl_ni_t *ni = kibnal_data.kib_ni; int rc; cm_return_t cmrc; CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n", - nid, ni->ni_pid.nid); + nid, ni->ni_nid); down (&kibnal_data.kib_nid_mutex); - if (nid == ni->ni_pid.nid) { + if (nid == ni->ni_nid) { /* no change of NID */ up (&kibnal_data.kib_nid_mutex); return (0); } - CDEBUG(D_NET, "NID "LPX64"("LPX64")\n", ni->ni_pid.nid, nid); + CDEBUG(D_NET, "NID "LPX64"("LPX64")\n", ni->ni_nid, nid); if (kibnal_data.kib_listen_handle != NULL) { cmrc = cm_cancel(kibnal_data.kib_listen_handle); @@ -449,7 +458,7 @@ kibnal_set_mynid(ptl_nid_t nid) /* Change NID. NB queued passive connection requests (if any) will be * rejected with an incorrect destination NID */ - ni->ni_pid.nid = nid; + ni->ni_nid = nid; kibnal_data.kib_incarnation++; mb(); @@ -458,7 +467,7 @@ kibnal_set_mynid(ptl_nid_t nid) * new world. */ kibnal_del_peer (PTL_NID_ANY, 0); - if (ni->ni_pid.nid != PTL_NID_ANY) { /* got a new NID to install */ + if (ni->ni_nid != PTL_NID_ANY) { /* got a new NID to install */ kibnal_data.kib_listen_handle = cm_create_cep(cm_cep_transp_rc); if (kibnal_data.kib_listen_handle == NULL) { @@ -490,7 +499,7 @@ kibnal_set_mynid(ptl_nid_t nid) LASSERT (cmrc == cm_stat_success); kibnal_data.kib_listen_handle = NULL; failed_0: - ni->ni_pid.nid = PTL_NID_ANY; + ni->ni_nid = PTL_NID_ANY; kibnal_data.kib_incarnation++; mb(); kibnal_del_peer (PTL_NID_ANY, 0); @@ -1481,22 +1490,17 @@ kibnal_setup_tx_descs (void) } void -kibnal_api_shutdown (nal_t *nal) +kibnal_shutdown (ptl_ni_t *ni) { int i; vv_return_t vvrc; - if (nal->nal_refct != 0) { - /* This module got the first ref */ - PORTAL_MODULE_UNUSE; - return; - } - + LASSERT (ni == kibnal_data.kib_ni); + LASSERT (ni->ni_data == &kibnal_data); + CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", atomic_read (&portal_kmemory)); - LASSERT(nal == &kibnal_api); - switch (kibnal_data.kib_init) { case IBNAL_INIT_ALL: @@ -1553,10 +1557,6 @@ kibnal_api_shutdown (nal_t *nal) CERROR ("Close HCA error: %d\n", vvrc); /* fall through */ - case IBNAL_INIT_LIB: - lib_fini(&kibnal_lib); - /* fall through */ - case IBNAL_INIT_DATA: LASSERT (atomic_read (&kibnal_data.kib_npeers) == 0); LASSERT (kibnal_data.kib_peers != NULL); @@ -1606,31 +1606,28 @@ kibnal_api_shutdown (nal_t *nal) kibnal_data.kib_init = IBNAL_INIT_NOTHING; } -int -kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid, - ptl_ni_limits_t *requested_limits, - ptl_ni_limits_t *actual_limits) +ptl_err_t +kibnal_startup (ptl_ni_t *ni, char **interfaces) { struct timeval tv; - ptl_process_id_t process_id; int pkmem = atomic_read(&portal_kmemory); int rc; int i; vv_request_event_record_t req_er; vv_return_t vvrc; - LASSERT (nal == &kibnal_api); + LASSERT (ni->ni_nal == &kibnal_nal); - if (nal->nal_refct != 0) { - if (actual_limits != NULL) - *actual_limits = kibnal_lib.libnal_ni.ni_actual_limits; - /* This module got the first ref */ - PORTAL_MODULE_USE; - return (PTL_OK); + /* Only 1 instance supported */ + if (kibnal_data.kib_init != IBNAL_INIT_NOTHING) { + CERROR ("Only 1 instance supported\n"); + return PTL_FAIL; } - LASSERT (kibnal_data.kib_init == IBNAL_INIT_NOTHING); memset (&kibnal_data, 0, sizeof (kibnal_data)); /* zero pointers, flags etc */ + + kibnal_data.kib_ni = ni; + ni->ni_data = &kibnal_data; do_gettimeofday(&tv); kibnal_data.kib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; @@ -1676,20 +1673,6 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid, kibnal_data.kib_init = IBNAL_INIT_DATA; /*****************************************************/ - process_id.pid = requested_pid; - process_id.nid = PTL_NID_ANY; - - rc = lib_init(&kibnal_lib, nal, process_id, - requested_limits, actual_limits); - if (rc != PTL_OK) { - CERROR("lib_init failed: error %d\n", rc); - goto failed; - } - - /* lib interface initialised */ - kibnal_data.kib_init = IBNAL_INIT_LIB; - /*****************************************************/ - for (i = 0; i < IBNAL_N_SCHED; i++) { rc = kibnal_thread_start (kibnal_scheduler, (void *)((long)i)); if (rc != 0) { @@ -1878,8 +1861,8 @@ kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid, return (PTL_OK); failed: - CDEBUG(D_NET, "kibnal_api_startup failed\n"); - kibnal_api_shutdown (&kibnal_api); + CDEBUG(D_NET, "kibnal_startup failed\n"); + kibnal_shutdown (ni); return (PTL_FAIL); } @@ -1890,9 +1873,7 @@ kibnal_module_fini (void) if (kibnal_tunables.kib_sysctl != NULL) unregister_sysctl_table (kibnal_tunables.kib_sysctl); #endif - PtlNIFini(kibnal_ni); - - ptl_unregister_nal(VIBNAL); + ptl_unregister_nal(&kibnal_nal); } int __init @@ -1914,24 +1895,14 @@ kibnal_module_init (void) /* the following must be sizeof(int) for proc_dointvec() */ CLASSERT (sizeof (kibnal_tunables.kib_io_timeout) == sizeof (int)); - kibnal_api.nal_ni_init = kibnal_api_startup; - kibnal_api.nal_ni_fini = kibnal_api_shutdown; - /* Initialise dynamic tunables to defaults once only */ kibnal_tunables.kib_io_timeout = IBNAL_IO_TIMEOUT; - rc = ptl_register_nal(VIBNAL, &kibnal_api); + rc = ptl_register_nal(&kibnal_nal); if (rc != PTL_OK) { CERROR("Can't register IBNAL: %d\n", rc); return (-ENOMEM); /* or something... */ } - - /* Pure gateways want the NAL started up at module load time... */ - rc = PtlNIInit(VIBNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &kibnal_ni); - if (rc != PTL_OK && rc != PTL_IFACE_DUP) { - ptl_unregister_nal(VIBNAL); - return (-ENODEV); - } #ifdef CONFIG_SYSCTL /* Press on regardless even if registering sysctl doesn't work */ diff --git a/lnet/klnds/viblnd/viblnd.h b/lnet/klnds/viblnd/viblnd.h index 4383c98..eb9aef5 100644 --- a/lnet/klnds/viblnd/viblnd.h +++ b/lnet/klnds/viblnd/viblnd.h @@ -58,7 +58,6 @@ #include #include #include -#include /* CPU_{L,B}E #defines needed by Voltaire headers */ #include @@ -200,6 +199,7 @@ typedef struct __u64 kib_incarnation; /* which one am I */ int kib_shutdown; /* shut down? */ atomic_t kib_nthreads; /* # live threads */ + ptl_ni_t *kib_ni; /* _the_ nal instance */ __u64 kib_svc_id; /* service number I listen on */ vv_gid_t kib_port_gid; /* device/port GID */ @@ -300,7 +300,7 @@ typedef struct kib_tx /* transmit message */ int tx_status; /* completion status */ unsigned long tx_deadline; /* completion deadline */ __u64 tx_cookie; /* completion cookie */ - lib_msg_t *tx_libmsg[2]; /* lib msgs to finalize on completion */ + ptl_msg_t *tx_ptlmsg[2]; /* ptl msgs to finalize on completion */ #if IBNAL_WHOLE_MEM vv_l_key_t tx_lkey; /* local key for message buffer */ #else @@ -406,10 +406,32 @@ typedef struct kib_peer } kib_peer_t; -extern lib_nal_t kibnal_lib; extern kib_data_t kibnal_data; extern kib_tunables_t kibnal_tunables; +ptl_err_t kibnal_startup (ptl_ni_t *ni, char **interfaces); +void kibnal_shutdown (ptl_ni_t *ni); +ptl_err_t kibnal_send (ptl_ni_t *ni, void *private, + ptl_msg_t *ptlmsg, ptl_hdr_t *hdr, + int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int payload_niov, + struct iovec *payload_iov, + size_t payload_offset, size_t payload_nob); +ptl_err_t kibnal_send_pages (ptl_ni_t *ni, void *private, + ptl_msg_t *ptlmsg, ptl_hdr_t *hdr, + int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int payload_niov, + ptl_kiov_t *payload_kiov, + size_t payload_offset, size_t payload_nob); +ptl_err_t kibnal_recv(ptl_ni_t *ni, void *private, + ptl_msg_t *ptlmsg, unsigned int niov, + struct iovec *iov, size_t offset, + size_t mlen, size_t rlen); +ptl_err_t kibnal_recv_pages(ptl_ni_t *ni, void *private, + ptl_msg_t *ptlmsg, unsigned int niov, + ptl_kiov_t *kiov, size_t offset, + size_t mlen, size_t rlen); + extern void kibnal_init_msg(kib_msg_t *msg, int type, int body_nob); extern void kibnal_pack_msg(kib_msg_t *msg, int credits, ptl_nid_t dstnid, __u64 dststamp, __u64 seq); diff --git a/lnet/klnds/viblnd/viblnd_cb.c b/lnet/klnds/viblnd/viblnd_cb.c index 83ea106..91033b0 100644 --- a/lnet/klnds/viblnd/viblnd_cb.c +++ b/lnet/klnds/viblnd/viblnd_cb.c @@ -55,12 +55,12 @@ kibnal_tx_done (kib_tx_t *tx) } #endif for (i = 0; i < 2; i++) { - /* tx may have up to 2 libmsgs to finalise */ - if (tx->tx_libmsg[i] == NULL) + /* tx may have up to 2 ptlmsgs to finalise */ + if (tx->tx_ptlmsg[i] == NULL) continue; - lib_finalize (&kibnal_lib, NULL, tx->tx_libmsg[i], ptlrc); - tx->tx_libmsg[i] = NULL; + ptl_finalize (kibnal_data.kib_ni, NULL, tx->tx_ptlmsg[i], ptlrc); + tx->tx_ptlmsg[i] = NULL; } if (tx->tx_conn != NULL) { @@ -135,8 +135,8 @@ kibnal_get_idle_tx (int may_block) LASSERT (!tx->tx_waiting); LASSERT (tx->tx_status == 0); LASSERT (tx->tx_conn == NULL); - LASSERT (tx->tx_libmsg[0] == NULL); - LASSERT (tx->tx_libmsg[1] == NULL); + LASSERT (tx->tx_ptlmsg[0] == NULL); + LASSERT (tx->tx_ptlmsg[1] == NULL); } spin_unlock(&kibnal_data.kib_tx_lock); @@ -279,11 +279,11 @@ kibnal_handle_completion(kib_conn_t *conn, int txtype, int status, __u64 cookie) tx->tx_status = status; } else if (txtype == IBNAL_MSG_GET_REQ) { /* XXX layering violation: set REPLY data length */ - LASSERT (tx->tx_libmsg[1] != NULL); - LASSERT (tx->tx_libmsg[1]->ev.type == + LASSERT (tx->tx_ptlmsg[1] != NULL); + LASSERT (tx->tx_ptlmsg[1]->msg_ev.type == PTL_EVENT_REPLY_END); - tx->tx_libmsg[1]->ev.mlength = status; + tx->tx_ptlmsg[1]->msg_ev.mlength = status; } } @@ -350,12 +350,12 @@ kibnal_handle_rx (kib_rx_t *rx) break; case IBNAL_MSG_IMMEDIATE: - lib_parse(&kibnal_lib, &msg->ibm_u.immediate.ibim_hdr, rx); + ptl_parse(kibnal_data.kib_ni, &msg->ibm_u.immediate.ibim_hdr, rx); break; case IBNAL_MSG_PUT_REQ: rx->rx_responded = 0; - lib_parse(&kibnal_lib, &msg->ibm_u.putreq.ibprm_hdr, rx); + ptl_parse(kibnal_data.kib_ni, &msg->ibm_u.putreq.ibprm_hdr, rx); if (rx->rx_responded) break; @@ -418,7 +418,7 @@ kibnal_handle_rx (kib_rx_t *rx) case IBNAL_MSG_GET_REQ: rx->rx_responded = 0; - lib_parse(&kibnal_lib, &msg->ibm_u.get.ibgm_hdr, rx); + ptl_parse(kibnal_data.kib_ni, &msg->ibm_u.get.ibgm_hdr, rx); if (rx->rx_responded) /* I responded to the GET_REQ */ break; /* NB GET didn't match (I'd have responded even with no payload @@ -467,7 +467,7 @@ kibnal_rx_complete (kib_rx_t *rx, vv_comp_status_t vvrc, int nob, __u64 rxseq) if (msg->ibm_srcnid != conn->ibc_peer->ibp_nid || msg->ibm_srcstamp != conn->ibc_incarnation || - msg->ibm_dstnid != kibnal_lib.libnal_ni.ni_pid.nid || + msg->ibm_dstnid != kibnal_data.kib_ni->ni_nid || msg->ibm_dststamp != kibnal_data.kib_incarnation) { CERROR ("Stale rx from "LPX64"\n", conn->ibc_peer->ibp_nid); @@ -1304,24 +1304,10 @@ kibnal_launch_tx (kib_tx_t *tx, ptl_nid_t nid) write_unlock_irqrestore(g_lock, flags); } -int -kibnal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist) -{ - /* I would guess that if kibnal_get_peer (nid) == NULL, - and we're not routing, then 'nid' is very distant :) */ - if ( nal->libnal_ni.ni_pid.nid == nid ) { - *dist = 0; - } else { - *dist = 1; - } - - return 0; -} - ptl_err_t -kibnal_sendmsg(lib_nal_t *nal, +kibnal_sendmsg(ptl_ni_t *ni, void *private, - lib_msg_t *libmsg, + ptl_msg_t *ptlmsg, ptl_hdr_t *hdr, int type, ptl_nid_t nid, @@ -1416,10 +1402,10 @@ kibnal_sendmsg(lib_nal_t *nal, nid, rc); } else if (rc == 0) { /* No RDMA: local completion may happen now! */ - lib_finalize (&kibnal_lib, NULL, libmsg, PTL_OK); + ptl_finalize (kibnal_data.kib_ni, NULL, ptlmsg, PTL_OK); } else { - /* RDMA: lib_finalize(libmsg) when it completes */ - tx->tx_libmsg[0] = libmsg; + /* RDMA: ptl_finalize(ptlmsg) when it completes */ + tx->tx_ptlmsg[0] = ptlmsg; } kibnal_queue_tx(tx, rx->rx_conn); @@ -1429,7 +1415,7 @@ kibnal_sendmsg(lib_nal_t *nal, case PTL_MSG_GET: /* will the REPLY message be small enough not to need RDMA? */ - nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[libmsg->md->length]); + nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[ptlmsg->msg_md->md_length]); if (nob <= IBNAL_MSG_SIZE) break; @@ -1440,18 +1426,18 @@ kibnal_sendmsg(lib_nal_t *nal, ibmsg->ibm_u.get.ibgm_hdr = *hdr; ibmsg->ibm_u.get.ibgm_cookie = tx->tx_cookie; - if ((libmsg->md->options & PTL_MD_KIOV) == 0) + if ((ptlmsg->msg_md->md_options & PTL_MD_KIOV) == 0) rc = kibnal_setup_rd_iov(tx, &ibmsg->ibm_u.get.ibgm_rd, vv_acc_r_mem_write, - libmsg->md->md_niov, - libmsg->md->md_iov.iov, - 0, libmsg->md->length); + ptlmsg->msg_md->md_niov, + ptlmsg->msg_md->md_iov.iov, + 0, ptlmsg->msg_md->md_length); else rc = kibnal_setup_rd_kiov(tx, &ibmsg->ibm_u.get.ibgm_rd, vv_acc_r_mem_write, - libmsg->md->md_niov, - libmsg->md->md_iov.kiov, - 0, libmsg->md->length); + ptlmsg->msg_md->md_niov, + ptlmsg->msg_md->md_iov.kiov, + 0, ptlmsg->msg_md->md_length); if (rc != 0) { CERROR("Can't setup GET sink for "LPX64": %d\n", nid, rc); kibnal_tx_done(tx); @@ -1462,14 +1448,14 @@ kibnal_sendmsg(lib_nal_t *nal, nob = offsetof(kib_get_msg_t, ibgm_rd.rd_frags[n]); kibnal_init_tx_msg(tx, IBNAL_MSG_GET_REQ, nob); - tx->tx_libmsg[1] = lib_create_reply_msg(&kibnal_lib, nid, libmsg); - if (tx->tx_libmsg[1] == NULL) { + tx->tx_ptlmsg[1] = ptl_create_reply_msg(kibnal_data.kib_ni, nid, ptlmsg); + if (tx->tx_ptlmsg[1] == NULL) { CERROR("Can't create reply for GET -> "LPX64"\n", nid); kibnal_tx_done(tx); return PTL_FAIL; } - tx->tx_libmsg[0] = libmsg; /* finalise libmsg[0,1] on completion */ + tx->tx_ptlmsg[0] = ptlmsg; /* finalise ptlmsg[0,1] on completion */ tx->tx_waiting = 1; /* waiting for GET_DONE */ kibnal_launch_tx(tx, nid); return PTL_OK; @@ -1506,7 +1492,7 @@ kibnal_sendmsg(lib_nal_t *nal, ibmsg->ibm_u.putreq.ibprm_cookie = tx->tx_cookie; kibnal_init_tx_msg(tx, IBNAL_MSG_PUT_REQ, sizeof(kib_putreq_msg_t)); - tx->tx_libmsg[0] = libmsg; /* finalise libmsg on completion */ + tx->tx_ptlmsg[0] = ptlmsg; /* finalise ptlmsg on completion */ tx->tx_waiting = 1; /* waiting for PUT_{ACK,NAK} */ kibnal_launch_tx(tx, nid); return PTL_OK; @@ -1527,11 +1513,11 @@ kibnal_sendmsg(lib_nal_t *nal, if (payload_nob > 0) { if (payload_kiov != NULL) - lib_copy_kiov2buf(ibmsg->ibm_u.immediate.ibim_payload, + ptl_copy_kiov2buf(ibmsg->ibm_u.immediate.ibim_payload, payload_niov, payload_kiov, payload_offset, payload_nob); else - lib_copy_iov2buf(ibmsg->ibm_u.immediate.ibim_payload, + ptl_copy_iov2buf(ibmsg->ibm_u.immediate.ibim_payload, payload_niov, payload_iov, payload_offset, payload_nob); } @@ -1539,41 +1525,41 @@ kibnal_sendmsg(lib_nal_t *nal, nob = offsetof(kib_immediate_msg_t, ibim_payload[payload_nob]); kibnal_init_tx_msg (tx, IBNAL_MSG_IMMEDIATE, nob); - tx->tx_libmsg[0] = libmsg; /* finalise libmsg on completion */ + tx->tx_ptlmsg[0] = ptlmsg; /* finalise ptlmsg on completion */ kibnal_launch_tx(tx, nid); return PTL_OK; } ptl_err_t -kibnal_send (lib_nal_t *nal, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int payload_niov, struct iovec *payload_iov, - size_t payload_offset, size_t payload_len) +kibnal_send (ptl_ni_t *ni, void *private, ptl_msg_t *cookie, + ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int payload_niov, struct iovec *payload_iov, + size_t payload_offset, size_t payload_len) { CDEBUG(D_NET, " pid = %d, nid="LPU64"\n", pid, nid); - return (kibnal_sendmsg(nal, private, cookie, + return (kibnal_sendmsg(ni, private, cookie, hdr, type, nid, pid, payload_niov, payload_iov, NULL, payload_offset, payload_len)); } ptl_err_t -kibnal_send_pages (lib_nal_t *nal, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int payload_niov, ptl_kiov_t *payload_kiov, - size_t payload_offset, size_t payload_len) +kibnal_send_pages (ptl_ni_t *ni, void *private, ptl_msg_t *cookie, + ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int payload_niov, ptl_kiov_t *payload_kiov, + size_t payload_offset, size_t payload_len) { - return (kibnal_sendmsg(nal, private, cookie, + return (kibnal_sendmsg(ni, private, cookie, hdr, type, nid, pid, payload_niov, NULL, payload_kiov, payload_offset, payload_len)); } ptl_err_t -kibnal_recvmsg (lib_nal_t *nal, void *private, lib_msg_t *libmsg, - unsigned int niov, struct iovec *iov, ptl_kiov_t *kiov, - size_t offset, int mlen, int rlen) +kibnal_recvmsg (ptl_ni_t *ni, void *private, ptl_msg_t *ptlmsg, + unsigned int niov, struct iovec *iov, ptl_kiov_t *kiov, + size_t offset, int mlen, int rlen) { kib_rx_t *rx = private; kib_msg_t *rxmsg = rx->rx_msg; @@ -1603,15 +1589,15 @@ kibnal_recvmsg (lib_nal_t *nal, void *private, lib_msg_t *libmsg, } if (kiov != NULL) - lib_copy_buf2kiov(niov, kiov, offset, + ptl_copy_buf2kiov(niov, kiov, offset, rxmsg->ibm_u.immediate.ibim_payload, mlen); else - lib_copy_buf2iov(niov, iov, offset, + ptl_copy_buf2iov(niov, iov, offset, rxmsg->ibm_u.immediate.ibim_payload, mlen); - lib_finalize (nal, NULL, libmsg, PTL_OK); + ptl_finalize (ni, NULL, ptlmsg, PTL_OK); return (PTL_OK); case IBNAL_MSG_PUT_REQ: @@ -1619,7 +1605,7 @@ kibnal_recvmsg (lib_nal_t *nal, void *private, lib_msg_t *libmsg, * here, unless I set rx_responded! */ if (mlen == 0) { /* No payload to RDMA */ - lib_finalize(nal, NULL, libmsg, PTL_OK); + ptl_finalize(ni, NULL, ptlmsg, PTL_OK); return PTL_OK; } @@ -1655,7 +1641,7 @@ kibnal_recvmsg (lib_nal_t *nal, void *private, lib_msg_t *libmsg, nob = offsetof(kib_putack_msg_t, ibpam_rd.rd_frags[n]); kibnal_init_tx_msg(tx, IBNAL_MSG_PUT_ACK, nob); - tx->tx_libmsg[0] = libmsg; /* finalise libmsg on completion */ + tx->tx_ptlmsg[0] = ptlmsg; /* finalise ptlmsg on completion */ tx->tx_waiting = 1; /* waiting for PUT_DONE */ kibnal_queue_tx(tx, conn); @@ -1666,27 +1652,27 @@ kibnal_recvmsg (lib_nal_t *nal, void *private, lib_msg_t *libmsg, case IBNAL_MSG_GET_REQ: /* We get called here just to discard any junk after the * GET hdr. */ - LASSERT (libmsg == NULL); - lib_finalize (nal, NULL, libmsg, PTL_OK); + LASSERT (ptlmsg == NULL); + ptl_finalize (ni, NULL, ptlmsg, PTL_OK); return (PTL_OK); } } ptl_err_t -kibnal_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, - unsigned int niov, struct iovec *iov, - size_t offset, size_t mlen, size_t rlen) +kibnal_recv (ptl_ni_t *ni, void *private, ptl_msg_t *msg, + unsigned int niov, struct iovec *iov, + size_t offset, size_t mlen, size_t rlen) { - return (kibnal_recvmsg (nal, private, msg, niov, iov, NULL, + return (kibnal_recvmsg (ni, private, msg, niov, iov, NULL, offset, mlen, rlen)); } ptl_err_t -kibnal_recv_pages (lib_nal_t *nal, void *private, lib_msg_t *msg, - unsigned int niov, ptl_kiov_t *kiov, - size_t offset, size_t mlen, size_t rlen) +kibnal_recv_pages (ptl_ni_t *ni, void *private, ptl_msg_t *msg, + unsigned int niov, ptl_kiov_t *kiov, + size_t offset, size_t mlen, size_t rlen) { - return (kibnal_recvmsg (nal, private, msg, niov, NULL, kiov, + return (kibnal_recvmsg (ni, private, msg, niov, NULL, kiov, offset, mlen, rlen)); } @@ -2254,7 +2240,7 @@ kibnal_recv_connreq(cm_cep_handle_t *cep, cm_request_data_t *cmreq) goto reject; } - if (rxmsg.ibm_dstnid != kibnal_lib.libnal_ni.ni_pid.nid) { + if (rxmsg.ibm_dstnid != kibnal_data.kib_ni->ni_nid) { CERROR("Can't accept "LPX64": bad dst nid "LPX64"\n", rxmsg.ibm_srcnid, rxmsg.ibm_dstnid); goto reject; @@ -2571,7 +2557,7 @@ kibnal_check_connreply (kib_conn_t *conn) } read_lock_irqsave(&kibnal_data.kib_global_lock, flags); - rc = (msg.ibm_dstnid != kibnal_lib.libnal_ni.ni_pid.nid || + rc = (msg.ibm_dstnid != kibnal_data.kib_ni->ni_nid || msg.ibm_dststamp != kibnal_data.kib_incarnation) ? -ESTALE : 0; read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); @@ -3295,13 +3281,3 @@ kibnal_scheduler(void *arg) kibnal_thread_fini(); return (0); } - - -lib_nal_t kibnal_lib = { - .libnal_data = &kibnal_data, /* NAL private data */ - .libnal_send = kibnal_send, - .libnal_send_pages = kibnal_send_pages, - .libnal_recv = kibnal_recv, - .libnal_recv_pages = kibnal_recv_pages, - .libnal_dist = kibnal_dist -}; diff --git a/lnet/libcfs/Makefile.in b/lnet/libcfs/Makefile.in index aaaad93..db09995 100644 --- a/lnet/libcfs/Makefile.in +++ b/lnet/libcfs/Makefile.in @@ -24,7 +24,7 @@ sources: endif -libcfs-all-objs := debug.o lwt.o module.o tracefile.o watchdog.o +libcfs-all-objs := debug.o nidstrings.o lwt.o module.o tracefile.o watchdog.o libcfs-objs := $(libcfs-linux-objs) $(libcfs-all-objs) diff --git a/lnet/libcfs/autoMakefile.am b/lnet/libcfs/autoMakefile.am index 5bc2c08..41711d0 100644 --- a/lnet/libcfs/autoMakefile.am +++ b/lnet/libcfs/autoMakefile.am @@ -18,7 +18,7 @@ endif if DARWIN macos_PROGRAMS := libcfs -nodist_libcfs_SOURCES := debug.c module.c tracefile.c \ +nodist_libcfs_SOURCES := debug.c module.c tracefile.c nidstrings.c \ darwin/darwin-debug.c darwin/darwin-fs.c darwin/darwin-mem.c \ darwin/darwin-module.c darwin/darwin-prim.c \ darwin/darwin-proc.c darwin/darwin-tracefile.c \ diff --git a/lnet/libcfs/debug.c b/lnet/libcfs/debug.c index b7fd218..5f02708 100644 --- a/lnet/libcfs/debug.c +++ b/lnet/libcfs/debug.c @@ -141,7 +141,7 @@ int portals_debug_init(unsigned long bufsize) { cfs_waitq_init(&debug_ctlwq); #ifdef CRAY_PORTALS - lus_portals_debug = &portals_debug_msg; + lus_portals_debug = &libcfs_debug_msg; #endif #ifdef PORTALS_DUMP_ON_PANIC /* This is currently disabled because it spews far too much to the @@ -191,61 +191,5 @@ void portals_debug_set_level(unsigned int debug_level) portal_debug = debug_level; } -char *portals_nid2str(int nal, ptl_nid_t nid, char *str) -{ - if (nid == PTL_NID_ANY) { - snprintf(str, PTL_NALFMT_SIZE, "%s", "PTL_NID_ANY"); - return str; - } - - switch(nal){ -/* XXX this could be a nal method of some sort, 'cept it's config - * dependent whether (say) socknal NIDs are actually IP addresses... */ -#if !CRAY_PORTALS - case TCPNAL: - /* userspace NAL */ - case IIBNAL: - case VIBNAL: - case OPENIBNAL: - case RANAL: - case SOCKNAL: { - /* HIPQUAD requires __u32, but we can't cast in it */ - __u32 nid32 = (__u32)nid; - if ((__u32)(nid >> 32)) { - snprintf(str, PTL_NALFMT_SIZE, "%u:%u.%u.%u.%u", - (__u32)(nid >> 32), HIPQUAD(nid32)); - } else { - snprintf(str, PTL_NALFMT_SIZE, "%u.%u.%u.%u", - HIPQUAD(nid32)); - } - break; - } - case QSWNAL: - case GMNAL: - case LONAL: - snprintf(str, PTL_NALFMT_SIZE, "%u:%u", - (__u32)(nid >> 32), (__u32)nid); - break; -#endif - default: - snprintf(str, PTL_NALFMT_SIZE, "?%x? %llx", - nal, (long long)nid); - break; - } - return str; -} - -char *portals_id2str(int nal, ptl_process_id_t id, char *str) -{ - int len; - - portals_nid2str(nal, id.nid, str); - len = strlen(str); - snprintf(str + len, PTL_NALFMT_SIZE - len, "-%u", id.pid); - return str; -} - EXPORT_SYMBOL(portals_debug_dumplog); EXPORT_SYMBOL(portals_debug_set_level); -EXPORT_SYMBOL(portals_nid2str); -EXPORT_SYMBOL(portals_id2str); diff --git a/lnet/libcfs/linux/linux-proc.c b/lnet/libcfs/linux/linux-proc.c index 70f4059..9d43be3 100644 --- a/lnet/libcfs/linux/linux-proc.c +++ b/lnet/libcfs/linux/linux-proc.c @@ -105,16 +105,7 @@ static struct ctl_table top_table[2] = { #define def_prof(FOO) [PROF__##FOO] = {#FOO, 0, } struct prof_ent prof_ents[] = { - def_prof(our_recvmsg), - def_prof(our_sendmsg), - def_prof(socknal_recv), - def_prof(lib_parse), - def_prof(conn_list_walk), - def_prof(memcpy), - def_prof(lib_finalize), - def_prof(pingcli_time), - def_prof(gmnal_send), - def_prof(gmnal_recv), + def_prof(placeholder), }; EXPORT_SYMBOL(prof_ents); diff --git a/lnet/libcfs/module.c b/lnet/libcfs/module.c index 84c124c..c6b63f8 100644 --- a/lnet/libcfs/module.c +++ b/lnet/libcfs/module.c @@ -37,16 +37,6 @@ struct nal_cmd_handler { static struct nal_cmd_handler nal_cmd[16]; struct semaphore nal_cmd_mutex; -#ifdef PORTAL_DEBUG -void kportal_assertion_failed(char *expr, char *file, const char *func, - const int line) -{ - portals_debug_msg(0, D_EMERG, file, func, line, CDEBUG_STACK, - "ASSERTION(%s) failed\n", expr); - LBUG_WITH_LOC(file, func, line); -} -#endif - void kportal_memhog_free (struct portals_device_userstate *pdu) { @@ -482,6 +472,7 @@ extern cfs_psdev_t libcfs_dev; extern struct rw_semaphore tracefile_sem; extern struct semaphore trace_thread_sem; +extern void libcfs_init_nidstrings(void); extern int libcfs_arch_init(void); extern void libcfs_arch_cleanup(void); @@ -490,6 +481,7 @@ static int init_libcfs_module(void) int rc; libcfs_arch_init(); + libcfs_init_nidstrings(); init_rwsem(&tracefile_sem); init_mutex(&trace_thread_sem); init_mutex(&nal_cmd_mutex); @@ -562,6 +554,4 @@ static void exit_libcfs_module(void) libcfs_arch_cleanup(); } -EXPORT_SYMBOL(kportal_assertion_failed); - cfs_module(libcfs, "1.0.0", init_libcfs_module, exit_libcfs_module); diff --git a/lnet/libcfs/nidstrings.c b/lnet/libcfs/nidstrings.c new file mode 100644 index 0000000..6562f86 --- /dev/null +++ b/lnet/libcfs/nidstrings.c @@ -0,0 +1,107 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002 Cluster File Systems, Inc. + * Author: Phil Schwan + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif + +# define DEBUG_SUBSYSTEM S_PORTALS + +#include +#include + +/* CAVEAT EMPTOR! Racey temporary buffer allocation! + * Choose the number of nidstrings to support the MAXIMUM expected number of + * concurrent users. If there are more, the returned string will be volatile. + * NB this number must allow for a process to be descheduled for a timeslice + * between getting its string and using it. + */ + +static char libcfs_nidstrings[128][PTL_NALFMT_SIZE]; +static int libcfs_nidstring_idx; +static spinlock_t libcfs_nidstring_lock; + +void +libcfs_init_nidstrings (void) +{ + spin_lock_init(&libcfs_nidstring_lock); +} + +static char * +libcfs_next_nidstring (void) +{ + unsigned long flags; + char *str; + + spin_lock_irqsave(&libcfs_nidstring_lock, flags); + + str = libcfs_nidstrings[libcfs_nidstring_idx++]; + if (libcfs_nidstring_idx == + sizeof(libcfs_nidstrings)/sizeof(libcfs_nidstrings[0])) + libcfs_nidstring_idx = 0; + + spin_unlock_irqrestore(&libcfs_nidstring_lock, flags); + + return str; +} + +char *libcfs_nid2str(ptl_nid_t nid) +{ + __u32 hi = (__u32)(nid>>32); + __u32 lo = (__u32)nid; + char *str = libcfs_next_nidstring(); + + if (nid == PTL_NID_ANY) { + snprintf(str, PTL_NALFMT_SIZE, "%s", "PTL_NID_ANY"); + return str; + } + +#if !CRAY_PORTALS + if ((lo & 0xffff) != 0) { + /* probable IP address */ + if (hi != 0) + snprintf(str, PTL_NALFMT_SIZE, "%u:%u.%u.%u.%u", + hi, HIPQUAD(lo)); + else + snprintf(str, PTL_NALFMT_SIZE, "%u.%u.%u.%u", + HIPQUAD(lo)); + } else if (hi != 0) + snprintf(str, PTL_NALFMT_SIZE, "%u:%u", hi, lo); + else + snprintf(str, PTL_NALFMT_SIZE, "%u", lo); +#else + snprintf(str, PTL_NALFMT_SIZE, "%llx", (long long)nid); +#endif + return str; +} + +char *libcfs_id2str(ptl_process_id_t id) +{ + char *str = libcfs_nid2str(id.nid); + int len = strlen(str); + + snprintf(str + len, PTL_NALFMT_SIZE - len, "-%u", id.pid); + return str; +} + +EXPORT_SYMBOL(libcfs_nid2str); +EXPORT_SYMBOL(libcfs_id2str); diff --git a/lnet/libcfs/tracefile.c b/lnet/libcfs/tracefile.c index a1dab74..ecb8ea3 100644 --- a/lnet/libcfs/tracefile.c +++ b/lnet/libcfs/tracefile.c @@ -163,8 +163,8 @@ static struct trace_page *trace_get_tage(struct trace_cpu_data *tcd, return tage; } -void portals_debug_msg(int subsys, int mask, char *file, const char *fn, - const int line, unsigned long stack, char *format, ...) +void libcfs_debug_msg(int subsys, int mask, char *file, const char *fn, + const int line, unsigned long stack, char *format, ...) { struct trace_cpu_data *tcd; struct ptldebug_header header; @@ -231,7 +231,7 @@ void portals_debug_msg(int subsys, int mask, char *file, const char *fn, tage->used += needed; if (tage->used > CFS_PAGE_SIZE) printk(KERN_EMERG - "tage->used == %u in portals_debug_msg\n", tage->used); + "tage->used == %u in libcfs_debug_msg\n", tage->used); out: if ((mask & (D_EMERG | D_ERROR | D_WARNING | D_CONSOLE)) || portal_printk) @@ -239,7 +239,17 @@ void portals_debug_msg(int subsys, int mask, char *file, const char *fn, trace_put_tcd(tcd, flags); } -EXPORT_SYMBOL(portals_debug_msg); +EXPORT_SYMBOL(libcfs_debug_msg); + +void +libcfs_assertion_failed(char *expr, char *file, + const char *func, const int line) +{ + libcfs_debug_msg(0, D_EMERG, file, func, line, CDEBUG_STACK, + "ASSERTION(%s) failed\n", expr); + LBUG(); +} +EXPORT_SYMBOL(libcfs_assertion_failed); static void collect_pages_on_cpu(void *info) { diff --git a/lnet/lnet/Makefile.in b/lnet/lnet/Makefile.in index c0f2e71..0fed502 100644 --- a/lnet/lnet/Makefile.in +++ b/lnet/lnet/Makefile.in @@ -1,6 +1,6 @@ MODULES := portals -portals-objs := api-errno.o api-ni.o api-wrap.o -portals-objs += lib-init.o lib-me.o lib-msg.o lib-eq.o lib-md.o -portals-objs += lib-move.o lib-ni.o lib-pid.o module.o +portals-objs := api-errno.o api-ni.o +portals-objs += lib-me.o lib-msg.o lib-eq.o lib-md.o +portals-objs += lib-move.o module.o @INCLUDE_RULES@ diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 91a307a..364add2 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -20,19 +20,107 @@ */ #define DEBUG_SUBSYSTEM S_PORTALS -#include +#include -int ptl_init; +int ptl_init; /* PtlInit() flag */ +struct list_head ptl_nal_table; /* registered NALs */ +ptl_apini_t ptl_apini; /* THE network interface (at the API) */ -/* Put some magic in the NI handle so uninitialised/zeroed handles are easy - * to spot */ -#define NI_HANDLE_MAGIC 0xebc0de00 -#define NI_HANDLE_MASK 0x000000ff - -static struct nal_t *ptl_nal_table[NAL_MAX_NR + 1]; +void ptl_assert_wire_constants (void) +{ + /* Wire protocol assertions generated by 'wirecheck' + * running on Linux mdevi 2.4.21-p4smp-55chaos #1 SMP Tue Jun 8 14:38:44 PDT 2004 i686 i686 i + * with gcc version 3.2.3 20030502 (Red Hat Linux 3.2.3-34) */ + + + /* Constants... */ + CLASSERT (PORTALS_PROTO_MAGIC == 0xeebc0ded); + CLASSERT (PORTALS_PROTO_VERSION_MAJOR == 1); + CLASSERT (PORTALS_PROTO_VERSION_MINOR == 0); + CLASSERT (PTL_MSG_ACK == 0); + CLASSERT (PTL_MSG_PUT == 1); + CLASSERT (PTL_MSG_GET == 2); + CLASSERT (PTL_MSG_REPLY == 3); + CLASSERT (PTL_MSG_HELLO == 4); + + /* Checks for struct ptl_handle_wire_t */ + CLASSERT ((int)sizeof(ptl_handle_wire_t) == 16); + CLASSERT ((int)offsetof(ptl_handle_wire_t, wh_interface_cookie) == 0); + CLASSERT ((int)sizeof(((ptl_handle_wire_t *)0)->wh_interface_cookie) == 8); + CLASSERT ((int)offsetof(ptl_handle_wire_t, wh_object_cookie) == 8); + CLASSERT ((int)sizeof(((ptl_handle_wire_t *)0)->wh_object_cookie) == 8); + + /* Checks for struct ptl_magicversion_t */ + CLASSERT ((int)sizeof(ptl_magicversion_t) == 8); + CLASSERT ((int)offsetof(ptl_magicversion_t, magic) == 0); + CLASSERT ((int)sizeof(((ptl_magicversion_t *)0)->magic) == 4); + CLASSERT ((int)offsetof(ptl_magicversion_t, version_major) == 4); + CLASSERT ((int)sizeof(((ptl_magicversion_t *)0)->version_major) == 2); + CLASSERT ((int)offsetof(ptl_magicversion_t, version_minor) == 6); + CLASSERT ((int)sizeof(((ptl_magicversion_t *)0)->version_minor) == 2); + + /* Checks for struct ptl_hdr_t */ + CLASSERT ((int)sizeof(ptl_hdr_t) == 72); + CLASSERT ((int)offsetof(ptl_hdr_t, dest_nid) == 0); + CLASSERT ((int)sizeof(((ptl_hdr_t *)0)->dest_nid) == 8); + CLASSERT ((int)offsetof(ptl_hdr_t, src_nid) == 8); + CLASSERT ((int)sizeof(((ptl_hdr_t *)0)->src_nid) == 8); + CLASSERT ((int)offsetof(ptl_hdr_t, dest_pid) == 16); + CLASSERT ((int)sizeof(((ptl_hdr_t *)0)->dest_pid) == 4); + CLASSERT ((int)offsetof(ptl_hdr_t, src_pid) == 20); + CLASSERT ((int)sizeof(((ptl_hdr_t *)0)->src_pid) == 4); + CLASSERT ((int)offsetof(ptl_hdr_t, type) == 24); + CLASSERT ((int)sizeof(((ptl_hdr_t *)0)->type) == 4); + CLASSERT ((int)offsetof(ptl_hdr_t, payload_length) == 28); + CLASSERT ((int)sizeof(((ptl_hdr_t *)0)->payload_length) == 4); + CLASSERT ((int)offsetof(ptl_hdr_t, msg) == 32); + CLASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg) == 40); + + /* Ack */ + CLASSERT ((int)offsetof(ptl_hdr_t, msg.ack.dst_wmd) == 32); + CLASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.ack.dst_wmd) == 16); + CLASSERT ((int)offsetof(ptl_hdr_t, msg.ack.match_bits) == 48); + CLASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.ack.match_bits) == 8); + CLASSERT ((int)offsetof(ptl_hdr_t, msg.ack.mlength) == 56); + CLASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.ack.mlength) == 4); + + /* Put */ + CLASSERT ((int)offsetof(ptl_hdr_t, msg.put.ack_wmd) == 32); + CLASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.ack_wmd) == 16); + CLASSERT ((int)offsetof(ptl_hdr_t, msg.put.match_bits) == 48); + CLASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.match_bits) == 8); + CLASSERT ((int)offsetof(ptl_hdr_t, msg.put.hdr_data) == 56); + CLASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.hdr_data) == 8); + CLASSERT ((int)offsetof(ptl_hdr_t, msg.put.ptl_index) == 64); + CLASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.ptl_index) == 4); + CLASSERT ((int)offsetof(ptl_hdr_t, msg.put.offset) == 68); + CLASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.offset) == 4); + + /* Get */ + CLASSERT ((int)offsetof(ptl_hdr_t, msg.get.return_wmd) == 32); + CLASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.return_wmd) == 16); + CLASSERT ((int)offsetof(ptl_hdr_t, msg.get.match_bits) == 48); + CLASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.match_bits) == 8); + CLASSERT ((int)offsetof(ptl_hdr_t, msg.get.ptl_index) == 56); + CLASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.ptl_index) == 4); + CLASSERT ((int)offsetof(ptl_hdr_t, msg.get.src_offset) == 60); + CLASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.src_offset) == 4); + CLASSERT ((int)offsetof(ptl_hdr_t, msg.get.sink_length) == 64); + CLASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.sink_length) == 4); + + /* Reply */ + CLASSERT ((int)offsetof(ptl_hdr_t, msg.reply.dst_wmd) == 32); + CLASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.reply.dst_wmd) == 16); + + /* Hello */ + CLASSERT ((int)offsetof(ptl_hdr_t, msg.hello.incarnation) == 32); + CLASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.hello.incarnation) == 8); + CLASSERT ((int)offsetof(ptl_hdr_t, msg.hello.type) == 40); + CLASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.hello.type) == 4); +} #ifdef __KERNEL__ -struct semaphore ptl_mutex; +struct semaphore ptl_mutex ; static void ptl_mutex_enter (void) { @@ -53,177 +141,584 @@ static void ptl_mutex_exit (void) } #endif -nal_t *ptl_hndl2nal(ptl_handle_any_t *handle) + +ptl_nal_t * +ptl_find_nal_by_name (char *name) { - unsigned int idx = handle->nal_idx; + ptl_nal_t *nal; + struct list_head *tmp; - /* XXX we really rely on the caller NOT racing with interface - * setup/teardown. That ensures her NI handle can't get - * invalidated out from under her (or worse, swapped for a - * completely different interface!) */ + /* holding mutex */ + list_for_each (tmp, &ptl_nal_table) { + nal = list_entry(tmp, ptl_nal_t, nal_list); - LASSERT (ptl_init); + if (!strcmp (nal->nal_name, name)) + return nal; + } + + return NULL; +} - if (((idx ^ NI_HANDLE_MAGIC) & ~NI_HANDLE_MASK) != 0) - return NULL; - idx &= NI_HANDLE_MASK; - - if (idx > NAL_MAX_NR || - ptl_nal_table[idx] == NULL || - ptl_nal_table[idx]->nal_refct == 0) - return NULL; +ptl_nal_t * +ptl_find_nal_by_type (int type) +{ + ptl_nal_t *nal; + struct list_head *tmp; + + /* holding mutex */ + list_for_each (tmp, &ptl_nal_table) { + nal = list_entry(tmp, ptl_nal_t, nal_list); - return ptl_nal_table[idx]; + if (nal->nal_type == type) + return nal; + } + + return NULL; } -int ptl_register_nal (ptl_interface_t interface, nal_t *nal) + +int +ptl_register_nal (ptl_nal_t *nal) { int rc; ptl_mutex_enter(); - - if (interface < 0 || interface > NAL_MAX_NR) - rc = PTL_IFACE_INVALID; - else if (ptl_nal_table[interface] != NULL) + + LASSERT (ptl_init); + + if (ptl_find_nal_by_name(nal->nal_name) != NULL || + ptl_find_nal_by_type(nal->nal_type) != NULL) { rc = PTL_IFACE_DUP; - else { + } else { + list_add (&nal->nal_list, &ptl_nal_table); + + nal->nal_refcount = 0; rc = PTL_OK; - ptl_nal_table[interface] = nal; - LASSERT(nal->nal_refct == 0); } ptl_mutex_exit(); return (rc); } -void ptl_unregister_nal (ptl_interface_t interface) +void +ptl_unregister_nal (ptl_nal_t *nal) { - LASSERT(interface >= 0 && interface <= NAL_MAX_NR); - LASSERT(ptl_nal_table[interface] != NULL); - LASSERT(ptl_nal_table[interface]->nal_refct == 0); - ptl_mutex_enter(); + + LASSERT (ptl_init); + LASSERT (ptl_find_nal_by_type(nal->nal_type) == nal); + LASSERT (ptl_find_nal_by_name(nal->nal_name) == nal); + LASSERT (nal->nal_refcount == 0); - ptl_nal_table[interface] = NULL; + list_del (&nal->nal_list); ptl_mutex_exit(); } -int PtlInit(int *max_interfaces) +#ifndef PTL_USE_LIB_FREELIST +int +ptl_descriptor_setup (ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) { - LASSERT(!strcmp(ptl_err_str[PTL_MAX_ERRNO], "PTL_MAX_ERRNO")); + /* Ignore requested limits! */ + actual_limits->max_mes = INT_MAX; + actual_limits->max_mds = INT_MAX; + actual_limits->max_eqs = INT_MAX; - /* If this assertion fails, we need more bits in NI_HANDLE_MASK and - * to shift NI_HANDLE_MAGIC left appropriately */ - LASSERT (NAL_MAX_NR < (NI_HANDLE_MASK + 1)); - - if (max_interfaces != NULL) - *max_interfaces = NAL_MAX_NR + 1; + return PTL_OK; +} - ptl_mutex_enter(); +void +ptl_descriptor_cleanup (void) +{ +} + +#else + +int +ptl_freelist_init (ptl_freelist_t *fl, int n, int size) +{ + char *space; + + LASSERT (n > 0); + + size += offsetof (ptl_freeobj_t, fo_contents); + + PORTAL_ALLOC(space, n * size); + if (space == NULL) + return (PTL_NO_SPACE); + + CFS_INIT_LIST_HEAD (&fl->fl_list); + fl->fl_objs = space; + fl->fl_nobjs = n; + fl->fl_objsize = size; + + do + { + memset (space, 0, size); + list_add ((struct list_head *)space, &fl->fl_list); + space += size; + } while (--n != 0); + + return (PTL_OK); +} + +void +ptl_freelist_fini (ptl_freelist_t *fl) +{ + struct list_head *el; + int count; - if (!ptl_init) { - /* NULL pointers, clear flags */ - memset(ptl_nal_table, 0, sizeof(ptl_nal_table)); + if (fl->fl_nobjs == 0) + return; + + count = 0; + for (el = fl->fl_list.next; el != &fl->fl_list; el = el->next) + count++; + + LASSERT (count == fl->fl_nobjs); + + PORTAL_FREE(fl->fl_objs, fl->fl_nobjs * fl->fl_objsize); + memset (fl, 0, sizeof (fl)); +} + +int +ptl_descriptor_setup (ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) +{ + /* NB on failure caller must still call ptl_descriptor_cleanup */ + /* ****** */ + int rc; + + memset (&ptl_apini.apini_free_mes, 0, sizeof (ptl_apini.apini_free_mes)); + memset (&ptl_apini.apini_free_msgs, 0, sizeof (ptl_apini.apini_free_msgs)); + memset (&ptl_apini.apini_free_mds, 0, sizeof (ptl_apini.apini_free_mds)); + memset (&ptl_apini.apini_free_eqs, 0, sizeof (ptl_apini.apini_free_eqs)); + + /* Ignore requested limits! */ + actual_limits->max_mes = MAX_MES; + actual_limits->max_mds = MAX_MDS; + actual_limits->max_eqs = MAX_EQS; + /* Hahahah what a load of bollocks. There's nowhere to + * specify the max # messages in-flight */ + + rc = ptl_freelist_init (&ptl_apini.apini_free_mes, + MAX_MES, sizeof (ptl_me_t)); + if (rc != PTL_OK) + return (rc); + + rc = ptl_freelist_init (&ptl_apini.apini_free_msgs, + MAX_MSGS, sizeof (ptl_msg_t)); + if (rc != PTL_OK) + return (rc); + + rc = ptl_freelist_init (&ptl_apini.apini_free_mds, + MAX_MDS, sizeof (ptl_libmd_t)); + if (rc != PTL_OK) + return (rc); + + rc = ptl_freelist_init (&ptl_apini.apini_free_eqs, + MAX_EQS, sizeof (ptl_eq_t)); + return (rc); +} + +void +ptl_descriptor_cleanup (void) +{ + ptl_freelist_fini (&ptl_apini.apini_free_mes); + ptl_freelist_fini (&ptl_apini.apini_free_msgs); + ptl_freelist_fini (&ptl_apini.apini_free_mds); + ptl_freelist_fini (&ptl_apini.apini_free_eqs); +} + +#endif + +__u64 +ptl_create_interface_cookie (void) +{ + /* NB the interface cookie in wire handles guards against delayed + * replies and ACKs appearing valid after reboot. Initialisation time, + * even if it's only implemented to millisecond resolution is probably + * easily good enough. */ + struct timeval tv; + __u64 cookie; #ifndef __KERNEL__ - /* Kernel NALs register themselves when their module loads, - * and unregister themselves when their module is unloaded. - * Userspace NALs, are plugged in explicitly here... */ - { - extern nal_t procapi_nal; - - /* XXX pretend it's socknal to keep liblustre happy... */ - ptl_nal_table[SOCKNAL] = &procapi_nal; - LASSERT (procapi_nal.nal_refct == 0); - } + int rc = gettimeofday (&tv, NULL); + LASSERT (rc == 0); +#else + do_gettimeofday(&tv); +#endif + cookie = tv.tv_sec; + cookie *= 1000000; + cookie += tv.tv_usec; + return cookie; +} + +int +ptl_setup_handle_hash (void) +{ + int i; + + /* Arbitrary choice of hash table size */ +#ifdef __KERNEL__ + ptl_apini.apini_lh_hash_size = PAGE_SIZE / sizeof (struct list_head); +#else + ptl_apini.apini_lh_hash_size = (MAX_MES + MAX_MDS + MAX_EQS)/4; #endif - ptl_init = 1; + PORTAL_ALLOC(ptl_apini.apini_lh_hash_table, + ptl_apini.apini_lh_hash_size * sizeof (struct list_head)); + if (ptl_apini.apini_lh_hash_table == NULL) + return (PTL_NO_SPACE); + + for (i = 0; i < ptl_apini.apini_lh_hash_size; i++) + CFS_INIT_LIST_HEAD (&ptl_apini.apini_lh_hash_table[i]); + + ptl_apini.apini_next_object_cookie = PTL_COOKIE_TYPES; + + return (PTL_OK); +} + +void +ptl_cleanup_handle_hash (void) +{ + if (ptl_apini.apini_lh_hash_table == NULL) + return; + + PORTAL_FREE(ptl_apini.apini_lh_hash_table, + ptl_apini.apini_lh_hash_size * sizeof (struct list_head)); +} + +ptl_libhandle_t * +ptl_lookup_cookie (__u64 cookie, int type) +{ + /* ALWAYS called with PTL_LOCK held */ + struct list_head *list; + struct list_head *el; + unsigned int hash; + + if ((cookie & (PTL_COOKIE_TYPES - 1)) != type) + return (NULL); + + hash = ((unsigned int)cookie) % ptl_apini.apini_lh_hash_size; + list = &ptl_apini.apini_lh_hash_table[hash]; + + list_for_each (el, list) { + ptl_libhandle_t *lh = list_entry (el, ptl_libhandle_t, + lh_hash_chain); + + if (lh->lh_cookie == cookie) + return (lh); } + + return (NULL); +} - ptl_mutex_exit(); +void +ptl_initialise_handle (ptl_libhandle_t *lh, int type) +{ + /* ALWAYS called with PTL_LOCK held */ + unsigned int hash; + + LASSERT (type >= 0 && type < PTL_COOKIE_TYPES); + lh->lh_cookie = ptl_apini.apini_next_object_cookie | type; + ptl_apini.apini_next_object_cookie += PTL_COOKIE_TYPES; - return PTL_OK; + hash = ((unsigned int)lh->lh_cookie) % ptl_apini.apini_lh_hash_size; + list_add (&lh->lh_hash_chain, &ptl_apini.apini_lh_hash_table[hash]); } -void PtlFini(void) +void +ptl_invalidate_handle (ptl_libhandle_t *lh) { - nal_t *nal; - int i; + /* ALWAYS called with PTL_LOCK held */ + list_del (&lh->lh_hash_chain); +} - ptl_mutex_enter(); +int +ptl_startup_apini(ptl_pid_t requested_pid, + ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) +{ + int rc = PTL_OK; + int ptl_size; + int i; + ENTRY; - if (ptl_init) { - for (i = 0; i <= NAL_MAX_NR; i++) { - - nal = ptl_nal_table[i]; - if (nal == NULL) - continue; - - if (nal->nal_refct != 0) { - CWARN("NAL %x has outstanding refcount %d\n", - i, nal->nal_refct); - nal->nal_ni_fini(nal); - } - - ptl_nal_table[i] = NULL; - } + LASSERT (ptl_apini.apini_refcount == 0); - ptl_init = 0; + ptl_apini.apini_pid = requested_pid; + + rc = ptl_descriptor_setup (requested_limits, + &ptl_apini.apini_actual_limits); + if (rc != PTL_OK) + goto out; + + memset(&ptl_apini.apini_counters, 0, + sizeof(ptl_apini.apini_counters)); + + CFS_INIT_LIST_HEAD (&ptl_apini.apini_active_msgs); + CFS_INIT_LIST_HEAD (&ptl_apini.apini_active_mds); + CFS_INIT_LIST_HEAD (&ptl_apini.apini_active_eqs); + CFS_INIT_LIST_HEAD (&ptl_apini.apini_test_peers); + CFS_INIT_LIST_HEAD (&ptl_apini.apini_nis); + +#ifdef __KERNEL__ + spin_lock_init (&ptl_apini.apini_lock); + cfs_waitq_init (&ptl_apini.apini_waitq); +#else + pthread_mutex_init(&ptl_apini.apini_mutex, NULL); + pthread_cond_init(&ptl_apini.apini_cond, NULL); +#endif + + ptl_apini.apini_interface_cookie = ptl_create_interface_cookie(); + + rc = ptl_setup_handle_hash (); + if (rc != PTL_OK) + goto out; + + if (requested_limits != NULL) + ptl_size = requested_limits->max_pt_index + 1; + else + ptl_size = 64; + + ptl_apini.apini_nportals = ptl_size; + PORTAL_ALLOC(ptl_apini.apini_portals, + ptl_size * sizeof(*ptl_apini.apini_portals)); + if (ptl_apini.apini_portals == NULL) { + rc = PTL_NO_SPACE; + goto out; } + + for (i = 0; i < ptl_size; i++) + CFS_INIT_LIST_HEAD(&(ptl_apini.apini_portals[i])); + + /* max_{mes,mds,eqs} set in ptl_descriptor_setup */ + + /* We don't have an access control table! */ + ptl_apini.apini_actual_limits.max_ac_index = -1; + + ptl_apini.apini_actual_limits.max_pt_index = ptl_size - 1; + ptl_apini.apini_actual_limits.max_md_iovecs = PTL_MD_MAX_IOV; + ptl_apini.apini_actual_limits.max_me_list = INT_MAX; + + /* We don't support PtlGetPut! */ + ptl_apini.apini_actual_limits.max_getput_md = 0; + + if (actual_limits != NULL) + *actual_limits = ptl_apini.apini_actual_limits; + out: + if (rc != PTL_OK) { + ptl_cleanup_handle_hash (); + ptl_descriptor_cleanup (); + } + + RETURN (rc); +} + +int +ptl_shutdown_apini (void) +{ + int idx; - ptl_mutex_exit(); + /* NB no PTL_LOCK since this is the last reference. All NAL instances + * have shut down already, so it is safe to unlink and free all + * descriptors, even those that appear committed to a network op (eg MD + * with non-zero pending count) */ + + LASSERT (ptl_apini.apini_refcount == 0); + LASSERT (list_empty(&ptl_apini.apini_nis)); + + for (idx = 0; idx < ptl_apini.apini_nportals; idx++) + while (!list_empty (&ptl_apini.apini_portals[idx])) { + ptl_me_t *me = list_entry (ptl_apini.apini_portals[idx].next, + ptl_me_t, me_list); + + CERROR ("Active me %p on exit\n", me); + list_del (&me->me_list); + ptl_me_free (me); + } + + while (!list_empty (&ptl_apini.apini_active_mds)) { + ptl_libmd_t *md = list_entry (ptl_apini.apini_active_mds.next, + ptl_libmd_t, md_list); + + CERROR ("Active md %p on exit\n", md); + list_del (&md->md_list); + ptl_md_free (md); + } + + while (!list_empty (&ptl_apini.apini_active_eqs)) { + ptl_eq_t *eq = list_entry (ptl_apini.apini_active_eqs.next, + ptl_eq_t, eq_list); + + CERROR ("Active eq %p on exit\n", eq); + list_del (&eq->eq_list); + ptl_eq_free (eq); + } + + while (!list_empty (&ptl_apini.apini_active_msgs)) { + ptl_msg_t *msg = list_entry (ptl_apini.apini_active_msgs.next, + ptl_msg_t, msg_list); + + CERROR ("Active msg %p on exit\n", msg); + list_del (&msg->msg_list); + ptl_msg_free (msg); + } + + PORTAL_FREE(ptl_apini.apini_portals, + ptl_apini.apini_nportals * sizeof(*ptl_apini.apini_portals)); + + ptl_cleanup_handle_hash (); + ptl_descriptor_cleanup (); + +#ifndef __KERNEL__ + pthread_mutex_destroy(&ptl_apini.apini_mutex); + pthread_cond_destroy(&ptl_apini.apini_cond); +#endif + + return (PTL_OK); } -int PtlNIInit(ptl_interface_t interface, ptl_pid_t requested_pid, - ptl_ni_limits_t *desired_limits, ptl_ni_limits_t *actual_limits, - ptl_handle_ni_t *handle) +void +ptl_shutdown_nalnis (void) { - nal_t *nal; - int i; - int rc; + ptl_ni_t *ni; + struct list_head *tmp; + struct list_head *nxt; - if (!ptl_init) - return PTL_NO_INIT; + /* All quiet on the API front */ + LASSERT (ptl_apini.apini_refcount == 0); + + list_for_each_safe (tmp, nxt, &ptl_apini.apini_nis) { + ni = list_entry(tmp, ptl_ni_t, ni_list); - ptl_mutex_enter (); + (ni->ni_nal->nal_shutdown)(ni); + ni->ni_nal->nal_refcount--; + list_del (&ni->ni_list); - if (interface == PTL_IFACE_DEFAULT) { - for (i = 0; i <= NAL_MAX_NR; i++) - if (ptl_nal_table[i] != NULL) { - interface = i; - break; - } - /* NB if no interfaces are registered, 'interface' will - * fail the valid test below */ + PORTAL_FREE(ni, sizeof(*ni)); } +} + +ptl_err_t +ptl_startup_nalnis (void) +{ + ptl_nal_t *nal; + ptl_ni_t *ni; + struct list_head *tmp; + ptl_err_t rc = PTL_OK; + char *interface = NULL; - if (interface < 0 || - interface > NAL_MAX_NR || - ptl_nal_table[interface] == NULL) { - GOTO(out, rc = PTL_IFACE_INVALID); + list_for_each (tmp, &ptl_nal_table) { + nal = list_entry(tmp, ptl_nal_t, nal_list); + + PORTAL_ALLOC(ni, sizeof(*ni)); + if (ni == NULL) { + CERROR("Can't allocate NI for %s\n", + nal->nal_name); + rc = PTL_FAIL; + break; + } + + ni->ni_nal = nal; + nal->nal_refcount++; + + rc = (nal->nal_startup)(ni, &interface); + if (rc != PTL_OK) { + CERROR("Error %d staring up NI %s\n", + rc, nal->nal_name); + PORTAL_FREE(ni, sizeof(*ni)); + nal->nal_refcount--; + break; + } + + list_add(&ni->ni_list, &ptl_apini.apini_nis); } + + if (rc != PTL_OK) + ptl_shutdown_nalnis(); + + return rc; +} + +ptl_err_t +PtlInit(int *max_interfaces) +{ + LASSERT(!strcmp(ptl_err_str[PTL_MAX_ERRNO], "PTL_MAX_ERRNO")); + ptl_assert_wire_constants (); + + ptl_mutex_enter(); - nal = ptl_nal_table[interface]; - nal->nal_handle.nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | interface; - nal->nal_handle.cookie = 0; + LASSERT (!ptl_init); - CDEBUG(D_OTHER, "Starting up NAL (%x) refs %d\n", interface, nal->nal_refct); - rc = nal->nal_ni_init(nal, requested_pid, desired_limits, actual_limits); + CFS_INIT_LIST_HEAD(&ptl_nal_table); + ptl_apini.apini_refcount = 0; - if (rc != PTL_OK) { - CERROR("Error %d starting up NAL %x, refs %d\n", rc, - interface, nal->nal_refct); - GOTO(out, rc); +#ifdef __KERNEL__ + /* process */ + /* Kernel NALs register themselves when their module loads, and + * unregister themselves when their module is unloaded. Userspace NALs + * are plugged in explicitly here... */ + { + extern ptl_nal_t tcpnal_nal; + + ptl_register_nal (&tcpnal_nal); } +#endif + ptl_init = 1; + + ptl_mutex_exit(); - if (nal->nal_refct != 0) { - /* Caller gets to know if this was the first ref or not */ + if (max_interfaces != NULL) + *max_interfaces = 1; + + return PTL_OK; +} + +void +PtlFini(void) +{ + ptl_mutex_enter(); + + LASSERT (ptl_init); + LASSERT (list_empty(&ptl_nal_table)); + LASSERT (ptl_apini.apini_refcount == 0); + ptl_init = 0; + + ptl_mutex_exit(); +} + +ptl_err_t +PtlNIInit(ptl_interface_t interface, ptl_pid_t requested_pid, + ptl_ni_limits_t *requested_limits, ptl_ni_limits_t *actual_limits, + ptl_handle_ni_t *handle) +{ + int rc; + + ptl_mutex_enter (); + + LASSERT (ptl_init); + CDEBUG(D_OTHER, "refs %d\n", ptl_apini.apini_refcount); + + if (ptl_apini.apini_refcount != 0) { rc = PTL_IFACE_DUP; + ptl_apini.apini_refcount++; + goto out; } + + rc = ptl_startup_apini(requested_pid, + requested_limits, actual_limits); + if (rc != PTL_OK) + goto out; - nal->nal_refct++; - *handle = nal->nal_handle; + rc = ptl_startup_nalnis(); + if (rc != PTL_OK) + goto out; + + memset (handle, 0, sizeof(*handle)); + LASSERT (!PtlHandleIsEqual(*handle, PTL_INVALID_HANDLE)); + /* Handle can be anything; PTL_INVALID_HANDLE isn't wise though :) */ out: ptl_mutex_exit (); @@ -231,31 +726,105 @@ int PtlNIInit(ptl_interface_t interface, ptl_pid_t requested_pid, return rc; } -int PtlNIFini(ptl_handle_ni_t ni) +ptl_err_t +PtlNIFini(ptl_handle_ni_t ni) { - nal_t *nal; - int idx; + ptl_mutex_enter (); - if (!ptl_init) - return PTL_NO_INIT; + LASSERT (ptl_init); + LASSERT (ptl_apini.apini_refcount > 0); - ptl_mutex_enter (); + ptl_apini.apini_refcount--; + if (ptl_apini.apini_refcount == 0) { + ptl_shutdown_nalnis(); + ptl_shutdown_apini(); + } - nal = ptl_hndl2nal (&ni); - if (nal == NULL) { - ptl_mutex_exit (); - return PTL_HANDLE_INVALID; + ptl_mutex_exit (); + return PTL_OK; +} + +ptl_err_t +PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id) +{ + ptl_ni_t *ni; + unsigned long flags; + + LASSERT (ptl_init); + LASSERT (ptl_apini.apini_refcount > 0); + + /* pretty useless; just return the NID of the first local interface */ + + PTL_LOCK(flags); + + if (list_empty(&ptl_apini.apini_nis)) { + PTL_UNLOCK(flags); + return PTL_FAIL; } + + id->pid = ptl_apini.apini_pid; + + ni = list_entry(ptl_apini.apini_nis.next, + ptl_ni_t, ni_list); + id->nid = ni->ni_nid; + + PTL_UNLOCK(flags); - idx = ni.nal_idx & NI_HANDLE_MASK; + return PTL_OK; +} - LASSERT(nal->nal_refct > 0); +ptl_err_t +PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t *ni_out) +{ + LASSERT (ptl_init); + LASSERT (ptl_apini.apini_refcount > 0); - nal->nal_refct--; + *ni_out = handle_in; + return PTL_OK; +} - /* nal_refct == 0 tells nal->shutdown to really shut down */ - nal->nal_ni_fini(nal); +void +PtlSnprintHandle(char *str, int len, ptl_handle_any_t h) +{ + snprintf(str, len, LPX64, h.cookie); +} - ptl_mutex_exit (); +ptl_err_t +PtlGetUid(ptl_handle_ni_t ni_handle, ptl_uid_t *uid) +{ + LASSERT (ptl_init); + LASSERT (ptl_apini.apini_refcount > 0); + + *uid = 0; /* fake it */ return PTL_OK; } + +ptl_err_t +PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in, + unsigned long *distance_out) +{ + LASSERT (ptl_init); + LASSERT (ptl_apini.apini_refcount > 0); + + return 1; /* fake it */ +} + +ptl_err_t +PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in, + ptl_sr_value_t *status_out) +{ + LASSERT (ptl_init); + LASSERT (ptl_apini.apini_refcount > 0); + + return PTL_FAIL; /* not supported */ +} + +ptl_err_t +PtlACEntry(ptl_handle_ni_t ni_in, ptl_ac_index_t index_in, + ptl_process_id_t match_id_in, ptl_pt_index_t portal_in) +{ + LASSERT (ptl_init); + LASSERT (ptl_apini.apini_refcount > 0); + + return PTL_FAIL; /* not supported */ +} diff --git a/lnet/lnet/api-wrap.c b/lnet/lnet/api-wrap.c deleted file mode 100644 index 92f495e..0000000 --- a/lnet/lnet/api-wrap.c +++ /dev/null @@ -1,379 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * api/api-wrap.c - * User-level wrappers that dispatch across the protection boundaries - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_PORTALS -#include - -void PtlSnprintHandle(char *str, int len, ptl_handle_any_t h) -{ - snprintf(str, len, "0x%lx."LPX64, h.nal_idx, h.cookie); -} - -int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t *ni_out) -{ - if (!ptl_init) - return PTL_NO_INIT; - - if (ptl_hndl2nal(&handle_in) == NULL) - return PTL_HANDLE_INVALID; - - *ni_out = handle_in; - return PTL_OK; -} - -int PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id) -{ - nal_t *nal; - - if (!ptl_init) - return PTL_NO_INIT; - - nal = ptl_hndl2nal(&ni_handle); - if (nal == NULL) - return PTL_NI_INVALID; - - return nal->nal_get_id(nal, id); -} - -int PtlGetUid(ptl_handle_ni_t ni_handle, ptl_uid_t *uid) -{ - nal_t *nal; - - if (!ptl_init) - return PTL_NO_INIT; - - nal = ptl_hndl2nal(&ni_handle); - if (nal == NULL) - return PTL_NI_INVALID; - - /* We don't support different uids yet */ - *uid = 0; - return PTL_OK; -} - -int PtlFailNid (ptl_handle_ni_t interface, ptl_nid_t nid, unsigned int threshold) -{ - nal_t *nal; - - if (!ptl_init) - return PTL_NO_INIT; - - nal = ptl_hndl2nal(&interface); - if (nal == NULL) - return PTL_NI_INVALID; - - return nal->nal_fail_nid(nal, nid, threshold); -} - -int PtlLoopback (ptl_handle_ni_t interface, int set, int *enabled) -{ - nal_t *nal; - - if (!ptl_init) - return PTL_NO_INIT; - - nal = ptl_hndl2nal(&interface); - if (nal == NULL) - return PTL_NI_INVALID; - - return nal->nal_loopback(nal, set, enabled); -} - -int PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in, - ptl_sr_value_t *status_out) -{ - nal_t *nal; - - if (!ptl_init) - return PTL_NO_INIT; - - nal = ptl_hndl2nal(&interface_in); - if (nal == NULL) - return PTL_NI_INVALID; - - return nal->nal_ni_status(nal, register_in, status_out); -} - -int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in, - unsigned long *distance_out) -{ - nal_t *nal; - - if (!ptl_init) - return PTL_NO_INIT; - - nal = ptl_hndl2nal(&interface_in); - if (nal == NULL) - return PTL_NI_INVALID; - - return nal->nal_ni_dist(nal, &process_in, distance_out); -} - -int PtlMEAttach(ptl_handle_ni_t interface_in, ptl_pt_index_t index_in, - ptl_process_id_t match_id_in, ptl_match_bits_t match_bits_in, - ptl_match_bits_t ignore_bits_in, ptl_unlink_t unlink_in, - ptl_ins_pos_t pos_in, ptl_handle_me_t *handle_out) -{ - nal_t *nal; - - if (!ptl_init) - return PTL_NO_INIT; - - nal = ptl_hndl2nal(&interface_in); - if (nal == NULL) - return PTL_NI_INVALID; - - return nal->nal_me_attach(nal, index_in, match_id_in, - match_bits_in, ignore_bits_in, - unlink_in, pos_in, handle_out); -} - -int PtlMEInsert(ptl_handle_me_t current_in, ptl_process_id_t match_id_in, - ptl_match_bits_t match_bits_in, ptl_match_bits_t ignore_bits_in, - ptl_unlink_t unlink_in, ptl_ins_pos_t position_in, - ptl_handle_me_t * handle_out) -{ - nal_t *nal; - - if (!ptl_init) - return PTL_NO_INIT; - - nal = ptl_hndl2nal(¤t_in); - if (nal == NULL) - return PTL_ME_INVALID; - - return nal->nal_me_insert(nal, ¤t_in, match_id_in, - match_bits_in, ignore_bits_in, - unlink_in, position_in, handle_out); -} - -int PtlMEUnlink(ptl_handle_me_t current_in) -{ - nal_t *nal; - - if (!ptl_init) - return PTL_NO_INIT; - - nal = ptl_hndl2nal(¤t_in); - if (nal == NULL) - return PTL_ME_INVALID; - - return nal->nal_me_unlink(nal, ¤t_in); -} - -int PtlMDAttach(ptl_handle_me_t me_in, ptl_md_t md_in, - ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out) -{ - nal_t *nal; - - if (!ptl_init) - return PTL_NO_INIT; - - nal = ptl_hndl2nal(&me_in); - if (nal == NULL) - return PTL_ME_INVALID; - - if (!PtlHandleIsEqual(md_in.eq_handle, PTL_EQ_NONE) && - ptl_hndl2nal(&md_in.eq_handle) != nal) - return PTL_MD_ILLEGAL; - - return (nal->nal_md_attach)(nal, &me_in, &md_in, - unlink_in, handle_out); -} - -int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in, - ptl_unlink_t unlink_in, ptl_handle_md_t *handle_out) -{ - nal_t *nal; - - if (!ptl_init) - return PTL_NO_INIT; - - nal = ptl_hndl2nal(&ni_in); - if (nal == NULL) - return PTL_NI_INVALID; - - if (!PtlHandleIsEqual(md_in.eq_handle, PTL_EQ_NONE) && - ptl_hndl2nal(&md_in.eq_handle) != nal) - return PTL_MD_ILLEGAL; - - return (nal->nal_md_bind)(nal, &md_in, unlink_in, handle_out); -} - -int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t *old_inout, - ptl_md_t *new_inout, ptl_handle_eq_t testq_in) -{ - nal_t *nal; - - if (!ptl_init) - return PTL_NO_INIT; - - nal = ptl_hndl2nal(&md_in); - if (nal == NULL) - return PTL_MD_INVALID; - - if (!PtlHandleIsEqual(testq_in, PTL_EQ_NONE) && - ptl_hndl2nal(&testq_in) != nal) - return PTL_EQ_INVALID; - - return (nal->nal_md_update)(nal, &md_in, - old_inout, new_inout, &testq_in); -} - -int PtlMDUnlink(ptl_handle_md_t md_in) -{ - nal_t *nal; - - if (!ptl_init) - return PTL_NO_INIT; - - nal = ptl_hndl2nal(&md_in); - if (nal == NULL) - return PTL_MD_INVALID; - - return (nal->nal_md_unlink)(nal, &md_in); -} - -int PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count, - ptl_eq_handler_t callback, - ptl_handle_eq_t *handle_out) -{ - nal_t *nal; - - if (!ptl_init) - return PTL_NO_INIT; - - nal = ptl_hndl2nal(&interface); - if (nal == NULL) - return PTL_NI_INVALID; - - return (nal->nal_eq_alloc)(nal, count, callback, handle_out); -} - -int PtlEQFree(ptl_handle_eq_t eventq) -{ - nal_t *nal; - - if (!ptl_init) - return PTL_NO_INIT; - - nal = ptl_hndl2nal(&eventq); - if (nal == NULL) - return PTL_EQ_INVALID; - - return (nal->nal_eq_free)(nal, &eventq); -} - -int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t *ev) -{ - int which; - - return (PtlEQPoll (&eventq, 1, 0, ev, &which)); -} - -int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out) -{ - int which; - - return (PtlEQPoll (&eventq_in, 1, PTL_TIME_FOREVER, - event_out, &which)); -} - -int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout, - ptl_event_t *event_out, int *which_out) -{ - int i; - nal_t *nal; - - if (!ptl_init) - return PTL_NO_INIT; - - if (neq_in < 1) - return PTL_EQ_INVALID; - - nal = ptl_hndl2nal(&eventqs_in[0]); - if (nal == NULL) - return PTL_EQ_INVALID; - - for (i = 1; i < neq_in; i++) - if (ptl_hndl2nal(&eventqs_in[i]) != nal) - return PTL_EQ_INVALID; - - return (nal->nal_eq_poll)(nal, eventqs_in, neq_in, timeout, - event_out, which_out); -} - - -int PtlACEntry(ptl_handle_ni_t ni_in, ptl_ac_index_t index_in, - ptl_process_id_t match_id_in, ptl_pt_index_t portal_in) -{ - nal_t *nal; - - if (!ptl_init) - return PTL_NO_INIT; - - nal = ptl_hndl2nal(&ni_in); - if (nal == NULL) - return PTL_NI_INVALID; - - return (nal->nal_ace_entry)(nal, index_in, match_id_in, portal_in); -} - -int PtlPut(ptl_handle_md_t md_in, ptl_ack_req_t ack_req_in, - ptl_process_id_t target_in, ptl_pt_index_t portal_in, - ptl_ac_index_t ac_in, ptl_match_bits_t match_bits_in, - ptl_size_t offset_in, ptl_hdr_data_t hdr_data_in) -{ - nal_t *nal; - - if (!ptl_init) - return PTL_NO_INIT; - - nal = ptl_hndl2nal(&md_in); - if (nal == NULL) - return PTL_MD_INVALID; - - return (nal->nal_put)(nal, &md_in, ack_req_in, - &target_in, portal_in, ac_in, - match_bits_in, offset_in, hdr_data_in); -} - -int PtlGet(ptl_handle_md_t md_in, ptl_process_id_t target_in, - ptl_pt_index_t portal_in, ptl_ac_index_t ac_in, - ptl_match_bits_t match_bits_in, ptl_size_t offset_in) -{ - nal_t *nal; - - if (!ptl_init) - return PTL_NO_INIT; - - nal = ptl_hndl2nal(&md_in); - if (nal == NULL) - return PTL_MD_INVALID; - - return (nal->nal_get)(nal, &md_in, - &target_in, portal_in, ac_in, - match_bits_in, offset_in); -} - diff --git a/lnet/lnet/autoMakefile.am b/lnet/lnet/autoMakefile.am index affce6e..8af6fee 100644 --- a/lnet/lnet/autoMakefile.am +++ b/lnet/lnet/autoMakefile.am @@ -1,6 +1,6 @@ -my_sources = api-errno.c api-ni.c api-wrap.c \ - lib-init.c lib-me.c lib-msg.c lib-eq.c \ - lib-md.c lib-move.c lib-ni.c lib-pid.c +my_sources = api-errno.c api-ni.c \ + lib-me.c lib-msg.c lib-eq.c \ + lib-md.c lib-move.c if !CRAY_PORTALS @@ -20,9 +20,9 @@ endif # LINUX if DARWIN macos_PROGRAMS := portals -portals_SOURCES := api-errno.c api-ni.c api-wrap.c -portals_SOURCES += lib-init.c lib-me.c lib-msg.c lib-eq.c lib-md.c -portals_SOURCES += lib-move.c lib-ni.c lib-pid.c module.c +portals_SOURCES := api-errno.c api-ni.c +portals_SOURCES += lib-me.c lib-msg.c lib-eq.c lib-md.c +portals_SOURCES += lib-move.c module.c portals_CFLAGS := $(EXTRA_KCFLAGS) portals_LDFLAGS := $(EXTRA_KLDFLAGS) diff --git a/lnet/lnet/lib-eq.c b/lnet/lnet/lib-eq.c index 4992fce..2684a33 100644 --- a/lnet/lnet/lib-eq.c +++ b/lnet/lnet/lib-eq.c @@ -25,16 +25,19 @@ #define DEBUG_SUBSYSTEM S_PORTALS #include -int -lib_api_eq_alloc (nal_t *apinal, ptl_size_t count, - ptl_eq_handler_t callback, - ptl_handle_eq_t *handle) +ptl_err_t +PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count, + ptl_eq_handler_t callback, ptl_handle_eq_t *handle) { - lib_nal_t *nal = apinal->nal_data; - lib_eq_t *eq; + ptl_eq_t *eq; unsigned long flags; - int rc; + if (!ptl_init) + return PTL_NO_INIT; + + if (ptl_apini.apini_refcount == 0) + return PTL_HANDLE_INVALID; + /* We need count to be a power of 2 so that when eq_{enq,deq}_seq * overflow, they don't skip entries, so the queue has the same * apparant capacity at all times */ @@ -50,29 +53,15 @@ lib_api_eq_alloc (nal_t *apinal, ptl_size_t count, if (count == 0) /* catch bad parameter / overflow on roundup */ return (PTL_VAL_FAILED); - eq = lib_eq_alloc (nal); + eq = ptl_eq_alloc(); if (eq == NULL) return (PTL_NO_SPACE); PORTAL_ALLOC(eq->eq_events, count * sizeof(ptl_event_t)); if (eq->eq_events == NULL) { - LIB_LOCK(nal, flags); - lib_eq_free (nal, eq); - LIB_UNLOCK(nal, flags); - } - - if (nal->libnal_map != NULL) { - struct iovec iov = { - .iov_base = eq->eq_events, - .iov_len = count * sizeof(ptl_event_t)}; - - rc = nal->libnal_map(nal, 1, &iov, &eq->eq_addrkey); - if (rc != PTL_OK) { - LIB_LOCK(nal, flags); - lib_eq_free (nal, eq); - LIB_UNLOCK(nal, flags); - return (rc); - } + PTL_LOCK(flags); + ptl_eq_free (eq); + PTL_UNLOCK(flags); } /* NB this resets all event sequence numbers to 0, to be earlier @@ -85,66 +74,61 @@ lib_api_eq_alloc (nal_t *apinal, ptl_size_t count, eq->eq_refcount = 0; eq->eq_callback = callback; - LIB_LOCK(nal, flags); + PTL_LOCK(flags); - lib_initialise_handle (nal, &eq->eq_lh, PTL_COOKIE_TYPE_EQ); - list_add (&eq->eq_list, &nal->libnal_ni.ni_active_eqs); + ptl_initialise_handle (&eq->eq_lh, PTL_COOKIE_TYPE_EQ); + list_add (&eq->eq_list, &ptl_apini.apini_active_eqs); - LIB_UNLOCK(nal, flags); + PTL_UNLOCK(flags); - ptl_eq2handle(handle, nal, eq); + ptl_eq2handle(handle, eq); return (PTL_OK); } -int -lib_api_eq_free(nal_t *apinal, ptl_handle_eq_t *eqh) +ptl_err_t +PtlEQFree(ptl_handle_eq_t eqh) { - lib_nal_t *nal = apinal->nal_data; - lib_eq_t *eq; + ptl_eq_t *eq; int size; ptl_event_t *events; - void *addrkey; unsigned long flags; - LIB_LOCK(nal, flags); + if (!ptl_init) + return PTL_NO_INIT; + + if (ptl_apini.apini_refcount == 0) + return PTL_EQ_INVALID; + + PTL_LOCK(flags); - eq = ptl_handle2eq(eqh, nal); + eq = ptl_handle2eq(&eqh); if (eq == NULL) { - LIB_UNLOCK(nal, flags); + PTL_UNLOCK(flags); return (PTL_EQ_INVALID); } if (eq->eq_refcount != 0) { - LIB_UNLOCK(nal, flags); + PTL_UNLOCK(flags); return (PTL_EQ_IN_USE); } /* stash for free after lock dropped */ events = eq->eq_events; size = eq->eq_size; - addrkey = eq->eq_addrkey; - lib_invalidate_handle (nal, &eq->eq_lh); + ptl_invalidate_handle (&eq->eq_lh); list_del (&eq->eq_list); - lib_eq_free (nal, eq); - - LIB_UNLOCK(nal, flags); + ptl_eq_free (eq); - if (nal->libnal_unmap != NULL) { - struct iovec iov = { - .iov_base = events, - .iov_len = size * sizeof(ptl_event_t)}; - - nal->libnal_unmap(nal, 1, &iov, &addrkey); - } + PTL_UNLOCK(flags); PORTAL_FREE(events, size * sizeof (ptl_event_t)); - return (PTL_OK); + return PTL_OK; } -int -lib_get_event (lib_eq_t *eq, ptl_event_t *ev) +ptl_err_t +lib_get_event (ptl_eq_t *eq, ptl_event_t *ev) { int new_index = eq->eq_deq_seq & (eq->eq_size - 1); ptl_event_t *new_event = &eq->eq_events[new_index]; @@ -175,13 +159,28 @@ lib_get_event (lib_eq_t *eq, ptl_event_t *ev) } -int -lib_api_eq_poll (nal_t *apinal, - ptl_handle_eq_t *eventqs, int neq, int timeout_ms, - ptl_event_t *event, int *which) +ptl_err_t +PtlEQGet (ptl_handle_eq_t eventq, ptl_event_t *event) +{ + int which; + + return PtlEQPoll(&eventq, 1, 0, + event, &which); +} + +ptl_err_t +PtlEQWait (ptl_handle_eq_t eventq, ptl_event_t *event) +{ + int which; + + return PtlEQPoll(&eventq, 1, PTL_TIME_FOREVER, + event, &which); +} + +ptl_err_t +PtlEQPoll (ptl_handle_eq_t *eventqs, int neq, int timeout_ms, + ptl_event_t *event, int *which) { - lib_nal_t *nal = apinal->nal_data; - lib_ni_t *ni = &nal->libnal_ni; unsigned long flags; int i; int rc; @@ -195,22 +194,31 @@ lib_api_eq_poll (nal_t *apinal, #endif ENTRY; - LIB_LOCK(nal, flags); + if (!ptl_init) + RETURN(PTL_NO_INIT); + + if (ptl_apini.apini_refcount == 0) + RETURN(PTL_HANDLE_INVALID); + + if (neq < 1) + RETURN(PTL_EQ_INVALID); + + PTL_LOCK(flags); for (;;) { for (i = 0; i < neq; i++) { - lib_eq_t *eq = ptl_handle2eq(&eventqs[i], nal); + ptl_eq_t *eq = ptl_handle2eq(&eventqs[i]); rc = lib_get_event (eq, event); if (rc != PTL_EQ_EMPTY) { - LIB_UNLOCK(nal, flags); + PTL_UNLOCK(flags); *which = i; RETURN(rc); } } if (timeout_ms == 0) { - LIB_UNLOCK (nal, flags); + PTL_UNLOCK (flags); RETURN (PTL_EQ_EMPTY); } @@ -220,9 +228,9 @@ lib_api_eq_poll (nal_t *apinal, #ifdef __KERNEL__ cfs_waitlink_init(&wl); set_current_state(TASK_INTERRUPTIBLE); - cfs_waitq_add(&ni->ni_waitq, &wl); + cfs_waitq_add(&ptl_apini.apini_waitq, &wl); - LIB_UNLOCK(nal, flags); + PTL_UNLOCK(flags); if (timeout_ms < 0) { cfs_waitq_wait (&wl); @@ -237,11 +245,12 @@ lib_api_eq_poll (nal_t *apinal, timeout_ms = 0; } - LIB_LOCK(nal, flags); - cfs_waitq_del(&ni->ni_waitq, &wl); + PTL_LOCK(flags); + cfs_waitq_del(&ptl_apini.apini_waitq, &wl); #else if (timeout_ms < 0) { - pthread_cond_wait(&ni->ni_cond, &ni->ni_mutex); + pthread_cond_wait(&ptl_apini.apini_cond, + &ptl_apini.apini_mutex); } else { gettimeofday(&then, NULL); @@ -253,8 +262,8 @@ lib_api_eq_poll (nal_t *apinal, ts.tv_nsec -= 1000000000; } - pthread_cond_timedwait(&ni->ni_cond, - &ni->ni_mutex, &ts); + pthread_cond_timedwait(&ptl_apini.apini_cond, + &ptl_apini.apini_mutex, &ts); gettimeofday(&now, NULL); timeout_ms -= (now.tv_sec - then.tv_sec) * 1000 + diff --git a/lnet/lnet/lib-init.c b/lnet/lnet/lib-init.c deleted file mode 100644 index 6d0099c..0000000 --- a/lnet/lnet/lib-init.c +++ /dev/null @@ -1,433 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-init.c - * Start up the internal library and clear all structures - * Called by the NAL when it initializes. Safe to call multiple times. - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -# define DEBUG_SUBSYSTEM S_PORTALS -#include - -#ifdef __KERNEL__ -# include -#else -# include -# include -#endif - -#ifndef PTL_USE_LIB_FREELIST - -int -kportal_descriptor_setup (lib_nal_t *nal, - ptl_ni_limits_t *requested_limits, - ptl_ni_limits_t *actual_limits) -{ - /* Ignore requested limits! */ - actual_limits->max_mes = INT_MAX; - actual_limits->max_mds = INT_MAX; - actual_limits->max_eqs = INT_MAX; - - return PTL_OK; -} - -void -kportal_descriptor_cleanup (lib_nal_t *nal) -{ -} -#else - -int -lib_freelist_init (lib_nal_t *nal, lib_freelist_t *fl, int n, int size) -{ - char *space; - - LASSERT (n > 0); - - size += offsetof (lib_freeobj_t, fo_contents); - - PORTAL_ALLOC(space, n * size); - if (space == NULL) - return (PTL_NO_SPACE); - - CFS_INIT_LIST_HEAD (&fl->fl_list); - fl->fl_objs = space; - fl->fl_nobjs = n; - fl->fl_objsize = size; - - do - { - memset (space, 0, size); - list_add ((struct list_head *)space, &fl->fl_list); - space += size; - } while (--n != 0); - - return (PTL_OK); -} - -void -lib_freelist_fini (lib_nal_t *nal, lib_freelist_t *fl) -{ - struct list_head *el; - int count; - - if (fl->fl_nobjs == 0) - return; - - count = 0; - for (el = fl->fl_list.next; el != &fl->fl_list; el = el->next) - count++; - - LASSERT (count == fl->fl_nobjs); - - PORTAL_FREE(fl->fl_objs, fl->fl_nobjs * fl->fl_objsize); - memset (fl, 0, sizeof (fl)); -} - -int -kportal_descriptor_setup (lib_nal_t *nal, - ptl_ni_limits_t *requested_limits, - ptl_ni_limits_t *actual_limits) -{ - /* NB on failure caller must still call kportal_descriptor_cleanup */ - /* ****** */ - lib_ni_t *ni = &nal->libnal_ni; - int rc; - - memset (&ni->ni_free_mes, 0, sizeof (ni->ni_free_mes)); - memset (&ni->ni_free_msgs, 0, sizeof (ni->ni_free_msgs)); - memset (&ni->ni_free_mds, 0, sizeof (ni->ni_free_mds)); - memset (&ni->ni_free_eqs, 0, sizeof (ni->ni_free_eqs)); - - /* Ignore requested limits! */ - actual_limits->max_mes = MAX_MES; - actual_limits->max_mds = MAX_MDS; - actual_limits->max_eqs = MAX_EQS; - /* Hahahah what a load of bollocks. There's nowhere to - * specify the max # messages in-flight */ - - rc = lib_freelist_init (nal, &ni->ni_free_mes, - MAX_MES, sizeof (lib_me_t)); - if (rc != PTL_OK) - return (rc); - - rc = lib_freelist_init (nal, &ni->ni_free_msgs, - MAX_MSGS, sizeof (lib_msg_t)); - if (rc != PTL_OK) - return (rc); - - rc = lib_freelist_init (nal, &ni->ni_free_mds, - MAX_MDS, sizeof (lib_md_t)); - if (rc != PTL_OK) - return (rc); - - rc = lib_freelist_init (nal, &ni->ni_free_eqs, - MAX_EQS, sizeof (lib_eq_t)); - return (rc); -} - -void -kportal_descriptor_cleanup (lib_nal_t *nal) -{ - lib_ni_t *ni = &nal->libnal_ni; - - lib_freelist_fini (nal, &ni->ni_free_mes); - lib_freelist_fini (nal, &ni->ni_free_msgs); - lib_freelist_fini (nal, &ni->ni_free_mds); - lib_freelist_fini (nal, &ni->ni_free_eqs); -} - -#endif - -__u64 -lib_create_interface_cookie (lib_nal_t *nal) -{ - /* NB the interface cookie in wire handles guards against delayed - * replies and ACKs appearing valid in a new instance of the same - * interface. Initialisation time, even if it's only implemented - * to millisecond resolution is probably easily good enough. */ - struct timeval tv; - __u64 cookie; -#ifndef __KERNEL__ - int rc = gettimeofday (&tv, NULL); - LASSERT (rc == 0); -#else - do_gettimeofday(&tv); -#endif - cookie = tv.tv_sec; - cookie *= 1000000; - cookie += tv.tv_usec; - return (cookie); -} - -int -lib_setup_handle_hash (lib_nal_t *nal) -{ - lib_ni_t *ni = &nal->libnal_ni; - int i; - - /* Arbitrary choice of hash table size */ -#ifdef __KERNEL__ - ni->ni_lh_hash_size = PAGE_SIZE / sizeof (struct list_head); -#else - ni->ni_lh_hash_size = (MAX_MES + MAX_MDS + MAX_EQS)/4; -#endif - PORTAL_ALLOC(ni->ni_lh_hash_table, - ni->ni_lh_hash_size * sizeof (struct list_head)); - if (ni->ni_lh_hash_table == NULL) - return (PTL_NO_SPACE); - - for (i = 0; i < ni->ni_lh_hash_size; i++) - CFS_INIT_LIST_HEAD (&ni->ni_lh_hash_table[i]); - - ni->ni_next_object_cookie = PTL_COOKIE_TYPES; - - return (PTL_OK); -} - -void -lib_cleanup_handle_hash (lib_nal_t *nal) -{ - lib_ni_t *ni = &nal->libnal_ni; - - if (ni->ni_lh_hash_table == NULL) - return; - - PORTAL_FREE(ni->ni_lh_hash_table, - ni->ni_lh_hash_size * sizeof (struct list_head)); -} - -lib_handle_t * -lib_lookup_cookie (lib_nal_t *nal, __u64 cookie, int type) -{ - /* ALWAYS called with statelock held */ - lib_ni_t *ni = &nal->libnal_ni; - struct list_head *list; - struct list_head *el; - unsigned int hash; - - if ((cookie & (PTL_COOKIE_TYPES - 1)) != type) - return (NULL); - - hash = ((unsigned int)cookie) % ni->ni_lh_hash_size; - list = &ni->ni_lh_hash_table[hash]; - - list_for_each (el, list) { - lib_handle_t *lh = list_entry (el, lib_handle_t, lh_hash_chain); - - if (lh->lh_cookie == cookie) - return (lh); - } - - return (NULL); -} - -void -lib_initialise_handle (lib_nal_t *nal, lib_handle_t *lh, int type) -{ - /* ALWAYS called with statelock held */ - lib_ni_t *ni = &nal->libnal_ni; - unsigned int hash; - - LASSERT (type >= 0 && type < PTL_COOKIE_TYPES); - lh->lh_cookie = ni->ni_next_object_cookie | type; - ni->ni_next_object_cookie += PTL_COOKIE_TYPES; - - hash = ((unsigned int)lh->lh_cookie) % ni->ni_lh_hash_size; - list_add (&lh->lh_hash_chain, &ni->ni_lh_hash_table[hash]); -} - -void -lib_invalidate_handle (lib_nal_t *nal, lib_handle_t *lh) -{ - list_del (&lh->lh_hash_chain); -} - -int -lib_init(lib_nal_t *libnal, nal_t *apinal, - ptl_process_id_t process_id, - ptl_ni_limits_t *requested_limits, - ptl_ni_limits_t *actual_limits) -{ - int rc = PTL_OK; - lib_ni_t *ni = &libnal->libnal_ni; - int ptl_size; - int i; - ENTRY; - - /* NB serialised in PtlNIInit() */ - - lib_assert_wire_constants (); - - /* Setup the API nal with the lib API handling functions */ - apinal->nal_get_id = lib_api_get_id; - apinal->nal_ni_status = lib_api_ni_status; - apinal->nal_ni_dist = lib_api_ni_dist; - apinal->nal_fail_nid = lib_api_fail_nid; - apinal->nal_loopback = lib_api_loopback; - apinal->nal_me_attach = lib_api_me_attach; - apinal->nal_me_insert = lib_api_me_insert; - apinal->nal_me_unlink = lib_api_me_unlink; - apinal->nal_md_attach = lib_api_md_attach; - apinal->nal_md_bind = lib_api_md_bind; - apinal->nal_md_unlink = lib_api_md_unlink; - apinal->nal_md_update = lib_api_md_update; - apinal->nal_eq_alloc = lib_api_eq_alloc; - apinal->nal_eq_free = lib_api_eq_free; - apinal->nal_eq_poll = lib_api_eq_poll; - apinal->nal_put = lib_api_put; - apinal->nal_get = lib_api_get; - - apinal->nal_data = libnal; - ni->ni_api = apinal; - - rc = kportal_descriptor_setup (libnal, requested_limits, - &ni->ni_actual_limits); - if (rc != PTL_OK) - goto out; - - memset(&ni->ni_counters, 0, sizeof(lib_counters_t)); - - CFS_INIT_LIST_HEAD (&ni->ni_active_msgs); - CFS_INIT_LIST_HEAD (&ni->ni_active_mds); - CFS_INIT_LIST_HEAD (&ni->ni_active_eqs); - CFS_INIT_LIST_HEAD (&ni->ni_test_peers); - -#ifdef __KERNEL__ - spin_lock_init (&ni->ni_lock); - cfs_waitq_init (&ni->ni_waitq); -#else - pthread_mutex_init(&ni->ni_mutex, NULL); - pthread_cond_init(&ni->ni_cond, NULL); -#endif - - ni->ni_interface_cookie = lib_create_interface_cookie (libnal); - ni->ni_next_object_cookie = 0; - rc = lib_setup_handle_hash (libnal); - if (rc != PTL_OK) - goto out; - - ni->ni_pid = process_id; - - if (requested_limits != NULL) - ptl_size = requested_limits->max_pt_index + 1; - else - ptl_size = 64; - - ni->ni_portals.size = ptl_size; - PORTAL_ALLOC(ni->ni_portals.tbl, - ptl_size * sizeof(struct list_head)); - if (ni->ni_portals.tbl == NULL) { - rc = PTL_NO_SPACE; - goto out; - } - - for (i = 0; i < ptl_size; i++) - CFS_INIT_LIST_HEAD(&(ni->ni_portals.tbl[i])); - - /* max_{mes,mds,eqs} set in kportal_descriptor_setup */ - - /* We don't have an access control table! */ - ni->ni_actual_limits.max_ac_index = -1; - - ni->ni_actual_limits.max_pt_index = ptl_size - 1; - ni->ni_actual_limits.max_md_iovecs = PTL_MD_MAX_IOV; - ni->ni_actual_limits.max_me_list = INT_MAX; - - /* We don't support PtlGetPut! */ - ni->ni_actual_limits.max_getput_md = 0; - - if (actual_limits != NULL) - *actual_limits = ni->ni_actual_limits; - - /* disable loopback optimisation by default */ - ni->ni_loopback = 0; - - out: - if (rc != PTL_OK) { - lib_cleanup_handle_hash (libnal); - kportal_descriptor_cleanup (libnal); - } - - RETURN (rc); -} - -int -lib_fini(lib_nal_t *nal) -{ - lib_ni_t *ni = &nal->libnal_ni; - int idx; - - /* NB no state_lock() since this is the last reference. The NAL - * should have shut down already, so it should be safe to unlink - * and free all descriptors, even those that appear committed to a - * network op (eg MD with non-zero pending count) - */ - - for (idx = 0; idx < ni->ni_portals.size; idx++) - while (!list_empty (&ni->ni_portals.tbl[idx])) { - lib_me_t *me = list_entry (ni->ni_portals.tbl[idx].next, - lib_me_t, me_list); - - CERROR ("Active me %p on exit\n", me); - list_del (&me->me_list); - lib_me_free (nal, me); - } - - while (!list_empty (&ni->ni_active_mds)) { - lib_md_t *md = list_entry (ni->ni_active_mds.next, - lib_md_t, md_list); - - CERROR ("Active md %p on exit\n", md); - list_del (&md->md_list); - lib_md_free (nal, md); - } - - while (!list_empty (&ni->ni_active_eqs)) { - lib_eq_t *eq = list_entry (ni->ni_active_eqs.next, - lib_eq_t, eq_list); - - CERROR ("Active eq %p on exit\n", eq); - list_del (&eq->eq_list); - lib_eq_free (nal, eq); - } - - while (!list_empty (&ni->ni_active_msgs)) { - lib_msg_t *msg = list_entry (ni->ni_active_msgs.next, - lib_msg_t, msg_list); - - CERROR ("Active msg %p on exit\n", msg); - list_del (&msg->msg_list); - lib_msg_free (nal, msg); - } - - PORTAL_FREE(ni->ni_portals.tbl, - ni->ni_portals.size * sizeof(struct list_head)); - - lib_cleanup_handle_hash (nal); - kportal_descriptor_cleanup (nal); - -#ifndef __KERNEL__ - pthread_mutex_destroy(&ni->ni_mutex); - pthread_cond_destroy(&ni->ni_cond); -#endif - - return (PTL_OK); -} diff --git a/lnet/lnet/lib-md.c b/lnet/lnet/lib-md.c index f188e2a..b307df1 100644 --- a/lnet/lnet/lib-md.c +++ b/lnet/lnet/lib-md.c @@ -24,81 +24,62 @@ #define DEBUG_SUBSYSTEM S_PORTALS -#ifndef __KERNEL__ -# include -#else -# include -#endif - #include -/* must be called with state lock held */ +/* must be called with PTL_LOCK held */ void -lib_md_unlink(lib_nal_t *nal, lib_md_t *md) +ptl_md_unlink(ptl_libmd_t *md) { if ((md->md_flags & PTL_MD_FLAG_ZOMBIE) == 0) { /* first unlink attempt... */ - lib_me_t *me = md->me; + ptl_me_t *me = md->md_me; md->md_flags |= PTL_MD_FLAG_ZOMBIE; /* Disassociate from ME (if any), and unlink it if it was created * with PTL_UNLINK */ if (me != NULL) { - me->md = NULL; - if (me->unlink == PTL_UNLINK) - lib_me_unlink(nal, me); + me->me_md = NULL; + if (me->me_unlink == PTL_UNLINK) + ptl_me_unlink(me); } /* emsure all future handle lookups fail */ - lib_invalidate_handle(nal, &md->md_lh); + ptl_invalidate_handle(&md->md_lh); } - if (md->pending != 0) { + if (md->md_pending != 0) { CDEBUG(D_NET, "Queueing unlink of md %p\n", md); return; } CDEBUG(D_NET, "Unlinking md %p\n", md); - if ((md->options & PTL_MD_KIOV) != 0) { - if (nal->libnal_unmap_pages != NULL) - nal->libnal_unmap_pages (nal, - md->md_niov, - md->md_iov.kiov, - &md->md_addrkey); - } else if (nal->libnal_unmap != NULL) { - nal->libnal_unmap (nal, - md->md_niov, md->md_iov.iov, - &md->md_addrkey); - } - - if (md->eq != NULL) { - md->eq->eq_refcount--; - LASSERT (md->eq->eq_refcount >= 0); + if (md->md_eq != NULL) { + md->md_eq->eq_refcount--; + LASSERT (md->md_eq->eq_refcount >= 0); } list_del (&md->md_list); - lib_md_free(nal, md); + ptl_md_free(md); } -/* must be called with state lock held */ +/* must be called with PTL_LOCK held */ static int -lib_md_build(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd, int unlink) +lib_md_build(ptl_libmd_t *lmd, ptl_md_t *umd, int unlink) { - lib_eq_t *eq = NULL; - int rc; - int i; - int niov; - int total_length = 0; + ptl_eq_t *eq = NULL; + int i; + int niov; + int total_length = 0; /* NB we are passed an allocated, but uninitialised/active md. - * if we return success, caller may lib_md_unlink() it. - * otherwise caller may only lib_md_free() it. + * if we return success, caller may ptl_md_unlink() it. + * otherwise caller may only ptl_md_free() it. */ if (!PtlHandleIsEqual (umd->eq_handle, PTL_EQ_NONE)) { - eq = ptl_handle2eq(&umd->eq_handle, nal); + eq = ptl_handle2eq(&umd->eq_handle); if (eq == NULL) return PTL_EQ_INVALID; } @@ -110,15 +91,15 @@ lib_md_build(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd, int unlink) ((umd->options & PTL_MD_EVENT_START_DISABLE) != 0 && (umd->options & PTL_MD_EVENT_END_DISABLE) == 0)); - lmd->me = NULL; - lmd->start = umd->start; - lmd->offset = 0; - lmd->max_size = umd->max_size; - lmd->options = umd->options; - lmd->user_ptr = umd->user_ptr; - lmd->eq = eq; - lmd->threshold = umd->threshold; - lmd->pending = 0; + lmd->md_me = NULL; + lmd->md_start = umd->start; + lmd->md_offset = 0; + lmd->md_max_size = umd->max_size; + lmd->md_options = umd->options; + lmd->md_user_ptr = umd->user_ptr; + lmd->md_eq = eq; + lmd->md_threshold = umd->threshold; + lmd->md_pending = 0; lmd->md_flags = (unlink == PTL_UNLINK) ? PTL_MD_FLAG_AUTO_UNLINK : 0; if ((umd->options & PTL_MD_IOVEC) != 0) { @@ -138,28 +119,17 @@ lib_md_build(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd, int unlink) total_length += lmd->md_iov.iov[i].iov_len; } - lmd->length = total_length; + lmd->md_length = total_length; if ((umd->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */ (umd->max_size < 0 || umd->max_size > total_length)) // illegal max_size return PTL_MD_ILLEGAL; - if (nal->libnal_map != NULL) { - rc = nal->libnal_map (nal, niov, lmd->md_iov.iov, - &lmd->md_addrkey); - if (rc != PTL_OK) - return (rc); - } } else if ((umd->options & PTL_MD_KIOV) != 0) { #ifndef __KERNEL__ return PTL_MD_ILLEGAL; #else - /* Trap attempt to use paged I/O if unsupported early. */ - if (nal->libnal_send_pages == NULL || - nal->libnal_recv_pages == NULL) - return PTL_MD_INVALID; - lmd->md_niov = niov = umd->length; memcpy(lmd->md_iov.kiov, umd->start, niov * sizeof (lmd->md_iov.kiov[0])); @@ -173,22 +143,15 @@ lib_md_build(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd, int unlink) total_length += lmd->md_iov.kiov[i].kiov_len; } - lmd->length = total_length; + lmd->md_length = total_length; if ((umd->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */ (umd->max_size < 0 || umd->max_size > total_length)) // illegal max_size return PTL_MD_ILLEGAL; - - if (nal->libnal_map_pages != NULL) { - rc = nal->libnal_map_pages (nal, niov, lmd->md_iov.kiov, - &lmd->md_addrkey); - if (rc != PTL_OK) - return (rc); - } #endif } else { /* contiguous */ - lmd->length = umd->length; + lmd->md_length = umd->length; lmd->md_niov = niov = 1; lmd->md_iov.iov[0].iov_base = umd->start; lmd->md_iov.iov[0].iov_len = umd->length; @@ -197,185 +160,195 @@ lib_md_build(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd, int unlink) (umd->max_size < 0 || umd->max_size > umd->length)) // illegal max_size return PTL_MD_ILLEGAL; - - if (nal->libnal_map != NULL) { - rc = nal->libnal_map (nal, niov, lmd->md_iov.iov, - &lmd->md_addrkey); - if (rc != PTL_OK) - return (rc); - } } if (eq != NULL) eq->eq_refcount++; /* It's good; let handle2md succeed and add to active mds */ - lib_initialise_handle (nal, &lmd->md_lh, PTL_COOKIE_TYPE_MD); - list_add (&lmd->md_list, &nal->libnal_ni.ni_active_mds); + ptl_initialise_handle (&lmd->md_lh, PTL_COOKIE_TYPE_MD); + list_add (&lmd->md_list, &ptl_apini.apini_active_mds); return PTL_OK; } -/* must be called with state lock held */ +/* must be called with PTL_LOCK held */ void -lib_md_deconstruct(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd) +ptl_md_deconstruct(ptl_libmd_t *lmd, ptl_md_t *umd) { /* NB this doesn't copy out all the iov entries so when a * discontiguous MD is copied out, the target gets to know the * original iov pointer (in start) and the number of entries it had * and that's all. */ - umd->start = lmd->start; - umd->length = ((lmd->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0) ? - lmd->length : lmd->md_niov; - umd->threshold = lmd->threshold; - umd->max_size = lmd->max_size; - umd->options = lmd->options; - umd->user_ptr = lmd->user_ptr; - ptl_eq2handle(&umd->eq_handle, nal, lmd->eq); + umd->start = lmd->md_start; + umd->length = ((lmd->md_options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0) ? + lmd->md_length : lmd->md_niov; + umd->threshold = lmd->md_threshold; + umd->max_size = lmd->md_max_size; + umd->options = lmd->md_options; + umd->user_ptr = lmd->md_user_ptr; + ptl_eq2handle(&umd->eq_handle, lmd->md_eq); } -int -lib_api_md_attach(nal_t *apinal, ptl_handle_me_t *meh, - ptl_md_t *umd, ptl_unlink_t unlink, - ptl_handle_md_t *handle) +ptl_err_t +PtlMDAttach(ptl_handle_me_t meh, ptl_md_t umd, + ptl_unlink_t unlink, ptl_handle_md_t *handle) { - lib_nal_t *nal = apinal->nal_data; - lib_me_t *me; - lib_md_t *md; + ptl_me_t *me; + ptl_libmd_t *md; unsigned long flags; int rc; - if ((umd->options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 && - umd->length > PTL_MD_MAX_IOV) /* too many fragments */ + if (!ptl_init) + return PTL_NO_INIT; + + if (ptl_apini.apini_refcount == 0) + return PTL_NI_INVALID; + + if ((umd.options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 && + umd.length > PTL_MD_MAX_IOV) /* too many fragments */ return PTL_IOV_INVALID; - md = lib_md_alloc(nal, umd); + md = ptl_md_alloc(&umd); if (md == NULL) return PTL_NO_SPACE; - LIB_LOCK(nal, flags); + PTL_LOCK(flags); - me = ptl_handle2me(meh, nal); + me = ptl_handle2me(&meh); if (me == NULL) { rc = PTL_ME_INVALID; - } else if (me->md != NULL) { + } else if (me->me_md != NULL) { rc = PTL_ME_IN_USE; } else { - rc = lib_md_build(nal, md, umd, unlink); + rc = lib_md_build(md, &umd, unlink); if (rc == PTL_OK) { - me->md = md; - md->me = me; + me->me_md = md; + md->md_me = me; - ptl_md2handle(handle, nal, md); + ptl_md2handle(handle, md); - LIB_UNLOCK(nal, flags); + PTL_UNLOCK(flags); return (PTL_OK); } } - lib_md_free (nal, md); + ptl_md_free (md); - LIB_UNLOCK(nal, flags); + PTL_UNLOCK(flags); return (rc); } -int -lib_api_md_bind(nal_t *apinal, - ptl_md_t *umd, ptl_unlink_t unlink, - ptl_handle_md_t *handle) +ptl_err_t +PtlMDBind(ptl_handle_ni_t nih, ptl_md_t umd, + ptl_unlink_t unlink, ptl_handle_md_t *handle) { - lib_nal_t *nal = apinal->nal_data; - lib_md_t *md; + ptl_libmd_t *md; unsigned long flags; int rc; - if ((umd->options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 && - umd->length > PTL_MD_MAX_IOV) /* too many fragments */ + if (!ptl_init) + return PTL_NO_INIT; + + if (ptl_apini.apini_refcount == 0) + return PTL_NI_INVALID; + + if ((umd.options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 && + umd.length > PTL_MD_MAX_IOV) /* too many fragments */ return PTL_IOV_INVALID; - md = lib_md_alloc(nal, umd); + md = ptl_md_alloc(&umd); if (md == NULL) return PTL_NO_SPACE; - LIB_LOCK(nal, flags); + PTL_LOCK(flags); - rc = lib_md_build(nal, md, umd, unlink); + rc = lib_md_build(md, &umd, unlink); if (rc == PTL_OK) { - ptl_md2handle(handle, nal, md); + ptl_md2handle(handle, md); - LIB_UNLOCK(nal, flags); + PTL_UNLOCK(flags); return (PTL_OK); } - lib_md_free (nal, md); + ptl_md_free (md); - LIB_UNLOCK(nal, flags); + PTL_UNLOCK(flags); return (rc); } -int -lib_api_md_unlink (nal_t *apinal, ptl_handle_md_t *mdh) +ptl_err_t +PtlMDUnlink (ptl_handle_md_t mdh) { - lib_nal_t *nal = apinal->nal_data; ptl_event_t ev; - lib_md_t *md; + ptl_libmd_t *md; unsigned long flags; - LIB_LOCK(nal, flags); + if (!ptl_init) + return PTL_NO_INIT; - md = ptl_handle2md(mdh, nal); + if (ptl_apini.apini_refcount == 0) + return PTL_MD_INVALID; + + PTL_LOCK(flags); + + md = ptl_handle2md(&mdh); if (md == NULL) { - LIB_UNLOCK(nal, flags); + PTL_UNLOCK(flags); return PTL_MD_INVALID; } - /* If the MD is busy, lib_md_unlink just marks it for deletion, and + /* If the MD is busy, ptl_md_unlink just marks it for deletion, and * when the NAL is done, the completion event flags that the MD was * unlinked. Otherwise, we enqueue an event now... */ - if (md->eq != NULL && - md->pending == 0) { + if (md->md_eq != NULL && + md->md_pending == 0) { memset(&ev, 0, sizeof(ev)); ev.type = PTL_EVENT_UNLINK; ev.ni_fail_type = PTL_OK; ev.unlinked = 1; - lib_md_deconstruct(nal, md, &ev.md); - ptl_md2handle(&ev.md_handle, nal, md); + ptl_md_deconstruct(md, &ev.md); + ptl_md2handle(&ev.md_handle, md); - lib_enq_event_locked(nal, NULL, md->eq, &ev); + ptl_enq_event_locked(NULL, md->md_eq, &ev); } - lib_md_unlink(nal, md); + ptl_md_unlink(md); - LIB_UNLOCK(nal, flags); + PTL_UNLOCK(flags); return PTL_OK; } -int -lib_api_md_update (nal_t *apinal, - ptl_handle_md_t *mdh, - ptl_md_t *oldumd, ptl_md_t *newumd, - ptl_handle_eq_t *testqh) +ptl_err_t +PtlMDUpdate(ptl_handle_md_t mdh, + ptl_md_t *oldumd, ptl_md_t *newumd, + ptl_handle_eq_t testqh) { - lib_nal_t *nal = apinal->nal_data; - lib_md_t *md; - lib_eq_t *test_eq = NULL; + ptl_libmd_t *md; + ptl_eq_t *test_eq = NULL; unsigned long flags; int rc; - LIB_LOCK(nal, flags); + if (!ptl_init) + return PTL_NO_INIT; + + if (ptl_apini.apini_refcount == 0) + return PTL_MD_INVALID; + + PTL_LOCK(flags); - md = ptl_handle2md(mdh, nal); + md = ptl_handle2md(&mdh); if (md == NULL) { rc = PTL_MD_INVALID; goto out; } if (oldumd != NULL) - lib_md_deconstruct(nal, md, oldumd); + ptl_md_deconstruct(md, oldumd); if (newumd == NULL) { rc = PTL_OK; @@ -384,7 +357,7 @@ lib_api_md_update (nal_t *apinal, /* XXX fttb, the new MD must be the same "shape" wrt fragmentation, * since we simply overwrite the old lib-md */ - if ((((newumd->options ^ md->options) & + if ((((newumd->options ^ md->md_options) & (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0) || ((newumd->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0 && newumd->length != md->md_niov)) { @@ -392,35 +365,36 @@ lib_api_md_update (nal_t *apinal, goto out; } - if (!PtlHandleIsEqual (*testqh, PTL_EQ_NONE)) { - test_eq = ptl_handle2eq(testqh, nal); + if (!PtlHandleIsEqual (testqh, PTL_EQ_NONE)) { + test_eq = ptl_handle2eq(&testqh); if (test_eq == NULL) { rc = PTL_EQ_INVALID; goto out; } } - if (md->pending != 0) { + if (md->md_pending != 0) { rc = PTL_MD_NO_UPDATE; goto out; } if (test_eq == NULL || test_eq->eq_deq_seq == test_eq->eq_enq_seq) { - lib_me_t *me = md->me; + ptl_me_t *me = md->md_me; int unlink = (md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) ? PTL_UNLINK : PTL_RETAIN; // #warning this does not track eq refcounts properly - rc = lib_md_build(nal, md, newumd, unlink); + LBUG(); + rc = lib_md_build(md, newumd, unlink); - md->me = me; + md->md_me = me; } else { rc = PTL_MD_NO_UPDATE; } out: - LIB_UNLOCK(nal, flags); + PTL_UNLOCK(flags); return rc; } diff --git a/lnet/lnet/lib-me.c b/lnet/lnet/lib-me.c index cbc7c53..f521ed5 100644 --- a/lnet/lnet/lib-me.c +++ b/lnet/lnet/lib-me.c @@ -24,158 +24,164 @@ #define DEBUG_SUBSYSTEM S_PORTALS -#ifndef __KERNEL__ -# include -#else -# include -#endif - #include -int -lib_api_me_attach(nal_t *apinal, - ptl_pt_index_t portal, - ptl_process_id_t match_id, - ptl_match_bits_t match_bits, - ptl_match_bits_t ignore_bits, - ptl_unlink_t unlink, ptl_ins_pos_t pos, - ptl_handle_me_t *handle) +ptl_err_t +PtlMEAttach(ptl_handle_ni_t interface, + ptl_pt_index_t portal, + ptl_process_id_t match_id, + ptl_match_bits_t match_bits, + ptl_match_bits_t ignore_bits, + ptl_unlink_t unlink, ptl_ins_pos_t pos, + ptl_handle_me_t *handle) { - lib_nal_t *nal = apinal->nal_data; - lib_ni_t *ni = &nal->libnal_ni; - lib_ptl_t *tbl = &ni->ni_portals; - lib_me_t *me; - unsigned long flags; + ptl_me_t *me; + unsigned long flags; - if (portal >= tbl->size) + if (!ptl_init) + return PTL_NO_INIT; + + if (ptl_apini.apini_refcount == 0) + return PTL_NI_INVALID; + + if (portal >= ptl_apini.apini_nportals) return PTL_PT_INDEX_INVALID; - me = lib_me_alloc (nal); + me = ptl_me_alloc(); if (me == NULL) return PTL_NO_SPACE; - LIB_LOCK(nal, flags); + PTL_LOCK(flags); - me->match_id = match_id; - me->match_bits = match_bits; - me->ignore_bits = ignore_bits; - me->unlink = unlink; - me->md = NULL; + me->me_match_id = match_id; + me->me_match_bits = match_bits; + me->me_ignore_bits = ignore_bits; + me->me_unlink = unlink; + me->me_md = NULL; - lib_initialise_handle (nal, &me->me_lh, PTL_COOKIE_TYPE_ME); + ptl_initialise_handle (&me->me_lh, PTL_COOKIE_TYPE_ME); if (pos == PTL_INS_AFTER) - list_add_tail(&me->me_list, &(tbl->tbl[portal])); + list_add_tail(&me->me_list, &(ptl_apini.apini_portals[portal])); else - list_add(&me->me_list, &(tbl->tbl[portal])); + list_add(&me->me_list, &(ptl_apini.apini_portals[portal])); - ptl_me2handle(handle, nal, me); + ptl_me2handle(handle, me); - LIB_UNLOCK(nal, flags); + PTL_UNLOCK(flags); return PTL_OK; } -int -lib_api_me_insert(nal_t *apinal, - ptl_handle_me_t *current_meh, - ptl_process_id_t match_id, - ptl_match_bits_t match_bits, - ptl_match_bits_t ignore_bits, - ptl_unlink_t unlink, ptl_ins_pos_t pos, - ptl_handle_me_t *handle) +ptl_err_t +PtlMEInsert(ptl_handle_me_t current_meh, + ptl_process_id_t match_id, + ptl_match_bits_t match_bits, + ptl_match_bits_t ignore_bits, + ptl_unlink_t unlink, ptl_ins_pos_t pos, + ptl_handle_me_t *handle) { - lib_nal_t *nal = apinal->nal_data; - lib_me_t *current_me; - lib_me_t *new_me; + ptl_me_t *current_me; + ptl_me_t *new_me; unsigned long flags; - new_me = lib_me_alloc (nal); + if (!ptl_init) + return PTL_NO_INIT; + + if (ptl_apini.apini_refcount == 0) + return PTL_ME_INVALID; + + new_me = ptl_me_alloc(); if (new_me == NULL) return PTL_NO_SPACE; - LIB_LOCK(nal, flags); + PTL_LOCK(flags); - current_me = ptl_handle2me(current_meh, nal); + current_me = ptl_handle2me(¤t_meh); if (current_me == NULL) { - lib_me_free (nal, new_me); + ptl_me_free (new_me); - LIB_UNLOCK(nal, flags); + PTL_UNLOCK(flags); return PTL_ME_INVALID; } - new_me->match_id = match_id; - new_me->match_bits = match_bits; - new_me->ignore_bits = ignore_bits; - new_me->unlink = unlink; - new_me->md = NULL; + new_me->me_match_id = match_id; + new_me->me_match_bits = match_bits; + new_me->me_ignore_bits = ignore_bits; + new_me->me_unlink = unlink; + new_me->me_md = NULL; - lib_initialise_handle (nal, &new_me->me_lh, PTL_COOKIE_TYPE_ME); + ptl_initialise_handle (&new_me->me_lh, PTL_COOKIE_TYPE_ME); if (pos == PTL_INS_AFTER) list_add_tail(&new_me->me_list, ¤t_me->me_list); else list_add(&new_me->me_list, ¤t_me->me_list); - ptl_me2handle(handle, nal, new_me); + ptl_me2handle(handle, new_me); - LIB_UNLOCK(nal, flags); + PTL_UNLOCK(flags); return PTL_OK; } -int -lib_api_me_unlink (nal_t *apinal, ptl_handle_me_t *meh) +ptl_err_t +PtlMEUnlink(ptl_handle_me_t meh) { - lib_nal_t *nal = apinal->nal_data; unsigned long flags; - lib_me_t *me; + ptl_me_t *me; int rc; - LIB_LOCK(nal, flags); + if (!ptl_init) + return PTL_NO_INIT; + + if (ptl_apini.apini_refcount == 0) + return PTL_ME_INVALID; + + PTL_LOCK(flags); - me = ptl_handle2me(meh, nal); + me = ptl_handle2me(&meh); if (me == NULL) { rc = PTL_ME_INVALID; } else { - lib_me_unlink(nal, me); + ptl_me_unlink(me); rc = PTL_OK; } - LIB_UNLOCK(nal, flags); + PTL_UNLOCK(flags); return (rc); } -/* call with state_lock please */ +/* call with PTL_LOCK please */ void -lib_me_unlink(lib_nal_t *nal, lib_me_t *me) +ptl_me_unlink(ptl_me_t *me) { list_del (&me->me_list); - if (me->md) { - me->md->me = NULL; - lib_md_unlink(nal, me->md); + if (me->me_md) { + me->me_md->md_me = NULL; + ptl_md_unlink(me->me_md); } - lib_invalidate_handle (nal, &me->me_lh); - lib_me_free(nal, me); + ptl_invalidate_handle (&me->me_lh); + ptl_me_free(me); } #if 0 static void -lib_me_dump(lib_nal_t *nal, lib_me_t * me) +lib_me_dump(ptl_me_t *me) { CWARN("Match Entry %p ("LPX64")\n", me, me->me_lh.lh_cookie); CWARN("\tMatch/Ignore\t= %016lx / %016lx\n", - me->match_bits, me->ignore_bits); + me->me_match_bits, me->me_ignore_bits); CWARN("\tMD\t= %p\n", me->md); CWARN("\tprev\t= %p\n", - list_entry(me->me_list.prev, lib_me_t, me_list)); + list_entry(me->me_list.prev, ptl_me_t, me_list)); CWARN("\tnext\t= %p\n", - list_entry(me->me_list.next, lib_me_t, me_list)); + list_entry(me->me_list.next, ptl_me_t, me_list)); } #endif diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index 5339b6d..44b86e6 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -24,123 +24,123 @@ #define DEBUG_SUBSYSTEM S_PORTALS +#if 0 #ifndef __KERNEL__ # include #else # include #endif -#include +#endif #include /* forward ref */ -static void lib_commit_md (lib_nal_t *nal, lib_md_t *md, lib_msg_t *msg); -static ptl_err_t do_lib_parse(lib_nal_t *nal, ptl_hdr_t *hdr, +static void ptl_commit_md (ptl_libmd_t *md, ptl_msg_t *msg); +static ptl_err_t do_ptl_parse(ptl_ni_t *ni, ptl_hdr_t *hdr, void *private, int loopback); -static lib_md_t * -lib_match_md(lib_nal_t *nal, int index, int op_mask, +static ptl_libmd_t * +ptl_match_md(int index, int op_mask, ptl_nid_t src_nid, ptl_pid_t src_pid, ptl_size_t rlength, ptl_size_t roffset, - ptl_match_bits_t match_bits, lib_msg_t *msg, + ptl_match_bits_t match_bits, ptl_msg_t *msg, ptl_size_t *mlength_out, ptl_size_t *offset_out) { - lib_ni_t *ni = &nal->libnal_ni; - struct list_head *match_list = &ni->ni_portals.tbl[index]; + struct list_head *match_list = &ptl_apini.apini_portals[index]; struct list_head *tmp; - lib_me_t *me; - lib_md_t *md; + ptl_me_t *me; + ptl_libmd_t *md; ptl_size_t mlength; ptl_size_t offset; ENTRY; - CDEBUG (D_NET, "Request from "LPU64".%d of length %d into portal %d " + CDEBUG (D_NET, "Request from "LPX64".%d of length %d into portal %d " "MB="LPX64"\n", src_nid, src_pid, rlength, index, match_bits); - if (index < 0 || index >= ni->ni_portals.size) { + if (index < 0 || index >= ptl_apini.apini_nportals) { CERROR("Invalid portal %d not in [0-%d]\n", - index, ni->ni_portals.size); + index, ptl_apini.apini_nportals); goto failed; } list_for_each (tmp, match_list) { - me = list_entry(tmp, lib_me_t, me_list); - md = me->md; + me = list_entry(tmp, ptl_me_t, me_list); + md = me->me_md; /* ME attached but MD not attached yet */ if (md == NULL) continue; - LASSERT (me == md->me); + LASSERT (me == md->md_me); /* mismatched MD op */ - if ((md->options & op_mask) == 0) + if ((md->md_options & op_mask) == 0) continue; /* MD exhausted */ - if (lib_md_exhausted(md)) + if (ptl_md_exhausted(md)) continue; /* mismatched ME nid/pid? */ - if (me->match_id.nid != PTL_NID_ANY && - me->match_id.nid != src_nid) + if (me->me_match_id.nid != PTL_NID_ANY && + me->me_match_id.nid != src_nid) continue; CDEBUG(D_NET, "match_id.pid [%x], src_pid [%x]\n", - me->match_id.pid, src_pid); + me->me_match_id.pid, src_pid); - if (me->match_id.pid != PTL_PID_ANY && - me->match_id.pid != src_pid) + if (me->me_match_id.pid != PTL_PID_ANY && + me->me_match_id.pid != src_pid) continue; /* mismatched ME matchbits? */ - if (((me->match_bits ^ match_bits) & ~me->ignore_bits) != 0) + if (((me->me_match_bits ^ match_bits) & ~me->me_ignore_bits) != 0) continue; /* Hurrah! This _is_ a match; check it out... */ - if ((md->options & PTL_MD_MANAGE_REMOTE) == 0) - offset = md->offset; + if ((md->md_options & PTL_MD_MANAGE_REMOTE) == 0) + offset = md->md_offset; else offset = roffset; - if ((md->options & PTL_MD_MAX_SIZE) != 0) { - mlength = md->max_size; - LASSERT (md->offset + mlength <= md->length); + if ((md->md_options & PTL_MD_MAX_SIZE) != 0) { + mlength = md->md_max_size; + LASSERT (md->md_offset + mlength <= md->md_length); } else { - mlength = md->length - offset; + mlength = md->md_length - offset; } if (rlength <= mlength) { /* fits in allowed space */ mlength = rlength; - } else if ((md->options & PTL_MD_TRUNCATE) == 0) { + } else if ((md->md_options & PTL_MD_TRUNCATE) == 0) { /* this packet _really_ is too big */ CERROR("Matching packet %d too big: %d left, " - "%d allowed\n", rlength, md->length - offset, + "%d allowed\n", rlength, md->md_length - offset, mlength); goto failed; } /* Commit to this ME/MD */ - CDEBUG(D_NET, "Incoming %s index %x from "LPU64"/%u of " + CDEBUG(D_NET, "Incoming %s index %x from "LPX64"/%u of " "length %d/%d into md "LPX64" [%d] + %d\n", (op_mask == PTL_MD_OP_PUT) ? "put" : "get", index, src_nid, src_pid, mlength, rlength, md->md_lh.lh_cookie, md->md_niov, offset); - lib_commit_md(nal, md, msg); - md->offset = offset + mlength; + ptl_commit_md(md, msg); + md->md_offset = offset + mlength; /* NB Caller sets ev.type and ev.hdr_data */ - msg->ev.initiator.nid = src_nid; - msg->ev.initiator.pid = src_pid; - msg->ev.pt_index = index; - msg->ev.match_bits = match_bits; - msg->ev.rlength = rlength; - msg->ev.mlength = mlength; - msg->ev.offset = offset; + msg->msg_ev.initiator.nid = src_nid; + msg->msg_ev.initiator.pid = src_pid; + msg->msg_ev.pt_index = index; + msg->msg_ev.match_bits = match_bits; + msg->msg_ev.rlength = rlength; + msg->msg_ev.mlength = mlength; + msg->msg_ev.offset = offset; - lib_md_deconstruct(nal, md, &msg->ev.md); - ptl_md2handle(&msg->ev.md_handle, nal, md); + ptl_md_deconstruct(md, &msg->msg_ev.md); + ptl_md2handle(&msg->msg_ev.md_handle, md); *offset_out = offset; *mlength_out = mlength; @@ -149,29 +149,36 @@ lib_match_md(lib_nal_t *nal, int index, int op_mask, * We bumped md->pending above so the MD just gets flagged * for unlink when it is finalized. */ if ((md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) != 0 && - lib_md_exhausted(md)) - lib_md_unlink(nal, md); + ptl_md_exhausted(md)) + ptl_md_unlink(md); RETURN (md); } failed: - CERROR (LPU64": Dropping %s from "LPU64".%d portal %d match "LPX64 + CERROR ("Dropping %s from "LPX64".%d portal %d match "LPX64 " offset %d length %d: no match\n", - ni->ni_pid.nid, (op_mask == PTL_MD_OP_GET) ? "GET" : "PUT", + (op_mask == PTL_MD_OP_GET) ? "GET" : "PUT", src_nid, src_pid, index, match_bits, roffset, rlength); RETURN(NULL); } -int lib_api_fail_nid (nal_t *apinal, ptl_nid_t nid, unsigned int threshold) +ptl_err_t +PtlFailNid (ptl_handle_ni_t interface, + ptl_nid_t nid, unsigned int threshold) { - lib_nal_t *nal = apinal->nal_data; - lib_test_peer_t *tp; + ptl_test_peer_t *tp; unsigned long flags; struct list_head *el; struct list_head *next; struct list_head cull; + if (!ptl_init) + return PTL_NO_INIT; + + if (ptl_apini.apini_refcount == 0) + return PTL_HANDLE_INVALID; + if (threshold != 0) { /* Adding a new entry */ PORTAL_ALLOC(tp, sizeof(*tp)); @@ -181,19 +188,19 @@ int lib_api_fail_nid (nal_t *apinal, ptl_nid_t nid, unsigned int threshold) tp->tp_nid = nid; tp->tp_threshold = threshold; - LIB_LOCK(nal, flags); - list_add_tail (&tp->tp_list, &nal->libnal_ni.ni_test_peers); - LIB_UNLOCK(nal, flags); + PTL_LOCK(flags); + list_add_tail (&tp->tp_list, &ptl_apini.apini_test_peers); + PTL_UNLOCK(flags); return PTL_OK; } /* removing entries */ CFS_INIT_LIST_HEAD (&cull); - LIB_LOCK(nal, flags); + PTL_LOCK(flags); - list_for_each_safe (el, next, &nal->libnal_ni.ni_test_peers) { - tp = list_entry (el, lib_test_peer_t, tp_list); + list_for_each_safe (el, next, &ptl_apini.apini_test_peers) { + tp = list_entry (el, ptl_test_peer_t, tp_list); if (tp->tp_threshold == 0 || /* needs culling anyway */ nid == PTL_NID_ANY || /* removing all entries */ @@ -204,10 +211,10 @@ int lib_api_fail_nid (nal_t *apinal, ptl_nid_t nid, unsigned int threshold) } } - LIB_UNLOCK(nal, flags); + PTL_UNLOCK(flags); while (!list_empty (&cull)) { - tp = list_entry (cull.next, lib_test_peer_t, tp_list); + tp = list_entry (cull.next, ptl_test_peer_t, tp_list); list_del (&tp->tp_list); PORTAL_FREE(tp, sizeof (*tp)); @@ -215,23 +222,10 @@ int lib_api_fail_nid (nal_t *apinal, ptl_nid_t nid, unsigned int threshold) return PTL_OK; } -int -lib_api_loopback (nal_t *apinal, int set, int *enabled) -{ - lib_nal_t *nal = apinal->nal_data; - - if (set) - nal->libnal_ni.ni_loopback = *enabled; - else - *enabled = nal->libnal_ni.ni_loopback; - - return PTL_OK; -} - static int -fail_peer (lib_nal_t *nal, ptl_nid_t nid, int outgoing) +fail_peer (ptl_nid_t nid, int outgoing) { - lib_test_peer_t *tp; + ptl_test_peer_t *tp; struct list_head *el; struct list_head *next; unsigned long flags; @@ -240,10 +234,10 @@ fail_peer (lib_nal_t *nal, ptl_nid_t nid, int outgoing) CFS_INIT_LIST_HEAD (&cull); - LIB_LOCK (nal, flags); + PTL_LOCK(flags); - list_for_each_safe (el, next, &nal->libnal_ni.ni_test_peers) { - tp = list_entry (el, lib_test_peer_t, tp_list); + list_for_each_safe (el, next, &ptl_apini.apini_test_peers) { + tp = list_entry (el, ptl_test_peer_t, tp_list); if (tp->tp_threshold == 0) { /* zombie entry */ @@ -274,10 +268,10 @@ fail_peer (lib_nal_t *nal, ptl_nid_t nid, int outgoing) } } - LIB_UNLOCK (nal, flags); + PTL_UNLOCK (flags); while (!list_empty (&cull)) { - tp = list_entry (cull.next, lib_test_peer_t, tp_list); + tp = list_entry (cull.next, ptl_test_peer_t, tp_list); list_del (&tp->tp_list); PORTAL_FREE(tp, sizeof (*tp)); @@ -287,7 +281,7 @@ fail_peer (lib_nal_t *nal, ptl_nid_t nid, int outgoing) } ptl_size_t -lib_iov_nob (int niov, struct iovec *iov) +ptl_iov_nob (int niov, struct iovec *iov) { ptl_size_t nob = 0; @@ -298,7 +292,7 @@ lib_iov_nob (int niov, struct iovec *iov) } void -lib_copy_iov2buf (char *dest, int niov, struct iovec *iov, +ptl_copy_iov2buf (char *dest, int niov, struct iovec *iov, ptl_size_t offset, ptl_size_t len) { ptl_size_t nob; @@ -329,7 +323,7 @@ lib_copy_iov2buf (char *dest, int niov, struct iovec *iov, } void -lib_copy_buf2iov (int niov, struct iovec *iov, ptl_size_t offset, +ptl_copy_buf2iov (int niov, struct iovec *iov, ptl_size_t offset, char *src, ptl_size_t len) { ptl_size_t nob; @@ -360,7 +354,7 @@ lib_copy_buf2iov (int niov, struct iovec *iov, ptl_size_t offset, } int -lib_extract_iov (int dst_niov, struct iovec *dst, +ptl_extract_iov (int dst_niov, struct iovec *dst, int src_niov, struct iovec *src, ptl_size_t offset, ptl_size_t len) { @@ -407,28 +401,28 @@ lib_extract_iov (int dst_niov, struct iovec *dst, #ifndef __KERNEL__ ptl_size_t -lib_kiov_nob (int niov, ptl_kiov_t *kiov) +ptl_kiov_nob (int niov, ptl_kiov_t *kiov) { LASSERT (0); return (0); } void -lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov, +ptl_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov, ptl_size_t offset, ptl_size_t len) { LASSERT (0); } void -lib_copy_buf2kiov (int niov, ptl_kiov_t *kiov, ptl_size_t offset, +ptl_copy_buf2kiov (int niov, ptl_kiov_t *kiov, ptl_size_t offset, char *src, ptl_size_t len) { LASSERT (0); } int -lib_extract_kiov (int dst_niov, ptl_kiov_t *dst, +ptl_extract_kiov (int dst_niov, ptl_kiov_t *dst, int src_niov, ptl_kiov_t *src, ptl_size_t offset, ptl_size_t len) { @@ -436,7 +430,7 @@ lib_extract_kiov (int dst_niov, ptl_kiov_t *dst, } ptl_err_t -lib_lo_rxkiov(lib_nal_t *nal, void *private, lib_msg_t *libmsg, +ptl_lo_rxkiov(ptl_ni_t *ni, void *private, ptl_msg_t *libmsg, unsigned int niov, ptl_kiov_t *kiov, size_t offset, size_t mlen, size_t rlen) { @@ -444,7 +438,7 @@ lib_lo_rxkiov(lib_nal_t *nal, void *private, lib_msg_t *libmsg, } ptl_err_t -lib_lo_txkiov (lib_nal_t *nal, void *private, lib_msg_t *libmsg, +ptl_lo_txkiov (ptl_ni_t *ni, void *private, ptl_msg_t *libmsg, ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, unsigned int payload_niov, ptl_kiov_t *payload_kiov, size_t payload_offset, size_t payload_nob) @@ -455,7 +449,7 @@ lib_lo_txkiov (lib_nal_t *nal, void *private, lib_msg_t *libmsg, #else /* __KERNEL__ */ ptl_size_t -lib_kiov_nob (int niov, ptl_kiov_t *kiov) +ptl_kiov_nob (int niov, ptl_kiov_t *kiov) { ptl_size_t nob = 0; @@ -466,7 +460,7 @@ lib_kiov_nob (int niov, ptl_kiov_t *kiov) } void -lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov, +ptl_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov, ptl_size_t offset, ptl_size_t len) { ptl_size_t nob; @@ -503,7 +497,7 @@ lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov, } void -lib_copy_buf2kiov (int niov, ptl_kiov_t *kiov, ptl_size_t offset, +ptl_copy_buf2kiov (int niov, ptl_kiov_t *kiov, ptl_size_t offset, char *src, ptl_size_t len) { ptl_size_t nob; @@ -540,7 +534,7 @@ lib_copy_buf2kiov (int niov, ptl_kiov_t *kiov, ptl_size_t offset, } int -lib_extract_kiov (int dst_niov, ptl_kiov_t *dst, +ptl_extract_kiov (int dst_niov, ptl_kiov_t *dst, int src_niov, ptl_kiov_t *src, ptl_size_t offset, ptl_size_t len) { @@ -601,9 +595,9 @@ lib_extract_kiov (int dst_niov, ptl_kiov_t *dst, #endif ptl_err_t -lib_lo_rxkiov(lib_nal_t *nal, +ptl_lo_rxkiov(ptl_ni_t *ni, void *private, - lib_msg_t *libmsg, + ptl_msg_t *libmsg, unsigned int niov, ptl_kiov_t *kiov, size_t offset, @@ -693,14 +687,14 @@ lib_lo_rxkiov(lib_nal_t *nal, if (srcaddr != NULL) cfs_kunmap(lod->lod_iov.kiov->kiov_page); - lib_finalize(nal, private, libmsg, PTL_OK); + ptl_finalize(ni, private, libmsg, PTL_OK); return PTL_OK; } ptl_err_t -lib_lo_txkiov (lib_nal_t *nal, +ptl_lo_txkiov (ptl_ni_t *ni, void *private, - lib_msg_t *libmsg, + ptl_msg_t *libmsg, ptl_hdr_t *hdr, int type, ptl_nid_t nid, @@ -718,18 +712,18 @@ lib_lo_txkiov (lib_nal_t *nal, .lod_iov = { .kiov = payload_kiov } }; ptl_err_t rc; - rc = do_lib_parse(nal, hdr, &lod, 1); + rc = do_ptl_parse(ni, hdr, &lod, 1); if (rc == PTL_OK) - lib_finalize(nal, private, libmsg, PTL_OK); + ptl_finalize(ni, private, libmsg, PTL_OK); return rc; } #endif ptl_err_t -lib_lo_rxiov(lib_nal_t *nal, +ptl_lo_rxiov(ptl_ni_t *ni, void *private, - lib_msg_t *libmsg, + ptl_msg_t *libmsg, unsigned int niov, struct iovec *iov, size_t offset, @@ -790,14 +784,14 @@ lib_lo_rxiov(lib_nal_t *nal, mlen -= fraglen; } while (mlen > 0); - lib_finalize(nal, private, libmsg, PTL_OK); + ptl_finalize(ni, private, libmsg, PTL_OK); return PTL_OK; } ptl_err_t -lib_lo_txiov (lib_nal_t *nal, +ptl_lo_txiov (ptl_ni_t *ni, void *private, - lib_msg_t *libmsg, + ptl_msg_t *libmsg, ptl_hdr_t *hdr, int type, ptl_nid_t nid, @@ -815,123 +809,124 @@ lib_lo_txiov (lib_nal_t *nal, .lod_iov = { .iov = payload_iov } }; ptl_err_t rc; - rc = do_lib_parse(nal, hdr, &lod, 1); + rc = do_ptl_parse(ni, hdr, &lod, 1); if (rc == PTL_OK) - lib_finalize(nal, private, libmsg, PTL_OK); + ptl_finalize(ni, private, libmsg, PTL_OK); return rc; } ptl_err_t -lib_lo_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, lib_md_t *md, +ptl_lo_recv (ptl_ni_t *ni, void *private, ptl_msg_t *msg, ptl_libmd_t *md, ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen) { if (mlen == 0) { - lib_finalize(nal, private, msg, PTL_OK); + ptl_finalize(ni, private, msg, PTL_OK); return PTL_OK; } - if ((md->options & PTL_MD_KIOV) == 0) - return lib_lo_rxiov(nal, private, msg, + if ((md->md_options & PTL_MD_KIOV) == 0) + return ptl_lo_rxiov(ni, private, msg, md->md_niov, md->md_iov.iov, offset, mlen, rlen); - return lib_lo_rxkiov(nal, private, msg, + return ptl_lo_rxkiov(ni, private, msg, md->md_niov, md->md_iov.kiov, offset, mlen, rlen); } ptl_err_t -lib_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, lib_md_t *md, +ptl_recv (ptl_ni_t *ni, void *private, ptl_msg_t *msg, ptl_libmd_t *md, ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen) { if (mlen == 0) - return (nal->libnal_recv(nal, private, msg, - 0, NULL, - offset, mlen, rlen)); - - if ((md->options & PTL_MD_KIOV) == 0) - return (nal->libnal_recv(nal, private, msg, - md->md_niov, md->md_iov.iov, - offset, mlen, rlen)); - - return (nal->libnal_recv_pages(nal, private, msg, - md->md_niov, md->md_iov.kiov, - offset, mlen, rlen)); + return ((ni->ni_nal->nal_recv)(ni, private, msg, + 0, NULL, + offset, mlen, rlen)); + + if ((md->md_options & PTL_MD_KIOV) == 0) + return ((ni->ni_nal->nal_recv)(ni, private, msg, + md->md_niov, md->md_iov.iov, + offset, mlen, rlen)); + + return ((ni->ni_nal->nal_recv_pages)(ni, private, msg, + md->md_niov, md->md_iov.kiov, + offset, mlen, rlen)); } ptl_err_t -lib_send (lib_nal_t *nal, void *private, lib_msg_t *msg, +ptl_send (ptl_ni_t *ni, void *private, ptl_msg_t *msg, ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - lib_md_t *md, ptl_size_t offset, ptl_size_t len) + ptl_libmd_t *md, ptl_size_t offset, ptl_size_t len) { - int loopback = (nal->libnal_ni.ni_loopback && - (nid == nal->libnal_ni.ni_pid.nid)); + int loopback = (nid == ni->ni_nid); if (len == 0) { if (loopback) - return lib_lo_txiov(nal, private, msg, + return ptl_lo_txiov(ni, private, msg, hdr, type, nid, pid, 0, NULL, offset, len); else - return nal->libnal_send(nal, private, msg, - hdr, type, nid, pid, - 0, NULL, - offset, len); + return (ni->ni_nal->nal_send)(ni, private, msg, + hdr, type, nid, pid, + 0, NULL, + offset, len); } - if ((md->options & PTL_MD_KIOV) == 0) { + if ((md->md_options & PTL_MD_KIOV) == 0) { if (loopback) - return lib_lo_txiov(nal, private, msg, + return ptl_lo_txiov(ni, private, msg, hdr, type, nid, pid, md->md_niov, md->md_iov.iov, offset, len); else - return nal->libnal_send(nal, private, msg, - hdr, type, nid, pid, - md->md_niov, md->md_iov.iov, - offset, len); + return (ni->ni_nal->nal_send)(ni, private, msg, + hdr, type, nid, pid, + md->md_niov, + md->md_iov.iov, + offset, len); } if (loopback) - return lib_lo_txkiov(nal, private, msg, + return ptl_lo_txkiov(ni, private, msg, hdr, type, nid, pid, md->md_niov, md->md_iov.kiov, offset, len); else - return nal->libnal_send_pages(nal, private, msg, - hdr, type, nid, pid, - md->md_niov, md->md_iov.kiov, - offset, len); + return (ni->ni_nal->nal_send_pages)(ni, private, msg, + hdr, type, nid, pid, + md->md_niov, + md->md_iov.kiov, + offset, len); } static void -lib_commit_md (lib_nal_t *nal, lib_md_t *md, lib_msg_t *msg) +ptl_commit_md (ptl_libmd_t *md, ptl_msg_t *msg) { - /* ALWAYS called holding the LIB_LOCK */ - lib_counters_t *counters = &nal->libnal_ni.ni_counters; - + /* ALWAYS called holding the PTL_LOCK */ /* Here, we commit the MD to a network OP by marking it busy and * decrementing its threshold. Come what may, the network "owns" - * the MD until a call to lib_finalize() signals completion. */ - msg->md = md; + * the MD until a call to ptl_finalize() signals completion. */ + msg->msg_md = md; - md->pending++; - if (md->threshold != PTL_MD_THRESH_INF) { - LASSERT (md->threshold > 0); - md->threshold--; + md->md_pending++; + if (md->md_threshold != PTL_MD_THRESH_INF) { + LASSERT (md->md_threshold > 0); + md->md_threshold--; } - counters->msgs_alloc++; - if (counters->msgs_alloc > counters->msgs_max) - counters->msgs_max = counters->msgs_alloc; + ptl_apini.apini_counters.msgs_alloc++; + if (ptl_apini.apini_counters.msgs_alloc > + ptl_apini.apini_counters.msgs_max) + ptl_apini.apini_counters.msgs_max = + ptl_apini.apini_counters.msgs_alloc; - list_add (&msg->msg_list, &nal->libnal_ni.ni_active_msgs); + list_add (&msg->msg_list, &ptl_apini.apini_active_msgs); } static void -lib_drop_message (lib_nal_t *nal, void *private, ptl_hdr_t *hdr, int loopback) +ptl_drop_message (ptl_ni_t *ni, void *private, ptl_hdr_t *hdr, int loopback) { unsigned long flags; @@ -939,14 +934,14 @@ lib_drop_message (lib_nal_t *nal, void *private, ptl_hdr_t *hdr, int loopback) * to receive (init_msg() not called) and therefore can't cause an * event. */ - LIB_LOCK(nal, flags); - nal->libnal_ni.ni_counters.drop_count++; - nal->libnal_ni.ni_counters.drop_length += hdr->payload_length; - LIB_UNLOCK(nal, flags); + PTL_LOCK(flags); + ptl_apini.apini_counters.drop_count++; + ptl_apini.apini_counters.drop_length += hdr->payload_length; + PTL_UNLOCK(flags); - /* NULL msg => if NAL calls lib_finalize it will be a noop */ + /* NULL msg => if NAL calls ptl_finalize it will be a noop */ if (!loopback) - (void) lib_recv(nal, private, NULL, NULL, 0, 0, + (void) ptl_recv(ni, private, NULL, NULL, 0, 0, hdr->payload_length); } @@ -958,14 +953,13 @@ lib_drop_message (lib_nal_t *nal, void *private, ptl_hdr_t *hdr, int loopback) * */ static ptl_err_t -parse_put(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, - lib_msg_t *msg, int loopback) +ptl_parse_put(ptl_ni_t *ni, ptl_hdr_t *hdr, void *private, + ptl_msg_t *msg, int loopback) { - lib_ni_t *ni = &nal->libnal_ni; ptl_size_t mlength = 0; ptl_size_t offset = 0; ptl_err_t rc; - lib_md_t *md; + ptl_libmd_t *md; unsigned long flags; /* Convert put fields to host byte order */ @@ -973,53 +967,52 @@ parse_put(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, hdr->msg.put.ptl_index = le32_to_cpu(hdr->msg.put.ptl_index); hdr->msg.put.offset = le32_to_cpu(hdr->msg.put.offset); - LIB_LOCK(nal, flags); + PTL_LOCK(flags); - md = lib_match_md(nal, hdr->msg.put.ptl_index, PTL_MD_OP_PUT, + md = ptl_match_md(hdr->msg.put.ptl_index, PTL_MD_OP_PUT, hdr->src_nid, hdr->src_pid, hdr->payload_length, hdr->msg.put.offset, hdr->msg.put.match_bits, msg, &mlength, &offset); if (md == NULL) { - LIB_UNLOCK(nal, flags); + PTL_UNLOCK(flags); return (PTL_FAIL); } - msg->ev.type = PTL_EVENT_PUT_END; - msg->ev.hdr_data = hdr->msg.put.hdr_data; + msg->msg_ev.type = PTL_EVENT_PUT_END; + msg->msg_ev.hdr_data = hdr->msg.put.hdr_data; if (!ptl_is_wire_handle_none(&hdr->msg.put.ack_wmd) && - !(md->options & PTL_MD_ACK_DISABLE)) { - msg->ack_wmd = hdr->msg.put.ack_wmd; + !(md->md_options & PTL_MD_ACK_DISABLE)) { + msg->msg_ack_wmd = hdr->msg.put.ack_wmd; } - ni->ni_counters.recv_count++; - ni->ni_counters.recv_length += mlength; + ptl_apini.apini_counters.recv_count++; + ptl_apini.apini_counters.recv_length += mlength; - LIB_UNLOCK(nal, flags); + PTL_UNLOCK(flags); if (loopback) - rc = lib_lo_recv(nal, private, msg, md, offset, mlength, + rc = ptl_lo_recv(ni, private, msg, md, offset, mlength, hdr->payload_length); else - rc = lib_recv(nal, private, msg, md, offset, mlength, + rc = ptl_recv(ni, private, msg, md, offset, mlength, hdr->payload_length); if (rc != PTL_OK) - CERROR(LPU64": error on receiving PUT from "LPU64": %d\n", - ni->ni_pid.nid, hdr->src_nid, rc); + CERROR(LPX64": error on receiving PUT from "LPX64": %d\n", + ni->ni_nid, hdr->src_nid, rc); return (rc); } static ptl_err_t -parse_get(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, - lib_msg_t *msg, int loopback) +ptl_parse_get(ptl_ni_t *ni, ptl_hdr_t *hdr, void *private, + ptl_msg_t *msg, int loopback) { - lib_ni_t *ni = &nal->libnal_ni; ptl_size_t mlength = 0; ptl_size_t offset = 0; - lib_md_t *md; + ptl_libmd_t *md; ptl_hdr_t reply; unsigned long flags; int rc; @@ -1030,182 +1023,181 @@ parse_get(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, hdr->msg.get.sink_length = le32_to_cpu(hdr->msg.get.sink_length); hdr->msg.get.src_offset = le32_to_cpu(hdr->msg.get.src_offset); - LIB_LOCK(nal, flags); + PTL_LOCK(flags); - md = lib_match_md(nal, hdr->msg.get.ptl_index, PTL_MD_OP_GET, + md = ptl_match_md(hdr->msg.get.ptl_index, PTL_MD_OP_GET, hdr->src_nid, hdr->src_pid, hdr->msg.get.sink_length, hdr->msg.get.src_offset, hdr->msg.get.match_bits, msg, &mlength, &offset); if (md == NULL) { - LIB_UNLOCK(nal, flags); + PTL_UNLOCK(flags); return (PTL_FAIL); } - msg->ev.type = PTL_EVENT_GET_END; - msg->ev.hdr_data = 0; + msg->msg_ev.type = PTL_EVENT_GET_END; + msg->msg_ev.hdr_data = 0; - ni->ni_counters.send_count++; - ni->ni_counters.send_length += mlength; + ptl_apini.apini_counters.send_count++; + ptl_apini.apini_counters.send_length += mlength; - LIB_UNLOCK(nal, flags); + PTL_UNLOCK(flags); memset (&reply, 0, sizeof (reply)); reply.type = cpu_to_le32(PTL_MSG_REPLY); reply.dest_nid = cpu_to_le64(hdr->src_nid); reply.dest_pid = cpu_to_le32(hdr->src_pid); - reply.src_nid = cpu_to_le64(ni->ni_pid.nid); - reply.src_pid = cpu_to_le32(ni->ni_pid.pid); + reply.src_nid = cpu_to_le64(ni->ni_nid); + reply.src_pid = cpu_to_le32(ptl_apini.apini_pid); reply.payload_length = cpu_to_le32(mlength); reply.msg.reply.dst_wmd = hdr->msg.get.return_wmd; - /* NB call lib_send() _BEFORE_ lib_recv() completes the incoming + /* NB call ptl_send() _BEFORE_ ptl_recv() completes the incoming * message. Some NALs _require_ this to implement optimized GET */ - rc = lib_send (nal, private, msg, &reply, PTL_MSG_REPLY, + rc = ptl_send (ni, private, msg, &reply, PTL_MSG_REPLY, hdr->src_nid, hdr->src_pid, md, offset, mlength); if (rc != PTL_OK) - CERROR(LPU64": Unable to send REPLY for GET from "LPU64": %d\n", - ni->ni_pid.nid, hdr->src_nid, rc); + CERROR(LPX64": Unable to send REPLY for GET from "LPX64": %d\n", + ni->ni_nid, hdr->src_nid, rc); /* Discard any junk after the hdr */ if (!loopback) - (void) lib_recv(nal, private, NULL, NULL, 0, 0, + (void) ptl_recv(ni, private, NULL, NULL, 0, 0, hdr->payload_length); return (rc); } static ptl_err_t -parse_reply(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, - lib_msg_t *msg, int loopback) +ptl_parse_reply(ptl_ni_t *ni, ptl_hdr_t *hdr, void *private, + ptl_msg_t *msg, int loopback) { - lib_ni_t *ni = &nal->libnal_ni; - lib_md_t *md; + ptl_libmd_t *md; int rlength; int length; unsigned long flags; ptl_err_t rc; - LIB_LOCK(nal, flags); + PTL_LOCK(flags); /* NB handles only looked up by creator (no flips) */ - md = ptl_wire_handle2md(&hdr->msg.reply.dst_wmd, nal); - if (md == NULL || md->threshold == 0) { - CERROR (LPU64": Dropping REPLY from "LPU64" for %s MD "LPX64"."LPX64"\n", - ni->ni_pid.nid, hdr->src_nid, - md == NULL ? "invalid" : "inactive", + md = ptl_wire_handle2md(&hdr->msg.reply.dst_wmd); + if (md == NULL || md->md_threshold == 0) { + CERROR (LPX64": Dropping REPLY from "LPX64" for %s MD " + LPX64"."LPX64"\n", ni->ni_nid, hdr->src_nid, + (md == NULL) ? "invalid" : "inactive", hdr->msg.reply.dst_wmd.wh_interface_cookie, hdr->msg.reply.dst_wmd.wh_object_cookie); - LIB_UNLOCK(nal, flags); + PTL_UNLOCK(flags); return (PTL_FAIL); } - LASSERT (md->offset == 0); + LASSERT (md->md_offset == 0); length = rlength = hdr->payload_length; - if (length > md->length) { - if ((md->options & PTL_MD_TRUNCATE) == 0) { - CERROR (LPU64": Dropping REPLY from "LPU64 + if (length > md->md_length) { + if ((md->md_options & PTL_MD_TRUNCATE) == 0) { + CERROR (LPX64": Dropping REPLY from "LPX64 " length %d for MD "LPX64" would overflow (%d)\n", - ni->ni_pid.nid, hdr->src_nid, length, + ni->ni_nid, hdr->src_nid, length, hdr->msg.reply.dst_wmd.wh_object_cookie, - md->length); - LIB_UNLOCK(nal, flags); + md->md_length); + PTL_UNLOCK(flags); return (PTL_FAIL); } - length = md->length; + length = md->md_length; } - CDEBUG(D_NET, "Reply from "LPU64" of length %d/%d into md "LPX64"\n", - hdr->src_nid, length, rlength, + CDEBUG(D_NET, LPX64": Reply from "LPX64 + " of length %d/%d into md "LPX64"\n", + ni->ni_nid, hdr->src_nid, length, rlength, hdr->msg.reply.dst_wmd.wh_object_cookie); - lib_commit_md(nal, md, msg); + ptl_commit_md(md, msg); - msg->ev.type = PTL_EVENT_REPLY_END; - msg->ev.initiator.nid = hdr->src_nid; - msg->ev.initiator.pid = hdr->src_pid; - msg->ev.rlength = rlength; - msg->ev.mlength = length; - msg->ev.offset = 0; + msg->msg_ev.type = PTL_EVENT_REPLY_END; + msg->msg_ev.initiator.nid = hdr->src_nid; + msg->msg_ev.initiator.pid = hdr->src_pid; + msg->msg_ev.rlength = rlength; + msg->msg_ev.mlength = length; + msg->msg_ev.offset = 0; - lib_md_deconstruct(nal, md, &msg->ev.md); - ptl_md2handle(&msg->ev.md_handle, nal, md); + ptl_md_deconstruct(md, &msg->msg_ev.md); + ptl_md2handle(&msg->msg_ev.md_handle, md); - ni->ni_counters.recv_count++; - ni->ni_counters.recv_length += length; + ptl_apini.apini_counters.recv_count++; + ptl_apini.apini_counters.recv_length += length; - LIB_UNLOCK(nal, flags); + PTL_UNLOCK(flags); if (loopback) - rc = lib_lo_recv(nal, private, msg, md, 0, length, rlength); + rc = ptl_lo_recv(ni, private, msg, md, 0, length, rlength); else - rc = lib_recv(nal, private, msg, md, 0, length, rlength); + rc = ptl_recv(ni, private, msg, md, 0, length, rlength); if (rc != PTL_OK) - CERROR(LPU64": error on receiving REPLY from "LPU64": %d\n", - ni->ni_pid.nid, hdr->src_nid, rc); + CERROR(LPX64": error on receiving REPLY from "LPX64": %d\n", + ni->ni_nid, hdr->src_nid, rc); return (rc); } static ptl_err_t -parse_ack(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, - lib_msg_t *msg, int loopback) +ptl_parse_ack(ptl_ni_t *ni, ptl_hdr_t *hdr, void *private, + ptl_msg_t *msg, int loopback) { - lib_ni_t *ni = &nal->libnal_ni; - lib_md_t *md; + ptl_libmd_t *md; unsigned long flags; /* Convert ack fields to host byte order */ hdr->msg.ack.match_bits = le64_to_cpu(hdr->msg.ack.match_bits); hdr->msg.ack.mlength = le32_to_cpu(hdr->msg.ack.mlength); - LIB_LOCK(nal, flags); + PTL_LOCK(flags); /* NB handles only looked up by creator (no flips) */ - md = ptl_wire_handle2md(&hdr->msg.ack.dst_wmd, nal); - if (md == NULL || md->threshold == 0) { - CDEBUG(D_INFO, LPU64": Dropping ACK from "LPU64" to %s MD " - LPX64"."LPX64"\n", ni->ni_pid.nid, hdr->src_nid, + md = ptl_wire_handle2md(&hdr->msg.ack.dst_wmd); + if (md == NULL || md->md_threshold == 0) { + CERROR (LPX64": Dropping ACK from "LPX64" to %s MD " + LPX64"."LPX64"\n", ni->ni_nid, hdr->src_nid, (md == NULL) ? "invalid" : "inactive", hdr->msg.ack.dst_wmd.wh_interface_cookie, hdr->msg.ack.dst_wmd.wh_object_cookie); - LIB_UNLOCK(nal, flags); + PTL_UNLOCK(flags); return (PTL_FAIL); } - CDEBUG(D_NET, LPU64": ACK from "LPU64" into md "LPX64"\n", - ni->ni_pid.nid, hdr->src_nid, + CDEBUG(D_NET, LPX64": ACK from "LPX64" into md "LPX64"\n", + ni->ni_nid, hdr->src_nid, hdr->msg.ack.dst_wmd.wh_object_cookie); - lib_commit_md(nal, md, msg); + ptl_commit_md(md, msg); - msg->ev.type = PTL_EVENT_ACK; - msg->ev.initiator.nid = hdr->src_nid; - msg->ev.initiator.pid = hdr->src_pid; - msg->ev.mlength = hdr->msg.ack.mlength; - msg->ev.match_bits = hdr->msg.ack.match_bits; + msg->msg_ev.type = PTL_EVENT_ACK; + msg->msg_ev.initiator.nid = hdr->src_nid; + msg->msg_ev.initiator.pid = hdr->src_pid; + msg->msg_ev.mlength = hdr->msg.ack.mlength; + msg->msg_ev.match_bits = hdr->msg.ack.match_bits; - lib_md_deconstruct(nal, md, &msg->ev.md); - ptl_md2handle(&msg->ev.md_handle, nal, md); + ptl_md_deconstruct(md, &msg->msg_ev.md); + ptl_md2handle(&msg->msg_ev.md_handle, md); - ni->ni_counters.recv_count++; + ptl_apini.apini_counters.recv_count++; - LIB_UNLOCK(nal, flags); + PTL_UNLOCK(flags); /* We have received and matched up the ack OK, create the * completion event now... */ - lib_finalize(nal, private, msg, PTL_OK); + ptl_finalize(ni, private, msg, PTL_OK); /* ...and now discard any junk after the hdr */ if (!loopback) - (void) lib_recv(nal, private, NULL, NULL, 0, 0, + (void) ptl_recv(ni, private, NULL, NULL, 0, 0, hdr->payload_length); return (PTL_OK); @@ -1230,7 +1222,8 @@ hdr_type_string (ptl_hdr_t *hdr) } } -void print_hdr(lib_nal_t *nal, ptl_hdr_t * hdr) +void +ptl_print_hdr(ptl_hdr_t * hdr) { char *type_str = hdr_type_string (hdr); @@ -1281,21 +1274,21 @@ void print_hdr(lib_nal_t *nal, ptl_hdr_t * hdr) hdr->payload_length); } -} /* end of print_hdr() */ +} ptl_err_t -lib_parse(lib_nal_t *nal, ptl_hdr_t *hdr, void *private) +ptl_parse(ptl_ni_t *ni, ptl_hdr_t *hdr, void *private) { - return do_lib_parse(nal, hdr, private, 0); + return do_ptl_parse(ni, hdr, private, 0); } ptl_err_t -do_lib_parse(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, int loopback) +do_ptl_parse(ptl_ni_t *ni, ptl_hdr_t *hdr, void *private, int loopback) { unsigned long flags; ptl_err_t rc; - lib_msg_t *msg; + ptl_msg_t *msg; /* NB we return PTL_OK if we manage to parse the header and believe * it looks OK. Anything that goes wrong with receiving the @@ -1320,21 +1313,21 @@ do_lib_parse(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, int loopback) if (mv->magic == PORTALS_PROTO_MAGIC && mv->version_major == PORTALS_PROTO_VERSION_MAJOR && mv->version_minor == PORTALS_PROTO_VERSION_MINOR) { - CWARN (LPU64": Dropping unexpected HELLO message: " + CWARN (LPX64": Dropping unexpected HELLO message: " "magic %d, version %d.%d from "LPD64"\n", - nal->libnal_ni.ni_pid.nid, mv->magic, + ni->ni_nid, mv->magic, mv->version_major, mv->version_minor, hdr->src_nid); /* it's good but we don't want it */ - lib_drop_message(nal, private, hdr, loopback); + ptl_drop_message(ni, private, hdr, loopback); return PTL_OK; } /* we got garbage */ - CERROR (LPU64": Bad HELLO message: " + CERROR (LPX64": Bad HELLO message: " "magic %d, version %d.%d from "LPD64"\n", - nal->libnal_ni.ni_pid.nid, mv->magic, + ni->ni_nid, mv->magic, mv->version_major, mv->version_minor, hdr->src_nid); return PTL_FAIL; @@ -1345,57 +1338,57 @@ do_lib_parse(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, int loopback) case PTL_MSG_GET: case PTL_MSG_REPLY: hdr->dest_nid = le64_to_cpu(hdr->dest_nid); - if (hdr->dest_nid != nal->libnal_ni.ni_pid.nid) { - CERROR(LPU64": BAD dest NID in %s message from" - LPU64" to "LPU64" (not me)\n", - nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr), + if (hdr->dest_nid != ni->ni_nid) { + CERROR(LPX64": BAD dest NID in %s message from" + LPX64" to "LPX64" (not me)\n", + ni->ni_nid, hdr_type_string(hdr), hdr->src_nid, hdr->dest_nid); return PTL_FAIL; } break; default: - CERROR(LPU64": Bad message type 0x%x from "LPU64"\n", - nal->libnal_ni.ni_pid.nid, hdr->type, hdr->src_nid); + CERROR(LPX64": Bad message type 0x%x from "LPX64"\n", + ni->ni_nid, hdr->type, hdr->src_nid); return PTL_FAIL; } /* We've decided we're not receiving garbage since we can parse the * header. We will return PTL_OK come what may... */ - if (!list_empty (&nal->libnal_ni.ni_test_peers) && /* normally we don't */ - fail_peer (nal, hdr->src_nid, 0)) /* shall we now? */ + if (!list_empty (&ptl_apini.apini_test_peers) && /* normally we don't */ + fail_peer (hdr->src_nid, 0)) /* shall we now? */ { - CERROR(LPU64": Dropping incoming %s from "LPU64 + CERROR(LPX64": Dropping incoming %s from "LPX64 ": simulated failure\n", - nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr), + ni->ni_nid, hdr_type_string (hdr), hdr->src_nid); - lib_drop_message(nal, private, hdr, loopback); + ptl_drop_message(ni, private, hdr, loopback); return PTL_OK; } - msg = lib_msg_alloc(nal); + msg = ptl_msg_alloc(); if (msg == NULL) { - CERROR(LPU64": Dropping incoming %s from "LPU64 - ": can't allocate a lib_msg_t\n", - nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr), + CERROR(LPX64": Dropping incoming %s from "LPX64 + ": can't allocate a ptl_msg_t\n", + ni->ni_nid, hdr_type_string (hdr), hdr->src_nid); - lib_drop_message(nal, private, hdr, loopback); + ptl_drop_message(ni, private, hdr, loopback); return PTL_OK; } switch (hdr->type) { case PTL_MSG_ACK: - rc = parse_ack(nal, hdr, private, msg, loopback); + rc = ptl_parse_ack(ni, hdr, private, msg, loopback); break; case PTL_MSG_PUT: - rc = parse_put(nal, hdr, private, msg, loopback); + rc = ptl_parse_put(ni, hdr, private, msg, loopback); break; case PTL_MSG_GET: - rc = parse_get(nal, hdr, private, msg, loopback); + rc = ptl_parse_get(ni, hdr, private, msg, loopback); break; case PTL_MSG_REPLY: - rc = parse_reply(nal, hdr, private, msg, loopback); + rc = ptl_parse_reply(ni, hdr, private, msg, loopback); break; default: LASSERT(0); @@ -1404,15 +1397,15 @@ do_lib_parse(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, int loopback) } if (rc != PTL_OK) { - if (msg->md != NULL) { + if (msg->msg_md != NULL) { /* committed... */ - lib_finalize(nal, private, msg, rc); + ptl_finalize(ni, private, msg, rc); } else { - LIB_LOCK(nal, flags); - lib_msg_free(nal, msg); /* expects LIB_LOCK held */ - LIB_UNLOCK(nal, flags); + PTL_LOCK(flags); + ptl_msg_free(msg); /* expects PTL_LOCK held */ + PTL_UNLOCK(flags); - lib_drop_message(nal, private, hdr, loopback); + ptl_drop_message(ni, private, hdr, loopback); } } @@ -1420,59 +1413,91 @@ do_lib_parse(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, int loopback) /* That's "OK I can parse it", not "OK I like it" :) */ } -int -lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh, - ptl_ack_req_t ack, ptl_process_id_t *id, - ptl_pt_index_t portal, ptl_ac_index_t ac, - ptl_match_bits_t match_bits, - ptl_size_t offset, ptl_hdr_data_t hdr_data) +ptl_ni_t * +ptl_nid2ni (ptl_nid_t nid) +{ + /* Called holding PTL_LOCK */ + + if (list_empty(&ptl_apini.apini_nis)) + return NULL; + + if (ptl_apini.apini_nis.next != ptl_apini.apini_nis.prev) { + CERROR ("Can't decide which NI\n"); + return NULL; + } + + return list_entry(ptl_apini.apini_nis.next, ptl_ni_t, ni_list); +} + +ptl_err_t +PtlPut(ptl_handle_md_t mdh, ptl_ack_req_t ack, + ptl_process_id_t target, ptl_pt_index_t portal, + ptl_ac_index_t ac, ptl_match_bits_t match_bits, + ptl_size_t offset, ptl_hdr_data_t hdr_data) { - lib_nal_t *nal = apinal->nal_data; - lib_ni_t *ni = &nal->libnal_ni; - lib_msg_t *msg; + ptl_ni_t *ni; + ptl_msg_t *msg; ptl_hdr_t hdr; - lib_md_t *md; + ptl_libmd_t *md; unsigned long flags; int rc; - if (!list_empty (&ni->ni_test_peers) && /* normally we don't */ - fail_peer (nal, id->nid, 1)) /* shall we now? */ + if (!ptl_init) + return PTL_NO_INIT; + + if (ptl_apini.apini_refcount == 0) + return PTL_MD_INVALID; + + if (!list_empty (&ptl_apini.apini_test_peers) && /* normally we don't */ + fail_peer (target.nid, 1)) /* shall we now? */ { - CERROR("Dropping PUT to "LPU64": simulated failure\n", - id->nid); + CERROR("Dropping PUT to "LPX64": simulated failure\n", + target.nid); return PTL_PROCESS_INVALID; } - msg = lib_msg_alloc(nal); + msg = ptl_msg_alloc(); if (msg == NULL) { - CERROR(LPU64": Dropping PUT to "LPU64": ENOMEM on lib_msg_t\n", - ni->ni_pid.nid, id->nid); + CERROR("Dropping PUT to "LPX64": ENOMEM on ptl_msg_t\n", + target.nid); return PTL_NO_SPACE; } - LIB_LOCK(nal, flags); + PTL_LOCK(flags); - md = ptl_handle2md(mdh, nal); - if (md == NULL || md->threshold == 0) { - lib_msg_free(nal, msg); - LIB_UNLOCK(nal, flags); + ni = ptl_nid2ni(target.nid); + if (ni == NULL) { + ptl_msg_free(msg); + PTL_UNLOCK(flags); + CERROR("Dropping PUT to "LPX64": not reachable\n", target.nid); + return PTL_PROCESS_INVALID; + } + + md = ptl_handle2md(&mdh); + if (md == NULL || md->md_threshold == 0) { + ptl_msg_free(msg); + PTL_UNLOCK(flags); + + CERROR("Dropping PUT to "LPX64": MD invalid\n", target.nid); return PTL_MD_INVALID; } - CDEBUG(D_NET, "PtlPut -> "LPX64"\n", id->nid); + CDEBUG(D_NET, LPX64": PtlPut -> "LPX64":%lu\n", + ni->ni_nid, target.nid, (unsigned long)target.pid); memset (&hdr, 0, sizeof (hdr)); hdr.type = cpu_to_le32(PTL_MSG_PUT); - hdr.dest_nid = cpu_to_le64(id->nid); - hdr.dest_pid = cpu_to_le32(id->pid); - hdr.src_nid = cpu_to_le64(ni->ni_pid.nid); - hdr.src_pid = cpu_to_le32(ni->ni_pid.pid); - hdr.payload_length = cpu_to_le32(md->length); + hdr.dest_nid = cpu_to_le64(target.nid); + hdr.dest_pid = cpu_to_le32(target.pid); + hdr.src_nid = cpu_to_le64(ni->ni_nid); + hdr.src_pid = cpu_to_le32(ptl_apini.apini_pid); + hdr.payload_length = cpu_to_le32(md->md_length); /* NB handles only looked up by creator (no flips) */ if (ack == PTL_ACK_REQ) { - hdr.msg.put.ack_wmd.wh_interface_cookie = ni->ni_interface_cookie; + hdr.msg.put.ack_wmd.wh_interface_cookie = + ptl_apini.apini_interface_cookie; hdr.msg.put.ack_wmd.wh_object_cookie = md->md_lh.lh_cookie; } else { hdr.msg.put.ack_wmd = PTL_WIRE_HANDLE_NONE; @@ -1483,279 +1508,203 @@ lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh, hdr.msg.put.offset = cpu_to_le32(offset); hdr.msg.put.hdr_data = hdr_data; - lib_commit_md(nal, md, msg); + ptl_commit_md(md, msg); - msg->ev.type = PTL_EVENT_SEND_END; - msg->ev.initiator.nid = ni->ni_pid.nid; - msg->ev.initiator.pid = ni->ni_pid.pid; - msg->ev.pt_index = portal; - msg->ev.match_bits = match_bits; - msg->ev.rlength = md->length; - msg->ev.mlength = md->length; - msg->ev.offset = offset; - msg->ev.hdr_data = hdr_data; + msg->msg_ev.type = PTL_EVENT_SEND_END; + msg->msg_ev.initiator.nid = ni->ni_nid; + msg->msg_ev.initiator.pid = ptl_apini.apini_pid; + msg->msg_ev.pt_index = portal; + msg->msg_ev.match_bits = match_bits; + msg->msg_ev.rlength = md->md_length; + msg->msg_ev.mlength = md->md_length; + msg->msg_ev.offset = offset; + msg->msg_ev.hdr_data = hdr_data; - lib_md_deconstruct(nal, md, &msg->ev.md); - ptl_md2handle(&msg->ev.md_handle, nal, md); + ptl_md_deconstruct(md, &msg->msg_ev.md); + ptl_md2handle(&msg->msg_ev.md_handle, md); - ni->ni_counters.send_count++; - ni->ni_counters.send_length += md->length; + ptl_apini.apini_counters.send_count++; + ptl_apini.apini_counters.send_length += md->md_length; - LIB_UNLOCK(nal, flags); + PTL_UNLOCK(flags); - rc = lib_send (nal, NULL, msg, &hdr, PTL_MSG_PUT, - id->nid, id->pid, md, 0, md->length); + rc = ptl_send (ni, NULL, msg, &hdr, PTL_MSG_PUT, + target.nid, target.pid, md, 0, md->md_length); if (rc != PTL_OK) { - CERROR("Error sending PUT to "LPX64": %d\n", - id->nid, rc); - lib_finalize (nal, NULL, msg, rc); + CERROR(LPX64": Error sending PUT to "LPX64": %d\n", + ni->ni_nid, target.nid, rc); + ptl_finalize (ni, NULL, msg, rc); } /* completion will be signalled by an event */ return PTL_OK; } -lib_msg_t * -lib_create_reply_msg (lib_nal_t *nal, ptl_nid_t peer_nid, lib_msg_t *getmsg) +ptl_msg_t * +ptl_create_reply_msg (ptl_ni_t *ni, ptl_nid_t peer_nid, ptl_msg_t *getmsg) { /* The NAL can DMA direct to the GET md (i.e. no REPLY msg). This - * returns a msg for the NAL to pass to lib_finalize() when the sink + * returns a msg for the NAL to pass to ptl_finalize() when the sink * data has been received. * * CAVEAT EMPTOR: 'getmsg' is the original GET, which is freed when - * lib_finalize() is called on it, so the NAL must call this first */ + * ptl_finalize() is called on it, so the NAL must call this first */ - lib_ni_t *ni = &nal->libnal_ni; - lib_msg_t *msg = lib_msg_alloc(nal); - lib_md_t *getmd = getmsg->md; + ptl_msg_t *msg = ptl_msg_alloc(); + ptl_libmd_t *getmd = getmsg->msg_md; unsigned long flags; - LIB_LOCK(nal, flags); + PTL_LOCK(flags); - LASSERT (getmd->pending > 0); + LASSERT (getmd->md_pending > 0); if (msg == NULL) { - CERROR ("Dropping REPLY from "LPU64": can't allocate msg\n", - peer_nid); + CERROR (LPX64": Dropping REPLY from "LPX64": can't allocate msg\n", + ni->ni_nid, peer_nid); goto drop; } - if (getmd->threshold == 0) { - CERROR ("Dropping REPLY from "LPU64" for inactive MD %p\n", - peer_nid, getmd); + if (getmd->md_threshold == 0) { + CERROR (LPX64": Dropping REPLY from "LPX64" for inactive MD %p\n", + ni->ni_nid, peer_nid, getmd); goto drop_msg; } - LASSERT (getmd->offset == 0); + LASSERT (getmd->md_offset == 0); - CDEBUG(D_NET, "Reply from "LPU64" md %p\n", peer_nid, getmd); + CDEBUG(D_NET, LPX64": Reply from "LPX64" md %p\n", + ni->ni_nid, peer_nid, getmd); - lib_commit_md (nal, getmd, msg); + ptl_commit_md (getmd, msg); - msg->ev.type = PTL_EVENT_REPLY_END; - msg->ev.initiator.nid = peer_nid; - msg->ev.initiator.pid = 0; /* XXX FIXME!!! */ - msg->ev.rlength = msg->ev.mlength = getmd->length; - msg->ev.offset = 0; + msg->msg_ev.type = PTL_EVENT_REPLY_END; + msg->msg_ev.initiator.nid = peer_nid; + msg->msg_ev.initiator.pid = 0; /* XXX FIXME!!! */ + msg->msg_ev.rlength = msg->msg_ev.mlength = getmd->md_length; + msg->msg_ev.offset = 0; - lib_md_deconstruct(nal, getmd, &msg->ev.md); - ptl_md2handle(&msg->ev.md_handle, nal, getmd); + ptl_md_deconstruct(getmd, &msg->msg_ev.md); + ptl_md2handle(&msg->msg_ev.md_handle, getmd); - ni->ni_counters.recv_count++; - ni->ni_counters.recv_length += getmd->length; + ptl_apini.apini_counters.recv_count++; + ptl_apini.apini_counters.recv_length += getmd->md_length; - LIB_UNLOCK(nal, flags); + PTL_UNLOCK(flags); return msg; drop_msg: - lib_msg_free(nal, msg); + ptl_msg_free(msg); drop: - nal->libnal_ni.ni_counters.drop_count++; - nal->libnal_ni.ni_counters.drop_length += getmd->length; + ptl_apini.apini_counters.drop_count++; + ptl_apini.apini_counters.drop_length += getmd->md_length; - LIB_UNLOCK (nal, flags); + PTL_UNLOCK (flags); return NULL; } -int -lib_api_get(nal_t *apinal, ptl_handle_md_t *mdh, ptl_process_id_t *id, - ptl_pt_index_t portal, ptl_ac_index_t ac, - ptl_match_bits_t match_bits, ptl_size_t offset) +ptl_err_t +PtlGet(ptl_handle_md_t mdh, ptl_process_id_t target, + ptl_pt_index_t portal, ptl_ac_index_t ac, + ptl_match_bits_t match_bits, ptl_size_t offset) { - lib_nal_t *nal = apinal->nal_data; - lib_ni_t *ni = &nal->libnal_ni; - lib_msg_t *msg; + ptl_ni_t *ni; + ptl_msg_t *msg; ptl_hdr_t hdr; - lib_md_t *md; + ptl_libmd_t *md; unsigned long flags; int rc; - if (!list_empty (&ni->ni_test_peers) && /* normally we don't */ - fail_peer (nal, id->nid, 1)) /* shall we now? */ + if (!ptl_init) + return PTL_NO_INIT; + + if (ptl_apini.apini_refcount == 0) + return PTL_MD_INVALID; + + if (!list_empty (&ptl_apini.apini_test_peers) && /* normally we don't */ + fail_peer (target.nid, 1)) /* shall we now? */ { - CERROR("Dropping PUT to "LPX64": simulated failure\n", - id->nid); + CERROR("Dropping GET to "LPX64": simulated failure\n", + target.nid); return PTL_PROCESS_INVALID; } - msg = lib_msg_alloc(nal); + msg = ptl_msg_alloc(); if (msg == NULL) { - CERROR("Dropping GET to "LPU64": ENOMEM on lib_msg_t\n", - id->nid); + CERROR("Dropping GET to "LPX64": ENOMEM on ptl_msg_t\n", + target.nid); return PTL_NO_SPACE; } - LIB_LOCK(nal, flags); + PTL_LOCK(flags); - md = ptl_handle2md(mdh, nal); - if (md == NULL || !md->threshold) { - lib_msg_free(nal, msg); - LIB_UNLOCK(nal, flags); + ni = ptl_nid2ni(target.nid); + if (ni == NULL) { + ptl_msg_free(msg); + PTL_UNLOCK(flags); + CERROR("Dropping GET to "LPX64": not reachable\n", target.nid); + return PTL_PROCESS_INVALID; + } + + md = ptl_handle2md(&mdh); + if (md == NULL || md->md_threshold == 0) { + ptl_msg_free(msg); + PTL_UNLOCK(flags); + + CERROR("Dropping GET to "LPX64": MD invalid\n", target.nid); return PTL_MD_INVALID; } - CDEBUG(D_NET, "PtlGet -> %Lu: %lu\n", (unsigned long long)id->nid, - (unsigned long)id->pid); + CDEBUG(D_NET, LPX64": PtlGet -> "LPX64":%lu\n", + ni->ni_nid, target.nid, (unsigned long)target.pid); memset (&hdr, 0, sizeof (hdr)); hdr.type = cpu_to_le32(PTL_MSG_GET); - hdr.dest_nid = cpu_to_le64(id->nid); - hdr.dest_pid = cpu_to_le32(id->pid); - hdr.src_nid = cpu_to_le64(ni->ni_pid.nid); - hdr.src_pid = cpu_to_le32(ni->ni_pid.pid); + hdr.dest_nid = cpu_to_le64(target.nid); + hdr.dest_pid = cpu_to_le32(target.pid); + hdr.src_nid = cpu_to_le64(ni->ni_nid); + hdr.src_pid = cpu_to_le32(ptl_apini.apini_pid); hdr.payload_length = 0; /* NB handles only looked up by creator (no flips) */ - hdr.msg.get.return_wmd.wh_interface_cookie = ni->ni_interface_cookie; + hdr.msg.get.return_wmd.wh_interface_cookie = + ptl_apini.apini_interface_cookie; hdr.msg.get.return_wmd.wh_object_cookie = md->md_lh.lh_cookie; hdr.msg.get.match_bits = cpu_to_le64(match_bits); hdr.msg.get.ptl_index = cpu_to_le32(portal); hdr.msg.get.src_offset = cpu_to_le32(offset); - hdr.msg.get.sink_length = cpu_to_le32(md->length); + hdr.msg.get.sink_length = cpu_to_le32(md->md_length); - lib_commit_md(nal, md, msg); + ptl_commit_md(md, msg); - msg->ev.type = PTL_EVENT_SEND_END; - msg->ev.initiator = ni->ni_pid; - msg->ev.pt_index = portal; - msg->ev.match_bits = match_bits; - msg->ev.rlength = md->length; - msg->ev.mlength = md->length; - msg->ev.offset = offset; - msg->ev.hdr_data = 0; + msg->msg_ev.type = PTL_EVENT_SEND_END; + msg->msg_ev.initiator.nid = ni->ni_nid; + msg->msg_ev.initiator.pid = ptl_apini.apini_pid; + msg->msg_ev.pt_index = portal; + msg->msg_ev.match_bits = match_bits; + msg->msg_ev.rlength = md->md_length; + msg->msg_ev.mlength = md->md_length; + msg->msg_ev.offset = offset; + msg->msg_ev.hdr_data = 0; - lib_md_deconstruct(nal, md, &msg->ev.md); - ptl_md2handle(&msg->ev.md_handle, nal, md); + ptl_md_deconstruct(md, &msg->msg_ev.md); + ptl_md2handle(&msg->msg_ev.md_handle, md); - ni->ni_counters.send_count++; + ptl_apini.apini_counters.send_count++; - LIB_UNLOCK(nal, flags); + PTL_UNLOCK(flags); - rc = lib_send (nal, NULL, msg, &hdr, PTL_MSG_GET, - id->nid, id->pid, NULL, 0, 0); + rc = ptl_send (ni, NULL, msg, &hdr, PTL_MSG_GET, + target.nid, target.pid, NULL, 0, 0); if (rc != PTL_OK) { - CERROR(LPU64": error sending GET to "LPU64": %d\n", - ni->ni_pid.nid, id->nid, rc); - lib_finalize (nal, NULL, msg, rc); + CERROR(LPX64": error sending GET to "LPX64": %d\n", + ni->ni_nid, target.nid, rc); + ptl_finalize (ni, NULL, msg, rc); } /* completion will be signalled by an event */ return PTL_OK; } - -void lib_assert_wire_constants (void) -{ - /* Wire protocol assertions generated by 'wirecheck' - * running on Linux mdevi 2.4.21-p4smp-55chaos #1 SMP Tue Jun 8 14:38:44 PDT 2004 i686 i686 i - * with gcc version 3.2.3 20030502 (Red Hat Linux 3.2.3-34) */ - - - /* Constants... */ - LASSERT (PORTALS_PROTO_MAGIC == 0xeebc0ded); - LASSERT (PORTALS_PROTO_VERSION_MAJOR == 1); - LASSERT (PORTALS_PROTO_VERSION_MINOR == 0); - LASSERT (PTL_MSG_ACK == 0); - LASSERT (PTL_MSG_PUT == 1); - LASSERT (PTL_MSG_GET == 2); - LASSERT (PTL_MSG_REPLY == 3); - LASSERT (PTL_MSG_HELLO == 4); - - /* Checks for struct ptl_handle_wire_t */ - LASSERT ((int)sizeof(ptl_handle_wire_t) == 16); - LASSERT ((int)offsetof(ptl_handle_wire_t, wh_interface_cookie) == 0); - LASSERT ((int)sizeof(((ptl_handle_wire_t *)0)->wh_interface_cookie) == 8); - LASSERT ((int)offsetof(ptl_handle_wire_t, wh_object_cookie) == 8); - LASSERT ((int)sizeof(((ptl_handle_wire_t *)0)->wh_object_cookie) == 8); - - /* Checks for struct ptl_magicversion_t */ - LASSERT ((int)sizeof(ptl_magicversion_t) == 8); - LASSERT ((int)offsetof(ptl_magicversion_t, magic) == 0); - LASSERT ((int)sizeof(((ptl_magicversion_t *)0)->magic) == 4); - LASSERT ((int)offsetof(ptl_magicversion_t, version_major) == 4); - LASSERT ((int)sizeof(((ptl_magicversion_t *)0)->version_major) == 2); - LASSERT ((int)offsetof(ptl_magicversion_t, version_minor) == 6); - LASSERT ((int)sizeof(((ptl_magicversion_t *)0)->version_minor) == 2); - - /* Checks for struct ptl_hdr_t */ - LASSERT ((int)sizeof(ptl_hdr_t) == 72); - LASSERT ((int)offsetof(ptl_hdr_t, dest_nid) == 0); - LASSERT ((int)sizeof(((ptl_hdr_t *)0)->dest_nid) == 8); - LASSERT ((int)offsetof(ptl_hdr_t, src_nid) == 8); - LASSERT ((int)sizeof(((ptl_hdr_t *)0)->src_nid) == 8); - LASSERT ((int)offsetof(ptl_hdr_t, dest_pid) == 16); - LASSERT ((int)sizeof(((ptl_hdr_t *)0)->dest_pid) == 4); - LASSERT ((int)offsetof(ptl_hdr_t, src_pid) == 20); - LASSERT ((int)sizeof(((ptl_hdr_t *)0)->src_pid) == 4); - LASSERT ((int)offsetof(ptl_hdr_t, type) == 24); - LASSERT ((int)sizeof(((ptl_hdr_t *)0)->type) == 4); - LASSERT ((int)offsetof(ptl_hdr_t, payload_length) == 28); - LASSERT ((int)sizeof(((ptl_hdr_t *)0)->payload_length) == 4); - LASSERT ((int)offsetof(ptl_hdr_t, msg) == 32); - LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg) == 40); - - /* Ack */ - LASSERT ((int)offsetof(ptl_hdr_t, msg.ack.dst_wmd) == 32); - LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.ack.dst_wmd) == 16); - LASSERT ((int)offsetof(ptl_hdr_t, msg.ack.match_bits) == 48); - LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.ack.match_bits) == 8); - LASSERT ((int)offsetof(ptl_hdr_t, msg.ack.mlength) == 56); - LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.ack.mlength) == 4); - - /* Put */ - LASSERT ((int)offsetof(ptl_hdr_t, msg.put.ack_wmd) == 32); - LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.ack_wmd) == 16); - LASSERT ((int)offsetof(ptl_hdr_t, msg.put.match_bits) == 48); - LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.match_bits) == 8); - LASSERT ((int)offsetof(ptl_hdr_t, msg.put.hdr_data) == 56); - LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.hdr_data) == 8); - LASSERT ((int)offsetof(ptl_hdr_t, msg.put.ptl_index) == 64); - LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.ptl_index) == 4); - LASSERT ((int)offsetof(ptl_hdr_t, msg.put.offset) == 68); - LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.offset) == 4); - - /* Get */ - LASSERT ((int)offsetof(ptl_hdr_t, msg.get.return_wmd) == 32); - LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.return_wmd) == 16); - LASSERT ((int)offsetof(ptl_hdr_t, msg.get.match_bits) == 48); - LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.match_bits) == 8); - LASSERT ((int)offsetof(ptl_hdr_t, msg.get.ptl_index) == 56); - LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.ptl_index) == 4); - LASSERT ((int)offsetof(ptl_hdr_t, msg.get.src_offset) == 60); - LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.src_offset) == 4); - LASSERT ((int)offsetof(ptl_hdr_t, msg.get.sink_length) == 64); - LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.sink_length) == 4); - - /* Reply */ - LASSERT ((int)offsetof(ptl_hdr_t, msg.reply.dst_wmd) == 32); - LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.reply.dst_wmd) == 16); - - /* Hello */ - LASSERT ((int)offsetof(ptl_hdr_t, msg.hello.incarnation) == 32); - LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.hello.incarnation) == 8); - LASSERT ((int)offsetof(ptl_hdr_t, msg.hello.type) == 40); - LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.hello.type) == 4); -} diff --git a/lnet/lnet/lib-msg.c b/lnet/lnet/lib-msg.c index 38904c4..667a7cc 100644 --- a/lnet/lnet/lib-msg.c +++ b/lnet/lnet/lib-msg.c @@ -24,17 +24,10 @@ #define DEBUG_SUBSYSTEM S_PORTALS -#ifndef __KERNEL__ -# include -#else -# include -#endif - #include void -lib_enq_event_locked (lib_nal_t *nal, void *private, - lib_eq_t *eq, ptl_event_t *ev) +ptl_enq_event_locked (void *private, ptl_eq_t *eq, ptl_event_t *ev) { ptl_event_t *eq_slot; @@ -54,7 +47,7 @@ lib_enq_event_locked (lib_nal_t *nal, void *private, eq_slot = eq->eq_events + (ev->sequence & (eq->eq_size - 1)); /* There is no race since both event consumers and event producers - * take the LIB_LOCK(), so we don't screw around with memory + * take the PTL_LOCK, so we don't screw around with memory * barriers, setting the sequence number last or wierd structure * layout assertions. */ *eq_slot = *ev; @@ -65,17 +58,17 @@ lib_enq_event_locked (lib_nal_t *nal, void *private, /* Wake anyone sleeping for an event (see lib-eq.c) */ #ifdef __KERNEL__ - if (cfs_waitq_active(&nal->libnal_ni.ni_waitq)) - cfs_waitq_broadcast(&nal->libnal_ni.ni_waitq); + if (cfs_waitq_active(&ptl_apini.apini_waitq)) + cfs_waitq_broadcast(&ptl_apini.apini_waitq); #else - pthread_cond_broadcast(&nal->libnal_ni.ni_cond); + pthread_cond_broadcast(&ptl_apini.apini_cond); #endif } void -lib_finalize (lib_nal_t *nal, void *private, lib_msg_t *msg, ptl_err_t status) +ptl_finalize (ptl_ni_t *ni, void *private, ptl_msg_t *msg, ptl_err_t status) { - lib_md_t *md; + ptl_libmd_t *md; int unlink; unsigned long flags; int rc; @@ -86,62 +79,63 @@ lib_finalize (lib_nal_t *nal, void *private, lib_msg_t *msg, ptl_err_t status) /* Only send an ACK if the PUT completed successfully */ if (status == PTL_OK && - !ptl_is_wire_handle_none(&msg->ack_wmd)) { + !ptl_is_wire_handle_none(&msg->msg_ack_wmd)) { - LASSERT(msg->ev.type == PTL_EVENT_PUT_END); + LASSERT(msg->msg_ev.type == PTL_EVENT_PUT_END); memset (&ack, 0, sizeof (ack)); ack.type = cpu_to_le32(PTL_MSG_ACK); - ack.dest_nid = cpu_to_le64(msg->ev.initiator.nid); - ack.dest_pid = cpu_to_le32(msg->ev.initiator.pid); - ack.src_nid = cpu_to_le64(nal->libnal_ni.ni_pid.nid); - ack.src_pid = cpu_to_le32(nal->libnal_ni.ni_pid.pid); + ack.dest_nid = cpu_to_le64(msg->msg_ev.initiator.nid); + ack.dest_pid = cpu_to_le32(msg->msg_ev.initiator.pid); + ack.src_nid = cpu_to_le64(ni->ni_nid); + ack.src_pid = cpu_to_le32(ptl_apini.apini_pid); ack.payload_length = 0; - ack.msg.ack.dst_wmd = msg->ack_wmd; - ack.msg.ack.match_bits = msg->ev.match_bits; - ack.msg.ack.mlength = cpu_to_le32(msg->ev.mlength); + ack.msg.ack.dst_wmd = msg->msg_ack_wmd; + ack.msg.ack.match_bits = msg->msg_ev.match_bits; + ack.msg.ack.mlength = cpu_to_le32(msg->msg_ev.mlength); - rc = lib_send (nal, private, NULL, &ack, PTL_MSG_ACK, - msg->ev.initiator.nid, msg->ev.initiator.pid, + rc = ptl_send (ni, private, NULL, &ack, PTL_MSG_ACK, + msg->msg_ev.initiator.nid, + msg->msg_ev.initiator.pid, NULL, 0, 0); if (rc != PTL_OK) { /* send failed: there's nothing else to clean up. */ CERROR("Error %d sending ACK to "LPX64"\n", - rc, msg->ev.initiator.nid); + rc, msg->msg_ev.initiator.nid); } } - md = msg->md; + md = msg->msg_md; - LIB_LOCK(nal, flags); + PTL_LOCK(flags); /* Now it's safe to drop my caller's ref */ - md->pending--; - LASSERT (md->pending >= 0); + md->md_pending--; + LASSERT (md->md_pending >= 0); /* Should I unlink this MD? */ - if (md->pending != 0) /* other refs */ + if (md->md_pending != 0) /* other refs */ unlink = 0; else if ((md->md_flags & PTL_MD_FLAG_ZOMBIE) != 0) unlink = 1; else if ((md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) == 0) unlink = 0; else - unlink = lib_md_exhausted(md); + unlink = ptl_md_exhausted(md); - msg->ev.ni_fail_type = status; - msg->ev.unlinked = unlink; + msg->msg_ev.ni_fail_type = status; + msg->msg_ev.unlinked = unlink; - if (md->eq != NULL) - lib_enq_event_locked(nal, private, md->eq, &msg->ev); + if (md->md_eq != NULL) + ptl_enq_event_locked(private, md->md_eq, &msg->msg_ev); if (unlink) - lib_md_unlink(nal, md); + ptl_md_unlink(md); list_del (&msg->msg_list); - nal->libnal_ni.ni_counters.msgs_alloc--; - lib_msg_free(nal, msg); + ptl_apini.apini_counters.msgs_alloc--; + ptl_msg_free(msg); - LIB_UNLOCK(nal, flags); + PTL_UNLOCK(flags); } diff --git a/lnet/lnet/lib-ni.c b/lnet/lnet/lib-ni.c deleted file mode 100644 index e45859a..0000000 --- a/lnet/lnet/lib-ni.c +++ /dev/null @@ -1,29 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * This file is part of Lustre, http://www.lustre.org - * This file is not subject to copyright protection. - */ - -#define DEBUG_SUBSYSTEM S_PORTALS -#include - -int lib_api_ni_status (nal_t *apinal, ptl_sr_index_t sr_idx, - ptl_sr_value_t *status) -{ - return PTL_FAIL; -} - - -int lib_api_ni_dist (nal_t *apinal, ptl_process_id_t *pid, unsigned long *dist) -{ - lib_nal_t *nal = apinal->nal_data; - - if (nal->libnal_ni.ni_loopback && - pid->nid == nal->libnal_ni.ni_pid.nid) { - *dist = 0; - return PTL_OK; - } - - return (nal->libnal_dist(nal, pid->nid, dist)); -} diff --git a/lnet/lnet/lib-pid.c b/lnet/lnet/lib-pid.c deleted file mode 100644 index 23d6dd3..0000000 --- a/lnet/lnet/lib-pid.c +++ /dev/null @@ -1,20 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * This file is part of Lustre, http://www.lustre.org - * This file is not subject to copyright protection. - */ - -/* This should be removed. The NAL should have the PID information */ -#define DEBUG_SUBSYSTEM S_PORTALS - -#include - -int -lib_api_get_id(nal_t *apinal, ptl_process_id_t *pid) -{ - lib_nal_t *nal = apinal->nal_data; - - *pid = nal->libnal_ni.ni_pid; - return PTL_OK; -} diff --git a/lnet/lnet/module.c b/lnet/lnet/module.c index 472175b..56300f7 100644 --- a/lnet/lnet/module.c +++ b/lnet/lnet/module.c @@ -23,12 +23,7 @@ # define EXPORT_SYMTAB #endif #define DEBUG_SUBSYSTEM S_PORTALS - #include -#include -#include -#include -#include extern void (kping_client)(struct portal_ioctl_data *); @@ -36,7 +31,6 @@ static int kportal_ioctl(struct portal_ioctl_data *data, unsigned int cmd, unsigned long arg) { int err; - char str[PTL_NALFMT_SIZE]; ENTRY; switch (cmd) { @@ -45,7 +39,7 @@ static int kportal_ioctl(struct portal_ioctl_data *data, CDEBUG(D_IOCTL, "doing %d pings to nid "LPX64" (%s)\n", data->ioc_count, data->ioc_nid, - portals_nid2str(data->ioc_nal, data->ioc_nid, str)); + libcfs_nid2str(data->ioc_nid)); ping = PORTAL_SYMBOL_GET(kping_client); if (!ping) CERROR("PORTAL_SYMBOL_GET failed\n"); @@ -84,7 +78,7 @@ static int kportal_ioctl(struct portal_ioctl_data *data, CDEBUG (D_IOCTL, "fail nid: [%d] "LPU64" count %d\n", data->ioc_nal, data->ioc_nid, data->ioc_count); - err = PtlNIInit(data->ioc_nal, LUSTRE_SRV_PTL_PID, NULL, + err = PtlNIInit(PTL_IFACE_DEFAULT, LUSTRE_SRV_PTL_PID, NULL, NULL, &nih); if (!(err == PTL_OK || err == PTL_IFACE_DUP)) return (-EINVAL); @@ -103,40 +97,6 @@ static int kportal_ioctl(struct portal_ioctl_data *data, RETURN (err); } - case IOC_PORTAL_LOOPBACK: { - ptl_handle_ni_t nih; - int enabled = data->ioc_flags; - int set = data->ioc_misc; - - CDEBUG (D_IOCTL, "loopback: [%d] %d %d\n", - data->ioc_nal, enabled, set); - - err = PtlNIInit(data->ioc_nal, LUSTRE_SRV_PTL_PID, NULL, - NULL, &nih); - if (!(err == PTL_OK || err == PTL_IFACE_DUP)) - return (-EINVAL); - - if (err == PTL_OK) { - /* There's no point in failing an interface that - * came into existance just for this */ - err = -EINVAL; - } else { - err = PtlLoopback (nih, set, &enabled); - if (err != PTL_OK) { - err = -EINVAL; - } else { - data->ioc_flags = enabled; - if (copy_to_user ((char *)arg, data, - sizeof (*data))) - err = -EFAULT; - else - err = 0; - } - } - - PtlNIFini(nih); - RETURN (err); - } default: RETURN(-EINVAL); } @@ -196,19 +156,17 @@ EXPORT_SYMBOL(PtlEQFree); EXPORT_SYMBOL(PtlEQGet); EXPORT_SYMBOL(PtlGetId); EXPORT_SYMBOL(PtlMDBind); -EXPORT_SYMBOL(lib_iov_nob); -EXPORT_SYMBOL(lib_copy_iov2buf); -EXPORT_SYMBOL(lib_copy_buf2iov); -EXPORT_SYMBOL(lib_extract_iov); -EXPORT_SYMBOL(lib_kiov_nob); -EXPORT_SYMBOL(lib_copy_kiov2buf); -EXPORT_SYMBOL(lib_copy_buf2kiov); -EXPORT_SYMBOL(lib_extract_kiov); -EXPORT_SYMBOL(lib_finalize); -EXPORT_SYMBOL(lib_parse); -EXPORT_SYMBOL(lib_create_reply_msg); -EXPORT_SYMBOL(lib_init); -EXPORT_SYMBOL(lib_fini); +EXPORT_SYMBOL(ptl_iov_nob); +EXPORT_SYMBOL(ptl_copy_iov2buf); +EXPORT_SYMBOL(ptl_copy_buf2iov); +EXPORT_SYMBOL(ptl_extract_iov); +EXPORT_SYMBOL(ptl_kiov_nob); +EXPORT_SYMBOL(ptl_copy_kiov2buf); +EXPORT_SYMBOL(ptl_copy_buf2kiov); +EXPORT_SYMBOL(ptl_extract_kiov); +EXPORT_SYMBOL(ptl_finalize); +EXPORT_SYMBOL(ptl_parse); +EXPORT_SYMBOL(ptl_create_reply_msg); MODULE_AUTHOR("Peter J. Braam "); MODULE_DESCRIPTION("Portals v3.1"); diff --git a/lnet/router/router.c b/lnet/router/router.c index 7edc5f6..9d74754 100644 --- a/lnet/router/router.c +++ b/lnet/router/router.c @@ -120,15 +120,13 @@ kpr_do_upcall (void *arg) void kpr_upcall (int gw_nalid, ptl_nid_t gw_nid, int alive, time_t when) { - char str[PTL_NALFMT_SIZE]; - /* May be in arbitrary context */ kpr_upcall_t *u = kmalloc (sizeof (kpr_upcall_t), GFP_ATOMIC); if (u == NULL) { CERROR ("Upcall out of memory: nal %x nid "LPX64" (%s) %s\n", gw_nalid, gw_nid, - portals_nid2str(gw_nalid, gw_nid, str), + libcfs_nid2str(gw_nid), alive ? "up" : "down"); return; } @@ -153,7 +151,6 @@ kpr_do_notify (int byNal, int gateway_nalid, ptl_nid_t gateway_nid, struct timeval now; struct list_head *e; struct list_head *n; - char str[PTL_NALFMT_SIZE]; CDEBUG (D_NET, "%s notifying [%x] "LPX64": %s\n", byNal ? "NAL" : "userspace", @@ -254,7 +251,7 @@ kpr_do_notify (int byNal, int gateway_nalid, ptl_nid_t gateway_nid, /* It wasn't userland that notified me... */ CWARN ("Upcall: NAL %x NID "LPX64" (%s) is %s\n", gateway_nalid, gateway_nid, - portals_nid2str(gateway_nalid, gateway_nid, str), + libcfs_nid2str(gateway_nid), alive ? "alive" : "dead"); kpr_upcall (gateway_nalid, gateway_nid, alive, when); } else { @@ -452,7 +449,7 @@ kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd) CDEBUG (D_NET, "forward [%p] "LPX64" from NAL %x\n", fwd, target_nid, src_ne->kpne_interface.kprni_nalid); - LASSERT (nob == lib_kiov_nob (fwd->kprfd_niov, fwd->kprfd_kiov)); + LASSERT (nob == ptl_kiov_nob (fwd->kprfd_niov, fwd->kprfd_kiov)); LASSERT (!in_interrupt()); read_lock (&kpr_rwlock); diff --git a/lnet/tests/ping_cli.c b/lnet/tests/ping_cli.c index 2995b46..f33a997 100644 --- a/lnet/tests/ping_cli.c +++ b/lnet/tests/ping_cli.c @@ -112,16 +112,14 @@ pingcli_start(struct portal_ioctl_data *args) unsigned ping_head_magic = __cpu_to_le32(PING_HEADER_MAGIC); int rc; struct timeval tv1, tv2; - char str[PTL_NALFMT_SIZE]; client->tsk = cfs_current(); client->args = args; CDEBUG (D_OTHER, "pingcli_setup args: nid "LPX64" (%s), \ - nal %x, size %u, count: %u, timeout: %u\n", + size %u, count: %u, timeout: %u\n", args->ioc_nid, - portals_nid2str(args->ioc_nal, args->ioc_nid, str), - args->ioc_nal, args->ioc_size, - args->ioc_count, args->ioc_timeout); + libcfs_nid2str(args->ioc_nid), + args->ioc_size, args->ioc_count, args->ioc_timeout); PORTAL_ALLOC (client->outbuf, STDSIZE + args->ioc_size) ; @@ -141,11 +139,10 @@ pingcli_start(struct portal_ioctl_data *args) return (NULL); } - /* Aquire and initialize the proper nal for portals. */ - rc = PtlNIInit(args->ioc_nal, 0, NULL, NULL, &nih); + rc = PtlNIInit(PTL_IFACE_DEFAULT, 0, NULL, NULL, &nih); if (rc != PTL_OK && rc != PTL_IFACE_DUP) { - CERROR ("NAL %x not loaded\n", args->ioc_nal); + CERROR ("PtlNIInit: error %d\n", rc); pingcli_shutdown (nih, 4); return (NULL); } diff --git a/lnet/tests/ping_srv.c b/lnet/tests/ping_srv.c index ae0d722..c444514 100644 --- a/lnet/tests/ping_srv.c +++ b/lnet/tests/ping_srv.c @@ -34,7 +34,6 @@ static unsigned ping_head_magic; static unsigned ping_bulk_magic; -static int nal = SOCKNAL; // Your NAL, static unsigned long packets_valid = 0; // Valid packets static int running = 1; atomic_t pkt; @@ -187,9 +186,9 @@ static struct pingsrv_data *pingsrv_setup(void) server->ni = PTL_INVALID_HANDLE; /* Aquire and initialize the proper nal for portals. */ - rc = PtlNIInit(nal, 0, NULL, NULL, &server->ni); + rc = PtlNIInit(PTL_IFACE_DEFAULT, 0, NULL, NULL, &server->ni); if (!(rc == PTL_OK || rc == PTL_IFACE_DUP)) { - CDEBUG (D_OTHER, "NAL %x not loaded\n", nal); + CDEBUG (D_OTHER, "PtlNIInit: error %d\n", rc); return pingsrv_shutdown (4); } @@ -277,10 +276,6 @@ static void /*__exit*/ pingsrv_cleanup(void) } /* pingsrv_cleanup() */ -MODULE_PARM(nal, "i"); -MODULE_PARM_DESC(nal, "Use the specified NAL " - "(2-ksocknal, 1-kqswnal)"); - MODULE_AUTHOR("Brian Behlendorf (LLNL)"); MODULE_DESCRIPTION("A kernel space ping server for portals testing"); MODULE_LICENSE("GPL"); diff --git a/lnet/tests/sping_cli.c b/lnet/tests/sping_cli.c index 71a2a98..251a6c7 100644 --- a/lnet/tests/sping_cli.c +++ b/lnet/tests/sping_cli.c @@ -103,18 +103,16 @@ pingcli_start(struct portal_ioctl_data *args) { ptl_handle_ni_t nih = PTL_INVALID_HANDLE; unsigned ping_head_magic = PING_HEADER_MAGIC; - char str[PTL_NALFMT_SIZE]; int rc; client->tsk = current; client->args = args; - CDEBUG (D_OTHER, "pingcli_setup args: nid "LPX64" (%s), \ - nal %x, size %u, count: %u, timeout: %u\n", - args->ioc_nid, - portals_nid2str(args->ioc_nid, args->ioc_nal, str), - args->ioc_nal, args->ioc_size, - args->ioc_count, args->ioc_timeout); + CDEBUG (D_OTHER, "pingcli_setup args: nid "LPX64" (%s), " + "size %u, count: %u, timeout: %u\n", + args->ioc_nid, + libcfs_nid2str(args->ioc_nid), + args->ioc_size, args->ioc_count, args->ioc_timeout); PORTAL_ALLOC (client->outbuf, STDSIZE) ; @@ -135,10 +133,10 @@ pingcli_start(struct portal_ioctl_data *args) } /* Aquire and initialize the proper nal for portals. */ - rc = PtlNIInit(args->ioc_nal, 0, NULL, NULL, &nih); + rc = PtlNIInit(PTL_IFACE_DEFAULT, 0, NULL, NULL, &nih); if (rc != PTL_OK && rc != PTL_IFACE_DUP) { - CERROR ("NAL %x not loaded.\n", args->ioc_nal); + CERROR ("PtlNIInit: error %d\n", rc); pingcli_shutdown (nih, 4); return (NULL); } diff --git a/lnet/tests/sping_srv.c b/lnet/tests/sping_srv.c index 30f158c..fafb0db 100644 --- a/lnet/tests/sping_srv.c +++ b/lnet/tests/sping_srv.c @@ -53,7 +53,6 @@ #define STDSIZE (sizeof(int) + sizeof(int) + 4) -static int nal = PTL_IFACE_DEFAULT; // Your NAL, static unsigned long packets_valid = 0; // Valid packets static int running = 1; atomic_t pkt; @@ -190,12 +189,11 @@ static struct pingsrv_data *pingsrv_setup(void) { int rc; - /* Aquire and initialize the proper nal for portals. */ server->ni = PTL_INVALID_HANDLE; - rc = PtlNIInit(nal, 0, NULL, NULL, &server->ni); + rc = PtlNIInit(PTL_IFACE_DEFAULT, 0, NULL, NULL, &server->ni); if (rc != PTL_OK && rc != PTL_IFACE_DUP) { - CDEBUG (D_OTHER, "Nal %x not loaded.\n", nal); + CDEBUG (D_OTHER, "PtlNIInit: error %d\n", rc); return pingsrv_shutdown (4); } @@ -282,10 +280,6 @@ static void /*__exit*/ pingsrv_cleanup(void) } /* pingsrv_cleanup() */ -MODULE_PARM(nal, "i"); -MODULE_PARM_DESC(nal, "Use the specified NAL " - "(2-ksocknal, 1-kqswnal)"); - MODULE_AUTHOR("Brian Behlendorf (LLNL)"); MODULE_DESCRIPTION("A kernel space ping server for portals testing"); MODULE_LICENSE("GPL"); diff --git a/lnet/ulnds/address.c b/lnet/ulnds/address.c index 07b4249..f47964c 100644 --- a/lnet/ulnds/address.c +++ b/lnet/ulnds/address.c @@ -93,8 +93,8 @@ void set_address(bridge t,ptl_pid_t pidrequest) int port; if (pidrequest==(unsigned short)PTL_PID_ANY) port = 0; else port=pidrequest; - t->lib_nal->libnal_ni.ni_pid.nid=get_node_id(); - t->lib_nal->libnal_ni.ni_pid.pid=port; + t->b_ni->ni_nid=get_node_id(); + ptl_apini.apini_pid=port; } #else @@ -122,9 +122,9 @@ void set_address(bridge t,ptl_pid_t pidrequest) in_addr = get_node_id(); t->iptop8 = in_addr >> PNAL_HOSTID_SHIFT;/* for making new connections */ - t->lib_nal->libnal_ni.ni_pid.nid = ((in_addr & PNAL_HOSTID_MASK) - << PNAL_VNODE_SHIFT) - + virtnode; + t->b_ni->ni_nid = ((in_addr & PNAL_HOSTID_MASK) + << PNAL_VNODE_SHIFT) + + virtnode; pid=pidrequest; /* TODO: Support of pid PTL_ID_ANY with virtual nodes needs more work. */ #ifdef notyet @@ -142,6 +142,6 @@ void set_address(bridge t,ptl_pid_t pidrequest) return; } else port = ((virtnode << PNAL_VNODE_SHIFT) + pid) + PNAL_BASE_PORT; - t->lib_nal->libnal_ni.ni_pid.pid=pid; + ptl_apini->apini_pid=pid; } #endif diff --git a/lnet/ulnds/bridge.h b/lnet/ulnds/bridge.h index d2f0f2c..e228d5f 100644 --- a/lnet/ulnds/bridge.h +++ b/lnet/ulnds/bridge.h @@ -10,25 +10,14 @@ #define TCPNAL_PROCBRIDGE_H #include -#include - -#define PTL_IFACE_TCP 1 -#define PTL_IFACE_ER 2 -#define PTL_IFACE_SS 3 -#define PTL_IFACE_MAX 4 typedef struct bridge { int alive; - lib_nal_t *lib_nal; + ptl_ni_t *b_ni; void *lower; void *local; - void (*shutdown)(struct bridge *); /* this doesn't really belong here */ unsigned char iptop8; } *bridge; - -typedef int (*nal_initialize)(bridge); -extern nal_initialize nal_table[PTL_IFACE_MAX]; - #endif diff --git a/lnet/ulnds/debug.c b/lnet/ulnds/debug.c index b82bb2f..68f6988 100644 --- a/lnet/ulnds/debug.c +++ b/lnet/ulnds/debug.c @@ -21,10 +21,12 @@ */ #include +#include #include #include #include #include +#include int smp_processor_id = 1; char debug_file_path[1024] = "/tmp/lustre-log"; @@ -84,8 +86,8 @@ int portals_debug_copy_to_user(char *buf, unsigned long len) /* FIXME: I'm not very smart; someone smarter should make this better. */ void -portals_debug_msg (int subsys, int mask, char *file, const char *fn, - const int line, const char *format, ...) +libcfs_debug_msg (int subsys, int mask, char *file, const char *fn, + const int line, unsigned long stack, char *format, ...) { va_list ap; unsigned long flags; @@ -117,3 +119,11 @@ portals_debug_msg (int subsys, int mask, char *file, const char *fn, } +void +libcfs_assertion_failed(char *expr, char *file, const char *func, + const int line) +{ + libcfs_debug_msg(0, D_EMERG, file, func, line, 0, + "ASSERTION(%s) failed\n", expr); + abort(); +} diff --git a/lnet/ulnds/procapi.c b/lnet/ulnds/procapi.c index 6b471c0..7251a47 100644 --- a/lnet/ulnds/procapi.c +++ b/lnet/ulnds/procapi.c @@ -41,7 +41,6 @@ #include #include - /* XXX CFS workaround, to give a chance to let nal thread wake up * from waiting in select */ @@ -60,17 +59,39 @@ void procbridge_wakeup_nal(procbridge p) syscall(SYS_write, p->notifier[0], buf, sizeof(buf)); } +/* forward decl */ +extern ptl_err_t procbridge_startup (ptl_ni_t *, char **); +extern void procbridge_shutdown (ptl_ni_t *); +extern ptl_err_t tcpnal_send(ptl_ni_t *ni, void *private, ptl_msg_t *cookie, + ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int niov, struct iovec *iov, + size_t offset, size_t len); +extern ptl_err_t tcpnal_recv(ptl_ni_t *ni, void *private, ptl_msg_t *cookie, + unsigned int niov, struct iovec *iov, + size_t offset, size_t mlen, size_t rlen); + +ptl_nal_t tcpnal_nal = { + .nal_name = "tcp", + .nal_type = SOCKNAL, + .nal_startup = procbridge_startup, + .nal_shutdown = procbridge_shutdown, + .nal_send = tcpnal_send, + .nal_recv = tcpnal_recv, +}; +int tcpnal_running; +ptl_nid_t tcpnal_mynid; + + /* Function: shutdown - * Arguments: nal: a pointer to my top side nal structure - * ni: my network interface index + * Arguments: ni: the instance of me * * cleanup nal state, reclaim the lower side thread and * its state using PTL_FINI codepoint */ -static void procbridge_shutdown(nal_t *n) +void +procbridge_shutdown(ptl_ni_t *ni) { - lib_nal_t *nal = n->nal_data; - bridge b=(bridge)nal->libnal_data; + bridge b=(bridge)ni->ni_data; procbridge p=(procbridge)b->local; p->nal_flags |= NAL_FLAG_STOPPING; @@ -87,68 +108,39 @@ static void procbridge_shutdown(nal_t *n) } while (1); free(p); + tcpnal_running = 0; } - -/* forward decl */ -extern int procbridge_startup (nal_t *, ptl_pid_t, - ptl_ni_limits_t *, ptl_ni_limits_t *); - -/* api_nal - * the interface vector to allow the generic code to access - * this nal. this is seperate from the library side lib_nal. - * TODO: should be dyanmically allocated - */ -nal_t procapi_nal = { - nal_data: NULL, - nal_ni_init: procbridge_startup, - nal_ni_fini: procbridge_shutdown, -}; - -ptl_nid_t tcpnal_mynid; - #ifdef ENABLE_SELECT_DISPATCH procbridge __global_procbridge = NULL; #endif /* Function: procbridge_startup * - * Arguments: pid: requested process id (port offset) - * PTL_ID_ANY not supported. - * desired: limits passed from the application - * and effectively ignored - * actual: limits actually allocated and returned + * Arguments: ni: the instance of me + * interfaces: ignored * * Returns: portals rc * * initializes the tcp nal. we define unix_failure as an * error wrapper to cut down clutter. */ -int procbridge_startup (nal_t *nal, ptl_pid_t requested_pid, - ptl_ni_limits_t *requested_limits, - ptl_ni_limits_t *actual_limits) +ptl_err_t +procbridge_startup (ptl_ni_t *ni, char **interfaces) { - nal_init_args_t args; - procbridge p; bridge b; - /* XXX nal_type is purely private to tcpnal here */ - int nal_type = PTL_IFACE_TCP;/* PTL_IFACE_DEFAULT FIXME hack */ - LASSERT(nal == &procapi_nal); + LASSERT(ni->ni_nal == &tcpnal_nal); + LASSERT (!tcpnal_running); /* only single instance supported */ init_unix_timer(); b=(bridge)malloc(sizeof(struct bridge)); p=(procbridge)malloc(sizeof(struct procbridge)); b->local=p; - - args.nia_requested_pid = requested_pid; - args.nia_requested_limits = requested_limits; - args.nia_actual_limits = actual_limits; - args.nia_nal_type = nal_type; - args.nia_bridge = b; - args.nia_apinal = nal; + b->b_ni = ni; + ni->ni_data = b; /* init procbridge */ pthread_mutex_init(&p->mutex,0); @@ -172,7 +164,7 @@ int procbridge_startup (nal_t *nal, ptl_pid_t requested_pid, #endif /* create nal thread */ - if (pthread_create(&p->t, NULL, nal_thread, &args)) { + if (pthread_create(&p->t, NULL, nal_thread, b)) { perror("nal_init: pthread_create"); return PTL_FAIL; } @@ -190,7 +182,9 @@ int procbridge_startup (nal_t *nal, ptl_pid_t requested_pid, if (p->nal_flags & NAL_FLAG_STOPPED) return PTL_FAIL; - b->lib_nal->libnal_ni.ni_pid.nid = tcpnal_mynid; + /* so what a load of bollocks set_address() is... */ + ni->ni_nid = tcpnal_mynid; + tcpnal_running = 1; return PTL_OK; } diff --git a/lnet/ulnds/procbridge.h b/lnet/ulnds/procbridge.h index 1f91ced..72e0e39 100644 --- a/lnet/ulnds/procbridge.h +++ b/lnet/ulnds/procbridge.h @@ -34,22 +34,11 @@ typedef struct procbridge { typedef struct nal_init_args { ptl_pid_t nia_requested_pid; - ptl_ni_limits_t *nia_requested_limits; - ptl_ni_limits_t *nia_actual_limits; - int nia_nal_type; bridge nia_bridge; - nal_t *nia_apinal; } nal_init_args_t; extern void *nal_thread(void *); - -#define PTL_INIT (LIB_MAX_DISPATCH+1) -#define PTL_FINI (LIB_MAX_DISPATCH+2) - -#define MAX_ACLS 1 -#define MAX_PTLS 128 - extern void set_address(bridge t,ptl_pid_t pidrequest); extern void procbridge_wakeup_nal(procbridge p); diff --git a/lnet/ulnds/proclib.c b/lnet/ulnds/proclib.c index 7ee7c71..a93004b 100644 --- a/lnet/ulnds/proclib.c +++ b/lnet/ulnds/proclib.c @@ -42,13 +42,8 @@ /* the following functions are stubs to satisfy the nal definition without doing anything particularily useful*/ - -static int nal_dist(lib_nal_t *nal, - ptl_nid_t nid, - unsigned long *dist) -{ - return 0; -} +extern int tcpnal_init(bridge); +extern void tcpnal_shutdown(bridge); static void check_stopping(void *z) { @@ -63,6 +58,7 @@ static void check_stopping(void *z) pthread_cond_broadcast(&p->cond); pthread_mutex_unlock(&p->mutex); + tcpnal_shutdown(b); pthread_exit(0); } @@ -79,47 +75,25 @@ static void check_stopping(void *z) * We define a limit macro to place a ceiling on limits * for syntactic convenience */ -extern int tcpnal_init(bridge); - -nal_initialize nal_table[PTL_IFACE_MAX]={0,tcpnal_init,0}; void *nal_thread(void *z) { - nal_init_args_t *args = (nal_init_args_t *) z; - bridge b = args->nia_bridge; + bridge b = (bridge) z; procbridge p=b->local; int rc; - ptl_process_id_t process_id; - int nal_type; - b->lib_nal=(lib_nal_t *)malloc(sizeof(lib_nal_t)); - b->lib_nal->libnal_data=b; - b->lib_nal->libnal_map=NULL; - b->lib_nal->libnal_unmap=NULL; - b->lib_nal->libnal_dist=nal_dist; - - nal_type = args->nia_nal_type; - - /* Wierd, but this sets b->lib_nal->libnal_ni.ni_pid.{nid,pid}, which - * lib_init() is about to do from the process_id passed to it...*/ - set_address(b,args->nia_requested_pid); - - process_id = b->lib_nal->libnal_ni.ni_pid; + /* _the_ NI (ptl_apini) has already been set up with a requested pid; pass + * that to set_address... */ + set_address(b, ptl_apini.apini_pid); - if (nal_table[nal_type]) rc=(*nal_table[nal_type])(b); - /* initialize the generic 'library' level code */ - - rc = lib_init(b->lib_nal, args->nia_apinal, - process_id, - args->nia_requested_limits, - args->nia_actual_limits); + rc = tcpnal_init(b); /* * Whatever the initialization returned is passed back to the * user level code for further interpretation. We just exit if * it is non-zero since something went wrong. */ - /* this should perform error checking */ + pthread_mutex_lock(&p->mutex); p->nal_flags |= (rc != PTL_OK) ? NAL_FLAG_STOPPED : NAL_FLAG_RUNNING; pthread_cond_broadcast(&p->cond); diff --git a/lnet/ulnds/socklnd/address.c b/lnet/ulnds/socklnd/address.c index 07b4249..f47964c 100644 --- a/lnet/ulnds/socklnd/address.c +++ b/lnet/ulnds/socklnd/address.c @@ -93,8 +93,8 @@ void set_address(bridge t,ptl_pid_t pidrequest) int port; if (pidrequest==(unsigned short)PTL_PID_ANY) port = 0; else port=pidrequest; - t->lib_nal->libnal_ni.ni_pid.nid=get_node_id(); - t->lib_nal->libnal_ni.ni_pid.pid=port; + t->b_ni->ni_nid=get_node_id(); + ptl_apini.apini_pid=port; } #else @@ -122,9 +122,9 @@ void set_address(bridge t,ptl_pid_t pidrequest) in_addr = get_node_id(); t->iptop8 = in_addr >> PNAL_HOSTID_SHIFT;/* for making new connections */ - t->lib_nal->libnal_ni.ni_pid.nid = ((in_addr & PNAL_HOSTID_MASK) - << PNAL_VNODE_SHIFT) - + virtnode; + t->b_ni->ni_nid = ((in_addr & PNAL_HOSTID_MASK) + << PNAL_VNODE_SHIFT) + + virtnode; pid=pidrequest; /* TODO: Support of pid PTL_ID_ANY with virtual nodes needs more work. */ #ifdef notyet @@ -142,6 +142,6 @@ void set_address(bridge t,ptl_pid_t pidrequest) return; } else port = ((virtnode << PNAL_VNODE_SHIFT) + pid) + PNAL_BASE_PORT; - t->lib_nal->libnal_ni.ni_pid.pid=pid; + ptl_apini->apini_pid=pid; } #endif diff --git a/lnet/ulnds/socklnd/bridge.h b/lnet/ulnds/socklnd/bridge.h index d2f0f2c..e228d5f 100644 --- a/lnet/ulnds/socklnd/bridge.h +++ b/lnet/ulnds/socklnd/bridge.h @@ -10,25 +10,14 @@ #define TCPNAL_PROCBRIDGE_H #include -#include - -#define PTL_IFACE_TCP 1 -#define PTL_IFACE_ER 2 -#define PTL_IFACE_SS 3 -#define PTL_IFACE_MAX 4 typedef struct bridge { int alive; - lib_nal_t *lib_nal; + ptl_ni_t *b_ni; void *lower; void *local; - void (*shutdown)(struct bridge *); /* this doesn't really belong here */ unsigned char iptop8; } *bridge; - -typedef int (*nal_initialize)(bridge); -extern nal_initialize nal_table[PTL_IFACE_MAX]; - #endif diff --git a/lnet/ulnds/socklnd/debug.c b/lnet/ulnds/socklnd/debug.c index b82bb2f..68f6988 100644 --- a/lnet/ulnds/socklnd/debug.c +++ b/lnet/ulnds/socklnd/debug.c @@ -21,10 +21,12 @@ */ #include +#include #include #include #include #include +#include int smp_processor_id = 1; char debug_file_path[1024] = "/tmp/lustre-log"; @@ -84,8 +86,8 @@ int portals_debug_copy_to_user(char *buf, unsigned long len) /* FIXME: I'm not very smart; someone smarter should make this better. */ void -portals_debug_msg (int subsys, int mask, char *file, const char *fn, - const int line, const char *format, ...) +libcfs_debug_msg (int subsys, int mask, char *file, const char *fn, + const int line, unsigned long stack, char *format, ...) { va_list ap; unsigned long flags; @@ -117,3 +119,11 @@ portals_debug_msg (int subsys, int mask, char *file, const char *fn, } +void +libcfs_assertion_failed(char *expr, char *file, const char *func, + const int line) +{ + libcfs_debug_msg(0, D_EMERG, file, func, line, 0, + "ASSERTION(%s) failed\n", expr); + abort(); +} diff --git a/lnet/ulnds/socklnd/procapi.c b/lnet/ulnds/socklnd/procapi.c index 6b471c0..7251a47 100644 --- a/lnet/ulnds/socklnd/procapi.c +++ b/lnet/ulnds/socklnd/procapi.c @@ -41,7 +41,6 @@ #include #include - /* XXX CFS workaround, to give a chance to let nal thread wake up * from waiting in select */ @@ -60,17 +59,39 @@ void procbridge_wakeup_nal(procbridge p) syscall(SYS_write, p->notifier[0], buf, sizeof(buf)); } +/* forward decl */ +extern ptl_err_t procbridge_startup (ptl_ni_t *, char **); +extern void procbridge_shutdown (ptl_ni_t *); +extern ptl_err_t tcpnal_send(ptl_ni_t *ni, void *private, ptl_msg_t *cookie, + ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int niov, struct iovec *iov, + size_t offset, size_t len); +extern ptl_err_t tcpnal_recv(ptl_ni_t *ni, void *private, ptl_msg_t *cookie, + unsigned int niov, struct iovec *iov, + size_t offset, size_t mlen, size_t rlen); + +ptl_nal_t tcpnal_nal = { + .nal_name = "tcp", + .nal_type = SOCKNAL, + .nal_startup = procbridge_startup, + .nal_shutdown = procbridge_shutdown, + .nal_send = tcpnal_send, + .nal_recv = tcpnal_recv, +}; +int tcpnal_running; +ptl_nid_t tcpnal_mynid; + + /* Function: shutdown - * Arguments: nal: a pointer to my top side nal structure - * ni: my network interface index + * Arguments: ni: the instance of me * * cleanup nal state, reclaim the lower side thread and * its state using PTL_FINI codepoint */ -static void procbridge_shutdown(nal_t *n) +void +procbridge_shutdown(ptl_ni_t *ni) { - lib_nal_t *nal = n->nal_data; - bridge b=(bridge)nal->libnal_data; + bridge b=(bridge)ni->ni_data; procbridge p=(procbridge)b->local; p->nal_flags |= NAL_FLAG_STOPPING; @@ -87,68 +108,39 @@ static void procbridge_shutdown(nal_t *n) } while (1); free(p); + tcpnal_running = 0; } - -/* forward decl */ -extern int procbridge_startup (nal_t *, ptl_pid_t, - ptl_ni_limits_t *, ptl_ni_limits_t *); - -/* api_nal - * the interface vector to allow the generic code to access - * this nal. this is seperate from the library side lib_nal. - * TODO: should be dyanmically allocated - */ -nal_t procapi_nal = { - nal_data: NULL, - nal_ni_init: procbridge_startup, - nal_ni_fini: procbridge_shutdown, -}; - -ptl_nid_t tcpnal_mynid; - #ifdef ENABLE_SELECT_DISPATCH procbridge __global_procbridge = NULL; #endif /* Function: procbridge_startup * - * Arguments: pid: requested process id (port offset) - * PTL_ID_ANY not supported. - * desired: limits passed from the application - * and effectively ignored - * actual: limits actually allocated and returned + * Arguments: ni: the instance of me + * interfaces: ignored * * Returns: portals rc * * initializes the tcp nal. we define unix_failure as an * error wrapper to cut down clutter. */ -int procbridge_startup (nal_t *nal, ptl_pid_t requested_pid, - ptl_ni_limits_t *requested_limits, - ptl_ni_limits_t *actual_limits) +ptl_err_t +procbridge_startup (ptl_ni_t *ni, char **interfaces) { - nal_init_args_t args; - procbridge p; bridge b; - /* XXX nal_type is purely private to tcpnal here */ - int nal_type = PTL_IFACE_TCP;/* PTL_IFACE_DEFAULT FIXME hack */ - LASSERT(nal == &procapi_nal); + LASSERT(ni->ni_nal == &tcpnal_nal); + LASSERT (!tcpnal_running); /* only single instance supported */ init_unix_timer(); b=(bridge)malloc(sizeof(struct bridge)); p=(procbridge)malloc(sizeof(struct procbridge)); b->local=p; - - args.nia_requested_pid = requested_pid; - args.nia_requested_limits = requested_limits; - args.nia_actual_limits = actual_limits; - args.nia_nal_type = nal_type; - args.nia_bridge = b; - args.nia_apinal = nal; + b->b_ni = ni; + ni->ni_data = b; /* init procbridge */ pthread_mutex_init(&p->mutex,0); @@ -172,7 +164,7 @@ int procbridge_startup (nal_t *nal, ptl_pid_t requested_pid, #endif /* create nal thread */ - if (pthread_create(&p->t, NULL, nal_thread, &args)) { + if (pthread_create(&p->t, NULL, nal_thread, b)) { perror("nal_init: pthread_create"); return PTL_FAIL; } @@ -190,7 +182,9 @@ int procbridge_startup (nal_t *nal, ptl_pid_t requested_pid, if (p->nal_flags & NAL_FLAG_STOPPED) return PTL_FAIL; - b->lib_nal->libnal_ni.ni_pid.nid = tcpnal_mynid; + /* so what a load of bollocks set_address() is... */ + ni->ni_nid = tcpnal_mynid; + tcpnal_running = 1; return PTL_OK; } diff --git a/lnet/ulnds/socklnd/procbridge.h b/lnet/ulnds/socklnd/procbridge.h index 1f91ced..72e0e39 100644 --- a/lnet/ulnds/socklnd/procbridge.h +++ b/lnet/ulnds/socklnd/procbridge.h @@ -34,22 +34,11 @@ typedef struct procbridge { typedef struct nal_init_args { ptl_pid_t nia_requested_pid; - ptl_ni_limits_t *nia_requested_limits; - ptl_ni_limits_t *nia_actual_limits; - int nia_nal_type; bridge nia_bridge; - nal_t *nia_apinal; } nal_init_args_t; extern void *nal_thread(void *); - -#define PTL_INIT (LIB_MAX_DISPATCH+1) -#define PTL_FINI (LIB_MAX_DISPATCH+2) - -#define MAX_ACLS 1 -#define MAX_PTLS 128 - extern void set_address(bridge t,ptl_pid_t pidrequest); extern void procbridge_wakeup_nal(procbridge p); diff --git a/lnet/ulnds/socklnd/proclib.c b/lnet/ulnds/socklnd/proclib.c index 7ee7c71..a93004b 100644 --- a/lnet/ulnds/socklnd/proclib.c +++ b/lnet/ulnds/socklnd/proclib.c @@ -42,13 +42,8 @@ /* the following functions are stubs to satisfy the nal definition without doing anything particularily useful*/ - -static int nal_dist(lib_nal_t *nal, - ptl_nid_t nid, - unsigned long *dist) -{ - return 0; -} +extern int tcpnal_init(bridge); +extern void tcpnal_shutdown(bridge); static void check_stopping(void *z) { @@ -63,6 +58,7 @@ static void check_stopping(void *z) pthread_cond_broadcast(&p->cond); pthread_mutex_unlock(&p->mutex); + tcpnal_shutdown(b); pthread_exit(0); } @@ -79,47 +75,25 @@ static void check_stopping(void *z) * We define a limit macro to place a ceiling on limits * for syntactic convenience */ -extern int tcpnal_init(bridge); - -nal_initialize nal_table[PTL_IFACE_MAX]={0,tcpnal_init,0}; void *nal_thread(void *z) { - nal_init_args_t *args = (nal_init_args_t *) z; - bridge b = args->nia_bridge; + bridge b = (bridge) z; procbridge p=b->local; int rc; - ptl_process_id_t process_id; - int nal_type; - b->lib_nal=(lib_nal_t *)malloc(sizeof(lib_nal_t)); - b->lib_nal->libnal_data=b; - b->lib_nal->libnal_map=NULL; - b->lib_nal->libnal_unmap=NULL; - b->lib_nal->libnal_dist=nal_dist; - - nal_type = args->nia_nal_type; - - /* Wierd, but this sets b->lib_nal->libnal_ni.ni_pid.{nid,pid}, which - * lib_init() is about to do from the process_id passed to it...*/ - set_address(b,args->nia_requested_pid); - - process_id = b->lib_nal->libnal_ni.ni_pid; + /* _the_ NI (ptl_apini) has already been set up with a requested pid; pass + * that to set_address... */ + set_address(b, ptl_apini.apini_pid); - if (nal_table[nal_type]) rc=(*nal_table[nal_type])(b); - /* initialize the generic 'library' level code */ - - rc = lib_init(b->lib_nal, args->nia_apinal, - process_id, - args->nia_requested_limits, - args->nia_actual_limits); + rc = tcpnal_init(b); /* * Whatever the initialization returned is passed back to the * user level code for further interpretation. We just exit if * it is non-zero since something went wrong. */ - /* this should perform error checking */ + pthread_mutex_lock(&p->mutex); p->nal_flags |= (rc != PTL_OK) ? NAL_FLAG_STOPPED : NAL_FLAG_RUNNING; pthread_cond_broadcast(&p->cond); diff --git a/lnet/ulnds/socklnd/tcplnd.c b/lnet/ulnds/socklnd/tcplnd.c index abb6d01..e805ff8 100644 --- a/lnet/ulnds/socklnd/tcplnd.c +++ b/lnet/ulnds/socklnd/tcplnd.c @@ -43,7 +43,7 @@ #endif /* Function: tcpnal_send - * Arguments: nal: pointer to my nal control block + * Arguments: ni: pointer to NAL instance * private: unused * cookie: passed back to the portals library * hdr: pointer to the portals header @@ -55,9 +55,9 @@ * * sends a packet to the peer, after insuring that a connection exists */ -ptl_err_t tcpnal_send(lib_nal_t *n, +ptl_err_t tcpnal_send(ptl_ni_t *ni, void *private, - lib_msg_t *cookie, + ptl_msg_t *cookie, ptl_hdr_t *hdr, int type, ptl_nid_t nid, @@ -68,7 +68,7 @@ ptl_err_t tcpnal_send(lib_nal_t *n, size_t len) { connection c; - bridge b=(bridge)n->libnal_data; + bridge b=(bridge)ni->ni_data; struct iovec tiov[257]; static pthread_mutex_t send_lock = PTHREAD_MUTEX_INITIALIZER; ptl_err_t rc = PTL_OK; @@ -94,7 +94,7 @@ ptl_err_t tcpnal_send(lib_nal_t *n, tiov[0].iov_base = hdr; tiov[0].iov_len = sizeof(ptl_hdr_t); - ntiov = 1 + lib_extract_iov(256, &tiov[1], niov, iov, offset, len); + ntiov = 1 + ptl_extract_iov(256, &tiov[1], niov, iov, offset, len); pthread_mutex_lock(&send_lock); #if 1 @@ -132,9 +132,9 @@ ptl_err_t tcpnal_send(lib_nal_t *n, pthread_mutex_unlock(&send_lock); if (rc == PTL_OK) { - /* NB the NAL only calls lib_finalize() if it returns PTL_OK + /* NB the NAL only calls ptl_finalize() if it returns PTL_OK * from cb_send() */ - lib_finalize(n, private, cookie, PTL_OK); + ptl_finalize(ni, private, cookie, PTL_OK); } return(rc); @@ -142,10 +142,10 @@ ptl_err_t tcpnal_send(lib_nal_t *n, /* Function: tcpnal_recv - * Arguments: lib_nal_t *nal: pointer to my nal control block + * Arguments: ptl_ni_t *: pointer to NAL instance * void *private: connection pointer passed through - * lib_parse() - * lib_msg_t *cookie: passed back to portals library + * ptl_parse() + * ptl_msg_t *cookie: passed back to portals library * user_ptr data: pointer to the destination buffer * size_t mlen: length of the body * size_t rlen: length of data in the network @@ -154,15 +154,14 @@ ptl_err_t tcpnal_send(lib_nal_t *n, * blocking read of the requested data. must drain out the * difference of mainpulated and requested lengths from the network */ -ptl_err_t tcpnal_recv(lib_nal_t *n, +ptl_err_t tcpnal_recv(ptl_ni_t *ni, void *private, - lib_msg_t *cookie, + ptl_msg_t *cookie, unsigned int niov, struct iovec *iov, size_t offset, size_t mlen, size_t rlen) - { struct iovec tiov[256]; int ntiov; @@ -175,7 +174,7 @@ ptl_err_t tcpnal_recv(lib_nal_t *n, LASSERT(rlen); LASSERT(rlen >= mlen); - ntiov = lib_extract_iov(256, tiov, niov, iov, offset, mlen); + ntiov = ptl_extract_iov(256, tiov, niov, iov, offset, mlen); /* FIXME * 1. Is this effecient enough? change to use readv() directly? @@ -187,7 +186,7 @@ ptl_err_t tcpnal_recv(lib_nal_t *n, finalize: /* FIXME; we always assume success here... */ - lib_finalize(n, private, cookie, PTL_OK); + ptl_finalize(ni, private, cookie, PTL_OK); if (mlen!=rlen){ char *trash=malloc(rlen-mlen); @@ -217,7 +216,7 @@ static int from_connection(void *a, void *d) ptl_hdr_t hdr; if (read_connection(c, (unsigned char *)&hdr, sizeof(hdr))){ - lib_parse(b->lib_nal, &hdr, c); + ptl_parse(b->b_ni, &hdr, c); /*TODO: check error status*/ return(1); } @@ -225,7 +224,7 @@ static int from_connection(void *a, void *d) } -static void tcpnal_shutdown(bridge b) +void tcpnal_shutdown(bridge b) { shutdown_connections(b->lower); } @@ -240,12 +239,8 @@ int tcpnal_init(bridge b) { manager m; - b->lib_nal->libnal_send=tcpnal_send; - b->lib_nal->libnal_recv=tcpnal_recv; - b->shutdown=tcpnal_shutdown; - - if (!(m=init_connections(PNAL_PORT(b->lib_nal->libnal_ni.ni_pid.nid, - b->lib_nal->libnal_ni.ni_pid.pid), + if (!(m=init_connections(PNAL_PORT(b->b_ni->ni_nid, + ptl_apini.apini_pid), from_connection,b))){ /* TODO: this needs to shut down the newly created junk */ diff --git a/lnet/ulnds/tcplnd.c b/lnet/ulnds/tcplnd.c index abb6d01..e805ff8 100644 --- a/lnet/ulnds/tcplnd.c +++ b/lnet/ulnds/tcplnd.c @@ -43,7 +43,7 @@ #endif /* Function: tcpnal_send - * Arguments: nal: pointer to my nal control block + * Arguments: ni: pointer to NAL instance * private: unused * cookie: passed back to the portals library * hdr: pointer to the portals header @@ -55,9 +55,9 @@ * * sends a packet to the peer, after insuring that a connection exists */ -ptl_err_t tcpnal_send(lib_nal_t *n, +ptl_err_t tcpnal_send(ptl_ni_t *ni, void *private, - lib_msg_t *cookie, + ptl_msg_t *cookie, ptl_hdr_t *hdr, int type, ptl_nid_t nid, @@ -68,7 +68,7 @@ ptl_err_t tcpnal_send(lib_nal_t *n, size_t len) { connection c; - bridge b=(bridge)n->libnal_data; + bridge b=(bridge)ni->ni_data; struct iovec tiov[257]; static pthread_mutex_t send_lock = PTHREAD_MUTEX_INITIALIZER; ptl_err_t rc = PTL_OK; @@ -94,7 +94,7 @@ ptl_err_t tcpnal_send(lib_nal_t *n, tiov[0].iov_base = hdr; tiov[0].iov_len = sizeof(ptl_hdr_t); - ntiov = 1 + lib_extract_iov(256, &tiov[1], niov, iov, offset, len); + ntiov = 1 + ptl_extract_iov(256, &tiov[1], niov, iov, offset, len); pthread_mutex_lock(&send_lock); #if 1 @@ -132,9 +132,9 @@ ptl_err_t tcpnal_send(lib_nal_t *n, pthread_mutex_unlock(&send_lock); if (rc == PTL_OK) { - /* NB the NAL only calls lib_finalize() if it returns PTL_OK + /* NB the NAL only calls ptl_finalize() if it returns PTL_OK * from cb_send() */ - lib_finalize(n, private, cookie, PTL_OK); + ptl_finalize(ni, private, cookie, PTL_OK); } return(rc); @@ -142,10 +142,10 @@ ptl_err_t tcpnal_send(lib_nal_t *n, /* Function: tcpnal_recv - * Arguments: lib_nal_t *nal: pointer to my nal control block + * Arguments: ptl_ni_t *: pointer to NAL instance * void *private: connection pointer passed through - * lib_parse() - * lib_msg_t *cookie: passed back to portals library + * ptl_parse() + * ptl_msg_t *cookie: passed back to portals library * user_ptr data: pointer to the destination buffer * size_t mlen: length of the body * size_t rlen: length of data in the network @@ -154,15 +154,14 @@ ptl_err_t tcpnal_send(lib_nal_t *n, * blocking read of the requested data. must drain out the * difference of mainpulated and requested lengths from the network */ -ptl_err_t tcpnal_recv(lib_nal_t *n, +ptl_err_t tcpnal_recv(ptl_ni_t *ni, void *private, - lib_msg_t *cookie, + ptl_msg_t *cookie, unsigned int niov, struct iovec *iov, size_t offset, size_t mlen, size_t rlen) - { struct iovec tiov[256]; int ntiov; @@ -175,7 +174,7 @@ ptl_err_t tcpnal_recv(lib_nal_t *n, LASSERT(rlen); LASSERT(rlen >= mlen); - ntiov = lib_extract_iov(256, tiov, niov, iov, offset, mlen); + ntiov = ptl_extract_iov(256, tiov, niov, iov, offset, mlen); /* FIXME * 1. Is this effecient enough? change to use readv() directly? @@ -187,7 +186,7 @@ ptl_err_t tcpnal_recv(lib_nal_t *n, finalize: /* FIXME; we always assume success here... */ - lib_finalize(n, private, cookie, PTL_OK); + ptl_finalize(ni, private, cookie, PTL_OK); if (mlen!=rlen){ char *trash=malloc(rlen-mlen); @@ -217,7 +216,7 @@ static int from_connection(void *a, void *d) ptl_hdr_t hdr; if (read_connection(c, (unsigned char *)&hdr, sizeof(hdr))){ - lib_parse(b->lib_nal, &hdr, c); + ptl_parse(b->b_ni, &hdr, c); /*TODO: check error status*/ return(1); } @@ -225,7 +224,7 @@ static int from_connection(void *a, void *d) } -static void tcpnal_shutdown(bridge b) +void tcpnal_shutdown(bridge b) { shutdown_connections(b->lower); } @@ -240,12 +239,8 @@ int tcpnal_init(bridge b) { manager m; - b->lib_nal->libnal_send=tcpnal_send; - b->lib_nal->libnal_recv=tcpnal_recv; - b->shutdown=tcpnal_shutdown; - - if (!(m=init_connections(PNAL_PORT(b->lib_nal->libnal_ni.ni_pid.nid, - b->lib_nal->libnal_ni.ni_pid.pid), + if (!(m=init_connections(PNAL_PORT(b->b_ni->ni_nid, + ptl_apini.apini_pid), from_connection,b))){ /* TODO: this needs to shut down the newly created junk */ diff --git a/lnet/utils/portals.c b/lnet/utils/portals.c index a9f2e35..2a5166b 100644 --- a/lnet/utils/portals.c +++ b/lnet/utils/portals.c @@ -1361,44 +1361,6 @@ jt_ptl_fail_nid (int argc, char **argv) } int -jt_ptl_loopback (int argc, char **argv) -{ - int rc; - int set; - int enable; - struct portal_ioctl_data data; - - if (argc > 2) - { - fprintf (stderr, "usage: %s [on|off]\n", argv[0]); - return (0); - } - - if (!g_nal_is_set()) - return (-1); - - set = argc > 1; - if (set && ptl_parse_bool (&enable, argv[1]) != 0) { - fprintf (stderr, "Can't parse boolean %s\n", argv[1]); - return (-1); - } - - PORTAL_IOC_INIT (data); - data.ioc_nal = g_nal; - data.ioc_flags = enable; - data.ioc_misc = set; - - rc = l_ioctl (PORTALS_DEV_ID, IOC_PORTAL_LOOPBACK, &data); - if (rc < 0) - fprintf (stderr, "IOC_PORTAL_FAIL_NID failed: %s\n", - strerror (errno)); - else - printf ("loopback %s\n", data.ioc_flags ? "enabled" : "disabled"); - - return (0); -} - -int jt_ptl_add_route (int argc, char **argv) { struct portals_cfg pcfg; diff --git a/lnet/utils/ptlctl.c b/lnet/utils/ptlctl.c index 3089211..03cfe77 100644 --- a/lnet/utils/ptlctl.c +++ b/lnet/utils/ptlctl.c @@ -53,7 +53,6 @@ command_t list[] = { {"print_routes", jt_ptl_print_routes, 0, "print the routing table (args: none)"}, {"dump", jt_ioc_dump, 0, "usage: dump file, save ioctl buffer to file"}, {"fail", jt_ptl_fail_nid, 0, "usage: fail nid|_all_ [count]"}, - {"loopback", jt_ptl_loopback, 0, "usage: loopback [on|off]"}, {"help", Parser_help, 0, "help"}, {"exit", Parser_quit, 0, "quit"}, {"quit", Parser_quit, 0, "quit"}, diff --git a/lnet/utils/wirecheck.c b/lnet/utils/wirecheck.c index 986d081..acdd4db 100644 --- a/lnet/utils/wirecheck.c +++ b/lnet/utils/wirecheck.c @@ -27,12 +27,12 @@ do { \ #define CHECK_DEFINE(a) \ do { \ - printf (" LASSERT ("#a" == "STRINGIFY(a)");\n"); \ + printf (" CLASSERT ("#a" == "STRINGIFY(a)");\n"); \ } while (0) #define CHECK_VALUE(a) \ do { \ - printf (" LASSERT ("#a" == %d);\n", a); \ + printf (" CLASSERT ("#a" == %d);\n", a); \ } while (0) #define CHECK_MEMBER_OFFSET(s,m) \ @@ -180,7 +180,7 @@ main (int argc, char **argv) system_string("uname -a", unameinfo, sizeof(unameinfo)); system_string("gcc -v 2>&1 | tail -1", gccinfo, sizeof(gccinfo)); - printf ("void lib_assert_wire_constants (void)\n" + printf ("void ptl_assert_wire_constants (void)\n" "{\n" " /* Wire protocol assertions generated by 'wirecheck'\n" " * running on %s\n" -- 1.8.3.1