From aca2d0d1e32cb9c483f7eb901a2664aed9ffbf30 Mon Sep 17 00:00:00 2001 From: phil Date: Thu, 27 Nov 2003 06:31:34 +0000 Subject: [PATCH] Slightly less obvious, but still very innocent, parts of b_eq: - code or includes in the main build, but contained in #if __KERNEL__ - userspace-only portals code - very few and minor other changes, such as renaming a function --- lnet/include/linux/kp30.h | 6 +- lnet/include/lnet/lib-lnet.h | 15 --- lnet/include/lnet/lib-p30.h | 15 --- lnet/include/lnet/lnetctl.h | 2 + lnet/include/lnet/ptlctl.h | 2 + lnet/include/lnet/types.h | 10 ++ lnet/lnet/api-eq.c | 13 +- lnet/ulnds/connection.c | 215 ++++++++++++++++++++++++++----- lnet/ulnds/connection.h | 1 + lnet/ulnds/procapi.c | 160 +++++++---------------- lnet/ulnds/procbridge.h | 25 +++- lnet/ulnds/proclib.c | 104 +++++---------- lnet/ulnds/select.c | 30 +++-- lnet/ulnds/socklnd/connection.c | 215 ++++++++++++++++++++++++++----- lnet/ulnds/socklnd/connection.h | 1 + lnet/ulnds/socklnd/procapi.c | 160 +++++++---------------- lnet/ulnds/socklnd/procbridge.h | 25 +++- lnet/ulnds/socklnd/proclib.c | 104 +++++---------- lnet/ulnds/socklnd/select.c | 30 +++-- lnet/ulnds/socklnd/tcplnd.c | 74 +++++++---- lnet/ulnds/tcplnd.c | 74 +++++++---- lnet/utils/Makefile.am | 9 +- lnet/utils/debug.c | 19 ++- lustre/lov/lov_log.c | 2 +- lustre/mdc/mdc_internal.h | 2 - lustre/mdc/mdc_lib.c | 8 ++ lustre/obdclass/llog_obd.c | 7 +- lustre/obdclass/obdo.c | 4 + lustre/portals/include/linux/kp30.h | 6 +- lustre/portals/include/portals/lib-p30.h | 15 --- lustre/portals/include/portals/ptlctl.h | 2 + lustre/portals/include/portals/types.h | 10 ++ lustre/portals/portals/api-eq.c | 13 +- lustre/portals/unals/connection.c | 215 ++++++++++++++++++++++++++----- lustre/portals/unals/connection.h | 1 + lustre/portals/unals/procapi.c | 160 +++++++---------------- lustre/portals/unals/procbridge.h | 25 +++- lustre/portals/unals/proclib.c | 104 +++++---------- lustre/portals/unals/select.c | 30 +++-- lustre/portals/unals/tcpnal.c | 74 +++++++---- lustre/portals/utils/Makefile.am | 9 +- lustre/portals/utils/debug.c | 19 ++- lustre/ptlrpc/llog_client.c | 5 + lustre/ptlrpc/llog_net.c | 21 +++ lustre/ptlrpc/pinger.c | 36 +++++- lustre/utils/lfs.c | 2 +- 46 files changed, 1225 insertions(+), 854 deletions(-) diff --git a/lnet/include/linux/kp30.h b/lnet/include/linux/kp30.h index 49f6d95..3e6d5e3 100644 --- a/lnet/include/linux/kp30.h +++ b/lnet/include/linux/kp30.h @@ -611,7 +611,6 @@ extern struct prof_ent prof_ents[MAX_PROFS]; extern spinlock_t stack_backtrace_lock; char *portals_debug_dumpstack(void); -char *portals_nid2str(int nal, ptl_nid_t nid, char *str); void portals_run_upcall(char **argv); void portals_run_lbug_upcall(char * file, const char *fn, const int line); void portals_debug_dumplog(void); @@ -654,6 +653,8 @@ extern void kportal_blockallsigs (void); # include #ifndef __CYGWIN__ # include +#else +# include #endif # include # include @@ -679,6 +680,9 @@ extern void kportal_blockallsigs (void); getpid() , stack, ## a); #endif +/* support decl needed both by kernel and liblustre */ +char *portals_nid2str(int nal, ptl_nid_t nid, char *str); + #ifndef CURRENT_TIME # define CURRENT_TIME time(0) #endif diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index 2401f22..55fd720 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -30,7 +30,6 @@ static inline int ptl_is_wire_handle_none (ptl_handle_wire_t *wh) wh->wh_object_cookie == PTL_WIRE_HANDLE_NONE.wh_object_cookie); } -#ifdef __KERNEL__ #define state_lock(nal,flagsp) \ do { \ CDEBUG(D_PORTALS, "taking state lock\n"); \ @@ -42,20 +41,6 @@ do { \ CDEBUG(D_PORTALS, "releasing state lock\n"); \ nal->cb_sti(nal, flagsp); \ } -#else -/* not needed in user space until we thread there */ -#define state_lock(nal,flagsp) \ -do { \ - CDEBUG(D_PORTALS, "taking state lock\n"); \ - CDEBUG(D_PORTALS, "%p:%p\n", nal, flagsp); \ -} while (0) - -#define state_unlock(nal,flagsp) \ -{ \ - CDEBUG(D_PORTALS, "releasing state lock\n"); \ - CDEBUG(D_PORTALS, "%p:%p\n", nal, flagsp); \ -} -#endif /* __KERNEL__ */ #ifndef PTL_USE_SLAB_CACHE diff --git a/lnet/include/lnet/lib-p30.h b/lnet/include/lnet/lib-p30.h index 2401f22..55fd720 100644 --- a/lnet/include/lnet/lib-p30.h +++ b/lnet/include/lnet/lib-p30.h @@ -30,7 +30,6 @@ static inline int ptl_is_wire_handle_none (ptl_handle_wire_t *wh) wh->wh_object_cookie == PTL_WIRE_HANDLE_NONE.wh_object_cookie); } -#ifdef __KERNEL__ #define state_lock(nal,flagsp) \ do { \ CDEBUG(D_PORTALS, "taking state lock\n"); \ @@ -42,20 +41,6 @@ do { \ CDEBUG(D_PORTALS, "releasing state lock\n"); \ nal->cb_sti(nal, flagsp); \ } -#else -/* not needed in user space until we thread there */ -#define state_lock(nal,flagsp) \ -do { \ - CDEBUG(D_PORTALS, "taking state lock\n"); \ - CDEBUG(D_PORTALS, "%p:%p\n", nal, flagsp); \ -} while (0) - -#define state_unlock(nal,flagsp) \ -{ \ - CDEBUG(D_PORTALS, "releasing state lock\n"); \ - CDEBUG(D_PORTALS, "%p:%p\n", nal, flagsp); \ -} -#endif /* __KERNEL__ */ #ifndef PTL_USE_SLAB_CACHE diff --git a/lnet/include/lnet/lnetctl.h b/lnet/include/lnet/lnetctl.h index a9942aa..f581e72 100644 --- a/lnet/include/lnet/lnetctl.h +++ b/lnet/include/lnet/lnetctl.h @@ -75,6 +75,8 @@ int jt_dbg_panic(int argc, char **argv); int ptl_set_cfg_record_cb(cfg_record_cb_t cb); /* l_ioctl.c */ +typedef int (ioc_handler_t)(int dev_id, int opc, void *buf); +void set_ioc_handler(ioc_handler_t *handler); int register_ioc_dev(int dev_id, const char * dev_name); void unregister_ioc_dev(int dev_id); int set_ioctl_dump(char * file); diff --git a/lnet/include/lnet/ptlctl.h b/lnet/include/lnet/ptlctl.h index a9942aa..f581e72 100644 --- a/lnet/include/lnet/ptlctl.h +++ b/lnet/include/lnet/ptlctl.h @@ -75,6 +75,8 @@ int jt_dbg_panic(int argc, char **argv); int ptl_set_cfg_record_cb(cfg_record_cb_t cb); /* l_ioctl.c */ +typedef int (ioc_handler_t)(int dev_id, int opc, void *buf); +void set_ioc_handler(ioc_handler_t *handler); int register_ioc_dev(int dev_id, const char * dev_name); void unregister_ioc_dev(int dev_id); int set_ioctl_dump(char * file); diff --git a/lnet/include/lnet/types.h b/lnet/include/lnet/types.h index 0269290..e4ccebf 100644 --- a/lnet/include/lnet/types.h +++ b/lnet/include/lnet/types.h @@ -104,6 +104,13 @@ typedef enum { typedef unsigned PTL_SEQ_BASETYPE ptl_seq_t; #define PTL_SEQ_GT(a,b) (((signed PTL_SEQ_BASETYPE)((a) - (b))) > 0) +/* XXX + * cygwin need the pragma line, not clear if it's needed in other places. + * checking!!! + */ +#ifdef __CYGWIN__ +#pragma pack(push, 4) +#endif typedef struct { ptl_event_kind_t type; ptl_process_id_t initiator; @@ -116,6 +123,9 @@ typedef struct { struct timeval arrival_time; volatile ptl_seq_t sequence; } ptl_event_t; +#ifdef __CYGWIN__ +#pragma pop +#endif typedef enum { PTL_ACK_REQ, diff --git a/lnet/lnet/api-eq.c b/lnet/lnet/api-eq.c index e066619..9bc9c36 100644 --- a/lnet/lnet/api-eq.c +++ b/lnet/lnet/api-eq.c @@ -123,13 +123,22 @@ static jmp_buf eq_jumpbuf; static void eq_timeout(int signal) { + sigset_t set; + + /* signal will be automatically disabled in sig handler, + * must enable it before long jump + */ + sigemptyset(&set); + sigaddset(&set, SIGALRM); + sigprocmask(SIG_UNBLOCK, &set, NULL); + longjmp(eq_jumpbuf, -1); } int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out, int timeout) { - static void (*prev) (int); + static void (*prev) (int) = NULL; static int left_over; time_t time_at_start; int rc; @@ -143,7 +152,7 @@ int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out, left_over = alarm(timeout); prev = signal(SIGALRM, eq_timeout); time_at_start = time(NULL); - if (left_over < timeout) + if (left_over && left_over < timeout) alarm(left_over); rc = PtlEQWait(eventq_in, event_out); diff --git a/lnet/ulnds/connection.c b/lnet/ulnds/connection.c index edd7c96..3e64b33 100644 --- a/lnet/ulnds/connection.c +++ b/lnet/ulnds/connection.c @@ -34,13 +34,21 @@ #include #include #include -#include #include #include #include +#include +#include +#include +#include +#include +#include #include +#include #include - +#ifndef __CYGWIN__ +#include +#endif /* global variable: acceptor port */ unsigned short tcpnal_acceptor_port = 988; @@ -55,9 +63,14 @@ unsigned short tcpnal_acceptor_port = 988; */ static int compare_connection(void *arg1, void *arg2) { - connection c = arg1; - unsigned int * id = arg2; - return((c->ip==id[0]) && (c->port==id[1])); + connection c = arg1; + unsigned int * id = arg2; +#if 0 + return((c->ip==id[0]) && (c->port==id[1])); +#else + /* CFS specific hacking */ + return (c->ip == id[0]); +#endif } @@ -68,7 +81,12 @@ static int compare_connection(void *arg1, void *arg2) */ static unsigned int connection_key(unsigned int *id) { +#if 0 return(id[0]^id[1]); +#else + /* CFS specific hacking */ + return (unsigned int) id[0]; +#endif } @@ -102,22 +120,27 @@ int read_connection(connection c, unsigned char *dest, int len) { - int offset=0,rc; + int offset = 0,rc; - if (len){ + if (len) { do { - if((rc=syscall(SYS_read, c->fd, dest+offset, len-offset))<=0){ - if (errno==EINTR) { - rc=0; +#ifndef __CYGWIN__ + rc = syscall(SYS_read, c->fd, dest+offset, len-offset); +#else + rc = recv(c->fd, dest+offset, len-offset, 0); +#endif + if (rc <= 0) { + if (errno == EINTR) { + rc = 0; } else { remove_connection(c); - return(0); + return (0); } } - offset+=rc; - } while (offsetconn_lock); allocate_connection(m,htonl(nid),0/*pid*/,fd); + pthread_mutex_unlock(&m->conn_lock); return(1); } +/* FIXME assuming little endian, cleanup!! */ +#define __cpu_to_le64(x) ((__u64)(x)) +#define __le64_to_cpu(x) ((__u64)(x)) +#define __cpu_to_le32(x) ((__u32)(x)) +#define __le32_to_cpu(x) ((__u32)(x)) +#define __cpu_to_le16(x) ((__u16)(x)) +#define __le16_to_cpu(x) ((__u16)(x)) + +extern ptl_nid_t tcpnal_mynid; + +int +tcpnal_hello (int sockfd, ptl_nid_t *nid, int type, __u64 incarnation) +{ + int rc; + ptl_hdr_t hdr; + ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid; + + LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid)); + + memset (&hdr, 0, sizeof (hdr)); + hmv->magic = __cpu_to_le32 (PORTALS_PROTO_MAGIC); + hmv->version_major = __cpu_to_le32 (PORTALS_PROTO_VERSION_MAJOR); + hmv->version_minor = __cpu_to_le32 (PORTALS_PROTO_VERSION_MINOR); + + hdr.src_nid = __cpu_to_le64 (tcpnal_mynid); + hdr.type = __cpu_to_le32 (PTL_MSG_HELLO); + + hdr.msg.hello.type = __cpu_to_le32 (type); + hdr.msg.hello.incarnation = 0; + + /* Assume sufficient socket buffering for this message */ + rc = syscall(SYS_write, sockfd, &hdr, sizeof(hdr)); + if (rc <= 0) { + CERROR ("Error %d sending HELLO to %llx\n", rc, *nid); + return (rc); + } + + rc = syscall(SYS_read, sockfd, hmv, sizeof(*hmv)); + if (rc <= 0) { + CERROR ("Error %d reading HELLO from %llx\n", rc, *nid); + return (rc); + } + + if (hmv->magic != __le32_to_cpu (PORTALS_PROTO_MAGIC)) { + CERROR ("Bad magic %#08x (%#08x expected) from %llx\n", + __cpu_to_le32 (hmv->magic), PORTALS_PROTO_MAGIC, *nid); + return (-EPROTO); + } + + if (hmv->version_major != __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR) || + hmv->version_minor != __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR)) { + CERROR ("Incompatible protocol version %d.%d (%d.%d expected)" + " from %llx\n", + __le16_to_cpu (hmv->version_major), + __le16_to_cpu (hmv->version_minor), + PORTALS_PROTO_VERSION_MAJOR, + PORTALS_PROTO_VERSION_MINOR, + *nid); + return (-EPROTO); + } + +#if (PORTALS_PROTO_VERSION_MAJOR != 0) +# error "This code only understands protocol version 0.x" +#endif + /* version 0 sends magic/version as the dest_nid of a 'hello' header, + * so read the rest of it in now... */ + + rc = syscall(SYS_read, sockfd, hmv + 1, sizeof(hdr) - sizeof(*hmv)); + if (rc <= 0) { + CERROR ("Error %d reading rest of HELLO hdr from %llx\n", + rc, *nid); + return (rc); + } + + /* ...and check we got what we expected */ + if (hdr.type != __cpu_to_le32 (PTL_MSG_HELLO) || + hdr.payload_length != __cpu_to_le32 (0)) { + CERROR ("Expecting a HELLO hdr with 0 payload," + " but got type %d with %d payload from %llx\n", + __le32_to_cpu (hdr.type), + __le32_to_cpu (hdr.payload_length), *nid); + return (-EPROTO); + } + + if (__le64_to_cpu(hdr.src_nid) == PTL_NID_ANY) { + CERROR("Expecting a HELLO hdr with a NID, but got PTL_NID_ANY\n"); + return (-EPROTO); + } + + if (*nid == PTL_NID_ANY) { /* don't know peer's nid yet */ + *nid = __le64_to_cpu(hdr.src_nid); + } else if (*nid != __le64_to_cpu (hdr.src_nid)) { + CERROR ("Connected to nid %llx, but expecting %llx\n", + __le64_to_cpu (hdr.src_nid), *nid); + return (-EPROTO); + } + + return (0); +} /* Function: force_tcp_connection * Arguments: t: tcpnal @@ -187,17 +311,22 @@ connection force_tcp_connection(manager m, unsigned int ip, unsigned short port) { - connection c; + connection conn; struct sockaddr_in addr; unsigned int id[2]; port = tcpnal_acceptor_port; - id[0]=ip; - id[1]=port; + id[0] = ip; + id[1] = port; - if (!(c=hash_table_find(m->connections,id))){ + pthread_mutex_lock(&m->conn_lock); + + conn = hash_table_find(m->connections, id); + if (!conn) { int fd; + int option; + ptl_nid_t peernid = PTL_NID_ANY; bzero((char *) &addr, sizeof(addr)); addr.sin_family = AF_INET; @@ -208,16 +337,30 @@ connection force_tcp_connection(manager m, perror("tcpnal socket failed"); exit(-1); } - if (connect(fd, - (struct sockaddr *)&addr, - sizeof(struct sockaddr_in))) - { - perror("tcpnal connect"); - return(0); - } - return(allocate_connection(m,ip,port,fd)); + if (connect(fd, (struct sockaddr *)&addr, + sizeof(struct sockaddr_in))) { + perror("tcpnal connect"); + return(0); + } + +#if 1 + option = 1; + setsockopt(fd, SOL_TCP, TCP_NODELAY, &option, sizeof(option)); + option = 1<<20; + setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &option, sizeof(option)); + option = 1<<20; + setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &option, sizeof(option)); +#endif + + /* say hello */ + if (tcpnal_hello(fd, &peernid, SOCKNAL_CONN_ANY, 0)) + exit(-1); + + conn = allocate_connection(m, ip, port, fd); } - return(c); + + pthread_mutex_unlock(&m->conn_lock); + return (conn); } @@ -243,8 +386,8 @@ static int bind_socket(manager m,unsigned short port) bzero((char *) &addr, sizeof(addr)); addr.sin_family = AF_INET; addr.sin_addr.s_addr = 0; - addr.sin_port = port; - + addr.sin_port = htons(port); + if (bind(m->bound,(struct sockaddr *)&addr,alen)<0){ perror ("tcpnal bind"); return(0); @@ -284,11 +427,15 @@ manager init_connections(unsigned short pid, int (*input)(void *, void *), void *a) { - manager m=(manager)malloc(sizeof(struct manager)); - m->connections=hash_create_table(compare_connection,connection_key); - m->handler=input; - m->handler_arg=a; - if (bind_socket(m,pid)) return(m); + manager m = (manager)malloc(sizeof(struct manager)); + m->connections = hash_create_table(compare_connection,connection_key); + m->handler = input; + m->handler_arg = a; + pthread_mutex_init(&m->conn_lock, 0); + + if (bind_socket(m,pid)) + return(m); + free(m); return(0); } diff --git a/lnet/ulnds/connection.h b/lnet/ulnds/connection.h index 6f57287..fb1eaab 100644 --- a/lnet/ulnds/connection.h +++ b/lnet/ulnds/connection.h @@ -10,6 +10,7 @@ typedef struct manager { table connections; + pthread_mutex_t conn_lock; /* protect connections table */ int bound; io_handler bound_handler; int (*handler)(void *, void *); diff --git a/lnet/ulnds/procapi.c b/lnet/ulnds/procapi.c index d897058..2a3fbd8 100644 --- a/lnet/ulnds/procapi.c +++ b/lnet/ulnds/procapi.c @@ -2,6 +2,7 @@ * vim:expandtab:shiftwidth=8:tabstop=8: * * Copyright (c) 2002 Cray Inc. + * Copyright (c) 2003 Cluster File Systems, Inc. * * This file is part of Lustre, http://www.lustre.org. * @@ -31,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -48,35 +48,22 @@ * forwards a packaged api call from the 'api' side to the 'library' * side, and collects the result */ -#define forward_failure(operand,fd,buffer,length)\ - if(syscall(SYS_##operand,fd,buffer,length)!=length){\ - lib_fini(b->nal_cb);\ - return(PTL_SEGV);\ - } static int procbridge_forward(nal_t *n, int id, void *args, size_t args_len, - void *ret, size_t ret_len) + void *ret, ptl_size_t ret_len) { - bridge b=(bridge)n->nal_data; - procbridge p=(procbridge)b->local; - int lib=p->to_lib[1]; - int k; + bridge b = (bridge) n->nal_data; - forward_failure(write,lib, &id, sizeof(id)); - forward_failure(write,lib,&args_len, sizeof(args_len)); - forward_failure(write,lib,&ret_len, sizeof(ret_len)); - forward_failure(write,lib,args, args_len); - - do { - k=syscall(SYS_read, p->from_lib[0], ret, ret_len); - } while ((k!=ret_len) && (errno += EINTR)); + if (id == PTL_FINI) { + lib_fini(b->nal_cb); - if(k!=ret_len){ - perror("nal: read return block"); - return PTL_SEGV; + if (b->shutdown) + (*b->shutdown)(b); } + + lib_dispatch(b->nal_cb, NULL, id, args, ret); + return (PTL_OK); } -#undef forward_failure /* Function: shutdown @@ -90,15 +77,18 @@ static int procbridge_shutdown(nal_t *n, int ni) { bridge b=(bridge)n->nal_data; procbridge p=(procbridge)b->local; - int code=PTL_FINI; - syscall(SYS_write, p->to_lib[1],&code,sizeof(code)); - syscall(SYS_read, p->from_lib[0],&code,sizeof(code)); + p->nal_flags |= NAL_FLAG_STOPPING; - syscall(SYS_close, p->to_lib[0]); - syscall(SYS_close, p->to_lib[1]); - syscall(SYS_close, p->from_lib[0]); - syscall(SYS_close, p->from_lib[1]); + do { + pthread_mutex_lock(&p->mutex); + if (p->nal_flags & NAL_FLAG_STOPPED) { + pthread_mutex_unlock(&p->mutex); + break; + } + pthread_cond_wait(&p->cond, &p->mutex); + pthread_mutex_unlock(&p->mutex); + } while (1); free(p); return(0); @@ -151,7 +141,9 @@ static nal_t api_nal = { unlock: procbridge_unlock }; -/* Function: bridge_init +ptl_nid_t tcpnal_mynid; + +/* Function: procbridge_interface * * Arguments: pid: requested process id (port offset) * PTL_ID_ANY not supported. @@ -165,77 +157,17 @@ static nal_t api_nal = { * initializes the tcp nal. we define unix_failure as an * error wrapper to cut down clutter. */ -#define unix_failure(operand,fd,buffer,length,text)\ - if(syscall(SYS_##operand,fd,buffer,length)!=length){\ - perror(text);\ - return(NULL);\ - } -#if 0 -static nal_t *bridge_init(ptl_interface_t nal, - ptl_pid_t pid_request, - ptl_ni_limits_t *desired, - ptl_ni_limits_t *actual, - int *rc) -{ - procbridge p; - bridge b; - static int initialized=0; - ptl_ni_limits_t limits = {-1,-1,-1,-1,-1}; - - if(initialized) return (&api_nal); - - init_unix_timer(); - - b=(bridge)malloc(sizeof(struct bridge)); - p=(procbridge)malloc(sizeof(struct procbridge)); - api_nal.nal_data=b; - b->local=p; - - if(pipe(p->to_lib) || pipe(p->from_lib)) { - perror("nal_init: pipe"); - return(NULL); - } - - if (desired) limits = *desired; - unix_failure(write,p->to_lib[1], &pid_request, sizeof(pid_request), - "nal_init: write"); - unix_failure(write,p->to_lib[1], &limits, sizeof(ptl_ni_limits_t), - "nal_init: write"); - unix_failure(write,p->to_lib[1], &nal, sizeof(ptl_interface_t), - "nal_init: write"); - - if(pthread_create(&p->t, NULL, nal_thread, b)) { - perror("nal_init: pthread_create"); - return(NULL); - } - - unix_failure(read,p->from_lib[0], actual, sizeof(ptl_ni_limits_t), - "tcp_init: read"); - unix_failure(read,p->from_lib[0], rc, sizeof(rc), - "nal_init: read"); - - if(*rc) return(NULL); - - initialized = 1; - pthread_mutex_init(&p->mutex,0); - pthread_cond_init(&p->cond, 0); - - return (&api_nal); -} -#endif - -ptl_nid_t tcpnal_mynid; - nal_t *procbridge_interface(int num_interface, ptl_pt_index_t ptl_size, ptl_ac_index_t acl_size, ptl_pid_t requested_pid) { + nal_init_args_t args; procbridge p; bridge b; static int initialized=0; ptl_ni_limits_t limits = {-1,-1,-1,-1,-1}; - int rc, nal_type = PTL_IFACE_TCP;/* PTL_IFACE_DEFAULT FIXME hack */ + int nal_type = PTL_IFACE_TCP;/* PTL_IFACE_DEFAULT FIXME hack */ if(initialized) return (&api_nal); @@ -246,38 +178,42 @@ nal_t *procbridge_interface(int num_interface, api_nal.nal_data=b; b->local=p; - if(pipe(p->to_lib) || pipe(p->from_lib)) { - perror("nal_init: pipe"); - return(NULL); - } - if (ptl_size) limits.max_ptable_index = ptl_size; if (acl_size) limits.max_atable_index = acl_size; - unix_failure(write,p->to_lib[1], &requested_pid, sizeof(requested_pid), - "nal_init: write"); - unix_failure(write,p->to_lib[1], &limits, sizeof(ptl_ni_limits_t), - "nal_init: write"); - unix_failure(write,p->to_lib[1], &nal_type, sizeof(nal_type), - "nal_init: write"); + args.nia_requested_pid = requested_pid; + args.nia_limits = &limits; + args.nia_nal_type = nal_type; + args.nia_bridge = b; - if(pthread_create(&p->t, NULL, nal_thread, b)) { + /* init procbridge */ + pthread_mutex_init(&p->mutex,0); + pthread_cond_init(&p->cond, 0); + p->nal_flags = 0; + pthread_mutex_init(&p->nal_cb_lock, 0); + + if (pthread_create(&p->t, NULL, nal_thread, &args)) { perror("nal_init: pthread_create"); return(NULL); } - unix_failure(read,p->from_lib[0], &rc, sizeof(rc), - "nal_init: read"); - - if(rc) return(NULL); + do { + pthread_mutex_lock(&p->mutex); + if (p->nal_flags & (NAL_FLAG_RUNNING | NAL_FLAG_STOPPED)) { + pthread_mutex_unlock(&p->mutex); + break; + } + pthread_cond_wait(&p->cond, &p->mutex); + pthread_mutex_unlock(&p->mutex); + } while (1); + + if (p->nal_flags & NAL_FLAG_STOPPED) + return (NULL); b->nal_cb->ni.nid = tcpnal_mynid; initialized = 1; - pthread_mutex_init(&p->mutex,0); - pthread_cond_init(&p->cond, 0); return (&api_nal); } -#undef unix_failure diff --git a/lnet/ulnds/procbridge.h b/lnet/ulnds/procbridge.h index 060ae7b..317e22f 100644 --- a/lnet/ulnds/procbridge.h +++ b/lnet/ulnds/procbridge.h @@ -2,6 +2,7 @@ * vim:expandtab:shiftwidth=8:tabstop=8: * * Copyright (c) 2002 Cray Inc. + * Copyright (c) 2003 Cluster File Systems, Inc. * * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ */ @@ -14,14 +15,28 @@ #include +#define NAL_FLAG_RUNNING 1 +#define NAL_FLAG_STOPPING 2 +#define NAL_FLAG_STOPPED 4 + typedef struct procbridge { + /* sync between user threads and nal thread */ pthread_t t; pthread_cond_t cond; pthread_mutex_t mutex; - int to_lib[2]; - int from_lib[2]; + + int nal_flags; + + pthread_mutex_t nal_cb_lock; } *procbridge; +typedef struct nal_init_args { + ptl_pid_t nia_requested_pid; + ptl_ni_limits_t *nia_limits; + int nia_nal_type; + bridge nia_bridge; +} nal_init_args_t; + extern void *nal_thread(void *); @@ -33,8 +48,8 @@ extern void *nal_thread(void *); extern void set_address(bridge t,ptl_pid_t pidrequest); extern nal_t *procbridge_interface(int num_interface, - ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, - ptl_pid_t requested_pid); + ptl_pt_index_t ptl_size, + ptl_ac_index_t acl_size, + ptl_pid_t requested_pid); #endif diff --git a/lnet/ulnds/proclib.c b/lnet/ulnds/proclib.c index 89b67ad..2627253 100644 --- a/lnet/ulnds/proclib.c +++ b/lnet/ulnds/proclib.c @@ -2,6 +2,7 @@ * vim:expandtab:shiftwidth=8:tabstop=8: * * Copyright (c) 2002 Cray Inc. + * Copyright (c) 2003 Cluster File Systems, Inc. * * This file is part of Lustre, http://www.lustre.org. * @@ -31,14 +32,12 @@ #include #include #include -#include #include #include #include #include #include #include -//#include #include /* the following functions are stubs to satisfy the nal definition @@ -93,12 +92,20 @@ static void nal_printf(nal_cb_t *nal, static void nal_cli(nal_cb_t *nal, unsigned long *flags) { + bridge b = (bridge) nal->nal_data; + procbridge p = (procbridge) b->local; + + pthread_mutex_lock(&p->nal_cb_lock); } static void nal_sti(nal_cb_t *nal, unsigned long *flags) { + bridge b = (bridge)nal->nal_data; + procbridge p = (procbridge) b->local; + + pthread_mutex_unlock(&p->nal_cb_lock); } @@ -108,69 +115,22 @@ static int nal_dist(nal_cb_t *nal, { return 0; } - - - -/* Function: data_from_api - * Arguments: t: the nal state for this interface - * Returns: whether to continue reading from the pipe - * - * data_from_api() reads data from the api side in response - * to a select. - * - * We define data_failure() for syntactic convenience - * of unix error reporting. - */ - -#define data_failure(operand,fd,buffer,length)\ - if(syscall(SYS_##operand,fd,buffer,length)!=length){\ - lib_fini(b->nal_cb);\ - return(0);\ - } -static int data_from_api(void *arg) -{ - bridge b = arg; - procbridge p=(procbridge)b->local; - /* where are these two sizes derived from ??*/ - char arg_block[ 256 ]; - char ret_block[ 128 ]; - ptl_size_t arg_len,ret_len; - int fd=p->to_lib[0]; - int index; - - data_failure(read,fd, &index, sizeof(index)); - - if (index==PTL_FINI) { - lib_fini(b->nal_cb); - if (b->shutdown) (*b->shutdown)(b); - syscall(SYS_write, p->from_lib[1],&b->alive,sizeof(b->alive)); - - /* a heavy-handed but convenient way of shutting down - the lower side thread */ - pthread_exit(0); - } - - data_failure(read,fd, &arg_len, sizeof(arg_len)); - data_failure(read,fd, &ret_len, sizeof(ret_len)); - data_failure(read,fd, arg_block, arg_len); - - lib_dispatch(b->nal_cb, NULL, index, arg_block, ret_block); - - data_failure(write,p->from_lib[1],ret_block, ret_len); - return(1); -} -#undef data_failure - - static void wakeup_topside(void *z) { - bridge b=z; - procbridge p=b->local; + bridge b = z; + procbridge p = b->local; + int stop; pthread_mutex_lock(&p->mutex); + stop = p->nal_flags & NAL_FLAG_STOPPING; + if (stop) + p->nal_flags |= NAL_FLAG_STOPPED; pthread_cond_broadcast(&p->cond); pthread_mutex_unlock(&p->mutex); + + if (stop) + pthread_exit(0); } @@ -195,7 +155,8 @@ nal_initialize nal_table[PTL_IFACE_MAX]={0,tcpnal_init,0}; void *nal_thread(void *z) { - bridge b=z; + nal_init_args_t *args = (nal_init_args_t *) z; + bridge b = args->nia_bridge; procbridge p=b->local; int rc; ptl_pid_t pid_request; @@ -216,15 +177,9 @@ void *nal_thread(void *z) b->nal_cb->cb_sti=nal_sti; b->nal_cb->cb_dist=nal_dist; - - register_io_handler(p->to_lib[0],READ_HANDLER,data_from_api,(void *)b); - - if(!(rc = syscall(SYS_read, p->to_lib[0], &pid_request, sizeof(pid_request)))) - perror("procbridge read from api"); - if(!(rc = syscall(SYS_read, p->to_lib[0], &desired, sizeof(ptl_ni_limits_t)))) - perror("procbridge read from api"); - if(!(rc = syscall(SYS_read, p->to_lib[0], &nal_type, sizeof(nal_type)))) - perror("procbridge read from api"); + pid_request = args->nia_requested_pid; + desired = *args->nia_limits; + nal_type = args->nia_nal_type; actual = desired; LIMIT(desired.max_match_entries,actual.max_match_entries,MAX_MES); @@ -251,12 +206,12 @@ void *nal_thread(void *z) * it is non-zero since something went wrong. */ /* this should perform error checking */ -#if 0 - write(p->from_lib[1], &actual, sizeof(ptl_ni_limits_t)); -#endif - syscall(SYS_write, p->from_lib[1], &rc, sizeof(rc)); - - if(!rc) { + pthread_mutex_lock(&p->mutex); + p->nal_flags |= rc ? NAL_FLAG_STOPPED : NAL_FLAG_RUNNING; + pthread_cond_broadcast(&p->cond); + pthread_mutex_unlock(&p->mutex); + + if (!rc) { /* the thunk function is called each time the timer loop performs an operation and returns to blocking mode. we overload this function to inform the api side that @@ -267,4 +222,3 @@ void *nal_thread(void *z) return(0); } #undef LIMIT - diff --git a/lnet/ulnds/select.c b/lnet/ulnds/select.c index 47adc50..fe24efc 100644 --- a/lnet/ulnds/select.c +++ b/lnet/ulnds/select.c @@ -97,9 +97,9 @@ void remove_io_handler (io_handler i) static void set_flag(io_handler n,fd_set *fds) { - if (n->type & READ_HANDLER) FD_SET(n->fd,fds); - if (n->type & WRITE_HANDLER) FD_SET(n->fd,fds+1); - if (n->type & EXCEPTION_HANDLER) FD_SET(n->fd,fds+2); + if (n->type & READ_HANDLER) FD_SET(n->fd, &fds[0]); + if (n->type & WRITE_HANDLER) FD_SET(n->fd,&fds[1]); + if (n->type & EXCEPTION_HANDLER) FD_SET(n->fd, &fds[2]); } @@ -126,9 +126,18 @@ void select_timer_block(when until) timeout_pointer=&timeout; } else timeout_pointer=0; - FD_ZERO(fds); - FD_ZERO(fds+1); - FD_ZERO(fds+2); + + /* FIXME + * temporarily add timer for endless waiting problem. + * FIXME + */ + timeout.tv_sec = 1; + timeout.tv_usec = 0; + timeout_pointer=&timeout; + + FD_ZERO(&fds[0]); + FD_ZERO(&fds[1]); + FD_ZERO(&fds[2]); for (k=&io_handlers;*k;){ if ((*k)->disabled){ j=*k; @@ -140,14 +149,15 @@ void select_timer_block(when until) k=&(*k)->next; } } - result=select(FD_SETSIZE,fds,fds+1,fds+2,timeout_pointer); + + result=select(FD_SETSIZE, &fds[0], &fds[1], &fds[2], timeout_pointer); if (result > 0) for (j=io_handlers;j;j=j->next){ if (!(j->disabled) && - ((FD_ISSET(j->fd,fds) && (j->type & READ_HANDLER)) || - (FD_ISSET(j->fd,fds+1) && (j->type & WRITE_HANDLER)) || - (FD_ISSET(j->fd,fds+2) && (j->type & EXCEPTION_HANDLER)))){ + ((FD_ISSET(j->fd, &fds[0]) && (j->type & READ_HANDLER)) || + (FD_ISSET(j->fd, &fds[1]) && (j->type & WRITE_HANDLER)) || + (FD_ISSET(j->fd, &fds[2]) && (j->type & EXCEPTION_HANDLER)))){ if (!(*j->function)(j->argument)) j->disabled=1; } diff --git a/lnet/ulnds/socklnd/connection.c b/lnet/ulnds/socklnd/connection.c index edd7c96..3e64b33 100644 --- a/lnet/ulnds/socklnd/connection.c +++ b/lnet/ulnds/socklnd/connection.c @@ -34,13 +34,21 @@ #include #include #include -#include #include #include #include +#include +#include +#include +#include +#include +#include #include +#include #include - +#ifndef __CYGWIN__ +#include +#endif /* global variable: acceptor port */ unsigned short tcpnal_acceptor_port = 988; @@ -55,9 +63,14 @@ unsigned short tcpnal_acceptor_port = 988; */ static int compare_connection(void *arg1, void *arg2) { - connection c = arg1; - unsigned int * id = arg2; - return((c->ip==id[0]) && (c->port==id[1])); + connection c = arg1; + unsigned int * id = arg2; +#if 0 + return((c->ip==id[0]) && (c->port==id[1])); +#else + /* CFS specific hacking */ + return (c->ip == id[0]); +#endif } @@ -68,7 +81,12 @@ static int compare_connection(void *arg1, void *arg2) */ static unsigned int connection_key(unsigned int *id) { +#if 0 return(id[0]^id[1]); +#else + /* CFS specific hacking */ + return (unsigned int) id[0]; +#endif } @@ -102,22 +120,27 @@ int read_connection(connection c, unsigned char *dest, int len) { - int offset=0,rc; + int offset = 0,rc; - if (len){ + if (len) { do { - if((rc=syscall(SYS_read, c->fd, dest+offset, len-offset))<=0){ - if (errno==EINTR) { - rc=0; +#ifndef __CYGWIN__ + rc = syscall(SYS_read, c->fd, dest+offset, len-offset); +#else + rc = recv(c->fd, dest+offset, len-offset, 0); +#endif + if (rc <= 0) { + if (errno == EINTR) { + rc = 0; } else { remove_connection(c); - return(0); + return (0); } } - offset+=rc; - } while (offsetconn_lock); allocate_connection(m,htonl(nid),0/*pid*/,fd); + pthread_mutex_unlock(&m->conn_lock); return(1); } +/* FIXME assuming little endian, cleanup!! */ +#define __cpu_to_le64(x) ((__u64)(x)) +#define __le64_to_cpu(x) ((__u64)(x)) +#define __cpu_to_le32(x) ((__u32)(x)) +#define __le32_to_cpu(x) ((__u32)(x)) +#define __cpu_to_le16(x) ((__u16)(x)) +#define __le16_to_cpu(x) ((__u16)(x)) + +extern ptl_nid_t tcpnal_mynid; + +int +tcpnal_hello (int sockfd, ptl_nid_t *nid, int type, __u64 incarnation) +{ + int rc; + ptl_hdr_t hdr; + ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid; + + LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid)); + + memset (&hdr, 0, sizeof (hdr)); + hmv->magic = __cpu_to_le32 (PORTALS_PROTO_MAGIC); + hmv->version_major = __cpu_to_le32 (PORTALS_PROTO_VERSION_MAJOR); + hmv->version_minor = __cpu_to_le32 (PORTALS_PROTO_VERSION_MINOR); + + hdr.src_nid = __cpu_to_le64 (tcpnal_mynid); + hdr.type = __cpu_to_le32 (PTL_MSG_HELLO); + + hdr.msg.hello.type = __cpu_to_le32 (type); + hdr.msg.hello.incarnation = 0; + + /* Assume sufficient socket buffering for this message */ + rc = syscall(SYS_write, sockfd, &hdr, sizeof(hdr)); + if (rc <= 0) { + CERROR ("Error %d sending HELLO to %llx\n", rc, *nid); + return (rc); + } + + rc = syscall(SYS_read, sockfd, hmv, sizeof(*hmv)); + if (rc <= 0) { + CERROR ("Error %d reading HELLO from %llx\n", rc, *nid); + return (rc); + } + + if (hmv->magic != __le32_to_cpu (PORTALS_PROTO_MAGIC)) { + CERROR ("Bad magic %#08x (%#08x expected) from %llx\n", + __cpu_to_le32 (hmv->magic), PORTALS_PROTO_MAGIC, *nid); + return (-EPROTO); + } + + if (hmv->version_major != __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR) || + hmv->version_minor != __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR)) { + CERROR ("Incompatible protocol version %d.%d (%d.%d expected)" + " from %llx\n", + __le16_to_cpu (hmv->version_major), + __le16_to_cpu (hmv->version_minor), + PORTALS_PROTO_VERSION_MAJOR, + PORTALS_PROTO_VERSION_MINOR, + *nid); + return (-EPROTO); + } + +#if (PORTALS_PROTO_VERSION_MAJOR != 0) +# error "This code only understands protocol version 0.x" +#endif + /* version 0 sends magic/version as the dest_nid of a 'hello' header, + * so read the rest of it in now... */ + + rc = syscall(SYS_read, sockfd, hmv + 1, sizeof(hdr) - sizeof(*hmv)); + if (rc <= 0) { + CERROR ("Error %d reading rest of HELLO hdr from %llx\n", + rc, *nid); + return (rc); + } + + /* ...and check we got what we expected */ + if (hdr.type != __cpu_to_le32 (PTL_MSG_HELLO) || + hdr.payload_length != __cpu_to_le32 (0)) { + CERROR ("Expecting a HELLO hdr with 0 payload," + " but got type %d with %d payload from %llx\n", + __le32_to_cpu (hdr.type), + __le32_to_cpu (hdr.payload_length), *nid); + return (-EPROTO); + } + + if (__le64_to_cpu(hdr.src_nid) == PTL_NID_ANY) { + CERROR("Expecting a HELLO hdr with a NID, but got PTL_NID_ANY\n"); + return (-EPROTO); + } + + if (*nid == PTL_NID_ANY) { /* don't know peer's nid yet */ + *nid = __le64_to_cpu(hdr.src_nid); + } else if (*nid != __le64_to_cpu (hdr.src_nid)) { + CERROR ("Connected to nid %llx, but expecting %llx\n", + __le64_to_cpu (hdr.src_nid), *nid); + return (-EPROTO); + } + + return (0); +} /* Function: force_tcp_connection * Arguments: t: tcpnal @@ -187,17 +311,22 @@ connection force_tcp_connection(manager m, unsigned int ip, unsigned short port) { - connection c; + connection conn; struct sockaddr_in addr; unsigned int id[2]; port = tcpnal_acceptor_port; - id[0]=ip; - id[1]=port; + id[0] = ip; + id[1] = port; - if (!(c=hash_table_find(m->connections,id))){ + pthread_mutex_lock(&m->conn_lock); + + conn = hash_table_find(m->connections, id); + if (!conn) { int fd; + int option; + ptl_nid_t peernid = PTL_NID_ANY; bzero((char *) &addr, sizeof(addr)); addr.sin_family = AF_INET; @@ -208,16 +337,30 @@ connection force_tcp_connection(manager m, perror("tcpnal socket failed"); exit(-1); } - if (connect(fd, - (struct sockaddr *)&addr, - sizeof(struct sockaddr_in))) - { - perror("tcpnal connect"); - return(0); - } - return(allocate_connection(m,ip,port,fd)); + if (connect(fd, (struct sockaddr *)&addr, + sizeof(struct sockaddr_in))) { + perror("tcpnal connect"); + return(0); + } + +#if 1 + option = 1; + setsockopt(fd, SOL_TCP, TCP_NODELAY, &option, sizeof(option)); + option = 1<<20; + setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &option, sizeof(option)); + option = 1<<20; + setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &option, sizeof(option)); +#endif + + /* say hello */ + if (tcpnal_hello(fd, &peernid, SOCKNAL_CONN_ANY, 0)) + exit(-1); + + conn = allocate_connection(m, ip, port, fd); } - return(c); + + pthread_mutex_unlock(&m->conn_lock); + return (conn); } @@ -243,8 +386,8 @@ static int bind_socket(manager m,unsigned short port) bzero((char *) &addr, sizeof(addr)); addr.sin_family = AF_INET; addr.sin_addr.s_addr = 0; - addr.sin_port = port; - + addr.sin_port = htons(port); + if (bind(m->bound,(struct sockaddr *)&addr,alen)<0){ perror ("tcpnal bind"); return(0); @@ -284,11 +427,15 @@ manager init_connections(unsigned short pid, int (*input)(void *, void *), void *a) { - manager m=(manager)malloc(sizeof(struct manager)); - m->connections=hash_create_table(compare_connection,connection_key); - m->handler=input; - m->handler_arg=a; - if (bind_socket(m,pid)) return(m); + manager m = (manager)malloc(sizeof(struct manager)); + m->connections = hash_create_table(compare_connection,connection_key); + m->handler = input; + m->handler_arg = a; + pthread_mutex_init(&m->conn_lock, 0); + + if (bind_socket(m,pid)) + return(m); + free(m); return(0); } diff --git a/lnet/ulnds/socklnd/connection.h b/lnet/ulnds/socklnd/connection.h index 6f57287..fb1eaab 100644 --- a/lnet/ulnds/socklnd/connection.h +++ b/lnet/ulnds/socklnd/connection.h @@ -10,6 +10,7 @@ typedef struct manager { table connections; + pthread_mutex_t conn_lock; /* protect connections table */ int bound; io_handler bound_handler; int (*handler)(void *, void *); diff --git a/lnet/ulnds/socklnd/procapi.c b/lnet/ulnds/socklnd/procapi.c index d897058..2a3fbd8 100644 --- a/lnet/ulnds/socklnd/procapi.c +++ b/lnet/ulnds/socklnd/procapi.c @@ -2,6 +2,7 @@ * vim:expandtab:shiftwidth=8:tabstop=8: * * Copyright (c) 2002 Cray Inc. + * Copyright (c) 2003 Cluster File Systems, Inc. * * This file is part of Lustre, http://www.lustre.org. * @@ -31,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -48,35 +48,22 @@ * forwards a packaged api call from the 'api' side to the 'library' * side, and collects the result */ -#define forward_failure(operand,fd,buffer,length)\ - if(syscall(SYS_##operand,fd,buffer,length)!=length){\ - lib_fini(b->nal_cb);\ - return(PTL_SEGV);\ - } static int procbridge_forward(nal_t *n, int id, void *args, size_t args_len, - void *ret, size_t ret_len) + void *ret, ptl_size_t ret_len) { - bridge b=(bridge)n->nal_data; - procbridge p=(procbridge)b->local; - int lib=p->to_lib[1]; - int k; + bridge b = (bridge) n->nal_data; - forward_failure(write,lib, &id, sizeof(id)); - forward_failure(write,lib,&args_len, sizeof(args_len)); - forward_failure(write,lib,&ret_len, sizeof(ret_len)); - forward_failure(write,lib,args, args_len); - - do { - k=syscall(SYS_read, p->from_lib[0], ret, ret_len); - } while ((k!=ret_len) && (errno += EINTR)); + if (id == PTL_FINI) { + lib_fini(b->nal_cb); - if(k!=ret_len){ - perror("nal: read return block"); - return PTL_SEGV; + if (b->shutdown) + (*b->shutdown)(b); } + + lib_dispatch(b->nal_cb, NULL, id, args, ret); + return (PTL_OK); } -#undef forward_failure /* Function: shutdown @@ -90,15 +77,18 @@ static int procbridge_shutdown(nal_t *n, int ni) { bridge b=(bridge)n->nal_data; procbridge p=(procbridge)b->local; - int code=PTL_FINI; - syscall(SYS_write, p->to_lib[1],&code,sizeof(code)); - syscall(SYS_read, p->from_lib[0],&code,sizeof(code)); + p->nal_flags |= NAL_FLAG_STOPPING; - syscall(SYS_close, p->to_lib[0]); - syscall(SYS_close, p->to_lib[1]); - syscall(SYS_close, p->from_lib[0]); - syscall(SYS_close, p->from_lib[1]); + do { + pthread_mutex_lock(&p->mutex); + if (p->nal_flags & NAL_FLAG_STOPPED) { + pthread_mutex_unlock(&p->mutex); + break; + } + pthread_cond_wait(&p->cond, &p->mutex); + pthread_mutex_unlock(&p->mutex); + } while (1); free(p); return(0); @@ -151,7 +141,9 @@ static nal_t api_nal = { unlock: procbridge_unlock }; -/* Function: bridge_init +ptl_nid_t tcpnal_mynid; + +/* Function: procbridge_interface * * Arguments: pid: requested process id (port offset) * PTL_ID_ANY not supported. @@ -165,77 +157,17 @@ static nal_t api_nal = { * initializes the tcp nal. we define unix_failure as an * error wrapper to cut down clutter. */ -#define unix_failure(operand,fd,buffer,length,text)\ - if(syscall(SYS_##operand,fd,buffer,length)!=length){\ - perror(text);\ - return(NULL);\ - } -#if 0 -static nal_t *bridge_init(ptl_interface_t nal, - ptl_pid_t pid_request, - ptl_ni_limits_t *desired, - ptl_ni_limits_t *actual, - int *rc) -{ - procbridge p; - bridge b; - static int initialized=0; - ptl_ni_limits_t limits = {-1,-1,-1,-1,-1}; - - if(initialized) return (&api_nal); - - init_unix_timer(); - - b=(bridge)malloc(sizeof(struct bridge)); - p=(procbridge)malloc(sizeof(struct procbridge)); - api_nal.nal_data=b; - b->local=p; - - if(pipe(p->to_lib) || pipe(p->from_lib)) { - perror("nal_init: pipe"); - return(NULL); - } - - if (desired) limits = *desired; - unix_failure(write,p->to_lib[1], &pid_request, sizeof(pid_request), - "nal_init: write"); - unix_failure(write,p->to_lib[1], &limits, sizeof(ptl_ni_limits_t), - "nal_init: write"); - unix_failure(write,p->to_lib[1], &nal, sizeof(ptl_interface_t), - "nal_init: write"); - - if(pthread_create(&p->t, NULL, nal_thread, b)) { - perror("nal_init: pthread_create"); - return(NULL); - } - - unix_failure(read,p->from_lib[0], actual, sizeof(ptl_ni_limits_t), - "tcp_init: read"); - unix_failure(read,p->from_lib[0], rc, sizeof(rc), - "nal_init: read"); - - if(*rc) return(NULL); - - initialized = 1; - pthread_mutex_init(&p->mutex,0); - pthread_cond_init(&p->cond, 0); - - return (&api_nal); -} -#endif - -ptl_nid_t tcpnal_mynid; - nal_t *procbridge_interface(int num_interface, ptl_pt_index_t ptl_size, ptl_ac_index_t acl_size, ptl_pid_t requested_pid) { + nal_init_args_t args; procbridge p; bridge b; static int initialized=0; ptl_ni_limits_t limits = {-1,-1,-1,-1,-1}; - int rc, nal_type = PTL_IFACE_TCP;/* PTL_IFACE_DEFAULT FIXME hack */ + int nal_type = PTL_IFACE_TCP;/* PTL_IFACE_DEFAULT FIXME hack */ if(initialized) return (&api_nal); @@ -246,38 +178,42 @@ nal_t *procbridge_interface(int num_interface, api_nal.nal_data=b; b->local=p; - if(pipe(p->to_lib) || pipe(p->from_lib)) { - perror("nal_init: pipe"); - return(NULL); - } - if (ptl_size) limits.max_ptable_index = ptl_size; if (acl_size) limits.max_atable_index = acl_size; - unix_failure(write,p->to_lib[1], &requested_pid, sizeof(requested_pid), - "nal_init: write"); - unix_failure(write,p->to_lib[1], &limits, sizeof(ptl_ni_limits_t), - "nal_init: write"); - unix_failure(write,p->to_lib[1], &nal_type, sizeof(nal_type), - "nal_init: write"); + args.nia_requested_pid = requested_pid; + args.nia_limits = &limits; + args.nia_nal_type = nal_type; + args.nia_bridge = b; - if(pthread_create(&p->t, NULL, nal_thread, b)) { + /* init procbridge */ + pthread_mutex_init(&p->mutex,0); + pthread_cond_init(&p->cond, 0); + p->nal_flags = 0; + pthread_mutex_init(&p->nal_cb_lock, 0); + + if (pthread_create(&p->t, NULL, nal_thread, &args)) { perror("nal_init: pthread_create"); return(NULL); } - unix_failure(read,p->from_lib[0], &rc, sizeof(rc), - "nal_init: read"); - - if(rc) return(NULL); + do { + pthread_mutex_lock(&p->mutex); + if (p->nal_flags & (NAL_FLAG_RUNNING | NAL_FLAG_STOPPED)) { + pthread_mutex_unlock(&p->mutex); + break; + } + pthread_cond_wait(&p->cond, &p->mutex); + pthread_mutex_unlock(&p->mutex); + } while (1); + + if (p->nal_flags & NAL_FLAG_STOPPED) + return (NULL); b->nal_cb->ni.nid = tcpnal_mynid; initialized = 1; - pthread_mutex_init(&p->mutex,0); - pthread_cond_init(&p->cond, 0); return (&api_nal); } -#undef unix_failure diff --git a/lnet/ulnds/socklnd/procbridge.h b/lnet/ulnds/socklnd/procbridge.h index 060ae7b..317e22f 100644 --- a/lnet/ulnds/socklnd/procbridge.h +++ b/lnet/ulnds/socklnd/procbridge.h @@ -2,6 +2,7 @@ * vim:expandtab:shiftwidth=8:tabstop=8: * * Copyright (c) 2002 Cray Inc. + * Copyright (c) 2003 Cluster File Systems, Inc. * * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ */ @@ -14,14 +15,28 @@ #include +#define NAL_FLAG_RUNNING 1 +#define NAL_FLAG_STOPPING 2 +#define NAL_FLAG_STOPPED 4 + typedef struct procbridge { + /* sync between user threads and nal thread */ pthread_t t; pthread_cond_t cond; pthread_mutex_t mutex; - int to_lib[2]; - int from_lib[2]; + + int nal_flags; + + pthread_mutex_t nal_cb_lock; } *procbridge; +typedef struct nal_init_args { + ptl_pid_t nia_requested_pid; + ptl_ni_limits_t *nia_limits; + int nia_nal_type; + bridge nia_bridge; +} nal_init_args_t; + extern void *nal_thread(void *); @@ -33,8 +48,8 @@ extern void *nal_thread(void *); extern void set_address(bridge t,ptl_pid_t pidrequest); extern nal_t *procbridge_interface(int num_interface, - ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, - ptl_pid_t requested_pid); + ptl_pt_index_t ptl_size, + ptl_ac_index_t acl_size, + ptl_pid_t requested_pid); #endif diff --git a/lnet/ulnds/socklnd/proclib.c b/lnet/ulnds/socklnd/proclib.c index 89b67ad..2627253 100644 --- a/lnet/ulnds/socklnd/proclib.c +++ b/lnet/ulnds/socklnd/proclib.c @@ -2,6 +2,7 @@ * vim:expandtab:shiftwidth=8:tabstop=8: * * Copyright (c) 2002 Cray Inc. + * Copyright (c) 2003 Cluster File Systems, Inc. * * This file is part of Lustre, http://www.lustre.org. * @@ -31,14 +32,12 @@ #include #include #include -#include #include #include #include #include #include #include -//#include #include /* the following functions are stubs to satisfy the nal definition @@ -93,12 +92,20 @@ static void nal_printf(nal_cb_t *nal, static void nal_cli(nal_cb_t *nal, unsigned long *flags) { + bridge b = (bridge) nal->nal_data; + procbridge p = (procbridge) b->local; + + pthread_mutex_lock(&p->nal_cb_lock); } static void nal_sti(nal_cb_t *nal, unsigned long *flags) { + bridge b = (bridge)nal->nal_data; + procbridge p = (procbridge) b->local; + + pthread_mutex_unlock(&p->nal_cb_lock); } @@ -108,69 +115,22 @@ static int nal_dist(nal_cb_t *nal, { return 0; } - - - -/* Function: data_from_api - * Arguments: t: the nal state for this interface - * Returns: whether to continue reading from the pipe - * - * data_from_api() reads data from the api side in response - * to a select. - * - * We define data_failure() for syntactic convenience - * of unix error reporting. - */ - -#define data_failure(operand,fd,buffer,length)\ - if(syscall(SYS_##operand,fd,buffer,length)!=length){\ - lib_fini(b->nal_cb);\ - return(0);\ - } -static int data_from_api(void *arg) -{ - bridge b = arg; - procbridge p=(procbridge)b->local; - /* where are these two sizes derived from ??*/ - char arg_block[ 256 ]; - char ret_block[ 128 ]; - ptl_size_t arg_len,ret_len; - int fd=p->to_lib[0]; - int index; - - data_failure(read,fd, &index, sizeof(index)); - - if (index==PTL_FINI) { - lib_fini(b->nal_cb); - if (b->shutdown) (*b->shutdown)(b); - syscall(SYS_write, p->from_lib[1],&b->alive,sizeof(b->alive)); - - /* a heavy-handed but convenient way of shutting down - the lower side thread */ - pthread_exit(0); - } - - data_failure(read,fd, &arg_len, sizeof(arg_len)); - data_failure(read,fd, &ret_len, sizeof(ret_len)); - data_failure(read,fd, arg_block, arg_len); - - lib_dispatch(b->nal_cb, NULL, index, arg_block, ret_block); - - data_failure(write,p->from_lib[1],ret_block, ret_len); - return(1); -} -#undef data_failure - - static void wakeup_topside(void *z) { - bridge b=z; - procbridge p=b->local; + bridge b = z; + procbridge p = b->local; + int stop; pthread_mutex_lock(&p->mutex); + stop = p->nal_flags & NAL_FLAG_STOPPING; + if (stop) + p->nal_flags |= NAL_FLAG_STOPPED; pthread_cond_broadcast(&p->cond); pthread_mutex_unlock(&p->mutex); + + if (stop) + pthread_exit(0); } @@ -195,7 +155,8 @@ nal_initialize nal_table[PTL_IFACE_MAX]={0,tcpnal_init,0}; void *nal_thread(void *z) { - bridge b=z; + nal_init_args_t *args = (nal_init_args_t *) z; + bridge b = args->nia_bridge; procbridge p=b->local; int rc; ptl_pid_t pid_request; @@ -216,15 +177,9 @@ void *nal_thread(void *z) b->nal_cb->cb_sti=nal_sti; b->nal_cb->cb_dist=nal_dist; - - register_io_handler(p->to_lib[0],READ_HANDLER,data_from_api,(void *)b); - - if(!(rc = syscall(SYS_read, p->to_lib[0], &pid_request, sizeof(pid_request)))) - perror("procbridge read from api"); - if(!(rc = syscall(SYS_read, p->to_lib[0], &desired, sizeof(ptl_ni_limits_t)))) - perror("procbridge read from api"); - if(!(rc = syscall(SYS_read, p->to_lib[0], &nal_type, sizeof(nal_type)))) - perror("procbridge read from api"); + pid_request = args->nia_requested_pid; + desired = *args->nia_limits; + nal_type = args->nia_nal_type; actual = desired; LIMIT(desired.max_match_entries,actual.max_match_entries,MAX_MES); @@ -251,12 +206,12 @@ void *nal_thread(void *z) * it is non-zero since something went wrong. */ /* this should perform error checking */ -#if 0 - write(p->from_lib[1], &actual, sizeof(ptl_ni_limits_t)); -#endif - syscall(SYS_write, p->from_lib[1], &rc, sizeof(rc)); - - if(!rc) { + pthread_mutex_lock(&p->mutex); + p->nal_flags |= rc ? NAL_FLAG_STOPPED : NAL_FLAG_RUNNING; + pthread_cond_broadcast(&p->cond); + pthread_mutex_unlock(&p->mutex); + + if (!rc) { /* the thunk function is called each time the timer loop performs an operation and returns to blocking mode. we overload this function to inform the api side that @@ -267,4 +222,3 @@ void *nal_thread(void *z) return(0); } #undef LIMIT - diff --git a/lnet/ulnds/socklnd/select.c b/lnet/ulnds/socklnd/select.c index 47adc50..fe24efc 100644 --- a/lnet/ulnds/socklnd/select.c +++ b/lnet/ulnds/socklnd/select.c @@ -97,9 +97,9 @@ void remove_io_handler (io_handler i) static void set_flag(io_handler n,fd_set *fds) { - if (n->type & READ_HANDLER) FD_SET(n->fd,fds); - if (n->type & WRITE_HANDLER) FD_SET(n->fd,fds+1); - if (n->type & EXCEPTION_HANDLER) FD_SET(n->fd,fds+2); + if (n->type & READ_HANDLER) FD_SET(n->fd, &fds[0]); + if (n->type & WRITE_HANDLER) FD_SET(n->fd,&fds[1]); + if (n->type & EXCEPTION_HANDLER) FD_SET(n->fd, &fds[2]); } @@ -126,9 +126,18 @@ void select_timer_block(when until) timeout_pointer=&timeout; } else timeout_pointer=0; - FD_ZERO(fds); - FD_ZERO(fds+1); - FD_ZERO(fds+2); + + /* FIXME + * temporarily add timer for endless waiting problem. + * FIXME + */ + timeout.tv_sec = 1; + timeout.tv_usec = 0; + timeout_pointer=&timeout; + + FD_ZERO(&fds[0]); + FD_ZERO(&fds[1]); + FD_ZERO(&fds[2]); for (k=&io_handlers;*k;){ if ((*k)->disabled){ j=*k; @@ -140,14 +149,15 @@ void select_timer_block(when until) k=&(*k)->next; } } - result=select(FD_SETSIZE,fds,fds+1,fds+2,timeout_pointer); + + result=select(FD_SETSIZE, &fds[0], &fds[1], &fds[2], timeout_pointer); if (result > 0) for (j=io_handlers;j;j=j->next){ if (!(j->disabled) && - ((FD_ISSET(j->fd,fds) && (j->type & READ_HANDLER)) || - (FD_ISSET(j->fd,fds+1) && (j->type & WRITE_HANDLER)) || - (FD_ISSET(j->fd,fds+2) && (j->type & EXCEPTION_HANDLER)))){ + ((FD_ISSET(j->fd, &fds[0]) && (j->type & READ_HANDLER)) || + (FD_ISSET(j->fd, &fds[1]) && (j->type & WRITE_HANDLER)) || + (FD_ISSET(j->fd, &fds[2]) && (j->type & EXCEPTION_HANDLER)))){ if (!(*j->function)(j->argument)) j->disabled=1; } diff --git a/lnet/ulnds/socklnd/tcplnd.c b/lnet/ulnds/socklnd/tcplnd.c index 5daee9c0..012447f 100644 --- a/lnet/ulnds/socklnd/tcplnd.c +++ b/lnet/ulnds/socklnd/tcplnd.c @@ -2,6 +2,7 @@ * vim:expandtab:shiftwidth=8:tabstop=8: * * Copyright (c) 2002 Cray Inc. + * Copyright (c) 2003 Cluster File Systems, Inc. * * This file is part of Lustre, http://www.lustre.org. * @@ -27,7 +28,6 @@ #include #include #include -#include #include #include #include @@ -36,6 +36,10 @@ #include #include #include +#include +#ifndef __CYGWIN__ +#include +#endif /* Function: tcpnal_send * Arguments: nal: pointer to my nal control block @@ -50,7 +54,6 @@ * * sends a packet to the peer, after insuring that a connection exists */ -#warning FIXME: "param 'type' is newly added, make use of it!!" int tcpnal_send(nal_cb_t *n, void *private, lib_msg_t *cookie, @@ -64,8 +67,11 @@ int tcpnal_send(nal_cb_t *n, { connection c; bridge b=(bridge)n->nal_data; - struct iovec tiov[2]; - int count = 1; + struct iovec tiov[257]; + static pthread_mutex_t send_lock = PTHREAD_MUTEX_INITIALIZER; +#ifdef __CYGWIN__ + int i; +#endif if (!(c=force_tcp_connection((manager)b->lower, PNAL_IP(nid,b), @@ -83,18 +89,22 @@ int tcpnal_send(nal_cb_t *n, LASSERT (niov <= 1); if (len) syscall(SYS_write, c->fd,iov[0].iov_base,len); #else - LASSERT (niov <= 1); + LASSERT (niov <= 256); tiov[0].iov_base = hdr; tiov[0].iov_len = sizeof(ptl_hdr_t); - if (len) { - tiov[1].iov_base = iov[0].iov_base; - tiov[1].iov_len = len; - count++; - } + if (niov > 0) + memcpy(&tiov[1], iov, niov * sizeof(struct iovec)); - syscall(SYS_writev, c->fd, tiov, count); + pthread_mutex_lock(&send_lock); +#ifndef __CYGWIN__ + syscall(SYS_writev, c->fd, tiov, niov+1); +#else + for (i = 0; i <= niov; i++) + send(c->fd, tiov[i].iov_base, tiov[i].iov_len, 0); +#endif + pthread_mutex_unlock(&send_lock); #endif lib_finalize(n, private, cookie); @@ -124,11 +134,25 @@ int tcpnal_recv(nal_cb_t *n, size_t rlen) { - if (mlen) { - LASSERT (niov <= 1); - read_connection(private,iov[0].iov_base,mlen); - lib_finalize(n, private, cookie); - } + int i; + + if (!niov) + goto finalize; + + LASSERT(mlen); + LASSERT(rlen); + LASSERT(rlen >= mlen); + + /* FIXME + * 1. Is this effecient enough? change to use readv() directly? + * 2. need check return from read_connection() + * - MeiJia + */ + for (i = 0; i < niov; i++) + read_connection(private, iov[i].iov_base, iov[i].iov_len); + +finalize: + lib_finalize(n, private, cookie); if (mlen!=rlen){ char *trash=malloc(rlen-mlen); @@ -153,15 +177,15 @@ int tcpnal_recv(nal_cb_t *n, */ static int from_connection(void *a, void *d) { - connection c = d; - bridge b=a; - ptl_hdr_t hdr; - - if (read_connection(c, (unsigned char *)&hdr, sizeof(hdr))){ - lib_parse(b->nal_cb, &hdr, c); - return(1); - } - return(0); + connection c = d; + bridge b = a; + ptl_hdr_t hdr; + + if (read_connection(c, (unsigned char *)&hdr, sizeof(hdr))){ + lib_parse(b->nal_cb, &hdr, c); + return(1); + } + return(0); } diff --git a/lnet/ulnds/tcplnd.c b/lnet/ulnds/tcplnd.c index 5daee9c0..012447f 100644 --- a/lnet/ulnds/tcplnd.c +++ b/lnet/ulnds/tcplnd.c @@ -2,6 +2,7 @@ * vim:expandtab:shiftwidth=8:tabstop=8: * * Copyright (c) 2002 Cray Inc. + * Copyright (c) 2003 Cluster File Systems, Inc. * * This file is part of Lustre, http://www.lustre.org. * @@ -27,7 +28,6 @@ #include #include #include -#include #include #include #include @@ -36,6 +36,10 @@ #include #include #include +#include +#ifndef __CYGWIN__ +#include +#endif /* Function: tcpnal_send * Arguments: nal: pointer to my nal control block @@ -50,7 +54,6 @@ * * sends a packet to the peer, after insuring that a connection exists */ -#warning FIXME: "param 'type' is newly added, make use of it!!" int tcpnal_send(nal_cb_t *n, void *private, lib_msg_t *cookie, @@ -64,8 +67,11 @@ int tcpnal_send(nal_cb_t *n, { connection c; bridge b=(bridge)n->nal_data; - struct iovec tiov[2]; - int count = 1; + struct iovec tiov[257]; + static pthread_mutex_t send_lock = PTHREAD_MUTEX_INITIALIZER; +#ifdef __CYGWIN__ + int i; +#endif if (!(c=force_tcp_connection((manager)b->lower, PNAL_IP(nid,b), @@ -83,18 +89,22 @@ int tcpnal_send(nal_cb_t *n, LASSERT (niov <= 1); if (len) syscall(SYS_write, c->fd,iov[0].iov_base,len); #else - LASSERT (niov <= 1); + LASSERT (niov <= 256); tiov[0].iov_base = hdr; tiov[0].iov_len = sizeof(ptl_hdr_t); - if (len) { - tiov[1].iov_base = iov[0].iov_base; - tiov[1].iov_len = len; - count++; - } + if (niov > 0) + memcpy(&tiov[1], iov, niov * sizeof(struct iovec)); - syscall(SYS_writev, c->fd, tiov, count); + pthread_mutex_lock(&send_lock); +#ifndef __CYGWIN__ + syscall(SYS_writev, c->fd, tiov, niov+1); +#else + for (i = 0; i <= niov; i++) + send(c->fd, tiov[i].iov_base, tiov[i].iov_len, 0); +#endif + pthread_mutex_unlock(&send_lock); #endif lib_finalize(n, private, cookie); @@ -124,11 +134,25 @@ int tcpnal_recv(nal_cb_t *n, size_t rlen) { - if (mlen) { - LASSERT (niov <= 1); - read_connection(private,iov[0].iov_base,mlen); - lib_finalize(n, private, cookie); - } + int i; + + if (!niov) + goto finalize; + + LASSERT(mlen); + LASSERT(rlen); + LASSERT(rlen >= mlen); + + /* FIXME + * 1. Is this effecient enough? change to use readv() directly? + * 2. need check return from read_connection() + * - MeiJia + */ + for (i = 0; i < niov; i++) + read_connection(private, iov[i].iov_base, iov[i].iov_len); + +finalize: + lib_finalize(n, private, cookie); if (mlen!=rlen){ char *trash=malloc(rlen-mlen); @@ -153,15 +177,15 @@ int tcpnal_recv(nal_cb_t *n, */ static int from_connection(void *a, void *d) { - connection c = d; - bridge b=a; - ptl_hdr_t hdr; - - if (read_connection(c, (unsigned char *)&hdr, sizeof(hdr))){ - lib_parse(b->nal_cb, &hdr, c); - return(1); - } - return(0); + connection c = d; + bridge b = a; + ptl_hdr_t hdr; + + if (read_connection(c, (unsigned char *)&hdr, sizeof(hdr))){ + lib_parse(b->nal_cb, &hdr, c); + return(1); + } + return(0); } diff --git a/lnet/utils/Makefile.am b/lnet/utils/Makefile.am index c79909c..31c2a87 100644 --- a/lnet/utils/Makefile.am +++ b/lnet/utils/Makefile.am @@ -7,7 +7,14 @@ COMPILE = $(CC) -Wall -g -I$(srcdir)/../include LINK = $(CC) -o $@ -sbin_PROGRAMS = acceptor ptlctl debugctl routerstat wirecheck gmnalnid + +if LIBLUSTRE +tmp= +else +tmp=gmnalnid +endif + +sbin_PROGRAMS = acceptor ptlctl debugctl routerstat wirecheck $(tmp) lib_LIBRARIES = libptlctl.a acceptor_SOURCES = acceptor.c # -lefence diff --git a/lnet/utils/debug.c b/lnet/utils/debug.c index ecd0b81..3f3e69c 100644 --- a/lnet/utils/debug.c +++ b/lnet/utils/debug.c @@ -31,13 +31,16 @@ #include #include #include -#include +#ifndef __CYGWIN__ +# include +#endif #include #include #include #include #include + #define BUG() /* workaround for module.h includes */ #include @@ -412,12 +415,17 @@ int jt_dbg_debug_file(int argc, char **argv) strerror(errno)); return -1; } -#ifndef SYS_fstat64 -#define __SYS_fstat__ SYS_fstat + +#ifndef __CYGWIN__ +# ifndef SYS_fstat64 +# define __SYS_fstat__ SYS_fstat +# else +# define __SYS_fstat__ SYS_fstat64 +# endif + rc = syscall(__SYS_fstat__, fd, &statbuf); #else -#define __SYS_fstat__ SYS_fstat64 + rc = fstat(fd, &statbuf); #endif - rc = syscall(__SYS_fstat__, fd, &statbuf); if (rc < 0) { fprintf(stderr, "fstat failed: %s\n", strerror(errno)); goto out; @@ -522,7 +530,6 @@ int jt_dbg_mark_debug_buf(int argc, char **argv) return 0; } - int jt_dbg_modules(int argc, char **argv) { #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) diff --git a/lustre/lov/lov_log.c b/lustre/lov/lov_log.c index b8f0e67..0b9f6f3 100644 --- a/lustre/lov/lov_log.c +++ b/lustre/lov/lov_log.c @@ -33,6 +33,7 @@ #include #include #include +#include #else #include #endif @@ -46,7 +47,6 @@ #include #include #include -#include #include #include "lov_internal.h" diff --git a/lustre/mdc/mdc_internal.h b/lustre/mdc/mdc_internal.h index bab2ebb..2b459b4 100644 --- a/lustre/mdc/mdc_internal.h +++ b/lustre/mdc/mdc_internal.h @@ -39,7 +39,6 @@ static inline void mdc_init_rpc_lock(struct mdc_rpc_lock *lck) lck->rpcl_it = NULL; } -#ifdef __KERNEL__ static inline void mdc_get_rpc_lock(struct mdc_rpc_lock *lck, struct lookup_intent *it) { @@ -65,4 +64,3 @@ static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck, up(&lck->rpcl_sem); } } -#endif diff --git a/lustre/mdc/mdc_lib.c b/lustre/mdc/mdc_lib.c index e3a1c4d..0de8ad7 100644 --- a/lustre/mdc/mdc_lib.c +++ b/lustre/mdc/mdc_lib.c @@ -21,6 +21,7 @@ #define DEBUG_SUBSYSTEM S_MDS #ifndef __KERNEL__ +# include # include #endif #include @@ -28,6 +29,13 @@ #include #include "mdc_internal.h" +#ifndef __KERNEL__ +/* some liblustre hackings here */ +#ifndef O_DIRECTORY +#define O_DIRECTORY 0 +#endif +#endif + void mdc_readdir_pack(struct ptlrpc_request *req, __u64 offset, __u32 size, struct ll_fid *mdc_fid) { diff --git a/lustre/obdclass/llog_obd.c b/lustre/obdclass/llog_obd.c index 6c17a64..d07fcf6 100644 --- a/lustre/obdclass/llog_obd.c +++ b/lustre/obdclass/llog_obd.c @@ -13,7 +13,12 @@ #define EXPORT_SYMTAB #endif +#ifdef __KERNEL__ #include +#else +#include +#endif + #include #include #include @@ -312,7 +317,7 @@ int llog_cat_initialize(struct obd_device *obd, int count) RETURN(rc); } EXPORT_SYMBOL(llog_cat_initialize); - + int obd_llog_init(struct obd_device *obd, struct obd_device *disk_obd, int count, struct llog_logid *logid) { diff --git a/lustre/obdclass/obdo.c b/lustre/obdclass/obdo.c index aa604f8..996ef58 100644 --- a/lustre/obdclass/obdo.c +++ b/lustre/obdclass/obdo.c @@ -29,9 +29,13 @@ # define EXPORT_SYMTAB #endif +#ifndef __KERNEL__ +#include +#else #include #include #include +#endif #ifdef __KERNEL__ #include diff --git a/lustre/portals/include/linux/kp30.h b/lustre/portals/include/linux/kp30.h index 49f6d95..3e6d5e3 100644 --- a/lustre/portals/include/linux/kp30.h +++ b/lustre/portals/include/linux/kp30.h @@ -611,7 +611,6 @@ extern struct prof_ent prof_ents[MAX_PROFS]; extern spinlock_t stack_backtrace_lock; char *portals_debug_dumpstack(void); -char *portals_nid2str(int nal, ptl_nid_t nid, char *str); void portals_run_upcall(char **argv); void portals_run_lbug_upcall(char * file, const char *fn, const int line); void portals_debug_dumplog(void); @@ -654,6 +653,8 @@ extern void kportal_blockallsigs (void); # include #ifndef __CYGWIN__ # include +#else +# include #endif # include # include @@ -679,6 +680,9 @@ extern void kportal_blockallsigs (void); getpid() , stack, ## a); #endif +/* support decl needed both by kernel and liblustre */ +char *portals_nid2str(int nal, ptl_nid_t nid, char *str); + #ifndef CURRENT_TIME # define CURRENT_TIME time(0) #endif diff --git a/lustre/portals/include/portals/lib-p30.h b/lustre/portals/include/portals/lib-p30.h index 2401f22..55fd720 100644 --- a/lustre/portals/include/portals/lib-p30.h +++ b/lustre/portals/include/portals/lib-p30.h @@ -30,7 +30,6 @@ static inline int ptl_is_wire_handle_none (ptl_handle_wire_t *wh) wh->wh_object_cookie == PTL_WIRE_HANDLE_NONE.wh_object_cookie); } -#ifdef __KERNEL__ #define state_lock(nal,flagsp) \ do { \ CDEBUG(D_PORTALS, "taking state lock\n"); \ @@ -42,20 +41,6 @@ do { \ CDEBUG(D_PORTALS, "releasing state lock\n"); \ nal->cb_sti(nal, flagsp); \ } -#else -/* not needed in user space until we thread there */ -#define state_lock(nal,flagsp) \ -do { \ - CDEBUG(D_PORTALS, "taking state lock\n"); \ - CDEBUG(D_PORTALS, "%p:%p\n", nal, flagsp); \ -} while (0) - -#define state_unlock(nal,flagsp) \ -{ \ - CDEBUG(D_PORTALS, "releasing state lock\n"); \ - CDEBUG(D_PORTALS, "%p:%p\n", nal, flagsp); \ -} -#endif /* __KERNEL__ */ #ifndef PTL_USE_SLAB_CACHE diff --git a/lustre/portals/include/portals/ptlctl.h b/lustre/portals/include/portals/ptlctl.h index a9942aa..f581e72 100644 --- a/lustre/portals/include/portals/ptlctl.h +++ b/lustre/portals/include/portals/ptlctl.h @@ -75,6 +75,8 @@ int jt_dbg_panic(int argc, char **argv); int ptl_set_cfg_record_cb(cfg_record_cb_t cb); /* l_ioctl.c */ +typedef int (ioc_handler_t)(int dev_id, int opc, void *buf); +void set_ioc_handler(ioc_handler_t *handler); int register_ioc_dev(int dev_id, const char * dev_name); void unregister_ioc_dev(int dev_id); int set_ioctl_dump(char * file); diff --git a/lustre/portals/include/portals/types.h b/lustre/portals/include/portals/types.h index 0269290..e4ccebf 100644 --- a/lustre/portals/include/portals/types.h +++ b/lustre/portals/include/portals/types.h @@ -104,6 +104,13 @@ typedef enum { typedef unsigned PTL_SEQ_BASETYPE ptl_seq_t; #define PTL_SEQ_GT(a,b) (((signed PTL_SEQ_BASETYPE)((a) - (b))) > 0) +/* XXX + * cygwin need the pragma line, not clear if it's needed in other places. + * checking!!! + */ +#ifdef __CYGWIN__ +#pragma pack(push, 4) +#endif typedef struct { ptl_event_kind_t type; ptl_process_id_t initiator; @@ -116,6 +123,9 @@ typedef struct { struct timeval arrival_time; volatile ptl_seq_t sequence; } ptl_event_t; +#ifdef __CYGWIN__ +#pragma pop +#endif typedef enum { PTL_ACK_REQ, diff --git a/lustre/portals/portals/api-eq.c b/lustre/portals/portals/api-eq.c index e066619..9bc9c36 100644 --- a/lustre/portals/portals/api-eq.c +++ b/lustre/portals/portals/api-eq.c @@ -123,13 +123,22 @@ static jmp_buf eq_jumpbuf; static void eq_timeout(int signal) { + sigset_t set; + + /* signal will be automatically disabled in sig handler, + * must enable it before long jump + */ + sigemptyset(&set); + sigaddset(&set, SIGALRM); + sigprocmask(SIG_UNBLOCK, &set, NULL); + longjmp(eq_jumpbuf, -1); } int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out, int timeout) { - static void (*prev) (int); + static void (*prev) (int) = NULL; static int left_over; time_t time_at_start; int rc; @@ -143,7 +152,7 @@ int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out, left_over = alarm(timeout); prev = signal(SIGALRM, eq_timeout); time_at_start = time(NULL); - if (left_over < timeout) + if (left_over && left_over < timeout) alarm(left_over); rc = PtlEQWait(eventq_in, event_out); diff --git a/lustre/portals/unals/connection.c b/lustre/portals/unals/connection.c index edd7c96..3e64b33 100644 --- a/lustre/portals/unals/connection.c +++ b/lustre/portals/unals/connection.c @@ -34,13 +34,21 @@ #include #include #include -#include #include #include #include +#include +#include +#include +#include +#include +#include #include +#include #include - +#ifndef __CYGWIN__ +#include +#endif /* global variable: acceptor port */ unsigned short tcpnal_acceptor_port = 988; @@ -55,9 +63,14 @@ unsigned short tcpnal_acceptor_port = 988; */ static int compare_connection(void *arg1, void *arg2) { - connection c = arg1; - unsigned int * id = arg2; - return((c->ip==id[0]) && (c->port==id[1])); + connection c = arg1; + unsigned int * id = arg2; +#if 0 + return((c->ip==id[0]) && (c->port==id[1])); +#else + /* CFS specific hacking */ + return (c->ip == id[0]); +#endif } @@ -68,7 +81,12 @@ static int compare_connection(void *arg1, void *arg2) */ static unsigned int connection_key(unsigned int *id) { +#if 0 return(id[0]^id[1]); +#else + /* CFS specific hacking */ + return (unsigned int) id[0]; +#endif } @@ -102,22 +120,27 @@ int read_connection(connection c, unsigned char *dest, int len) { - int offset=0,rc; + int offset = 0,rc; - if (len){ + if (len) { do { - if((rc=syscall(SYS_read, c->fd, dest+offset, len-offset))<=0){ - if (errno==EINTR) { - rc=0; +#ifndef __CYGWIN__ + rc = syscall(SYS_read, c->fd, dest+offset, len-offset); +#else + rc = recv(c->fd, dest+offset, len-offset, 0); +#endif + if (rc <= 0) { + if (errno == EINTR) { + rc = 0; } else { remove_connection(c); - return(0); + return (0); } } - offset+=rc; - } while (offsetconn_lock); allocate_connection(m,htonl(nid),0/*pid*/,fd); + pthread_mutex_unlock(&m->conn_lock); return(1); } +/* FIXME assuming little endian, cleanup!! */ +#define __cpu_to_le64(x) ((__u64)(x)) +#define __le64_to_cpu(x) ((__u64)(x)) +#define __cpu_to_le32(x) ((__u32)(x)) +#define __le32_to_cpu(x) ((__u32)(x)) +#define __cpu_to_le16(x) ((__u16)(x)) +#define __le16_to_cpu(x) ((__u16)(x)) + +extern ptl_nid_t tcpnal_mynid; + +int +tcpnal_hello (int sockfd, ptl_nid_t *nid, int type, __u64 incarnation) +{ + int rc; + ptl_hdr_t hdr; + ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid; + + LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid)); + + memset (&hdr, 0, sizeof (hdr)); + hmv->magic = __cpu_to_le32 (PORTALS_PROTO_MAGIC); + hmv->version_major = __cpu_to_le32 (PORTALS_PROTO_VERSION_MAJOR); + hmv->version_minor = __cpu_to_le32 (PORTALS_PROTO_VERSION_MINOR); + + hdr.src_nid = __cpu_to_le64 (tcpnal_mynid); + hdr.type = __cpu_to_le32 (PTL_MSG_HELLO); + + hdr.msg.hello.type = __cpu_to_le32 (type); + hdr.msg.hello.incarnation = 0; + + /* Assume sufficient socket buffering for this message */ + rc = syscall(SYS_write, sockfd, &hdr, sizeof(hdr)); + if (rc <= 0) { + CERROR ("Error %d sending HELLO to %llx\n", rc, *nid); + return (rc); + } + + rc = syscall(SYS_read, sockfd, hmv, sizeof(*hmv)); + if (rc <= 0) { + CERROR ("Error %d reading HELLO from %llx\n", rc, *nid); + return (rc); + } + + if (hmv->magic != __le32_to_cpu (PORTALS_PROTO_MAGIC)) { + CERROR ("Bad magic %#08x (%#08x expected) from %llx\n", + __cpu_to_le32 (hmv->magic), PORTALS_PROTO_MAGIC, *nid); + return (-EPROTO); + } + + if (hmv->version_major != __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR) || + hmv->version_minor != __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR)) { + CERROR ("Incompatible protocol version %d.%d (%d.%d expected)" + " from %llx\n", + __le16_to_cpu (hmv->version_major), + __le16_to_cpu (hmv->version_minor), + PORTALS_PROTO_VERSION_MAJOR, + PORTALS_PROTO_VERSION_MINOR, + *nid); + return (-EPROTO); + } + +#if (PORTALS_PROTO_VERSION_MAJOR != 0) +# error "This code only understands protocol version 0.x" +#endif + /* version 0 sends magic/version as the dest_nid of a 'hello' header, + * so read the rest of it in now... */ + + rc = syscall(SYS_read, sockfd, hmv + 1, sizeof(hdr) - sizeof(*hmv)); + if (rc <= 0) { + CERROR ("Error %d reading rest of HELLO hdr from %llx\n", + rc, *nid); + return (rc); + } + + /* ...and check we got what we expected */ + if (hdr.type != __cpu_to_le32 (PTL_MSG_HELLO) || + hdr.payload_length != __cpu_to_le32 (0)) { + CERROR ("Expecting a HELLO hdr with 0 payload," + " but got type %d with %d payload from %llx\n", + __le32_to_cpu (hdr.type), + __le32_to_cpu (hdr.payload_length), *nid); + return (-EPROTO); + } + + if (__le64_to_cpu(hdr.src_nid) == PTL_NID_ANY) { + CERROR("Expecting a HELLO hdr with a NID, but got PTL_NID_ANY\n"); + return (-EPROTO); + } + + if (*nid == PTL_NID_ANY) { /* don't know peer's nid yet */ + *nid = __le64_to_cpu(hdr.src_nid); + } else if (*nid != __le64_to_cpu (hdr.src_nid)) { + CERROR ("Connected to nid %llx, but expecting %llx\n", + __le64_to_cpu (hdr.src_nid), *nid); + return (-EPROTO); + } + + return (0); +} /* Function: force_tcp_connection * Arguments: t: tcpnal @@ -187,17 +311,22 @@ connection force_tcp_connection(manager m, unsigned int ip, unsigned short port) { - connection c; + connection conn; struct sockaddr_in addr; unsigned int id[2]; port = tcpnal_acceptor_port; - id[0]=ip; - id[1]=port; + id[0] = ip; + id[1] = port; - if (!(c=hash_table_find(m->connections,id))){ + pthread_mutex_lock(&m->conn_lock); + + conn = hash_table_find(m->connections, id); + if (!conn) { int fd; + int option; + ptl_nid_t peernid = PTL_NID_ANY; bzero((char *) &addr, sizeof(addr)); addr.sin_family = AF_INET; @@ -208,16 +337,30 @@ connection force_tcp_connection(manager m, perror("tcpnal socket failed"); exit(-1); } - if (connect(fd, - (struct sockaddr *)&addr, - sizeof(struct sockaddr_in))) - { - perror("tcpnal connect"); - return(0); - } - return(allocate_connection(m,ip,port,fd)); + if (connect(fd, (struct sockaddr *)&addr, + sizeof(struct sockaddr_in))) { + perror("tcpnal connect"); + return(0); + } + +#if 1 + option = 1; + setsockopt(fd, SOL_TCP, TCP_NODELAY, &option, sizeof(option)); + option = 1<<20; + setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &option, sizeof(option)); + option = 1<<20; + setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &option, sizeof(option)); +#endif + + /* say hello */ + if (tcpnal_hello(fd, &peernid, SOCKNAL_CONN_ANY, 0)) + exit(-1); + + conn = allocate_connection(m, ip, port, fd); } - return(c); + + pthread_mutex_unlock(&m->conn_lock); + return (conn); } @@ -243,8 +386,8 @@ static int bind_socket(manager m,unsigned short port) bzero((char *) &addr, sizeof(addr)); addr.sin_family = AF_INET; addr.sin_addr.s_addr = 0; - addr.sin_port = port; - + addr.sin_port = htons(port); + if (bind(m->bound,(struct sockaddr *)&addr,alen)<0){ perror ("tcpnal bind"); return(0); @@ -284,11 +427,15 @@ manager init_connections(unsigned short pid, int (*input)(void *, void *), void *a) { - manager m=(manager)malloc(sizeof(struct manager)); - m->connections=hash_create_table(compare_connection,connection_key); - m->handler=input; - m->handler_arg=a; - if (bind_socket(m,pid)) return(m); + manager m = (manager)malloc(sizeof(struct manager)); + m->connections = hash_create_table(compare_connection,connection_key); + m->handler = input; + m->handler_arg = a; + pthread_mutex_init(&m->conn_lock, 0); + + if (bind_socket(m,pid)) + return(m); + free(m); return(0); } diff --git a/lustre/portals/unals/connection.h b/lustre/portals/unals/connection.h index 6f57287..fb1eaab 100644 --- a/lustre/portals/unals/connection.h +++ b/lustre/portals/unals/connection.h @@ -10,6 +10,7 @@ typedef struct manager { table connections; + pthread_mutex_t conn_lock; /* protect connections table */ int bound; io_handler bound_handler; int (*handler)(void *, void *); diff --git a/lustre/portals/unals/procapi.c b/lustre/portals/unals/procapi.c index d897058..2a3fbd8 100644 --- a/lustre/portals/unals/procapi.c +++ b/lustre/portals/unals/procapi.c @@ -2,6 +2,7 @@ * vim:expandtab:shiftwidth=8:tabstop=8: * * Copyright (c) 2002 Cray Inc. + * Copyright (c) 2003 Cluster File Systems, Inc. * * This file is part of Lustre, http://www.lustre.org. * @@ -31,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -48,35 +48,22 @@ * forwards a packaged api call from the 'api' side to the 'library' * side, and collects the result */ -#define forward_failure(operand,fd,buffer,length)\ - if(syscall(SYS_##operand,fd,buffer,length)!=length){\ - lib_fini(b->nal_cb);\ - return(PTL_SEGV);\ - } static int procbridge_forward(nal_t *n, int id, void *args, size_t args_len, - void *ret, size_t ret_len) + void *ret, ptl_size_t ret_len) { - bridge b=(bridge)n->nal_data; - procbridge p=(procbridge)b->local; - int lib=p->to_lib[1]; - int k; + bridge b = (bridge) n->nal_data; - forward_failure(write,lib, &id, sizeof(id)); - forward_failure(write,lib,&args_len, sizeof(args_len)); - forward_failure(write,lib,&ret_len, sizeof(ret_len)); - forward_failure(write,lib,args, args_len); - - do { - k=syscall(SYS_read, p->from_lib[0], ret, ret_len); - } while ((k!=ret_len) && (errno += EINTR)); + if (id == PTL_FINI) { + lib_fini(b->nal_cb); - if(k!=ret_len){ - perror("nal: read return block"); - return PTL_SEGV; + if (b->shutdown) + (*b->shutdown)(b); } + + lib_dispatch(b->nal_cb, NULL, id, args, ret); + return (PTL_OK); } -#undef forward_failure /* Function: shutdown @@ -90,15 +77,18 @@ static int procbridge_shutdown(nal_t *n, int ni) { bridge b=(bridge)n->nal_data; procbridge p=(procbridge)b->local; - int code=PTL_FINI; - syscall(SYS_write, p->to_lib[1],&code,sizeof(code)); - syscall(SYS_read, p->from_lib[0],&code,sizeof(code)); + p->nal_flags |= NAL_FLAG_STOPPING; - syscall(SYS_close, p->to_lib[0]); - syscall(SYS_close, p->to_lib[1]); - syscall(SYS_close, p->from_lib[0]); - syscall(SYS_close, p->from_lib[1]); + do { + pthread_mutex_lock(&p->mutex); + if (p->nal_flags & NAL_FLAG_STOPPED) { + pthread_mutex_unlock(&p->mutex); + break; + } + pthread_cond_wait(&p->cond, &p->mutex); + pthread_mutex_unlock(&p->mutex); + } while (1); free(p); return(0); @@ -151,7 +141,9 @@ static nal_t api_nal = { unlock: procbridge_unlock }; -/* Function: bridge_init +ptl_nid_t tcpnal_mynid; + +/* Function: procbridge_interface * * Arguments: pid: requested process id (port offset) * PTL_ID_ANY not supported. @@ -165,77 +157,17 @@ static nal_t api_nal = { * initializes the tcp nal. we define unix_failure as an * error wrapper to cut down clutter. */ -#define unix_failure(operand,fd,buffer,length,text)\ - if(syscall(SYS_##operand,fd,buffer,length)!=length){\ - perror(text);\ - return(NULL);\ - } -#if 0 -static nal_t *bridge_init(ptl_interface_t nal, - ptl_pid_t pid_request, - ptl_ni_limits_t *desired, - ptl_ni_limits_t *actual, - int *rc) -{ - procbridge p; - bridge b; - static int initialized=0; - ptl_ni_limits_t limits = {-1,-1,-1,-1,-1}; - - if(initialized) return (&api_nal); - - init_unix_timer(); - - b=(bridge)malloc(sizeof(struct bridge)); - p=(procbridge)malloc(sizeof(struct procbridge)); - api_nal.nal_data=b; - b->local=p; - - if(pipe(p->to_lib) || pipe(p->from_lib)) { - perror("nal_init: pipe"); - return(NULL); - } - - if (desired) limits = *desired; - unix_failure(write,p->to_lib[1], &pid_request, sizeof(pid_request), - "nal_init: write"); - unix_failure(write,p->to_lib[1], &limits, sizeof(ptl_ni_limits_t), - "nal_init: write"); - unix_failure(write,p->to_lib[1], &nal, sizeof(ptl_interface_t), - "nal_init: write"); - - if(pthread_create(&p->t, NULL, nal_thread, b)) { - perror("nal_init: pthread_create"); - return(NULL); - } - - unix_failure(read,p->from_lib[0], actual, sizeof(ptl_ni_limits_t), - "tcp_init: read"); - unix_failure(read,p->from_lib[0], rc, sizeof(rc), - "nal_init: read"); - - if(*rc) return(NULL); - - initialized = 1; - pthread_mutex_init(&p->mutex,0); - pthread_cond_init(&p->cond, 0); - - return (&api_nal); -} -#endif - -ptl_nid_t tcpnal_mynid; - nal_t *procbridge_interface(int num_interface, ptl_pt_index_t ptl_size, ptl_ac_index_t acl_size, ptl_pid_t requested_pid) { + nal_init_args_t args; procbridge p; bridge b; static int initialized=0; ptl_ni_limits_t limits = {-1,-1,-1,-1,-1}; - int rc, nal_type = PTL_IFACE_TCP;/* PTL_IFACE_DEFAULT FIXME hack */ + int nal_type = PTL_IFACE_TCP;/* PTL_IFACE_DEFAULT FIXME hack */ if(initialized) return (&api_nal); @@ -246,38 +178,42 @@ nal_t *procbridge_interface(int num_interface, api_nal.nal_data=b; b->local=p; - if(pipe(p->to_lib) || pipe(p->from_lib)) { - perror("nal_init: pipe"); - return(NULL); - } - if (ptl_size) limits.max_ptable_index = ptl_size; if (acl_size) limits.max_atable_index = acl_size; - unix_failure(write,p->to_lib[1], &requested_pid, sizeof(requested_pid), - "nal_init: write"); - unix_failure(write,p->to_lib[1], &limits, sizeof(ptl_ni_limits_t), - "nal_init: write"); - unix_failure(write,p->to_lib[1], &nal_type, sizeof(nal_type), - "nal_init: write"); + args.nia_requested_pid = requested_pid; + args.nia_limits = &limits; + args.nia_nal_type = nal_type; + args.nia_bridge = b; - if(pthread_create(&p->t, NULL, nal_thread, b)) { + /* init procbridge */ + pthread_mutex_init(&p->mutex,0); + pthread_cond_init(&p->cond, 0); + p->nal_flags = 0; + pthread_mutex_init(&p->nal_cb_lock, 0); + + if (pthread_create(&p->t, NULL, nal_thread, &args)) { perror("nal_init: pthread_create"); return(NULL); } - unix_failure(read,p->from_lib[0], &rc, sizeof(rc), - "nal_init: read"); - - if(rc) return(NULL); + do { + pthread_mutex_lock(&p->mutex); + if (p->nal_flags & (NAL_FLAG_RUNNING | NAL_FLAG_STOPPED)) { + pthread_mutex_unlock(&p->mutex); + break; + } + pthread_cond_wait(&p->cond, &p->mutex); + pthread_mutex_unlock(&p->mutex); + } while (1); + + if (p->nal_flags & NAL_FLAG_STOPPED) + return (NULL); b->nal_cb->ni.nid = tcpnal_mynid; initialized = 1; - pthread_mutex_init(&p->mutex,0); - pthread_cond_init(&p->cond, 0); return (&api_nal); } -#undef unix_failure diff --git a/lustre/portals/unals/procbridge.h b/lustre/portals/unals/procbridge.h index 060ae7b..317e22f 100644 --- a/lustre/portals/unals/procbridge.h +++ b/lustre/portals/unals/procbridge.h @@ -2,6 +2,7 @@ * vim:expandtab:shiftwidth=8:tabstop=8: * * Copyright (c) 2002 Cray Inc. + * Copyright (c) 2003 Cluster File Systems, Inc. * * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ */ @@ -14,14 +15,28 @@ #include +#define NAL_FLAG_RUNNING 1 +#define NAL_FLAG_STOPPING 2 +#define NAL_FLAG_STOPPED 4 + typedef struct procbridge { + /* sync between user threads and nal thread */ pthread_t t; pthread_cond_t cond; pthread_mutex_t mutex; - int to_lib[2]; - int from_lib[2]; + + int nal_flags; + + pthread_mutex_t nal_cb_lock; } *procbridge; +typedef struct nal_init_args { + ptl_pid_t nia_requested_pid; + ptl_ni_limits_t *nia_limits; + int nia_nal_type; + bridge nia_bridge; +} nal_init_args_t; + extern void *nal_thread(void *); @@ -33,8 +48,8 @@ extern void *nal_thread(void *); extern void set_address(bridge t,ptl_pid_t pidrequest); extern nal_t *procbridge_interface(int num_interface, - ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, - ptl_pid_t requested_pid); + ptl_pt_index_t ptl_size, + ptl_ac_index_t acl_size, + ptl_pid_t requested_pid); #endif diff --git a/lustre/portals/unals/proclib.c b/lustre/portals/unals/proclib.c index 89b67ad..2627253 100644 --- a/lustre/portals/unals/proclib.c +++ b/lustre/portals/unals/proclib.c @@ -2,6 +2,7 @@ * vim:expandtab:shiftwidth=8:tabstop=8: * * Copyright (c) 2002 Cray Inc. + * Copyright (c) 2003 Cluster File Systems, Inc. * * This file is part of Lustre, http://www.lustre.org. * @@ -31,14 +32,12 @@ #include #include #include -#include #include #include #include #include #include #include -//#include #include /* the following functions are stubs to satisfy the nal definition @@ -93,12 +92,20 @@ static void nal_printf(nal_cb_t *nal, static void nal_cli(nal_cb_t *nal, unsigned long *flags) { + bridge b = (bridge) nal->nal_data; + procbridge p = (procbridge) b->local; + + pthread_mutex_lock(&p->nal_cb_lock); } static void nal_sti(nal_cb_t *nal, unsigned long *flags) { + bridge b = (bridge)nal->nal_data; + procbridge p = (procbridge) b->local; + + pthread_mutex_unlock(&p->nal_cb_lock); } @@ -108,69 +115,22 @@ static int nal_dist(nal_cb_t *nal, { return 0; } - - - -/* Function: data_from_api - * Arguments: t: the nal state for this interface - * Returns: whether to continue reading from the pipe - * - * data_from_api() reads data from the api side in response - * to a select. - * - * We define data_failure() for syntactic convenience - * of unix error reporting. - */ - -#define data_failure(operand,fd,buffer,length)\ - if(syscall(SYS_##operand,fd,buffer,length)!=length){\ - lib_fini(b->nal_cb);\ - return(0);\ - } -static int data_from_api(void *arg) -{ - bridge b = arg; - procbridge p=(procbridge)b->local; - /* where are these two sizes derived from ??*/ - char arg_block[ 256 ]; - char ret_block[ 128 ]; - ptl_size_t arg_len,ret_len; - int fd=p->to_lib[0]; - int index; - - data_failure(read,fd, &index, sizeof(index)); - - if (index==PTL_FINI) { - lib_fini(b->nal_cb); - if (b->shutdown) (*b->shutdown)(b); - syscall(SYS_write, p->from_lib[1],&b->alive,sizeof(b->alive)); - - /* a heavy-handed but convenient way of shutting down - the lower side thread */ - pthread_exit(0); - } - - data_failure(read,fd, &arg_len, sizeof(arg_len)); - data_failure(read,fd, &ret_len, sizeof(ret_len)); - data_failure(read,fd, arg_block, arg_len); - - lib_dispatch(b->nal_cb, NULL, index, arg_block, ret_block); - - data_failure(write,p->from_lib[1],ret_block, ret_len); - return(1); -} -#undef data_failure - - static void wakeup_topside(void *z) { - bridge b=z; - procbridge p=b->local; + bridge b = z; + procbridge p = b->local; + int stop; pthread_mutex_lock(&p->mutex); + stop = p->nal_flags & NAL_FLAG_STOPPING; + if (stop) + p->nal_flags |= NAL_FLAG_STOPPED; pthread_cond_broadcast(&p->cond); pthread_mutex_unlock(&p->mutex); + + if (stop) + pthread_exit(0); } @@ -195,7 +155,8 @@ nal_initialize nal_table[PTL_IFACE_MAX]={0,tcpnal_init,0}; void *nal_thread(void *z) { - bridge b=z; + nal_init_args_t *args = (nal_init_args_t *) z; + bridge b = args->nia_bridge; procbridge p=b->local; int rc; ptl_pid_t pid_request; @@ -216,15 +177,9 @@ void *nal_thread(void *z) b->nal_cb->cb_sti=nal_sti; b->nal_cb->cb_dist=nal_dist; - - register_io_handler(p->to_lib[0],READ_HANDLER,data_from_api,(void *)b); - - if(!(rc = syscall(SYS_read, p->to_lib[0], &pid_request, sizeof(pid_request)))) - perror("procbridge read from api"); - if(!(rc = syscall(SYS_read, p->to_lib[0], &desired, sizeof(ptl_ni_limits_t)))) - perror("procbridge read from api"); - if(!(rc = syscall(SYS_read, p->to_lib[0], &nal_type, sizeof(nal_type)))) - perror("procbridge read from api"); + pid_request = args->nia_requested_pid; + desired = *args->nia_limits; + nal_type = args->nia_nal_type; actual = desired; LIMIT(desired.max_match_entries,actual.max_match_entries,MAX_MES); @@ -251,12 +206,12 @@ void *nal_thread(void *z) * it is non-zero since something went wrong. */ /* this should perform error checking */ -#if 0 - write(p->from_lib[1], &actual, sizeof(ptl_ni_limits_t)); -#endif - syscall(SYS_write, p->from_lib[1], &rc, sizeof(rc)); - - if(!rc) { + pthread_mutex_lock(&p->mutex); + p->nal_flags |= rc ? NAL_FLAG_STOPPED : NAL_FLAG_RUNNING; + pthread_cond_broadcast(&p->cond); + pthread_mutex_unlock(&p->mutex); + + if (!rc) { /* the thunk function is called each time the timer loop performs an operation and returns to blocking mode. we overload this function to inform the api side that @@ -267,4 +222,3 @@ void *nal_thread(void *z) return(0); } #undef LIMIT - diff --git a/lustre/portals/unals/select.c b/lustre/portals/unals/select.c index 47adc50..fe24efc 100644 --- a/lustre/portals/unals/select.c +++ b/lustre/portals/unals/select.c @@ -97,9 +97,9 @@ void remove_io_handler (io_handler i) static void set_flag(io_handler n,fd_set *fds) { - if (n->type & READ_HANDLER) FD_SET(n->fd,fds); - if (n->type & WRITE_HANDLER) FD_SET(n->fd,fds+1); - if (n->type & EXCEPTION_HANDLER) FD_SET(n->fd,fds+2); + if (n->type & READ_HANDLER) FD_SET(n->fd, &fds[0]); + if (n->type & WRITE_HANDLER) FD_SET(n->fd,&fds[1]); + if (n->type & EXCEPTION_HANDLER) FD_SET(n->fd, &fds[2]); } @@ -126,9 +126,18 @@ void select_timer_block(when until) timeout_pointer=&timeout; } else timeout_pointer=0; - FD_ZERO(fds); - FD_ZERO(fds+1); - FD_ZERO(fds+2); + + /* FIXME + * temporarily add timer for endless waiting problem. + * FIXME + */ + timeout.tv_sec = 1; + timeout.tv_usec = 0; + timeout_pointer=&timeout; + + FD_ZERO(&fds[0]); + FD_ZERO(&fds[1]); + FD_ZERO(&fds[2]); for (k=&io_handlers;*k;){ if ((*k)->disabled){ j=*k; @@ -140,14 +149,15 @@ void select_timer_block(when until) k=&(*k)->next; } } - result=select(FD_SETSIZE,fds,fds+1,fds+2,timeout_pointer); + + result=select(FD_SETSIZE, &fds[0], &fds[1], &fds[2], timeout_pointer); if (result > 0) for (j=io_handlers;j;j=j->next){ if (!(j->disabled) && - ((FD_ISSET(j->fd,fds) && (j->type & READ_HANDLER)) || - (FD_ISSET(j->fd,fds+1) && (j->type & WRITE_HANDLER)) || - (FD_ISSET(j->fd,fds+2) && (j->type & EXCEPTION_HANDLER)))){ + ((FD_ISSET(j->fd, &fds[0]) && (j->type & READ_HANDLER)) || + (FD_ISSET(j->fd, &fds[1]) && (j->type & WRITE_HANDLER)) || + (FD_ISSET(j->fd, &fds[2]) && (j->type & EXCEPTION_HANDLER)))){ if (!(*j->function)(j->argument)) j->disabled=1; } diff --git a/lustre/portals/unals/tcpnal.c b/lustre/portals/unals/tcpnal.c index 5daee9c0..012447f 100644 --- a/lustre/portals/unals/tcpnal.c +++ b/lustre/portals/unals/tcpnal.c @@ -2,6 +2,7 @@ * vim:expandtab:shiftwidth=8:tabstop=8: * * Copyright (c) 2002 Cray Inc. + * Copyright (c) 2003 Cluster File Systems, Inc. * * This file is part of Lustre, http://www.lustre.org. * @@ -27,7 +28,6 @@ #include #include #include -#include #include #include #include @@ -36,6 +36,10 @@ #include #include #include +#include +#ifndef __CYGWIN__ +#include +#endif /* Function: tcpnal_send * Arguments: nal: pointer to my nal control block @@ -50,7 +54,6 @@ * * sends a packet to the peer, after insuring that a connection exists */ -#warning FIXME: "param 'type' is newly added, make use of it!!" int tcpnal_send(nal_cb_t *n, void *private, lib_msg_t *cookie, @@ -64,8 +67,11 @@ int tcpnal_send(nal_cb_t *n, { connection c; bridge b=(bridge)n->nal_data; - struct iovec tiov[2]; - int count = 1; + struct iovec tiov[257]; + static pthread_mutex_t send_lock = PTHREAD_MUTEX_INITIALIZER; +#ifdef __CYGWIN__ + int i; +#endif if (!(c=force_tcp_connection((manager)b->lower, PNAL_IP(nid,b), @@ -83,18 +89,22 @@ int tcpnal_send(nal_cb_t *n, LASSERT (niov <= 1); if (len) syscall(SYS_write, c->fd,iov[0].iov_base,len); #else - LASSERT (niov <= 1); + LASSERT (niov <= 256); tiov[0].iov_base = hdr; tiov[0].iov_len = sizeof(ptl_hdr_t); - if (len) { - tiov[1].iov_base = iov[0].iov_base; - tiov[1].iov_len = len; - count++; - } + if (niov > 0) + memcpy(&tiov[1], iov, niov * sizeof(struct iovec)); - syscall(SYS_writev, c->fd, tiov, count); + pthread_mutex_lock(&send_lock); +#ifndef __CYGWIN__ + syscall(SYS_writev, c->fd, tiov, niov+1); +#else + for (i = 0; i <= niov; i++) + send(c->fd, tiov[i].iov_base, tiov[i].iov_len, 0); +#endif + pthread_mutex_unlock(&send_lock); #endif lib_finalize(n, private, cookie); @@ -124,11 +134,25 @@ int tcpnal_recv(nal_cb_t *n, size_t rlen) { - if (mlen) { - LASSERT (niov <= 1); - read_connection(private,iov[0].iov_base,mlen); - lib_finalize(n, private, cookie); - } + int i; + + if (!niov) + goto finalize; + + LASSERT(mlen); + LASSERT(rlen); + LASSERT(rlen >= mlen); + + /* FIXME + * 1. Is this effecient enough? change to use readv() directly? + * 2. need check return from read_connection() + * - MeiJia + */ + for (i = 0; i < niov; i++) + read_connection(private, iov[i].iov_base, iov[i].iov_len); + +finalize: + lib_finalize(n, private, cookie); if (mlen!=rlen){ char *trash=malloc(rlen-mlen); @@ -153,15 +177,15 @@ int tcpnal_recv(nal_cb_t *n, */ static int from_connection(void *a, void *d) { - connection c = d; - bridge b=a; - ptl_hdr_t hdr; - - if (read_connection(c, (unsigned char *)&hdr, sizeof(hdr))){ - lib_parse(b->nal_cb, &hdr, c); - return(1); - } - return(0); + connection c = d; + bridge b = a; + ptl_hdr_t hdr; + + if (read_connection(c, (unsigned char *)&hdr, sizeof(hdr))){ + lib_parse(b->nal_cb, &hdr, c); + return(1); + } + return(0); } diff --git a/lustre/portals/utils/Makefile.am b/lustre/portals/utils/Makefile.am index c79909c..31c2a87 100644 --- a/lustre/portals/utils/Makefile.am +++ b/lustre/portals/utils/Makefile.am @@ -7,7 +7,14 @@ COMPILE = $(CC) -Wall -g -I$(srcdir)/../include LINK = $(CC) -o $@ -sbin_PROGRAMS = acceptor ptlctl debugctl routerstat wirecheck gmnalnid + +if LIBLUSTRE +tmp= +else +tmp=gmnalnid +endif + +sbin_PROGRAMS = acceptor ptlctl debugctl routerstat wirecheck $(tmp) lib_LIBRARIES = libptlctl.a acceptor_SOURCES = acceptor.c # -lefence diff --git a/lustre/portals/utils/debug.c b/lustre/portals/utils/debug.c index ecd0b81..3f3e69c 100644 --- a/lustre/portals/utils/debug.c +++ b/lustre/portals/utils/debug.c @@ -31,13 +31,16 @@ #include #include #include -#include +#ifndef __CYGWIN__ +# include +#endif #include #include #include #include #include + #define BUG() /* workaround for module.h includes */ #include @@ -412,12 +415,17 @@ int jt_dbg_debug_file(int argc, char **argv) strerror(errno)); return -1; } -#ifndef SYS_fstat64 -#define __SYS_fstat__ SYS_fstat + +#ifndef __CYGWIN__ +# ifndef SYS_fstat64 +# define __SYS_fstat__ SYS_fstat +# else +# define __SYS_fstat__ SYS_fstat64 +# endif + rc = syscall(__SYS_fstat__, fd, &statbuf); #else -#define __SYS_fstat__ SYS_fstat64 + rc = fstat(fd, &statbuf); #endif - rc = syscall(__SYS_fstat__, fd, &statbuf); if (rc < 0) { fprintf(stderr, "fstat failed: %s\n", strerror(errno)); goto out; @@ -522,7 +530,6 @@ int jt_dbg_mark_debug_buf(int argc, char **argv) return 0; } - int jt_dbg_modules(int argc, char **argv) { #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) diff --git a/lustre/ptlrpc/llog_client.c b/lustre/ptlrpc/llog_client.c index 513a7aa..5524843 100644 --- a/lustre/ptlrpc/llog_client.c +++ b/lustre/ptlrpc/llog_client.c @@ -29,7 +29,12 @@ #define EXPORT_SYMTAB #endif +#ifdef __KERNEL__ #include +#else +#include +#endif + #include #include #include diff --git a/lustre/ptlrpc/llog_net.c b/lustre/ptlrpc/llog_net.c index 08c1407..845257d 100644 --- a/lustre/ptlrpc/llog_net.c +++ b/lustre/ptlrpc/llog_net.c @@ -32,12 +32,18 @@ #define EXPORT_SYMTAB #endif +#ifdef __KERNEL__ #include +#else +#include +#endif + #include #include #include #include +#ifdef __KERNEL__ int llog_origin_connect(struct llog_ctxt *ctxt, int count, struct llog_logid *logid, struct llog_ctxt_gen *gen) @@ -107,3 +113,18 @@ int llog_initiator_connect(struct llog_ctxt *ctxt) RETURN(0); } EXPORT_SYMBOL(llog_initiator_connect); + +#else /* !__KERNEL__ */ + +int llog_origin_connect(struct llog_ctxt *ctxt, int count, + struct llog_logid *logid, + struct llog_ctxt_gen *gen) +{ + return 0; +} + +int llog_initiator_connect(struct llog_ctxt *ctxt) +{ + return 0; +} +#endif diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c index 3a645ca..3caf74e 100644 --- a/lustre/ptlrpc/pinger.c +++ b/lustre/ptlrpc/pinger.c @@ -23,14 +23,20 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#ifndef __KERNEL__ +#include +#else #include #include - #define DEBUG_SUBSYSTEM S_RPC +#endif + #include #include #include "ptlrpc_internal.h" +#ifdef __KERNEL__ + static struct ptlrpc_thread *pinger_thread = NULL; static DECLARE_MUTEX(pinger_sem); static struct list_head pinger_imports = LIST_HEAD_INIT(pinger_imports); @@ -300,3 +306,31 @@ int ptlrpc_pinger_del_import(struct obd_import *imp) up(&pinger_sem); RETURN(0); } + +#else /* !__KERNEL__ */ + +int ptlrpc_start_pinger(void) +{ + return 0; +} + +int ptlrpc_stop_pinger(void) +{ + return 0; +} + +int ptlrpc_pinger_add_import(struct obd_import *imp) +{ + return 0; +} + +int ptlrpc_pinger_del_import(struct obd_import *imp) +{ + return 0; +} + +void ptlrpc_pinger_sending_on_import(struct obd_import *imp) +{ +} + +#endif diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c index 7339242..f403706 100644 --- a/lustre/utils/lfs.c +++ b/lustre/utils/lfs.c @@ -304,6 +304,6 @@ int main(int argc, char **argv) rc = Parser_commands(); } - obd_cleanup(argc, argv); + obd_finalize(argc, argv); return rc; } -- 1.8.3.1