From: green Date: Mon, 26 Apr 2004 17:06:04 +0000 (+0000) Subject: Update to HEAD. X-Git-Tag: v1_7_100~1^71~9 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=a6ec6d72a686f90005b5dabe529ca12d6303774a;p=fs%2Flustre-release.git Update to HEAD. --- diff --git a/lnet/klnds/gmlnd/gmlnd.h b/lnet/klnds/gmlnd/gmlnd.h index cdde5b7..e48552e 100644 --- a/lnet/klnds/gmlnd/gmlnd.h +++ b/lnet/klnds/gmlnd/gmlnd.h @@ -45,6 +45,7 @@ #include "linux/init.h" #include "linux/sem.h" #include "linux/vmalloc.h" +#include "linux/sysctl.h" #define DEBUG_SUBSYSTEM S_GMNAL @@ -80,9 +81,14 @@ extern int gmnal_small_msg_size; extern int num_rx_threads; extern int num_stxds; +extern int gm_port; #define GMNAL_SMALL_MSG_SIZE(a) a->small_msg_size #define GMNAL_IS_SMALL_MESSAGE(n,a,b,c) gmnal_is_small_msg(n, a, b, c) #define GMNAL_MAGIC 0x1234abcd +/* + * The gm_port to use for gmnal + */ +#define GMNAL_GM_PORT gm_port /* @@ -184,7 +190,6 @@ typedef struct _gmnal_rxtwe { #define NRXTHREADS 10 /* max number of receiver threads */ typedef struct _gmnal_data_t { - int refcnt; spinlock_t cb_lock; spinlock_t stxd_lock; struct semaphore stxd_token; @@ -218,6 +223,7 @@ typedef struct _gmnal_data_t { gmnal_rxtwe_t *rxtwe_tail; spinlock_t rxtwe_lock; struct semaphore rxtwe_wait; + struct ctl_table_header *sysctl; } gmnal_data_t; /* @@ -234,11 +240,6 @@ typedef struct _gmnal_data_t { extern gmnal_data_t *global_nal_data; /* - * The gm_port to use for gmnal - */ -#define GMNAL_GM_PORT 4 - -/* * for ioctl get pid */ #define GMNAL_IOC_GET_GNID 1 @@ -307,13 +308,16 @@ extern gmnal_data_t *global_nal_data; /* * API NAL */ +int gmnal_api_startup(nal_t *, ptl_pid_t, + ptl_ni_limits_t *, ptl_ni_limits_t *); + int gmnal_api_forward(nal_t *, int, void *, size_t, void *, size_t); -int gmnal_api_shutdown(nal_t *, int); +void gmnal_api_shutdown(nal_t *); int gmnal_api_validate(nal_t *, void *, size_t); -void gmnal_api_yield(nal_t *); +void gmnal_api_yield(nal_t *, unsigned long *, int); void 
gmnal_api_lock(nal_t *, unsigned long *); @@ -321,14 +325,13 @@ void gmnal_api_unlock(nal_t *, unsigned long *); #define GMNAL_INIT_NAL(a) do { \ + a->startup = gmnal_api_startup; \ a->forward = gmnal_api_forward; \ a->shutdown = gmnal_api_shutdown; \ - a->validate = NULL; \ a->yield = gmnal_api_yield; \ a->lock = gmnal_api_lock; \ a->unlock = gmnal_api_unlock; \ a->timeout = NULL; \ - a->refct = 1; \ a->nal_data = NULL; \ } while (0) @@ -353,6 +356,8 @@ int gmnal_cb_read(nal_cb_t *, void *private, void *, user_ptr, size_t); int gmnal_cb_write(nal_cb_t *, void *private, user_ptr, void *, size_t); +int gmnal_cb_callback(nal_cb_t *, void *, lib_eq_t *, ptl_event_t *); + void *gmnal_cb_malloc(nal_cb_t *, size_t); void gmnal_cb_free(nal_cb_t *, void *, size_t); @@ -369,7 +374,7 @@ void gmnal_cb_sti(nal_cb_t *, unsigned long *); int gmnal_cb_dist(nal_cb_t *, ptl_nid_t, unsigned long *); -nal_t *gmnal_init(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t rpid); +int gmnal_init(void); void gmnal_fini(void); @@ -382,7 +387,7 @@ void gmnal_fini(void); a->cb_recv_pages = gmnal_cb_recv_pages; \ a->cb_read = gmnal_cb_read; \ a->cb_write = gmnal_cb_write; \ - a->cb_callback = NULL; \ + a->cb_callback = gmnal_cb_callback; \ a->cb_malloc = gmnal_cb_malloc; \ a->cb_free = gmnal_cb_free; \ a->cb_map = NULL; \ @@ -418,6 +423,7 @@ void gmnal_stop_rxthread(gmnal_data_t *); void gmnal_stop_ctthread(gmnal_data_t *); void gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t); void gmnal_drop_sends_callback(gm_port_t *, void *, gm_status_t); +void gmnal_resume_sending_callback(gm_port_t *, void *, gm_status_t); char *gmnal_gm_error(gm_status_t); char *gmnal_rxevent(gm_recv_event_t*); int gmnal_is_small_msg(gmnal_data_t*, int, struct iovec*, int); diff --git a/lnet/klnds/gmlnd/gmlnd_cb.c b/lnet/klnds/gmlnd/gmlnd_cb.c index e055242..ece1380 100644 --- a/lnet/klnds/gmlnd/gmlnd_cb.c +++ b/lnet/klnds/gmlnd/gmlnd_cb.c @@ -35,8 +35,8 @@ int gmnal_cb_recv(nal_cb_t *nal_cb, void *private, 
lib_msg_t *cookie, int status = PTL_OK; - CDEBUG(D_TRACE, "gmnal_cb_recv nal_cb [%p], private[%p], cookie[%p], - niov[%d], iov [%p], mlen["LPSZ"], rlen["LPSZ"]\n", + CDEBUG(D_TRACE, "gmnal_cb_recv nal_cb [%p], private[%p], cookie[%p], " + "niov[%d], iov [%p], mlen["LPSZ"], rlen["LPSZ"]\n", nal_cb, private, cookie, niov, iov, mlen, rlen); switch(srxd->type) { @@ -64,10 +64,11 @@ int gmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, int status = PTL_OK; struct iovec *iovec = NULL, *iovec_dup = NULL; int i = 0; + ptl_kiov_t *kiov_dup = kiov;; - CDEBUG(D_TRACE, "gmnal_cb_recv_pages nal_cb [%p],private[%p], - cookie[%p], kniov[%d], kiov [%p], mlen["LPSZ"], rlen["LPSZ"]\n", + CDEBUG(D_TRACE, "gmnal_cb_recv_pages nal_cb [%p],private[%p], " + "cookie[%p], kniov[%d], kiov [%p], mlen["LPSZ"], rlen["LPSZ"]\n", nal_cb, private, cookie, kniov, kiov, mlen, rlen); if (srxd->type == GMNAL_SMALL_MESSAGE) { @@ -99,6 +100,10 @@ int gmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, CDEBUG(D_INFO, "calling gmnal_small_rx\n"); status = gmnal_small_rx(nal_cb, private, cookie, kniov, iovec_dup, mlen, rlen); + for (i=0; i<kniov; i++) { + kunmap(kiov_dup->kiov_page); + kiov_dup++; + } PORTAL_FREE(iovec_dup, sizeof(struct iovec)*kniov); } @@ -126,6 +131,7 @@ int gmnal_cb_send(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, niov, iov, len); } else { CDEBUG(D_ERROR, "Large message send it is not supported\n"); + lib_finalize(nal_cb, private, cookie, PTL_FAIL); return(PTL_FAIL); gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid, pid, niov, iov, len); @@ -140,6 +146,7 @@ int gmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, int i = 0; gmnal_data_t *nal_data; struct iovec *iovec = NULL, *iovec_dup = NULL; + ptl_kiov_t *kiov_dup = kiov; CDEBUG(D_TRACE, "gmnal_cb_send_pages nid ["LPU64"] niov[%d] len["LPSZ"]\n", nid, kniov, len); nal_data = nal_cb->nal_data; @@ -181,6 +188,10 @@ int gmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, 
gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid, pid, kniov, iovec, len); } + for (i=0; i<kniov; i++) { + kunmap(kiov_dup->kiov_page); + kiov_dup++; + } PORTAL_FREE(iovec_dup, kniov*sizeof(struct iovec)); return(PTL_OK); } @@ -199,6 +210,18 @@ int gmnal_cb_write(nal_cb_t *nal_cb, void *private, user_ptr dst, return(PTL_OK); } +int gmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq, + ptl_event_t *ev) +{ + + if (eq->event_callback != NULL) { + CDEBUG(D_INFO, "found callback\n"); + eq->event_callback(ev); + } + + return(PTL_OK); +} + void *gmnal_cb_malloc(nal_cb_t *nal_cb, size_t len) { void *ptr = NULL; @@ -249,6 +272,17 @@ void gmnal_cb_sti(nal_cb_t *nal_cb, unsigned long *flags) return; } +void gmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq, ptl_event_t *ev) +{ + /* holding cb_lock */ + + if (eq->event_callback != NULL) + eq->event_callback(ev); + + /* We will wake theads sleeping in yield() here, AFTER the + * callback, when we implement blocking yield */ +} + int gmnal_cb_dist(nal_cb_t *nal_cb, ptl_nid_t nid, unsigned long *dist) { CDEBUG(D_TRACE, "gmnal_cb_dist\n"); diff --git a/lnet/klnds/scimaclnd/scimacnal_cb.c b/lnet/klnds/scimaclnd/scimacnal_cb.c index 52afb98..f9562b2 100644 --- a/lnet/klnds/scimaclnd/scimacnal_cb.c +++ b/lnet/klnds/scimaclnd/scimacnal_cb.c @@ -97,6 +97,18 @@ kscimacnal_sti(nal_cb_t *nal, unsigned long *flags) } +static void +kscimacnal_callback(nal_cb_t *nal, void *private, lib_eq_t *eq, ptl_event_t *ev) +{ + /* holding ksci_dispatch_lock */ + + if (eq->event_callback != NULL) + eq->event_callback(ev); + + /* We will wake theads sleeping in yield() here, AFTER the + * callback, when we implement blocking yield */ +} + static int kscimacnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist) { @@ -233,7 +245,7 @@ kscimacnal_sendmsg(nal_cb_t *nal, /* save transaction info for later finalize and cleanup */ PORTAL_ALLOC(ktx, (sizeof(kscimacnal_tx_t))); if (!ktx) { - return PTL_NOSPACE; + return PTL_NO_SPACE; } ktx->ktx_nmapped = 0; /* 
Start with no mapped pages :) */ @@ -248,7 +260,7 @@ kscimacnal_sendmsg(nal_cb_t *nal, kscimacnal_txrelease, ktx); if (!msg) { PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t))); - return PTL_NOSPACE; + return PTL_NO_SPACE; } mac_put_mblk(msg, sizeof(ptl_hdr_t)); lastblk=msg; @@ -285,7 +297,7 @@ kscimacnal_sendmsg(nal_cb_t *nal, if(!newblk) { mac_free_msg(msg); PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t))); - return PTL_NOSPACE; + return PTL_NO_SPACE; } mac_put_mblk(newblk, nob); mac_link_mblk(lastblk, newblk); @@ -597,5 +609,6 @@ nal_cb_t kscimacnal_lib = { cb_printf: kscimacnal_printf, cb_cli: kscimacnal_cli, cb_sti: kscimacnal_sti, + cb_callback: kscimacnal_callback, cb_dist: kscimacnal_dist }; diff --git a/lnet/lnet/api-eq.c b/lnet/lnet/api-eq.c index 964b9d8..0306043 100644 --- a/lnet/lnet/api-eq.c +++ b/lnet/lnet/api-eq.c @@ -23,59 +23,19 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#define DEBUG_SUBSYSTEM S_PORTALS #include -int ptl_eq_init(void) +int ptl_get_event (ptl_eq_t *eq, ptl_event_t *ev) { - /* Nothing to do anymore... */ - return PTL_OK; -} - -void ptl_eq_fini(void) -{ - /* Nothing to do anymore... */ -} - -int ptl_eq_ni_init(nal_t * nal) -{ - /* Nothing to do anymore... */ - return PTL_OK; -} - -void ptl_eq_ni_fini(nal_t * nal) -{ - /* Nothing to do anymore... 
*/ -} - -int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t * ev) -{ - ptl_eq_t *eq; - int rc, new_index; - unsigned long flags; - ptl_event_t *new_event; - nal_t *nal; + int new_index = eq->sequence & (eq->size - 1); + ptl_event_t *new_event = &eq->base[new_index]; ENTRY; - if (!ptl_init) - RETURN(PTL_NOINIT); - - nal = ptl_hndl2nal(&eventq); - if (!nal) - RETURN(PTL_INV_EQ); - - eq = ptl_handle2usereq(&eventq); - nal->lock(nal, &flags); - - /* size must be a power of 2 to handle a wrapped sequence # */ - LASSERT (eq->size != 0 && - eq->size == LOWEST_BIT_SET (eq->size)); - - new_index = eq->sequence & (eq->size - 1); - new_event = &eq->base[new_index]; CDEBUG(D_INFO, "new_event: %p, sequence: %lu, eq->size: %u\n", new_event, eq->sequence, eq->size); + if (PTL_SEQ_GT (eq->sequence, new_event->sequence)) { - nal->unlock(nal, &flags); RETURN(PTL_EQ_EMPTY); } @@ -86,117 +46,75 @@ int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t * ev) if (eq->sequence != new_event->sequence) { CERROR("DROPPING EVENT: eq seq %lu ev seq %lu\n", eq->sequence, new_event->sequence); - rc = PTL_EQ_DROPPED; - } else { - rc = PTL_OK; + RETURN(PTL_EQ_DROPPED); } eq->sequence = new_event->sequence + 1; - nal->unlock(nal, &flags); - RETURN(rc); + RETURN(PTL_OK); } - -int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out) +int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t * ev) { - int rc; + int which; - /* PtlEQGet does the handle checking */ - while ((rc = PtlEQGet(eventq_in, event_out)) == PTL_EQ_EMPTY) { - nal_t *nal = ptl_hndl2nal(&eventq_in); - - if (nal->yield) - nal->yield(nal); - } - - return rc; -} - -#ifndef __KERNEL__ -#if 0 -static jmp_buf eq_jumpbuf; - -static void eq_timeout(int signal) -{ - sigset_t set; - - /* signal will be automatically disabled in sig handler, - * must enable it before long jump - */ - sigemptyset(&set); - sigaddset(&set, SIGALRM); - sigprocmask(SIG_UNBLOCK, &set, NULL); - - longjmp(eq_jumpbuf, -1); + return (PtlEQPoll (&eventq, 1, 0, ev, &which)); } 
-int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out, - int timeout) +int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out) { - static void (*prev) (int) = NULL; - static int left_over; - time_t time_at_start; - int rc; - - if (setjmp(eq_jumpbuf)) { - signal(SIGALRM, prev); - alarm(left_over - timeout); - return PTL_EQ_EMPTY; - } - - left_over = alarm(timeout); - prev = signal(SIGALRM, eq_timeout); - time_at_start = time(NULL); - if (left_over && left_over < timeout) - alarm(left_over); - - rc = PtlEQWait(eventq_in, event_out); - - signal(SIGALRM, prev); - alarm(left_over); /* Should compute how long we waited */ - - return rc; + int which; + + return (PtlEQPoll (&eventq_in, 1, PTL_TIME_FOREVER, + event_out, &which)); } -#else -#include - -/* FIXME - * Here timeout need a trick with tcpnal, definitely unclean but OK for - * this moment. - */ -/* global variables defined by tcpnal */ -extern int __tcpnal_eqwait_timeout_value; -extern int __tcpnal_eqwait_timedout; - -int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out, - int timeout) +int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout, + ptl_event_t *event_out, int *which_out) { - int rc; + nal_t *nal; + int i; + int rc; + unsigned long flags; + + if (!ptl_init) + RETURN(PTL_NO_INIT); - if (!timeout) - return PtlEQWait(eventq_in, event_out); + if (neq_in < 1) + RETURN(PTL_EQ_INVALID); + + nal = ptl_hndl2nal(&eventqs_in[0]); + if (nal == NULL) + RETURN(PTL_EQ_INVALID); - __tcpnal_eqwait_timeout_value = timeout; + nal->lock(nal, &flags); - while ((rc = PtlEQGet(eventq_in, event_out)) == PTL_EQ_EMPTY) { - nal_t *nal = ptl_hndl2nal(&eventq_in); + for (;;) { + for (i = 0; i < neq_in; i++) { + ptl_eq_t *eq = ptl_handle2usereq(&eventqs_in[i]); + + if (i > 0 && + ptl_hndl2nal(&eventqs_in[i]) != nal) { + nal->unlock(nal, &flags); + RETURN (PTL_EQ_INVALID); + } + + /* size must be a power of 2 to handle a wrapped sequence # */ + LASSERT (eq->size != 0 && + 
eq->size == LOWEST_BIT_SET (eq->size)); + + rc = ptl_get_event (eq, event_out); + if (rc != PTL_EQ_EMPTY) { + nal->unlock(nal, &flags); + *which_out = i; + RETURN(rc); + } + } - if (nal->yield) - nal->yield(nal); - - if (__tcpnal_eqwait_timedout) { - if (__tcpnal_eqwait_timedout != ETIMEDOUT) - printf("Warning: yield return error %d\n", - __tcpnal_eqwait_timedout); - rc = PTL_EQ_EMPTY; - break; + if (timeout == 0) { + nal->unlock(nal, &flags); + RETURN (PTL_EQ_EMPTY); } + + timeout = nal->yield(nal, &flags, timeout); } - - __tcpnal_eqwait_timeout_value = 0; - - return rc; } -#endif -#endif /* __KERNEL__ */ diff --git a/lnet/lnet/lib-md.c b/lnet/lnet/lib-md.c index a1ed583..64a55b9 100644 --- a/lnet/lnet/lib-md.c +++ b/lnet/lnet/lib-md.c @@ -33,16 +33,29 @@ #include #include -/* - * must be called with state lock held - */ +/* must be called with state lock held */ void lib_md_unlink(nal_cb_t * nal, lib_md_t * md) { - lib_me_t *me = md->me; + if ((md->md_flags & PTL_MD_FLAG_ZOMBIE) == 0) { + /* first unlink attempt... 
*/ + lib_me_t *me = md->me; + + md->md_flags |= PTL_MD_FLAG_ZOMBIE; + + /* Disassociate from ME (if any), and unlink it if it was created + * with PTL_UNLINK */ + if (me != NULL) { + me->md = NULL; + if (me->unlink == PTL_UNLINK) + lib_me_unlink(nal, me); + } + + /* emsure all future handle lookups fail */ + lib_invalidate_handle(nal, &md->md_lh); + } if (md->pending != 0) { CDEBUG(D_NET, "Queueing unlink of md %p\n", md); - md->md_flags |= PTL_MD_FLAG_UNLINK; return; } @@ -52,23 +65,16 @@ void lib_md_unlink(nal_cb_t * nal, lib_md_t * md) if (nal->cb_unmap_pages != NULL) nal->cb_unmap_pages (nal, md->md_niov, md->md_iov.kiov, &md->md_addrkey); - } else if (nal->cb_unmap != NULL) + } else if (nal->cb_unmap != NULL) { nal->cb_unmap (nal, md->md_niov, md->md_iov.iov, &md->md_addrkey); - - if (me) { - me->md = NULL; - if (me->unlink == PTL_UNLINK) - lib_me_unlink(nal, me); } - if (md->eq != NULL) - { + if (md->eq != NULL) { md->eq->eq_refcount--; LASSERT (md->eq->eq_refcount >= 0); } - lib_invalidate_handle (nal, &md->md_lh); list_del (&md->md_list); lib_md_free(nal, md); } @@ -77,58 +83,62 @@ void lib_md_unlink(nal_cb_t * nal, lib_md_t * md) static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, ptl_md_t *md, ptl_handle_eq_t *eqh, int unlink) { - const int max_size_opts = PTL_MD_AUTO_UNLINK | - PTL_MD_MAX_SIZE; lib_eq_t *eq = NULL; int rc; int i; + int niov; /* NB we are passed an allocated, but uninitialised/active md. * if we return success, caller may lib_md_unlink() it. * otherwise caller may only lib_md_free() it. */ - if (!PtlHandleEqual (*eqh, PTL_EQ_NONE)) { + if (!PtlHandleIsEqual (*eqh, PTL_EQ_NONE)) { eq = ptl_handle2eq(eqh, nal); if (eq == NULL) - return PTL_INV_EQ; + return PTL_EQ_INVALID; } /* Must check this _before_ allocation. Also, note that non-iov * MDs must set md_niov to 0. 
*/ - LASSERT((md->options & (PTL_MD_IOV | PTL_MD_KIOV)) == 0 || - md->niov <= PTL_MD_MAX_IOV); + LASSERT((md->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0 || + md->length <= PTL_MD_MAX_IOV); - if ((md->options & max_size_opts) != 0 && /* max size used */ + /* This implementation doesn't know how to create START events or + * disable END events. Best to LASSERT our caller is compliant so + * we find out quickly... */ + LASSERT (PtlHandleIsEqual (*eqh, PTL_EQ_NONE) || + ((md->options & PTL_MD_EVENT_START_DISABLE) != 0 && + (md->options & PTL_MD_EVENT_END_DISABLE) == 0)); + + if ((md->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */ (md->max_size < 0 || md->max_size > md->length)) // illegal max_size - return PTL_INV_MD; + return PTL_MD_INVALID; new->me = NULL; new->start = md->start; - new->length = md->length; new->offset = 0; new->max_size = md->max_size; - new->unlink = unlink; new->options = md->options; new->user_ptr = md->user_ptr; new->eq = eq; new->threshold = md->threshold; new->pending = 0; - new->md_flags = 0; + new->md_flags = (unlink == PTL_UNLINK) ? 
PTL_MD_FLAG_AUTO_UNLINK : 0; - if ((md->options & PTL_MD_IOV) != 0) { + if ((md->options & PTL_MD_IOVEC) != 0) { int total_length = 0; if ((md->options & PTL_MD_KIOV) != 0) /* Can't specify both */ - return PTL_INV_MD; + return PTL_MD_INVALID; - new->md_niov = md->niov; + new->md_niov = niov = md->length; if (nal->cb_read (nal, private, new->md_iov.iov, md->start, - md->niov * sizeof (new->md_iov.iov[0]))) + niov * sizeof (new->md_iov.iov[0]))) return PTL_SEGV; - for (i = 0; i < new->md_niov; i++) { + for (i = 0; i < niov; i++) { /* We take the base address on trust */ if (new->md_iov.iov[i].iov_len <= 0) /* invalid length */ return PTL_VAL_FAILED; @@ -136,33 +146,32 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, total_length += new->md_iov.iov[i].iov_len; } - if (md->length > total_length) - return PTL_IOV_TOO_SMALL; - + new->length = total_length; + if (nal->cb_map != NULL) { - rc = nal->cb_map (nal, new->md_niov, new->md_iov.iov, + rc = nal->cb_map (nal, niov, new->md_iov.iov, &new->md_addrkey); if (rc != PTL_OK) return (rc); } } else if ((md->options & PTL_MD_KIOV) != 0) { #ifndef __KERNEL__ - return PTL_INV_MD; + return PTL_MD_INVALID; #else int total_length = 0; /* Trap attempt to use paged I/O if unsupported early. 
*/ if (nal->cb_send_pages == NULL || nal->cb_recv_pages == NULL) - return PTL_INV_MD; + return PTL_MD_INVALID; - new->md_niov = md->niov; + new->md_niov = niov = md->length; if (nal->cb_read (nal, private, new->md_iov.kiov, md->start, - md->niov * sizeof (new->md_iov.kiov[0]))) + niov * sizeof (new->md_iov.kiov[0]))) return PTL_SEGV; - for (i = 0; i < new->md_niov; i++) { + for (i = 0; i < niov; i++) { /* We take the page pointer on trust */ if (new->md_iov.kiov[i].kiov_offset + new->md_iov.kiov[i].kiov_len > PAGE_SIZE ) @@ -171,23 +180,23 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, total_length += new->md_iov.kiov[i].kiov_len; } - if (md->length > total_length) - return PTL_IOV_TOO_SMALL; + new->length = total_length; if (nal->cb_map_pages != NULL) { - rc = nal->cb_map_pages (nal, new->md_niov, new->md_iov.kiov, + rc = nal->cb_map_pages (nal, niov, new->md_iov.kiov, &new->md_addrkey); if (rc != PTL_OK) return (rc); } #endif } else { /* contiguous */ - new->md_niov = 1; + new->length = md->length; + new->md_niov = niov = 1; new->md_iov.iov[0].iov_base = md->start; new->md_iov.iov[0].iov_len = md->length; if (nal->cb_map != NULL) { - rc = nal->cb_map (nal, new->md_niov, new->md_iov.iov, + rc = nal->cb_map (nal, niov, new->md_iov.iov, &new->md_addrkey); if (rc != PTL_OK) return (rc); @@ -213,13 +222,13 @@ void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md, ptl_md_t * new) * and that's all. */ new->start = md->start; - new->length = md->length; + new->length = ((md->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0) ? + md->length : md->md_niov; new->threshold = md->threshold; new->max_size = md->max_size; new->options = md->options; new->user_ptr = md->user_ptr; ptl_eq2handle(&new->eventq, md->eq); - new->niov = ((md->options & (PTL_MD_IOV | PTL_MD_KIOV)) == 0) ? 
0 : md->md_niov; } int do_PtlMDAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret) @@ -240,21 +249,21 @@ int do_PtlMDAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret) lib_md_t *md; unsigned long flags; - if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOV)) != 0 && - args->md_in.niov > PTL_MD_MAX_IOV) /* too many fragments */ - return (ret->rc = PTL_IOV_TOO_MANY); + if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 && + args->md_in.length > PTL_MD_MAX_IOV) /* too many fragments */ + return (ret->rc = PTL_IOV_INVALID); md = lib_md_alloc(nal, &args->md_in); if (md == NULL) - return (ret->rc = PTL_NOSPACE); + return (ret->rc = PTL_NO_SPACE); state_lock(nal, &flags); me = ptl_handle2me(&args->me_in, nal); if (me == NULL) { - ret->rc = PTL_INV_ME; + ret->rc = PTL_ME_INVALID; } else if (me->md != NULL) { - ret->rc = PTL_INUSE; + ret->rc = PTL_ME_IN_USE; } else { ret->rc = lib_md_build(nal, md, private, &args->md_in, &args->eq_in, args->unlink_in); @@ -292,18 +301,18 @@ int do_PtlMDBind(nal_cb_t * nal, void *private, void *v_args, void *v_ret) lib_md_t *md; unsigned long flags; - if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOV)) != 0 && - args->md_in.niov > PTL_MD_MAX_IOV) /* too many fragments */ - return (ret->rc = PTL_IOV_TOO_MANY); + if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 && + args->md_in.length > PTL_MD_MAX_IOV) /* too many fragments */ + return (ret->rc = PTL_IOV_INVALID); md = lib_md_alloc(nal, &args->md_in); if (md == NULL) - return (ret->rc = PTL_NOSPACE); + return (ret->rc = PTL_NO_SPACE); state_lock(nal, &flags); - ret->rc = lib_md_build(nal, md, private, - &args->md_in, &args->eq_in, PTL_UNLINK); + ret->rc = lib_md_build(nal, md, private, &args->md_in, + &args->eq_in, args->unlink_in); if (ret->rc == PTL_OK) { ptl_md2handle(&ret->handle_out, md); @@ -331,7 +340,7 @@ int do_PtlMDUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret) md = ptl_handle2md(&args->md_in, nal); if (md == 
NULL) { state_unlock(nal, &flags); - return (ret->rc = PTL_INV_MD); + return (ret->rc = PTL_MD_INVALID); } /* If the MD is busy, lib_md_unlink just marks it for deletion, and @@ -343,7 +352,7 @@ int do_PtlMDUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret) memset(&ev, 0, sizeof(ev)); ev.type = PTL_EVENT_UNLINK; - ev.status = PTL_OK; + ev.ni_fail_type = PTL_OK; ev.unlinked = 1; lib_md_deconstruct(nal, md, &ev.mem_desc); @@ -385,7 +394,7 @@ int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *v_args, md = ptl_handle2md(&args->md_in, nal); if (md == NULL) { - ret->rc = PTL_INV_MD; + ret->rc = PTL_MD_INVALID; goto out; } @@ -397,47 +406,42 @@ int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *v_args, goto out; } - /* XXX fttb, the new MD must be the same type wrt fragmentation */ - if (((new->options ^ md->options) & - (PTL_MD_IOV | PTL_MD_KIOV)) != 0) { - ret->rc = PTL_INV_MD; - goto out; - } - - if (new->niov > md->md_niov) { - ret->rc = PTL_IOV_TOO_MANY; + /* XXX fttb, the new MD must be the same "shape" wrt fragmentation, + * since we simply overwrite the old lib-md */ + if ((((new->options ^ md->options) & + (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0) || + ((new->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0 && + new->length != md->md_niov)) { + ret->rc = PTL_IOV_INVALID; goto out; } - if (new->niov < md->md_niov) { - ret->rc = PTL_IOV_TOO_SMALL; - goto out; - } - - if (!PtlHandleEqual (args->testq_in, PTL_EQ_NONE)) { + if (!PtlHandleIsEqual (args->testq_in, PTL_EQ_NONE)) { test_eq = ptl_handle2eq(&args->testq_in, nal); if (test_eq == NULL) { - ret->rc = PTL_INV_EQ; + ret->rc = PTL_EQ_INVALID; goto out; } } if (md->pending != 0) { - ret->rc = PTL_NOUPDATE; + ret->rc = PTL_MD_NO_UPDATE; goto out; } if (test_eq == NULL || test_eq->sequence == args->sequence_in) { lib_me_t *me = md->me; + int unlink = (md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) ? 
+ PTL_UNLINK : PTL_RETAIN; // #warning this does not track eq refcounts properly ret->rc = lib_md_build(nal, md, private, - new, &new->eventq, md->unlink); + new, &new->eventq, unlink); md->me = me; } else { - ret->rc = PTL_NOUPDATE; + ret->rc = PTL_MD_NO_UPDATE; } out: diff --git a/lnet/ulnds/procbridge.h b/lnet/ulnds/procbridge.h index 965f83d..1c8e7dd 100644 --- a/lnet/ulnds/procbridge.h +++ b/lnet/ulnds/procbridge.h @@ -35,7 +35,8 @@ typedef struct procbridge { typedef struct nal_init_args { ptl_pid_t nia_requested_pid; - ptl_ni_limits_t *nia_limits; + ptl_ni_limits_t *nia_requested_limits; + ptl_ni_limits_t *nia_actual_limits; int nia_nal_type; bridge nia_bridge; } nal_init_args_t; @@ -50,10 +51,6 @@ extern void *nal_thread(void *); #define MAX_PTLS 128 extern void set_address(bridge t,ptl_pid_t pidrequest); -extern nal_t *procbridge_interface(int num_interface, - ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, - ptl_pid_t requested_pid); extern void procbridge_wakeup_nal(procbridge p); #endif diff --git a/lnet/ulnds/socklnd/procbridge.h b/lnet/ulnds/socklnd/procbridge.h index 965f83d..1c8e7dd 100644 --- a/lnet/ulnds/socklnd/procbridge.h +++ b/lnet/ulnds/socklnd/procbridge.h @@ -35,7 +35,8 @@ typedef struct procbridge { typedef struct nal_init_args { ptl_pid_t nia_requested_pid; - ptl_ni_limits_t *nia_limits; + ptl_ni_limits_t *nia_requested_limits; + ptl_ni_limits_t *nia_actual_limits; int nia_nal_type; bridge nia_bridge; } nal_init_args_t; @@ -50,10 +51,6 @@ extern void *nal_thread(void *); #define MAX_PTLS 128 extern void set_address(bridge t,ptl_pid_t pidrequest); -extern nal_t *procbridge_interface(int num_interface, - ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, - ptl_pid_t requested_pid); extern void procbridge_wakeup_nal(procbridge p); #endif diff --git a/lnet/utils/Makefile.am b/lnet/utils/Makefile.am index 6c31b3d..a14df1c 100644 --- a/lnet/utils/Makefile.am +++ b/lnet/utils/Makefile.am @@ -3,35 +3,40 @@ # This code is issued under the GNU 
General Public License. # See the file COPYING in this distribution -COMPILE = $(CC) -Wall -g -I$(srcdir)/../include -LINK = $(CC) -o $@ +## $(srcdir)/../ for , ../../ for generated +#COMPILE = $(CC) -Wall -g -I$(srcdir)/../include -I../../include +#LINK = $(CC) -o $@ if LIBLUSTRE - noinst_LIBRARIES = libuptlctl.a -libuptlctl_a_SOURCES = portals.c debug.c l_ioctl.c parser.c parser.h -libuptlctl_a_CFLAGS = -fPIC +libuptlctl_a_SOURCES = portals.c debug.c l_ioctl.c +libuptlctl_a_CPPFLAGS = $(LLCPPFLAGS) +libuptlctl_a_CFLAGS = $(LLCFLAGS) +endif -else +sbin_PROGRAMS = debugctl -sbin_PROGRAMS = acceptor ptlctl debugctl routerstat wirecheck gmnalnid lib_LIBRARIES = libptlctl.a -acceptor_SOURCES = acceptor.c # -lefence +libptlctl_a_SOURCES = portals.c debug.c l_ioctl.c parser.c parser.h + +if !CRAY_PORTALS +sbin_PROGRAMS += acceptor ptlctl routerstat wirecheck gmnalnid -wirecheck_SOURCES = wirecheck.c +acceptor_SOURCES = acceptor.c -libptlctl_a_SOURCES = portals.c debug.c l_ioctl.c parser.c parser.h +wirecheck_SOURCES = wirecheck.c gmnalnid_SOURCES = gmnalnid.c ptlctl_SOURCES = ptlctl.c -ptlctl_LDADD = -L. -lptlctl -lncurses # -lefence +ptlctl_LDADD = -L. -lptlctl $(LIBREADLINE) $(LIBEFENCE) ptlctl_DEPENDENCIES = libptlctl.a +routerstat_SOURCES = routerstat.c +endif + debugctl_SOURCES = debugctl.c -debugctl_LDADD = -L. -lptlctl -lncurses # -lefence +debugctl_LDADD = -L. 
-lptlctl $(LIBREADLINE) $(LIBEFENCE) debugctl_DEPENDENCIES = libptlctl.a -routerstat_SOURCES = routerstat.c -endif diff --git a/lustre/kernel_patches/patches/vfs_intent_2.6.0-test1.patch b/lustre/kernel_patches/patches/vfs_intent_2.6.0-test1.patch deleted file mode 100644 index 3d75965..0000000 --- a/lustre/kernel_patches/patches/vfs_intent_2.6.0-test1.patch +++ /dev/null @@ -1,734 +0,0 @@ - fs/exec.c | 18 +++++++--- - fs/namei.c | 86 +++++++++++++++++++++++++++++++++++++++++++++---- - fs/namespace.c | 2 + - fs/nfs/dir.c | 4 +- - fs/open.c | 62 +++++++++++++++++++++++------------ - fs/stat.c | 24 ++++++++++--- - include/linux/dcache.h | 3 + - include/linux/fs.h | 8 ++++ - include/linux/namei.h | 56 ++++++++++++++++++++++++++----- - kernel/ksyms.c | 8 ++++ - 10 files changed, 222 insertions(+), 49 deletions(-) - ---- linux-2.6.0-test1/fs/exec.c~vfs_intent_2.6.0-test1 2003-09-13 19:08:34.000000000 +0400 -+++ linux-2.6.0-test1-alexey/fs/exec.c 2003-09-14 18:13:44.000000000 +0400 -@@ -116,8 +116,11 @@ asmlinkage long sys_uselib(const char __ - struct file * file; - struct nameidata nd; - int error; -+ intent_init(&nd.intent, IT_OPEN); - -- nd.intent.open.flags = O_RDONLY; -+ error = user_path_walk_it(library, &nd); -+ -+ nd.intent.it_flags = O_RDONLY; - error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); - if (error) - goto out; -@@ -130,7 +133,7 @@ asmlinkage long sys_uselib(const char __ - if (error) - goto exit; - -- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &nd.intent); - error = PTR_ERR(file); - if (IS_ERR(file)) - goto out; -@@ -457,8 +460,13 @@ static inline void free_arg_pages(struct - struct file *open_exec(const char *name) - { - struct nameidata nd; -- int err = path_lookup(name, LOOKUP_FOLLOW, &nd); -- struct file *file = ERR_PTR(err); -+ int err; -+ struct file *file; -+ -+ intent_init(&nd.intent, IT_OPEN); -+ nd.intent.it_flags = O_RDONLY; -+ err = path_lookup(name, LOOKUP_FOLLOW, 
&nd); -+ file = ERR_PTR(err); - - if (!err) { - struct inode *inode = nd.dentry->d_inode; -@@ -470,7 +478,7 @@ struct file *open_exec(const char *name) - err = -EACCES; - file = ERR_PTR(err); - if (!err) { -- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &nd.intent); - if (!IS_ERR(file)) { - err = deny_write_access(file); - if (err) { ---- linux-2.6.0-test1/fs/namei.c~vfs_intent_2.6.0-test1 2003-09-13 19:08:34.000000000 +0400 -+++ linux-2.6.0-test1-alexey/fs/namei.c 2003-09-14 18:15:11.000000000 +0400 -@@ -263,8 +263,19 @@ int deny_write_access(struct file * file - return 0; - } - -+void intent_release(struct lookup_intent *it) -+{ -+ if (!it) -+ return; -+ if (it->it_magic != INTENT_MAGIC) -+ return; -+ if (it->it_op_release) -+ it->it_op_release(it); -+} -+ - void path_release(struct nameidata *nd) - { -+ intent_release(&nd->intent); - dput(nd->dentry); - mntput(nd->mnt); - } -@@ -340,7 +351,10 @@ static struct dentry * real_lookup(struc - { - struct dentry * result; - struct inode *dir = parent->d_inode; -+ int counter = 0; - -+again: -+ counter++; - down(&dir->i_sem); - /* - * First re-do the cached lookup just in case it was created -@@ -379,7 +393,10 @@ static struct dentry * real_lookup(struc - if (result->d_op && result->d_op->d_revalidate) { - if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) { - dput(result); -- result = ERR_PTR(-ENOENT); -+ if (counter > 10) -+ result = ERR_PTR(-ESTALE); -+ if (!IS_ERR(result)) -+ goto again; - } - } - return result; -@@ -554,6 +571,31 @@ fail: - return PTR_ERR(dentry); - } - -+static int revalidate_special(struct nameidata *nd) -+{ -+ struct dentry *dentry = nd->dentry; -+ int err, counter = 0; -+ -+ if (!dentry->d_op || !dentry->d_op->d_revalidate) -+ return 0; -+ revalidate_again: -+ if (!dentry->d_op->d_revalidate(dentry, nd)) { -+ struct dentry *new; -+ if ((err = permission(dentry->d_parent->d_inode, MAY_EXEC,nd))) -+ return err; -+ new = 
real_lookup(dentry->d_parent, &dentry->d_name, nd); -+ d_invalidate(dentry); -+ dput(dentry); -+ dentry = new; -+ counter++; -+ if (counter < 10) -+ goto revalidate_again; -+ printk("excessive revalidate_it loops\n"); -+ return -ESTALE; -+ } -+ return 0; -+} -+ - /* - * Name resolution. - * -@@ -654,7 +696,9 @@ int link_path_walk(const char * name, st - - if (inode->i_op->follow_link) { - mntget(next.mnt); -+ nd->flags |= LOOKUP_LINK_NOTLAST; - err = do_follow_link(next.dentry, nd); -+ nd->flags &= ~LOOKUP_LINK_NOTLAST; - dput(next.dentry); - mntput(next.mnt); - if (err) -@@ -693,6 +737,11 @@ last_component: - inode = nd->dentry->d_inode; - /* fallthrough */ - case 1: -+ nd->flags |= LOOKUP_LAST; -+ err = revalidate_special(nd); -+ nd->flags &= ~LOOKUP_LAST; -+ if (err) -+ break; - goto return_reval; - } - if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { -@@ -700,7 +749,9 @@ last_component: - if (err < 0) - break; - } -+ nd->flags |= LOOKUP_LAST; - err = do_lookup(nd, &this, &next); -+ nd->flags &= ~LOOKUP_LAST; - if (err) - break; - follow_mount(&next.mnt, &next.dentry); -@@ -926,7 +977,7 @@ struct dentry * lookup_hash(struct qstr - } - - /* SMP-safe */ --struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) -+struct dentry * lookup_one_len_it(const char * name, struct dentry * base, int len, struct nameidata *nd) - { - unsigned long hash; - struct qstr this; -@@ -946,11 +997,16 @@ struct dentry * lookup_one_len(const cha - } - this.hash = end_name_hash(hash); - -- return lookup_hash(&this, base); -+ return __lookup_hash(&this, base, nd); - access: - return ERR_PTR(-EACCES); - } - -+struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) -+{ -+ return lookup_one_len_it(name, base, len, NULL); -+} -+ - /* - * namei() - * -@@ -962,11 +1018,12 @@ access: - * that namei follows links, while lnamei does not. 
- * SMP-safe - */ --int __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) -+int __user_walk_it(const char __user *name, unsigned flags, struct nameidata *nd) - { - char *tmp = getname(name); - int err = PTR_ERR(tmp); - -+ - if (!IS_ERR(tmp)) { - err = path_lookup(tmp, flags, nd); - putname(tmp); -@@ -974,6 +1031,12 @@ int __user_walk(const char __user *name, - return err; - } - -+int __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) -+{ -+ intent_init(&nd->intent, IT_LOOKUP); -+ return __user_walk_it(name, flags, nd); -+} -+ - /* - * It's inline, so penalty for filesystems that don't use sticky bit is - * minimal. -@@ -1246,8 +1309,8 @@ int open_namei(const char * pathname, in - acc_mode |= MAY_APPEND; - - /* Fill in the open() intent data */ -- nd->intent.open.flags = flag; -- nd->intent.open.create_mode = mode; -+ nd->intent.it_flags = flag; -+ nd->intent.it_create_mode = mode; - - /* - * The simplest case - just a plain lookup. -@@ -1263,6 +1326,7 @@ int open_namei(const char * pathname, in - /* - * Create - we need to know the parent. 
- */ -+ nd->intent.it_op |= IT_CREAT; - error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd); - if (error) - return error; -@@ -1279,7 +1343,9 @@ int open_namei(const char * pathname, in - dir = nd->dentry; - nd->flags &= ~LOOKUP_PARENT; - down(&dir->d_inode->i_sem); -+ nd->flags |= LOOKUP_LAST; - dentry = __lookup_hash(&nd->last, nd->dentry, nd); -+ nd->flags &= ~LOOKUP_LAST; - - do_last: - error = PTR_ERR(dentry); -@@ -1384,7 +1450,9 @@ do_link: - } - dir = nd->dentry; - down(&dir->d_inode->i_sem); -+ nd->flags |= LOOKUP_LAST; - dentry = __lookup_hash(&nd->last, nd->dentry, nd); -+ nd->flags &= ~LOOKUP_LAST; - putname(nd->last.name); - goto do_last; - } -@@ -2142,7 +2210,9 @@ static inline int - __vfs_follow_link(struct nameidata *nd, const char *link) - { - int res = 0; -+ struct lookup_intent it = nd->intent; - char *name; -+ - if (IS_ERR(link)) - goto fail; - -@@ -2152,6 +2222,10 @@ __vfs_follow_link(struct nameidata *nd, - /* weird __emul_prefix() stuff did it */ - goto out; - } -+ -+ intent_init(&nd->intent, it.it_op); -+ nd->intent.it_flags = it.it_flags; -+ nd->intent.it_create_mode = it.it_create_mode; - res = link_path_walk(link, nd); - out: - if (current->link_count || res || nd->last_type!=LAST_NORM) ---- linux-2.6.0-test1/fs/namespace.c~vfs_intent_2.6.0-test1 2003-09-13 19:07:44.000000000 +0400 -+++ linux-2.6.0-test1-alexey/fs/namespace.c 2003-09-14 18:13:44.000000000 +0400 -@@ -738,6 +738,7 @@ long do_mount(char * dev_name, char * di - int retval = 0; - int mnt_flags = 0; - -+ intent_init(&nd.intent, IT_LOOKUP); - /* Discard magic */ - if ((flags & MS_MGC_MSK) == MS_MGC_VAL) - flags &= ~MS_MGC_MSK; -@@ -947,6 +948,7 @@ void set_fs_pwd(struct fs_struct *fs, st - mntput(old_pwdmnt); - } - } -+EXPORT_SYMBOL(set_fs_pwd); - - static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd) - { ---- linux-2.6.0-test1/fs/open.c~vfs_intent_2.6.0-test1 2003-09-13 19:08:34.000000000 +0400 -+++ 
linux-2.6.0-test1-alexey/fs/open.c 2003-09-14 18:13:44.000000000 +0400 -@@ -200,7 +200,7 @@ static inline long do_sys_truncate(const - struct nameidata nd; - struct inode * inode; - int error; -- -+ intent_init(&nd.intent, IT_GETATTR); - error = -EINVAL; - if (length < 0) /* sorry, but loff_t says... */ - goto out; -@@ -443,6 +443,7 @@ asmlinkage long sys_access(const char __ - int old_fsuid, old_fsgid; - kernel_cap_t old_cap; - int res; -+ intent_init(&nd.intent, IT_GETATTR); - - if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ - return -EINVAL; -@@ -474,6 +475,7 @@ asmlinkage long sys_access(const char __ - if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) - && !special_file(nd.dentry->d_inode->i_mode)) - res = -EROFS; -+ - path_release(&nd); - } - -@@ -488,6 +490,7 @@ asmlinkage long sys_chdir(const char __u - { - struct nameidata nd; - int error; -+ intent_init(&nd.intent, IT_GETATTR); - - error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); - if (error) -@@ -539,6 +542,7 @@ asmlinkage long sys_chroot(const char __ - { - struct nameidata nd; - int error; -+ intent_init(&nd.intent, IT_GETATTR); - - error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); - if (error) -@@ -611,7 +615,7 @@ asmlinkage long sys_chmod(const char __u - error = -EROFS; - if (IS_RDONLY(inode)) - goto dput_and_out; -- -+ - error = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto dput_and_out; -@@ -719,25 +723,8 @@ asmlinkage long sys_fchown(unsigned int - * for the internal routines (ie open_namei()/follow_link() etc). 00 is - * used by symlinks. 
- */ --struct file *filp_open(const char * filename, int flags, int mode) --{ -- int namei_flags, error; -- struct nameidata nd; -- -- namei_flags = flags; -- if ((namei_flags+1) & O_ACCMODE) -- namei_flags++; -- if (namei_flags & O_TRUNC) -- namei_flags |= 2; -- -- error = open_namei(filename, namei_flags, mode, &nd); -- if (!error) -- return dentry_open(nd.dentry, nd.mnt, flags); -- -- return ERR_PTR(error); --} -- --struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) -+struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, int flags, -+ struct lookup_intent *it) - { - struct file * f; - struct inode *inode; -@@ -749,6 +736,7 @@ struct file *dentry_open(struct dentry * - goto cleanup_dentry; - f->f_flags = flags; - f->f_mode = (flags+1) & O_ACCMODE; -+ f->f_it = it; - inode = dentry->d_inode; - if (f->f_mode & FMODE_WRITE) { - error = get_write_access(inode); -@@ -767,6 +755,7 @@ struct file *dentry_open(struct dentry * - error = f->f_op->open(inode,f); - if (error) - goto cleanup_all; -+ intent_release(it); - } - f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); - -@@ -791,11 +780,42 @@ cleanup_all: - cleanup_file: - put_filp(f); - cleanup_dentry: -+ intent_release(it); - dput(dentry); - mntput(mnt); - return ERR_PTR(error); - } - -+struct file *filp_open(const char * filename, int flags, int mode) -+{ -+ int namei_flags, error; -+ struct file * temp_filp; -+ struct nameidata nd; -+ intent_init(&nd.intent, IT_OPEN); -+ -+ namei_flags = flags; -+ if ((namei_flags+1) & O_ACCMODE) -+ namei_flags++; -+ if (namei_flags & O_TRUNC) -+ namei_flags |= 2; -+ -+ error = open_namei(filename, namei_flags, mode, &nd); -+ if (!error) { -+ temp_filp = dentry_open_it(nd.dentry, nd.mnt, flags, &nd.intent); -+ return temp_filp; -+ } -+ return ERR_PTR(error); -+} -+ -+ -+struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) -+{ -+ struct lookup_intent it; -+ intent_init(&it, IT_LOOKUP); -+ -+ return 
dentry_open_it(dentry, mnt, flags, &it); -+} -+ - /* - * Find an empty file descriptor entry, and mark it busy. - */ ---- linux-2.6.0-test1/fs/stat.c~vfs_intent_2.6.0-test1 2003-09-13 19:08:11.000000000 +0400 -+++ linux-2.6.0-test1-alexey/fs/stat.c 2003-09-14 18:13:44.000000000 +0400 -@@ -33,7 +33,7 @@ void generic_fillattr(struct inode *inod - stat->blksize = inode->i_blksize; - } - --int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) -+int vfs_getattr_it(struct vfsmount *mnt, struct dentry *dentry, struct lookup_intent *it, struct kstat *stat) - { - struct inode *inode = dentry->d_inode; - int retval; -@@ -44,6 +44,8 @@ int vfs_getattr(struct vfsmount *mnt, st - - if (inode->i_op->getattr) - return inode->i_op->getattr(mnt, dentry, stat); -+ if (inode->i_op->getattr_it) -+ return inode->i_op->getattr_it(mnt, dentry, it, stat); - - generic_fillattr(inode, stat); - if (!stat->blksize) { -@@ -56,14 +58,20 @@ int vfs_getattr(struct vfsmount *mnt, st - return 0; - } - -+int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) -+{ -+ return vfs_getattr_it(mnt, dentry, NULL, stat); -+} -+ - int vfs_stat(char __user *name, struct kstat *stat) - { - struct nameidata nd; - int error; -+ intent_init(&nd.intent, IT_GETATTR); - -- error = user_path_walk(name, &nd); -+ error = user_path_walk_it(name, &nd); - if (!error) { -- error = vfs_getattr(nd.mnt, nd.dentry, stat); -+ error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat); - path_release(&nd); - } - return error; -@@ -73,10 +81,11 @@ int vfs_lstat(char __user *name, struct - { - struct nameidata nd; - int error; -+ intent_init(&nd.intent, IT_GETATTR); - -- error = user_path_walk_link(name, &nd); -+ error = user_path_walk_link_it(name, &nd); - if (!error) { -- error = vfs_getattr(nd.mnt, nd.dentry, stat); -+ error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat); - path_release(&nd); - } - return error; -@@ -86,9 +95,12 @@ int vfs_fstat(unsigned int fd, struct 
ks - { - struct file *f = fget(fd); - int error = -EBADF; -+ struct nameidata nd; -+ intent_init(&nd.intent, IT_GETATTR); - - if (f) { -- error = vfs_getattr(f->f_vfsmnt, f->f_dentry, stat); -+ error = vfs_getattr_it(f->f_vfsmnt, f->f_dentry, &nd.intent, stat); -+ intent_release(&nd.intent); - fput(f); - } - return error; ---- linux-2.6.0-test1/include/linux/dcache.h~vfs_intent_2.6.0-test1 2003-09-13 19:08:13.000000000 +0400 -+++ linux-2.6.0-test1-alexey/include/linux/dcache.h 2003-09-14 18:13:44.000000000 +0400 -@@ -4,6 +4,7 @@ - #ifdef __KERNEL__ - - #include -+#include - #include - #include - #include -@@ -35,6 +36,8 @@ struct qstr { - char name_str[0]; - }; - -+#include -+ - struct dentry_stat_t { - int nr_dentry; - int nr_unused; ---- linux-2.6.0-test1/include/linux/fs.h~vfs_intent_2.6.0-test1 2003-09-13 19:08:41.000000000 +0400 -+++ linux-2.6.0-test1-alexey/include/linux/fs.h 2003-09-14 18:13:44.000000000 +0400 -@@ -240,6 +240,8 @@ typedef void (dio_iodone_t)(struct inode - #define ATTR_ATTR_FLAG 1024 - #define ATTR_KILL_SUID 2048 - #define ATTR_KILL_SGID 4096 -+#define ATTR_RAW 8192 /* file system, not vfs will massage attrs */ -+#define ATTR_FROM_OPEN 16384 /* called from open path, ie O_TRUNC */ - - /* - * This is the Inode Attributes structure, used for notify_change(). 
It -@@ -399,6 +401,7 @@ struct inode { - struct block_device *i_bdev; - struct cdev *i_cdev; - int i_cindex; -+ void *i_filterdata; - - unsigned long i_dnotify_mask; /* Directory notify events */ - struct dnotify_struct *i_dnotify; /* for directory notifications */ -@@ -514,6 +517,7 @@ struct file { - /* Used by fs/eventpoll.c to link all the hooks to this file */ - struct list_head f_ep_links; - spinlock_t f_ep_lock; -+ struct lookup_intent *f_it; - }; - extern spinlock_t files_lock; - #define file_list_lock() spin_lock(&files_lock); -@@ -814,7 +818,9 @@ struct inode_operations { - void (*truncate) (struct inode *); - int (*permission) (struct inode *, int, struct nameidata *); - int (*setattr) (struct dentry *, struct iattr *); -+ int (*setattr_raw) (struct inode *, struct iattr *); - int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); -+ int (*getattr_it) (struct vfsmount *, struct dentry *, struct lookup_intent *, struct kstat *); - int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); - ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); - ssize_t (*listxattr) (struct dentry *, char *, size_t); -@@ -1027,6 +1033,7 @@ extern int register_filesystem(struct fi - extern int unregister_filesystem(struct file_system_type *); - extern struct vfsmount *kern_mount(struct file_system_type *); - extern int may_umount(struct vfsmount *); -+struct vfsmount *do_kern_mount(const char *type, int flags, const char *name, void *data); - extern long do_mount(char *, char *, char *, unsigned long, void *); - - extern int vfs_statfs(struct super_block *, struct kstatfs *); -@@ -1094,6 +1101,7 @@ extern int do_truncate(struct dentry *, - - extern struct file *filp_open(const char *, int, int); - extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); -+extern struct file * dentry_open_it(struct dentry *, struct vfsmount *, int, struct lookup_intent *); - extern int filp_close(struct file *, fl_owner_t id); - 
extern char * getname(const char __user *); - ---- linux-2.6.0-test1/include/linux/namei.h~vfs_intent_2.6.0-test1 2003-07-24 15:52:31.000000000 +0400 -+++ linux-2.6.0-test1-alexey/include/linux/namei.h 2003-09-14 18:13:44.000000000 +0400 -@@ -2,25 +2,55 @@ - #define _LINUX_NAMEI_H - - #include -+#include - - struct vfsmount; -+struct nameidata; - --struct open_intent { -- int flags; -- int create_mode; -+/* intent opcodes */ -+#define IT_OPEN (1) -+#define IT_CREAT (1<<1) -+#define IT_READDIR (1<<2) -+#define IT_GETATTR (1<<3) -+#define IT_LOOKUP (1<<4) -+#define IT_UNLINK (1<<5) -+#define IT_TRUNC (1<<6) -+#define IT_GETXATTR (1<<7) -+ -+struct lustre_intent_data { -+ int it_disposition; -+ int it_status; -+ __u64 it_lock_handle; -+ void *it_data; -+ int it_lock_mode; - }; - -+#define INTENT_MAGIC 0x19620323 -+struct lookup_intent { -+ int it_magic; -+ void (*it_op_release)(struct lookup_intent *); -+ int it_op; -+ int it_flags; -+ int it_create_mode; -+ union { -+ struct lustre_intent_data lustre; -+ } d; -+}; -+ -+static inline void intent_init(struct lookup_intent *it, int op) -+{ -+ memset(it, 0, sizeof(*it)); -+ it->it_magic = INTENT_MAGIC; -+ it->it_op = op; -+} -+ - struct nameidata { - struct dentry *dentry; - struct vfsmount *mnt; - struct qstr last; - unsigned int flags; - int last_type; -- -- /* Intent data */ -- union { -- struct open_intent open; -- } intent; -+ struct lookup_intent intent; - }; - - /* -@@ -41,6 +71,9 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA - #define LOOKUP_CONTINUE 4 - #define LOOKUP_PARENT 16 - #define LOOKUP_NOALT 32 -+#define LOOKUP_LAST (1<<6) -+#define LOOKUP_LINK_NOTLAST (1<<7) -+ - /* - * Intent data - */ -@@ -49,6 +82,12 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA - #define LOOKUP_ACCESS (0x0400) - - extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *)); -+extern int FASTCALL(__user_walk_it(const char __user *name, unsigned flags, struct nameidata *nd)); -+#define user_path_walk_it(name,nd) \ 
-+ __user_walk_it(name, LOOKUP_FOLLOW, nd) -+#define user_path_walk_link_it(name,nd) \ -+ __user_walk_it(name, 0, nd) -+extern void intent_release(struct lookup_intent *); - #define user_path_walk(name,nd) \ - __user_walk(name, LOOKUP_FOLLOW, nd) - #define user_path_walk_link(name,nd) \ -@@ -60,7 +99,6 @@ extern void path_release(struct nameidat - - extern struct dentry * lookup_one_len(const char *, struct dentry *, int); - extern struct dentry * lookup_hash(struct qstr *, struct dentry *); -- - extern int follow_down(struct vfsmount **, struct dentry **); - extern int follow_up(struct vfsmount **, struct dentry **); - ---- linux-2.6.0-test1/kernel/ksyms.c~vfs_intent_2.6.0-test1 2003-09-13 19:08:42.000000000 +0400 -+++ linux-2.6.0-test1-alexey/kernel/ksyms.c 2003-09-14 18:13:44.000000000 +0400 -@@ -382,6 +382,7 @@ EXPORT_SYMBOL(unregister_filesystem); - EXPORT_SYMBOL(kern_mount); - EXPORT_SYMBOL(__mntput); - EXPORT_SYMBOL(may_umount); -+EXPORT_SYMBOL(reparent_to_init); - - /* executable format registration */ - EXPORT_SYMBOL(register_binfmt); -@@ -409,6 +410,12 @@ EXPORT_SYMBOL(proc_doulongvec_minmax); - EXPORT_SYMBOL(request_irq); - EXPORT_SYMBOL(free_irq); - -+/* lustre */ -+EXPORT_SYMBOL(do_kern_mount); -+EXPORT_SYMBOL(exit_files); -+//EXPORT_SYMBOL(kmem_cache_validate); -+ -+ - /* waitqueue handling */ - EXPORT_SYMBOL(add_wait_queue); - EXPORT_SYMBOL(add_wait_queue_exclusive); -@@ -558,6 +565,7 @@ EXPORT_SYMBOL(sys_tz); - EXPORT_SYMBOL(file_fsync); - EXPORT_SYMBOL(fsync_buffers_list); - EXPORT_SYMBOL(clear_inode); -+EXPORT_SYMBOL(__iget); - EXPORT_SYMBOL(init_special_inode); - EXPORT_SYMBOL(new_inode); - EXPORT_SYMBOL(__insert_inode_hash); ---- linux-2.6.0-test1/fs/nfs/dir.c~vfs_intent_2.6.0-test1 2003-09-13 19:08:34.000000000 +0400 -+++ linux-2.6.0-test1-alexey/fs/nfs/dir.c 2003-09-14 18:13:44.000000000 +0400 -@@ -652,7 +652,7 @@ int nfs_is_exclusive_create(struct inode - return 0; - if (!nd || (nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_CREATE)) - 
return 0; -- return (nd->intent.open.flags & O_EXCL) != 0; -+ return (nd->intent.it_flags & O_EXCL) != 0; - } - - static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) -@@ -825,7 +825,7 @@ static int nfs_create(struct inode *dir, - attr.ia_valid = ATTR_MODE; - - if (nd && (nd->flags & LOOKUP_CREATE)) -- open_flags = nd->intent.open.flags; -+ open_flags = nd->intent.it_flags; - - /* - * The 0 argument passed into the create function should one day - -_ diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c index 21e884f..b9193e8 100644 --- a/lustre/llite/rw26.c +++ b/lustre/llite/rw26.c @@ -51,44 +51,6 @@ #include "llite_internal.h" #include -/* called for each page in a completed rpc.*/ -void ll_ap_completion_26(void *data, int cmd, int rc) -{ - struct ll_async_page *llap; - struct page *page; - - llap = llap_from_cookie(data); - if (IS_ERR(llap)) { - EXIT; - return; - } - - page = llap->llap_page; - LASSERT(PageLocked(page)); - - if (rc == 0) { - if (cmd == OBD_BRW_READ) { - if (!llap->llap_defer_uptodate) - SetPageUptodate(page); - } else { - llap->llap_write_queued = 0; - } - } else { - SetPageError(page); - } - - LL_CDEBUG_PAGE(page, "io complete, unlocking\n"); - - unlock_page(page); - - if (0 && cmd == OBD_BRW_WRITE) { - llap_write_complete(page->mapping->host, llap); - ll_try_done_writing(page->mapping->host); - } - - page_cache_release(page); -} - static int ll_writepage_26(struct page *page, struct writeback_control *wbc) { struct inode *inode = page->mapping->host; @@ -110,7 +72,7 @@ static int ll_writepage_26(struct page *page, struct writeback_control *wbc) page_cache_get(page); if (llap->llap_write_queued) { - LL_CDEBUG_PAGE(page, "marking urgent\n"); + LL_CDEBUG_PAGE(D_PAGE, page, "marking urgent\n"); rc = obd_set_async_flags(exp, ll_i2info(inode)->lli_smd, NULL, llap->llap_cookie, ASYNC_READY | ASYNC_URGENT); @@ -120,7 +82,7 @@ static int ll_writepage_26(struct page *page, struct writeback_control *wbc) 
llap->llap_cookie, OBD_BRW_WRITE, 0, 0, 0, ASYNC_READY | ASYNC_URGENT); if (rc == 0) - LL_CDEBUG_PAGE(page, "mmap write queued\n"); + LL_CDEBUG_PAGE(D_PAGE, page, "mmap write queued\n"); else llap->llap_write_queued = 0; } @@ -133,15 +95,15 @@ out: } struct address_space_operations ll_aops = { - readpage: ll_readpage, -// readpages: ll_readpages, -// direct_IO: ll_direct_IO_26, - writepage: ll_writepage_26, - writepages: generic_writepages, - set_page_dirty: __set_page_dirty_nobuffers, - sync_page: ll_sync_page, - prepare_write: ll_prepare_write, - commit_write: ll_commit_write, - removepage: ll_removepage, - bmap: NULL + .readpage = ll_readpage, +// .readpages = ll_readpages, +// .direct_IO = ll_direct_IO_26, + .writepage = ll_writepage_26, + .writepages = generic_writepages, + .set_page_dirty = __set_page_dirty_nobuffers, + .sync_page = NULL, + .prepare_write = ll_prepare_write, + .commit_write = ll_commit_write, + .removepage = ll_removepage, + .bmap = NULL }; diff --git a/lustre/lov/lov_internal.h b/lustre/lov/lov_internal.h index a565f51..cefe534 100644 --- a/lustre/lov/lov_internal.h +++ b/lustre/lov/lov_internal.h @@ -10,7 +10,7 @@ #ifndef LOV_INTERNAL_H #define LOV_INTERNAL_H -#include +#include #define LAP_MAGIC 8200 @@ -31,7 +31,7 @@ void lov_free_memmd(struct lov_stripe_md **lsmp); /* lov_log.c */ int lov_llog_init(struct obd_device *obd, struct obd_device *tgt, - int count, struct llog_logid *logid); + int count, struct llog_catid *logid); int lov_llog_finish(struct obd_device *obd, int count); /* lov_pack.c */ @@ -41,7 +41,7 @@ int lov_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp, struct lov_mds_md *lmm, int lmm_bytes); int lov_setstripe(struct obd_export *exp, struct lov_stripe_md **lsmp, struct lov_user_md *lump); -int lov_setea(struct obd_export *exp, struct lov_stripe_md **lsmp, +int lov_setea(struct obd_export *exp, struct lov_stripe_md **lsmp, struct lov_user_md *lump); int lov_getstripe(struct obd_export *exp, struct 
lov_stripe_md *lsm, struct lov_user_md *lump); diff --git a/lustre/mds/mds_log.c b/lustre/mds/mds_log.c index c4d5690..b8ce8b5 100644 --- a/lustre/mds/mds_log.c +++ b/lustre/mds/mds_log.c @@ -54,7 +54,8 @@ static int mds_llog_origin_add(struct llog_ctxt *ctxt, static int mds_llog_origin_connect(struct llog_ctxt *ctxt, int count, struct llog_logid *logid, - struct llog_gen *gen) + struct llog_gen *gen, + struct obd_uuid *uuid) { struct obd_device *obd = ctxt->loc_obd; struct obd_device *lov_obd = obd->u.mds.mds_osc_obd; @@ -63,7 +64,7 @@ static int mds_llog_origin_connect(struct llog_ctxt *ctxt, int count, ENTRY; lctxt = llog_get_context(lov_obd, ctxt->loc_idx); - rc = llog_connect(lctxt, count, logid, gen); + rc = llog_connect(lctxt, count, logid, gen, uuid); RETURN(rc); } @@ -118,7 +119,7 @@ static struct llog_operations mds_size_repl_logops = { }; int mds_llog_init(struct obd_device *obd, struct obd_device *tgt, - int count, struct llog_logid *logid) + int count, struct llog_catid *logid) { struct obd_device *lov_obd = obd->u.mds.mds_osc_obd; int rc; diff --git a/lustre/obdclass/llog.c b/lustre/obdclass/llog.c index e4146dc..0ad595f 100644 --- a/lustre/obdclass/llog.c +++ b/lustre/obdclass/llog.c @@ -66,9 +66,9 @@ void llog_free_handle(struct llog_handle *loghandle) if (!loghandle->lgh_hdr) goto out; - if (le32_to_cpu(loghandle->lgh_hdr->llh_flags) & LLOG_F_IS_PLAIN) + if (loghandle->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN) list_del_init(&loghandle->u.phd.phd_entry); - if (le32_to_cpu(loghandle->lgh_hdr->llh_flags) & LLOG_F_IS_CAT) + if (loghandle->lgh_hdr->llh_flags & LLOG_F_IS_CAT) LASSERT(list_empty(&loghandle->u.chd.chd_head)); OBD_FREE(loghandle->lgh_hdr, LLOG_CHUNK_SIZE); @@ -97,10 +97,10 @@ int llog_cancel_rec(struct llog_handle *loghandle, int index) RETURN(-EINVAL); } - llh->llh_count = cpu_to_le32(le32_to_cpu(llh->llh_count) - 1); + llh->llh_count--; - if ((le32_to_cpu(llh->llh_flags) & LLOG_F_ZAP_WHEN_EMPTY) && - (le32_to_cpu(llh->llh_count) == 1) && + if 
((llh->llh_flags & LLOG_F_ZAP_WHEN_EMPTY) && + (llh->llh_count == 1) && (loghandle->lgh_last_idx == (LLOG_BITMAP_BYTES * 8) - 1)) { rc = llog_destroy(loghandle); if (rc) @@ -131,10 +131,10 @@ int llog_init_handle(struct llog_handle *handle, int flags, RETURN(-ENOMEM); handle->lgh_hdr = llh; /* first assign flags to use llog_client_ops */ - llh->llh_flags = cpu_to_le32(flags); + llh->llh_flags = flags; rc = llog_read_header(handle); if (rc == 0) { - flags = le32_to_cpu(llh->llh_flags); + flags = llh->llh_flags; if (uuid) LASSERT(obd_uuid_equals(uuid, &llh->llh_tgtuuid)); GOTO(out, rc); @@ -146,21 +146,20 @@ int llog_init_handle(struct llog_handle *handle, int flags, rc = 0; handle->lgh_last_idx = 0; /* header is record with index 0 */ - llh->llh_count = cpu_to_le32(1); /* for the header record */ - llh->llh_hdr.lrh_type = cpu_to_le32(LLOG_HDR_MAGIC); - llh->llh_hdr.lrh_len = llh->llh_tail.lrt_len = - cpu_to_le32(LLOG_CHUNK_SIZE); + llh->llh_count = 1; /* for the header record */ + llh->llh_hdr.lrh_type = LLOG_HDR_MAGIC; + llh->llh_hdr.lrh_len = llh->llh_tail.lrt_len = LLOG_CHUNK_SIZE; llh->llh_hdr.lrh_index = llh->llh_tail.lrt_index = 0; - llh->llh_timestamp = cpu_to_le64(LTIME_S(CURRENT_TIME)); + llh->llh_timestamp = LTIME_S(CURRENT_TIME); if (uuid) memcpy(&llh->llh_tgtuuid, uuid, sizeof(llh->llh_tgtuuid)); - llh->llh_bitmap_offset = cpu_to_le32(offsetof(typeof(*llh),llh_bitmap)); + llh->llh_bitmap_offset = offsetof(typeof(*llh),llh_bitmap); ext2_set_bit(0, llh->llh_bitmap); out: if (flags & LLOG_F_IS_CAT) { INIT_LIST_HEAD(&handle->u.chd.chd_head); - llh->llh_size = cpu_to_le32(sizeof(struct llog_logid_rec)); + llh->llh_size = sizeof(struct llog_logid_rec); } else if (flags & LLOG_F_IS_PLAIN) INIT_LIST_HEAD(&handle->u.phd.phd_entry); @@ -235,11 +234,12 @@ int llog_process(struct llog_handle *loghandle, llog_cb_t cb, GOTO(out, rc); rec = buf; - idx = le32_to_cpu(rec->lrh_index); + idx = rec->lrh_index; if (idx < index) CDEBUG(D_HA, "index %u : idx %u\n", index, 
idx); while (idx < index) { - rec = ((void *)rec + le32_to_cpu(rec->lrh_len)); + rec = (struct llog_rec_hdr *) + ((char *)rec + rec->lrh_len); idx ++; } @@ -266,7 +266,8 @@ int llog_process(struct llog_handle *loghandle, llog_cb_t cb, ++index; if (index > last_index) GOTO(out, rc = 0); - rec = ((void *)rec + le32_to_cpu(rec->lrh_len)); + rec = (struct llog_rec_hdr *) + ((char *)rec + rec->lrh_len); } } diff --git a/lustre/portals/knals/gmnal/gmnal.h b/lustre/portals/knals/gmnal/gmnal.h index cdde5b7..e48552e 100644 --- a/lustre/portals/knals/gmnal/gmnal.h +++ b/lustre/portals/knals/gmnal/gmnal.h @@ -45,6 +45,7 @@ #include "linux/init.h" #include "linux/sem.h" #include "linux/vmalloc.h" +#include "linux/sysctl.h" #define DEBUG_SUBSYSTEM S_GMNAL @@ -80,9 +81,14 @@ extern int gmnal_small_msg_size; extern int num_rx_threads; extern int num_stxds; +extern int gm_port; #define GMNAL_SMALL_MSG_SIZE(a) a->small_msg_size #define GMNAL_IS_SMALL_MESSAGE(n,a,b,c) gmnal_is_small_msg(n, a, b, c) #define GMNAL_MAGIC 0x1234abcd +/* + * The gm_port to use for gmnal + */ +#define GMNAL_GM_PORT gm_port /* @@ -184,7 +190,6 @@ typedef struct _gmnal_rxtwe { #define NRXTHREADS 10 /* max number of receiver threads */ typedef struct _gmnal_data_t { - int refcnt; spinlock_t cb_lock; spinlock_t stxd_lock; struct semaphore stxd_token; @@ -218,6 +223,7 @@ typedef struct _gmnal_data_t { gmnal_rxtwe_t *rxtwe_tail; spinlock_t rxtwe_lock; struct semaphore rxtwe_wait; + struct ctl_table_header *sysctl; } gmnal_data_t; /* @@ -234,11 +240,6 @@ typedef struct _gmnal_data_t { extern gmnal_data_t *global_nal_data; /* - * The gm_port to use for gmnal - */ -#define GMNAL_GM_PORT 4 - -/* * for ioctl get pid */ #define GMNAL_IOC_GET_GNID 1 @@ -307,13 +308,16 @@ extern gmnal_data_t *global_nal_data; /* * API NAL */ +int gmnal_api_startup(nal_t *, ptl_pid_t, + ptl_ni_limits_t *, ptl_ni_limits_t *); + int gmnal_api_forward(nal_t *, int, void *, size_t, void *, size_t); -int gmnal_api_shutdown(nal_t *, int); 
+void gmnal_api_shutdown(nal_t *); int gmnal_api_validate(nal_t *, void *, size_t); -void gmnal_api_yield(nal_t *); +void gmnal_api_yield(nal_t *, unsigned long *, int); void gmnal_api_lock(nal_t *, unsigned long *); @@ -321,14 +325,13 @@ void gmnal_api_unlock(nal_t *, unsigned long *); #define GMNAL_INIT_NAL(a) do { \ + a->startup = gmnal_api_startup; \ a->forward = gmnal_api_forward; \ a->shutdown = gmnal_api_shutdown; \ - a->validate = NULL; \ a->yield = gmnal_api_yield; \ a->lock = gmnal_api_lock; \ a->unlock = gmnal_api_unlock; \ a->timeout = NULL; \ - a->refct = 1; \ a->nal_data = NULL; \ } while (0) @@ -353,6 +356,8 @@ int gmnal_cb_read(nal_cb_t *, void *private, void *, user_ptr, size_t); int gmnal_cb_write(nal_cb_t *, void *private, user_ptr, void *, size_t); +int gmnal_cb_callback(nal_cb_t *, void *, lib_eq_t *, ptl_event_t *); + void *gmnal_cb_malloc(nal_cb_t *, size_t); void gmnal_cb_free(nal_cb_t *, void *, size_t); @@ -369,7 +374,7 @@ void gmnal_cb_sti(nal_cb_t *, unsigned long *); int gmnal_cb_dist(nal_cb_t *, ptl_nid_t, unsigned long *); -nal_t *gmnal_init(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t rpid); +int gmnal_init(void); void gmnal_fini(void); @@ -382,7 +387,7 @@ void gmnal_fini(void); a->cb_recv_pages = gmnal_cb_recv_pages; \ a->cb_read = gmnal_cb_read; \ a->cb_write = gmnal_cb_write; \ - a->cb_callback = NULL; \ + a->cb_callback = gmnal_cb_callback; \ a->cb_malloc = gmnal_cb_malloc; \ a->cb_free = gmnal_cb_free; \ a->cb_map = NULL; \ @@ -418,6 +423,7 @@ void gmnal_stop_rxthread(gmnal_data_t *); void gmnal_stop_ctthread(gmnal_data_t *); void gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t); void gmnal_drop_sends_callback(gm_port_t *, void *, gm_status_t); +void gmnal_resume_sending_callback(gm_port_t *, void *, gm_status_t); char *gmnal_gm_error(gm_status_t); char *gmnal_rxevent(gm_recv_event_t*); int gmnal_is_small_msg(gmnal_data_t*, int, struct iovec*, int); diff --git a/lustre/portals/knals/gmnal/gmnal_cb.c 
b/lustre/portals/knals/gmnal/gmnal_cb.c index e055242..ece1380 100644 --- a/lustre/portals/knals/gmnal/gmnal_cb.c +++ b/lustre/portals/knals/gmnal/gmnal_cb.c @@ -35,8 +35,8 @@ int gmnal_cb_recv(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, int status = PTL_OK; - CDEBUG(D_TRACE, "gmnal_cb_recv nal_cb [%p], private[%p], cookie[%p], - niov[%d], iov [%p], mlen["LPSZ"], rlen["LPSZ"]\n", + CDEBUG(D_TRACE, "gmnal_cb_recv nal_cb [%p], private[%p], cookie[%p], " + "niov[%d], iov [%p], mlen["LPSZ"], rlen["LPSZ"]\n", nal_cb, private, cookie, niov, iov, mlen, rlen); switch(srxd->type) { @@ -64,10 +64,11 @@ int gmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, int status = PTL_OK; struct iovec *iovec = NULL, *iovec_dup = NULL; int i = 0; + ptl_kiov_t *kiov_dup = kiov;; - CDEBUG(D_TRACE, "gmnal_cb_recv_pages nal_cb [%p],private[%p], - cookie[%p], kniov[%d], kiov [%p], mlen["LPSZ"], rlen["LPSZ"]\n", + CDEBUG(D_TRACE, "gmnal_cb_recv_pages nal_cb [%p],private[%p], " + "cookie[%p], kniov[%d], kiov [%p], mlen["LPSZ"], rlen["LPSZ"]\n", nal_cb, private, cookie, kniov, kiov, mlen, rlen); if (srxd->type == GMNAL_SMALL_MESSAGE) { @@ -99,6 +100,10 @@ int gmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, CDEBUG(D_INFO, "calling gmnal_small_rx\n"); status = gmnal_small_rx(nal_cb, private, cookie, kniov, iovec_dup, mlen, rlen); + for (i=0; ikiov_page); + kiov_dup++; + } PORTAL_FREE(iovec_dup, sizeof(struct iovec)*kniov); } @@ -126,6 +131,7 @@ int gmnal_cb_send(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, niov, iov, len); } else { CDEBUG(D_ERROR, "Large message send it is not supported\n"); + lib_finalize(nal_cb, private, cookie, PTL_FAIL); return(PTL_FAIL); gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid, pid, niov, iov, len); @@ -140,6 +146,7 @@ int gmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, int i = 0; gmnal_data_t *nal_data; struct iovec *iovec = NULL, *iovec_dup = NULL; + ptl_kiov_t *kiov_dup = 
kiov; CDEBUG(D_TRACE, "gmnal_cb_send_pages nid ["LPU64"] niov[%d] len["LPSZ"]\n", nid, kniov, len); nal_data = nal_cb->nal_data; @@ -181,6 +188,10 @@ int gmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid, pid, kniov, iovec, len); } + for (i=0; ikiov_page); + kiov_dup++; + } PORTAL_FREE(iovec_dup, kniov*sizeof(struct iovec)); return(PTL_OK); } @@ -199,6 +210,18 @@ int gmnal_cb_write(nal_cb_t *nal_cb, void *private, user_ptr dst, return(PTL_OK); } +int gmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq, + ptl_event_t *ev) +{ + + if (eq->event_callback != NULL) { + CDEBUG(D_INFO, "found callback\n"); + eq->event_callback(ev); + } + + return(PTL_OK); +} + void *gmnal_cb_malloc(nal_cb_t *nal_cb, size_t len) { void *ptr = NULL; @@ -249,6 +272,17 @@ void gmnal_cb_sti(nal_cb_t *nal_cb, unsigned long *flags) return; } +void gmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq, ptl_event_t *ev) +{ + /* holding cb_lock */ + + if (eq->event_callback != NULL) + eq->event_callback(ev); + + /* We will wake theads sleeping in yield() here, AFTER the + * callback, when we implement blocking yield */ +} + int gmnal_cb_dist(nal_cb_t *nal_cb, ptl_nid_t nid, unsigned long *dist) { CDEBUG(D_TRACE, "gmnal_cb_dist\n"); diff --git a/lustre/portals/knals/scimacnal/scimacnal_cb.c b/lustre/portals/knals/scimacnal/scimacnal_cb.c index 52afb98..f9562b2 100644 --- a/lustre/portals/knals/scimacnal/scimacnal_cb.c +++ b/lustre/portals/knals/scimacnal/scimacnal_cb.c @@ -97,6 +97,18 @@ kscimacnal_sti(nal_cb_t *nal, unsigned long *flags) } +static void +kscimacnal_callback(nal_cb_t *nal, void *private, lib_eq_t *eq, ptl_event_t *ev) +{ + /* holding ksci_dispatch_lock */ + + if (eq->event_callback != NULL) + eq->event_callback(ev); + + /* We will wake theads sleeping in yield() here, AFTER the + * callback, when we implement blocking yield */ +} + static int kscimacnal_dist(nal_cb_t *nal, ptl_nid_t nid, 
unsigned long *dist) { @@ -233,7 +245,7 @@ kscimacnal_sendmsg(nal_cb_t *nal, /* save transaction info for later finalize and cleanup */ PORTAL_ALLOC(ktx, (sizeof(kscimacnal_tx_t))); if (!ktx) { - return PTL_NOSPACE; + return PTL_NO_SPACE; } ktx->ktx_nmapped = 0; /* Start with no mapped pages :) */ @@ -248,7 +260,7 @@ kscimacnal_sendmsg(nal_cb_t *nal, kscimacnal_txrelease, ktx); if (!msg) { PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t))); - return PTL_NOSPACE; + return PTL_NO_SPACE; } mac_put_mblk(msg, sizeof(ptl_hdr_t)); lastblk=msg; @@ -285,7 +297,7 @@ kscimacnal_sendmsg(nal_cb_t *nal, if(!newblk) { mac_free_msg(msg); PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t))); - return PTL_NOSPACE; + return PTL_NO_SPACE; } mac_put_mblk(newblk, nob); mac_link_mblk(lastblk, newblk); @@ -597,5 +609,6 @@ nal_cb_t kscimacnal_lib = { cb_printf: kscimacnal_printf, cb_cli: kscimacnal_cli, cb_sti: kscimacnal_sti, + cb_callback: kscimacnal_callback, cb_dist: kscimacnal_dist }; diff --git a/lustre/portals/portals/api-eq.c b/lustre/portals/portals/api-eq.c index 964b9d8..0306043 100644 --- a/lustre/portals/portals/api-eq.c +++ b/lustre/portals/portals/api-eq.c @@ -23,59 +23,19 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#define DEBUG_SUBSYSTEM S_PORTALS #include -int ptl_eq_init(void) +int ptl_get_event (ptl_eq_t *eq, ptl_event_t *ev) { - /* Nothing to do anymore... */ - return PTL_OK; -} - -void ptl_eq_fini(void) -{ - /* Nothing to do anymore... */ -} - -int ptl_eq_ni_init(nal_t * nal) -{ - /* Nothing to do anymore... */ - return PTL_OK; -} - -void ptl_eq_ni_fini(nal_t * nal) -{ - /* Nothing to do anymore... 
*/ -} - -int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t * ev) -{ - ptl_eq_t *eq; - int rc, new_index; - unsigned long flags; - ptl_event_t *new_event; - nal_t *nal; + int new_index = eq->sequence & (eq->size - 1); + ptl_event_t *new_event = &eq->base[new_index]; ENTRY; - if (!ptl_init) - RETURN(PTL_NOINIT); - - nal = ptl_hndl2nal(&eventq); - if (!nal) - RETURN(PTL_INV_EQ); - - eq = ptl_handle2usereq(&eventq); - nal->lock(nal, &flags); - - /* size must be a power of 2 to handle a wrapped sequence # */ - LASSERT (eq->size != 0 && - eq->size == LOWEST_BIT_SET (eq->size)); - - new_index = eq->sequence & (eq->size - 1); - new_event = &eq->base[new_index]; CDEBUG(D_INFO, "new_event: %p, sequence: %lu, eq->size: %u\n", new_event, eq->sequence, eq->size); + if (PTL_SEQ_GT (eq->sequence, new_event->sequence)) { - nal->unlock(nal, &flags); RETURN(PTL_EQ_EMPTY); } @@ -86,117 +46,75 @@ int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t * ev) if (eq->sequence != new_event->sequence) { CERROR("DROPPING EVENT: eq seq %lu ev seq %lu\n", eq->sequence, new_event->sequence); - rc = PTL_EQ_DROPPED; - } else { - rc = PTL_OK; + RETURN(PTL_EQ_DROPPED); } eq->sequence = new_event->sequence + 1; - nal->unlock(nal, &flags); - RETURN(rc); + RETURN(PTL_OK); } - -int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out) +int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t * ev) { - int rc; + int which; - /* PtlEQGet does the handle checking */ - while ((rc = PtlEQGet(eventq_in, event_out)) == PTL_EQ_EMPTY) { - nal_t *nal = ptl_hndl2nal(&eventq_in); - - if (nal->yield) - nal->yield(nal); - } - - return rc; -} - -#ifndef __KERNEL__ -#if 0 -static jmp_buf eq_jumpbuf; - -static void eq_timeout(int signal) -{ - sigset_t set; - - /* signal will be automatically disabled in sig handler, - * must enable it before long jump - */ - sigemptyset(&set); - sigaddset(&set, SIGALRM); - sigprocmask(SIG_UNBLOCK, &set, NULL); - - longjmp(eq_jumpbuf, -1); + return (PtlEQPoll (&eventq, 1, 0, ev, &which)); } 
-int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out, - int timeout) +int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out) { - static void (*prev) (int) = NULL; - static int left_over; - time_t time_at_start; - int rc; - - if (setjmp(eq_jumpbuf)) { - signal(SIGALRM, prev); - alarm(left_over - timeout); - return PTL_EQ_EMPTY; - } - - left_over = alarm(timeout); - prev = signal(SIGALRM, eq_timeout); - time_at_start = time(NULL); - if (left_over && left_over < timeout) - alarm(left_over); - - rc = PtlEQWait(eventq_in, event_out); - - signal(SIGALRM, prev); - alarm(left_over); /* Should compute how long we waited */ - - return rc; + int which; + + return (PtlEQPoll (&eventq_in, 1, PTL_TIME_FOREVER, + event_out, &which)); } -#else -#include - -/* FIXME - * Here timeout need a trick with tcpnal, definitely unclean but OK for - * this moment. - */ -/* global variables defined by tcpnal */ -extern int __tcpnal_eqwait_timeout_value; -extern int __tcpnal_eqwait_timedout; - -int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out, - int timeout) +int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout, + ptl_event_t *event_out, int *which_out) { - int rc; + nal_t *nal; + int i; + int rc; + unsigned long flags; + + if (!ptl_init) + RETURN(PTL_NO_INIT); - if (!timeout) - return PtlEQWait(eventq_in, event_out); + if (neq_in < 1) + RETURN(PTL_EQ_INVALID); + + nal = ptl_hndl2nal(&eventqs_in[0]); + if (nal == NULL) + RETURN(PTL_EQ_INVALID); - __tcpnal_eqwait_timeout_value = timeout; + nal->lock(nal, &flags); - while ((rc = PtlEQGet(eventq_in, event_out)) == PTL_EQ_EMPTY) { - nal_t *nal = ptl_hndl2nal(&eventq_in); + for (;;) { + for (i = 0; i < neq_in; i++) { + ptl_eq_t *eq = ptl_handle2usereq(&eventqs_in[i]); + + if (i > 0 && + ptl_hndl2nal(&eventqs_in[i]) != nal) { + nal->unlock(nal, &flags); + RETURN (PTL_EQ_INVALID); + } + + /* size must be a power of 2 to handle a wrapped sequence # */ + LASSERT (eq->size != 0 && + 
eq->size == LOWEST_BIT_SET (eq->size)); + + rc = ptl_get_event (eq, event_out); + if (rc != PTL_EQ_EMPTY) { + nal->unlock(nal, &flags); + *which_out = i; + RETURN(rc); + } + } - if (nal->yield) - nal->yield(nal); - - if (__tcpnal_eqwait_timedout) { - if (__tcpnal_eqwait_timedout != ETIMEDOUT) - printf("Warning: yield return error %d\n", - __tcpnal_eqwait_timedout); - rc = PTL_EQ_EMPTY; - break; + if (timeout == 0) { + nal->unlock(nal, &flags); + RETURN (PTL_EQ_EMPTY); } + + timeout = nal->yield(nal, &flags, timeout); } - - __tcpnal_eqwait_timeout_value = 0; - - return rc; } -#endif -#endif /* __KERNEL__ */ diff --git a/lustre/portals/portals/lib-md.c b/lustre/portals/portals/lib-md.c index a1ed583..64a55b9 100644 --- a/lustre/portals/portals/lib-md.c +++ b/lustre/portals/portals/lib-md.c @@ -33,16 +33,29 @@ #include #include -/* - * must be called with state lock held - */ +/* must be called with state lock held */ void lib_md_unlink(nal_cb_t * nal, lib_md_t * md) { - lib_me_t *me = md->me; + if ((md->md_flags & PTL_MD_FLAG_ZOMBIE) == 0) { + /* first unlink attempt... 
*/ + lib_me_t *me = md->me; + + md->md_flags |= PTL_MD_FLAG_ZOMBIE; + + /* Disassociate from ME (if any), and unlink it if it was created + * with PTL_UNLINK */ + if (me != NULL) { + me->md = NULL; + if (me->unlink == PTL_UNLINK) + lib_me_unlink(nal, me); + } + + /* emsure all future handle lookups fail */ + lib_invalidate_handle(nal, &md->md_lh); + } if (md->pending != 0) { CDEBUG(D_NET, "Queueing unlink of md %p\n", md); - md->md_flags |= PTL_MD_FLAG_UNLINK; return; } @@ -52,23 +65,16 @@ void lib_md_unlink(nal_cb_t * nal, lib_md_t * md) if (nal->cb_unmap_pages != NULL) nal->cb_unmap_pages (nal, md->md_niov, md->md_iov.kiov, &md->md_addrkey); - } else if (nal->cb_unmap != NULL) + } else if (nal->cb_unmap != NULL) { nal->cb_unmap (nal, md->md_niov, md->md_iov.iov, &md->md_addrkey); - - if (me) { - me->md = NULL; - if (me->unlink == PTL_UNLINK) - lib_me_unlink(nal, me); } - if (md->eq != NULL) - { + if (md->eq != NULL) { md->eq->eq_refcount--; LASSERT (md->eq->eq_refcount >= 0); } - lib_invalidate_handle (nal, &md->md_lh); list_del (&md->md_list); lib_md_free(nal, md); } @@ -77,58 +83,62 @@ void lib_md_unlink(nal_cb_t * nal, lib_md_t * md) static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, ptl_md_t *md, ptl_handle_eq_t *eqh, int unlink) { - const int max_size_opts = PTL_MD_AUTO_UNLINK | - PTL_MD_MAX_SIZE; lib_eq_t *eq = NULL; int rc; int i; + int niov; /* NB we are passed an allocated, but uninitialised/active md. * if we return success, caller may lib_md_unlink() it. * otherwise caller may only lib_md_free() it. */ - if (!PtlHandleEqual (*eqh, PTL_EQ_NONE)) { + if (!PtlHandleIsEqual (*eqh, PTL_EQ_NONE)) { eq = ptl_handle2eq(eqh, nal); if (eq == NULL) - return PTL_INV_EQ; + return PTL_EQ_INVALID; } /* Must check this _before_ allocation. Also, note that non-iov * MDs must set md_niov to 0. 
*/ - LASSERT((md->options & (PTL_MD_IOV | PTL_MD_KIOV)) == 0 || - md->niov <= PTL_MD_MAX_IOV); + LASSERT((md->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0 || + md->length <= PTL_MD_MAX_IOV); - if ((md->options & max_size_opts) != 0 && /* max size used */ + /* This implementation doesn't know how to create START events or + * disable END events. Best to LASSERT our caller is compliant so + * we find out quickly... */ + LASSERT (PtlHandleIsEqual (*eqh, PTL_EQ_NONE) || + ((md->options & PTL_MD_EVENT_START_DISABLE) != 0 && + (md->options & PTL_MD_EVENT_END_DISABLE) == 0)); + + if ((md->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */ (md->max_size < 0 || md->max_size > md->length)) // illegal max_size - return PTL_INV_MD; + return PTL_MD_INVALID; new->me = NULL; new->start = md->start; - new->length = md->length; new->offset = 0; new->max_size = md->max_size; - new->unlink = unlink; new->options = md->options; new->user_ptr = md->user_ptr; new->eq = eq; new->threshold = md->threshold; new->pending = 0; - new->md_flags = 0; + new->md_flags = (unlink == PTL_UNLINK) ? 
PTL_MD_FLAG_AUTO_UNLINK : 0; - if ((md->options & PTL_MD_IOV) != 0) { + if ((md->options & PTL_MD_IOVEC) != 0) { int total_length = 0; if ((md->options & PTL_MD_KIOV) != 0) /* Can't specify both */ - return PTL_INV_MD; + return PTL_MD_INVALID; - new->md_niov = md->niov; + new->md_niov = niov = md->length; if (nal->cb_read (nal, private, new->md_iov.iov, md->start, - md->niov * sizeof (new->md_iov.iov[0]))) + niov * sizeof (new->md_iov.iov[0]))) return PTL_SEGV; - for (i = 0; i < new->md_niov; i++) { + for (i = 0; i < niov; i++) { /* We take the base address on trust */ if (new->md_iov.iov[i].iov_len <= 0) /* invalid length */ return PTL_VAL_FAILED; @@ -136,33 +146,32 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, total_length += new->md_iov.iov[i].iov_len; } - if (md->length > total_length) - return PTL_IOV_TOO_SMALL; - + new->length = total_length; + if (nal->cb_map != NULL) { - rc = nal->cb_map (nal, new->md_niov, new->md_iov.iov, + rc = nal->cb_map (nal, niov, new->md_iov.iov, &new->md_addrkey); if (rc != PTL_OK) return (rc); } } else if ((md->options & PTL_MD_KIOV) != 0) { #ifndef __KERNEL__ - return PTL_INV_MD; + return PTL_MD_INVALID; #else int total_length = 0; /* Trap attempt to use paged I/O if unsupported early. 
*/ if (nal->cb_send_pages == NULL || nal->cb_recv_pages == NULL) - return PTL_INV_MD; + return PTL_MD_INVALID; - new->md_niov = md->niov; + new->md_niov = niov = md->length; if (nal->cb_read (nal, private, new->md_iov.kiov, md->start, - md->niov * sizeof (new->md_iov.kiov[0]))) + niov * sizeof (new->md_iov.kiov[0]))) return PTL_SEGV; - for (i = 0; i < new->md_niov; i++) { + for (i = 0; i < niov; i++) { /* We take the page pointer on trust */ if (new->md_iov.kiov[i].kiov_offset + new->md_iov.kiov[i].kiov_len > PAGE_SIZE ) @@ -171,23 +180,23 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, total_length += new->md_iov.kiov[i].kiov_len; } - if (md->length > total_length) - return PTL_IOV_TOO_SMALL; + new->length = total_length; if (nal->cb_map_pages != NULL) { - rc = nal->cb_map_pages (nal, new->md_niov, new->md_iov.kiov, + rc = nal->cb_map_pages (nal, niov, new->md_iov.kiov, &new->md_addrkey); if (rc != PTL_OK) return (rc); } #endif } else { /* contiguous */ - new->md_niov = 1; + new->length = md->length; + new->md_niov = niov = 1; new->md_iov.iov[0].iov_base = md->start; new->md_iov.iov[0].iov_len = md->length; if (nal->cb_map != NULL) { - rc = nal->cb_map (nal, new->md_niov, new->md_iov.iov, + rc = nal->cb_map (nal, niov, new->md_iov.iov, &new->md_addrkey); if (rc != PTL_OK) return (rc); @@ -213,13 +222,13 @@ void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md, ptl_md_t * new) * and that's all. */ new->start = md->start; - new->length = md->length; + new->length = ((md->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0) ? + md->length : md->md_niov; new->threshold = md->threshold; new->max_size = md->max_size; new->options = md->options; new->user_ptr = md->user_ptr; ptl_eq2handle(&new->eventq, md->eq); - new->niov = ((md->options & (PTL_MD_IOV | PTL_MD_KIOV)) == 0) ? 
0 : md->md_niov; } int do_PtlMDAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret) @@ -240,21 +249,21 @@ int do_PtlMDAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret) lib_md_t *md; unsigned long flags; - if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOV)) != 0 && - args->md_in.niov > PTL_MD_MAX_IOV) /* too many fragments */ - return (ret->rc = PTL_IOV_TOO_MANY); + if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 && + args->md_in.length > PTL_MD_MAX_IOV) /* too many fragments */ + return (ret->rc = PTL_IOV_INVALID); md = lib_md_alloc(nal, &args->md_in); if (md == NULL) - return (ret->rc = PTL_NOSPACE); + return (ret->rc = PTL_NO_SPACE); state_lock(nal, &flags); me = ptl_handle2me(&args->me_in, nal); if (me == NULL) { - ret->rc = PTL_INV_ME; + ret->rc = PTL_ME_INVALID; } else if (me->md != NULL) { - ret->rc = PTL_INUSE; + ret->rc = PTL_ME_IN_USE; } else { ret->rc = lib_md_build(nal, md, private, &args->md_in, &args->eq_in, args->unlink_in); @@ -292,18 +301,18 @@ int do_PtlMDBind(nal_cb_t * nal, void *private, void *v_args, void *v_ret) lib_md_t *md; unsigned long flags; - if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOV)) != 0 && - args->md_in.niov > PTL_MD_MAX_IOV) /* too many fragments */ - return (ret->rc = PTL_IOV_TOO_MANY); + if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 && + args->md_in.length > PTL_MD_MAX_IOV) /* too many fragments */ + return (ret->rc = PTL_IOV_INVALID); md = lib_md_alloc(nal, &args->md_in); if (md == NULL) - return (ret->rc = PTL_NOSPACE); + return (ret->rc = PTL_NO_SPACE); state_lock(nal, &flags); - ret->rc = lib_md_build(nal, md, private, - &args->md_in, &args->eq_in, PTL_UNLINK); + ret->rc = lib_md_build(nal, md, private, &args->md_in, + &args->eq_in, args->unlink_in); if (ret->rc == PTL_OK) { ptl_md2handle(&ret->handle_out, md); @@ -331,7 +340,7 @@ int do_PtlMDUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret) md = ptl_handle2md(&args->md_in, nal); if (md == 
NULL) { state_unlock(nal, &flags); - return (ret->rc = PTL_INV_MD); + return (ret->rc = PTL_MD_INVALID); } /* If the MD is busy, lib_md_unlink just marks it for deletion, and @@ -343,7 +352,7 @@ int do_PtlMDUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret) memset(&ev, 0, sizeof(ev)); ev.type = PTL_EVENT_UNLINK; - ev.status = PTL_OK; + ev.ni_fail_type = PTL_OK; ev.unlinked = 1; lib_md_deconstruct(nal, md, &ev.mem_desc); @@ -385,7 +394,7 @@ int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *v_args, md = ptl_handle2md(&args->md_in, nal); if (md == NULL) { - ret->rc = PTL_INV_MD; + ret->rc = PTL_MD_INVALID; goto out; } @@ -397,47 +406,42 @@ int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *v_args, goto out; } - /* XXX fttb, the new MD must be the same type wrt fragmentation */ - if (((new->options ^ md->options) & - (PTL_MD_IOV | PTL_MD_KIOV)) != 0) { - ret->rc = PTL_INV_MD; - goto out; - } - - if (new->niov > md->md_niov) { - ret->rc = PTL_IOV_TOO_MANY; + /* XXX fttb, the new MD must be the same "shape" wrt fragmentation, + * since we simply overwrite the old lib-md */ + if ((((new->options ^ md->options) & + (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0) || + ((new->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0 && + new->length != md->md_niov)) { + ret->rc = PTL_IOV_INVALID; goto out; } - if (new->niov < md->md_niov) { - ret->rc = PTL_IOV_TOO_SMALL; - goto out; - } - - if (!PtlHandleEqual (args->testq_in, PTL_EQ_NONE)) { + if (!PtlHandleIsEqual (args->testq_in, PTL_EQ_NONE)) { test_eq = ptl_handle2eq(&args->testq_in, nal); if (test_eq == NULL) { - ret->rc = PTL_INV_EQ; + ret->rc = PTL_EQ_INVALID; goto out; } } if (md->pending != 0) { - ret->rc = PTL_NOUPDATE; + ret->rc = PTL_MD_NO_UPDATE; goto out; } if (test_eq == NULL || test_eq->sequence == args->sequence_in) { lib_me_t *me = md->me; + int unlink = (md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) ? 
+ PTL_UNLINK : PTL_RETAIN; // #warning this does not track eq refcounts properly ret->rc = lib_md_build(nal, md, private, - new, &new->eventq, md->unlink); + new, &new->eventq, unlink); md->me = me; } else { - ret->rc = PTL_NOUPDATE; + ret->rc = PTL_MD_NO_UPDATE; } out: diff --git a/lustre/portals/unals/procbridge.h b/lustre/portals/unals/procbridge.h index 965f83d..1c8e7dd 100644 --- a/lustre/portals/unals/procbridge.h +++ b/lustre/portals/unals/procbridge.h @@ -35,7 +35,8 @@ typedef struct procbridge { typedef struct nal_init_args { ptl_pid_t nia_requested_pid; - ptl_ni_limits_t *nia_limits; + ptl_ni_limits_t *nia_requested_limits; + ptl_ni_limits_t *nia_actual_limits; int nia_nal_type; bridge nia_bridge; } nal_init_args_t; @@ -50,10 +51,6 @@ extern void *nal_thread(void *); #define MAX_PTLS 128 extern void set_address(bridge t,ptl_pid_t pidrequest); -extern nal_t *procbridge_interface(int num_interface, - ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, - ptl_pid_t requested_pid); extern void procbridge_wakeup_nal(procbridge p); #endif diff --git a/lustre/portals/utils/Makefile.am b/lustre/portals/utils/Makefile.am index 6c31b3d..a14df1c 100644 --- a/lustre/portals/utils/Makefile.am +++ b/lustre/portals/utils/Makefile.am @@ -3,35 +3,40 @@ # This code is issued under the GNU General Public License. 
# See the file COPYING in this distribution -COMPILE = $(CC) -Wall -g -I$(srcdir)/../include -LINK = $(CC) -o $@ +## $(srcdir)/../ for , ../../ for generated +#COMPILE = $(CC) -Wall -g -I$(srcdir)/../include -I../../include +#LINK = $(CC) -o $@ if LIBLUSTRE - noinst_LIBRARIES = libuptlctl.a -libuptlctl_a_SOURCES = portals.c debug.c l_ioctl.c parser.c parser.h -libuptlctl_a_CFLAGS = -fPIC +libuptlctl_a_SOURCES = portals.c debug.c l_ioctl.c +libuptlctl_a_CPPFLAGS = $(LLCPPFLAGS) +libuptlctl_a_CFLAGS = $(LLCFLAGS) +endif -else +sbin_PROGRAMS = debugctl -sbin_PROGRAMS = acceptor ptlctl debugctl routerstat wirecheck gmnalnid lib_LIBRARIES = libptlctl.a -acceptor_SOURCES = acceptor.c # -lefence +libptlctl_a_SOURCES = portals.c debug.c l_ioctl.c parser.c parser.h + +if !CRAY_PORTALS +sbin_PROGRAMS += acceptor ptlctl routerstat wirecheck gmnalnid -wirecheck_SOURCES = wirecheck.c +acceptor_SOURCES = acceptor.c -libptlctl_a_SOURCES = portals.c debug.c l_ioctl.c parser.c parser.h +wirecheck_SOURCES = wirecheck.c gmnalnid_SOURCES = gmnalnid.c ptlctl_SOURCES = ptlctl.c -ptlctl_LDADD = -L. -lptlctl -lncurses # -lefence +ptlctl_LDADD = -L. -lptlctl $(LIBREADLINE) $(LIBEFENCE) ptlctl_DEPENDENCIES = libptlctl.a +routerstat_SOURCES = routerstat.c +endif + debugctl_SOURCES = debugctl.c -debugctl_LDADD = -L. -lptlctl -lncurses # -lefence +debugctl_LDADD = -L. 
-lptlctl $(LIBREADLINE) $(LIBEFENCE) debugctl_DEPENDENCIES = libptlctl.a -routerstat_SOURCES = routerstat.c -endif diff --git a/lustre/ptlrpc/ptlrpcd.c b/lustre/ptlrpc/ptlrpcd.c index 4e688a8..687f588 100644 --- a/lustre/ptlrpc/ptlrpcd.c +++ b/lustre/ptlrpc/ptlrpcd.c @@ -54,7 +54,7 @@ #include #define LIOD_STOP 0 -static struct ptlrpcd_ctl { +struct ptlrpcd_ctl { unsigned long pc_flags; spinlock_t pc_lock; struct completion pc_starting; @@ -62,23 +62,40 @@ static struct ptlrpcd_ctl { struct list_head pc_req_list; wait_queue_head_t pc_waitq; struct ptlrpc_request_set *pc_set; -} ptlrpcd_pc; +#ifndef __KERNEL__ + int pc_recurred; + void *pc_callback; +#endif +}; + +static struct ptlrpcd_ctl ptlrpcd_pc; +static struct ptlrpcd_ctl ptlrpcd_recovery_pc; static DECLARE_MUTEX(ptlrpcd_sem); static int ptlrpcd_users = 0; -void ptlrpcd_wake(void) +void ptlrpcd_wake(struct ptlrpc_request *req) { - struct ptlrpcd_ctl *pc = &ptlrpcd_pc; + struct ptlrpcd_ctl *pc = req->rq_ptlrpcd_data; + + LASSERT(pc != NULL); + wake_up(&pc->pc_waitq); } void ptlrpcd_add_req(struct ptlrpc_request *req) { - struct ptlrpcd_ctl *pc = &ptlrpcd_pc; + struct ptlrpcd_ctl *pc; + + if (req->rq_send_state == LUSTRE_IMP_FULL) + pc = &ptlrpcd_pc; + else + pc = &ptlrpcd_recovery_pc; ptlrpc_set_add_new_req(pc->pc_set, req); - ptlrpcd_wake(); + req->rq_ptlrpcd_data = pc; + + ptlrpcd_wake(req); } static int ptlrpcd_check(struct ptlrpcd_ctl *pc) @@ -169,8 +186,6 @@ static int ptlrpcd(void *arg) return 0; } #else -static int ptlrpcd_recurred = 0; -static void *ptlrpcd_callback; int ptlrpcd_check_async_rpcs(void *arg) { @@ -178,25 +193,19 @@ int ptlrpcd_check_async_rpcs(void *arg) int rc = 0; /* single threaded!! 
*/ - ptlrpcd_recurred++; + pc->pc_recurred++; - if (ptlrpcd_recurred == 1) + if (pc->pc_recurred == 1) rc = ptlrpcd_check(pc); - ptlrpcd_recurred--; + pc->pc_recurred--; return rc; } #endif -int ptlrpcd_addref(void) +static int ptlrpcd_start(struct ptlrpcd_ctl *pc) { - struct ptlrpcd_ctl *pc = &ptlrpcd_pc; int rc = 0; - ENTRY; - - down(&ptlrpcd_sem); - if (++ptlrpcd_users != 1) - GOTO(out, rc); memset(pc, 0, sizeof(*pc)); init_completion(&pc->pc_starting); @@ -218,28 +227,57 @@ int ptlrpcd_addref(void) wait_for_completion(&pc->pc_starting); #else - ptlrpcd_callback = + pc->pc_callback = liblustre_register_wait_callback(&ptlrpcd_check_async_rpcs, pc); #endif out: + RETURN(rc); +} + +static void ptlrpcd_stop(struct ptlrpcd_ctl *pc) +{ + set_bit(LIOD_STOP, &pc->pc_flags); + wake_up(&pc->pc_waitq); +#ifdef __KERNEL__ + wait_for_completion(&pc->pc_finishing); +#else + liblustre_deregister_wait_callback(pc->pc_callback); +#endif + ptlrpc_set_destroy(pc->pc_set); +} + +int ptlrpcd_addref(void) +{ + int rc = 0; + ENTRY; + + down(&ptlrpcd_sem); + if (++ptlrpcd_users != 1) + GOTO(out, rc); + + rc = ptlrpcd_start(&ptlrpcd_pc); + if (rc) { + --ptlrpcd_users; + GOTO(out, rc); + } + + rc = ptlrpcd_start(&ptlrpcd_recovery_pc); + if (rc) { + ptlrpcd_stop(&ptlrpcd_pc); + --ptlrpcd_users; + GOTO(out, rc); + } +out: up(&ptlrpcd_sem); RETURN(rc); } void ptlrpcd_decref(void) { - struct ptlrpcd_ctl *pc = &ptlrpcd_pc; - down(&ptlrpcd_sem); if (--ptlrpcd_users == 0) { - set_bit(LIOD_STOP, &pc->pc_flags); - wake_up(&pc->pc_waitq); -#ifdef __KERNEL__ - wait_for_completion(&pc->pc_finishing); -#else - liblustre_deregister_wait_callback(ptlrpcd_callback); -#endif - ptlrpc_set_destroy(pc->pc_set); + ptlrpcd_stop(&ptlrpcd_pc); + ptlrpcd_stop(&ptlrpcd_recovery_pc); } up(&ptlrpcd_sem); } diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh index f6395a4..8e10631 100755 --- a/lustre/tests/replay-dual.sh +++ b/lustre/tests/replay-dual.sh @@ -175,7 +175,6 @@ test_6() { } run_test 6 
"open1, open2, unlink |X| close1 [fail mds] close2" - if [ "$ONLY" != "setup" ]; then equals_msg test complete, cleaning up cleanup