From c290df36e89df0bbb2416e947ed819cb7122f25a Mon Sep 17 00:00:00 2001 From: eeb Date: Tue, 2 Dec 2003 15:27:59 +0000 Subject: [PATCH] * Dynamic allocation of socknal tx descs. * Statically allocated socknal forwarded message buffers now allocated individually rather than 1 large chunk to avoid store fragmentation. * Some function renaming in socknal_cb.c to avoid name conflicts. * added memhog gfp flags * lmc --gw_cluster_id => lmc --gateway_cluster_id to avoid conflict with lmc --gw --- lnet/include/linux/kp30.h | 10 +- lnet/klnds/socklnd/socklnd.c | 114 +++++--------- lnet/klnds/socklnd/socklnd.h | 30 +--- lnet/klnds/socklnd/socklnd_cb.c | 243 +++++++++++++----------------- lnet/libcfs/module.c | 4 +- lnet/utils/portals.c | 16 +- lustre/portals/include/linux/kp30.h | 10 +- lustre/portals/knals/socknal/socknal.c | 114 +++++--------- lustre/portals/knals/socknal/socknal.h | 30 +--- lustre/portals/knals/socknal/socknal_cb.c | 243 +++++++++++++----------------- lustre/portals/libcfs/module.c | 4 +- lustre/portals/utils/portals.c | 16 +- 12 files changed, 346 insertions(+), 488 deletions(-) diff --git a/lnet/include/linux/kp30.h b/lnet/include/linux/kp30.h index 6fd1207..909ecc0 100644 --- a/lnet/include/linux/kp30.h +++ b/lnet/include/linux/kp30.h @@ -283,13 +283,13 @@ do { \ #define GFP_MEMALLOC 0 #endif -#define PORTAL_ALLOC(ptr, size) \ +#define PORTAL_ALLOC_GFP(ptr, size, mask) \ do { \ LASSERT (!in_interrupt()); \ if ((size) > PORTAL_VMALLOC_SIZE) \ (ptr) = vmalloc(size); \ else \ - (ptr) = kmalloc((size), (GFP_KERNEL | GFP_MEMALLOC)); \ + (ptr) = kmalloc((size), (mask)); \ if ((ptr) == NULL) { \ CERROR("PORTALS: out of memory at %s:%d (tried to alloc '"\ #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size));\ @@ -303,6 +303,12 @@ do { \ (int)(size), (ptr), atomic_read (&portal_kmemory)); \ } while (0) +#define PORTAL_ALLOC(ptr, size) \ + PORTAL_ALLOC_GFP(ptr, size, (GFP_KERNEL | GFP_MEMALLOC)) + +#define PORTAL_ALLOC_ATOMIC(ptr, size) \ + PORTAL_ALLOC_GFP(ptr, 
size, (GFP_ATOMIC | GFP_MEMALLOC)) + #define PORTAL_FREE(ptr, size) \ do { \ int s = (size); \ diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index 6f6fa7e..6de511c 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -1395,30 +1395,35 @@ ksocknal_cmd(struct portals_cfg *pcfg, void * private) } void -ksocknal_free_buffers (void) +ksocknal_free_fmbs (ksock_fmb_pool_t *p) { - if (ksocknal_data.ksnd_fmbs != NULL) { - ksock_fmb_t *fmb = (ksock_fmb_t *)ksocknal_data.ksnd_fmbs; - int i; - int j; - - for (i = 0; - i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); - i++, fmb++) - for (j = 0; j < fmb->fmb_npages; j++) - if (fmb->fmb_pages[j] != NULL) - __free_page (fmb->fmb_pages[j]); - - PORTAL_FREE (ksocknal_data.ksnd_fmbs, - sizeof (ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + - SOCKNAL_LARGE_FWD_NMSGS)); + ksock_fmb_t *fmb; + int i; + + LASSERT (list_empty(&p->fmp_blocked_conns)); + LASSERT (p->fmp_nactive_fmbs == 0); + + while (!list_empty(&p->fmp_idle_fmbs)) { + + fmb = list_entry(p->fmp_idle_fmbs.next, + ksock_fmb_t, fmb_list); + + for (i = 0; i < fmb->fmb_npages; i++) + if (fmb->fmb_pages[i] != NULL) + __free_page(fmb->fmb_pages[i]); + + list_del(&fmb->fmb_list); + PORTAL_FREE(fmb, sizeof(*fmb)); } +} + +void +ksocknal_free_buffers (void) +{ + ksocknal_free_fmbs(&ksocknal_data.ksnd_small_fmp); + ksocknal_free_fmbs(&ksocknal_data.ksnd_large_fmp); - LASSERT (ksocknal_data.ksnd_active_ltxs == 0); - if (ksocknal_data.ksnd_ltxs != NULL) - PORTAL_FREE (ksocknal_data.ksnd_ltxs, - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + - SOCKNAL_NNBLK_LTXS)); + LASSERT (atomic_read(&ksocknal_data.ksnd_nactive_ltxs) == 0); if (ksocknal_data.ksnd_schedulers != NULL) PORTAL_FREE (ksocknal_data.ksnd_schedulers, @@ -1572,7 +1577,7 @@ ksocknal_module_init (void) PORTAL_ALLOC (ksocknal_data.ksnd_peers, sizeof (struct list_head) * ksocknal_data.ksnd_peer_hash_size); if (ksocknal_data.ksnd_peers == NULL) - RETURN (-ENOMEM); + return 
(-ENOMEM); for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]); @@ -1590,11 +1595,6 @@ ksocknal_module_init (void) INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_idle_fmbs); INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns); - spin_lock_init(&ksocknal_data.ksnd_idle_ltx_lock); - INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_nblk_ltx_list); - INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_ltx_list); - init_waitqueue_head(&ksocknal_data.ksnd_idle_ltx_waitq); - spin_lock_init (&ksocknal_data.ksnd_reaper_lock); INIT_LIST_HEAD (&ksocknal_data.ksnd_zombie_conns); INIT_LIST_HEAD (&ksocknal_data.ksnd_deathrow_conns); @@ -1614,7 +1614,7 @@ ksocknal_module_init (void) sizeof(ksock_sched_t) * SOCKNAL_N_SCHED); if (ksocknal_data.ksnd_schedulers == NULL) { ksocknal_module_fini (); - RETURN(-ENOMEM); + return (-ENOMEM); } for (i = 0; i < SOCKNAL_N_SCHED; i++) { @@ -1629,35 +1629,11 @@ ksocknal_module_init (void) init_waitqueue_head (&kss->kss_waitq); } - CDEBUG (D_MALLOC, "ltx "LPSZ", total "LPSZ"\n", sizeof (ksock_ltx_t), - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS)); - - PORTAL_ALLOC(ksocknal_data.ksnd_ltxs, - sizeof(ksock_ltx_t) * (SOCKNAL_NLTXS +SOCKNAL_NNBLK_LTXS)); - if (ksocknal_data.ksnd_ltxs == NULL) { - ksocknal_module_fini (); - return (-ENOMEM); - } - - /* Deterministic bugs please */ - memset (ksocknal_data.ksnd_ltxs, 0xeb, - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS)); - - for (i = 0; i < SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS; i++) { - ksock_ltx_t *ltx = &((ksock_ltx_t *)ksocknal_data.ksnd_ltxs)[i]; - - ltx->ltx_tx.tx_hdr = &ltx->ltx_hdr; - ltx->ltx_idle = i < SOCKNAL_NLTXS ? 
- &ksocknal_data.ksnd_idle_ltx_list : - &ksocknal_data.ksnd_idle_nblk_ltx_list; - list_add (&ltx->ltx_tx.tx_list, ltx->ltx_idle); - } - rc = PtlNIInit(ksocknal_init, 32, 4, 0, &ksocknal_ni); if (rc != 0) { CERROR("ksocknal: PtlNIInit failed: error %d\n", rc); ksocknal_module_fini (); - RETURN (rc); + return (rc); } PtlNIDebug(ksocknal_ni, ~0); @@ -1670,7 +1646,7 @@ ksocknal_module_init (void) CERROR("Can't spawn socknal scheduler[%d]: %d\n", i, rc); ksocknal_module_fini (); - RETURN (rc); + return (rc); } } @@ -1679,7 +1655,7 @@ ksocknal_module_init (void) if (rc != 0) { CERROR("Can't spawn socknal autoconnectd: %d\n", rc); ksocknal_module_fini (); - RETURN (rc); + return (rc); } } @@ -1687,7 +1663,7 @@ ksocknal_module_init (void) if (rc != 0) { CERROR ("Can't spawn socknal reaper: %d\n", rc); ksocknal_module_fini (); - RETURN (rc); + return (rc); } rc = kpr_register(&ksocknal_data.ksnd_router, @@ -1698,23 +1674,15 @@ ksocknal_module_init (void) } else { /* Only allocate forwarding buffers if I'm on a gateway */ - PORTAL_ALLOC(ksocknal_data.ksnd_fmbs, - sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + - SOCKNAL_LARGE_FWD_NMSGS)); - if (ksocknal_data.ksnd_fmbs == NULL) { - ksocknal_module_fini (); - RETURN(-ENOMEM); - } - - /* NULL out buffer pointers etc */ - memset(ksocknal_data.ksnd_fmbs, 0, - sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + - SOCKNAL_LARGE_FWD_NMSGS)); - for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); i++) { - ksock_fmb_t *fmb = - &((ksock_fmb_t *)ksocknal_data.ksnd_fmbs)[i]; + ksock_fmb_t *fmb; + + PORTAL_ALLOC(fmb, sizeof(*fmb)); + if (fmb == NULL) { + ksocknal_module_fini(); + return (-ENOMEM); + } if (i < SOCKNAL_SMALL_FWD_NMSGS) { fmb->fmb_npages = SOCKNAL_SMALL_FWD_PAGES; @@ -1724,7 +1692,6 @@ ksocknal_module_init (void) fmb->fmb_pool = &ksocknal_data.ksnd_large_fmp; } - LASSERT (fmb->fmb_npages > 0); for (j = 0; j < fmb->fmb_npages; j++) { fmb->fmb_pages[j] = alloc_page(GFP_KERNEL); @@ -1733,8 +1700,7 @@ 
ksocknal_module_init (void) return (-ENOMEM); } - LASSERT(page_address (fmb->fmb_pages[j]) != - NULL); + LASSERT(page_address(fmb->fmb_pages[j]) != NULL); } list_add(&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs); diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h index 227a24f..9dbe415 100644 --- a/lnet/klnds/socklnd/socklnd.h +++ b/lnet/klnds/socklnd/socklnd.h @@ -82,9 +82,6 @@ #define SOCKNAL_PEER_HASH_SIZE 101 /* # peer lists */ -#define SOCKNAL_NLTXS 128 /* # normal transmit messages */ -#define SOCKNAL_NNBLK_LTXS 128 /* # transmit messages reserved if can't block */ - #define SOCKNAL_SMALL_FWD_NMSGS 128 /* # small messages I can be forwarding at any time */ #define SOCKNAL_LARGE_FWD_NMSGS 64 /* # large messages I can be forwarding at any time */ @@ -113,8 +110,9 @@ typedef struct /* pool of forwarding buffers */ { spinlock_t fmp_lock; /* serialise */ - struct list_head fmp_idle_fmbs; /* buffers waiting for a connection */ + struct list_head fmp_idle_fmbs; /* free buffers */ struct list_head fmp_blocked_conns; /* connections waiting for a buffer */ + int fmp_nactive_fmbs; /* # buffers in use */ } ksock_fmb_pool_t; @@ -164,16 +162,10 @@ typedef struct { kpr_router_t ksnd_router; /* THE router */ - void *ksnd_fmbs; /* all the pre-allocated FMBs */ ksock_fmb_pool_t ksnd_small_fmp; /* small message forwarding buffers */ ksock_fmb_pool_t ksnd_large_fmp; /* large message forwarding buffers */ - void *ksnd_ltxs; /* all the pre-allocated LTXs */ - spinlock_t ksnd_idle_ltx_lock; /* serialise ltx alloc/free */ - struct list_head ksnd_idle_ltx_list; /* where to get an idle LTX */ - struct list_head ksnd_idle_nblk_ltx_list; /* where to get an idle LTX if you can't block */ - wait_queue_head_t ksnd_idle_ltx_waitq; /* where to block for an idle LTX */ - int ksnd_active_ltxs; /* #active ltxs */ + atomic_t ksnd_nactive_ltxs; /* #active ltxs */ struct list_head ksnd_deathrow_conns; /* conns to be closed */ struct list_head ksnd_zombie_conns; /* conns to 
be freed */ @@ -233,25 +225,15 @@ typedef struct /* transmit packet */ #define KSOCK_ZCCD_2_TX(ptr) list_entry (ptr, ksock_tx_t, tx_zccd) /* network zero copy callback descriptor embedded in ksock_tx_t */ -/* space for the tx frag descriptors: hdr is always 1 iovec - * and payload is PTL_MD_MAX of either type. */ -typedef struct -{ - struct iovec hdr; - union { - struct iovec iov[PTL_MD_MAX_IOV]; - ptl_kiov_t kiov[PTL_MD_MAX_IOV]; - } payload; -} ksock_txiovspace_t; - typedef struct /* locally transmitted packet */ { ksock_tx_t ltx_tx; /* send info */ - struct list_head *ltx_idle; /* where to put when idle */ void *ltx_private; /* lib_finalize() callback arg */ void *ltx_cookie; /* lib_finalize() callback arg */ - ksock_txiovspace_t ltx_iov_space; /* where to stash frag descriptors */ ptl_hdr_t ltx_hdr; /* buffer for packet header */ + int ltx_desc_size; /* bytes allocated for this desc */ + struct iovec ltx_iov[1]; /* iov for hdr + payload */ + ptl_kiov_t ltx_kiov[0]; /* kiov for payload */ } ksock_ltx_t; #define KSOCK_TX_2_KPR_FWD_DESC(ptr) list_entry ((kprfd_scratch_t *)ptr, kpr_fwd_desc_t, kprfd_scratch) diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c index 46b643f..44f8982 100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ b/lnet/klnds/socklnd/socklnd_cb.c @@ -129,60 +129,11 @@ ksocknal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist) return 0; } -ksock_ltx_t * -ksocknal_get_ltx (int may_block) -{ - unsigned long flags; - ksock_ltx_t *ltx = NULL; - - for (;;) { - spin_lock_irqsave (&ksocknal_data.ksnd_idle_ltx_lock, flags); - - if (!list_empty (&ksocknal_data.ksnd_idle_ltx_list)) { - ltx = list_entry(ksocknal_data.ksnd_idle_ltx_list.next, - ksock_ltx_t, ltx_tx.tx_list); - list_del (&ltx->ltx_tx.tx_list); - ksocknal_data.ksnd_active_ltxs++; - break; - } - - if (!may_block) { - if (!list_empty(&ksocknal_data.ksnd_idle_nblk_ltx_list)) { - ltx = list_entry(ksocknal_data.ksnd_idle_nblk_ltx_list.next, - ksock_ltx_t, ltx_tx.tx_list); 
- list_del (&ltx->ltx_tx.tx_list); - ksocknal_data.ksnd_active_ltxs++; - } - break; - } - - spin_unlock_irqrestore(&ksocknal_data.ksnd_idle_ltx_lock, - flags); - - wait_event (ksocknal_data.ksnd_idle_ltx_waitq, - !list_empty (&ksocknal_data.ksnd_idle_ltx_list)); - } - - spin_unlock_irqrestore (&ksocknal_data.ksnd_idle_ltx_lock, flags); - - return (ltx); -} - void -ksocknal_put_ltx (ksock_ltx_t *ltx) +ksocknal_free_ltx (ksock_ltx_t *ltx) { - unsigned long flags; - - spin_lock_irqsave (&ksocknal_data.ksnd_idle_ltx_lock, flags); - - ksocknal_data.ksnd_active_ltxs--; - list_add_tail (&ltx->ltx_tx.tx_list, ltx->ltx_idle); - - /* normal tx desc => wakeup anyone blocking for one */ - if (ltx->ltx_idle == &ksocknal_data.ksnd_idle_ltx_list) - wake_up (&ksocknal_data.ksnd_idle_ltx_waitq); - - spin_unlock_irqrestore (&ksocknal_data.ksnd_idle_ltx_lock, flags); + atomic_dec(&ksocknal_data.ksnd_nactive_ltxs); + PORTAL_FREE(ltx, ltx->ltx_desc_size); } #if SOCKNAL_ZC @@ -364,7 +315,7 @@ ksocknal_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) } int -ksocknal_sendmsg (ksock_conn_t *conn, ksock_tx_t *tx) +ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx) { /* Return 0 on success, < 0 on error. * caller checks tx_resid to determine progress/completion */ @@ -551,7 +502,7 @@ ksocknal_recv_kiov (ksock_conn_t *conn) } int -ksocknal_recvmsg (ksock_conn_t *conn) +ksocknal_receive (ksock_conn_t *conn) { /* Return 1 on success, 0 on EOF, < 0 on error. 
* Caller checks ksnc_rx_nob_wanted to determine @@ -659,7 +610,7 @@ ksocknal_tx_done (ksock_tx_t *tx, int asynch) lib_finalize (&ksocknal_lib, ltx->ltx_private, ltx->ltx_cookie); - ksocknal_put_ltx (ltx); + ksocknal_free_ltx (ltx); EXIT; } @@ -690,7 +641,7 @@ ksocknal_process_transmit (ksock_conn_t *conn, ksock_tx_t *tx) { int rc; - rc = ksocknal_sendmsg (conn, tx); + rc = ksocknal_transmit (conn, tx); CDEBUG (D_NET, "send(%d) %d\n", tx->tx_resid, rc); LASSERT (rc != -EAGAIN); @@ -1021,115 +972,125 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid) return (-EHOSTUNREACH); } -ksock_ltx_t * -ksocknal_setup_hdr (nal_cb_t *nal, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type) +int +ksocknal_sendmsg(nal_cb_t *nal, + void *private, + lib_msg_t *cookie, + ptl_hdr_t *hdr, + int type, + ptl_nid_t nid, + ptl_pid_t pid, + unsigned int payload_niov, + struct iovec *payload_iov, + ptl_kiov_t *payload_kiov, + size_t payload_nob) { ksock_ltx_t *ltx; + int desc_size; + int rc; + + /* NB 'private' is different depending on what we're sending. + * Just ignore it... */ - /* I may not block for a transmit descriptor if I might block the - * receiver, or an interrupt handler. 
*/ - ltx = ksocknal_get_ltx (!(type == PTL_MSG_ACK || - type == PTL_MSG_REPLY || - in_interrupt ())); + CDEBUG(D_NET, "sending "LPSZ" bytes in %d frags to nid:"LPX64 + " pid %d\n", payload_nob, payload_niov, nid , pid); + + LASSERT (payload_nob == 0 || payload_niov > 0); + LASSERT (payload_niov <= PTL_MD_MAX_IOV); + + /* It must be OK to kmap() if required */ + LASSERT (payload_kiov == NULL || !in_interrupt ()); + /* payload is either all vaddrs or all pages */ + LASSERT (!(payload_kiov != NULL && payload_iov != NULL)); + + if (payload_iov != NULL) + desc_size = offsetof(ksock_ltx_t, ltx_iov[1 + payload_niov]); + else + desc_size = offsetof(ksock_ltx_t, ltx_kiov[payload_niov]); + + if (in_interrupt() || + type == PTL_MSG_ACK || + type == PTL_MSG_REPLY) { + /* Can't block if in interrupt or responding to an incoming + * message */ + PORTAL_ALLOC_ATOMIC(ltx, desc_size); + } else { + PORTAL_ALLOC(ltx, desc_size); + } + if (ltx == NULL) { - CERROR ("Can't allocate tx desc\n"); - return (NULL); + CERROR("Can't allocate tx desc type %d size %d %s\n", + type, desc_size, in_interrupt() ? 
"(intr)" : ""); + return (PTL_NOSPACE); } - /* Init local send packet (storage for hdr, finalize() args) */ + atomic_inc(&ksocknal_data.ksnd_nactive_ltxs); + + ltx->ltx_desc_size = desc_size; + + /* We always have 1 mapped frag for the header */ + ltx->ltx_tx.tx_iov = ltx->ltx_iov; + ltx->ltx_iov[0].iov_base = &ltx->ltx_hdr; + ltx->ltx_iov[0].iov_len = sizeof(*hdr); ltx->ltx_hdr = *hdr; + ltx->ltx_private = private; ltx->ltx_cookie = cookie; - /* Init common ltx_tx */ ltx->ltx_tx.tx_isfwd = 0; - ltx->ltx_tx.tx_nob = sizeof (*hdr); + ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_nob; - /* We always have 1 mapped frag for the header */ - ltx->ltx_tx.tx_niov = 1; - ltx->ltx_tx.tx_iov = &ltx->ltx_iov_space.hdr; - ltx->ltx_tx.tx_iov[0].iov_base = &ltx->ltx_hdr; - ltx->ltx_tx.tx_iov[0].iov_len = sizeof (ltx->ltx_hdr); - - ltx->ltx_tx.tx_kiov = NULL; - ltx->ltx_tx.tx_nkiov = 0; - - return (ltx); -} - -int -ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int payload_niov, struct iovec *payload_iov, - size_t payload_len) -{ - ksock_ltx_t *ltx; - int rc; + if (payload_iov != NULL) { + /* payload is all mapped */ + ltx->ltx_tx.tx_kiov = NULL; + ltx->ltx_tx.tx_nkiov = 0; - /* NB 'private' is different depending on what we're sending. 
- * Just ignore it until we can rely on it - */ + ltx->ltx_tx.tx_niov = 1 + payload_niov; - CDEBUG(D_NET, - "sending "LPSZ" bytes in %d mapped frags to nid: "LPX64 - " pid %d\n", payload_len, payload_niov, nid, pid); + memcpy(ltx->ltx_iov + 1, payload_iov, + payload_niov * sizeof (*payload_iov)); - ltx = ksocknal_setup_hdr (nal, private, cookie, hdr, type); - if (ltx == NULL) - return (PTL_FAIL); + } else { + /* payload is all pages */ + ltx->ltx_tx.tx_kiov = ltx->ltx_kiov; + ltx->ltx_tx.tx_nkiov = payload_niov; - /* append the payload_iovs to the one pointing at the header */ - LASSERT (ltx->ltx_tx.tx_niov == 1 && ltx->ltx_tx.tx_nkiov == 0); - LASSERT (payload_niov <= PTL_MD_MAX_IOV); + ltx->ltx_tx.tx_niov = 1; - memcpy (ltx->ltx_tx.tx_iov + 1, payload_iov, - payload_niov * sizeof (*payload_iov)); - ltx->ltx_tx.tx_niov = 1 + payload_niov; - ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len; + memcpy(ltx->ltx_kiov, payload_kiov, + payload_niov * sizeof (*payload_kiov)); + } - rc = ksocknal_launch_packet (&ltx->ltx_tx, nid); + rc = ksocknal_launch_packet(&ltx->ltx_tx, nid); if (rc == 0) return (PTL_OK); - ksocknal_put_ltx (ltx); + ksocknal_free_ltx(ltx); return (PTL_FAIL); } int +ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie, + ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int payload_niov, struct iovec *payload_iov, + size_t payload_len) +{ + return (ksocknal_sendmsg(nal, private, cookie, + hdr, type, nid, pid, + payload_niov, payload_iov, NULL, + payload_len)); +} + +int ksocknal_send_pages (nal_cb_t *nal, void *private, lib_msg_t *cookie, ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int payload_niov, ptl_kiov_t *payload_iov, size_t payload_len) + unsigned int payload_niov, ptl_kiov_t *payload_kiov, + size_t payload_len) { - ksock_ltx_t *ltx; - int rc; - - /* NB 'private' is different depending on what we're sending. 
- * Just ignore it until we can rely on it */ - - CDEBUG(D_NET, - "sending "LPSZ" bytes in %d mapped frags to nid: "LPX64" pid %d\n", - payload_len, payload_niov, nid, pid); - - ltx = ksocknal_setup_hdr (nal, private, cookie, hdr, type); - if (ltx == NULL) - return (PTL_FAIL); - - LASSERT (ltx->ltx_tx.tx_niov == 1 && ltx->ltx_tx.tx_nkiov == 0); - LASSERT (payload_niov <= PTL_MD_MAX_IOV); - - ltx->ltx_tx.tx_kiov = ltx->ltx_iov_space.payload.kiov; - memcpy (ltx->ltx_tx.tx_kiov, payload_iov, - payload_niov * sizeof (*payload_iov)); - ltx->ltx_tx.tx_nkiov = payload_niov; - ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len; - - rc = ksocknal_launch_packet (&ltx->ltx_tx, nid); - if (rc == 0) - return (PTL_OK); - - ksocknal_put_ltx (ltx); - return (PTL_FAIL); + return (ksocknal_sendmsg(nal, private, cookie, + hdr, type, nid, pid, + payload_niov, NULL, payload_kiov, + payload_len)); } void @@ -1201,6 +1162,7 @@ ksocknal_fmb_callback (void *arg, int error) spin_lock_irqsave (&fmp->fmp_lock, flags); list_add (&fmb->fmb_list, &fmp->fmp_idle_fmbs); + fmp->fmp_nactive_fmbs--; if (!list_empty (&fmp->fmp_blocked_conns)) { conn = list_entry (fmb->fmb_pool->fmp_blocked_conns.next, @@ -1239,7 +1201,7 @@ ksocknal_get_idle_fmb (ksock_conn_t *conn) ksock_fmb_t *fmb; LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB); - LASSERT (ksocknal_data.ksnd_fmbs != NULL); + LASSERT (kpr_routing(&ksocknal_data.ksnd_router)); if (packet_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE) pool = &ksocknal_data.ksnd_small_fmp; @@ -1252,6 +1214,7 @@ ksocknal_get_idle_fmb (ksock_conn_t *conn) fmb = list_entry(pool->fmp_idle_fmbs.next, ksock_fmb_t, fmb_list); list_del (&fmb->fmb_list); + pool->fmp_nactive_fmbs++; spin_unlock_irqrestore (&pool->fmp_lock, flags); return (fmb); @@ -1394,7 +1357,7 @@ ksocknal_fwd_parse (ksock_conn_t *conn) return; } - if (ksocknal_data.ksnd_fmbs == NULL) { /* not forwarding */ + if (!kpr_routing(&ksocknal_data.ksnd_router)) { /* not forwarding */ CERROR("dropping packet from "LPX64" 
(%s) for "LPX64 " (%s): not forwarding\n", src_nid, portals_nid2str(TCPNAL, src_nid, str), @@ -1522,7 +1485,7 @@ ksocknal_process_receive (ksock_conn_t *conn) LASSERT (conn->ksnc_rx_nob_wanted > 0); - rc = ksocknal_recvmsg(conn); + rc = ksocknal_receive(conn); if (rc <= 0) { LASSERT (rc != -EAGAIN); diff --git a/lnet/libcfs/module.c b/lnet/libcfs/module.c index dad710d..55e1935 100644 --- a/lnet/libcfs/module.c +++ b/lnet/libcfs/module.c @@ -143,7 +143,7 @@ kportal_memhog_alloc (struct portals_device_userstate *pdu, int npages, int flag if (npages == 0) return 0; - + level0p = &pdu->pdu_memhog_root_page; *level0p = alloc_page(flags); if (*level0p == NULL) @@ -665,7 +665,7 @@ static int kportal_ioctl(struct inode *inode, struct file *file, kportal_memhog_free(file->private_data); err = kportal_memhog_alloc(file->private_data, data->ioc_count, - GFP_NOFS); + data->ioc_flags); if (err != 0) kportal_memhog_free(file->private_data); } diff --git a/lnet/utils/portals.c b/lnet/utils/portals.c index aa9aa93..b46ee16 100644 --- a/lnet/utils/portals.c +++ b/lnet/utils/portals.c @@ -1691,13 +1691,15 @@ jt_ptl_lwt(int argc, char **argv) int jt_ptl_memhog(int argc, char **argv) { + static int gfp = 0; /* sticky! 
*/ + struct portal_ioctl_data data; int rc; int count; char *end; - if (argc != 2) { - fprintf(stderr, "usage: %s \n", argv[0]); + if (argc < 2) { + fprintf(stderr, "usage: %s []\n", argv[0]); return 0; } @@ -1707,8 +1709,18 @@ int jt_ptl_memhog(int argc, char **argv) return -1; } + if (argc >= 3) { + rc = strtol(argv[2], &end, 0); + if (*end != 0) { + fprintf(stderr, "Can't parse gfp flags '%s'\n", argv[2]); + return -1; + } + gfp = rc; + } + PORTAL_IOC_INIT(data); data.ioc_count = count; + data.ioc_flags = gfp; rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_MEMHOG, &data); if (rc != 0) { diff --git a/lustre/portals/include/linux/kp30.h b/lustre/portals/include/linux/kp30.h index 6fd1207..909ecc0 100644 --- a/lustre/portals/include/linux/kp30.h +++ b/lustre/portals/include/linux/kp30.h @@ -283,13 +283,13 @@ do { \ #define GFP_MEMALLOC 0 #endif -#define PORTAL_ALLOC(ptr, size) \ +#define PORTAL_ALLOC_GFP(ptr, size, mask) \ do { \ LASSERT (!in_interrupt()); \ if ((size) > PORTAL_VMALLOC_SIZE) \ (ptr) = vmalloc(size); \ else \ - (ptr) = kmalloc((size), (GFP_KERNEL | GFP_MEMALLOC)); \ + (ptr) = kmalloc((size), (mask)); \ if ((ptr) == NULL) { \ CERROR("PORTALS: out of memory at %s:%d (tried to alloc '"\ #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size));\ @@ -303,6 +303,12 @@ do { \ (int)(size), (ptr), atomic_read (&portal_kmemory)); \ } while (0) +#define PORTAL_ALLOC(ptr, size) \ + PORTAL_ALLOC_GFP(ptr, size, (GFP_KERNEL | GFP_MEMALLOC)) + +#define PORTAL_ALLOC_ATOMIC(ptr, size) \ + PORTAL_ALLOC_GFP(ptr, size, (GFP_ATOMIC | GFP_MEMALLOC)) + #define PORTAL_FREE(ptr, size) \ do { \ int s = (size); \ diff --git a/lustre/portals/knals/socknal/socknal.c b/lustre/portals/knals/socknal/socknal.c index 6f6fa7e..6de511c 100644 --- a/lustre/portals/knals/socknal/socknal.c +++ b/lustre/portals/knals/socknal/socknal.c @@ -1395,30 +1395,35 @@ ksocknal_cmd(struct portals_cfg *pcfg, void * private) } void -ksocknal_free_buffers (void) +ksocknal_free_fmbs (ksock_fmb_pool_t *p) { - if 
(ksocknal_data.ksnd_fmbs != NULL) { - ksock_fmb_t *fmb = (ksock_fmb_t *)ksocknal_data.ksnd_fmbs; - int i; - int j; - - for (i = 0; - i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); - i++, fmb++) - for (j = 0; j < fmb->fmb_npages; j++) - if (fmb->fmb_pages[j] != NULL) - __free_page (fmb->fmb_pages[j]); - - PORTAL_FREE (ksocknal_data.ksnd_fmbs, - sizeof (ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + - SOCKNAL_LARGE_FWD_NMSGS)); + ksock_fmb_t *fmb; + int i; + + LASSERT (list_empty(&p->fmp_blocked_conns)); + LASSERT (p->fmp_nactive_fmbs == 0); + + while (!list_empty(&p->fmp_idle_fmbs)) { + + fmb = list_entry(p->fmp_idle_fmbs.next, + ksock_fmb_t, fmb_list); + + for (i = 0; i < fmb->fmb_npages; i++) + if (fmb->fmb_pages[i] != NULL) + __free_page(fmb->fmb_pages[i]); + + list_del(&fmb->fmb_list); + PORTAL_FREE(fmb, sizeof(*fmb)); } +} + +void +ksocknal_free_buffers (void) +{ + ksocknal_free_fmbs(&ksocknal_data.ksnd_small_fmp); + ksocknal_free_fmbs(&ksocknal_data.ksnd_large_fmp); - LASSERT (ksocknal_data.ksnd_active_ltxs == 0); - if (ksocknal_data.ksnd_ltxs != NULL) - PORTAL_FREE (ksocknal_data.ksnd_ltxs, - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + - SOCKNAL_NNBLK_LTXS)); + LASSERT (atomic_read(&ksocknal_data.ksnd_nactive_ltxs) == 0); if (ksocknal_data.ksnd_schedulers != NULL) PORTAL_FREE (ksocknal_data.ksnd_schedulers, @@ -1572,7 +1577,7 @@ ksocknal_module_init (void) PORTAL_ALLOC (ksocknal_data.ksnd_peers, sizeof (struct list_head) * ksocknal_data.ksnd_peer_hash_size); if (ksocknal_data.ksnd_peers == NULL) - RETURN (-ENOMEM); + return (-ENOMEM); for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]); @@ -1590,11 +1595,6 @@ ksocknal_module_init (void) INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_idle_fmbs); INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns); - spin_lock_init(&ksocknal_data.ksnd_idle_ltx_lock); - INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_nblk_ltx_list); - 
INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_ltx_list); - init_waitqueue_head(&ksocknal_data.ksnd_idle_ltx_waitq); - spin_lock_init (&ksocknal_data.ksnd_reaper_lock); INIT_LIST_HEAD (&ksocknal_data.ksnd_zombie_conns); INIT_LIST_HEAD (&ksocknal_data.ksnd_deathrow_conns); @@ -1614,7 +1614,7 @@ ksocknal_module_init (void) sizeof(ksock_sched_t) * SOCKNAL_N_SCHED); if (ksocknal_data.ksnd_schedulers == NULL) { ksocknal_module_fini (); - RETURN(-ENOMEM); + return (-ENOMEM); } for (i = 0; i < SOCKNAL_N_SCHED; i++) { @@ -1629,35 +1629,11 @@ ksocknal_module_init (void) init_waitqueue_head (&kss->kss_waitq); } - CDEBUG (D_MALLOC, "ltx "LPSZ", total "LPSZ"\n", sizeof (ksock_ltx_t), - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS)); - - PORTAL_ALLOC(ksocknal_data.ksnd_ltxs, - sizeof(ksock_ltx_t) * (SOCKNAL_NLTXS +SOCKNAL_NNBLK_LTXS)); - if (ksocknal_data.ksnd_ltxs == NULL) { - ksocknal_module_fini (); - return (-ENOMEM); - } - - /* Deterministic bugs please */ - memset (ksocknal_data.ksnd_ltxs, 0xeb, - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS)); - - for (i = 0; i < SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS; i++) { - ksock_ltx_t *ltx = &((ksock_ltx_t *)ksocknal_data.ksnd_ltxs)[i]; - - ltx->ltx_tx.tx_hdr = &ltx->ltx_hdr; - ltx->ltx_idle = i < SOCKNAL_NLTXS ? 
- &ksocknal_data.ksnd_idle_ltx_list : - &ksocknal_data.ksnd_idle_nblk_ltx_list; - list_add (&ltx->ltx_tx.tx_list, ltx->ltx_idle); - } - rc = PtlNIInit(ksocknal_init, 32, 4, 0, &ksocknal_ni); if (rc != 0) { CERROR("ksocknal: PtlNIInit failed: error %d\n", rc); ksocknal_module_fini (); - RETURN (rc); + return (rc); } PtlNIDebug(ksocknal_ni, ~0); @@ -1670,7 +1646,7 @@ ksocknal_module_init (void) CERROR("Can't spawn socknal scheduler[%d]: %d\n", i, rc); ksocknal_module_fini (); - RETURN (rc); + return (rc); } } @@ -1679,7 +1655,7 @@ ksocknal_module_init (void) if (rc != 0) { CERROR("Can't spawn socknal autoconnectd: %d\n", rc); ksocknal_module_fini (); - RETURN (rc); + return (rc); } } @@ -1687,7 +1663,7 @@ ksocknal_module_init (void) if (rc != 0) { CERROR ("Can't spawn socknal reaper: %d\n", rc); ksocknal_module_fini (); - RETURN (rc); + return (rc); } rc = kpr_register(&ksocknal_data.ksnd_router, @@ -1698,23 +1674,15 @@ ksocknal_module_init (void) } else { /* Only allocate forwarding buffers if I'm on a gateway */ - PORTAL_ALLOC(ksocknal_data.ksnd_fmbs, - sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + - SOCKNAL_LARGE_FWD_NMSGS)); - if (ksocknal_data.ksnd_fmbs == NULL) { - ksocknal_module_fini (); - RETURN(-ENOMEM); - } - - /* NULL out buffer pointers etc */ - memset(ksocknal_data.ksnd_fmbs, 0, - sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + - SOCKNAL_LARGE_FWD_NMSGS)); - for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); i++) { - ksock_fmb_t *fmb = - &((ksock_fmb_t *)ksocknal_data.ksnd_fmbs)[i]; + ksock_fmb_t *fmb; + + PORTAL_ALLOC(fmb, sizeof(*fmb)); + if (fmb == NULL) { + ksocknal_module_fini(); + return (-ENOMEM); + } if (i < SOCKNAL_SMALL_FWD_NMSGS) { fmb->fmb_npages = SOCKNAL_SMALL_FWD_PAGES; @@ -1724,7 +1692,6 @@ ksocknal_module_init (void) fmb->fmb_pool = &ksocknal_data.ksnd_large_fmp; } - LASSERT (fmb->fmb_npages > 0); for (j = 0; j < fmb->fmb_npages; j++) { fmb->fmb_pages[j] = alloc_page(GFP_KERNEL); @@ -1733,8 +1700,7 @@ 
ksocknal_module_init (void) return (-ENOMEM); } - LASSERT(page_address (fmb->fmb_pages[j]) != - NULL); + LASSERT(page_address(fmb->fmb_pages[j]) != NULL); } list_add(&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs); diff --git a/lustre/portals/knals/socknal/socknal.h b/lustre/portals/knals/socknal/socknal.h index 227a24f..9dbe415 100644 --- a/lustre/portals/knals/socknal/socknal.h +++ b/lustre/portals/knals/socknal/socknal.h @@ -82,9 +82,6 @@ #define SOCKNAL_PEER_HASH_SIZE 101 /* # peer lists */ -#define SOCKNAL_NLTXS 128 /* # normal transmit messages */ -#define SOCKNAL_NNBLK_LTXS 128 /* # transmit messages reserved if can't block */ - #define SOCKNAL_SMALL_FWD_NMSGS 128 /* # small messages I can be forwarding at any time */ #define SOCKNAL_LARGE_FWD_NMSGS 64 /* # large messages I can be forwarding at any time */ @@ -113,8 +110,9 @@ typedef struct /* pool of forwarding buffers */ { spinlock_t fmp_lock; /* serialise */ - struct list_head fmp_idle_fmbs; /* buffers waiting for a connection */ + struct list_head fmp_idle_fmbs; /* free buffers */ struct list_head fmp_blocked_conns; /* connections waiting for a buffer */ + int fmp_nactive_fmbs; /* # buffers in use */ } ksock_fmb_pool_t; @@ -164,16 +162,10 @@ typedef struct { kpr_router_t ksnd_router; /* THE router */ - void *ksnd_fmbs; /* all the pre-allocated FMBs */ ksock_fmb_pool_t ksnd_small_fmp; /* small message forwarding buffers */ ksock_fmb_pool_t ksnd_large_fmp; /* large message forwarding buffers */ - void *ksnd_ltxs; /* all the pre-allocated LTXs */ - spinlock_t ksnd_idle_ltx_lock; /* serialise ltx alloc/free */ - struct list_head ksnd_idle_ltx_list; /* where to get an idle LTX */ - struct list_head ksnd_idle_nblk_ltx_list; /* where to get an idle LTX if you can't block */ - wait_queue_head_t ksnd_idle_ltx_waitq; /* where to block for an idle LTX */ - int ksnd_active_ltxs; /* #active ltxs */ + atomic_t ksnd_nactive_ltxs; /* #active ltxs */ struct list_head ksnd_deathrow_conns; /* conns to be closed */ struct 
list_head ksnd_zombie_conns; /* conns to be freed */ @@ -233,25 +225,15 @@ typedef struct /* transmit packet */ #define KSOCK_ZCCD_2_TX(ptr) list_entry (ptr, ksock_tx_t, tx_zccd) /* network zero copy callback descriptor embedded in ksock_tx_t */ -/* space for the tx frag descriptors: hdr is always 1 iovec - * and payload is PTL_MD_MAX of either type. */ -typedef struct -{ - struct iovec hdr; - union { - struct iovec iov[PTL_MD_MAX_IOV]; - ptl_kiov_t kiov[PTL_MD_MAX_IOV]; - } payload; -} ksock_txiovspace_t; - typedef struct /* locally transmitted packet */ { ksock_tx_t ltx_tx; /* send info */ - struct list_head *ltx_idle; /* where to put when idle */ void *ltx_private; /* lib_finalize() callback arg */ void *ltx_cookie; /* lib_finalize() callback arg */ - ksock_txiovspace_t ltx_iov_space; /* where to stash frag descriptors */ ptl_hdr_t ltx_hdr; /* buffer for packet header */ + int ltx_desc_size; /* bytes allocated for this desc */ + struct iovec ltx_iov[1]; /* iov for hdr + payload */ + ptl_kiov_t ltx_kiov[0]; /* kiov for payload */ } ksock_ltx_t; #define KSOCK_TX_2_KPR_FWD_DESC(ptr) list_entry ((kprfd_scratch_t *)ptr, kpr_fwd_desc_t, kprfd_scratch) diff --git a/lustre/portals/knals/socknal/socknal_cb.c b/lustre/portals/knals/socknal/socknal_cb.c index 46b643f..44f8982 100644 --- a/lustre/portals/knals/socknal/socknal_cb.c +++ b/lustre/portals/knals/socknal/socknal_cb.c @@ -129,60 +129,11 @@ ksocknal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist) return 0; } -ksock_ltx_t * -ksocknal_get_ltx (int may_block) -{ - unsigned long flags; - ksock_ltx_t *ltx = NULL; - - for (;;) { - spin_lock_irqsave (&ksocknal_data.ksnd_idle_ltx_lock, flags); - - if (!list_empty (&ksocknal_data.ksnd_idle_ltx_list)) { - ltx = list_entry(ksocknal_data.ksnd_idle_ltx_list.next, - ksock_ltx_t, ltx_tx.tx_list); - list_del (<x->ltx_tx.tx_list); - ksocknal_data.ksnd_active_ltxs++; - break; - } - - if (!may_block) { - if (!list_empty(&ksocknal_data.ksnd_idle_nblk_ltx_list)) { - ltx = 
list_entry(ksocknal_data.ksnd_idle_nblk_ltx_list.next, - ksock_ltx_t, ltx_tx.tx_list); - list_del (&ltx->ltx_tx.tx_list); - ksocknal_data.ksnd_active_ltxs++; - } - break; - } - - spin_unlock_irqrestore(&ksocknal_data.ksnd_idle_ltx_lock, - flags); - - wait_event (ksocknal_data.ksnd_idle_ltx_waitq, - !list_empty (&ksocknal_data.ksnd_idle_ltx_list)); - } - - spin_unlock_irqrestore (&ksocknal_data.ksnd_idle_ltx_lock, flags); - - return (ltx); -} - void -ksocknal_put_ltx (ksock_ltx_t *ltx) +ksocknal_free_ltx (ksock_ltx_t *ltx) { - unsigned long flags; - - spin_lock_irqsave (&ksocknal_data.ksnd_idle_ltx_lock, flags); - - ksocknal_data.ksnd_active_ltxs--; - list_add_tail (&ltx->ltx_tx.tx_list, ltx->ltx_idle); - - /* normal tx desc => wakeup anyone blocking for one */ - if (ltx->ltx_idle == &ksocknal_data.ksnd_idle_ltx_list) - wake_up (&ksocknal_data.ksnd_idle_ltx_waitq); - - spin_unlock_irqrestore (&ksocknal_data.ksnd_idle_ltx_lock, flags); + atomic_dec(&ksocknal_data.ksnd_nactive_ltxs); + PORTAL_FREE(ltx, ltx->ltx_desc_size); } #if SOCKNAL_ZC @@ -364,7 +315,7 @@ ksocknal_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) } int -ksocknal_sendmsg (ksock_conn_t *conn, ksock_tx_t *tx) +ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx) { /* Return 0 on success, < 0 on error. * caller checks tx_resid to determine progress/completion */ @@ -551,7 +502,7 @@ ksocknal_recv_kiov (ksock_conn_t *conn) } int -ksocknal_recvmsg (ksock_conn_t *conn) +ksocknal_receive (ksock_conn_t *conn) { /* Return 1 on success, 0 on EOF, < 0 on error.
* Caller checks ksnc_rx_nob_wanted to determine @@ -659,7 +610,7 @@ ksocknal_tx_done (ksock_tx_t *tx, int asynch) lib_finalize (&ksocknal_lib, ltx->ltx_private, ltx->ltx_cookie); - ksocknal_put_ltx (ltx); + ksocknal_free_ltx (ltx); EXIT; } @@ -690,7 +641,7 @@ ksocknal_process_transmit (ksock_conn_t *conn, ksock_tx_t *tx) { int rc; - rc = ksocknal_sendmsg (conn, tx); + rc = ksocknal_transmit (conn, tx); CDEBUG (D_NET, "send(%d) %d\n", tx->tx_resid, rc); LASSERT (rc != -EAGAIN); @@ -1021,115 +972,125 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid) return (-EHOSTUNREACH); } -ksock_ltx_t * -ksocknal_setup_hdr (nal_cb_t *nal, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type) +int +ksocknal_sendmsg(nal_cb_t *nal, + void *private, + lib_msg_t *cookie, + ptl_hdr_t *hdr, + int type, + ptl_nid_t nid, + ptl_pid_t pid, + unsigned int payload_niov, + struct iovec *payload_iov, + ptl_kiov_t *payload_kiov, + size_t payload_nob) { ksock_ltx_t *ltx; + int desc_size; + int rc; + + /* NB 'private' is different depending on what we're sending. + * Just ignore it... */ - /* I may not block for a transmit descriptor if I might block the - * receiver, or an interrupt handler. 
*/ - ltx = ksocknal_get_ltx (!(type == PTL_MSG_ACK || - type == PTL_MSG_REPLY || - in_interrupt ())); + CDEBUG(D_NET, "sending "LPSZ" bytes in %d frags to nid:"LPX64 + " pid %d\n", payload_nob, payload_niov, nid , pid); + + LASSERT (payload_nob == 0 || payload_niov > 0); + LASSERT (payload_niov <= PTL_MD_MAX_IOV); + + /* It must be OK to kmap() if required */ + LASSERT (payload_kiov == NULL || !in_interrupt ()); + /* payload is either all vaddrs or all pages */ + LASSERT (!(payload_kiov != NULL && payload_iov != NULL)); + + if (payload_iov != NULL) + desc_size = offsetof(ksock_ltx_t, ltx_iov[1 + payload_niov]); + else + desc_size = offsetof(ksock_ltx_t, ltx_kiov[payload_niov]); + + if (in_interrupt() || + type == PTL_MSG_ACK || + type == PTL_MSG_REPLY) { + /* Can't block if in interrupt or responding to an incoming + * message */ + PORTAL_ALLOC_ATOMIC(ltx, desc_size); + } else { + PORTAL_ALLOC(ltx, desc_size); + } + if (ltx == NULL) { - CERROR ("Can't allocate tx desc\n"); - return (NULL); + CERROR("Can't allocate tx desc type %d size %d %s\n", + type, desc_size, in_interrupt() ?
"(intr)" : ""); + return (PTL_NOSPACE); } - /* Init local send packet (storage for hdr, finalize() args) */ + atomic_inc(&ksocknal_data.ksnd_nactive_ltxs); + + ltx->ltx_desc_size = desc_size; + + /* We always have 1 mapped frag for the header */ + ltx->ltx_tx.tx_iov = ltx->ltx_iov; + ltx->ltx_iov[0].iov_base = &ltx->ltx_hdr; + ltx->ltx_iov[0].iov_len = sizeof(*hdr); ltx->ltx_hdr = *hdr; + ltx->ltx_private = private; ltx->ltx_cookie = cookie; - /* Init common ltx_tx */ ltx->ltx_tx.tx_isfwd = 0; - ltx->ltx_tx.tx_nob = sizeof (*hdr); + ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_nob; - /* We always have 1 mapped frag for the header */ - ltx->ltx_tx.tx_niov = 1; - ltx->ltx_tx.tx_iov = &ltx->ltx_iov_space.hdr; - ltx->ltx_tx.tx_iov[0].iov_base = &ltx->ltx_hdr; - ltx->ltx_tx.tx_iov[0].iov_len = sizeof (ltx->ltx_hdr); - - ltx->ltx_tx.tx_kiov = NULL; - ltx->ltx_tx.tx_nkiov = 0; - - return (ltx); -} - -int -ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int payload_niov, struct iovec *payload_iov, - size_t payload_len) -{ - ksock_ltx_t *ltx; - int rc; + if (payload_iov != NULL) { + /* payload is all mapped */ + ltx->ltx_tx.tx_kiov = NULL; + ltx->ltx_tx.tx_nkiov = 0; - /* NB 'private' is different depending on what we're sending.
- * Just ignore it until we can rely on it - */ + ltx->ltx_tx.tx_niov = 1 + payload_niov; - CDEBUG(D_NET, - "sending "LPSZ" bytes in %d mapped frags to nid: "LPX64 - " pid %d\n", payload_len, payload_niov, nid, pid); + memcpy(ltx->ltx_iov + 1, payload_iov, + payload_niov * sizeof (*payload_iov)); - ltx = ksocknal_setup_hdr (nal, private, cookie, hdr, type); - if (ltx == NULL) - return (PTL_FAIL); + } else { + /* payload is all pages */ + ltx->ltx_tx.tx_kiov = ltx->ltx_kiov; + ltx->ltx_tx.tx_nkiov = payload_niov; - /* append the payload_iovs to the one pointing at the header */ - LASSERT (ltx->ltx_tx.tx_niov == 1 && ltx->ltx_tx.tx_nkiov == 0); - LASSERT (payload_niov <= PTL_MD_MAX_IOV); + ltx->ltx_tx.tx_niov = 1; - memcpy (ltx->ltx_tx.tx_iov + 1, payload_iov, - payload_niov * sizeof (*payload_iov)); - ltx->ltx_tx.tx_niov = 1 + payload_niov; - ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len; + memcpy(ltx->ltx_kiov, payload_kiov, + payload_niov * sizeof (*payload_kiov)); + } - rc = ksocknal_launch_packet (&ltx->ltx_tx, nid); + rc = ksocknal_launch_packet(&ltx->ltx_tx, nid); if (rc == 0) return (PTL_OK); - ksocknal_put_ltx (ltx); + ksocknal_free_ltx(ltx); return (PTL_FAIL); } int +ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie, + ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int payload_niov, struct iovec *payload_iov, + size_t payload_len) +{ + return (ksocknal_sendmsg(nal, private, cookie, + hdr, type, nid, pid, + payload_niov, payload_iov, NULL, + payload_len)); +} + +int ksocknal_send_pages (nal_cb_t *nal, void *private, lib_msg_t *cookie, ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int payload_niov, ptl_kiov_t *payload_iov, size_t payload_len) + unsigned int payload_niov, ptl_kiov_t *payload_kiov, + size_t payload_len) { - ksock_ltx_t *ltx; - int rc; - - /* NB 'private' is different depending on what we're sending.
- * Just ignore it until we can rely on it */ - - CDEBUG(D_NET, - "sending "LPSZ" bytes in %d mapped frags to nid: "LPX64" pid %d\n", - payload_len, payload_niov, nid, pid); - - ltx = ksocknal_setup_hdr (nal, private, cookie, hdr, type); - if (ltx == NULL) - return (PTL_FAIL); - - LASSERT (ltx->ltx_tx.tx_niov == 1 && ltx->ltx_tx.tx_nkiov == 0); - LASSERT (payload_niov <= PTL_MD_MAX_IOV); - - ltx->ltx_tx.tx_kiov = ltx->ltx_iov_space.payload.kiov; - memcpy (ltx->ltx_tx.tx_kiov, payload_iov, - payload_niov * sizeof (*payload_iov)); - ltx->ltx_tx.tx_nkiov = payload_niov; - ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len; - - rc = ksocknal_launch_packet (&ltx->ltx_tx, nid); - if (rc == 0) - return (PTL_OK); - - ksocknal_put_ltx (ltx); - return (PTL_FAIL); + return (ksocknal_sendmsg(nal, private, cookie, + hdr, type, nid, pid, + payload_niov, NULL, payload_kiov, + payload_len)); } void @@ -1201,6 +1162,7 @@ ksocknal_fmb_callback (void *arg, int error) spin_lock_irqsave (&fmp->fmp_lock, flags); list_add (&fmb->fmb_list, &fmp->fmp_idle_fmbs); + fmp->fmp_nactive_fmbs--; if (!list_empty (&fmp->fmp_blocked_conns)) { conn = list_entry (fmb->fmb_pool->fmp_blocked_conns.next, @@ -1239,7 +1201,7 @@ ksocknal_get_idle_fmb (ksock_conn_t *conn) ksock_fmb_t *fmb; LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB); - LASSERT (ksocknal_data.ksnd_fmbs != NULL); + LASSERT (kpr_routing(&ksocknal_data.ksnd_router)); if (packet_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE) pool = &ksocknal_data.ksnd_small_fmp; @@ -1252,6 +1214,7 @@ ksocknal_get_idle_fmb (ksock_conn_t *conn) fmb = list_entry(pool->fmp_idle_fmbs.next, ksock_fmb_t, fmb_list); list_del (&fmb->fmb_list); + pool->fmp_nactive_fmbs++; spin_unlock_irqrestore (&pool->fmp_lock, flags); return (fmb); @@ -1394,7 +1357,7 @@ ksocknal_fwd_parse (ksock_conn_t *conn) return; } - if (ksocknal_data.ksnd_fmbs == NULL) { /* not forwarding */ + if (!kpr_routing(&ksocknal_data.ksnd_router)) { /* not forwarding */ CERROR("dropping packet from "LPX64"
(%s) for "LPX64 " (%s): not forwarding\n", src_nid, portals_nid2str(TCPNAL, src_nid, str), @@ -1522,7 +1485,7 @@ ksocknal_process_receive (ksock_conn_t *conn) LASSERT (conn->ksnc_rx_nob_wanted > 0); - rc = ksocknal_recvmsg(conn); + rc = ksocknal_receive(conn); if (rc <= 0) { LASSERT (rc != -EAGAIN); diff --git a/lustre/portals/libcfs/module.c b/lustre/portals/libcfs/module.c index dad710d..55e1935 100644 --- a/lustre/portals/libcfs/module.c +++ b/lustre/portals/libcfs/module.c @@ -143,7 +143,7 @@ kportal_memhog_alloc (struct portals_device_userstate *pdu, int npages, int flag if (npages == 0) return 0; - + level0p = &pdu->pdu_memhog_root_page; *level0p = alloc_page(flags); if (*level0p == NULL) @@ -665,7 +665,7 @@ static int kportal_ioctl(struct inode *inode, struct file *file, kportal_memhog_free(file->private_data); err = kportal_memhog_alloc(file->private_data, data->ioc_count, - GFP_NOFS); + data->ioc_flags); if (err != 0) kportal_memhog_free(file->private_data); } diff --git a/lustre/portals/utils/portals.c b/lustre/portals/utils/portals.c index aa9aa93..b46ee16 100644 --- a/lustre/portals/utils/portals.c +++ b/lustre/portals/utils/portals.c @@ -1691,13 +1691,15 @@ jt_ptl_lwt(int argc, char **argv) int jt_ptl_memhog(int argc, char **argv) { + static int gfp = 0; /* sticky! */ + struct portal_ioctl_data data; int rc; int count; char *end; - if (argc != 2) { - fprintf(stderr, "usage: %s <npages>\n", argv[0]); + if (argc < 2) { + fprintf(stderr, "usage: %s <npages> [<GFP flags>]\n", argv[0]); return 0; } @@ -1707,8 +1709,18 @@ int jt_ptl_memhog(int argc, char **argv) return -1; } + if (argc >= 3) { + rc = strtol(argv[2], &end, 0); + if (*end != 0) { + fprintf(stderr, "Can't parse gfp flags '%s'\n", argv[2]); + return -1; + } + gfp = rc; + } + PORTAL_IOC_INIT(data); data.ioc_count = count; + data.ioc_flags = gfp; rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_MEMHOG, &data); if (rc != 0) { -- 1.8.3.1