From 377e4cffe48720ddfa852bce7a14a10b71da4189 Mon Sep 17 00:00:00 2001 From: eeb Date: Thu, 4 Dec 2003 11:06:57 +0000 Subject: [PATCH] * Merged HEAD * "clobbered" (as Phil would say) kernel_patches/patches --- lnet/include/linux/kp30.h | 37 ++- lnet/include/lnet/lib-lnet.h | 20 +- lnet/include/lnet/lib-p30.h | 20 +- lnet/include/lnet/lnetctl.h | 1 + lnet/include/lnet/ptlctl.h | 1 + lnet/klnds/socklnd/socklnd.c | 114 +++----- lnet/klnds/socklnd/socklnd.h | 30 +-- lnet/klnds/socklnd/socklnd_cb.c | 291 +++++++++------------ lnet/libcfs/module.c | 150 ++++++++++- lnet/utils/portals.c | 84 +++++- .../kernel_patches/patches/2.6.0-test6-mm4.patch | 46 ++-- .../patches/gfp_memalloc-2.4.18-chaos.patch | 23 +- .../patches/gfp_memalloc-2.4.20-rh.patch | 11 + .../patches/gfp_memalloc-2.4.22.patch | 32 +-- .../patches/linux-2.4.22-xattr-0.8.54.patch | 160 +++++------ lustre/kernel_patches/series/rh-2.4.22 | 1 + lustre/kernel_patches/series/vanilla-2.4.19-pre1 | 1 + lustre/kernel_patches/series/vanilla-2.4.22 | 1 + lustre/portals/include/linux/kp30.h | 37 ++- lustre/portals/include/portals/lib-p30.h | 20 +- lustre/portals/include/portals/ptlctl.h | 1 + lustre/portals/knals/socknal/socknal.c | 114 +++----- lustre/portals/knals/socknal/socknal.h | 30 +-- lustre/portals/knals/socknal/socknal_cb.c | 291 +++++++++------------ lustre/portals/libcfs/module.c | 150 ++++++++++- lustre/portals/utils/portals.c | 84 +++++- 26 files changed, 1038 insertions(+), 712 deletions(-) diff --git a/lnet/include/linux/kp30.h b/lnet/include/linux/kp30.h index 3e6d5e3..3d60631 100644 --- a/lnet/include/linux/kp30.h +++ b/lnet/include/linux/kp30.h @@ -115,7 +115,7 @@ do { \ if (portal_cerror == 0) \ break; \ CHECK_STACK(CDEBUG_STACK); \ - if (!(mask) || ((mask) & (D_ERROR | D_EMERG)) || \ + if (!(mask) || ((mask) & (D_ERROR | D_EMERG | D_WARNING)) || \ (portal_debug & (mask) && \ portal_subsystem_debug & DEBUG_SUBSYSTEM)) \ portals_debug_msg(DEBUG_SUBSYSTEM, mask, \ @@ -283,17 +283,19 @@ do { \ #define 
GFP_MEMALLOC 0 #endif -#define PORTAL_ALLOC(ptr, size) \ +#define PORTAL_ALLOC_GFP(ptr, size, mask) \ do { \ LASSERT (!in_interrupt()); \ if ((size) > PORTAL_VMALLOC_SIZE) \ (ptr) = vmalloc(size); \ else \ - (ptr) = kmalloc((size), (GFP_KERNEL | GFP_MEMALLOC)); \ - if ((ptr) == NULL) \ + (ptr) = kmalloc((size), (mask)); \ + if ((ptr) == NULL) { \ CERROR("PORTALS: out of memory at %s:%d (tried to alloc '"\ #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size));\ - else { \ + CERROR("PORTALS: %d total bytes allocated by portals\n", \ + atomic_read(&portal_kmemory)); \ + } else { \ portal_kmem_inc((ptr), (size)); \ memset((ptr), 0, (size)); \ } \ @@ -301,6 +303,12 @@ do { \ (int)(size), (ptr), atomic_read (&portal_kmemory)); \ } while (0) +#define PORTAL_ALLOC(ptr, size) \ + PORTAL_ALLOC_GFP(ptr, size, (GFP_KERNEL | GFP_MEMALLOC)) + +#define PORTAL_ALLOC_ATOMIC(ptr, size) \ + PORTAL_ALLOC_GFP(ptr, size, (GFP_ATOMIC | GFP_MEMALLOC)) + #define PORTAL_FREE(ptr, size) \ do { \ int s = (size); \ @@ -330,11 +338,13 @@ do { \ CERROR("PORTALS: out of memory at %s:%d (tried to alloc" \ " '" #ptr "' from slab '" #slab "')\n", __FILE__, \ __LINE__); \ + CERROR("PORTALS: %d total bytes allocated by portals\n", \ + atomic_read(&portal_kmemory)); \ } else { \ portal_kmem_inc((ptr), (size)); \ memset((ptr), 0, (size)); \ } \ - CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %ld at %p (tot %d).\n", \ + CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d).\n", \ (int)(size), (ptr), atomic_read(&portal_kmemory)); \ } while (0) @@ -690,7 +700,10 @@ char *portals_nid2str(int nal, ptl_nid_t nid, char *str); /******************************************************************************/ /* Light-weight trace * Support for temporary event tracing with minimal Heisenberg effect. 
*/ -#define LWT_SUPPORT 1 +#define LWT_SUPPORT 0 + +#define LWT_MEMORY (64<<20) +#define LWT_MAX_CPUS 4 typedef struct { cycles_t lwte_when; @@ -728,7 +741,7 @@ extern void lwt_fini (void); extern int lwt_lookup_string (int *size, char *knlptr, char *usrptr, int usrsize); extern int lwt_control (int enable, int clear); -extern int lwt_snapshot (int *ncpu, int *total_size, +extern int lwt_snapshot (cycles_t *now, int *ncpu, int *total_size, void *user_ptr, int user_size); /* Note that we _don't_ define LWT_EVENT at all if LWT_SUPPORT isn't set. @@ -775,6 +788,11 @@ do { \ #endif /* __KERNEL__ */ #endif /* LWT_SUPPORT */ +struct portals_device_userstate +{ + int pdu_memhog_pages; + struct page *pdu_memhog_root_page; +}; #include @@ -1044,7 +1062,8 @@ static inline int portal_ioctl_getdata(char *buf, char *end, void *arg) #define IOC_PORTAL_LWT_CONTROL _IOWR('e', 39, long) #define IOC_PORTAL_LWT_SNAPSHOT _IOWR('e', 40, long) #define IOC_PORTAL_LWT_LOOKUP_STRING _IOWR('e', 41, long) -#define IOC_PORTAL_MAX_NR 41 +#define IOC_PORTAL_MEMHOG _IOWR('e', 42, long) +#define IOC_PORTAL_MAX_NR 42 enum { QSWNAL = 1, diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index 55fd720..c402828 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -168,7 +168,8 @@ static inline lib_eq_t * lib_eq_alloc (nal_cb_t *nal) { /* NEVER called with statelock held */ - lib_eq_t *eq = kmem_cache_alloc(ptl_eq_slab, GFP_NOFS); + lib_eq_t *eq; + PORTAL_SLAB_ALLOC(eq, ptl_eq_slab, sizeof(*eq)); if (eq == NULL) return (NULL); @@ -182,14 +183,15 @@ lib_eq_free (nal_cb_t *nal, lib_eq_t *eq) { /* ALWAYS called with statelock held */ atomic_dec (&eq_in_use_count); - kmem_cache_free(ptl_eq_slab, eq); + PORTAL_SLAB_FREE(eq, ptl_eq_slab, sizeof(*eq)); } static inline lib_md_t * lib_md_alloc (nal_cb_t *nal) { /* NEVER called with statelock held */ - lib_md_t *md = kmem_cache_alloc(ptl_md_slab, GFP_NOFS); + lib_md_t *md; + PORTAL_SLAB_ALLOC(md, ptl_md_slab, 
sizeof(*md)); if (md == NULL) return (NULL); @@ -203,14 +205,15 @@ lib_md_free (nal_cb_t *nal, lib_md_t *md) { /* ALWAYS called with statelock held */ atomic_dec (&md_in_use_count); - kmem_cache_free(ptl_md_slab, md); + PORTAL_SLAB_FREE(md, ptl_md_slab, sizeof(*md)); } static inline lib_me_t * lib_me_alloc (nal_cb_t *nal) { /* NEVER called with statelock held */ - lib_me_t *me = kmem_cache_alloc(ptl_me_slab, GFP_NOFS); + lib_me_t *me; + PORTAL_SLAB_ALLOC(me, ptl_me_slab, sizeof(*me)); if (me == NULL) return (NULL); @@ -224,14 +227,15 @@ lib_me_free(nal_cb_t *nal, lib_me_t *me) { /* ALWAYS called with statelock held */ atomic_dec (&me_in_use_count); - kmem_cache_free(ptl_me_slab, me); + PORTAL_SLAB_FREE(me, ptl_me_slab, sizeof(*me)); } static inline lib_msg_t * lib_msg_alloc(nal_cb_t *nal) { /* ALWAYS called with statelock held */ - lib_msg_t *msg = kmem_cache_alloc(ptl_msg_slab, GFP_ATOMIC); + lib_msg_t *msg; + PORTAL_SLAB_ALLOC(msg, ptl_msg_slab, sizeof(*msg)); if (msg == NULL) return (NULL); @@ -245,7 +249,7 @@ lib_msg_free(nal_cb_t *nal, lib_msg_t *msg) { /* ALWAYS called with statelock held */ atomic_dec (&msg_in_use_count); - kmem_cache_free(ptl_msg_slab, msg); + PORTAL_SLAB_FREE(msg, ptl_msg_slab, sizeof(*msg)); } #endif diff --git a/lnet/include/lnet/lib-p30.h b/lnet/include/lnet/lib-p30.h index 55fd720..c402828 100644 --- a/lnet/include/lnet/lib-p30.h +++ b/lnet/include/lnet/lib-p30.h @@ -168,7 +168,8 @@ static inline lib_eq_t * lib_eq_alloc (nal_cb_t *nal) { /* NEVER called with statelock held */ - lib_eq_t *eq = kmem_cache_alloc(ptl_eq_slab, GFP_NOFS); + lib_eq_t *eq; + PORTAL_SLAB_ALLOC(eq, ptl_eq_slab, sizeof(*eq)); if (eq == NULL) return (NULL); @@ -182,14 +183,15 @@ lib_eq_free (nal_cb_t *nal, lib_eq_t *eq) { /* ALWAYS called with statelock held */ atomic_dec (&eq_in_use_count); - kmem_cache_free(ptl_eq_slab, eq); + PORTAL_SLAB_FREE(eq, ptl_eq_slab, sizeof(*eq)); } static inline lib_md_t * lib_md_alloc (nal_cb_t *nal) { /* NEVER called with statelock 
held */ - lib_md_t *md = kmem_cache_alloc(ptl_md_slab, GFP_NOFS); + lib_md_t *md; + PORTAL_SLAB_ALLOC(md, ptl_md_slab, sizeof(*md)); if (md == NULL) return (NULL); @@ -203,14 +205,15 @@ lib_md_free (nal_cb_t *nal, lib_md_t *md) { /* ALWAYS called with statelock held */ atomic_dec (&md_in_use_count); - kmem_cache_free(ptl_md_slab, md); + PORTAL_SLAB_FREE(md, ptl_md_slab, sizeof(*md)); } static inline lib_me_t * lib_me_alloc (nal_cb_t *nal) { /* NEVER called with statelock held */ - lib_me_t *me = kmem_cache_alloc(ptl_me_slab, GFP_NOFS); + lib_me_t *me; + PORTAL_SLAB_ALLOC(me, ptl_me_slab, sizeof(*me)); if (me == NULL) return (NULL); @@ -224,14 +227,15 @@ lib_me_free(nal_cb_t *nal, lib_me_t *me) { /* ALWAYS called with statelock held */ atomic_dec (&me_in_use_count); - kmem_cache_free(ptl_me_slab, me); + PORTAL_SLAB_FREE(me, ptl_me_slab, sizeof(*me)); } static inline lib_msg_t * lib_msg_alloc(nal_cb_t *nal) { /* ALWAYS called with statelock held */ - lib_msg_t *msg = kmem_cache_alloc(ptl_msg_slab, GFP_ATOMIC); + lib_msg_t *msg; + PORTAL_SLAB_ALLOC(msg, ptl_msg_slab, sizeof(*msg)); if (msg == NULL) return (NULL); @@ -245,7 +249,7 @@ lib_msg_free(nal_cb_t *nal, lib_msg_t *msg) { /* ALWAYS called with statelock held */ atomic_dec (&msg_in_use_count); - kmem_cache_free(ptl_msg_slab, msg); + PORTAL_SLAB_FREE(msg, ptl_msg_slab, sizeof(*msg)); } #endif diff --git a/lnet/include/lnet/lnetctl.h b/lnet/include/lnet/lnetctl.h index f581e72..12ef47a 100644 --- a/lnet/include/lnet/lnetctl.h +++ b/lnet/include/lnet/lnetctl.h @@ -59,6 +59,7 @@ int jt_ptl_notify_router (int argc, char **argv); int jt_ptl_print_routes (int argc, char **argv); int jt_ptl_fail_nid (int argc, char **argv); int jt_ptl_lwt(int argc, char **argv); +int jt_ptl_memhog(int argc, char **argv); int dbg_initialize(int argc, char **argv); int jt_dbg_filter(int argc, char **argv); diff --git a/lnet/include/lnet/ptlctl.h b/lnet/include/lnet/ptlctl.h index f581e72..12ef47a 100644 --- a/lnet/include/lnet/ptlctl.h +++ 
b/lnet/include/lnet/ptlctl.h @@ -59,6 +59,7 @@ int jt_ptl_notify_router (int argc, char **argv); int jt_ptl_print_routes (int argc, char **argv); int jt_ptl_fail_nid (int argc, char **argv); int jt_ptl_lwt(int argc, char **argv); +int jt_ptl_memhog(int argc, char **argv); int dbg_initialize(int argc, char **argv); int jt_dbg_filter(int argc, char **argv); diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index 6f6fa7e..6de511c 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -1395,30 +1395,35 @@ ksocknal_cmd(struct portals_cfg *pcfg, void * private) } void -ksocknal_free_buffers (void) +ksocknal_free_fmbs (ksock_fmb_pool_t *p) { - if (ksocknal_data.ksnd_fmbs != NULL) { - ksock_fmb_t *fmb = (ksock_fmb_t *)ksocknal_data.ksnd_fmbs; - int i; - int j; - - for (i = 0; - i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); - i++, fmb++) - for (j = 0; j < fmb->fmb_npages; j++) - if (fmb->fmb_pages[j] != NULL) - __free_page (fmb->fmb_pages[j]); - - PORTAL_FREE (ksocknal_data.ksnd_fmbs, - sizeof (ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + - SOCKNAL_LARGE_FWD_NMSGS)); + ksock_fmb_t *fmb; + int i; + + LASSERT (list_empty(&p->fmp_blocked_conns)); + LASSERT (p->fmp_nactive_fmbs == 0); + + while (!list_empty(&p->fmp_idle_fmbs)) { + + fmb = list_entry(p->fmp_idle_fmbs.next, + ksock_fmb_t, fmb_list); + + for (i = 0; i < fmb->fmb_npages; i++) + if (fmb->fmb_pages[i] != NULL) + __free_page(fmb->fmb_pages[i]); + + list_del(&fmb->fmb_list); + PORTAL_FREE(fmb, sizeof(*fmb)); } +} + +void +ksocknal_free_buffers (void) +{ + ksocknal_free_fmbs(&ksocknal_data.ksnd_small_fmp); + ksocknal_free_fmbs(&ksocknal_data.ksnd_large_fmp); - LASSERT (ksocknal_data.ksnd_active_ltxs == 0); - if (ksocknal_data.ksnd_ltxs != NULL) - PORTAL_FREE (ksocknal_data.ksnd_ltxs, - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + - SOCKNAL_NNBLK_LTXS)); + LASSERT (atomic_read(&ksocknal_data.ksnd_nactive_ltxs) == 0); if (ksocknal_data.ksnd_schedulers != NULL) PORTAL_FREE 
(ksocknal_data.ksnd_schedulers, @@ -1572,7 +1577,7 @@ ksocknal_module_init (void) PORTAL_ALLOC (ksocknal_data.ksnd_peers, sizeof (struct list_head) * ksocknal_data.ksnd_peer_hash_size); if (ksocknal_data.ksnd_peers == NULL) - RETURN (-ENOMEM); + return (-ENOMEM); for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]); @@ -1590,11 +1595,6 @@ ksocknal_module_init (void) INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_idle_fmbs); INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns); - spin_lock_init(&ksocknal_data.ksnd_idle_ltx_lock); - INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_nblk_ltx_list); - INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_ltx_list); - init_waitqueue_head(&ksocknal_data.ksnd_idle_ltx_waitq); - spin_lock_init (&ksocknal_data.ksnd_reaper_lock); INIT_LIST_HEAD (&ksocknal_data.ksnd_zombie_conns); INIT_LIST_HEAD (&ksocknal_data.ksnd_deathrow_conns); @@ -1614,7 +1614,7 @@ ksocknal_module_init (void) sizeof(ksock_sched_t) * SOCKNAL_N_SCHED); if (ksocknal_data.ksnd_schedulers == NULL) { ksocknal_module_fini (); - RETURN(-ENOMEM); + return (-ENOMEM); } for (i = 0; i < SOCKNAL_N_SCHED; i++) { @@ -1629,35 +1629,11 @@ ksocknal_module_init (void) init_waitqueue_head (&kss->kss_waitq); } - CDEBUG (D_MALLOC, "ltx "LPSZ", total "LPSZ"\n", sizeof (ksock_ltx_t), - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS)); - - PORTAL_ALLOC(ksocknal_data.ksnd_ltxs, - sizeof(ksock_ltx_t) * (SOCKNAL_NLTXS +SOCKNAL_NNBLK_LTXS)); - if (ksocknal_data.ksnd_ltxs == NULL) { - ksocknal_module_fini (); - return (-ENOMEM); - } - - /* Deterministic bugs please */ - memset (ksocknal_data.ksnd_ltxs, 0xeb, - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS)); - - for (i = 0; i < SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS; i++) { - ksock_ltx_t *ltx = &((ksock_ltx_t *)ksocknal_data.ksnd_ltxs)[i]; - - ltx->ltx_tx.tx_hdr = &ltx->ltx_hdr; - ltx->ltx_idle = i < SOCKNAL_NLTXS ?
- &ksocknal_data.ksnd_idle_ltx_list : - &ksocknal_data.ksnd_idle_nblk_ltx_list; - list_add (&ltx->ltx_tx.tx_list, ltx->ltx_idle); - } - rc = PtlNIInit(ksocknal_init, 32, 4, 0, &ksocknal_ni); if (rc != 0) { CERROR("ksocknal: PtlNIInit failed: error %d\n", rc); ksocknal_module_fini (); - RETURN (rc); + return (rc); } PtlNIDebug(ksocknal_ni, ~0); @@ -1670,7 +1646,7 @@ ksocknal_module_init (void) CERROR("Can't spawn socknal scheduler[%d]: %d\n", i, rc); ksocknal_module_fini (); - RETURN (rc); + return (rc); } } @@ -1679,7 +1655,7 @@ ksocknal_module_init (void) if (rc != 0) { CERROR("Can't spawn socknal autoconnectd: %d\n", rc); ksocknal_module_fini (); - RETURN (rc); + return (rc); } } @@ -1687,7 +1663,7 @@ ksocknal_module_init (void) if (rc != 0) { CERROR ("Can't spawn socknal reaper: %d\n", rc); ksocknal_module_fini (); - RETURN (rc); + return (rc); } rc = kpr_register(&ksocknal_data.ksnd_router, @@ -1698,23 +1674,15 @@ ksocknal_module_init (void) } else { /* Only allocate forwarding buffers if I'm on a gateway */ - PORTAL_ALLOC(ksocknal_data.ksnd_fmbs, - sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + - SOCKNAL_LARGE_FWD_NMSGS)); - if (ksocknal_data.ksnd_fmbs == NULL) { - ksocknal_module_fini (); - RETURN(-ENOMEM); - } - - /* NULL out buffer pointers etc */ - memset(ksocknal_data.ksnd_fmbs, 0, - sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + - SOCKNAL_LARGE_FWD_NMSGS)); - for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); i++) { - ksock_fmb_t *fmb = - &((ksock_fmb_t *)ksocknal_data.ksnd_fmbs)[i]; + ksock_fmb_t *fmb; + + PORTAL_ALLOC(fmb, sizeof(*fmb)); + if (fmb == NULL) { + ksocknal_module_fini(); + return (-ENOMEM); + } if (i < SOCKNAL_SMALL_FWD_NMSGS) { fmb->fmb_npages = SOCKNAL_SMALL_FWD_PAGES; @@ -1724,7 +1692,6 @@ ksocknal_module_init (void) fmb->fmb_pool = &ksocknal_data.ksnd_large_fmp; } - LASSERT (fmb->fmb_npages > 0); for (j = 0; j < fmb->fmb_npages; j++) { fmb->fmb_pages[j] = alloc_page(GFP_KERNEL); @@ -1733,8 +1700,7 @@
ksocknal_module_init (void) return (-ENOMEM); } - LASSERT(page_address (fmb->fmb_pages[j]) != - NULL); + LASSERT(page_address(fmb->fmb_pages[j]) != NULL); } list_add(&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs); diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h index 227a24f..9dbe415 100644 --- a/lnet/klnds/socklnd/socklnd.h +++ b/lnet/klnds/socklnd/socklnd.h @@ -82,9 +82,6 @@ #define SOCKNAL_PEER_HASH_SIZE 101 /* # peer lists */ -#define SOCKNAL_NLTXS 128 /* # normal transmit messages */ -#define SOCKNAL_NNBLK_LTXS 128 /* # transmit messages reserved if can't block */ - #define SOCKNAL_SMALL_FWD_NMSGS 128 /* # small messages I can be forwarding at any time */ #define SOCKNAL_LARGE_FWD_NMSGS 64 /* # large messages I can be forwarding at any time */ @@ -113,8 +110,9 @@ typedef struct /* pool of forwarding buffers */ { spinlock_t fmp_lock; /* serialise */ - struct list_head fmp_idle_fmbs; /* buffers waiting for a connection */ + struct list_head fmp_idle_fmbs; /* free buffers */ struct list_head fmp_blocked_conns; /* connections waiting for a buffer */ + int fmp_nactive_fmbs; /* # buffers in use */ } ksock_fmb_pool_t; @@ -164,16 +162,10 @@ typedef struct { kpr_router_t ksnd_router; /* THE router */ - void *ksnd_fmbs; /* all the pre-allocated FMBs */ ksock_fmb_pool_t ksnd_small_fmp; /* small message forwarding buffers */ ksock_fmb_pool_t ksnd_large_fmp; /* large message forwarding buffers */ - void *ksnd_ltxs; /* all the pre-allocated LTXs */ - spinlock_t ksnd_idle_ltx_lock; /* serialise ltx alloc/free */ - struct list_head ksnd_idle_ltx_list; /* where to get an idle LTX */ - struct list_head ksnd_idle_nblk_ltx_list; /* where to get an idle LTX if you can't block */ - wait_queue_head_t ksnd_idle_ltx_waitq; /* where to block for an idle LTX */ - int ksnd_active_ltxs; /* #active ltxs */ + atomic_t ksnd_nactive_ltxs; /* #active ltxs */ struct list_head ksnd_deathrow_conns; /* conns to be closed */ struct list_head ksnd_zombie_conns; /* conns to 
be freed */ @@ -233,25 +225,15 @@ typedef struct /* transmit packet */ #define KSOCK_ZCCD_2_TX(ptr) list_entry (ptr, ksock_tx_t, tx_zccd) /* network zero copy callback descriptor embedded in ksock_tx_t */ -/* space for the tx frag descriptors: hdr is always 1 iovec - * and payload is PTL_MD_MAX of either type. */ -typedef struct -{ - struct iovec hdr; - union { - struct iovec iov[PTL_MD_MAX_IOV]; - ptl_kiov_t kiov[PTL_MD_MAX_IOV]; - } payload; -} ksock_txiovspace_t; - typedef struct /* locally transmitted packet */ { ksock_tx_t ltx_tx; /* send info */ - struct list_head *ltx_idle; /* where to put when idle */ void *ltx_private; /* lib_finalize() callback arg */ void *ltx_cookie; /* lib_finalize() callback arg */ - ksock_txiovspace_t ltx_iov_space; /* where to stash frag descriptors */ ptl_hdr_t ltx_hdr; /* buffer for packet header */ + int ltx_desc_size; /* bytes allocated for this desc */ + struct iovec ltx_iov[1]; /* iov for hdr + payload */ + ptl_kiov_t ltx_kiov[0]; /* kiov for payload */ } ksock_ltx_t; #define KSOCK_TX_2_KPR_FWD_DESC(ptr) list_entry ((kprfd_scratch_t *)ptr, kpr_fwd_desc_t, kprfd_scratch) diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c index 6ea4fa8..22345fe 100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ b/lnet/klnds/socklnd/socklnd_cb.c @@ -129,60 +129,11 @@ ksocknal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist) return 0; } -ksock_ltx_t * -ksocknal_get_ltx (int may_block) -{ - unsigned long flags; - ksock_ltx_t *ltx = NULL; - - for (;;) { - spin_lock_irqsave (&ksocknal_data.ksnd_idle_ltx_lock, flags); - - if (!list_empty (&ksocknal_data.ksnd_idle_ltx_list)) { - ltx = list_entry(ksocknal_data.ksnd_idle_ltx_list.next, - ksock_ltx_t, ltx_tx.tx_list); - list_del (&ltx->ltx_tx.tx_list); - ksocknal_data.ksnd_active_ltxs++; - break; - } - - if (!may_block) { - if (!list_empty(&ksocknal_data.ksnd_idle_nblk_ltx_list)) { - ltx = list_entry(ksocknal_data.ksnd_idle_nblk_ltx_list.next, - ksock_ltx_t, ltx_tx.tx_list);
- list_del (&ltx->ltx_tx.tx_list); - ksocknal_data.ksnd_active_ltxs++; - } - break; - } - - spin_unlock_irqrestore(&ksocknal_data.ksnd_idle_ltx_lock, - flags); - - wait_event (ksocknal_data.ksnd_idle_ltx_waitq, - !list_empty (&ksocknal_data.ksnd_idle_ltx_list)); - } - - spin_unlock_irqrestore (&ksocknal_data.ksnd_idle_ltx_lock, flags); - - return (ltx); -} - void -ksocknal_put_ltx (ksock_ltx_t *ltx) +ksocknal_free_ltx (ksock_ltx_t *ltx) { - unsigned long flags; - - spin_lock_irqsave (&ksocknal_data.ksnd_idle_ltx_lock, flags); - - ksocknal_data.ksnd_active_ltxs--; - list_add_tail (&ltx->ltx_tx.tx_list, ltx->ltx_idle); - - /* normal tx desc => wakeup anyone blocking for one */ - if (ltx->ltx_idle == &ksocknal_data.ksnd_idle_ltx_list) - wake_up (&ksocknal_data.ksnd_idle_ltx_waitq); - - spin_unlock_irqrestore (&ksocknal_data.ksnd_idle_ltx_lock, flags); + atomic_dec(&ksocknal_data.ksnd_nactive_ltxs); + PORTAL_FREE(ltx, ltx->ltx_desc_size); } #if SOCKNAL_ZC @@ -364,7 +315,7 @@ ksocknal_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) } int -ksocknal_sendmsg (ksock_conn_t *conn, ksock_tx_t *tx) +ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx) { /* Return 0 on success, < 0 on error. * caller checks tx_resid to determine progress/completion */ @@ -377,17 +328,14 @@ ksocknal_sendmsg (ksock_conn_t *conn, ksock_tx_t *tx) } rc = ksocknal_getconnsock (conn); - if (rc != 0) + if (rc != 0) { + LASSERT (conn->ksnc_closing); return (rc); + } for (;;) { LASSERT (tx->tx_resid != 0); - if (conn->ksnc_closing) { - rc = -ESHUTDOWN; - break; - } - if (tx->tx_niov != 0) rc = ksocknal_send_iov (conn, tx); else @@ -554,7 +502,7 @@ ksocknal_recv_kiov (ksock_conn_t *conn) } int -ksocknal_recvmsg (ksock_conn_t *conn) +ksocknal_receive (ksock_conn_t *conn) { /* Return 1 on success, 0 on EOF, < 0 on error.
* Caller checks ksnc_rx_nob_wanted to determine @@ -568,15 +516,12 @@ ksocknal_recvmsg (ksock_conn_t *conn) } rc = ksocknal_getconnsock (conn); - if (rc != 0) + if (rc != 0) { + LASSERT (conn->ksnc_closing); return (rc); + } for (;;) { - if (conn->ksnc_closing) { - rc = -ESHUTDOWN; - break; - } - if (conn->ksnc_rx_niov != 0) rc = ksocknal_recv_iov (conn); else @@ -665,7 +610,7 @@ ksocknal_tx_done (ksock_tx_t *tx, int asynch) lib_finalize (&ksocknal_lib, ltx->ltx_private, ltx->ltx_cookie); - ksocknal_put_ltx (ltx); + ksocknal_free_ltx (ltx); EXIT; } @@ -696,7 +641,7 @@ ksocknal_process_transmit (ksock_conn_t *conn, ksock_tx_t *tx) { int rc; - rc = ksocknal_sendmsg (conn, tx); + rc = ksocknal_transmit (conn, tx); CDEBUG (D_NET, "send(%d) %d\n", tx->tx_resid, rc); LASSERT (rc != -EAGAIN); @@ -840,13 +785,17 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn) unsigned long flags; ksock_sched_t *sched = conn->ksnc_scheduler; - /* called holding global lock (read or irq-write) */ - + /* called holding global lock (read or irq-write) and caller may + * not have dropped this lock between finding conn and calling me, + * so we don't need the {get,put}connsock dance to deref + * ksnc_sock... */ + LASSERT(!conn->ksnc_closing); + LASSERT(tx->tx_resid == tx->tx_nob); + CDEBUG (D_NET, "Sending to "LPX64" on port %d\n", conn->ksnc_peer->ksnp_nid, conn->ksnc_port); atomic_add (tx->tx_nob, &conn->ksnc_tx_nob); - tx->tx_resid = tx->tx_nob; tx->tx_conn = conn; #if SOCKNAL_ZC @@ -854,7 +803,6 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn) /* NB this sets 1 ref on zccd, so the callback can only occur after * I've released this ref. 
*/ #endif - spin_lock_irqsave (&sched->kss_lock, flags); conn->ksnc_tx_deadline = jiffies + @@ -960,6 +908,8 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid) tx->tx_nob, tx->tx_niov, tx->tx_nkiov); tx->tx_conn = NULL; /* only set when assigned a conn */ + tx->tx_resid = tx->tx_nob; + tx->tx_hdr = (ptl_hdr_t *)tx->tx_iov[0].iov_base; g_lock = &ksocknal_data.ksnd_global_lock; read_lock (g_lock); @@ -1024,115 +974,125 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid) return (-EHOSTUNREACH); } -ksock_ltx_t * -ksocknal_setup_hdr (nal_cb_t *nal, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type) +int +ksocknal_sendmsg(nal_cb_t *nal, + void *private, + lib_msg_t *cookie, + ptl_hdr_t *hdr, + int type, + ptl_nid_t nid, + ptl_pid_t pid, + unsigned int payload_niov, + struct iovec *payload_iov, + ptl_kiov_t *payload_kiov, + size_t payload_nob) { ksock_ltx_t *ltx; + int desc_size; + int rc; + + /* NB 'private' is different depending on what we're sending. + * Just ignore it... */ + + CDEBUG(D_NET, "sending "LPSZ" bytes in %d frags to nid:"LPX64 + " pid %d\n", payload_nob, payload_niov, nid , pid); - /* I may not block for a transmit descriptor if I might block the - * receiver, or an interrupt handler. 
*/ - ltx = ksocknal_get_ltx (!(type == PTL_MSG_ACK || - type == PTL_MSG_REPLY || - in_interrupt ())); + LASSERT (payload_nob == 0 || payload_niov > 0); + LASSERT (payload_niov <= PTL_MD_MAX_IOV); + + /* It must be OK to kmap() if required */ + LASSERT (payload_kiov == NULL || !in_interrupt ()); + /* payload is either all vaddrs or all pages */ + LASSERT (!(payload_kiov != NULL && payload_iov != NULL)); + + if (payload_iov != NULL) + desc_size = offsetof(ksock_ltx_t, ltx_iov[1 + payload_niov]); + else + desc_size = offsetof(ksock_ltx_t, ltx_kiov[payload_niov]); + + if (in_interrupt() || + type == PTL_MSG_ACK || + type == PTL_MSG_REPLY) { + /* Can't block if in interrupt or responding to an incoming + * message */ + PORTAL_ALLOC_ATOMIC(ltx, desc_size); + } else { + PORTAL_ALLOC(ltx, desc_size); + } + if (ltx == NULL) { - CERROR ("Can't allocate tx desc\n"); - return (NULL); + CERROR("Can't allocate tx desc type %d size %d %s\n", + type, desc_size, in_interrupt() ? "(intr)" : ""); + return (PTL_NOSPACE); } - /* Init local send packet (storage for hdr, finalize() args) */ + atomic_inc(&ksocknal_data.ksnd_nactive_ltxs); + + ltx->ltx_desc_size = desc_size; + + /* We always have 1 mapped frag for the header */ + ltx->ltx_tx.tx_iov = ltx->ltx_iov; + ltx->ltx_iov[0].iov_base = &ltx->ltx_hdr; + ltx->ltx_iov[0].iov_len = sizeof(*hdr); ltx->ltx_hdr = *hdr; + ltx->ltx_private = private; ltx->ltx_cookie = cookie; - /* Init common ltx_tx */ ltx->ltx_tx.tx_isfwd = 0; - ltx->ltx_tx.tx_nob = sizeof (*hdr); - - /* We always have 1 mapped frag for the header */ - ltx->ltx_tx.tx_niov = 1; - ltx->ltx_tx.tx_iov = &ltx->ltx_iov_space.hdr; - ltx->ltx_tx.tx_iov[0].iov_base = &ltx->ltx_hdr; - ltx->ltx_tx.tx_iov[0].iov_len = sizeof (ltx->ltx_hdr); - - ltx->ltx_tx.tx_kiov = NULL; - ltx->ltx_tx.tx_nkiov = 0; + ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_nob; - return (ltx); -} - -int -ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t
pid, - unsigned int payload_niov, struct iovec *payload_iov, - size_t payload_len) -{ - ksock_ltx_t *ltx; - int rc; + if (payload_iov != NULL) { + /* payload is all mapped */ + ltx->ltx_tx.tx_kiov = NULL; + ltx->ltx_tx.tx_nkiov = 0; - /* NB 'private' is different depending on what we're sending. - * Just ignore it until we can rely on it - */ + ltx->ltx_tx.tx_niov = 1 + payload_niov; - CDEBUG(D_NET, - "sending "LPSZ" bytes in %d mapped frags to nid: "LPX64 - " pid %d\n", payload_len, payload_niov, nid, pid); + memcpy(ltx->ltx_iov + 1, payload_iov, + payload_niov * sizeof (*payload_iov)); - ltx = ksocknal_setup_hdr (nal, private, cookie, hdr, type); - if (ltx == NULL) - return (PTL_FAIL); + } else { + /* payload is all pages */ + ltx->ltx_tx.tx_kiov = ltx->ltx_kiov; + ltx->ltx_tx.tx_nkiov = payload_niov; - /* append the payload_iovs to the one pointing at the header */ - LASSERT (ltx->ltx_tx.tx_niov == 1 && ltx->ltx_tx.tx_nkiov == 0); - LASSERT (payload_niov <= PTL_MD_MAX_IOV); + ltx->ltx_tx.tx_niov = 1; - memcpy (ltx->ltx_tx.tx_iov + 1, payload_iov, - payload_niov * sizeof (*payload_iov)); - ltx->ltx_tx.tx_niov = 1 + payload_niov; - ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len; + memcpy(ltx->ltx_kiov, payload_kiov, + payload_niov * sizeof (*payload_kiov)); + } - rc = ksocknal_launch_packet (&ltx->ltx_tx, nid); + rc = ksocknal_launch_packet(&ltx->ltx_tx, nid); if (rc == 0) return (PTL_OK); - ksocknal_put_ltx (ltx); + ksocknal_free_ltx(ltx); return (PTL_FAIL); } int +ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie, + ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int payload_niov, struct iovec *payload_iov, + size_t payload_len) +{ + return (ksocknal_sendmsg(nal, private, cookie, + hdr, type, nid, pid, + payload_niov, payload_iov, NULL, + payload_len)); +} + +int ksocknal_send_pages (nal_cb_t *nal, void *private, lib_msg_t *cookie, ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int payload_niov, ptl_kiov_t
*payload_iov, size_t payload_len) + unsigned int payload_niov, ptl_kiov_t *payload_kiov, + size_t payload_len) { - ksock_ltx_t *ltx; - int rc; - - /* NB 'private' is different depending on what we're sending. - * Just ignore it until we can rely on it */ - - CDEBUG(D_NET, - "sending "LPSZ" bytes in %d mapped frags to nid: "LPX64" pid %d\n", - payload_len, payload_niov, nid, pid); - - ltx = ksocknal_setup_hdr (nal, private, cookie, hdr, type); - if (ltx == NULL) - return (PTL_FAIL); - - LASSERT (ltx->ltx_tx.tx_niov == 1 && ltx->ltx_tx.tx_nkiov == 0); - LASSERT (payload_niov <= PTL_MD_MAX_IOV); - - ltx->ltx_tx.tx_kiov = ltx->ltx_iov_space.payload.kiov; - memcpy (ltx->ltx_tx.tx_kiov, payload_iov, - payload_niov * sizeof (*payload_iov)); - ltx->ltx_tx.tx_nkiov = payload_niov; - ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len; - - rc = ksocknal_launch_packet (&ltx->ltx_tx, nid); - if (rc == 0) - return (PTL_OK); - - ksocknal_put_ltx (ltx); - return (PTL_FAIL); + return (ksocknal_sendmsg(nal, private, cookie, + hdr, type, nid, pid, + payload_niov, NULL, payload_kiov, + payload_len)); } void @@ -1155,7 +1115,6 @@ ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) tx->tx_iov = fwd->kprfd_iov; tx->tx_nkiov = 0; tx->tx_kiov = NULL; - tx->tx_hdr = (ptl_hdr_t *)fwd->kprfd_iov[0].iov_base; rc = ksocknal_launch_packet (tx, nid); if (rc != 0) @@ -1204,6 +1163,7 @@ ksocknal_fmb_callback (void *arg, int error) spin_lock_irqsave (&fmp->fmp_lock, flags); list_add (&fmb->fmb_list, &fmp->fmp_idle_fmbs); + fmp->fmp_nactive_fmbs--; if (!list_empty (&fmp->fmp_blocked_conns)) { conn = list_entry (fmb->fmb_pool->fmp_blocked_conns.next, @@ -1242,7 +1202,7 @@ ksocknal_get_idle_fmb (ksock_conn_t *conn) ksock_fmb_t *fmb; LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB); - LASSERT (ksocknal_data.ksnd_fmbs != NULL); + LASSERT (kpr_routing(&ksocknal_data.ksnd_router)); if (packet_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE) pool = &ksocknal_data.ksnd_small_fmp; @@ -1255,6 +1215,7 @@
ksocknal_get_idle_fmb (ksock_conn_t *conn) fmb = list_entry(pool->fmp_idle_fmbs.next, ksock_fmb_t, fmb_list); list_del (&fmb->fmb_list); + pool->fmp_nactive_fmbs++; spin_unlock_irqrestore (&pool->fmp_lock, flags); return (fmb); @@ -1397,7 +1358,7 @@ ksocknal_fwd_parse (ksock_conn_t *conn) return; } - if (ksocknal_data.ksnd_fmbs == NULL) { /* not forwarding */ + if (!kpr_routing(&ksocknal_data.ksnd_router)) { /* not forwarding */ CERROR("dropping packet from "LPX64" (%s) for "LPX64 " (%s): not forwarding\n", src_nid, portals_nid2str(TCPNAL, src_nid, str), @@ -1525,9 +1486,11 @@ ksocknal_process_receive (ksock_conn_t *conn) LASSERT (conn->ksnc_rx_nob_wanted > 0); - rc = ksocknal_recvmsg(conn); + rc = ksocknal_receive(conn); if (rc <= 0) { + LASSERT (rc != -EAGAIN); + if (rc == 0) CWARN ("[%p] EOF from "LPX64" ip %08x:%d\n", conn, conn->ksnc_peer->ksnp_nid, @@ -1766,9 +1729,9 @@ int ksocknal_scheduler (void *arg) * kss_lock. */ conn->ksnc_tx_ready = 0; spin_unlock_irqrestore (&sched->kss_lock, flags); - + rc = ksocknal_process_transmit(conn, tx); - + spin_lock_irqsave (&sched->kss_lock, flags); if (rc != -EAGAIN) { @@ -1851,7 +1814,7 @@ ksocknal_data_ready (struct sock *sk, int n) read_lock (&ksocknal_data.ksnd_global_lock); conn = sk->sk_user_data; - if (conn == NULL) { /* raced with ksocknal_close_sock */ + if (conn == NULL) { /* raced with ksocknal_terminate_conn */ LASSERT (sk->sk_data_ready != &ksocknal_data_ready); sk->sk_data_ready (sk, n); } else { @@ -1900,7 +1863,7 @@ ksocknal_write_space (struct sock *sk) (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ? 
" empty" : " queued")); - if (conn == NULL) { /* raced with ksocknal_close_sock */ + if (conn == NULL) { /* raced with ksocknal_terminate_conn */ LASSERT (sk->sk_write_space != &ksocknal_write_space); sk->sk_write_space (sk); @@ -2136,7 +2099,7 @@ ksocknal_setup_sock (struct socket *sock) int option; struct linger linger; - sock->sk->allocation = GFP_NOFS; + sock->sk->allocation = GFP_MEMALLOC; /* Ensure this socket aborts active sends immediately when we close * it. */ @@ -2421,6 +2384,8 @@ ksocknal_autoconnectd (void *arg) kportal_daemonize (name); kportal_blockallsigs (); + current->flags |= PF_MEMALLOC; + spin_lock_irqsave (&ksocknal_data.ksnd_autoconnectd_lock, flags); while (!ksocknal_data.ksnd_shuttingdown) { @@ -2548,6 +2513,8 @@ ksocknal_reaper (void *arg) init_waitqueue_entry (&wait, current); + current->flags |= PF_MEMALLOC; + spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags); while (!ksocknal_data.ksnd_shuttingdown) { diff --git a/lnet/libcfs/module.c b/lnet/libcfs/module.c index 7c0cafc..55e1935 100644 --- a/lnet/libcfs/module.c +++ b/lnet/libcfs/module.c @@ -83,6 +83,115 @@ kportal_daemonize (char *str) } void +kportal_memhog_free (struct portals_device_userstate *pdu) +{ + struct page **level0p = &pdu->pdu_memhog_root_page; + struct page **level1p; + struct page **level2p; + int count1; + int count2; + + if (*level0p != NULL) { + + level1p = (struct page **)page_address(*level0p); + count1 = 0; + + while (count1 < PAGE_SIZE/sizeof(struct page *) && + *level1p != NULL) { + + level2p = (struct page **)page_address(*level1p); + count2 = 0; + + while (count2 < PAGE_SIZE/sizeof(struct page *) && + *level2p != NULL) { + + __free_page(*level2p); + pdu->pdu_memhog_pages--; + level2p++; + count2++; + } + + __free_page(*level1p); + pdu->pdu_memhog_pages--; + level1p++; + count1++; + } + + __free_page(*level0p); + pdu->pdu_memhog_pages--; + + *level0p = NULL; + } + + LASSERT (pdu->pdu_memhog_pages == 0); +} + +int +kportal_memhog_alloc (struct 
portals_device_userstate *pdu, int npages, int flags) +{ + struct page **level0p; + struct page **level1p; + struct page **level2p; + int count1; + int count2; + + LASSERT (pdu->pdu_memhog_pages == 0); + LASSERT (pdu->pdu_memhog_root_page == NULL); + + if (npages < 0) + return -EINVAL; + + if (npages == 0) + return 0; + + level0p = &pdu->pdu_memhog_root_page; + *level0p = alloc_page(flags); + if (*level0p == NULL) + return -ENOMEM; + pdu->pdu_memhog_pages++; + + level1p = (struct page **)page_address(*level0p); + count1 = 0; + memset(level1p, 0, PAGE_SIZE); + + while (pdu->pdu_memhog_pages < npages && + count1 < PAGE_SIZE/sizeof(struct page *)) { + + if (signal_pending(current)) + return (-EINTR); + + *level1p = alloc_page(flags); + if (*level1p == NULL) + return -ENOMEM; + pdu->pdu_memhog_pages++; + + level2p = (struct page **)page_address(*level1p); + count2 = 0; + memset(level2p, 0, PAGE_SIZE); + + while (pdu->pdu_memhog_pages < npages && + count2 < PAGE_SIZE/sizeof(struct page *)) { + + if (signal_pending(current)) + return (-EINTR); + + *level2p = alloc_page(flags); + if (*level2p == NULL) + return (-ENOMEM); + pdu->pdu_memhog_pages++; + + level2p++; + count2++; + } + + level1p++; + count1++; + } + + return 0; +} + +void kportal_blockallsigs () { unsigned long flags; @@ -96,22 +205,39 @@ kportal_blockallsigs () /* called when opening /dev/device */ static int kportal_psdev_open(struct inode * inode, struct file * file) { + struct portals_device_userstate *pdu; ENTRY; - + if (!inode) RETURN(-EINVAL); + PORTAL_MODULE_USE; + + PORTAL_ALLOC(pdu, sizeof(*pdu)); + if (pdu != NULL) { + pdu->pdu_memhog_pages = 0; + pdu->pdu_memhog_root_page = NULL; + } + file->private_data = pdu; + RETURN(0); } /* called when closing /dev/device */ static int kportal_psdev_release(struct inode * inode, struct file * file) { + struct portals_device_userstate *pdu; ENTRY; if (!inode) RETURN(-EINVAL); + pdu = file->private_data; + if (pdu != NULL) { + kportal_memhog_free(pdu); + 
PORTAL_FREE(pdu, sizeof(*pdu)); + } + PORTAL_MODULE_UNUSE; RETURN(0); } @@ -514,7 +640,8 @@ static int kportal_ioctl(struct inode *inode, struct file *file, break; case IOC_PORTAL_LWT_SNAPSHOT: - err = lwt_snapshot (&data->ioc_count, &data->ioc_misc, + err = lwt_snapshot (&data->ioc_nid, + &data->ioc_count, &data->ioc_misc, data->ioc_pbuf1, data->ioc_plen1); if (err == 0 && copy_to_user((char *)arg, data, sizeof (*data))) @@ -528,7 +655,22 @@ static int kportal_ioctl(struct inode *inode, struct file *file, copy_to_user((char *)arg, data, sizeof (*data))) err = -EFAULT; break; -#endif +#endif + case IOC_PORTAL_MEMHOG: + if (!capable (CAP_SYS_ADMIN)) + err = -EPERM; + else if (file->private_data == NULL) { + err = -EINVAL; + } else { + kportal_memhog_free(file->private_data); + err = kportal_memhog_alloc(file->private_data, + data->ioc_count, + data->ioc_flags); + if (err != 0) + kportal_memhog_free(file->private_data); + } + break; + default: err = -EINVAL; break; @@ -612,8 +754,8 @@ static int init_kportals_module(void) cleanup_lwt: #if LWT_SUPPORT lwt_fini(); -#endif cleanup_debug: +#endif portals_debug_cleanup(); return rc; } diff --git a/lnet/utils/portals.c b/lnet/utils/portals.c index f191664..2326fed 100644 --- a/lnet/utils/portals.c +++ b/lnet/utils/portals.c @@ -1389,7 +1389,8 @@ lwt_control(int enable, int clear) } static int -lwt_snapshot(int *ncpu, int *totalsize, lwt_event_t *events, int size) +lwt_snapshot(cycles_t *now, int *ncpu, int *totalsize, + lwt_event_t *events, int size) { struct portal_ioctl_data data; int rc; @@ -1408,6 +1409,9 @@ lwt_snapshot(int *ncpu, int *totalsize, lwt_event_t *events, int size) LASSERT (data.ioc_count != 0); LASSERT (data.ioc_misc != 0); + if (now != NULL) + *now = data.ioc_nid; + if (ncpu != NULL) *ncpu = data.ioc_count; @@ -1517,14 +1521,13 @@ get_cycles_per_usec () int jt_ptl_lwt(int argc, char **argv) { -#define MAX_CPUS 8 int ncpus; int totalspace; int nevents_per_cpu; lwt_event_t *events; - lwt_event_t 
*cpu_event[MAX_CPUS + 1]; - lwt_event_t *next_event[MAX_CPUS]; - lwt_event_t *first_event[MAX_CPUS]; + lwt_event_t *cpu_event[LWT_MAX_CPUS + 1]; + lwt_event_t *next_event[LWT_MAX_CPUS]; + lwt_event_t *first_event[LWT_MAX_CPUS]; int cpu; lwt_event_t *e; int rc; @@ -1532,6 +1535,9 @@ jt_ptl_lwt(int argc, char **argv) double mhz; cycles_t t0; cycles_t tlast; + cycles_t tnow; + struct timeval tvnow; + int printed_date = 0; FILE *f = stdout; if (argc < 2 || @@ -1559,11 +1565,12 @@ jt_ptl_lwt(int argc, char **argv) return (0); } - if (lwt_snapshot(&ncpus, &totalspace, NULL, 0) != 0) + if (lwt_snapshot(NULL, &ncpus, &totalspace, NULL, 0) != 0) return (-1); - if (ncpus > MAX_CPUS) { - fprintf(stderr, "Too many cpus: %d (%d)\n", ncpus, MAX_CPUS); + if (ncpus > LWT_MAX_CPUS) { + fprintf(stderr, "Too many cpus: %d (%d)\n", + ncpus, LWT_MAX_CPUS); return (-1); } @@ -1578,11 +1585,14 @@ jt_ptl_lwt(int argc, char **argv) return (-1); } - if (lwt_snapshot(NULL, NULL, events, totalspace)) { + if (lwt_snapshot(&tnow, NULL, NULL, events, totalspace)) { free(events); return (-1); } + /* we want this time to be sampled at snapshot time */ + gettimeofday(&tvnow, NULL); + if (argc > 2) { f = fopen (argv[2], "w"); if (f == NULL) { @@ -1663,6 +1673,17 @@ jt_ptl_lwt(int argc, char **argv) if (t0 <= next_event[cpu]->lwte_when) { /* on or after the first event */ + if (!printed_date) { + cycles_t du = (tnow - t0) / mhz; + time_t then = tvnow.tv_sec - du/1000000; + + if (du % 1000000 > tvnow.tv_usec) + then--; + + fprintf(f, "%s", ctime(&then)); + printed_date = 1; + } + rc = lwt_print(f, t0, tlast, mhz, cpu, next_event[cpu]); if (rc != 0) break; @@ -1684,5 +1705,48 @@ jt_ptl_lwt(int argc, char **argv) free(events); return (0); -#undef MAX_CPUS } + +int jt_ptl_memhog(int argc, char **argv) +{ + static int gfp = 0; /* sticky! 
*/ + + struct portal_ioctl_data data; + int rc; + int count; + char *end; + + if (argc < 2) { + fprintf(stderr, "usage: %s []\n", argv[0]); + return 0; + } + + count = strtol(argv[1], &end, 0); + if (count < 0 || *end != 0) { + fprintf(stderr, "Can't parse page count '%s'\n", argv[1]); + return -1; + } + + if (argc >= 3) { + rc = strtol(argv[2], &end, 0); + if (*end != 0) { + fprintf(stderr, "Can't parse gfp flags '%s'\n", argv[2]); + return -1; + } + gfp = rc; + } + + PORTAL_IOC_INIT(data); + data.ioc_count = count; + data.ioc_flags = gfp; + rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_MEMHOG, &data); + + if (rc != 0) { + fprintf(stderr, "memhog %d failed: %s\n", count, strerror(errno)); + return -1; + } + + printf("memhog %d OK\n", count); + return 0; +} + diff --git a/lustre/kernel_patches/patches/2.6.0-test6-mm4.patch b/lustre/kernel_patches/patches/2.6.0-test6-mm4.patch index 20d5af9..851b00f 100644 --- a/lustre/kernel_patches/patches/2.6.0-test6-mm4.patch +++ b/lustre/kernel_patches/patches/2.6.0-test6-mm4.patch @@ -14430,7 +14430,7 @@ +++ 25/arch/parisc/lib/checksum.c 2003-10-05 00:33:23.000000000 -0700 @@ -16,8 +16,10 @@ * - * $Id: 2.6.0-test6-mm4.patch,v 1.1.4.1 2003/10/10 09:31:08 ericm Exp $ + * $Id: 2.6.0-test6-mm4.patch,v 1.1.4.2 2003/12/04 11:06:35 eeb Exp $ */ -#include +#include @@ -31511,8 +31511,8 @@ --- linux-2.6.0-test6/drivers/char/ftape/compressor/zftape-compress.c 2003-06-14 12:18:32.000000000 -0700 +++ 25/drivers/char/ftape/compressor/zftape-compress.c 2003-10-05 00:33:24.000000000 -0700 @@ -31,6 +31,7 @@ - char zftc_rev[] = "$Revision: 1.1.4.1 $"; - char zftc_dat[] = "$Date: 2003/10/10 09:31:08 $"; + char zftc_rev[] = "$Revision: 1.1.4.2 $"; + char zftc_dat[] = "$Date: 2003/12/04 11:06:35 $"; +#include #include @@ -37169,8 +37169,8 @@ --- linux-2.6.0-test6/drivers/isdn/hardware/eicon/divamnt.c 2003-09-27 18:57:44.000000000 -0700 +++ 25/drivers/isdn/hardware/eicon/divamnt.c 2003-10-05 00:33:24.000000000 -0700 @@ -1,4 +1,4 @@ --/* $Id: 
2.6.0-test6-mm4.patch,v 1.1.4.1 2003/10/10 09:31:08 ericm Exp $ -+/* $Id: 2.6.0-test6-mm4.patch,v 1.1.4.1 2003/10/10 09:31:08 ericm Exp $ +-/* $Id: 2.6.0-test6-mm4.patch,v 1.1.4.2 2003/12/04 11:06:35 eeb Exp $ ++/* $Id: 2.6.0-test6-mm4.patch,v 1.1.4.2 2003/12/04 11:06:35 eeb Exp $ * * Driver for Eicon DIVA Server ISDN cards. * Maint module @@ -37181,16 +37181,16 @@ -#include "di_defs.h" #include "debug_if.h" --static char *main_revision = "$Revision: 1.1.4.1 $"; -+static char *main_revision = "$Revision: 1.1.4.1 $"; +-static char *main_revision = "$Revision: 1.1.4.2 $"; ++static char *main_revision = "$Revision: 1.1.4.2 $"; static int major; --- linux-2.6.0-test6/drivers/isdn/hardware/eicon/divasmain.c 2003-09-27 18:57:44.000000000 -0700 +++ 25/drivers/isdn/hardware/eicon/divasmain.c 2003-10-05 00:33:24.000000000 -0700 @@ -1,4 +1,4 @@ --/* $Id: 2.6.0-test6-mm4.patch,v 1.1.4.1 2003/10/10 09:31:08 ericm Exp $ -+/* $Id: 2.6.0-test6-mm4.patch,v 1.1.4.1 2003/10/10 09:31:08 ericm Exp $ +-/* $Id: 2.6.0-test6-mm4.patch,v 1.1.4.2 2003/12/04 11:06:35 eeb Exp $ ++/* $Id: 2.6.0-test6-mm4.patch,v 1.1.4.2 2003/12/04 11:06:35 eeb Exp $ * * Low level driver for Eicon DIVA Server ISDN cards. 
* @@ -37212,16 +37212,16 @@ #include "diva_dma.h" #include "diva_pci.h" --static char *main_revision = "$Revision: 1.1.4.1 $"; -+static char *main_revision = "$Revision: 1.1.4.1 $"; +-static char *main_revision = "$Revision: 1.1.4.2 $"; ++static char *main_revision = "$Revision: 1.1.4.2 $"; static int major; --- linux-2.6.0-test6/drivers/isdn/hardware/eicon/dqueue.c 2003-06-14 12:18:22.000000000 -0700 +++ 25/drivers/isdn/hardware/eicon/dqueue.c 2003-10-05 00:33:24.000000000 -0700 @@ -1,10 +1,10 @@ --/* $Id: 2.6.0-test6-mm4.patch,v 1.1.4.1 2003/10/10 09:31:08 ericm Exp $ -+/* $Id: 2.6.0-test6-mm4.patch,v 1.1.4.1 2003/10/10 09:31:08 ericm Exp $ +-/* $Id: 2.6.0-test6-mm4.patch,v 1.1.4.2 2003/12/04 11:06:35 eeb Exp $ ++/* $Id: 2.6.0-test6-mm4.patch,v 1.1.4.2 2003/12/04 11:06:35 eeb Exp $ * * Driver for Eicon DIVA Server ISDN cards. * User Mode IDI Interface @@ -37236,8 +37236,8 @@ --- linux-2.6.0-test6/drivers/isdn/hardware/eicon/mntfunc.c 2003-09-27 18:57:44.000000000 -0700 +++ 25/drivers/isdn/hardware/eicon/mntfunc.c 2003-10-05 00:33:24.000000000 -0700 @@ -1,4 +1,4 @@ --/* $Id: 2.6.0-test6-mm4.patch,v 1.1.4.1 2003/10/10 09:31:08 ericm Exp $ -+/* $Id: 2.6.0-test6-mm4.patch,v 1.1.4.1 2003/10/10 09:31:08 ericm Exp $ +-/* $Id: 2.6.0-test6-mm4.patch,v 1.1.4.2 2003/12/04 11:06:35 eeb Exp $ ++/* $Id: 2.6.0-test6-mm4.patch,v 1.1.4.2 2003/12/04 11:06:35 eeb Exp $ * * Driver for Eicon DIVA Server ISDN cards. 
* Maint module @@ -37252,8 +37252,8 @@ --- linux-2.6.0-test6/drivers/isdn/hardware/eicon/os_capi.h 2003-06-14 12:18:25.000000000 -0700 +++ 25/drivers/isdn/hardware/eicon/os_capi.h 2003-10-05 00:33:24.000000000 -0700 @@ -1,10 +1,10 @@ --/* $Id: 2.6.0-test6-mm4.patch,v 1.1.4.1 2003/10/10 09:31:08 ericm Exp $ -+/* $Id: 2.6.0-test6-mm4.patch,v 1.1.4.1 2003/10/10 09:31:08 ericm Exp $ +-/* $Id: 2.6.0-test6-mm4.patch,v 1.1.4.2 2003/12/04 11:06:35 eeb Exp $ ++/* $Id: 2.6.0-test6-mm4.patch,v 1.1.4.2 2003/12/04 11:06:35 eeb Exp $ * * ISDN interface module for Eicon active cards DIVA. * CAPI Interface OS include files @@ -37268,8 +37268,8 @@ --- linux-2.6.0-test6/drivers/isdn/hardware/eicon/platform.h 2003-09-27 18:57:44.000000000 -0700 +++ 25/drivers/isdn/hardware/eicon/platform.h 2003-10-05 00:33:24.000000000 -0700 @@ -1,4 +1,4 @@ --/* $Id: 2.6.0-test6-mm4.patch,v 1.1.4.1 2003/10/10 09:31:08 ericm Exp $ -+/* $Id: 2.6.0-test6-mm4.patch,v 1.1.4.1 2003/10/10 09:31:08 ericm Exp $ +-/* $Id: 2.6.0-test6-mm4.patch,v 1.1.4.2 2003/12/04 11:06:35 eeb Exp $ ++/* $Id: 2.6.0-test6-mm4.patch,v 1.1.4.2 2003/12/04 11:06:35 eeb Exp $ * * platform.h * @@ -37754,7 +37754,7 @@ +++ 25/drivers/media/video/planb.c 2003-10-05 00:33:24.000000000 -0700 @@ -27,7 +27,6 @@ - /* $Id: 2.6.0-test6-mm4.patch,v 1.1.4.1 2003/10/10 09:31:08 ericm Exp $ */ + /* $Id: 2.6.0-test6-mm4.patch,v 1.1.4.2 2003/12/04 11:06:35 eeb Exp $ */ -#include #include @@ -38069,7 +38069,7 @@ --- linux-2.6.0-test6/drivers/mtd/chips/map_rom.c 2003-06-14 12:18:24.000000000 -0700 +++ 25/drivers/mtd/chips/map_rom.c 2003-10-05 00:33:24.000000000 -0700 @@ -4,7 +4,6 @@ - * $Id: 2.6.0-test6-mm4.patch,v 1.1.4.1 2003/10/10 09:31:08 ericm Exp $ + * $Id: 2.6.0-test6-mm4.patch,v 1.1.4.2 2003/12/04 11:06:35 eeb Exp $ */ -#include @@ -42159,8 +42159,8 @@ #include /* Version */ --static const char version[] = "$Id: 2.6.0-test6-mm4.patch,v 1.1.4.1 2003/10/10 09:31:08 ericm Exp $ for Linux\n"; -+static const char version[] = "$Id: 
2.6.0-test6-mm4.patch,v 1.1.4.1 2003/10/10 09:31:08 ericm Exp $ for Linux\n"; +-static const char version[] = "$Id: 2.6.0-test6-mm4.patch,v 1.1.4.2 2003/12/04 11:06:35 eeb Exp $ for Linux\n"; ++static const char version[] = "$Id: 2.6.0-test6-mm4.patch,v 1.1.4.2 2003/12/04 11:06:35 eeb Exp $ for Linux\n"; static int debug; static int quartz; diff --git a/lustre/kernel_patches/patches/gfp_memalloc-2.4.18-chaos.patch b/lustre/kernel_patches/patches/gfp_memalloc-2.4.18-chaos.patch index a8489e6..97cd9dc 100644 --- a/lustre/kernel_patches/patches/gfp_memalloc-2.4.18-chaos.patch +++ b/lustre/kernel_patches/patches/gfp_memalloc-2.4.18-chaos.patch @@ -1,7 +1,7 @@ Index: linux-2.4.18-chaos/include/linux/mm.h =================================================================== ---- linux-2.4.18-chaos.orig/include/linux/mm.h 2003-11-13 17:06:48.000000000 +0300 -+++ linux-2.4.18-chaos/include/linux/mm.h 2003-11-17 15:46:32.000000000 +0300 +--- linux-2.4.18-chaos.orig/include/linux/mm.h 2003-11-23 00:07:20.000000000 +0300 ++++ linux-2.4.18-chaos/include/linux/mm.h 2003-11-23 00:07:23.000000000 +0300 @@ -677,6 +677,7 @@ #define __GFP_IO 0x40 /* Can start low memory physical IO? */ #define __GFP_HIGHIO 0x80 /* Can start high mem physical IO? */ @@ -20,8 +20,8 @@ Index: linux-2.4.18-chaos/include/linux/mm.h platforms, used as appropriate on others */ Index: linux-2.4.18-chaos/mm/page_alloc.c =================================================================== ---- linux-2.4.18-chaos.orig/mm/page_alloc.c 2003-11-13 17:06:47.000000000 +0300 -+++ linux-2.4.18-chaos/mm/page_alloc.c 2003-11-17 15:49:11.000000000 +0300 +--- linux-2.4.18-chaos.orig/mm/page_alloc.c 2003-11-23 00:07:20.000000000 +0300 ++++ linux-2.4.18-chaos/mm/page_alloc.c 2003-12-02 23:12:31.000000000 +0300 @@ -554,7 +554,7 @@ /* * Oh well, we didn't succeed. @@ -31,10 +31,21 @@ Index: linux-2.4.18-chaos/mm/page_alloc.c /* * Are we dealing with a higher order allocation? 
* +@@ -628,7 +628,9 @@ + + /* XXX: is pages_min/4 a good amount to reserve for this? */ + min += z->pages_min / 4; +- if (z->free_pages > min || ((current->flags & PF_MEMALLOC) && !in_interrupt())) { ++ if (z->free_pages > min || ++ (((current->flags & PF_MEMALLOC) || (gfp_mask & __GFP_MEMALLOC)) ++ && !in_interrupt())) { + page = rmqueue(z, order); + if (page) + return page; Index: linux-2.4.18-chaos/include/linux/slab.h =================================================================== --- linux-2.4.18-chaos.orig/include/linux/slab.h 2003-07-28 17:52:18.000000000 +0400 -+++ linux-2.4.18-chaos/include/linux/slab.h 2003-11-17 15:46:32.000000000 +0300 ++++ linux-2.4.18-chaos/include/linux/slab.h 2003-11-23 00:07:23.000000000 +0300 @@ -23,6 +23,7 @@ #define SLAB_KERNEL GFP_KERNEL #define SLAB_NFS GFP_NFS @@ -46,7 +57,7 @@ Index: linux-2.4.18-chaos/include/linux/slab.h Index: linux-2.4.18-chaos/mm/slab.c =================================================================== --- linux-2.4.18-chaos.orig/mm/slab.c 2003-07-28 17:52:20.000000000 +0400 -+++ linux-2.4.18-chaos/mm/slab.c 2003-11-17 15:46:32.000000000 +0300 ++++ linux-2.4.18-chaos/mm/slab.c 2003-11-23 00:07:23.000000000 +0300 @@ -1116,7 +1116,7 @@ /* Be lazy and only check for valid flags here, * keeping it out of the critical path in kmem_cache_alloc(). diff --git a/lustre/kernel_patches/patches/gfp_memalloc-2.4.20-rh.patch b/lustre/kernel_patches/patches/gfp_memalloc-2.4.20-rh.patch index 3f37e44..79caa76 100644 --- a/lustre/kernel_patches/patches/gfp_memalloc-2.4.20-rh.patch +++ b/lustre/kernel_patches/patches/gfp_memalloc-2.4.20-rh.patch @@ -32,6 +32,17 @@ Index: linux-2.4.20-rh-20.9/mm/page_alloc.c /* * Are we dealing with a higher order allocation? * +@@ -583,7 +583,9 @@ + + /* XXX: is pages_min/4 a good amount to reserve for this? 
*/ + min += z->pages_min / 4; +- if (z->free_pages > min || ((current->flags & PF_MEMALLOC) && !in_interrupt())) { ++ if (z->free_pages > min || ++ (((current->flags & PF_MEMALLOC) || (gfp_mask & __GFP_MEMALLOC)) ++ && !in_interrupt())) { + page = rmqueue(z, order); + if (page) + return page; Index: linux-2.4.20-rh-20.9/include/linux/slab.h =================================================================== --- linux-2.4.20-rh-20.9.orig/include/linux/slab.h 2003-11-13 17:35:48.000000000 +0300 diff --git a/lustre/kernel_patches/patches/gfp_memalloc-2.4.22.patch b/lustre/kernel_patches/patches/gfp_memalloc-2.4.22.patch index 015bfc8..92e79c8 100644 --- a/lustre/kernel_patches/patches/gfp_memalloc-2.4.22.patch +++ b/lustre/kernel_patches/patches/gfp_memalloc-2.4.22.patch @@ -1,8 +1,8 @@ -Index: linux-2.4.22-vanilla/include/linux/mm.h +Index: linux-2.4.20/include/linux/mm.h =================================================================== ---- linux-2.4.22-vanilla.orig/include/linux/mm.h 2003-11-17 15:26:32.000000000 +0300 -+++ linux-2.4.22-vanilla/include/linux/mm.h 2003-11-17 15:40:32.000000000 +0300 -@@ -612,6 +612,7 @@ +--- linux-2.4.20.orig/include/linux/mm.h 2003-12-01 17:07:14.000000000 +0300 ++++ linux-2.4.20/include/linux/mm.h 2003-12-02 23:17:06.000000000 +0300 +@@ -614,6 +614,7 @@ #define __GFP_IO 0x40 /* Can start low memory physical IO? */ #define __GFP_HIGHIO 0x80 /* Can start high mem physical IO? */ #define __GFP_FS 0x100 /* Can call down to low-level FS? 
*/ @@ -10,7 +10,7 @@ Index: linux-2.4.22-vanilla/include/linux/mm.h #define GFP_NOHIGHIO (__GFP_HIGH | __GFP_WAIT | __GFP_IO) #define GFP_NOIO (__GFP_HIGH | __GFP_WAIT) -@@ -622,6 +623,7 @@ +@@ -624,6 +625,7 @@ #define GFP_KERNEL (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS) #define GFP_NFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS) #define GFP_KSWAPD ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS) @@ -18,24 +18,24 @@ Index: linux-2.4.22-vanilla/include/linux/mm.h /* Flag - indicates that the buffer will be suitable for DMA. Ignored on some platforms, used as appropriate on others */ -Index: linux-2.4.22-vanilla/mm/page_alloc.c +Index: linux-2.4.20/mm/page_alloc.c =================================================================== ---- linux-2.4.22-vanilla.orig/mm/page_alloc.c 2003-11-13 18:19:51.000000000 +0300 -+++ linux-2.4.22-vanilla/mm/page_alloc.c 2003-11-17 15:40:32.000000000 +0300 +--- linux-2.4.20.orig/mm/page_alloc.c 2003-12-01 17:02:43.000000000 +0300 ++++ linux-2.4.20/mm/page_alloc.c 2003-12-02 23:21:56.000000000 +0300 @@ -377,7 +377,8 @@ /* here we're in the low on memory slow path */ rebalance: - if (current->flags & (PF_MEMALLOC | PF_MEMDIE)) { + if (current->flags & (PF_MEMALLOC | PF_MEMDIE) || -+ gfp_mask & __GFP_MEMALLOC) { ++ (gfp_mask & __GFP_MEMALLOC)) { zone = zonelist->zones; for (;;) { zone_t *z = *(zone++); -Index: linux-2.4.22-vanilla/include/linux/slab.h +Index: linux-2.4.20/include/linux/slab.h =================================================================== ---- linux-2.4.22-vanilla.orig/include/linux/slab.h 2003-11-17 14:58:37.000000000 +0300 -+++ linux-2.4.22-vanilla/include/linux/slab.h 2003-11-17 15:42:13.000000000 +0300 +--- linux-2.4.20.orig/include/linux/slab.h 2003-12-01 17:07:14.000000000 +0300 ++++ linux-2.4.20/include/linux/slab.h 2003-12-02 23:17:06.000000000 +0300 @@ -23,6 +23,7 @@ #define SLAB_KERNEL GFP_KERNEL #define SLAB_NFS GFP_NFS @@ -44,11 +44,11 @@ Index: 
linux-2.4.22-vanilla/include/linux/slab.h #define SLAB_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_HIGHIO|__GFP_FS) #define SLAB_NO_GROW 0x00001000UL /* don't grow a cache */ -Index: linux-2.4.22-vanilla/mm/slab.c +Index: linux-2.4.20/mm/slab.c =================================================================== ---- linux-2.4.22-vanilla.orig/mm/slab.c 2003-11-13 17:39:29.000000000 +0300 -+++ linux-2.4.22-vanilla/mm/slab.c 2003-11-17 15:42:13.000000000 +0300 -@@ -1115,7 +1115,7 @@ +--- linux-2.4.20.orig/mm/slab.c 2003-12-01 17:02:34.000000000 +0300 ++++ linux-2.4.20/mm/slab.c 2003-12-02 23:17:06.000000000 +0300 +@@ -1113,7 +1113,7 @@ /* Be lazy and only check for valid flags here, * keeping it out of the critical path in kmem_cache_alloc(). */ diff --git a/lustre/kernel_patches/patches/linux-2.4.22-xattr-0.8.54.patch b/lustre/kernel_patches/patches/linux-2.4.22-xattr-0.8.54.patch index c5abbf3..937aa40 100644 --- a/lustre/kernel_patches/patches/linux-2.4.22-xattr-0.8.54.patch +++ b/lustre/kernel_patches/patches/linux-2.4.22-xattr-0.8.54.patch @@ -67,7 +67,7 @@ Index: linux-2.4.22-vanilla/Documentation/Configure.help =================================================================== --- linux-2.4.22-vanilla.orig/Documentation/Configure.help 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/Documentation/Configure.help 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/Documentation/Configure.help 2003-12-02 23:55:38.000000000 +0300 @@ -15613,6 +15613,39 @@ be compiled as a module, and so this could be dangerous. Most everyone wants to say Y here. 
@@ -151,7 +151,7 @@ Index: linux-2.4.22-vanilla/Documentation/Configure.help Index: linux-2.4.22-vanilla/arch/alpha/defconfig =================================================================== --- linux-2.4.22-vanilla.orig/arch/alpha/defconfig 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/alpha/defconfig 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/arch/alpha/defconfig 2003-12-02 23:55:38.000000000 +0300 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -169,7 +169,7 @@ Index: linux-2.4.22-vanilla/arch/alpha/defconfig Index: linux-2.4.22-vanilla/arch/alpha/kernel/entry.S =================================================================== --- linux-2.4.22-vanilla.orig/arch/alpha/kernel/entry.S 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/alpha/kernel/entry.S 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/arch/alpha/kernel/entry.S 2003-12-02 23:55:38.000000000 +0300 @@ -1154,6 +1154,18 @@ .quad sys_readahead .quad sys_ni_syscall /* 380, sys_security */ @@ -192,7 +192,7 @@ Index: linux-2.4.22-vanilla/arch/alpha/kernel/entry.S Index: linux-2.4.22-vanilla/arch/arm/defconfig =================================================================== --- linux-2.4.22-vanilla.orig/arch/arm/defconfig 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/arm/defconfig 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/arch/arm/defconfig 2003-12-02 23:55:38.000000000 +0300 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -210,7 +210,7 @@ Index: linux-2.4.22-vanilla/arch/arm/defconfig Index: linux-2.4.22-vanilla/arch/arm/kernel/calls.S =================================================================== --- linux-2.4.22-vanilla.orig/arch/arm/kernel/calls.S 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/arm/kernel/calls.S 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/arch/arm/kernel/calls.S 2003-12-02 
23:55:38.000000000 +0300 @@ -240,18 +240,18 @@ .long SYMBOL_NAME(sys_ni_syscall) /* Security */ .long SYMBOL_NAME(sys_gettid) @@ -245,7 +245,7 @@ Index: linux-2.4.22-vanilla/arch/arm/kernel/calls.S Index: linux-2.4.22-vanilla/arch/i386/defconfig =================================================================== --- linux-2.4.22-vanilla.orig/arch/i386/defconfig 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/i386/defconfig 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/arch/i386/defconfig 2003-12-02 23:55:38.000000000 +0300 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -263,7 +263,7 @@ Index: linux-2.4.22-vanilla/arch/i386/defconfig Index: linux-2.4.22-vanilla/arch/ia64/defconfig =================================================================== --- linux-2.4.22-vanilla.orig/arch/ia64/defconfig 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/ia64/defconfig 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/arch/ia64/defconfig 2003-12-02 23:55:38.000000000 +0300 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -281,7 +281,7 @@ Index: linux-2.4.22-vanilla/arch/ia64/defconfig Index: linux-2.4.22-vanilla/arch/m68k/defconfig =================================================================== --- linux-2.4.22-vanilla.orig/arch/m68k/defconfig 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/m68k/defconfig 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/arch/m68k/defconfig 2003-12-02 23:55:38.000000000 +0300 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -299,7 +299,7 @@ Index: linux-2.4.22-vanilla/arch/m68k/defconfig Index: linux-2.4.22-vanilla/arch/mips/defconfig =================================================================== --- linux-2.4.22-vanilla.orig/arch/mips/defconfig 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/mips/defconfig 2003-11-03 23:41:29.000000000 +0300 ++++ 
linux-2.4.22-vanilla/arch/mips/defconfig 2003-12-02 23:55:38.000000000 +0300 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -317,7 +317,7 @@ Index: linux-2.4.22-vanilla/arch/mips/defconfig Index: linux-2.4.22-vanilla/arch/mips64/defconfig =================================================================== --- linux-2.4.22-vanilla.orig/arch/mips64/defconfig 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/mips64/defconfig 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/arch/mips64/defconfig 2003-12-02 23:55:38.000000000 +0300 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -335,7 +335,7 @@ Index: linux-2.4.22-vanilla/arch/mips64/defconfig Index: linux-2.4.22-vanilla/arch/s390/defconfig =================================================================== --- linux-2.4.22-vanilla.orig/arch/s390/defconfig 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/s390/defconfig 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/arch/s390/defconfig 2003-12-02 23:55:38.000000000 +0300 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -353,7 +353,7 @@ Index: linux-2.4.22-vanilla/arch/s390/defconfig Index: linux-2.4.22-vanilla/arch/s390/kernel/entry.S =================================================================== --- linux-2.4.22-vanilla.orig/arch/s390/kernel/entry.S 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/s390/kernel/entry.S 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/arch/s390/kernel/entry.S 2003-12-02 23:55:38.000000000 +0300 @@ -558,18 +558,18 @@ .long sys_fcntl64 .long sys_readahead @@ -388,7 +388,7 @@ Index: linux-2.4.22-vanilla/arch/s390/kernel/entry.S Index: linux-2.4.22-vanilla/arch/s390x/defconfig =================================================================== --- linux-2.4.22-vanilla.orig/arch/s390x/defconfig 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/s390x/defconfig 
2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/arch/s390x/defconfig 2003-12-02 23:55:38.000000000 +0300 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -406,7 +406,7 @@ Index: linux-2.4.22-vanilla/arch/s390x/defconfig Index: linux-2.4.22-vanilla/arch/s390x/kernel/entry.S =================================================================== --- linux-2.4.22-vanilla.orig/arch/s390x/kernel/entry.S 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/s390x/kernel/entry.S 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/arch/s390x/kernel/entry.S 2003-12-02 23:55:38.000000000 +0300 @@ -591,18 +591,18 @@ .long SYSCALL(sys_ni_syscall,sys32_fcntl64_wrapper) .long SYSCALL(sys_readahead,sys32_readahead) @@ -441,7 +441,7 @@ Index: linux-2.4.22-vanilla/arch/s390x/kernel/entry.S Index: linux-2.4.22-vanilla/arch/s390x/kernel/wrapper32.S =================================================================== --- linux-2.4.22-vanilla.orig/arch/s390x/kernel/wrapper32.S 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/s390x/kernel/wrapper32.S 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/arch/s390x/kernel/wrapper32.S 2003-12-02 23:55:38.000000000 +0300 @@ -1098,6 +1098,98 @@ llgfr %r4,%r4 # long jg sys32_fstat64 # branch to system call @@ -544,7 +544,7 @@ Index: linux-2.4.22-vanilla/arch/s390x/kernel/wrapper32.S Index: linux-2.4.22-vanilla/arch/sparc64/defconfig =================================================================== --- linux-2.4.22-vanilla.orig/arch/sparc64/defconfig 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/sparc64/defconfig 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/arch/sparc64/defconfig 2003-12-02 23:55:38.000000000 +0300 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -562,7 +562,7 @@ Index: linux-2.4.22-vanilla/arch/sparc64/defconfig Index: linux-2.4.22-vanilla/fs/Config.in 
=================================================================== --- linux-2.4.22-vanilla.orig/fs/Config.in 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/Config.in 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/Config.in 2003-12-02 23:55:38.000000000 +0300 @@ -29,6 +29,11 @@ dep_tristate 'BFS file system support (EXPERIMENTAL)' CONFIG_BFS_FS $CONFIG_EXPERIMENTAL @@ -600,8 +600,8 @@ Index: linux-2.4.22-vanilla/fs/Config.in source fs/partitions/Config.in Index: linux-2.4.22-vanilla/fs/Makefile =================================================================== ---- linux-2.4.22-vanilla.orig/fs/Makefile 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/Makefile 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/fs/Makefile 2003-12-02 23:55:36.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/Makefile 2003-12-02 23:55:38.000000000 +0300 @@ -77,6 +77,9 @@ obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o @@ -615,7 +615,7 @@ Index: linux-2.4.22-vanilla/fs/Makefile Index: linux-2.4.22-vanilla/fs/ext2/Makefile =================================================================== --- linux-2.4.22-vanilla.orig/fs/ext2/Makefile 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext2/Makefile 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext2/Makefile 2003-12-02 23:55:38.000000000 +0300 @@ -13,4 +13,8 @@ ioctl.o namei.o super.o symlink.o obj-m := $(O_TARGET) @@ -628,7 +628,7 @@ Index: linux-2.4.22-vanilla/fs/ext2/Makefile Index: linux-2.4.22-vanilla/fs/ext2/file.c =================================================================== --- linux-2.4.22-vanilla.orig/fs/ext2/file.c 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext2/file.c 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext2/file.c 2003-12-02 23:55:38.000000000 +0300 @@ -20,6 +20,7 @@ #include @@ -649,7 +649,7 @@ Index: linux-2.4.22-vanilla/fs/ext2/file.c Index: 
linux-2.4.22-vanilla/fs/ext2/ialloc.c =================================================================== --- linux-2.4.22-vanilla.orig/fs/ext2/ialloc.c 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext2/ialloc.c 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext2/ialloc.c 2003-12-02 23:55:38.000000000 +0300 @@ -15,6 +15,7 @@ #include #include @@ -669,7 +669,7 @@ Index: linux-2.4.22-vanilla/fs/ext2/ialloc.c Index: linux-2.4.22-vanilla/fs/ext2/inode.c =================================================================== --- linux-2.4.22-vanilla.orig/fs/ext2/inode.c 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext2/inode.c 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext2/inode.c 2003-12-02 23:55:38.000000000 +0300 @@ -39,6 +39,18 @@ static int ext2_update_inode(struct inode * inode, int do_sync); @@ -755,7 +755,7 @@ Index: linux-2.4.22-vanilla/fs/ext2/inode.c Index: linux-2.4.22-vanilla/fs/ext2/namei.c =================================================================== --- linux-2.4.22-vanilla.orig/fs/ext2/namei.c 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext2/namei.c 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext2/namei.c 2003-12-02 23:55:38.000000000 +0300 @@ -31,6 +31,7 @@ #include @@ -792,7 +792,7 @@ Index: linux-2.4.22-vanilla/fs/ext2/namei.c Index: linux-2.4.22-vanilla/fs/ext2/super.c =================================================================== --- linux-2.4.22-vanilla.orig/fs/ext2/super.c 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext2/super.c 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext2/super.c 2003-12-02 23:55:38.000000000 +0300 @@ -21,6 +21,7 @@ #include #include @@ -865,7 +865,7 @@ Index: linux-2.4.22-vanilla/fs/ext2/super.c Index: linux-2.4.22-vanilla/fs/ext2/symlink.c =================================================================== --- linux-2.4.22-vanilla.orig/fs/ext2/symlink.c 
2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext2/symlink.c 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext2/symlink.c 2003-12-02 23:55:38.000000000 +0300 @@ -19,6 +19,7 @@ #include @@ -897,8 +897,8 @@ Index: linux-2.4.22-vanilla/fs/ext2/symlink.c }; Index: linux-2.4.22-vanilla/fs/ext2/xattr.c =================================================================== ---- linux-2.4.22-vanilla.orig/fs/ext2/xattr.c 2003-11-03 23:41:29.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext2/xattr.c 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/fs/ext2/xattr.c 2003-12-02 23:55:38.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext2/xattr.c 2003-12-02 23:55:38.000000000 +0300 @@ -0,0 +1,1212 @@ +/* + * linux/fs/ext2/xattr.c @@ -2114,8 +2114,8 @@ Index: linux-2.4.22-vanilla/fs/ext2/xattr.c +#endif /* CONFIG_EXT2_FS_XATTR_SHARING */ Index: linux-2.4.22-vanilla/fs/ext2/xattr_user.c =================================================================== ---- linux-2.4.22-vanilla.orig/fs/ext2/xattr_user.c 2003-11-03 23:41:29.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext2/xattr_user.c 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/fs/ext2/xattr_user.c 2003-12-02 23:55:38.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext2/xattr_user.c 2003-12-02 23:55:38.000000000 +0300 @@ -0,0 +1,103 @@ +/* + * linux/fs/ext2/xattr_user.c @@ -2222,8 +2222,8 @@ Index: linux-2.4.22-vanilla/fs/ext2/xattr_user.c +} Index: linux-2.4.22-vanilla/fs/ext3/Makefile =================================================================== ---- linux-2.4.22-vanilla.orig/fs/ext3/Makefile 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext3/Makefile 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/fs/ext3/Makefile 2003-12-02 23:55:37.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext3/Makefile 2003-12-02 23:55:38.000000000 +0300 @@ -1,5 +1,5 @@ # -# Makefile for the linux ext2-filesystem routines. 
@@ -2250,8 +2250,8 @@ Index: linux-2.4.22-vanilla/fs/ext3/Makefile include $(TOPDIR)/Rules.make Index: linux-2.4.22-vanilla/fs/ext3/file.c =================================================================== ---- linux-2.4.22-vanilla.orig/fs/ext3/file.c 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext3/file.c 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/fs/ext3/file.c 2003-12-02 23:55:37.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext3/file.c 2003-12-02 23:55:38.000000000 +0300 @@ -23,6 +23,7 @@ #include #include @@ -2273,7 +2273,7 @@ Index: linux-2.4.22-vanilla/fs/ext3/file.c Index: linux-2.4.22-vanilla/fs/ext3/ialloc.c =================================================================== --- linux-2.4.22-vanilla.orig/fs/ext3/ialloc.c 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext3/ialloc.c 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext3/ialloc.c 2003-12-02 23:55:38.000000000 +0300 @@ -17,6 +17,7 @@ #include #include @@ -2293,7 +2293,7 @@ Index: linux-2.4.22-vanilla/fs/ext3/ialloc.c Index: linux-2.4.22-vanilla/fs/ext3/inode.c =================================================================== --- linux-2.4.22-vanilla.orig/fs/ext3/inode.c 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext3/inode.c 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext3/inode.c 2003-12-02 23:55:38.000000000 +0300 @@ -39,6 +39,18 @@ */ #undef SEARCH_FROM_ZERO @@ -2386,8 +2386,8 @@ Index: linux-2.4.22-vanilla/fs/ext3/inode.c return; Index: linux-2.4.22-vanilla/fs/ext3/namei.c =================================================================== ---- linux-2.4.22-vanilla.orig/fs/ext3/namei.c 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext3/namei.c 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/fs/ext3/namei.c 2003-12-02 23:55:37.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext3/namei.c 2003-12-02 23:55:38.000000000 +0300 @@ -29,6 
+29,7 @@ #include #include @@ -2451,8 +2451,8 @@ Index: linux-2.4.22-vanilla/fs/ext3/namei.c + Index: linux-2.4.22-vanilla/fs/ext3/super.c =================================================================== ---- linux-2.4.22-vanilla.orig/fs/ext3/super.c 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext3/super.c 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/fs/ext3/super.c 2003-12-02 23:55:37.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext3/super.c 2003-12-02 23:56:03.000000000 +0300 @@ -24,6 +24,7 @@ #include #include @@ -2504,12 +2504,18 @@ Index: linux-2.4.22-vanilla/fs/ext3/super.c if (!parse_options ((char *) data, &sb_block, sbi, &journal_inum, 0)) { sb->s_dev = 0; goto out_fail; -@@ -1827,17 +1843,29 @@ +@@ -1822,22 +1838,35 @@ + + static int __init init_ext3_fs(void) + { ++ int error; + #ifdef CONFIG_QUOTA + init_dquot_operations(&ext3_qops); old_sync_dquot = ext3_qops.sync_dquot; ext3_qops.sync_dquot = ext3_sync_dquot; #endif - return register_filesystem(&ext3_fs_type); -+ int error = init_ext3_xattr(); ++ error = init_ext3_xattr(); + if (error) + return error; + error = init_ext3_xattr_user(); @@ -2541,7 +2547,7 @@ Index: linux-2.4.22-vanilla/fs/ext3/super.c Index: linux-2.4.22-vanilla/fs/ext3/symlink.c =================================================================== --- linux-2.4.22-vanilla.orig/fs/ext3/symlink.c 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext3/symlink.c 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext3/symlink.c 2003-12-02 23:55:38.000000000 +0300 @@ -20,6 +20,7 @@ #include #include @@ -2573,8 +2579,8 @@ Index: linux-2.4.22-vanilla/fs/ext3/symlink.c }; Index: linux-2.4.22-vanilla/fs/ext3/xattr.c =================================================================== ---- linux-2.4.22-vanilla.orig/fs/ext3/xattr.c 2003-11-03 23:41:29.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext3/xattr.c 2003-11-03 23:41:29.000000000 +0300 +--- 
linux-2.4.22-vanilla.orig/fs/ext3/xattr.c 2003-12-02 23:55:38.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext3/xattr.c 2003-12-02 23:55:38.000000000 +0300 @@ -0,0 +1,1225 @@ +/* + * linux/fs/ext3/xattr.c @@ -3803,8 +3809,8 @@ Index: linux-2.4.22-vanilla/fs/ext3/xattr.c +#endif /* CONFIG_EXT3_FS_XATTR_SHARING */ Index: linux-2.4.22-vanilla/fs/ext3/xattr_user.c =================================================================== ---- linux-2.4.22-vanilla.orig/fs/ext3/xattr_user.c 2003-11-03 23:41:29.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext3/xattr_user.c 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/fs/ext3/xattr_user.c 2003-12-02 23:55:38.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext3/xattr_user.c 2003-12-02 23:55:39.000000000 +0300 @@ -0,0 +1,111 @@ +/* + * linux/fs/ext3/xattr_user.c @@ -3920,7 +3926,7 @@ Index: linux-2.4.22-vanilla/fs/ext3/xattr_user.c Index: linux-2.4.22-vanilla/fs/jfs/jfs_xattr.h =================================================================== --- linux-2.4.22-vanilla.orig/fs/jfs/jfs_xattr.h 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/jfs/jfs_xattr.h 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/jfs/jfs_xattr.h 2003-12-02 23:55:39.000000000 +0300 @@ -52,8 +52,10 @@ #define END_EALIST(ealist) \ ((struct jfs_ea *) (((char *) (ealist)) + EALIST_SIZE(ealist))) @@ -3937,7 +3943,7 @@ Index: linux-2.4.22-vanilla/fs/jfs/jfs_xattr.h Index: linux-2.4.22-vanilla/fs/jfs/xattr.c =================================================================== --- linux-2.4.22-vanilla.orig/fs/jfs/xattr.c 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/jfs/xattr.c 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/jfs/xattr.c 2003-12-02 23:55:39.000000000 +0300 @@ -641,7 +641,7 @@ } @@ -3967,8 +3973,8 @@ Index: linux-2.4.22-vanilla/fs/jfs/xattr.c if (value == NULL) { /* empty EA, do not remove */ Index: linux-2.4.22-vanilla/fs/mbcache.c 
=================================================================== ---- linux-2.4.22-vanilla.orig/fs/mbcache.c 2003-11-03 23:41:29.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/mbcache.c 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/fs/mbcache.c 2003-12-02 23:55:38.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/mbcache.c 2003-12-02 23:55:39.000000000 +0300 @@ -0,0 +1,648 @@ +/* + * linux/fs/mbcache.c @@ -4621,7 +4627,7 @@ Index: linux-2.4.22-vanilla/fs/mbcache.c Index: linux-2.4.22-vanilla/include/asm-arm/unistd.h =================================================================== --- linux-2.4.22-vanilla.orig/include/asm-arm/unistd.h 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/include/asm-arm/unistd.h 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/include/asm-arm/unistd.h 2003-12-02 23:55:39.000000000 +0300 @@ -250,7 +250,6 @@ #define __NR_security (__NR_SYSCALL_BASE+223) #define __NR_gettid (__NR_SYSCALL_BASE+224) @@ -4641,7 +4647,7 @@ Index: linux-2.4.22-vanilla/include/asm-arm/unistd.h Index: linux-2.4.22-vanilla/include/asm-ppc64/unistd.h =================================================================== --- linux-2.4.22-vanilla.orig/include/asm-ppc64/unistd.h 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/include/asm-ppc64/unistd.h 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/include/asm-ppc64/unistd.h 2003-12-02 23:55:39.000000000 +0300 @@ -218,6 +218,7 @@ #define __NR_mincore 206 #define __NR_gettid 207 @@ -4661,7 +4667,7 @@ Index: linux-2.4.22-vanilla/include/asm-ppc64/unistd.h Index: linux-2.4.22-vanilla/include/asm-s390/unistd.h =================================================================== --- linux-2.4.22-vanilla.orig/include/asm-s390/unistd.h 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/include/asm-s390/unistd.h 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/include/asm-s390/unistd.h 2003-12-02 23:55:39.000000000 +0300 @@ -213,9 
+213,18 @@ #define __NR_getdents64 220 #define __NR_fcntl64 221 @@ -4687,7 +4693,7 @@ Index: linux-2.4.22-vanilla/include/asm-s390/unistd.h Index: linux-2.4.22-vanilla/include/asm-s390x/unistd.h =================================================================== --- linux-2.4.22-vanilla.orig/include/asm-s390x/unistd.h 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/include/asm-s390x/unistd.h 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/include/asm-s390x/unistd.h 2003-12-02 23:55:39.000000000 +0300 @@ -181,9 +181,18 @@ #define __NR_mincore 218 #define __NR_madvise 219 @@ -4712,8 +4718,8 @@ Index: linux-2.4.22-vanilla/include/asm-s390x/unistd.h Index: linux-2.4.22-vanilla/include/linux/cache_def.h =================================================================== ---- linux-2.4.22-vanilla.orig/include/linux/cache_def.h 2003-11-03 23:41:29.000000000 +0300 -+++ linux-2.4.22-vanilla/include/linux/cache_def.h 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/include/linux/cache_def.h 2003-12-02 23:55:38.000000000 +0300 ++++ linux-2.4.22-vanilla/include/linux/cache_def.h 2003-12-02 23:55:39.000000000 +0300 @@ -0,0 +1,15 @@ +/* + * linux/cache_def.h @@ -4733,7 +4739,7 @@ Index: linux-2.4.22-vanilla/include/linux/cache_def.h Index: linux-2.4.22-vanilla/include/linux/errno.h =================================================================== --- linux-2.4.22-vanilla.orig/include/linux/errno.h 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/include/linux/errno.h 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/include/linux/errno.h 2003-12-02 23:55:39.000000000 +0300 @@ -23,4 +23,8 @@ #endif @@ -4746,7 +4752,7 @@ Index: linux-2.4.22-vanilla/include/linux/errno.h Index: linux-2.4.22-vanilla/include/linux/ext2_fs.h =================================================================== --- linux-2.4.22-vanilla.orig/include/linux/ext2_fs.h 2003-11-03 23:41:26.000000000 +0300 -+++ 
linux-2.4.22-vanilla/include/linux/ext2_fs.h 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/include/linux/ext2_fs.h 2003-12-02 23:55:39.000000000 +0300 @@ -57,8 +57,6 @@ */ #define EXT2_BAD_INO 1 /* Bad blocks inode */ @@ -4831,8 +4837,8 @@ Index: linux-2.4.22-vanilla/include/linux/ext2_fs.h #endif /* __KERNEL__ */ Index: linux-2.4.22-vanilla/include/linux/ext2_xattr.h =================================================================== ---- linux-2.4.22-vanilla.orig/include/linux/ext2_xattr.h 2003-11-03 23:41:29.000000000 +0300 -+++ linux-2.4.22-vanilla/include/linux/ext2_xattr.h 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/include/linux/ext2_xattr.h 2003-12-02 23:55:38.000000000 +0300 ++++ linux-2.4.22-vanilla/include/linux/ext2_xattr.h 2003-12-02 23:55:39.000000000 +0300 @@ -0,0 +1,157 @@ +/* + File: linux/ext2_xattr.h @@ -4993,8 +4999,8 @@ Index: linux-2.4.22-vanilla/include/linux/ext2_xattr.h + Index: linux-2.4.22-vanilla/include/linux/ext3_fs.h =================================================================== ---- linux-2.4.22-vanilla.orig/include/linux/ext3_fs.h 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/include/linux/ext3_fs.h 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/include/linux/ext3_fs.h 2003-12-02 23:55:37.000000000 +0300 ++++ linux-2.4.22-vanilla/include/linux/ext3_fs.h 2003-12-02 23:55:39.000000000 +0300 @@ -63,8 +63,6 @@ */ #define EXT3_BAD_INO 1 /* Bad blocks inode */ @@ -5079,8 +5085,8 @@ Index: linux-2.4.22-vanilla/include/linux/ext3_fs.h Index: linux-2.4.22-vanilla/include/linux/ext3_jbd.h =================================================================== ---- linux-2.4.22-vanilla.orig/include/linux/ext3_jbd.h 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/include/linux/ext3_jbd.h 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/include/linux/ext3_jbd.h 2003-12-02 23:55:37.000000000 +0300 ++++ 
linux-2.4.22-vanilla/include/linux/ext3_jbd.h 2003-12-02 23:55:39.000000000 +0300 @@ -30,13 +30,19 @@ #define EXT3_SINGLEDATA_TRANS_BLOCKS 8U @@ -5104,8 +5110,8 @@ Index: linux-2.4.22-vanilla/include/linux/ext3_jbd.h Index: linux-2.4.22-vanilla/include/linux/ext3_xattr.h =================================================================== ---- linux-2.4.22-vanilla.orig/include/linux/ext3_xattr.h 2003-11-03 23:41:29.000000000 +0300 -+++ linux-2.4.22-vanilla/include/linux/ext3_xattr.h 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/include/linux/ext3_xattr.h 2003-12-02 23:55:38.000000000 +0300 ++++ linux-2.4.22-vanilla/include/linux/ext3_xattr.h 2003-12-02 23:55:39.000000000 +0300 @@ -0,0 +1,157 @@ +/* + File: linux/ext3_xattr.h @@ -5266,8 +5272,8 @@ Index: linux-2.4.22-vanilla/include/linux/ext3_xattr.h + Index: linux-2.4.22-vanilla/include/linux/fs.h =================================================================== ---- linux-2.4.22-vanilla.orig/include/linux/fs.h 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/include/linux/fs.h 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/include/linux/fs.h 2003-12-02 23:55:35.000000000 +0300 ++++ linux-2.4.22-vanilla/include/linux/fs.h 2003-12-02 23:55:39.000000000 +0300 @@ -913,7 +913,7 @@ int (*setattr) (struct dentry *, struct iattr *); int (*setattr_raw) (struct inode *, struct iattr *); @@ -5279,8 +5285,8 @@ Index: linux-2.4.22-vanilla/include/linux/fs.h int (*removexattr) (struct dentry *, const char *); Index: linux-2.4.22-vanilla/include/linux/mbcache.h =================================================================== ---- linux-2.4.22-vanilla.orig/include/linux/mbcache.h 2003-11-03 23:41:29.000000000 +0300 -+++ linux-2.4.22-vanilla/include/linux/mbcache.h 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/include/linux/mbcache.h 2003-12-02 23:55:38.000000000 +0300 ++++ linux-2.4.22-vanilla/include/linux/mbcache.h 2003-12-02 23:55:39.000000000 
+0300 @@ -0,0 +1,69 @@ +/* + File: linux/mbcache.h @@ -5353,8 +5359,8 @@ Index: linux-2.4.22-vanilla/include/linux/mbcache.h +#endif Index: linux-2.4.22-vanilla/kernel/ksyms.c =================================================================== ---- linux-2.4.22-vanilla.orig/kernel/ksyms.c 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/kernel/ksyms.c 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/kernel/ksyms.c 2003-12-02 23:55:34.000000000 +0300 ++++ linux-2.4.22-vanilla/kernel/ksyms.c 2003-12-02 23:55:39.000000000 +0300 @@ -11,6 +11,7 @@ #include @@ -5371,7 +5377,7 @@ Index: linux-2.4.22-vanilla/kernel/ksyms.c /* internal kernel memory management */ EXPORT_SYMBOL(_alloc_pages); -@@ -109,6 +111,8 @@ +@@ -108,6 +110,8 @@ EXPORT_SYMBOL(kmem_cache_alloc); EXPORT_SYMBOL(kmem_cache_free); EXPORT_SYMBOL(kmem_cache_size); @@ -5383,7 +5389,7 @@ Index: linux-2.4.22-vanilla/kernel/ksyms.c Index: linux-2.4.22-vanilla/mm/vmscan.c =================================================================== --- linux-2.4.22-vanilla.orig/mm/vmscan.c 2003-11-03 23:41:27.000000000 +0300 -+++ linux-2.4.22-vanilla/mm/vmscan.c 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/mm/vmscan.c 2003-12-02 23:55:39.000000000 +0300 @@ -18,6 +18,7 @@ #include #include @@ -5442,8 +5448,8 @@ Index: linux-2.4.22-vanilla/mm/vmscan.c #endif Index: linux-2.4.22-vanilla/fs/ext3/ext3-exports.c =================================================================== ---- linux-2.4.22-vanilla.orig/fs/ext3/ext3-exports.c 2003-11-03 23:41:29.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext3/ext3-exports.c 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/fs/ext3/ext3-exports.c 2003-12-02 23:55:38.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext3/ext3-exports.c 2003-12-02 23:55:39.000000000 +0300 @@ -0,0 +1,13 @@ +#include +#include diff --git a/lustre/kernel_patches/series/rh-2.4.22 b/lustre/kernel_patches/series/rh-2.4.22 index a3bd2b9..a4ce33d 100644 --- 
a/lustre/kernel_patches/series/rh-2.4.22 +++ b/lustre/kernel_patches/series/rh-2.4.22 @@ -1,3 +1,4 @@ +configurable-x86-stack-2.4.22-rh.patch dev_read_only_2.4.20-rh.patch exports_2.4.20-rh-hp.patch lustre_version.patch diff --git a/lustre/kernel_patches/series/vanilla-2.4.19-pre1 b/lustre/kernel_patches/series/vanilla-2.4.19-pre1 index b6ded90..9551189 100644 --- a/lustre/kernel_patches/series/vanilla-2.4.19-pre1 +++ b/lustre/kernel_patches/series/vanilla-2.4.19-pre1 @@ -1,3 +1,4 @@ +configurable-x86-stack-2.4.19-pre1.patch dev_read_only_2.4.20.patch exports_2.4.19-pre1.patch lustre_version.patch diff --git a/lustre/kernel_patches/series/vanilla-2.4.22 b/lustre/kernel_patches/series/vanilla-2.4.22 index d3b7123..1e91487 100644 --- a/lustre/kernel_patches/series/vanilla-2.4.22 +++ b/lustre/kernel_patches/series/vanilla-2.4.22 @@ -1,3 +1,4 @@ +configurable-x86-stack-2.4.20.patch dev_read_only_2.4.20-rh.patch exports_2.4.20-rh-hp.patch lustre_version.patch diff --git a/lustre/portals/include/linux/kp30.h b/lustre/portals/include/linux/kp30.h index 3e6d5e3..3d60631 100644 --- a/lustre/portals/include/linux/kp30.h +++ b/lustre/portals/include/linux/kp30.h @@ -115,7 +115,7 @@ do { \ if (portal_cerror == 0) \ break; \ CHECK_STACK(CDEBUG_STACK); \ - if (!(mask) || ((mask) & (D_ERROR | D_EMERG)) || \ + if (!(mask) || ((mask) & (D_ERROR | D_EMERG | D_WARNING)) || \ (portal_debug & (mask) && \ portal_subsystem_debug & DEBUG_SUBSYSTEM)) \ portals_debug_msg(DEBUG_SUBSYSTEM, mask, \ @@ -283,17 +283,19 @@ do { \ #define GFP_MEMALLOC 0 #endif -#define PORTAL_ALLOC(ptr, size) \ +#define PORTAL_ALLOC_GFP(ptr, size, mask) \ do { \ LASSERT (!in_interrupt()); \ if ((size) > PORTAL_VMALLOC_SIZE) \ (ptr) = vmalloc(size); \ else \ - (ptr) = kmalloc((size), (GFP_KERNEL | GFP_MEMALLOC)); \ - if ((ptr) == NULL) \ + (ptr) = kmalloc((size), (mask)); \ + if ((ptr) == NULL) { \ CERROR("PORTALS: out of memory at %s:%d (tried to alloc '"\ #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size));\ - 
else { \ + CERROR("PORTALS: %d total bytes allocated by portals\n", \ + atomic_read(&portal_kmemory)); \ + } else { \ portal_kmem_inc((ptr), (size)); \ memset((ptr), 0, (size)); \ } \ @@ -301,6 +303,12 @@ do { \ (int)(size), (ptr), atomic_read (&portal_kmemory)); \ } while (0) +#define PORTAL_ALLOC(ptr, size) \ + PORTAL_ALLOC_GFP(ptr, size, (GFP_KERNEL | GFP_MEMALLOC)) + +#define PORTAL_ALLOC_ATOMIC(ptr, size) \ + PORTAL_ALLOC_GFP(ptr, size, (GFP_ATOMIC | GFP_MEMALLOC)) + #define PORTAL_FREE(ptr, size) \ do { \ int s = (size); \ @@ -330,11 +338,13 @@ do { \ CERROR("PORTALS: out of memory at %s:%d (tried to alloc" \ " '" #ptr "' from slab '" #slab "')\n", __FILE__, \ __LINE__); \ + CERROR("PORTALS: %d total bytes allocated by portals\n", \ + atomic_read(&portal_kmemory)); \ } else { \ portal_kmem_inc((ptr), (size)); \ memset((ptr), 0, (size)); \ } \ - CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %ld at %p (tot %d).\n", \ + CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d).\n", \ (int)(size), (ptr), atomic_read(&portal_kmemory)); \ } while (0) @@ -690,7 +700,10 @@ char *portals_nid2str(int nal, ptl_nid_t nid, char *str); /******************************************************************************/ /* Light-weight trace * Support for temporary event tracing with minimal Heisenberg effect. */ -#define LWT_SUPPORT 1 +#define LWT_SUPPORT 0 + +#define LWT_MEMORY (64<<20) +#define LWT_MAX_CPUS 4 typedef struct { cycles_t lwte_when; @@ -728,7 +741,7 @@ extern void lwt_fini (void); extern int lwt_lookup_string (int *size, char *knlptr, char *usrptr, int usrsize); extern int lwt_control (int enable, int clear); -extern int lwt_snapshot (int *ncpu, int *total_size, +extern int lwt_snapshot (cycles_t *now, int *ncpu, int *total_size, void *user_ptr, int user_size); /* Note that we _don't_ define LWT_EVENT at all if LWT_SUPPORT isn't set. 
@@ -775,6 +788,11 @@ do { \ #endif /* __KERNEL__ */ #endif /* LWT_SUPPORT */ +struct portals_device_userstate +{ + int pdu_memhog_pages; + struct page *pdu_memhog_root_page; +}; #include @@ -1044,7 +1062,8 @@ static inline int portal_ioctl_getdata(char *buf, char *end, void *arg) #define IOC_PORTAL_LWT_CONTROL _IOWR('e', 39, long) #define IOC_PORTAL_LWT_SNAPSHOT _IOWR('e', 40, long) #define IOC_PORTAL_LWT_LOOKUP_STRING _IOWR('e', 41, long) -#define IOC_PORTAL_MAX_NR 41 +#define IOC_PORTAL_MEMHOG _IOWR('e', 42, long) +#define IOC_PORTAL_MAX_NR 42 enum { QSWNAL = 1, diff --git a/lustre/portals/include/portals/lib-p30.h b/lustre/portals/include/portals/lib-p30.h index 55fd720..c402828 100644 --- a/lustre/portals/include/portals/lib-p30.h +++ b/lustre/portals/include/portals/lib-p30.h @@ -168,7 +168,8 @@ static inline lib_eq_t * lib_eq_alloc (nal_cb_t *nal) { /* NEVER called with statelock held */ - lib_eq_t *eq = kmem_cache_alloc(ptl_eq_slab, GFP_NOFS); + lib_eq_t *eq; + PORTAL_SLAB_ALLOC(eq, ptl_eq_slab, sizeof(*eq)); if (eq == NULL) return (NULL); @@ -182,14 +183,15 @@ lib_eq_free (nal_cb_t *nal, lib_eq_t *eq) { /* ALWAYS called with statelock held */ atomic_dec (&eq_in_use_count); - kmem_cache_free(ptl_eq_slab, eq); + PORTAL_SLAB_FREE(eq, ptl_eq_slab, sizeof(*eq)); } static inline lib_md_t * lib_md_alloc (nal_cb_t *nal) { /* NEVER called with statelock held */ - lib_md_t *md = kmem_cache_alloc(ptl_md_slab, GFP_NOFS); + lib_md_t *md; + PORTAL_SLAB_ALLOC(md, ptl_md_slab, sizeof(*md)); if (md == NULL) return (NULL); @@ -203,14 +205,15 @@ lib_md_free (nal_cb_t *nal, lib_md_t *md) { /* ALWAYS called with statelock held */ atomic_dec (&md_in_use_count); - kmem_cache_free(ptl_md_slab, md); + PORTAL_SLAB_FREE(md, ptl_md_slab, sizeof(*md)); } static inline lib_me_t * lib_me_alloc (nal_cb_t *nal) { /* NEVER called with statelock held */ - lib_me_t *me = kmem_cache_alloc(ptl_me_slab, GFP_NOFS); + lib_me_t *me; + PORTAL_SLAB_ALLOC(me, ptl_me_slab, sizeof(*me)); if (me == NULL) 
return (NULL); @@ -224,14 +227,15 @@ lib_me_free(nal_cb_t *nal, lib_me_t *me) { /* ALWAYS called with statelock held */ atomic_dec (&me_in_use_count); - kmem_cache_free(ptl_me_slab, me); + PORTAL_SLAB_FREE(me, ptl_me_slab, sizeof(*me)); } static inline lib_msg_t * lib_msg_alloc(nal_cb_t *nal) { /* ALWAYS called with statelock held */ - lib_msg_t *msg = kmem_cache_alloc(ptl_msg_slab, GFP_ATOMIC); + lib_msg_t *msg; + PORTAL_SLAB_ALLOC(msg, ptl_msg_slab, sizeof(*msg)); if (msg == NULL) return (NULL); @@ -245,7 +249,7 @@ lib_msg_free(nal_cb_t *nal, lib_msg_t *msg) { /* ALWAYS called with statelock held */ atomic_dec (&msg_in_use_count); - kmem_cache_free(ptl_msg_slab, msg); + PORTAL_SLAB_FREE(msg, ptl_msg_slab, sizeof(*msg)); } #endif diff --git a/lustre/portals/include/portals/ptlctl.h b/lustre/portals/include/portals/ptlctl.h index f581e72..12ef47a 100644 --- a/lustre/portals/include/portals/ptlctl.h +++ b/lustre/portals/include/portals/ptlctl.h @@ -59,6 +59,7 @@ int jt_ptl_notify_router (int argc, char **argv); int jt_ptl_print_routes (int argc, char **argv); int jt_ptl_fail_nid (int argc, char **argv); int jt_ptl_lwt(int argc, char **argv); +int jt_ptl_memhog(int argc, char **argv); int dbg_initialize(int argc, char **argv); int jt_dbg_filter(int argc, char **argv); diff --git a/lustre/portals/knals/socknal/socknal.c b/lustre/portals/knals/socknal/socknal.c index 6f6fa7e..6de511c 100644 --- a/lustre/portals/knals/socknal/socknal.c +++ b/lustre/portals/knals/socknal/socknal.c @@ -1395,30 +1395,35 @@ ksocknal_cmd(struct portals_cfg *pcfg, void * private) } void -ksocknal_free_buffers (void) +ksocknal_free_fmbs (ksock_fmb_pool_t *p) { - if (ksocknal_data.ksnd_fmbs != NULL) { - ksock_fmb_t *fmb = (ksock_fmb_t *)ksocknal_data.ksnd_fmbs; - int i; - int j; - - for (i = 0; - i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); - i++, fmb++) - for (j = 0; j < fmb->fmb_npages; j++) - if (fmb->fmb_pages[j] != NULL) - __free_page (fmb->fmb_pages[j]); - - PORTAL_FREE 
(ksocknal_data.ksnd_fmbs, - sizeof (ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + - SOCKNAL_LARGE_FWD_NMSGS)); + ksock_fmb_t *fmb; + int i; + + LASSERT (list_empty(&p->fmp_blocked_conns)); + LASSERT (p->fmp_nactive_fmbs == 0); + + while (!list_empty(&p->fmp_idle_fmbs)) { + + fmb = list_entry(p->fmp_idle_fmbs.next, + ksock_fmb_t, fmb_list); + + for (i = 0; i < fmb->fmb_npages; i++) + if (fmb->fmb_pages[i] != NULL) + __free_page(fmb->fmb_pages[i]); + + list_del(&fmb->fmb_list); + PORTAL_FREE(fmb, sizeof(*fmb)); } +} + +void +ksocknal_free_buffers (void) +{ + ksocknal_free_fmbs(&ksocknal_data.ksnd_small_fmp); + ksocknal_free_fmbs(&ksocknal_data.ksnd_large_fmp); - LASSERT (ksocknal_data.ksnd_active_ltxs == 0); - if (ksocknal_data.ksnd_ltxs != NULL) - PORTAL_FREE (ksocknal_data.ksnd_ltxs, - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + - SOCKNAL_NNBLK_LTXS)); + LASSERT (atomic_read(&ksocknal_data.ksnd_nactive_ltxs) == 0); if (ksocknal_data.ksnd_schedulers != NULL) PORTAL_FREE (ksocknal_data.ksnd_schedulers, @@ -1572,7 +1577,7 @@ ksocknal_module_init (void) PORTAL_ALLOC (ksocknal_data.ksnd_peers, sizeof (struct list_head) * ksocknal_data.ksnd_peer_hash_size); if (ksocknal_data.ksnd_peers == NULL) - RETURN (-ENOMEM); + return (-ENOMEM); for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]); @@ -1590,11 +1595,6 @@ ksocknal_module_init (void) INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_idle_fmbs); INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns); - spin_lock_init(&ksocknal_data.ksnd_idle_ltx_lock); - INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_nblk_ltx_list); - INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_ltx_list); - init_waitqueue_head(&ksocknal_data.ksnd_idle_ltx_waitq); - spin_lock_init (&ksocknal_data.ksnd_reaper_lock); INIT_LIST_HEAD (&ksocknal_data.ksnd_zombie_conns); INIT_LIST_HEAD (&ksocknal_data.ksnd_deathrow_conns); @@ -1614,7 +1614,7 @@ ksocknal_module_init (void) sizeof(ksock_sched_t) * SOCKNAL_N_SCHED); if 
(ksocknal_data.ksnd_schedulers == NULL) { ksocknal_module_fini (); - RETURN(-ENOMEM); + return (-ENOMEM); } for (i = 0; i < SOCKNAL_N_SCHED; i++) { @@ -1629,35 +1629,11 @@ ksocknal_module_init (void) init_waitqueue_head (&kss->kss_waitq); } - CDEBUG (D_MALLOC, "ltx "LPSZ", total "LPSZ"\n", sizeof (ksock_ltx_t), - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS)); - - PORTAL_ALLOC(ksocknal_data.ksnd_ltxs, - sizeof(ksock_ltx_t) * (SOCKNAL_NLTXS +SOCKNAL_NNBLK_LTXS)); - if (ksocknal_data.ksnd_ltxs == NULL) { - ksocknal_module_fini (); - return (-ENOMEM); - } - - /* Deterministic bugs please */ - memset (ksocknal_data.ksnd_ltxs, 0xeb, - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS)); - - for (i = 0; i < SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS; i++) { - ksock_ltx_t *ltx = &((ksock_ltx_t *)ksocknal_data.ksnd_ltxs)[i]; - - ltx->ltx_tx.tx_hdr = &ltx->ltx_hdr; - ltx->ltx_idle = i < SOCKNAL_NLTXS ? - &ksocknal_data.ksnd_idle_ltx_list : - &ksocknal_data.ksnd_idle_nblk_ltx_list; - list_add (&ltx->ltx_tx.tx_list, ltx->ltx_idle); - } - rc = PtlNIInit(ksocknal_init, 32, 4, 0, &ksocknal_ni); if (rc != 0) { CERROR("ksocknal: PtlNIInit failed: error %d\n", rc); ksocknal_module_fini (); - RETURN (rc); + return (rc); } PtlNIDebug(ksocknal_ni, ~0); @@ -1670,7 +1646,7 @@ ksocknal_module_init (void) CERROR("Can't spawn socknal scheduler[%d]: %d\n", i, rc); ksocknal_module_fini (); - RETURN (rc); + return (rc); } } @@ -1679,7 +1655,7 @@ ksocknal_module_init (void) if (rc != 0) { CERROR("Can't spawn socknal autoconnectd: %d\n", rc); ksocknal_module_fini (); - RETURN (rc); + return (rc); } } @@ -1687,7 +1663,7 @@ ksocknal_module_init (void) if (rc != 0) { CERROR ("Can't spawn socknal reaper: %d\n", rc); ksocknal_module_fini (); - RETURN (rc); + return (rc); } rc = kpr_register(&ksocknal_data.ksnd_router, @@ -1698,23 +1674,15 @@ ksocknal_module_init (void) } else { /* Only allocate forwarding buffers if I'm on a gateway */ - PORTAL_ALLOC(ksocknal_data.ksnd_fmbs, - 
sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + - SOCKNAL_LARGE_FWD_NMSGS)); - if (ksocknal_data.ksnd_fmbs == NULL) { - ksocknal_module_fini (); - RETURN(-ENOMEM); - } - - /* NULL out buffer pointers etc */ - memset(ksocknal_data.ksnd_fmbs, 0, - sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + - SOCKNAL_LARGE_FWD_NMSGS)); - for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); i++) { - ksock_fmb_t *fmb = - &((ksock_fmb_t *)ksocknal_data.ksnd_fmbs)[i]; + ksock_fmb_t *fmb; + + PORTAL_ALLOC(fmb, sizeof(*fmb)); + if (fmb == NULL) { + ksocknal_module_fini(); + return (-ENOMEM); + } if (i < SOCKNAL_SMALL_FWD_NMSGS) { fmb->fmb_npages = SOCKNAL_SMALL_FWD_PAGES; @@ -1724,7 +1692,6 @@ ksocknal_module_init (void) fmb->fmb_pool = &ksocknal_data.ksnd_large_fmp; } - LASSERT (fmb->fmb_npages > 0); for (j = 0; j < fmb->fmb_npages; j++) { fmb->fmb_pages[j] = alloc_page(GFP_KERNEL); @@ -1733,8 +1700,7 @@ ksocknal_module_init (void) return (-ENOMEM); } - LASSERT(page_address (fmb->fmb_pages[j]) != - NULL); + LASSERT(page_address(fmb->fmb_pages[j]) != NULL); } list_add(&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs); diff --git a/lustre/portals/knals/socknal/socknal.h b/lustre/portals/knals/socknal/socknal.h index 227a24f..9dbe415 100644 --- a/lustre/portals/knals/socknal/socknal.h +++ b/lustre/portals/knals/socknal/socknal.h @@ -82,9 +82,6 @@ #define SOCKNAL_PEER_HASH_SIZE 101 /* # peer lists */ -#define SOCKNAL_NLTXS 128 /* # normal transmit messages */ -#define SOCKNAL_NNBLK_LTXS 128 /* # transmit messages reserved if can't block */ - #define SOCKNAL_SMALL_FWD_NMSGS 128 /* # small messages I can be forwarding at any time */ #define SOCKNAL_LARGE_FWD_NMSGS 64 /* # large messages I can be forwarding at any time */ @@ -113,8 +110,9 @@ typedef struct /* pool of forwarding buffers */ { spinlock_t fmp_lock; /* serialise */ - struct list_head fmp_idle_fmbs; /* buffers waiting for a connection */ + struct list_head fmp_idle_fmbs; /* free buffers */ struct list_head 
fmp_blocked_conns; /* connections waiting for a buffer */ + int fmp_nactive_fmbs; /* # buffers in use */ } ksock_fmb_pool_t; @@ -164,16 +162,10 @@ typedef struct { kpr_router_t ksnd_router; /* THE router */ - void *ksnd_fmbs; /* all the pre-allocated FMBs */ ksock_fmb_pool_t ksnd_small_fmp; /* small message forwarding buffers */ ksock_fmb_pool_t ksnd_large_fmp; /* large message forwarding buffers */ - void *ksnd_ltxs; /* all the pre-allocated LTXs */ - spinlock_t ksnd_idle_ltx_lock; /* serialise ltx alloc/free */ - struct list_head ksnd_idle_ltx_list; /* where to get an idle LTX */ - struct list_head ksnd_idle_nblk_ltx_list; /* where to get an idle LTX if you can't block */ - wait_queue_head_t ksnd_idle_ltx_waitq; /* where to block for an idle LTX */ - int ksnd_active_ltxs; /* #active ltxs */ + atomic_t ksnd_nactive_ltxs; /* #active ltxs */ struct list_head ksnd_deathrow_conns; /* conns to be closed */ struct list_head ksnd_zombie_conns; /* conns to be freed */ @@ -233,25 +225,15 @@ typedef struct /* transmit packet */ #define KSOCK_ZCCD_2_TX(ptr) list_entry (ptr, ksock_tx_t, tx_zccd) /* network zero copy callback descriptor embedded in ksock_tx_t */ -/* space for the tx frag descriptors: hdr is always 1 iovec - * and payload is PTL_MD_MAX of either type. 
*/ -typedef struct -{ - struct iovec hdr; - union { - struct iovec iov[PTL_MD_MAX_IOV]; - ptl_kiov_t kiov[PTL_MD_MAX_IOV]; - } payload; -} ksock_txiovspace_t; - typedef struct /* locally transmitted packet */ { ksock_tx_t ltx_tx; /* send info */ - struct list_head *ltx_idle; /* where to put when idle */ void *ltx_private; /* lib_finalize() callback arg */ void *ltx_cookie; /* lib_finalize() callback arg */ - ksock_txiovspace_t ltx_iov_space; /* where to stash frag descriptors */ ptl_hdr_t ltx_hdr; /* buffer for packet header */ + int ltx_desc_size; /* bytes allocated for this desc */ + struct iovec ltx_iov[1]; /* iov for hdr + payload */ + ptl_kiov_t ltx_kiov[0]; /* kiov for payload */ } ksock_ltx_t; #define KSOCK_TX_2_KPR_FWD_DESC(ptr) list_entry ((kprfd_scratch_t *)ptr, kpr_fwd_desc_t, kprfd_scratch) diff --git a/lustre/portals/knals/socknal/socknal_cb.c b/lustre/portals/knals/socknal/socknal_cb.c index 6ea4fa8..22345fe 100644 --- a/lustre/portals/knals/socknal/socknal_cb.c +++ b/lustre/portals/knals/socknal/socknal_cb.c @@ -129,60 +129,11 @@ ksocknal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist) return 0; } -ksock_ltx_t * -ksocknal_get_ltx (int may_block) -{ - unsigned long flags; - ksock_ltx_t *ltx = NULL; - - for (;;) { - spin_lock_irqsave (&ksocknal_data.ksnd_idle_ltx_lock, flags); - - if (!list_empty (&ksocknal_data.ksnd_idle_ltx_list)) { - ltx = list_entry(ksocknal_data.ksnd_idle_ltx_list.next, - ksock_ltx_t, ltx_tx.tx_list); - list_del (&ltx->ltx_tx.tx_list); - ksocknal_data.ksnd_active_ltxs++; - break; - } - - if (!may_block) { - if (!list_empty(&ksocknal_data.ksnd_idle_nblk_ltx_list)) { - ltx = list_entry(ksocknal_data.ksnd_idle_nblk_ltx_list.next, - ksock_ltx_t, ltx_tx.tx_list); - list_del (&ltx->ltx_tx.tx_list); - ksocknal_data.ksnd_active_ltxs++; - } - break; - } - - spin_unlock_irqrestore(&ksocknal_data.ksnd_idle_ltx_lock, - flags); - - wait_event (ksocknal_data.ksnd_idle_ltx_waitq, - !list_empty (&ksocknal_data.ksnd_idle_ltx_list)); - } - -
spin_unlock_irqrestore (&ksocknal_data.ksnd_idle_ltx_lock, flags); - - return (ltx); -} - void -ksocknal_put_ltx (ksock_ltx_t *ltx) +ksocknal_free_ltx (ksock_ltx_t *ltx) { - unsigned long flags; - - spin_lock_irqsave (&ksocknal_data.ksnd_idle_ltx_lock, flags); - - ksocknal_data.ksnd_active_ltxs--; - list_add_tail (&ltx->ltx_tx.tx_list, ltx->ltx_idle); - - /* normal tx desc => wakeup anyone blocking for one */ - if (ltx->ltx_idle == &ksocknal_data.ksnd_idle_ltx_list) - wake_up (&ksocknal_data.ksnd_idle_ltx_waitq); - - spin_unlock_irqrestore (&ksocknal_data.ksnd_idle_ltx_lock, flags); + atomic_dec(&ksocknal_data.ksnd_nactive_ltxs); + PORTAL_FREE(ltx, ltx->ltx_desc_size); } #if SOCKNAL_ZC @@ -364,7 +315,7 @@ ksocknal_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) } int -ksocknal_sendmsg (ksock_conn_t *conn, ksock_tx_t *tx) +ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx) { /* Return 0 on success, < 0 on error. * caller checks tx_resid to determine progress/completion */ @@ -377,17 +328,14 @@ ksocknal_sendmsg (ksock_conn_t *conn, ksock_tx_t *tx) } rc = ksocknal_getconnsock (conn); - if (rc != 0) + if (rc != 0) { + LASSERT (conn->ksnc_closing); return (rc); + } for (;;) { LASSERT (tx->tx_resid != 0); - if (conn->ksnc_closing) { - rc = -ESHUTDOWN; - break; - } - if (tx->tx_niov != 0) rc = ksocknal_send_iov (conn, tx); else @@ -554,7 +502,7 @@ ksocknal_recv_kiov (ksock_conn_t *conn) } int -ksocknal_recvmsg (ksock_conn_t *conn) +ksocknal_receive (ksock_conn_t *conn) { /* Return 1 on success, 0 on EOF, < 0 on error.
* Caller checks ksnc_rx_nob_wanted to determine @@ -568,15 +516,12 @@ ksocknal_recvmsg (ksock_conn_t *conn) } rc = ksocknal_getconnsock (conn); - if (rc != 0) + if (rc != 0) { + LASSERT (conn->ksnc_closing); return (rc); + } for (;;) { - if (conn->ksnc_closing) { - rc = -ESHUTDOWN; - break; - } - if (conn->ksnc_rx_niov != 0) rc = ksocknal_recv_iov (conn); else @@ -665,7 +610,7 @@ ksocknal_tx_done (ksock_tx_t *tx, int asynch) lib_finalize (&ksocknal_lib, ltx->ltx_private, ltx->ltx_cookie); - ksocknal_put_ltx (ltx); + ksocknal_free_ltx (ltx); EXIT; } @@ -696,7 +641,7 @@ ksocknal_process_transmit (ksock_conn_t *conn, ksock_tx_t *tx) { int rc; - rc = ksocknal_sendmsg (conn, tx); + rc = ksocknal_transmit (conn, tx); CDEBUG (D_NET, "send(%d) %d\n", tx->tx_resid, rc); LASSERT (rc != -EAGAIN); @@ -840,13 +785,17 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn) unsigned long flags; ksock_sched_t *sched = conn->ksnc_scheduler; - /* called holding global lock (read or irq-write) */ - + /* called holding global lock (read or irq-write) and caller may + * not have dropped this lock between finding conn and calling me, + * so we don't need the {get,put}connsock dance to deref + * ksnc_sock... */ + LASSERT(!conn->ksnc_closing); + LASSERT(tx->tx_resid == tx->tx_nob); + CDEBUG (D_NET, "Sending to "LPX64" on port %d\n", conn->ksnc_peer->ksnp_nid, conn->ksnc_port); atomic_add (tx->tx_nob, &conn->ksnc_tx_nob); - tx->tx_resid = tx->tx_nob; tx->tx_conn = conn; #if SOCKNAL_ZC @@ -854,7 +803,6 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn) /* NB this sets 1 ref on zccd, so the callback can only occur after * I've released this ref. 
*/ #endif - spin_lock_irqsave (&sched->kss_lock, flags); conn->ksnc_tx_deadline = jiffies + @@ -960,6 +908,8 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid) tx->tx_nob, tx->tx_niov, tx->tx_nkiov); tx->tx_conn = NULL; /* only set when assigned a conn */ + tx->tx_resid = tx->tx_nob; + tx->tx_hdr = (ptl_hdr_t *)tx->tx_iov[0].iov_base; g_lock = &ksocknal_data.ksnd_global_lock; read_lock (g_lock); @@ -1024,115 +974,125 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid) return (-EHOSTUNREACH); } -ksock_ltx_t * -ksocknal_setup_hdr (nal_cb_t *nal, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type) +int +ksocknal_sendmsg(nal_cb_t *nal, + void *private, + lib_msg_t *cookie, + ptl_hdr_t *hdr, + int type, + ptl_nid_t nid, + ptl_pid_t pid, + unsigned int payload_niov, + struct iovec *payload_iov, + ptl_kiov_t *payload_kiov, + size_t payload_nob) { ksock_ltx_t *ltx; + int desc_size; + int rc; + + /* NB 'private' is different depending on what we're sending. + * Just ignore it... */ + + CDEBUG(D_NET, "sending "LPSZ" bytes in %d frags to nid:"LPX64 + " pid %d\n", payload_nob, payload_niov, nid , pid); - /* I may not block for a transmit descriptor if I might block the - * receiver, or an interrupt handler. 
*/ - ltx = ksocknal_get_ltx (!(type == PTL_MSG_ACK || - type == PTL_MSG_REPLY || - in_interrupt ())); + LASSERT (payload_nob == 0 || payload_niov > 0); + LASSERT (payload_niov <= PTL_MD_MAX_IOV); + + /* It must be OK to kmap() if required */ + LASSERT (payload_kiov == NULL || !in_interrupt ()); + /* payload is either all vaddrs or all pages */ + LASSERT (!(payload_kiov != NULL && payload_iov != NULL)); + + if (payload_iov != NULL) + desc_size = offsetof(ksock_ltx_t, ltx_iov[1 + payload_niov]); + else + desc_size = offsetof(ksock_ltx_t, ltx_kiov[payload_niov]); + + if (in_interrupt() || + type == PTL_MSG_ACK || + type == PTL_MSG_REPLY) { + /* Can't block if in interrupt or responding to an incoming + * message */ + PORTAL_ALLOC_ATOMIC(ltx, desc_size); + } else { + PORTAL_ALLOC(ltx, desc_size); + } + if (ltx == NULL) { - CERROR ("Can't allocate tx desc\n"); - return (NULL); + CERROR("Can't allocate tx desc type %d size %d %s\n", + type, desc_size, in_interrupt() ? "(intr)" : ""); + return (PTL_NOSPACE); } - /* Init local send packet (storage for hdr, finalize() args) */ + atomic_inc(&ksocknal_data.ksnd_nactive_ltxs); + + ltx->ltx_desc_size = desc_size; + + /* We always have 1 mapped frag for the header */ + ltx->ltx_tx.tx_iov = ltx->ltx_iov; + ltx->ltx_iov[0].iov_base = &ltx->ltx_hdr; + ltx->ltx_iov[0].iov_len = sizeof(*hdr); ltx->ltx_hdr = *hdr; + ltx->ltx_private = private; ltx->ltx_cookie = cookie; - /* Init common ltx_tx */ ltx->ltx_tx.tx_isfwd = 0; - ltx->ltx_tx.tx_nob = sizeof (*hdr); - - /* We always have 1 mapped frag for the header */ - ltx->ltx_tx.tx_niov = 1; - ltx->ltx_tx.tx_iov = &ltx->ltx_iov_space.hdr; - ltx->ltx_tx.tx_iov[0].iov_base = &ltx->ltx_hdr; - ltx->ltx_tx.tx_iov[0].iov_len = sizeof (ltx->ltx_hdr); - - ltx->ltx_tx.tx_kiov = NULL; - ltx->ltx_tx.tx_nkiov = 0; + ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_nob; - return (ltx); -} - -int -ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t
pid, - unsigned int payload_niov, struct iovec *payload_iov, - size_t payload_len) -{ - ksock_ltx_t *ltx; - int rc; + if (payload_iov != NULL) { + /* payload is all mapped */ + ltx->ltx_tx.tx_kiov = NULL; + ltx->ltx_tx.tx_nkiov = 0; - /* NB 'private' is different depending on what we're sending. - * Just ignore it until we can rely on it - */ + ltx->ltx_tx.tx_niov = 1 + payload_niov; - CDEBUG(D_NET, - "sending "LPSZ" bytes in %d mapped frags to nid: "LPX64 - " pid %d\n", payload_len, payload_niov, nid, pid); + memcpy(ltx->ltx_iov + 1, payload_iov, + payload_niov * sizeof (*payload_iov)); - ltx = ksocknal_setup_hdr (nal, private, cookie, hdr, type); - if (ltx == NULL) - return (PTL_FAIL); + } else { + /* payload is all pages */ + ltx->ltx_tx.tx_kiov = ltx->ltx_kiov; + ltx->ltx_tx.tx_nkiov = payload_niov; - /* append the payload_iovs to the one pointing at the header */ - LASSERT (ltx->ltx_tx.tx_niov == 1 && ltx->ltx_tx.tx_nkiov == 0); - LASSERT (payload_niov <= PTL_MD_MAX_IOV); + ltx->ltx_tx.tx_niov = 1; - memcpy (ltx->ltx_tx.tx_iov + 1, payload_iov, - payload_niov * sizeof (*payload_iov)); - ltx->ltx_tx.tx_niov = 1 + payload_niov; - ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len; + memcpy(ltx->ltx_kiov, payload_kiov, + payload_niov * sizeof (*payload_kiov)); + } - rc = ksocknal_launch_packet (&ltx->ltx_tx, nid); + rc = ksocknal_launch_packet(&ltx->ltx_tx, nid); if (rc == 0) return (PTL_OK); - ksocknal_put_ltx (ltx); + ksocknal_free_ltx(ltx); return (PTL_FAIL); } int +ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie, + ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int payload_niov, struct iovec *payload_iov, + size_t payload_len) +{ + return (ksocknal_sendmsg(nal, private, cookie, + hdr, type, nid, pid, + payload_niov, payload_iov, NULL, + payload_len)); +} + +int ksocknal_send_pages (nal_cb_t *nal, void *private, lib_msg_t *cookie, ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int payload_niov, ptl_kiov_t
*payload_iov, size_t payload_len) + unsigned int payload_niov, ptl_kiov_t *payload_kiov, + size_t payload_len) { - ksock_ltx_t *ltx; - int rc; - - /* NB 'private' is different depending on what we're sending. - * Just ignore it until we can rely on it */ - - CDEBUG(D_NET, - "sending "LPSZ" bytes in %d mapped frags to nid: "LPX64" pid %d\n", - payload_len, payload_niov, nid, pid); - - ltx = ksocknal_setup_hdr (nal, private, cookie, hdr, type); - if (ltx == NULL) - return (PTL_FAIL); - - LASSERT (ltx->ltx_tx.tx_niov == 1 && ltx->ltx_tx.tx_nkiov == 0); - LASSERT (payload_niov <= PTL_MD_MAX_IOV); - - ltx->ltx_tx.tx_kiov = ltx->ltx_iov_space.payload.kiov; - memcpy (ltx->ltx_tx.tx_kiov, payload_iov, - payload_niov * sizeof (*payload_iov)); - ltx->ltx_tx.tx_nkiov = payload_niov; - ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len; - - rc = ksocknal_launch_packet (&ltx->ltx_tx, nid); - if (rc == 0) - return (PTL_OK); - - ksocknal_put_ltx (ltx); - return (PTL_FAIL); + return (ksocknal_sendmsg(nal, private, cookie, + hdr, type, nid, pid, + payload_niov, NULL, payload_kiov, + payload_len)); } void @@ -1155,7 +1115,6 @@ ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) tx->tx_iov = fwd->kprfd_iov; tx->tx_nkiov = 0; tx->tx_kiov = NULL; - tx->tx_hdr = (ptl_hdr_t *)fwd->kprfd_iov[0].iov_base; rc = ksocknal_launch_packet (tx, nid); if (rc != 0) @@ -1204,6 +1163,7 @@ ksocknal_fmb_callback (void *arg, int error) spin_lock_irqsave (&fmp->fmp_lock, flags); list_add (&fmb->fmb_list, &fmp->fmp_idle_fmbs); + fmp->fmp_nactive_fmbs--; if (!list_empty (&fmp->fmp_blocked_conns)) { conn = list_entry (fmb->fmb_pool->fmp_blocked_conns.next, @@ -1242,7 +1202,7 @@ ksocknal_get_idle_fmb (ksock_conn_t *conn) ksock_fmb_t *fmb; LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB); - LASSERT (ksocknal_data.ksnd_fmbs != NULL); + LASSERT (kpr_routing(&ksocknal_data.ksnd_router)); if (packet_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE) pool = &ksocknal_data.ksnd_small_fmp; @@ -1255,6 +1215,7 @@
ksocknal_get_idle_fmb (ksock_conn_t *conn) fmb = list_entry(pool->fmp_idle_fmbs.next, ksock_fmb_t, fmb_list); list_del (&fmb->fmb_list); + pool->fmp_nactive_fmbs++; spin_unlock_irqrestore (&pool->fmp_lock, flags); return (fmb); @@ -1397,7 +1358,7 @@ ksocknal_fwd_parse (ksock_conn_t *conn) return; } - if (ksocknal_data.ksnd_fmbs == NULL) { /* not forwarding */ + if (!kpr_routing(&ksocknal_data.ksnd_router)) { /* not forwarding */ CERROR("dropping packet from "LPX64" (%s) for "LPX64 " (%s): not forwarding\n", src_nid, portals_nid2str(TCPNAL, src_nid, str), @@ -1525,9 +1486,11 @@ ksocknal_process_receive (ksock_conn_t *conn) LASSERT (conn->ksnc_rx_nob_wanted > 0); - rc = ksocknal_recvmsg(conn); + rc = ksocknal_receive(conn); if (rc <= 0) { + LASSERT (rc != -EAGAIN); + if (rc == 0) CWARN ("[%p] EOF from "LPX64" ip %08x:%d\n", conn, conn->ksnc_peer->ksnp_nid, @@ -1766,9 +1729,9 @@ int ksocknal_scheduler (void *arg) * kss_lock. */ conn->ksnc_tx_ready = 0; spin_unlock_irqrestore (&sched->kss_lock, flags); - + rc = ksocknal_process_transmit(conn, tx); - + spin_lock_irqsave (&sched->kss_lock, flags); if (rc != -EAGAIN) { @@ -1851,7 +1814,7 @@ ksocknal_data_ready (struct sock *sk, int n) read_lock (&ksocknal_data.ksnd_global_lock); conn = sk->sk_user_data; - if (conn == NULL) { /* raced with ksocknal_close_sock */ + if (conn == NULL) { /* raced with ksocknal_terminate_conn */ LASSERT (sk->sk_data_ready != &ksocknal_data_ready); sk->sk_data_ready (sk, n); } else { @@ -1900,7 +1863,7 @@ ksocknal_write_space (struct sock *sk) (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ? 
" empty" : " queued")); - if (conn == NULL) { /* raced with ksocknal_close_sock */ + if (conn == NULL) { /* raced with ksocknal_terminate_conn */ LASSERT (sk->sk_write_space != &ksocknal_write_space); sk->sk_write_space (sk); @@ -2136,7 +2099,7 @@ ksocknal_setup_sock (struct socket *sock) int option; struct linger linger; - sock->sk->allocation = GFP_NOFS; + sock->sk->allocation = GFP_MEMALLOC; /* Ensure this socket aborts active sends immediately when we close * it. */ @@ -2421,6 +2384,8 @@ ksocknal_autoconnectd (void *arg) kportal_daemonize (name); kportal_blockallsigs (); + current->flags |= PF_MEMALLOC; + spin_lock_irqsave (&ksocknal_data.ksnd_autoconnectd_lock, flags); while (!ksocknal_data.ksnd_shuttingdown) { @@ -2548,6 +2513,8 @@ ksocknal_reaper (void *arg) init_waitqueue_entry (&wait, current); + current->flags |= PF_MEMALLOC; + spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags); while (!ksocknal_data.ksnd_shuttingdown) { diff --git a/lustre/portals/libcfs/module.c b/lustre/portals/libcfs/module.c index 7c0cafc..55e1935 100644 --- a/lustre/portals/libcfs/module.c +++ b/lustre/portals/libcfs/module.c @@ -83,6 +83,115 @@ kportal_daemonize (char *str) } void +kportal_memhog_free (struct portals_device_userstate *pdu) +{ + struct page **level0p = &pdu->pdu_memhog_root_page; + struct page **level1p; + struct page **level2p; + int count1; + int count2; + + if (*level0p != NULL) { + + level1p = (struct page **)page_address(*level0p); + count1 = 0; + + while (count1 < PAGE_SIZE/sizeof(struct page *) && + *level1p != NULL) { + + level2p = (struct page **)page_address(*level1p); + count2 = 0; + + while (count2 < PAGE_SIZE/sizeof(struct page *) && + *level2p != NULL) { + + __free_page(*level2p); + pdu->pdu_memhog_pages--; + level2p++; + count2++; + } + + __free_page(*level1p); + pdu->pdu_memhog_pages--; + level1p++; + count1++; + } + + __free_page(*level0p); + pdu->pdu_memhog_pages--; + + *level0p = NULL; + } + + LASSERT (pdu->pdu_memhog_pages == 0); +} + 
+int +kportal_memhog_alloc (struct portals_device_userstate *pdu, int npages, int flags) +{ + struct page **level0p; + struct page **level1p; + struct page **level2p; + int count1; + int count2; + + LASSERT (pdu->pdu_memhog_pages == 0); + LASSERT (pdu->pdu_memhog_root_page == NULL); + + if (npages < 0) + return -EINVAL; + + if (npages == 0) + return 0; + + level0p = &pdu->pdu_memhog_root_page; + *level0p = alloc_page(flags); + if (*level0p == NULL) + return -ENOMEM; + pdu->pdu_memhog_pages++; + + level1p = (struct page **)page_address(*level0p); + count1 = 0; + memset(level1p, 0, PAGE_SIZE); + + while (pdu->pdu_memhog_pages < npages && + count1 < PAGE_SIZE/sizeof(struct page *)) { + + if (signal_pending(current)) + return (-EINTR); + + *level1p = alloc_page(flags); + if (*level1p == NULL) + return -ENOMEM; + pdu->pdu_memhog_pages++; + + level2p = (struct page **)page_address(*level1p); + count2 = 0; + memset(level2p, 0, PAGE_SIZE); + + while (pdu->pdu_memhog_pages < npages && + count2 < PAGE_SIZE/sizeof(struct page *)) { + + if (signal_pending(current)) + return (-EINTR); + + *level2p = alloc_page(flags); + if (*level2p == NULL) + return (-ENOMEM); + pdu->pdu_memhog_pages++; + + level2p++; + count2++; + } + + level1p++; + count1++; + } + + return 0; +} + +void kportal_blockallsigs () { unsigned long flags; @@ -96,22 +205,39 @@ kportal_blockallsigs () /* called when opening /dev/device */ static int kportal_psdev_open(struct inode * inode, struct file * file) { + struct portals_device_userstate *pdu; ENTRY; - + if (!inode) RETURN(-EINVAL); + PORTAL_MODULE_USE; + + PORTAL_ALLOC(pdu, sizeof(*pdu)); + if (pdu != NULL) { + pdu->pdu_memhog_pages = 0; + pdu->pdu_memhog_root_page = NULL; + } + file->private_data = pdu; + RETURN(0); } /* called when closing /dev/device */ static int kportal_psdev_release(struct inode * inode, struct file * file) { + struct portals_device_userstate *pdu; ENTRY; if (!inode) RETURN(-EINVAL); + pdu = file->private_data; + if (pdu != NULL) { + 
kportal_memhog_free(pdu); + PORTAL_FREE(pdu, sizeof(*pdu)); + } + PORTAL_MODULE_UNUSE; RETURN(0); } @@ -514,7 +640,8 @@ static int kportal_ioctl(struct inode *inode, struct file *file, break; case IOC_PORTAL_LWT_SNAPSHOT: - err = lwt_snapshot (&data->ioc_count, &data->ioc_misc, + err = lwt_snapshot (&data->ioc_nid, + &data->ioc_count, &data->ioc_misc, data->ioc_pbuf1, data->ioc_plen1); if (err == 0 && copy_to_user((char *)arg, data, sizeof (*data))) @@ -528,7 +655,22 @@ static int kportal_ioctl(struct inode *inode, struct file *file, copy_to_user((char *)arg, data, sizeof (*data))) err = -EFAULT; break; -#endif +#endif + case IOC_PORTAL_MEMHOG: + if (!capable (CAP_SYS_ADMIN)) + err = -EPERM; + else if (file->private_data == NULL) { + err = -EINVAL; + } else { + kportal_memhog_free(file->private_data); + err = kportal_memhog_alloc(file->private_data, + data->ioc_count, + data->ioc_flags); + if (err != 0) + kportal_memhog_free(file->private_data); + } + break; + default: err = -EINVAL; break; @@ -612,8 +754,8 @@ static int init_kportals_module(void) cleanup_lwt: #if LWT_SUPPORT lwt_fini(); -#endif cleanup_debug: +#endif portals_debug_cleanup(); return rc; } diff --git a/lustre/portals/utils/portals.c b/lustre/portals/utils/portals.c index f191664..2326fed 100644 --- a/lustre/portals/utils/portals.c +++ b/lustre/portals/utils/portals.c @@ -1389,7 +1389,8 @@ lwt_control(int enable, int clear) } static int -lwt_snapshot(int *ncpu, int *totalsize, lwt_event_t *events, int size) +lwt_snapshot(cycles_t *now, int *ncpu, int *totalsize, + lwt_event_t *events, int size) { struct portal_ioctl_data data; int rc; @@ -1408,6 +1409,9 @@ lwt_snapshot(int *ncpu, int *totalsize, lwt_event_t *events, int size) LASSERT (data.ioc_count != 0); LASSERT (data.ioc_misc != 0); + if (now != NULL) + *now = data.ioc_nid; + if (ncpu != NULL) *ncpu = data.ioc_count; @@ -1517,14 +1521,13 @@ get_cycles_per_usec () int jt_ptl_lwt(int argc, char **argv) { -#define MAX_CPUS 8 int ncpus; int 
totalspace; int nevents_per_cpu; lwt_event_t *events; - lwt_event_t *cpu_event[MAX_CPUS + 1]; - lwt_event_t *next_event[MAX_CPUS]; - lwt_event_t *first_event[MAX_CPUS]; + lwt_event_t *cpu_event[LWT_MAX_CPUS + 1]; + lwt_event_t *next_event[LWT_MAX_CPUS]; + lwt_event_t *first_event[LWT_MAX_CPUS]; int cpu; lwt_event_t *e; int rc; @@ -1532,6 +1535,9 @@ jt_ptl_lwt(int argc, char **argv) double mhz; cycles_t t0; cycles_t tlast; + cycles_t tnow; + struct timeval tvnow; + int printed_date = 0; FILE *f = stdout; if (argc < 2 || @@ -1559,11 +1565,12 @@ jt_ptl_lwt(int argc, char **argv) return (0); } - if (lwt_snapshot(&ncpus, &totalspace, NULL, 0) != 0) + if (lwt_snapshot(NULL, &ncpus, &totalspace, NULL, 0) != 0) return (-1); - if (ncpus > MAX_CPUS) { - fprintf(stderr, "Too many cpus: %d (%d)\n", ncpus, MAX_CPUS); + if (ncpus > LWT_MAX_CPUS) { + fprintf(stderr, "Too many cpus: %d (%d)\n", + ncpus, LWT_MAX_CPUS); return (-1); } @@ -1578,11 +1585,14 @@ jt_ptl_lwt(int argc, char **argv) return (-1); } - if (lwt_snapshot(NULL, NULL, events, totalspace)) { + if (lwt_snapshot(&tnow, NULL, NULL, events, totalspace)) { free(events); return (-1); } + /* we want this time to be sampled at snapshot time */ + gettimeofday(&tvnow, NULL); + if (argc > 2) { f = fopen (argv[2], "w"); if (f == NULL) { @@ -1663,6 +1673,17 @@ jt_ptl_lwt(int argc, char **argv) if (t0 <= next_event[cpu]->lwte_when) { /* on or after the first event */ + if (!printed_date) { + cycles_t du = (tnow - t0) / mhz; + time_t then = tvnow.tv_sec - du/1000000; + + if (du % 1000000 > tvnow.tv_usec) + then--; + + fprintf(f, "%s", ctime(&then)); + printed_date = 1; + } + rc = lwt_print(f, t0, tlast, mhz, cpu, next_event[cpu]); if (rc != 0) break; @@ -1684,5 +1705,48 @@ jt_ptl_lwt(int argc, char **argv) free(events); return (0); -#undef MAX_CPUS } + +int jt_ptl_memhog(int argc, char **argv) +{ + static int gfp = 0; /* sticky! 
*/ + + struct portal_ioctl_data data; + int rc; + int count; + char *end; + + if (argc < 2) { + fprintf(stderr, "usage: %s <npages> [<GFP flags>]\n", argv[0]); + return 0; + } + + count = strtol(argv[1], &end, 0); + if (count < 0 || *end != 0) { + fprintf(stderr, "Can't parse page count '%s'\n", argv[1]); + return -1; + } + + if (argc >= 3) { + rc = strtol(argv[2], &end, 0); + if (*end != 0) { + fprintf(stderr, "Can't parse gfp flags '%s'\n", argv[2]); + return -1; + } + gfp = rc; + } + + PORTAL_IOC_INIT(data); + data.ioc_count = count; + data.ioc_flags = gfp; + rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_MEMHOG, &data); + + if (rc != 0) { + fprintf(stderr, "memhog %d failed: %s\n", count, strerror(errno)); + return -1; + } + + printf("memhog %d OK\n", count); + return 0; +} + -- 1.8.3.1