From 6703c0ee3391c8940445eb0f3421ffdf8d44b78e Mon Sep 17 00:00:00 2001 From: phil Date: Wed, 3 Dec 2003 05:12:52 +0000 Subject: [PATCH] land 1.0.1 fixes on main development branch (head) --- lnet/include/cygwin-ioctl.h | 2 +- lnet/include/linux/kp30.h | 37 ++- lnet/include/lnet/lib-lnet.h | 20 +- lnet/include/lnet/lib-p30.h | 20 +- lnet/include/lnet/lnetctl.h | 1 + lnet/include/lnet/ptlctl.h | 1 + lnet/klnds/socklnd/socklnd.c | 114 +++----- lnet/klnds/socklnd/socklnd.h | 30 +- lnet/klnds/socklnd/socklnd_cb.c | 291 +++++++++---------- lnet/libcfs/debug.c | 6 +- lnet/libcfs/lwt.c | 9 +- lnet/libcfs/module.c | 150 +++++++++- lnet/utils/portals.c | 84 +++++- lustre/ChangeLog | 21 ++ lustre/autogen.sh | 2 +- lustre/doc/lfs.lyx | 82 +++++- lustre/include/linux/lustre_net.h | 7 +- lustre/include/linux/obd_support.h | 44 +-- .../kernel_patches/patches/2.6.0-test6-mm4.patch | 46 +-- lustre/kernel_patches/patches/bproc-patch-2.4.20 | 6 +- .../configurable-x86-stack-2.4.19-pre1.patch | 302 +++++++++++++++++++ .../patches/configurable-x86-stack-2.4.20-rh.patch | 311 ++++++++++++++++++++ .../patches/configurable-x86-stack-2.4.20.patch | 318 +++++++++++++++++++++ .../patches/configurable-x86-stack-2.4.22-rh.patch | 311 ++++++++++++++++++++ .../patches/gfp_memalloc-2.4.18-chaos.patch | 23 +- .../patches/gfp_memalloc-2.4.20-rh.patch | 11 + .../patches/gfp_memalloc-2.4.22.patch | 32 +-- .../patches/linux-2.4.22-xattr-0.8.54.patch | 160 ++++++----- lustre/kernel_patches/series/hp-pnnl-2.4.20 | 1 + lustre/kernel_patches/series/rh-2.4.20 | 1 + lustre/kernel_patches/series/rh-2.4.22 | 1 + lustre/kernel_patches/series/suse-2.4.21 | 1 + lustre/kernel_patches/series/vanilla-2.4.19-pre1 | 1 + lustre/kernel_patches/series/vanilla-2.4.20 | 4 +- lustre/kernel_patches/series/vanilla-2.4.22 | 1 + lustre/ldlm/ldlm_internal.h | 4 +- lustre/ldlm/ldlm_lib.c | 6 +- lustre/lvfs/fsfilt_ext3.c | 144 +++++----- lustre/mds/handler.c | 4 +- lustre/obdfilter/filter.c | 35 +-- lustre/ost/ost_handler.c | 4 - 
lustre/portals/include/cygwin-ioctl.h | 2 +- lustre/portals/include/linux/kp30.h | 37 ++- lustre/portals/include/portals/lib-p30.h | 20 +- lustre/portals/include/portals/ptlctl.h | 1 + lustre/portals/knals/socknal/socknal.c | 114 +++----- lustre/portals/knals/socknal/socknal.h | 30 +- lustre/portals/knals/socknal/socknal_cb.c | 291 +++++++++---------- lustre/portals/libcfs/debug.c | 6 +- lustre/portals/libcfs/lwt.c | 9 +- lustre/portals/libcfs/module.c | 150 +++++++++- lustre/portals/utils/portals.c | 84 +++++- lustre/ptlrpc/client.c | 20 +- lustre/ptlrpc/events.c | 8 +- lustre/ptlrpc/lproc_ptlrpc.c | 16 +- lustre/ptlrpc/niobuf.c | 4 +- lustre/ptlrpc/recover.c | 2 +- lustre/ptlrpc/service.c | 13 +- lustre/utils/lctl.c | 3 + lustre/utils/lmc | 4 +- 60 files changed, 2571 insertions(+), 891 deletions(-) create mode 100644 lustre/kernel_patches/patches/configurable-x86-stack-2.4.19-pre1.patch create mode 100644 lustre/kernel_patches/patches/configurable-x86-stack-2.4.20-rh.patch create mode 100644 lustre/kernel_patches/patches/configurable-x86-stack-2.4.20.patch create mode 100644 lustre/kernel_patches/patches/configurable-x86-stack-2.4.22-rh.patch diff --git a/lnet/include/cygwin-ioctl.h b/lnet/include/cygwin-ioctl.h index 3ecefff..900f0a4 100644 --- a/lnet/include/cygwin-ioctl.h +++ b/lnet/include/cygwin-ioctl.h @@ -1,4 +1,4 @@ -/* $Id: cygwin-ioctl.h,v 1.2 2003/12/03 03:14:43 phil Exp $ +/* $Id: cygwin-ioctl.h,v 1.3 2003/12/03 05:12:41 phil Exp $ * * linux/ioctl.h for Linux by H.H. Bergman. 
*/ diff --git a/lnet/include/linux/kp30.h b/lnet/include/linux/kp30.h index 3e6d5e3..3d60631 100644 --- a/lnet/include/linux/kp30.h +++ b/lnet/include/linux/kp30.h @@ -115,7 +115,7 @@ do { \ if (portal_cerror == 0) \ break; \ CHECK_STACK(CDEBUG_STACK); \ - if (!(mask) || ((mask) & (D_ERROR | D_EMERG)) || \ + if (!(mask) || ((mask) & (D_ERROR | D_EMERG | D_WARNING)) || \ (portal_debug & (mask) && \ portal_subsystem_debug & DEBUG_SUBSYSTEM)) \ portals_debug_msg(DEBUG_SUBSYSTEM, mask, \ @@ -283,17 +283,19 @@ do { \ #define GFP_MEMALLOC 0 #endif -#define PORTAL_ALLOC(ptr, size) \ +#define PORTAL_ALLOC_GFP(ptr, size, mask) \ do { \ LASSERT (!in_interrupt()); \ if ((size) > PORTAL_VMALLOC_SIZE) \ (ptr) = vmalloc(size); \ else \ - (ptr) = kmalloc((size), (GFP_KERNEL | GFP_MEMALLOC)); \ - if ((ptr) == NULL) \ + (ptr) = kmalloc((size), (mask)); \ + if ((ptr) == NULL) { \ CERROR("PORTALS: out of memory at %s:%d (tried to alloc '"\ #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size));\ - else { \ + CERROR("PORTALS: %d total bytes allocated by portals\n", \ + atomic_read(&portal_kmemory)); \ + } else { \ portal_kmem_inc((ptr), (size)); \ memset((ptr), 0, (size)); \ } \ @@ -301,6 +303,12 @@ do { \ (int)(size), (ptr), atomic_read (&portal_kmemory)); \ } while (0) +#define PORTAL_ALLOC(ptr, size) \ + PORTAL_ALLOC_GFP(ptr, size, (GFP_KERNEL | GFP_MEMALLOC)) + +#define PORTAL_ALLOC_ATOMIC(ptr, size) \ + PORTAL_ALLOC_GFP(ptr, size, (GFP_ATOMIC | GFP_MEMALLOC)) + #define PORTAL_FREE(ptr, size) \ do { \ int s = (size); \ @@ -330,11 +338,13 @@ do { \ CERROR("PORTALS: out of memory at %s:%d (tried to alloc" \ " '" #ptr "' from slab '" #slab "')\n", __FILE__, \ __LINE__); \ + CERROR("PORTALS: %d total bytes allocated by portals\n", \ + atomic_read(&portal_kmemory)); \ } else { \ portal_kmem_inc((ptr), (size)); \ memset((ptr), 0, (size)); \ } \ - CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %ld at %p (tot %d).\n", \ + CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d).\n", \ (int)(size), 
(ptr), atomic_read(&portal_kmemory)); \ } while (0) @@ -690,7 +700,10 @@ char *portals_nid2str(int nal, ptl_nid_t nid, char *str); /******************************************************************************/ /* Light-weight trace * Support for temporary event tracing with minimal Heisenberg effect. */ -#define LWT_SUPPORT 1 +#define LWT_SUPPORT 0 + +#define LWT_MEMORY (64<<20) +#define LWT_MAX_CPUS 4 typedef struct { cycles_t lwte_when; @@ -728,7 +741,7 @@ extern void lwt_fini (void); extern int lwt_lookup_string (int *size, char *knlptr, char *usrptr, int usrsize); extern int lwt_control (int enable, int clear); -extern int lwt_snapshot (int *ncpu, int *total_size, +extern int lwt_snapshot (cycles_t *now, int *ncpu, int *total_size, void *user_ptr, int user_size); /* Note that we _don't_ define LWT_EVENT at all if LWT_SUPPORT isn't set. @@ -775,6 +788,11 @@ do { \ #endif /* __KERNEL__ */ #endif /* LWT_SUPPORT */ +struct portals_device_userstate +{ + int pdu_memhog_pages; + struct page *pdu_memhog_root_page; +}; #include @@ -1044,7 +1062,8 @@ static inline int portal_ioctl_getdata(char *buf, char *end, void *arg) #define IOC_PORTAL_LWT_CONTROL _IOWR('e', 39, long) #define IOC_PORTAL_LWT_SNAPSHOT _IOWR('e', 40, long) #define IOC_PORTAL_LWT_LOOKUP_STRING _IOWR('e', 41, long) -#define IOC_PORTAL_MAX_NR 41 +#define IOC_PORTAL_MEMHOG _IOWR('e', 42, long) +#define IOC_PORTAL_MAX_NR 42 enum { QSWNAL = 1, diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index 55fd720..c402828 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -168,7 +168,8 @@ static inline lib_eq_t * lib_eq_alloc (nal_cb_t *nal) { /* NEVER called with statelock held */ - lib_eq_t *eq = kmem_cache_alloc(ptl_eq_slab, GFP_NOFS); + lib_eq_t *eq; + PORTAL_SLAB_ALLOC(eq, ptl_eq_slab, sizeof(*eq)); if (eq == NULL) return (NULL); @@ -182,14 +183,15 @@ lib_eq_free (nal_cb_t *nal, lib_eq_t *eq) { /* ALWAYS called with statelock held */ atomic_dec 
(&eq_in_use_count); - kmem_cache_free(ptl_eq_slab, eq); + PORTAL_SLAB_FREE(eq, ptl_eq_slab, sizeof(*eq)); } static inline lib_md_t * lib_md_alloc (nal_cb_t *nal) { /* NEVER called with statelock held */ - lib_md_t *md = kmem_cache_alloc(ptl_md_slab, GFP_NOFS); + lib_md_t *md; + PORTAL_SLAB_ALLOC(md, ptl_md_slab, sizeof(*md)); if (md == NULL) return (NULL); @@ -203,14 +205,15 @@ lib_md_free (nal_cb_t *nal, lib_md_t *md) { /* ALWAYS called with statelock held */ atomic_dec (&md_in_use_count); - kmem_cache_free(ptl_md_slab, md); + PORTAL_SLAB_FREE(md, ptl_md_slab, sizeof(*md)); } static inline lib_me_t * lib_me_alloc (nal_cb_t *nal) { /* NEVER called with statelock held */ - lib_me_t *me = kmem_cache_alloc(ptl_me_slab, GFP_NOFS); + lib_me_t *me; + PORTAL_SLAB_ALLOC(me, ptl_me_slab, sizeof(*me)); if (me == NULL) return (NULL); @@ -224,14 +227,15 @@ lib_me_free(nal_cb_t *nal, lib_me_t *me) { /* ALWAYS called with statelock held */ atomic_dec (&me_in_use_count); - kmem_cache_free(ptl_me_slab, me); + PORTAL_SLAB_FREE(me, ptl_me_slab, sizeof(*me)); } static inline lib_msg_t * lib_msg_alloc(nal_cb_t *nal) { /* ALWAYS called with statelock held */ - lib_msg_t *msg = kmem_cache_alloc(ptl_msg_slab, GFP_ATOMIC); + lib_msg_t *msg; + PORTAL_SLAB_ALLOC(msg, ptl_msg_slab, sizeof(*msg)); if (msg == NULL) return (NULL); @@ -245,7 +249,7 @@ lib_msg_free(nal_cb_t *nal, lib_msg_t *msg) { /* ALWAYS called with statelock held */ atomic_dec (&msg_in_use_count); - kmem_cache_free(ptl_msg_slab, msg); + PORTAL_SLAB_FREE(msg, ptl_msg_slab, sizeof(*msg)); } #endif diff --git a/lnet/include/lnet/lib-p30.h b/lnet/include/lnet/lib-p30.h index 55fd720..c402828 100644 --- a/lnet/include/lnet/lib-p30.h +++ b/lnet/include/lnet/lib-p30.h @@ -168,7 +168,8 @@ static inline lib_eq_t * lib_eq_alloc (nal_cb_t *nal) { /* NEVER called with statelock held */ - lib_eq_t *eq = kmem_cache_alloc(ptl_eq_slab, GFP_NOFS); + lib_eq_t *eq; + PORTAL_SLAB_ALLOC(eq, ptl_eq_slab, sizeof(*eq)); if (eq == NULL) return 
(NULL); @@ -182,14 +183,15 @@ lib_eq_free (nal_cb_t *nal, lib_eq_t *eq) { /* ALWAYS called with statelock held */ atomic_dec (&eq_in_use_count); - kmem_cache_free(ptl_eq_slab, eq); + PORTAL_SLAB_FREE(eq, ptl_eq_slab, sizeof(*eq)); } static inline lib_md_t * lib_md_alloc (nal_cb_t *nal) { /* NEVER called with statelock held */ - lib_md_t *md = kmem_cache_alloc(ptl_md_slab, GFP_NOFS); + lib_md_t *md; + PORTAL_SLAB_ALLOC(md, ptl_md_slab, sizeof(*md)); if (md == NULL) return (NULL); @@ -203,14 +205,15 @@ lib_md_free (nal_cb_t *nal, lib_md_t *md) { /* ALWAYS called with statelock held */ atomic_dec (&md_in_use_count); - kmem_cache_free(ptl_md_slab, md); + PORTAL_SLAB_FREE(md, ptl_md_slab, sizeof(*md)); } static inline lib_me_t * lib_me_alloc (nal_cb_t *nal) { /* NEVER called with statelock held */ - lib_me_t *me = kmem_cache_alloc(ptl_me_slab, GFP_NOFS); + lib_me_t *me; + PORTAL_SLAB_ALLOC(me, ptl_me_slab, sizeof(*me)); if (me == NULL) return (NULL); @@ -224,14 +227,15 @@ lib_me_free(nal_cb_t *nal, lib_me_t *me) { /* ALWAYS called with statelock held */ atomic_dec (&me_in_use_count); - kmem_cache_free(ptl_me_slab, me); + PORTAL_SLAB_FREE(me, ptl_me_slab, sizeof(*me)); } static inline lib_msg_t * lib_msg_alloc(nal_cb_t *nal) { /* ALWAYS called with statelock held */ - lib_msg_t *msg = kmem_cache_alloc(ptl_msg_slab, GFP_ATOMIC); + lib_msg_t *msg; + PORTAL_SLAB_ALLOC(msg, ptl_msg_slab, sizeof(*msg)); if (msg == NULL) return (NULL); @@ -245,7 +249,7 @@ lib_msg_free(nal_cb_t *nal, lib_msg_t *msg) { /* ALWAYS called with statelock held */ atomic_dec (&msg_in_use_count); - kmem_cache_free(ptl_msg_slab, msg); + PORTAL_SLAB_FREE(msg, ptl_msg_slab, sizeof(*msg)); } #endif diff --git a/lnet/include/lnet/lnetctl.h b/lnet/include/lnet/lnetctl.h index f581e72..12ef47a 100644 --- a/lnet/include/lnet/lnetctl.h +++ b/lnet/include/lnet/lnetctl.h @@ -59,6 +59,7 @@ int jt_ptl_notify_router (int argc, char **argv); int jt_ptl_print_routes (int argc, char **argv); int jt_ptl_fail_nid (int 
argc, char **argv); int jt_ptl_lwt(int argc, char **argv); +int jt_ptl_memhog(int argc, char **argv); int dbg_initialize(int argc, char **argv); int jt_dbg_filter(int argc, char **argv); diff --git a/lnet/include/lnet/ptlctl.h b/lnet/include/lnet/ptlctl.h index f581e72..12ef47a 100644 --- a/lnet/include/lnet/ptlctl.h +++ b/lnet/include/lnet/ptlctl.h @@ -59,6 +59,7 @@ int jt_ptl_notify_router (int argc, char **argv); int jt_ptl_print_routes (int argc, char **argv); int jt_ptl_fail_nid (int argc, char **argv); int jt_ptl_lwt(int argc, char **argv); +int jt_ptl_memhog(int argc, char **argv); int dbg_initialize(int argc, char **argv); int jt_dbg_filter(int argc, char **argv); diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index 6f6fa7e..6de511c 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -1395,30 +1395,35 @@ ksocknal_cmd(struct portals_cfg *pcfg, void * private) } void -ksocknal_free_buffers (void) +ksocknal_free_fmbs (ksock_fmb_pool_t *p) { - if (ksocknal_data.ksnd_fmbs != NULL) { - ksock_fmb_t *fmb = (ksock_fmb_t *)ksocknal_data.ksnd_fmbs; - int i; - int j; - - for (i = 0; - i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); - i++, fmb++) - for (j = 0; j < fmb->fmb_npages; j++) - if (fmb->fmb_pages[j] != NULL) - __free_page (fmb->fmb_pages[j]); - - PORTAL_FREE (ksocknal_data.ksnd_fmbs, - sizeof (ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + - SOCKNAL_LARGE_FWD_NMSGS)); + ksock_fmb_t *fmb; + int i; + + LASSERT (list_empty(&p->fmp_blocked_conns)); + LASSERT (p->fmp_nactive_fmbs == 0); + + while (!list_empty(&p->fmp_idle_fmbs)) { + + fmb = list_entry(p->fmp_idle_fmbs.next, + ksock_fmb_t, fmb_list); + + for (i = 0; i < fmb->fmb_npages; i++) + if (fmb->fmb_pages[i] != NULL) + __free_page(fmb->fmb_pages[i]); + + list_del(&fmb->fmb_list); + PORTAL_FREE(fmb, sizeof(*fmb)); } +} + +void +ksocknal_free_buffers (void) +{ + ksocknal_free_fmbs(&ksocknal_data.ksnd_small_fmp); + 
ksocknal_free_fmbs(&ksocknal_data.ksnd_large_fmp); - LASSERT (ksocknal_data.ksnd_active_ltxs == 0); - if (ksocknal_data.ksnd_ltxs != NULL) - PORTAL_FREE (ksocknal_data.ksnd_ltxs, - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + - SOCKNAL_NNBLK_LTXS)); + LASSERT (atomic_read(&ksocknal_data.ksnd_nactive_ltxs) == 0); if (ksocknal_data.ksnd_schedulers != NULL) PORTAL_FREE (ksocknal_data.ksnd_schedulers, @@ -1572,7 +1577,7 @@ ksocknal_module_init (void) PORTAL_ALLOC (ksocknal_data.ksnd_peers, sizeof (struct list_head) * ksocknal_data.ksnd_peer_hash_size); if (ksocknal_data.ksnd_peers == NULL) - RETURN (-ENOMEM); + return (-ENOMEM); for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]); @@ -1590,11 +1595,6 @@ ksocknal_module_init (void) INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_idle_fmbs); INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns); - spin_lock_init(&ksocknal_data.ksnd_idle_ltx_lock); - INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_nblk_ltx_list); - INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_ltx_list); - init_waitqueue_head(&ksocknal_data.ksnd_idle_ltx_waitq); - spin_lock_init (&ksocknal_data.ksnd_reaper_lock); INIT_LIST_HEAD (&ksocknal_data.ksnd_zombie_conns); INIT_LIST_HEAD (&ksocknal_data.ksnd_deathrow_conns); @@ -1614,7 +1614,7 @@ ksocknal_module_init (void) sizeof(ksock_sched_t) * SOCKNAL_N_SCHED); if (ksocknal_data.ksnd_schedulers == NULL) { ksocknal_module_fini (); - RETURN(-ENOMEM); + return (-ENOMEM); } for (i = 0; i < SOCKNAL_N_SCHED; i++) { @@ -1629,35 +1629,11 @@ ksocknal_module_init (void) init_waitqueue_head (&kss->kss_waitq); } - CDEBUG (D_MALLOC, "ltx "LPSZ", total "LPSZ"\n", sizeof (ksock_ltx_t), - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS)); - - PORTAL_ALLOC(ksocknal_data.ksnd_ltxs, - sizeof(ksock_ltx_t) * (SOCKNAL_NLTXS +SOCKNAL_NNBLK_LTXS)); - if (ksocknal_data.ksnd_ltxs == NULL) { - ksocknal_module_fini (); - return (-ENOMEM); - } - - /* Deterministic bugs please */ - memset 
(ksocknal_data.ksnd_ltxs, 0xeb, - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS)); - - for (i = 0; i < SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS; i++) { - ksock_ltx_t *ltx = &((ksock_ltx_t *)ksocknal_data.ksnd_ltxs)[i]; - - ltx->ltx_tx.tx_hdr = &ltx->ltx_hdr; - ltx->ltx_idle = i < SOCKNAL_NLTXS ? - &ksocknal_data.ksnd_idle_ltx_list : - &ksocknal_data.ksnd_idle_nblk_ltx_list; - list_add (&ltx->ltx_tx.tx_list, ltx->ltx_idle); - } - rc = PtlNIInit(ksocknal_init, 32, 4, 0, &ksocknal_ni); if (rc != 0) { CERROR("ksocknal: PtlNIInit failed: error %d\n", rc); ksocknal_module_fini (); - RETURN (rc); + return (rc); } PtlNIDebug(ksocknal_ni, ~0); @@ -1670,7 +1646,7 @@ ksocknal_module_init (void) CERROR("Can't spawn socknal scheduler[%d]: %d\n", i, rc); ksocknal_module_fini (); - RETURN (rc); + return (rc); } } @@ -1679,7 +1655,7 @@ ksocknal_module_init (void) if (rc != 0) { CERROR("Can't spawn socknal autoconnectd: %d\n", rc); ksocknal_module_fini (); - RETURN (rc); + return (rc); } } @@ -1687,7 +1663,7 @@ ksocknal_module_init (void) if (rc != 0) { CERROR ("Can't spawn socknal reaper: %d\n", rc); ksocknal_module_fini (); - RETURN (rc); + return (rc); } rc = kpr_register(&ksocknal_data.ksnd_router, @@ -1698,23 +1674,15 @@ ksocknal_module_init (void) } else { /* Only allocate forwarding buffers if I'm on a gateway */ - PORTAL_ALLOC(ksocknal_data.ksnd_fmbs, - sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + - SOCKNAL_LARGE_FWD_NMSGS)); - if (ksocknal_data.ksnd_fmbs == NULL) { - ksocknal_module_fini (); - RETURN(-ENOMEM); - } - - /* NULL out buffer pointers etc */ - memset(ksocknal_data.ksnd_fmbs, 0, - sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + - SOCKNAL_LARGE_FWD_NMSGS)); - for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); i++) { - ksock_fmb_t *fmb = - &((ksock_fmb_t *)ksocknal_data.ksnd_fmbs)[i]; + ksock_fmb_t *fmb; + + PORTAL_ALLOC(fmb, sizeof(*fmb)); + if (fmb == NULL) { + ksocknal_module_fini(); + return (-ENOMEM); + } if (i < 
SOCKNAL_SMALL_FWD_NMSGS) { fmb->fmb_npages = SOCKNAL_SMALL_FWD_PAGES; @@ -1724,7 +1692,6 @@ ksocknal_module_init (void) fmb->fmb_pool = &ksocknal_data.ksnd_large_fmp; } - LASSERT (fmb->fmb_npages > 0); for (j = 0; j < fmb->fmb_npages; j++) { fmb->fmb_pages[j] = alloc_page(GFP_KERNEL); @@ -1733,8 +1700,7 @@ ksocknal_module_init (void) return (-ENOMEM); } - LASSERT(page_address (fmb->fmb_pages[j]) != - NULL); + LASSERT(page_address(fmb->fmb_pages[j]) != NULL); } list_add(&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs); diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h index 227a24f..9dbe415 100644 --- a/lnet/klnds/socklnd/socklnd.h +++ b/lnet/klnds/socklnd/socklnd.h @@ -82,9 +82,6 @@ #define SOCKNAL_PEER_HASH_SIZE 101 /* # peer lists */ -#define SOCKNAL_NLTXS 128 /* # normal transmit messages */ -#define SOCKNAL_NNBLK_LTXS 128 /* # transmit messages reserved if can't block */ - #define SOCKNAL_SMALL_FWD_NMSGS 128 /* # small messages I can be forwarding at any time */ #define SOCKNAL_LARGE_FWD_NMSGS 64 /* # large messages I can be forwarding at any time */ @@ -113,8 +110,9 @@ typedef struct /* pool of forwarding buffers */ { spinlock_t fmp_lock; /* serialise */ - struct list_head fmp_idle_fmbs; /* buffers waiting for a connection */ + struct list_head fmp_idle_fmbs; /* free buffers */ struct list_head fmp_blocked_conns; /* connections waiting for a buffer */ + int fmp_nactive_fmbs; /* # buffers in use */ } ksock_fmb_pool_t; @@ -164,16 +162,10 @@ typedef struct { kpr_router_t ksnd_router; /* THE router */ - void *ksnd_fmbs; /* all the pre-allocated FMBs */ ksock_fmb_pool_t ksnd_small_fmp; /* small message forwarding buffers */ ksock_fmb_pool_t ksnd_large_fmp; /* large message forwarding buffers */ - void *ksnd_ltxs; /* all the pre-allocated LTXs */ - spinlock_t ksnd_idle_ltx_lock; /* serialise ltx alloc/free */ - struct list_head ksnd_idle_ltx_list; /* where to get an idle LTX */ - struct list_head ksnd_idle_nblk_ltx_list; /* where to get an idle 
LTX if you can't block */ - wait_queue_head_t ksnd_idle_ltx_waitq; /* where to block for an idle LTX */ - int ksnd_active_ltxs; /* #active ltxs */ + atomic_t ksnd_nactive_ltxs; /* #active ltxs */ struct list_head ksnd_deathrow_conns; /* conns to be closed */ struct list_head ksnd_zombie_conns; /* conns to be freed */ @@ -233,25 +225,15 @@ typedef struct /* transmit packet */ #define KSOCK_ZCCD_2_TX(ptr) list_entry (ptr, ksock_tx_t, tx_zccd) /* network zero copy callback descriptor embedded in ksock_tx_t */ -/* space for the tx frag descriptors: hdr is always 1 iovec - * and payload is PTL_MD_MAX of either type. */ -typedef struct -{ - struct iovec hdr; - union { - struct iovec iov[PTL_MD_MAX_IOV]; - ptl_kiov_t kiov[PTL_MD_MAX_IOV]; - } payload; -} ksock_txiovspace_t; - typedef struct /* locally transmitted packet */ { ksock_tx_t ltx_tx; /* send info */ - struct list_head *ltx_idle; /* where to put when idle */ void *ltx_private; /* lib_finalize() callback arg */ void *ltx_cookie; /* lib_finalize() callback arg */ - ksock_txiovspace_t ltx_iov_space; /* where to stash frag descriptors */ ptl_hdr_t ltx_hdr; /* buffer for packet header */ + int ltx_desc_size; /* bytes allocated for this desc */ + struct iovec ltx_iov[1]; /* iov for hdr + payload */ + ptl_kiov_t ltx_kiov[0]; /* kiov for payload */ } ksock_ltx_t; #define KSOCK_TX_2_KPR_FWD_DESC(ptr) list_entry ((kprfd_scratch_t *)ptr, kpr_fwd_desc_t, kprfd_scratch) diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c index 6ea4fa8..22345fe 100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ b/lnet/klnds/socklnd/socklnd_cb.c @@ -129,60 +129,11 @@ ksocknal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist) return 0; } -ksock_ltx_t * -ksocknal_get_ltx (int may_block) -{ - unsigned long flags; - ksock_ltx_t *ltx = NULL; - - for (;;) { - spin_lock_irqsave (&ksocknal_data.ksnd_idle_ltx_lock, flags); - - if (!list_empty (&ksocknal_data.ksnd_idle_ltx_list)) { - ltx = 
list_entry(ksocknal_data.ksnd_idle_ltx_list.next, - ksock_ltx_t, ltx_tx.tx_list); - list_del (&ltx->ltx_tx.tx_list); - ksocknal_data.ksnd_active_ltxs++; - break; - } - - if (!may_block) { - if (!list_empty(&ksocknal_data.ksnd_idle_nblk_ltx_list)) { - ltx = list_entry(ksocknal_data.ksnd_idle_nblk_ltx_list.next, - ksock_ltx_t, ltx_tx.tx_list); - list_del (&ltx->ltx_tx.tx_list); - ksocknal_data.ksnd_active_ltxs++; - } - break; - } - - spin_unlock_irqrestore(&ksocknal_data.ksnd_idle_ltx_lock, - flags); - - wait_event (ksocknal_data.ksnd_idle_ltx_waitq, - !list_empty (&ksocknal_data.ksnd_idle_ltx_list)); - } - - spin_unlock_irqrestore (&ksocknal_data.ksnd_idle_ltx_lock, flags); - - return (ltx); -} - void -ksocknal_put_ltx (ksock_ltx_t *ltx) +ksocknal_free_ltx (ksock_ltx_t *ltx) { - unsigned long flags; - - spin_lock_irqsave (&ksocknal_data.ksnd_idle_ltx_lock, flags); - - ksocknal_data.ksnd_active_ltxs--; - list_add_tail (&ltx->ltx_tx.tx_list, ltx->ltx_idle); - - /* normal tx desc => wakeup anyone blocking for one */ - if (ltx->ltx_idle == &ksocknal_data.ksnd_idle_ltx_list) - wake_up (&ksocknal_data.ksnd_idle_ltx_waitq); - - spin_unlock_irqrestore (&ksocknal_data.ksnd_idle_ltx_lock, flags); + atomic_dec(&ksocknal_data.ksnd_nactive_ltxs); + PORTAL_FREE(ltx, ltx->ltx_desc_size); } #if SOCKNAL_ZC @@ -364,7 +315,7 @@ ksocknal_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) } int -ksocknal_sendmsg (ksock_conn_t *conn, ksock_tx_t *tx) +ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx) { /* Return 0 on success, < 0 on error. 
* caller checks tx_resid to determine progress/completion */ @@ -377,17 +328,14 @@ ksocknal_sendmsg (ksock_conn_t *conn, ksock_tx_t *tx) } rc = ksocknal_getconnsock (conn); - if (rc != 0) + if (rc != 0) { + LASSERT (conn->ksnc_closing); return (rc); + } for (;;) { LASSERT (tx->tx_resid != 0); - if (conn->ksnc_closing) { - rc = -ESHUTDOWN; - break; - } - if (tx->tx_niov != 0) rc = ksocknal_send_iov (conn, tx); else @@ -554,7 +502,7 @@ ksocknal_recv_kiov (ksock_conn_t *conn) } int -ksocknal_recvmsg (ksock_conn_t *conn) +ksocknal_receive (ksock_conn_t *conn) { /* Return 1 on success, 0 on EOF, < 0 on error. * Caller checks ksnc_rx_nob_wanted to determine @@ -568,15 +516,12 @@ ksocknal_recvmsg (ksock_conn_t *conn) } rc = ksocknal_getconnsock (conn); - if (rc != 0) + if (rc != 0) { + LASSERT (conn->ksnc_closing); return (rc); + } for (;;) { - if (conn->ksnc_closing) { - rc = -ESHUTDOWN; - break; - } - if (conn->ksnc_rx_niov != 0) rc = ksocknal_recv_iov (conn); else @@ -665,7 +610,7 @@ ksocknal_tx_done (ksock_tx_t *tx, int asynch) lib_finalize (&ksocknal_lib, ltx->ltx_private, ltx->ltx_cookie); - ksocknal_put_ltx (ltx); + ksocknal_free_ltx (ltx); EXIT; } @@ -696,7 +641,7 @@ ksocknal_process_transmit (ksock_conn_t *conn, ksock_tx_t *tx) { int rc; - rc = ksocknal_sendmsg (conn, tx); + rc = ksocknal_transmit (conn, tx); CDEBUG (D_NET, "send(%d) %d\n", tx->tx_resid, rc); LASSERT (rc != -EAGAIN); @@ -840,13 +785,17 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn) unsigned long flags; ksock_sched_t *sched = conn->ksnc_scheduler; - /* called holding global lock (read or irq-write) */ - + /* called holding global lock (read or irq-write) and caller may + * not have dropped this lock between finding conn and calling me, + * so we don't need the {get,put}connsock dance to deref + * ksnc_sock... 
*/ + LASSERT(!conn->ksnc_closing); + LASSERT(tx->tx_resid == tx->tx_nob); + CDEBUG (D_NET, "Sending to "LPX64" on port %d\n", conn->ksnc_peer->ksnp_nid, conn->ksnc_port); atomic_add (tx->tx_nob, &conn->ksnc_tx_nob); - tx->tx_resid = tx->tx_nob; tx->tx_conn = conn; #if SOCKNAL_ZC @@ -854,7 +803,6 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn) /* NB this sets 1 ref on zccd, so the callback can only occur after * I've released this ref. */ #endif - spin_lock_irqsave (&sched->kss_lock, flags); conn->ksnc_tx_deadline = jiffies + @@ -960,6 +908,8 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid) tx->tx_nob, tx->tx_niov, tx->tx_nkiov); tx->tx_conn = NULL; /* only set when assigned a conn */ + tx->tx_resid = tx->tx_nob; + tx->tx_hdr = (ptl_hdr_t *)tx->tx_iov[0].iov_base; g_lock = &ksocknal_data.ksnd_global_lock; read_lock (g_lock); @@ -1024,115 +974,125 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid) return (-EHOSTUNREACH); } -ksock_ltx_t * -ksocknal_setup_hdr (nal_cb_t *nal, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type) +int +ksocknal_sendmsg(nal_cb_t *nal, + void *private, + lib_msg_t *cookie, + ptl_hdr_t *hdr, + int type, + ptl_nid_t nid, + ptl_pid_t pid, + unsigned int payload_niov, + struct iovec *payload_iov, + ptl_kiov_t *payload_kiov, + size_t payload_nob) { ksock_ltx_t *ltx; + int desc_size; + int rc; + + /* NB 'private' is different depending on what we're sending. + * Just ignore it... */ + + CDEBUG(D_NET, "sending "LPSZ" bytes in %d frags to nid:"LPX64 + " pid %d\n", payload_nob, payload_niov, nid , pid); - /* I may not block for a transmit descriptor if I might block the - * receiver, or an interrupt handler. 
*/ - ltx = ksocknal_get_ltx (!(type == PTL_MSG_ACK || - type == PTL_MSG_REPLY || - in_interrupt ())); + LASSERT (payload_nob == 0 || payload_niov > 0); + LASSERT (payload_niov <= PTL_MD_MAX_IOV); + + /* It must be OK to kmap() if required */ + LASSERT (payload_kiov == NULL || !in_interrupt ()); + /* payload is either all vaddrs or all pages */ + LASSERT (!(payload_kiov != NULL && payload_iov != NULL)); + + if (payload_iov != NULL) + desc_size = offsetof(ksock_ltx_t, ltx_iov[1 + payload_niov]); + else + desc_size = offsetof(ksock_ltx_t, ltx_kiov[payload_niov]); + + if (in_interrupt() || + type == PTL_MSG_ACK || + type == PTL_MSG_REPLY) { + /* Can't block if in interrupt or responding to an incoming + * message */ + PORTAL_ALLOC_ATOMIC(ltx, desc_size); + } else { + PORTAL_ALLOC(ltx, desc_size); + } + if (ltx == NULL) { - CERROR ("Can't allocate tx desc\n"); - return (NULL); + CERROR("Can't allocate tx desc type %d size %d %s\n", + type, desc_size, in_interrupt() ? "(intr)" : ""); + return (PTL_NOSPACE); } - /* Init local send packet (storage for hdr, finalize() args) */ + atomic_inc(&ksocknal_data.ksnd_nactive_ltxs); + + ltx->ltx_desc_size = desc_size; + + /* We always have 1 mapped frag for the header */ + ltx->ltx_tx.tx_iov = ltx->ltx_iov; + ltx->ltx_iov[0].iov_base = &ltx->ltx_hdr; + ltx->ltx_iov[0].iov_len = sizeof(*hdr); ltx->ltx_hdr = *hdr; + ltx->ltx_private = private; ltx->ltx_cookie = cookie; - /* Init common ltx_tx */ ltx->ltx_tx.tx_isfwd = 0; - ltx->ltx_tx.tx_nob = sizeof (*hdr); - - /* We always have 1 mapped frag for the header */ - ltx->ltx_tx.tx_niov = 1; - ltx->ltx_tx.tx_iov = &ltx->ltx_iov_space.hdr; - ltx->ltx_tx.tx_iov[0].iov_base = &ltx->ltx_hdr; - ltx->ltx_tx.tx_iov[0].iov_len = sizeof (ltx->ltx_hdr); - - ltx->ltx_tx.tx_kiov = NULL; - ltx->ltx_tx.tx_nkiov = 0; + ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_nob; - return (ltx); -} - -int -ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t 
pid, - unsigned int payload_niov, struct iovec *payload_iov, - size_t payload_len) -{ - ksock_ltx_t *ltx; - int rc; + if (payload_iov != NULL) { + /* payload is all mapped */ + ltx->ltx_tx.tx_kiov = NULL; + ltx->ltx_tx.tx_nkiov = 0; - /* NB 'private' is different depending on what we're sending. - * Just ignore it until we can rely on it - */ + ltx->ltx_tx.tx_niov = 1 + payload_niov; - CDEBUG(D_NET, - "sending "LPSZ" bytes in %d mapped frags to nid: "LPX64 - " pid %d\n", payload_len, payload_niov, nid, pid); + memcpy(ltx->ltx_iov + 1, payload_iov, + payload_niov * sizeof (*payload_iov)); - ltx = ksocknal_setup_hdr (nal, private, cookie, hdr, type); - if (ltx == NULL) - return (PTL_FAIL); + } else { + /* payload is all pages */ + ltx->ltx_tx.tx_kiov = ltx->ltx_kiov; + ltx->ltx_tx.tx_nkiov = payload_niov; - /* append the payload_iovs to the one pointing at the header */ - LASSERT (ltx->ltx_tx.tx_niov == 1 && ltx->ltx_tx.tx_nkiov == 0); - LASSERT (payload_niov <= PTL_MD_MAX_IOV); + ltx->ltx_tx.tx_niov = 1; - memcpy (ltx->ltx_tx.tx_iov + 1, payload_iov, - payload_niov * sizeof (*payload_iov)); - ltx->ltx_tx.tx_niov = 1 + payload_niov; - ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len; + memcpy(ltx->ltx_kiov, payload_kiov, + payload_niov * sizeof (*payload_kiov)); + } - rc = ksocknal_launch_packet (&ltx->ltx_tx, nid); + rc = ksocknal_launch_packet(&ltx->ltx_tx, nid); if (rc == 0) return (PTL_OK); - ksocknal_put_ltx (ltx); + ksocknal_free_ltx(ltx); return (PTL_FAIL); } int +ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie, + ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int payload_niov, struct iovec *payload_iov, + size_t payload_len) +{ + return (ksocknal_sendmsg(nal, private, cookie, + hdr, type, nid, pid, + payload_niov, payload_iov, NULL, + payload_len)); +} + +int ksocknal_send_pages (nal_cb_t *nal, void *private, lib_msg_t *cookie, ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int payload_niov, ptl_kiov_t 
*payload_iov, size_t payload_len) + unsigned int payload_niov, ptl_kiov_t *payload_kiov, + size_t payload_len) { - ksock_ltx_t *ltx; - int rc; - - /* NB 'private' is different depending on what we're sending. - * Just ignore it until we can rely on it */ - - CDEBUG(D_NET, - "sending "LPSZ" bytes in %d mapped frags to nid: "LPX64" pid %d\n", - payload_len, payload_niov, nid, pid); - - ltx = ksocknal_setup_hdr (nal, private, cookie, hdr, type); - if (ltx == NULL) - return (PTL_FAIL); - - LASSERT (ltx->ltx_tx.tx_niov == 1 && ltx->ltx_tx.tx_nkiov == 0); - LASSERT (payload_niov <= PTL_MD_MAX_IOV); - - ltx->ltx_tx.tx_kiov = ltx->ltx_iov_space.payload.kiov; - memcpy (ltx->ltx_tx.tx_kiov, payload_iov, - payload_niov * sizeof (*payload_iov)); - ltx->ltx_tx.tx_nkiov = payload_niov; - ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len; - - rc = ksocknal_launch_packet (&ltx->ltx_tx, nid); - if (rc == 0) - return (PTL_OK); - - ksocknal_put_ltx (ltx); - return (PTL_FAIL); + return (ksocknal_sendmsg(nal, private, cookie, + hdr, type, nid, pid, + payload_niov, NULL, payload_kiov, + payload_len)); } void @@ -1155,7 +1115,6 @@ ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) tx->tx_iov = fwd->kprfd_iov; tx->tx_nkiov = 0; tx->tx_kiov = NULL; - tx->tx_hdr = (ptl_hdr_t *)fwd->kprfd_iov[0].iov_base; rc = ksocknal_launch_packet (tx, nid); if (rc != 0) @@ -1204,6 +1163,7 @@ ksocknal_fmb_callback (void *arg, int error) spin_lock_irqsave (&fmp->fmp_lock, flags); list_add (&fmb->fmb_list, &fmp->fmp_idle_fmbs); + fmp->fmp_nactive_fmbs--; if (!list_empty (&fmp->fmp_blocked_conns)) { conn = list_entry (fmb->fmb_pool->fmp_blocked_conns.next, @@ -1242,7 +1202,7 @@ ksocknal_get_idle_fmb (ksock_conn_t *conn) ksock_fmb_t *fmb; LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB); - LASSERT (ksocknal_data.ksnd_fmbs != NULL); + LASSERT (kpr_routing(&ksocknal_data.ksnd_router)); if (packet_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE) pool = &ksocknal_data.ksnd_small_fmp; @@ -1255,6 +1215,7 @@ 
ksocknal_get_idle_fmb (ksock_conn_t *conn) fmb = list_entry(pool->fmp_idle_fmbs.next, ksock_fmb_t, fmb_list); list_del (&fmb->fmb_list); + pool->fmp_nactive_fmbs++; spin_unlock_irqrestore (&pool->fmp_lock, flags); return (fmb); @@ -1397,7 +1358,7 @@ ksocknal_fwd_parse (ksock_conn_t *conn) return; } - if (ksocknal_data.ksnd_fmbs == NULL) { /* not forwarding */ + if (!kpr_routing(&ksocknal_data.ksnd_router)) { /* not forwarding */ CERROR("dropping packet from "LPX64" (%s) for "LPX64 " (%s): not forwarding\n", src_nid, portals_nid2str(TCPNAL, src_nid, str), @@ -1525,9 +1486,11 @@ ksocknal_process_receive (ksock_conn_t *conn) LASSERT (conn->ksnc_rx_nob_wanted > 0); - rc = ksocknal_recvmsg(conn); + rc = ksocknal_receive(conn); if (rc <= 0) { + LASSERT (rc != -EAGAIN); + if (rc == 0) CWARN ("[%p] EOF from "LPX64" ip %08x:%d\n", conn, conn->ksnc_peer->ksnp_nid, @@ -1766,9 +1729,9 @@ int ksocknal_scheduler (void *arg) * kss_lock. */ conn->ksnc_tx_ready = 0; spin_unlock_irqrestore (&sched->kss_lock, flags); - + rc = ksocknal_process_transmit(conn, tx); - + spin_lock_irqsave (&sched->kss_lock, flags); if (rc != -EAGAIN) { @@ -1851,7 +1814,7 @@ ksocknal_data_ready (struct sock *sk, int n) read_lock (&ksocknal_data.ksnd_global_lock); conn = sk->sk_user_data; - if (conn == NULL) { /* raced with ksocknal_close_sock */ + if (conn == NULL) { /* raced with ksocknal_terminate_conn */ LASSERT (sk->sk_data_ready != &ksocknal_data_ready); sk->sk_data_ready (sk, n); } else { @@ -1900,7 +1863,7 @@ ksocknal_write_space (struct sock *sk) (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ? 
" empty" : " queued")); - if (conn == NULL) { /* raced with ksocknal_close_sock */ + if (conn == NULL) { /* raced with ksocknal_terminate_conn */ LASSERT (sk->sk_write_space != &ksocknal_write_space); sk->sk_write_space (sk); @@ -2136,7 +2099,7 @@ ksocknal_setup_sock (struct socket *sock) int option; struct linger linger; - sock->sk->allocation = GFP_NOFS; + sock->sk->allocation = GFP_MEMALLOC; /* Ensure this socket aborts active sends immediately when we close * it. */ @@ -2421,6 +2384,8 @@ ksocknal_autoconnectd (void *arg) kportal_daemonize (name); kportal_blockallsigs (); + current->flags |= PF_MEMALLOC; + spin_lock_irqsave (&ksocknal_data.ksnd_autoconnectd_lock, flags); while (!ksocknal_data.ksnd_shuttingdown) { @@ -2548,6 +2513,8 @@ ksocknal_reaper (void *arg) init_waitqueue_entry (&wait, current); + current->flags |= PF_MEMALLOC; + spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags); while (!ksocknal_data.ksnd_shuttingdown) { diff --git a/lnet/libcfs/debug.c b/lnet/libcfs/debug.c index 6e2c1ca..ad2c966 100644 --- a/lnet/libcfs/debug.c +++ b/lnet/libcfs/debug.c @@ -974,18 +974,14 @@ char *portals_debug_dumpstack(void) return buf; } -#elif defined(CONFIG_X86) +#elif defined(__i386__) extern int is_kernel_text_address(unsigned long addr); extern int lookup_symbol(unsigned long address, char *buf, int buflen); char *portals_debug_dumpstack(void) { -#if defined(__x86_64__) - unsigned long esp = current->thread.rsp; -#else unsigned long esp = current->thread.esp; -#endif unsigned long *stack = (unsigned long *)&esp; int size; unsigned long addr; diff --git a/lnet/libcfs/lwt.c b/lnet/libcfs/lwt.c index 89fe8f7..a24423e 100644 --- a/lnet/libcfs/lwt.c +++ b/lnet/libcfs/lwt.c @@ -45,9 +45,6 @@ #if LWT_SUPPORT -#define LWT_MEMORY (1<<20) /* 1Mb of trace memory */ -#define LWT_MAX_CPUS 4 - int lwt_enabled; int lwt_pages_per_cpu; lwt_cpu_t lwt_cpus[LWT_MAX_CPUS]; @@ -123,7 +120,8 @@ lwt_control (int enable, int clear) } int -lwt_snapshot (int *ncpu, int 
*total_size, void *user_ptr, int user_size) +lwt_snapshot (cycles_t *now, int *ncpu, int *total_size, + void *user_ptr, int user_size) { const int events_per_page = PAGE_SIZE / sizeof(lwt_event_t); const int bytes_per_page = events_per_page * sizeof(lwt_event_t); @@ -136,7 +134,8 @@ lwt_snapshot (int *ncpu, int *total_size, void *user_ptr, int user_size) *ncpu = num_online_cpus(); *total_size = num_online_cpus() * lwt_pages_per_cpu * bytes_per_page; - + *now = get_cycles(); + if (user_ptr == NULL) return (0); diff --git a/lnet/libcfs/module.c b/lnet/libcfs/module.c index 7c0cafc..55e1935 100644 --- a/lnet/libcfs/module.c +++ b/lnet/libcfs/module.c @@ -83,6 +83,115 @@ kportal_daemonize (char *str) } void +kportal_memhog_free (struct portals_device_userstate *pdu) +{ + struct page **level0p = &pdu->pdu_memhog_root_page; + struct page **level1p; + struct page **level2p; + int count1; + int count2; + + if (*level0p != NULL) { + + level1p = (struct page **)page_address(*level0p); + count1 = 0; + + while (count1 < PAGE_SIZE/sizeof(struct page *) && + *level1p != NULL) { + + level2p = (struct page **)page_address(*level1p); + count2 = 0; + + while (count2 < PAGE_SIZE/sizeof(struct page *) && + *level2p != NULL) { + + __free_page(*level2p); + pdu->pdu_memhog_pages--; + level2p++; + count2++; + } + + __free_page(*level1p); + pdu->pdu_memhog_pages--; + level1p++; + count1++; + } + + __free_page(*level0p); + pdu->pdu_memhog_pages--; + + *level0p = NULL; + } + + LASSERT (pdu->pdu_memhog_pages == 0); +} + +int +kportal_memhog_alloc (struct portals_device_userstate *pdu, int npages, int flags) +{ + struct page **level0p; + struct page **level1p; + struct page **level2p; + int count1; + int count2; + + LASSERT (pdu->pdu_memhog_pages == 0); + LASSERT (pdu->pdu_memhog_root_page == NULL); + + if (npages < 0) + return -EINVAL; + + if (npages == 0) + return 0; + + level0p = &pdu->pdu_memhog_root_page; + *level0p = alloc_page(flags); + if (*level0p == NULL) + return -ENOMEM; + 
pdu->pdu_memhog_pages++; + + level1p = (struct page **)page_address(*level0p); + count1 = 0; + memset(level1p, 0, PAGE_SIZE); + + while (pdu->pdu_memhog_pages < npages && + count1 < PAGE_SIZE/sizeof(struct page *)) { + + if (signal_pending(current)) + return (-EINTR); + + *level1p = alloc_page(flags); + if (*level1p == NULL) + return -ENOMEM; + pdu->pdu_memhog_pages++; + + level2p = (struct page **)page_address(*level1p); + count2 = 0; + memset(level2p, 0, PAGE_SIZE); + + while (pdu->pdu_memhog_pages < npages && + count2 < PAGE_SIZE/sizeof(struct page *)) { + + if (signal_pending(current)) + return (-EINTR); + + *level2p = alloc_page(flags); + if (*level2p == NULL) + return (-ENOMEM); + pdu->pdu_memhog_pages++; + + level2p++; + count2++; + } + + level1p++; + count1++; + } + + return 0; +} + +void kportal_blockallsigs () { unsigned long flags; @@ -96,22 +205,39 @@ kportal_blockallsigs () /* called when opening /dev/device */ static int kportal_psdev_open(struct inode * inode, struct file * file) { + struct portals_device_userstate *pdu; ENTRY; - + if (!inode) RETURN(-EINVAL); + PORTAL_MODULE_USE; + + PORTAL_ALLOC(pdu, sizeof(*pdu)); + if (pdu != NULL) { + pdu->pdu_memhog_pages = 0; + pdu->pdu_memhog_root_page = NULL; + } + file->private_data = pdu; + RETURN(0); } /* called when closing /dev/device */ static int kportal_psdev_release(struct inode * inode, struct file * file) { + struct portals_device_userstate *pdu; ENTRY; if (!inode) RETURN(-EINVAL); + pdu = file->private_data; + if (pdu != NULL) { + kportal_memhog_free(pdu); + PORTAL_FREE(pdu, sizeof(*pdu)); + } + PORTAL_MODULE_UNUSE; RETURN(0); } @@ -514,7 +640,8 @@ static int kportal_ioctl(struct inode *inode, struct file *file, break; case IOC_PORTAL_LWT_SNAPSHOT: - err = lwt_snapshot (&data->ioc_count, &data->ioc_misc, + err = lwt_snapshot (&data->ioc_nid, + &data->ioc_count, &data->ioc_misc, data->ioc_pbuf1, data->ioc_plen1); if (err == 0 && copy_to_user((char *)arg, data, sizeof (*data))) @@ -528,7 +655,22 @@ 
static int kportal_ioctl(struct inode *inode, struct file *file, copy_to_user((char *)arg, data, sizeof (*data))) err = -EFAULT; break; -#endif +#endif + case IOC_PORTAL_MEMHOG: + if (!capable (CAP_SYS_ADMIN)) + err = -EPERM; + else if (file->private_data == NULL) { + err = -EINVAL; + } else { + kportal_memhog_free(file->private_data); + err = kportal_memhog_alloc(file->private_data, + data->ioc_count, + data->ioc_flags); + if (err != 0) + kportal_memhog_free(file->private_data); + } + break; + default: err = -EINVAL; break; @@ -612,8 +754,8 @@ static int init_kportals_module(void) cleanup_lwt: #if LWT_SUPPORT lwt_fini(); -#endif cleanup_debug: +#endif portals_debug_cleanup(); return rc; } diff --git a/lnet/utils/portals.c b/lnet/utils/portals.c index 3325892..b46ee16 100644 --- a/lnet/utils/portals.c +++ b/lnet/utils/portals.c @@ -1371,7 +1371,8 @@ lwt_control(int enable, int clear) } static int -lwt_snapshot(int *ncpu, int *totalsize, lwt_event_t *events, int size) +lwt_snapshot(cycles_t *now, int *ncpu, int *totalsize, + lwt_event_t *events, int size) { struct portal_ioctl_data data; int rc; @@ -1390,6 +1391,9 @@ lwt_snapshot(int *ncpu, int *totalsize, lwt_event_t *events, int size) LASSERT (data.ioc_count != 0); LASSERT (data.ioc_misc != 0); + if (now != NULL) + *now = data.ioc_nid; + if (ncpu != NULL) *ncpu = data.ioc_count; @@ -1499,14 +1503,13 @@ get_cycles_per_usec () int jt_ptl_lwt(int argc, char **argv) { -#define MAX_CPUS 8 int ncpus; int totalspace; int nevents_per_cpu; lwt_event_t *events; - lwt_event_t *cpu_event[MAX_CPUS + 1]; - lwt_event_t *next_event[MAX_CPUS]; - lwt_event_t *first_event[MAX_CPUS]; + lwt_event_t *cpu_event[LWT_MAX_CPUS + 1]; + lwt_event_t *next_event[LWT_MAX_CPUS]; + lwt_event_t *first_event[LWT_MAX_CPUS]; int cpu; lwt_event_t *e; int rc; @@ -1514,6 +1517,9 @@ jt_ptl_lwt(int argc, char **argv) double mhz; cycles_t t0; cycles_t tlast; + cycles_t tnow; + struct timeval tvnow; + int printed_date = 0; FILE *f = stdout; if (argc < 2 || 
@@ -1541,11 +1547,12 @@ jt_ptl_lwt(int argc, char **argv) return (0); } - if (lwt_snapshot(&ncpus, &totalspace, NULL, 0) != 0) + if (lwt_snapshot(NULL, &ncpus, &totalspace, NULL, 0) != 0) return (-1); - if (ncpus > MAX_CPUS) { - fprintf(stderr, "Too many cpus: %d (%d)\n", ncpus, MAX_CPUS); + if (ncpus > LWT_MAX_CPUS) { + fprintf(stderr, "Too many cpus: %d (%d)\n", + ncpus, LWT_MAX_CPUS); return (-1); } @@ -1560,11 +1567,14 @@ jt_ptl_lwt(int argc, char **argv) return (-1); } - if (lwt_snapshot(NULL, NULL, events, totalspace)) { + if (lwt_snapshot(&tnow, NULL, NULL, events, totalspace)) { free(events); return (-1); } + /* we want this time to be sampled at snapshot time */ + gettimeofday(&tvnow, NULL); + if (argc > 2) { f = fopen (argv[2], "w"); if (f == NULL) { @@ -1645,6 +1655,17 @@ jt_ptl_lwt(int argc, char **argv) if (t0 <= next_event[cpu]->lwte_when) { /* on or after the first event */ + if (!printed_date) { + cycles_t du = (tnow - t0) / mhz; + time_t then = tvnow.tv_sec - du/1000000; + + if (du % 1000000 > tvnow.tv_usec) + then--; + + fprintf(f, "%s", ctime(&then)); + printed_date = 1; + } + rc = lwt_print(f, t0, tlast, mhz, cpu, next_event[cpu]); if (rc != 0) break; @@ -1666,5 +1687,48 @@ jt_ptl_lwt(int argc, char **argv) free(events); return (0); -#undef MAX_CPUS } + +int jt_ptl_memhog(int argc, char **argv) +{ + static int gfp = 0; /* sticky! 
*/ + + struct portal_ioctl_data data; + int rc; + int count; + char *end; + + if (argc < 2) { + fprintf(stderr, "usage: %s []\n", argv[0]); + return 0; + } + + count = strtol(argv[1], &end, 0); + if (count < 0 || *end != 0) { + fprintf(stderr, "Can't parse page count '%s'\n", argv[1]); + return -1; + } + + if (argc >= 3) { + rc = strtol(argv[2], &end, 0); + if (*end != 0) { + fprintf(stderr, "Can't parse gfp flags '%s'\n", argv[2]); + return -1; + } + gfp = rc; + } + + PORTAL_IOC_INIT(data); + data.ioc_count = count; + data.ioc_flags = gfp; + rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_MEMHOG, &data); + + if (rc != 0) { + fprintf(stderr, "memhog %d failed: %s\n", count, strerror(errno)); + return -1; + } + + printf("memhog %d OK\n", count); + return 0; +} + diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 22a6196..872df60 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -1,3 +1,24 @@ +tbd Cluster File Systems, Inc. + * version 1.0.1 + * bug fixes + - remove now-unused request->rq_obd (278) + - if an allocation fails, print out how much memory we've used (1933) + - use PORTAL_SLAB_ALLOC for structures, to get GFP_MEMALLOC (1933) + - add the "configurable stack size" patch to most series files (1256) + - ability to write large log records, for 100+ OST configs (2306) + - fix NULL deref when filter_prep fails (2314) + - fix operator precedence error in filter_sync + - dynamic allocation of socknal TX descriptors (2315) + - fix a missed case in the GFP_MEMALLOC patch, can cause deadlock (2310) + - fix gcc 2.96 compilation problem in xattr kernel patch (2294) + - ensure that CWARN messages in Portals always get to the syslog + - __init/__exit are not for prototype decls (ldlm_init/exit) + - x86-64 compile warning fixes + - fix gateway LMC keyword conflict (2318) + * miscellania + - allow configurable automake binary, for testing new versions + - small update to the lfs documentation + 2003-12-03 Cluster File Systems, Inc. 
* version 1.0.0 * fix negative export reference count in fsfilt_sync (2312) diff --git a/lustre/autogen.sh b/lustre/autogen.sh index 9deed73..be0d42d 100644 --- a/lustre/autogen.sh +++ b/lustre/autogen.sh @@ -1,5 +1,5 @@ #!/bin/sh aclocal && -automake --add-missing && +${AUTOMAKE:-automake} --add-missing && ${AUTOCONF:-autoconf} diff --git a/lustre/doc/lfs.lyx b/lustre/doc/lfs.lyx index b8568da..28890e0 100644 --- a/lustre/doc/lfs.lyx +++ b/lustre/doc/lfs.lyx @@ -59,6 +59,12 @@ getstripe \series bold lfs\SpecialChar ~ setstripe +\layout Standard + + +\series bold +lfs\SpecialChar ~ +check \layout Subsection DESCRIPTION @@ -101,6 +107,23 @@ getstripe \series bold +check +\series default +Display the status of MDS or OSTs (as specified in the command) or all the + servers (MDS and OSTs) +\layout List +\labelwidthstring 00.00.0000 + + +\series bold +osts +\series default + List all the OSTs for the filesystem +\layout List +\labelwidthstring 00.00.0000 + + +\series bold help \series default Provides brief help on the various arguments @@ -167,7 +190,22 @@ given file \layout LyX-Code - $lfs find /mnt/lustre/file1 + $lfs find /mnt/lustre/foo1 +\layout LyX-Code + + OBDS: +\layout LyX-Code + + 0: OST_localhost_UUID +\layout LyX-Code + + /mnt/lustre/foo1 +\layout LyX-Code + + obdidx objid objid group +\layout LyX-Code + + 0 1 0x1 0 \layout Description Listing\SpecialChar ~ @@ -218,8 +256,50 @@ OST \layout LyX-Code $lfs find -r --obd OST2_UUID /mnt/lustre/ +\layout Description + +Check\SpecialChar ~ +the\SpecialChar ~ +status\SpecialChar ~ +of\SpecialChar ~ +all\SpecialChar ~ +servers(mds,\SpecialChar ~ +osts) +\layout LyX-Code + + $lfs check servers +\layout LyX-Code + + OSC_localhost.localdomain_OST_localhost_mds1 active. + +\layout LyX-Code + + OSC_localhost.localdomain_OST_localhost_MNT_localhost active. + +\layout LyX-Code + + MDC_localhost.localdomain_mds1_MNT_localhost active. 
+\layout LyX-Code + + $ +\layout Description + +List\SpecialChar ~ +all\SpecialChar ~ +the\SpecialChar ~ +OSTs +\layout LyX-Code + + $lfs osts +\layout LyX-Code + + OBDS: +\layout LyX-Code + + 0: OST_localhost_UUID \layout LyX-Code + $ \layout Subsection BUGS diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h index 7fe649b..d95ae9c 100644 --- a/lustre/include/linux/lustre_net.h +++ b/lustre/include/linux/lustre_net.h @@ -201,7 +201,6 @@ struct ptlrpc_bulk_desc; struct ptlrpc_request { int rq_type; /* one of PTL_RPC_MSG_* */ struct list_head rq_list; - struct obd_device *rq_obd; int rq_status; spinlock_t rq_lock; unsigned int rq_intr:1, rq_replied:1, rq_want_ack:1, rq_err:1, @@ -230,7 +229,7 @@ struct ptlrpc_request { int rq_import_generation; enum lustre_imp_state rq_send_state; - wait_queue_head_t rq_wait_for_rep; /* XXX also _for_ack */ + wait_queue_head_t rq_reply_waitq; /* XXX also _for_ack */ /* incoming reply */ ptl_md_t rq_reply_md; @@ -413,8 +412,8 @@ struct ptlrpc_service { struct list_head srv_threads; int (*srv_handler)(struct ptlrpc_request *req); char *srv_name; /* only statically allocated strings here; we don't clean them */ - struct proc_dir_entry *svc_procroot; - struct lprocfs_stats *svc_stats; + struct proc_dir_entry *srv_procroot; + struct lprocfs_stats *srv_stats; int srv_interface_rover; struct ptlrpc_srv_ni srv_interfaces[0]; diff --git a/lustre/include/linux/obd_support.h b/lustre/include/linux/obd_support.h index 003daad..90b9612 100644 --- a/lustre/include/linux/obd_support.h +++ b/lustre/include/linux/obd_support.h @@ -28,8 +28,6 @@ #include #include #include -#else - #endif #include #include @@ -173,23 +171,23 @@ do { \ #define fixme() CDEBUG(D_OTHER, "FIXME\n"); #ifdef __KERNEL__ -#include -#include +# include +# include -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#define BDEVNAME_DECLARE_STORAGE(foo) char foo[BDEVNAME_SIZE] -#define ll_bdevname(SB, STORAGE) __bdevname(kdev_t_to_nr(SB->s_dev), 
STORAGE) -#define ll_lock_kernel lock_kernel() -#define ll_sbdev(SB) ((SB)->s_bdev) +# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) +# define BDEVNAME_DECLARE_STORAGE(foo) char foo[BDEVNAME_SIZE] +# define ll_bdevname(SB, STORAGE) __bdevname(kdev_t_to_nr(SB->s_dev), STORAGE) +# define ll_lock_kernel lock_kernel() +# define ll_sbdev(SB) ((SB)->s_bdev) void dev_set_rdonly(struct block_device *, int); -#else -#define BDEVNAME_DECLARE_STORAGE(foo) char __unused_##foo -#define ll_sbdev(SB) (kdev_t_to_nr((SB)->s_dev)) -#define ll_bdevname(SB,STORAGE) ((void)__unused_##STORAGE,bdevname(ll_sbdev(SB))) -#define ll_lock_kernel +# else +# define BDEVNAME_DECLARE_STORAGE(foo) char __unused_##foo +# define ll_sbdev(SB) (kdev_t_to_nr((SB)->s_dev)) +# define ll_bdevname(SB,STORAGE) ((void)__unused_##STORAGE,bdevname(ll_sbdev(SB))) +# define ll_lock_kernel void dev_set_rdonly(kdev_t, int); -#endif +# endif void dev_clear_rdonly(int); @@ -205,23 +203,27 @@ static inline void OBD_FAIL_WRITE(int id, struct super_block *sb) } } #else /* !__KERNEL__ */ -#define LTIME_S(time) (time) +# define LTIME_S(time) (time) /* for obd_class.h */ -#ifndef ERR_PTR -# define ERR_PTR(a) ((void *)(a)) -#endif +# ifndef ERR_PTR +# define ERR_PTR(a) ((void *)(a)) +# endif #endif /* __KERNEL__ */ #ifndef GFP_MEMALLOC #define GFP_MEMALLOC 0 #endif +extern atomic_t portal_kmemory; + #define OBD_ALLOC_GFP(ptr, size, gfp_mask) \ do { \ (ptr) = kmalloc(size, (gfp_mask)); \ if ((ptr) == NULL) { \ CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ (int)(size), __FILE__, __LINE__); \ + CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \ + atomic_read(&obd_memory), atomic_read(&portal_kmemory));\ } else { \ memset(ptr, 0, size); \ atomic_add(size, &obd_memory); \ @@ -248,6 +250,8 @@ do { \ if ((ptr) == NULL) { \ CERROR("vmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ (int)(size), __FILE__, __LINE__); \ + CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \ + 
atomic_read(&obd_memory), atomic_read(&portal_kmemory));\ } else { \ memset(ptr, 0, size); \ atomic_add(size, &obd_memory); \ @@ -312,6 +316,8 @@ do { \ if ((ptr) == NULL) { \ CERROR("slab-alloc of '"#ptr"' (%d bytes) failed at %s:%d\n", \ (int)(size), __FILE__, __LINE__); \ + CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \ + atomic_read(&obd_memory), atomic_read(&portal_kmemory));\ } else { \ memset(ptr, 0, size); \ atomic_add(size, &obd_memory); \ diff --git a/lustre/kernel_patches/patches/2.6.0-test6-mm4.patch b/lustre/kernel_patches/patches/2.6.0-test6-mm4.patch index 320311e..6293972 100644 --- a/lustre/kernel_patches/patches/2.6.0-test6-mm4.patch +++ b/lustre/kernel_patches/patches/2.6.0-test6-mm4.patch @@ -14430,7 +14430,7 @@ +++ 25/arch/parisc/lib/checksum.c 2003-10-05 00:33:23.000000000 -0700 @@ -16,8 +16,10 @@ * - * $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $ + * $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $ */ -#include +#include @@ -31511,8 +31511,8 @@ --- linux-2.6.0-test6/drivers/char/ftape/compressor/zftape-compress.c 2003-06-14 12:18:32.000000000 -0700 +++ 25/drivers/char/ftape/compressor/zftape-compress.c 2003-10-05 00:33:24.000000000 -0700 @@ -31,6 +31,7 @@ - char zftc_rev[] = "$Revision: 1.2 $"; - char zftc_dat[] = "$Date: 2003/12/03 03:13:28 $"; + char zftc_rev[] = "$Revision: 1.3 $"; + char zftc_dat[] = "$Date: 2003/12/03 05:12:20 $"; +#include #include @@ -37169,8 +37169,8 @@ --- linux-2.6.0-test6/drivers/isdn/hardware/eicon/divamnt.c 2003-09-27 18:57:44.000000000 -0700 +++ 25/drivers/isdn/hardware/eicon/divamnt.c 2003-10-05 00:33:24.000000000 -0700 @@ -1,4 +1,4 @@ --/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $ -+/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $ +-/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $ ++/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $ * * Driver for Eicon DIVA Server ISDN cards. 
* Maint module @@ -37181,16 +37181,16 @@ -#include "di_defs.h" #include "debug_if.h" --static char *main_revision = "$Revision: 1.2 $"; -+static char *main_revision = "$Revision: 1.2 $"; +-static char *main_revision = "$Revision: 1.3 $"; ++static char *main_revision = "$Revision: 1.3 $"; static int major; --- linux-2.6.0-test6/drivers/isdn/hardware/eicon/divasmain.c 2003-09-27 18:57:44.000000000 -0700 +++ 25/drivers/isdn/hardware/eicon/divasmain.c 2003-10-05 00:33:24.000000000 -0700 @@ -1,4 +1,4 @@ --/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $ -+/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $ +-/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $ ++/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $ * * Low level driver for Eicon DIVA Server ISDN cards. * @@ -37212,16 +37212,16 @@ #include "diva_dma.h" #include "diva_pci.h" --static char *main_revision = "$Revision: 1.2 $"; -+static char *main_revision = "$Revision: 1.2 $"; +-static char *main_revision = "$Revision: 1.3 $"; ++static char *main_revision = "$Revision: 1.3 $"; static int major; --- linux-2.6.0-test6/drivers/isdn/hardware/eicon/dqueue.c 2003-06-14 12:18:22.000000000 -0700 +++ 25/drivers/isdn/hardware/eicon/dqueue.c 2003-10-05 00:33:24.000000000 -0700 @@ -1,10 +1,10 @@ --/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $ -+/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $ +-/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $ ++/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $ * * Driver for Eicon DIVA Server ISDN cards. 
* User Mode IDI Interface @@ -37236,8 +37236,8 @@ --- linux-2.6.0-test6/drivers/isdn/hardware/eicon/mntfunc.c 2003-09-27 18:57:44.000000000 -0700 +++ 25/drivers/isdn/hardware/eicon/mntfunc.c 2003-10-05 00:33:24.000000000 -0700 @@ -1,4 +1,4 @@ --/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $ -+/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $ +-/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $ ++/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $ * * Driver for Eicon DIVA Server ISDN cards. * Maint module @@ -37252,8 +37252,8 @@ --- linux-2.6.0-test6/drivers/isdn/hardware/eicon/os_capi.h 2003-06-14 12:18:25.000000000 -0700 +++ 25/drivers/isdn/hardware/eicon/os_capi.h 2003-10-05 00:33:24.000000000 -0700 @@ -1,10 +1,10 @@ --/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $ -+/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $ +-/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $ ++/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $ * * ISDN interface module for Eicon active cards DIVA. 
* CAPI Interface OS include files @@ -37268,8 +37268,8 @@ --- linux-2.6.0-test6/drivers/isdn/hardware/eicon/platform.h 2003-09-27 18:57:44.000000000 -0700 +++ 25/drivers/isdn/hardware/eicon/platform.h 2003-10-05 00:33:24.000000000 -0700 @@ -1,4 +1,4 @@ --/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $ -+/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $ +-/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $ ++/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $ * * platform.h * @@ -37754,7 +37754,7 @@ +++ 25/drivers/media/video/planb.c 2003-10-05 00:33:24.000000000 -0700 @@ -27,7 +27,6 @@ - /* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $ */ + /* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $ */ -#include #include @@ -38069,7 +38069,7 @@ --- linux-2.6.0-test6/drivers/mtd/chips/map_rom.c 2003-06-14 12:18:24.000000000 -0700 +++ 25/drivers/mtd/chips/map_rom.c 2003-10-05 00:33:24.000000000 -0700 @@ -4,7 +4,6 @@ - * $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $ + * $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $ */ -#include @@ -42159,8 +42159,8 @@ #include /* Version */ --static const char version[] = "$Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $ for Linux\n"; -+static const char version[] = "$Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $ for Linux\n"; +-static const char version[] = "$Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $ for Linux\n"; ++static const char version[] = "$Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $ for Linux\n"; static int debug; static int quartz; diff --git a/lustre/kernel_patches/patches/bproc-patch-2.4.20 b/lustre/kernel_patches/patches/bproc-patch-2.4.20 index 00bb337..5411d9c 100644 --- a/lustre/kernel_patches/patches/bproc-patch-2.4.20 +++ b/lustre/kernel_patches/patches/bproc-patch-2.4.20 @@ -1,4 +1,4 @@ -$Id: bproc-patch-2.4.20,v 1.2 
2003/12/03 03:13:30 phil Exp $ +$Id: bproc-patch-2.4.20,v 1.3 2003/12/03 05:12:25 phil Exp $ Index: linux/fs/exec.c =================================================================== @@ -764,7 +764,7 @@ Index: linux/kernel/bproc_hook.c + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * -+ * $Id: bproc-patch-2.4.20,v 1.2 2003/12/03 03:13:30 phil Exp $ ++ * $Id: bproc-patch-2.4.20,v 1.3 2003/12/03 05:12:25 phil Exp $ + *-----------------------------------------------------------------------*/ +#include +#include @@ -832,7 +832,7 @@ Index: linux/include/linux/bproc.h + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * -+ * $Id: bproc-patch-2.4.20,v 1.2 2003/12/03 03:13:30 phil Exp $ ++ * $Id: bproc-patch-2.4.20,v 1.3 2003/12/03 05:12:25 phil Exp $ + *-----------------------------------------------------------------------*/ +#ifndef _LINUX_BPROC_H +#define _LINUX_BPROC_H diff --git a/lustre/kernel_patches/patches/configurable-x86-stack-2.4.19-pre1.patch b/lustre/kernel_patches/patches/configurable-x86-stack-2.4.19-pre1.patch new file mode 100644 index 0000000..3f79b5b --- /dev/null +++ b/lustre/kernel_patches/patches/configurable-x86-stack-2.4.19-pre1.patch @@ -0,0 +1,302 @@ +Index: linux-2.4.19-pre1/arch/i386/kernel/entry.S +=================================================================== +--- linux-2.4.19-pre1.orig/arch/i386/kernel/entry.S 2003-11-21 03:38:55.000000000 +0300 ++++ linux-2.4.19-pre1/arch/i386/kernel/entry.S 2003-12-01 18:14:32.000000000 +0300 +@@ -45,6 +45,7 @@ + #include + #include + #include ++#include + + EBX = 0x00 + ECX = 0x04 +@@ -128,10 +129,6 @@ + .long 3b,6b; \ + .previous + +-#define GET_CURRENT(reg) \ +- movl $-8192, reg; \ +- andl %esp, reg +- + ENTRY(lcall7) + pushfl # We get a different stack layout with call gates, + pushl %eax # which has to be 
cleaned up later.. +@@ -144,7 +141,7 @@ + movl %ecx,CS(%esp) # + movl %esp,%ebx + pushl %ebx +- andl $-8192,%ebx # GET_CURRENT ++ andl $-THREAD_SIZE,%ebx # GET_CURRENT + movl exec_domain(%ebx),%edx # Get the execution domain + movl 4(%edx),%edx # Get the lcall7 handler for the domain + pushl $0x7 +@@ -165,7 +162,7 @@ + movl %ecx,CS(%esp) # + movl %esp,%ebx + pushl %ebx +- andl $-8192,%ebx # GET_CURRENT ++ andl $-THREAD_SIZE,%ebx # GET_CURRENT + movl exec_domain(%ebx),%edx # Get the execution domain + movl 4(%edx),%edx # Get the lcall7 handler for the domain + pushl $0x27 +Index: linux-2.4.19-pre1/arch/i386/kernel/smpboot.c +=================================================================== +--- linux-2.4.19-pre1.orig/arch/i386/kernel/smpboot.c 2001-12-21 20:41:53.000000000 +0300 ++++ linux-2.4.19-pre1/arch/i386/kernel/smpboot.c 2003-12-01 18:14:32.000000000 +0300 +@@ -819,7 +819,7 @@ + + /* So we see what's up */ + printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip); +- stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle); ++ stack_start.esp = (void *)idle->thread.esp; + + /* + * This grunge runs the startup process for +@@ -892,7 +892,7 @@ + Dprintk("CPU has booted.\n"); + } else { + boot_error= 1; +- if (*((volatile unsigned char *)phys_to_virt(8192)) ++ if (*((volatile unsigned char *)phys_to_virt(THREAD_SIZE)) + == 0xA5) + /* trampoline started but...? 
*/ + printk("Stuck ??\n"); +@@ -915,7 +915,7 @@ + } + + /* mark "stuck" area as not stuck */ +- *((volatile unsigned long *)phys_to_virt(8192)) = 0; ++ *((volatile unsigned long *)phys_to_virt(THREAD_SIZE)) = 0; + + if(clustered_apic_mode) { + printk("Restoring NMI vector\n"); +Index: linux-2.4.19-pre1/arch/i386/kernel/traps.c +=================================================================== +--- linux-2.4.19-pre1.orig/arch/i386/kernel/traps.c 2003-12-01 18:11:31.000000000 +0300 ++++ linux-2.4.19-pre1/arch/i386/kernel/traps.c 2003-12-01 18:14:32.000000000 +0300 +@@ -158,7 +158,7 @@ + unsigned long esp = tsk->thread.esp; + + /* User space on another CPU? */ +- if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1)) ++ if ((esp ^ (unsigned long)tsk) & ~(THREAD_SIZE - 1)) + return; + show_trace((unsigned long *)esp); + } +Index: linux-2.4.19-pre1/arch/i386/kernel/head.S +=================================================================== +--- linux-2.4.19-pre1.orig/arch/i386/kernel/head.S 2003-11-20 19:01:35.000000000 +0300 ++++ linux-2.4.19-pre1/arch/i386/kernel/head.S 2003-12-01 18:14:32.000000000 +0300 +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + + #define OLD_CL_MAGIC_ADDR 0x90020 + #define OLD_CL_MAGIC 0xA33F +@@ -320,7 +321,7 @@ + ret + + ENTRY(stack_start) +- .long SYMBOL_NAME(init_task_union)+8192 ++ .long SYMBOL_NAME(init_task_union)+THREAD_SIZE + .long __KERNEL_DS + + /* This is the default interrupt "handler" :-) */ +Index: linux-2.4.19-pre1/arch/i386/lib/getuser.S +=================================================================== +--- linux-2.4.19-pre1.orig/arch/i386/lib/getuser.S 1998-01-13 00:42:52.000000000 +0300 ++++ linux-2.4.19-pre1/arch/i386/lib/getuser.S 2003-12-01 18:14:32.000000000 +0300 +@@ -21,6 +21,10 @@ + * as they get called from within inline assembly. 
+ */ + ++/* Duplicated from asm/processor.h */ ++#include ++#include ++ + addr_limit = 12 + + .text +@@ -28,7 +32,7 @@ + .globl __get_user_1 + __get_user_1: + movl %esp,%edx +- andl $0xffffe000,%edx ++ andl $~(THREAD_SIZE - 1),%edx + cmpl addr_limit(%edx),%eax + jae bad_get_user + 1: movzbl (%eax),%edx +@@ -41,7 +45,7 @@ + addl $1,%eax + movl %esp,%edx + jc bad_get_user +- andl $0xffffe000,%edx ++ andl $~(THREAD_SIZE - 1),%edx + cmpl addr_limit(%edx),%eax + jae bad_get_user + 2: movzwl -1(%eax),%edx +@@ -54,7 +58,7 @@ + addl $3,%eax + movl %esp,%edx + jc bad_get_user +- andl $0xffffe000,%edx ++ andl $~(THREAD_SIZE - 1),%edx + cmpl addr_limit(%edx),%eax + jae bad_get_user + 3: movl -3(%eax),%edx +Index: linux-2.4.19-pre1/arch/i386/config.in +=================================================================== +--- linux-2.4.19-pre1.orig/arch/i386/config.in 2003-11-20 19:01:35.000000000 +0300 ++++ linux-2.4.19-pre1/arch/i386/config.in 2003-12-01 18:14:32.000000000 +0300 +@@ -201,6 +201,29 @@ + if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then + define_bool CONFIG_HAVE_DEC_LOCK y + fi ++ ++choice 'Bigger Stack Size Support' \ ++ "off CONFIG_NOBIGSTACK \ ++ 16KB CONFIG_STACK_SIZE_16KB \ ++ 32KB CONFIG_STACK_SIZE_32KB \ ++ 64KB CONFIG_STACK_SIZE_64KB" off ++ ++if [ "$CONFIG_NOBIGSTACK" = "y" ]; then ++ define_int CONFIG_STACK_SIZE_SHIFT 1 ++else ++ if [ "$CONFIG_STACK_SIZE_16KB" = "y" ]; then ++ define_int CONFIG_STACK_SIZE_SHIFT 2 ++ else ++ if [ "$CONFIG_STACK_SIZE_32KB" = "y" ]; then ++ define_int CONFIG_STACK_SIZE_SHIFT 3 ++ else ++ if [ "$CONFIG_STACK_SIZE_64KB" = "y" ]; then ++ define_int CONFIG_STACK_SIZE_SHIFT 4 ++ fi ++ fi ++ fi ++fi ++ + endmenu + + mainmenu_option next_comment +Index: linux-2.4.19-pre1/arch/i386/vmlinux.lds +=================================================================== +--- linux-2.4.19-pre1.orig/arch/i386/vmlinux.lds 2003-11-20 19:01:35.000000000 +0300 ++++ linux-2.4.19-pre1/arch/i386/vmlinux.lds 2003-12-01 
18:14:32.000000000 +0300 +@@ -35,7 +35,8 @@ + + _edata = .; /* End of data section */ + +- . = ALIGN(8192); /* init_task */ ++/* chose the biggest of the possible stack sizes here? */ ++ . = ALIGN(65536); /* init_task */ + .data.init_task : { *(.data.init_task) } + + . = ALIGN(4096); /* Init code and data */ +Index: linux-2.4.19-pre1/include/asm-i386/current.h +=================================================================== +--- linux-2.4.19-pre1.orig/include/asm-i386/current.h 1998-08-15 03:35:22.000000000 +0400 ++++ linux-2.4.19-pre1/include/asm-i386/current.h 2003-12-01 18:14:32.000000000 +0300 +@@ -1,15 +1,43 @@ + #ifndef _I386_CURRENT_H + #define _I386_CURRENT_H ++#include ++ ++/* ++ * Configurable page sizes on i386, mainly for debugging purposes. ++ * (c) Balbir Singh ++ */ ++ ++#ifdef __ASSEMBLY__ ++ ++#define PAGE_SIZE 4096 /* as cannot handle 1UL << 12 */ ++#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE) ++ ++#define GET_CURRENT(reg) \ ++ movl $-THREAD_SIZE, reg; \ ++ andl %esp, reg ++ ++#else /* __ASSEMBLY__ */ ++ ++#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE) ++#define alloc_task_struct() \ ++ ((struct task_struct *) __get_free_pages(GFP_KERNEL,CONFIG_STACK_SIZE_SHIFT)) ++ ++#define free_task_struct(p) \ ++ free_pages((unsigned long) (p), CONFIG_STACK_SIZE_SHIFT) ++ ++#define INIT_TASK_SIZE THREAD_SIZE + + struct task_struct; + + static inline struct task_struct * get_current(void) + { + struct task_struct *current; +- __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~8191UL)); ++ __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~(THREAD_SIZE - 1))); + return current; + } + + #define current get_current() + ++#endif /* __ASSEMBLY__ */ ++ + #endif /* !(_I386_CURRENT_H) */ +Index: linux-2.4.19-pre1/include/asm-i386/hw_irq.h +=================================================================== +--- linux-2.4.19-pre1.orig/include/asm-i386/hw_irq.h 2003-11-21 02:59:05.000000000 +0300 ++++ 
linux-2.4.19-pre1/include/asm-i386/hw_irq.h 2003-12-01 18:14:32.000000000 +0300 +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + + /* + * IDT vectors usable for external interrupt sources start +@@ -113,10 +114,6 @@ + #define IRQ_NAME2(nr) nr##_interrupt(void) + #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) + +-#define GET_CURRENT \ +- "movl %esp, %ebx\n\t" \ +- "andl $-8192, %ebx\n\t" +- + /* + * SMP has a few special interrupts for IPI messages + */ +Index: linux-2.4.19-pre1/include/asm-i386/processor.h +=================================================================== +--- linux-2.4.19-pre1.orig/include/asm-i386/processor.h 2003-11-21 02:59:05.000000000 +0300 ++++ linux-2.4.19-pre1/include/asm-i386/processor.h 2003-12-01 18:14:32.000000000 +0300 +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -447,9 +448,6 @@ + #define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019]) + #define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022]) + +-#define THREAD_SIZE (2*PAGE_SIZE) +-#define alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1)) +-#define free_task_struct(p) free_pages((unsigned long) (p), 1) + #define get_task_struct(tsk) atomic_inc(&virt_to_page(tsk)->count) + + #define init_task (init_task_union.task) +Index: linux-2.4.19-pre1/include/linux/sched.h +=================================================================== +--- linux-2.4.19-pre1.orig/include/linux/sched.h 2003-12-01 18:11:28.000000000 +0300 ++++ linux-2.4.19-pre1/include/linux/sched.h 2003-12-01 18:14:32.000000000 +0300 +@@ -2,6 +2,7 @@ + #define _LINUX_SCHED_H + + #include /* for HZ */ ++#include /* maybe for INIT_TASK_SIZE */ + + extern unsigned long event; + diff --git a/lustre/kernel_patches/patches/configurable-x86-stack-2.4.20-rh.patch b/lustre/kernel_patches/patches/configurable-x86-stack-2.4.20-rh.patch new file mode 100644 index 0000000..f70b0d4 --- /dev/null +++ 
b/lustre/kernel_patches/patches/configurable-x86-stack-2.4.20-rh.patch @@ -0,0 +1,311 @@ +Index: linux-2.4.20-rh-20.9/arch/i386/kernel/entry.S +=================================================================== +--- linux-2.4.20-rh-20.9.orig/arch/i386/kernel/entry.S 2003-09-13 19:34:35.000000000 +0400 ++++ linux-2.4.20-rh-20.9/arch/i386/kernel/entry.S 2003-12-01 18:02:14.000000000 +0300 +@@ -45,6 +45,7 @@ + #include + #include + #include ++#include + + EBX = 0x00 + ECX = 0x04 +@@ -130,10 +131,6 @@ + .long 3b,6b; \ + .previous + +-#define GET_CURRENT(reg) \ +- movl $-8192, reg; \ +- andl %esp, reg +- + ENTRY(lcall7) + pushfl # We get a different stack layout with call gates, + pushl %eax # which has to be cleaned up later.. +@@ -149,7 +146,7 @@ + movl %ecx,CS(%esp) # + movl %esp,%ebx + pushl %ebx +- andl $-8192,%ebx # GET_CURRENT ++ andl $-THREAD_SIZE,%ebx # GET_CURRENT + movl exec_domain(%ebx),%edx # Get the execution domain + movl 4(%edx),%edx # Get the lcall7 handler for the domain + pushl $0x7 +@@ -173,7 +170,7 @@ + movl %ecx,CS(%esp) # + movl %esp,%ebx + pushl %ebx +- andl $-8192,%ebx # GET_CURRENT ++ andl $-THREAD_SIZE,%ebx # GET_CURRENT + movl exec_domain(%ebx),%edx # Get the execution domain + movl 4(%edx),%edx # Get the lcall7 handler for the domain + pushl $0x27 +Index: linux-2.4.20-rh-20.9/arch/i386/kernel/smpboot.c +=================================================================== +--- linux-2.4.20-rh-20.9.orig/arch/i386/kernel/smpboot.c 2003-09-13 19:34:35.000000000 +0400 ++++ linux-2.4.20-rh-20.9/arch/i386/kernel/smpboot.c 2003-12-01 18:02:14.000000000 +0300 +@@ -811,7 +811,7 @@ + + /* So we see what's up */ + printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip); +- stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle); ++ stack_start.esp = (void *)idle->thread.esp; + + /* + * This grunge runs the startup process for +@@ -884,7 +884,7 @@ + Dprintk("CPU has booted.\n"); + } else { + boot_error= 1; +- if (*((volatile unsigned 
char *)phys_to_virt(8192)) ++ if (*((volatile unsigned char *)phys_to_virt(THREAD_SIZE)) + == 0xA5) + /* trampoline started but...? */ + printk("Stuck ??\n"); +@@ -907,7 +907,7 @@ + } + + /* mark "stuck" area as not stuck */ +- *((volatile unsigned long *)phys_to_virt(8192)) = 0; ++ *((volatile unsigned long *)phys_to_virt(THREAD_SIZE)) = 0; + + if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ) { + printk("Restoring NMI vector\n"); +Index: linux-2.4.20-rh-20.9/arch/i386/kernel/traps.c +=================================================================== +--- linux-2.4.20-rh-20.9.orig/arch/i386/kernel/traps.c 2003-09-13 19:34:35.000000000 +0400 ++++ linux-2.4.20-rh-20.9/arch/i386/kernel/traps.c 2003-12-01 18:02:14.000000000 +0300 +@@ -161,7 +161,7 @@ + unsigned long esp = tsk->thread.esp; + + /* User space on another CPU? */ +- if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1)) ++ if ((esp ^ (unsigned long)tsk) & ~(THREAD_SIZE - 1)) + return; + show_trace((unsigned long *)esp); + } +Index: linux-2.4.20-rh-20.9/arch/i386/kernel/head.S +=================================================================== +--- linux-2.4.20-rh-20.9.orig/arch/i386/kernel/head.S 2003-09-13 19:34:35.000000000 +0400 ++++ linux-2.4.20-rh-20.9/arch/i386/kernel/head.S 2003-12-01 18:02:14.000000000 +0300 +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + + #define OLD_CL_MAGIC_ADDR 0x90020 + #define OLD_CL_MAGIC 0xA33F +@@ -315,7 +316,7 @@ + ret + + ENTRY(stack_start) +- .long SYMBOL_NAME(init_task_union)+8192 ++ .long SYMBOL_NAME(init_task_union)+THREAD_SIZE + .long __KERNEL_DS + + /* This is the default interrupt "handler" :-) */ +Index: linux-2.4.20-rh-20.9/arch/i386/kernel/irq.c +=================================================================== +--- linux-2.4.20-rh-20.9.orig/arch/i386/kernel/irq.c 2003-12-01 17:42:59.000000000 +0300 ++++ linux-2.4.20-rh-20.9/arch/i386/kernel/irq.c 2003-12-01 18:02:14.000000000 +0300 +@@ -581,7 +581,10 @@ + long esp; + + /* Debugging check for 
stack overflow: is there less than 1KB free? */ +- __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (8191)); ++ __asm__ __volatile__( ++ "andl %%esp,%0" ++ : "=r" (esp) : "0" (THREAD_SIZE-1)); ++ + if (unlikely(esp < (sizeof(struct task_struct) + 1024))) { + extern void show_stack(unsigned long *); + +Index: linux-2.4.20-rh-20.9/arch/i386/lib/getuser.S +=================================================================== +--- linux-2.4.20-rh-20.9.orig/arch/i386/lib/getuser.S 1998-01-13 00:42:52.000000000 +0300 ++++ linux-2.4.20-rh-20.9/arch/i386/lib/getuser.S 2003-12-01 18:02:14.000000000 +0300 +@@ -21,6 +21,10 @@ + * as they get called from within inline assembly. + */ + ++/* Duplicated from asm/processor.h */ ++#include ++#include ++ + addr_limit = 12 + + .text +@@ -28,7 +32,7 @@ + .globl __get_user_1 + __get_user_1: + movl %esp,%edx +- andl $0xffffe000,%edx ++ andl $~(THREAD_SIZE - 1),%edx + cmpl addr_limit(%edx),%eax + jae bad_get_user + 1: movzbl (%eax),%edx +@@ -41,7 +45,7 @@ + addl $1,%eax + movl %esp,%edx + jc bad_get_user +- andl $0xffffe000,%edx ++ andl $~(THREAD_SIZE - 1),%edx + cmpl addr_limit(%edx),%eax + jae bad_get_user + 2: movzwl -1(%eax),%edx +@@ -54,7 +58,7 @@ + addl $3,%eax + movl %esp,%edx + jc bad_get_user +- andl $0xffffe000,%edx ++ andl $~(THREAD_SIZE - 1),%edx + cmpl addr_limit(%edx),%eax + jae bad_get_user + 3: movl -3(%eax),%edx +Index: linux-2.4.20-rh-20.9/arch/i386/config.in +=================================================================== +--- linux-2.4.20-rh-20.9.orig/arch/i386/config.in 2003-09-13 19:34:34.000000000 +0400 ++++ linux-2.4.20-rh-20.9/arch/i386/config.in 2003-12-01 18:02:14.000000000 +0300 +@@ -266,6 +266,29 @@ + if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then + define_bool CONFIG_HAVE_DEC_LOCK y + fi ++ ++choice 'Bigger Stack Size Support' \ ++ "off CONFIG_NOBIGSTACK \ ++ 16KB CONFIG_STACK_SIZE_16KB \ ++ 32KB CONFIG_STACK_SIZE_32KB \ ++ 64KB CONFIG_STACK_SIZE_64KB" off ++ ++if [ 
"$CONFIG_NOBIGSTACK" = "y" ]; then ++ define_int CONFIG_STACK_SIZE_SHIFT 1 ++else ++ if [ "$CONFIG_STACK_SIZE_16KB" = "y" ]; then ++ define_int CONFIG_STACK_SIZE_SHIFT 2 ++ else ++ if [ "$CONFIG_STACK_SIZE_32KB" = "y" ]; then ++ define_int CONFIG_STACK_SIZE_SHIFT 3 ++ else ++ if [ "$CONFIG_STACK_SIZE_64KB" = "y" ]; then ++ define_int CONFIG_STACK_SIZE_SHIFT 4 ++ fi ++ fi ++ fi ++fi ++ + endmenu + + mainmenu_option next_comment +Index: linux-2.4.20-rh-20.9/arch/i386/vmlinux.lds +=================================================================== +--- linux-2.4.20-rh-20.9.orig/arch/i386/vmlinux.lds 2003-09-13 19:34:24.000000000 +0400 ++++ linux-2.4.20-rh-20.9/arch/i386/vmlinux.lds 2003-12-01 18:02:14.000000000 +0300 +@@ -38,7 +38,8 @@ + + _edata = .; /* End of data section */ + +- . = ALIGN(8192); /* init_task */ ++/* chose the biggest of the possible stack sizes here? */ ++ . = ALIGN(65536); /* init_task */ + .data.init_task : { *(.data.init_task) } + + . = ALIGN(4096); /* Init code and data */ +Index: linux-2.4.20-rh-20.9/include/asm-i386/current.h +=================================================================== +--- linux-2.4.20-rh-20.9.orig/include/asm-i386/current.h 1998-08-15 03:35:22.000000000 +0400 ++++ linux-2.4.20-rh-20.9/include/asm-i386/current.h 2003-12-01 18:03:28.000000000 +0300 +@@ -1,15 +1,43 @@ + #ifndef _I386_CURRENT_H + #define _I386_CURRENT_H ++#include ++ ++/* ++ * Configurable page sizes on i386, mainly for debugging purposes. 
++ * (c) Balbir Singh ++ */ ++ ++#ifdef __ASSEMBLY__ ++ ++#define PAGE_SIZE 4096 /* as cannot handle 1UL << 12 */ ++#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE) ++ ++#define GET_CURRENT(reg) \ ++ movl $-THREAD_SIZE, reg; \ ++ andl %esp, reg ++ ++#else /* __ASSEMBLY__ */ ++ ++#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE) ++#define __alloc_task_struct() \ ++ ((struct task_struct *) __get_free_pages(GFP_KERNEL,CONFIG_STACK_SIZE_SHIFT)) ++ ++#define __free_task_struct(p) \ ++ free_pages((unsigned long) (p), CONFIG_STACK_SIZE_SHIFT) ++ ++#define INIT_TASK_SIZE THREAD_SIZE + + struct task_struct; + + static inline struct task_struct * get_current(void) + { + struct task_struct *current; +- __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~8191UL)); ++ __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~(THREAD_SIZE - 1))); + return current; + } + + #define current get_current() + ++#endif /* __ASSEMBLY__ */ ++ + #endif /* !(_I386_CURRENT_H) */ +Index: linux-2.4.20-rh-20.9/include/asm-i386/hw_irq.h +=================================================================== +--- linux-2.4.20-rh-20.9.orig/include/asm-i386/hw_irq.h 2003-11-13 17:35:48.000000000 +0300 ++++ linux-2.4.20-rh-20.9/include/asm-i386/hw_irq.h 2003-12-01 18:02:14.000000000 +0300 +@@ -116,10 +116,6 @@ + #define IRQ_NAME2(nr) nr##_interrupt(void) + #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) + +-#define GET_CURRENT \ +- "movl %esp, %ebx\n\t" \ +- "andl $-8192, %ebx\n\t" +- + /* + * SMP has a few special interrupts for IPI messages + */ +Index: linux-2.4.20-rh-20.9/include/asm-i386/processor.h +=================================================================== +--- linux-2.4.20-rh-20.9.orig/include/asm-i386/processor.h 2003-10-08 12:29:57.000000000 +0400 ++++ linux-2.4.20-rh-20.9/include/asm-i386/processor.h 2003-12-01 18:02:14.000000000 +0300 +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -469,10 +470,6 @@ + #define 
KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019]) + #define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022]) + +-#define THREAD_SIZE (2*PAGE_SIZE) +-#define __alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1)) +-#define __free_task_struct(p) do { BUG_ON((p)->state < TASK_ZOMBIE); free_pages((unsigned long) (p), 1); } while (0) +- + #define init_task (init_task_union.task) + #define init_stack (init_task_union.stack) + +Index: linux-2.4.20-rh-20.9/include/linux/sched.h +=================================================================== +--- linux-2.4.20-rh-20.9.orig/include/linux/sched.h 2003-11-13 17:35:48.000000000 +0300 ++++ linux-2.4.20-rh-20.9/include/linux/sched.h 2003-12-01 18:02:14.000000000 +0300 +@@ -2,6 +2,7 @@ + #define _LINUX_SCHED_H + + #include /* for HZ */ ++#include /* maybe for INIT_TASK_SIZE */ + + extern unsigned long event; + diff --git a/lustre/kernel_patches/patches/configurable-x86-stack-2.4.20.patch b/lustre/kernel_patches/patches/configurable-x86-stack-2.4.20.patch new file mode 100644 index 0000000..4fc4938 --- /dev/null +++ b/lustre/kernel_patches/patches/configurable-x86-stack-2.4.20.patch @@ -0,0 +1,318 @@ +Index: linux-2.4.20/arch/i386/kernel/entry.S +=================================================================== +--- linux-2.4.20.orig/arch/i386/kernel/entry.S 2003-05-16 05:28:59.000000000 +0400 ++++ linux-2.4.20/arch/i386/kernel/entry.S 2003-12-01 16:54:50.000000000 +0300 +@@ -45,6 +45,7 @@ + #include + #include + #include ++#include + + EBX = 0x00 + ECX = 0x04 +@@ -130,10 +131,6 @@ + .long 3b,6b; \ + .previous + +-#define GET_CURRENT(reg) \ +- movl $-8192, reg; \ +- andl %esp, reg +- + ENTRY(lcall7) + pushfl # We get a different stack layout with call gates, + pushl %eax # which has to be cleaned up later.. 
+@@ -149,7 +146,7 @@ + movl %ecx,CS(%esp) # + movl %esp,%ebx + pushl %ebx +- andl $-8192,%ebx # GET_CURRENT ++ andl $-THREAD_SIZE,%ebx # GET_CURRENT + movl exec_domain(%ebx),%edx # Get the execution domain + movl 4(%edx),%edx # Get the lcall7 handler for the domain + pushl $0x7 +@@ -173,7 +170,7 @@ + movl %ecx,CS(%esp) # + movl %esp,%ebx + pushl %ebx +- andl $-8192,%ebx # GET_CURRENT ++ andl $-THREAD_SIZE,%ebx # GET_CURRENT + movl exec_domain(%ebx),%edx # Get the execution domain + movl 4(%edx),%edx # Get the lcall7 handler for the domain + pushl $0x27 +Index: linux-2.4.20/arch/i386/kernel/smpboot.c +=================================================================== +--- linux-2.4.20.orig/arch/i386/kernel/smpboot.c 2003-05-16 05:28:59.000000000 +0400 ++++ linux-2.4.20/arch/i386/kernel/smpboot.c 2003-12-01 16:54:50.000000000 +0300 +@@ -819,7 +819,7 @@ + + /* So we see what's up */ + printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip); +- stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle); ++ stack_start.esp = (void *)idle->thread.esp; + + /* + * This grunge runs the startup process for +@@ -892,7 +892,7 @@ + Dprintk("CPU has booted.\n"); + } else { + boot_error= 1; +- if (*((volatile unsigned char *)phys_to_virt(8192)) ++ if (*((volatile unsigned char *)phys_to_virt(THREAD_SIZE)) + == 0xA5) + /* trampoline started but...? 
*/ + printk("Stuck ??\n"); +@@ -915,7 +915,7 @@ + } + + /* mark "stuck" area as not stuck */ +- *((volatile unsigned long *)phys_to_virt(8192)) = 0; ++ *((volatile unsigned long *)phys_to_virt(THREAD_SIZE)) = 0; + + if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ) { + printk("Restoring NMI vector\n"); +Index: linux-2.4.20/arch/i386/kernel/traps.c +=================================================================== +--- linux-2.4.20.orig/arch/i386/kernel/traps.c 2003-12-01 16:53:23.000000000 +0300 ++++ linux-2.4.20/arch/i386/kernel/traps.c 2003-12-01 16:54:50.000000000 +0300 +@@ -158,7 +158,7 @@ + unsigned long esp = tsk->thread.esp; + + /* User space on another CPU? */ +- if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1)) ++ if ((esp ^ (unsigned long)tsk) & ~(THREAD_SIZE - 1)) + return; + show_trace((unsigned long *)esp); + } +Index: linux-2.4.20/arch/i386/kernel/head.S +=================================================================== +--- linux-2.4.20.orig/arch/i386/kernel/head.S 2003-05-16 05:28:28.000000000 +0400 ++++ linux-2.4.20/arch/i386/kernel/head.S 2003-12-01 16:54:50.000000000 +0300 +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + + #define OLD_CL_MAGIC_ADDR 0x90020 + #define OLD_CL_MAGIC 0xA33F +@@ -320,7 +321,7 @@ + ret + + ENTRY(stack_start) +- .long SYMBOL_NAME(init_task_union)+8192 ++ .long SYMBOL_NAME(init_task_union)+THREAD_SIZE + .long __KERNEL_DS + + /* This is the default interrupt "handler" :-) */ +Index: linux-2.4.20/arch/i386/kernel/irq.c +=================================================================== +--- linux-2.4.20.orig/arch/i386/kernel/irq.c 2003-05-16 05:28:59.000000000 +0400 ++++ linux-2.4.20/arch/i386/kernel/irq.c 2003-12-01 16:57:05.000000000 +0300 +@@ -581,7 +581,10 @@ + long esp; + + /* Debugging check for stack overflow: is there less than 1KB free? 
*/ +- __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (8191)); ++ __asm__ __volatile__( ++ "andl %%esp,%0" ++ : "=r" (esp) : "0" (THREAD_SIZE-1)); ++ + if (unlikely(esp < (sizeof(struct task_struct) + 1024))) { + extern void show_stack(unsigned long *); + +Index: linux-2.4.20/arch/i386/lib/getuser.S +=================================================================== +--- linux-2.4.20.orig/arch/i386/lib/getuser.S 1998-01-13 00:42:52.000000000 +0300 ++++ linux-2.4.20/arch/i386/lib/getuser.S 2003-12-01 16:54:50.000000000 +0300 +@@ -21,6 +21,10 @@ + * as they get called from within inline assembly. + */ + ++/* Duplicated from asm/processor.h */ ++#include ++#include ++ + addr_limit = 12 + + .text +@@ -28,7 +32,7 @@ + .globl __get_user_1 + __get_user_1: + movl %esp,%edx +- andl $0xffffe000,%edx ++ andl $~(THREAD_SIZE - 1),%edx + cmpl addr_limit(%edx),%eax + jae bad_get_user + 1: movzbl (%eax),%edx +@@ -41,7 +45,7 @@ + addl $1,%eax + movl %esp,%edx + jc bad_get_user +- andl $0xffffe000,%edx ++ andl $~(THREAD_SIZE - 1),%edx + cmpl addr_limit(%edx),%eax + jae bad_get_user + 2: movzwl -1(%eax),%edx +@@ -54,7 +58,7 @@ + addl $3,%eax + movl %esp,%edx + jc bad_get_user +- andl $0xffffe000,%edx ++ andl $~(THREAD_SIZE - 1),%edx + cmpl addr_limit(%edx),%eax + jae bad_get_user + 3: movl -3(%eax),%edx +Index: linux-2.4.20/arch/i386/config.in +=================================================================== +--- linux-2.4.20.orig/arch/i386/config.in 2003-05-16 05:28:59.000000000 +0400 ++++ linux-2.4.20/arch/i386/config.in 2003-12-01 17:01:56.000000000 +0300 +@@ -227,6 +227,29 @@ + if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then + define_bool CONFIG_HAVE_DEC_LOCK y + fi ++ ++choice 'Bigger Stack Size Support' \ ++ "off CONFIG_NOBIGSTACK \ ++ 16KB CONFIG_STACK_SIZE_16KB \ ++ 32KB CONFIG_STACK_SIZE_32KB \ ++ 64KB CONFIG_STACK_SIZE_64KB" off ++ ++if [ "$CONFIG_NOBIGSTACK" = "y" ]; then ++ define_int CONFIG_STACK_SIZE_SHIFT 1 ++else ++ if [ 
"$CONFIG_STACK_SIZE_16KB" = "y" ]; then ++ define_int CONFIG_STACK_SIZE_SHIFT 2 ++ else ++ if [ "$CONFIG_STACK_SIZE_32KB" = "y" ]; then ++ define_int CONFIG_STACK_SIZE_SHIFT 3 ++ else ++ if [ "$CONFIG_STACK_SIZE_64KB" = "y" ]; then ++ define_int CONFIG_STACK_SIZE_SHIFT 4 ++ fi ++ fi ++ fi ++fi ++ + endmenu + + mainmenu_option next_comment +Index: linux-2.4.20/arch/i386/vmlinux.lds +=================================================================== +--- linux-2.4.20.orig/arch/i386/vmlinux.lds 2003-05-16 05:28:09.000000000 +0400 ++++ linux-2.4.20/arch/i386/vmlinux.lds 2003-12-01 16:54:50.000000000 +0300 +@@ -35,7 +35,8 @@ + + _edata = .; /* End of data section */ + +- . = ALIGN(8192); /* init_task */ ++/* chose the biggest of the possible stack sizes here? */ ++ . = ALIGN(65536); /* init_task */ + .data.init_task : { *(.data.init_task) } + + . = ALIGN(4096); /* Init code and data */ +Index: linux-2.4.20/include/asm-i386/current.h +=================================================================== +--- linux-2.4.20.orig/include/asm-i386/current.h 1998-08-15 03:35:22.000000000 +0400 ++++ linux-2.4.20/include/asm-i386/current.h 2003-12-01 16:54:50.000000000 +0300 +@@ -1,15 +1,43 @@ + #ifndef _I386_CURRENT_H + #define _I386_CURRENT_H ++#include ++ ++/* ++ * Configurable page sizes on i386, mainly for debugging purposes. 
++ * (c) Balbir Singh ++ */ ++ ++#ifdef __ASSEMBLY__ ++ ++#define PAGE_SIZE 4096 /* as cannot handle 1UL << 12 */ ++#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE) ++ ++#define GET_CURRENT(reg) \ ++ movl $-THREAD_SIZE, reg; \ ++ andl %esp, reg ++ ++#else /* __ASSEMBLY__ */ ++ ++#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE) ++#define alloc_task_struct() \ ++ ((struct task_struct *) __get_free_pages(GFP_KERNEL,CONFIG_STACK_SIZE_SHIFT)) ++ ++#define free_task_struct(p) \ ++ free_pages((unsigned long) (p), CONFIG_STACK_SIZE_SHIFT) ++ ++#define INIT_TASK_SIZE THREAD_SIZE + + struct task_struct; + + static inline struct task_struct * get_current(void) + { + struct task_struct *current; +- __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~8191UL)); ++ __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~(THREAD_SIZE - 1))); + return current; + } + + #define current get_current() + ++#endif /* __ASSEMBLY__ */ ++ + #endif /* !(_I386_CURRENT_H) */ +Index: linux-2.4.20/include/asm-i386/hw_irq.h +=================================================================== +--- linux-2.4.20.orig/include/asm-i386/hw_irq.h 2003-11-13 17:17:28.000000000 +0300 ++++ linux-2.4.20/include/asm-i386/hw_irq.h 2003-12-01 16:54:50.000000000 +0300 +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + + /* + * IDT vectors usable for external interrupt sources start +@@ -113,10 +114,6 @@ + #define IRQ_NAME2(nr) nr##_interrupt(void) + #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) + +-#define GET_CURRENT \ +- "movl %esp, %ebx\n\t" \ +- "andl $-8192, %ebx\n\t" +- + /* + * SMP has a few special interrupts for IPI messages + */ +Index: linux-2.4.20/include/asm-i386/processor.h +=================================================================== +--- linux-2.4.20.orig/include/asm-i386/processor.h 2003-11-21 17:39:47.000000000 +0300 ++++ linux-2.4.20/include/asm-i386/processor.h 2003-12-01 16:54:50.000000000 +0300 +@@ -14,6 +14,7 @@ + #include + #include + #include 
++#include + #include + #include + #include +@@ -451,9 +452,6 @@ + #define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019]) + #define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022]) + +-#define THREAD_SIZE (2*PAGE_SIZE) +-#define alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1)) +-#define free_task_struct(p) free_pages((unsigned long) (p), 1) + #define get_task_struct(tsk) atomic_inc(&virt_to_page(tsk)->count) + + #define init_task (init_task_union.task) +Index: linux-2.4.20/include/linux/sched.h +=================================================================== +--- linux-2.4.20.orig/include/linux/sched.h 2003-11-21 17:39:47.000000000 +0300 ++++ linux-2.4.20/include/linux/sched.h 2003-12-01 16:54:50.000000000 +0300 +@@ -2,6 +2,7 @@ + #define _LINUX_SCHED_H + + #include /* for HZ */ ++#include /* maybe for INIT_TASK_SIZE */ + + extern unsigned long event; + diff --git a/lustre/kernel_patches/patches/configurable-x86-stack-2.4.22-rh.patch b/lustre/kernel_patches/patches/configurable-x86-stack-2.4.22-rh.patch new file mode 100644 index 0000000..856425a --- /dev/null +++ b/lustre/kernel_patches/patches/configurable-x86-stack-2.4.22-rh.patch @@ -0,0 +1,311 @@ +Index: linux-2.4.22-ac1/arch/i386/kernel/entry.S +=================================================================== +--- linux-2.4.22-ac1.orig/arch/i386/kernel/entry.S 2003-09-25 14:16:34.000000000 +0400 ++++ linux-2.4.22-ac1/arch/i386/kernel/entry.S 2003-12-01 18:34:08.000000000 +0300 +@@ -46,6 +46,7 @@ + #include + #include + #include ++#include + + EBX = 0x00 + ECX = 0x04 +@@ -131,10 +132,6 @@ + .long 3b,6b; \ + .previous + +-#define GET_CURRENT(reg) \ +- movl $-8192, reg; \ +- andl %esp, reg +- + ENTRY(lcall7) + pushfl # We get a different stack layout with call gates, + pushl %eax # which has to be cleaned up later.. 
+@@ -150,7 +147,7 @@ + movl %ecx,CS(%esp) # + movl %esp,%ebx + pushl %ebx +- andl $-8192,%ebx # GET_CURRENT ++ andl $-THREAD_SIZE,%ebx # GET_CURRENT + movl exec_domain(%ebx),%edx # Get the execution domain + movl 4(%edx),%edx # Get the lcall7 handler for the domain + pushl $0x7 +@@ -174,7 +171,7 @@ + movl %ecx,CS(%esp) # + movl %esp,%ebx + pushl %ebx +- andl $-8192,%ebx # GET_CURRENT ++ andl $-THREAD_SIZE,%ebx # GET_CURRENT + movl exec_domain(%ebx),%edx # Get the execution domain + movl 4(%edx),%edx # Get the lcall7 handler for the domain + pushl $0x27 +Index: linux-2.4.22-ac1/arch/i386/kernel/smpboot.c +=================================================================== +--- linux-2.4.22-ac1.orig/arch/i386/kernel/smpboot.c 2003-09-25 14:16:28.000000000 +0400 ++++ linux-2.4.22-ac1/arch/i386/kernel/smpboot.c 2003-12-01 18:34:08.000000000 +0300 +@@ -814,7 +814,7 @@ + + /* So we see what's up */ + printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip); +- stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle); ++ stack_start.esp = (void *)idle->thread.esp; + + /* + * This grunge runs the startup process for +@@ -887,7 +887,7 @@ + Dprintk("CPU has booted.\n"); + } else { + boot_error= 1; +- if (*((volatile unsigned char *)phys_to_virt(8192)) ++ if (*((volatile unsigned char *)phys_to_virt(THREAD_SIZE)) + == 0xA5) + /* trampoline started but...? 
*/ + printk("Stuck ??\n"); +@@ -910,7 +910,7 @@ + } + + /* mark "stuck" area as not stuck */ +- *((volatile unsigned long *)phys_to_virt(8192)) = 0; ++ *((volatile unsigned long *)phys_to_virt(THREAD_SIZE)) = 0; + + if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ) { + printk("Restoring NMI vector\n"); +Index: linux-2.4.22-ac1/arch/i386/kernel/traps.c +=================================================================== +--- linux-2.4.22-ac1.orig/arch/i386/kernel/traps.c 2003-09-25 14:16:29.000000000 +0400 ++++ linux-2.4.22-ac1/arch/i386/kernel/traps.c 2003-12-01 18:34:08.000000000 +0300 +@@ -161,7 +161,7 @@ + unsigned long esp = tsk->thread.esp; + + /* User space on another CPU? */ +- if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1)) ++ if ((esp ^ (unsigned long)tsk) & ~(THREAD_SIZE - 1)) + return; + show_trace((unsigned long *)esp); + } +Index: linux-2.4.22-ac1/arch/i386/kernel/head.S +=================================================================== +--- linux-2.4.22-ac1.orig/arch/i386/kernel/head.S 2003-09-25 14:16:27.000000000 +0400 ++++ linux-2.4.22-ac1/arch/i386/kernel/head.S 2003-12-01 18:34:08.000000000 +0300 +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + + #define OLD_CL_MAGIC_ADDR 0x90020 + #define OLD_CL_MAGIC 0xA33F +@@ -315,7 +316,7 @@ + ret + + ENTRY(stack_start) +- .long SYMBOL_NAME(init_task_union)+8192 ++ .long SYMBOL_NAME(init_task_union)+THREAD_SIZE + .long __KERNEL_DS + + /* This is the default interrupt "handler" :-) */ +Index: linux-2.4.22-ac1/arch/i386/kernel/irq.c +=================================================================== +--- linux-2.4.22-ac1.orig/arch/i386/kernel/irq.c 2003-09-25 14:16:18.000000000 +0400 ++++ linux-2.4.22-ac1/arch/i386/kernel/irq.c 2003-12-01 18:34:08.000000000 +0300 +@@ -581,7 +581,10 @@ + long esp; + + /* Debugging check for stack overflow: is there less than 1KB free? 
*/ +- __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (8191)); ++ __asm__ __volatile__( ++ "andl %%esp,%0" ++ : "=r" (esp) : "0" (THREAD_SIZE-1)); ++ + if (unlikely(esp < (sizeof(struct task_struct) + 1024))) { + extern void show_stack(unsigned long *); + +Index: linux-2.4.22-ac1/arch/i386/lib/getuser.S +=================================================================== +--- linux-2.4.22-ac1.orig/arch/i386/lib/getuser.S 1998-01-13 00:42:52.000000000 +0300 ++++ linux-2.4.22-ac1/arch/i386/lib/getuser.S 2003-12-01 18:34:08.000000000 +0300 +@@ -21,6 +21,10 @@ + * as they get called from within inline assembly. + */ + ++/* Duplicated from asm/processor.h */ ++#include ++#include ++ + addr_limit = 12 + + .text +@@ -28,7 +32,7 @@ + .globl __get_user_1 + __get_user_1: + movl %esp,%edx +- andl $0xffffe000,%edx ++ andl $~(THREAD_SIZE - 1),%edx + cmpl addr_limit(%edx),%eax + jae bad_get_user + 1: movzbl (%eax),%edx +@@ -41,7 +45,7 @@ + addl $1,%eax + movl %esp,%edx + jc bad_get_user +- andl $0xffffe000,%edx ++ andl $~(THREAD_SIZE - 1),%edx + cmpl addr_limit(%edx),%eax + jae bad_get_user + 2: movzwl -1(%eax),%edx +@@ -54,7 +58,7 @@ + addl $3,%eax + movl %esp,%edx + jc bad_get_user +- andl $0xffffe000,%edx ++ andl $~(THREAD_SIZE - 1),%edx + cmpl addr_limit(%edx),%eax + jae bad_get_user + 3: movl -3(%eax),%edx +Index: linux-2.4.22-ac1/arch/i386/config.in +=================================================================== +--- linux-2.4.22-ac1.orig/arch/i386/config.in 2003-09-25 14:16:34.000000000 +0400 ++++ linux-2.4.22-ac1/arch/i386/config.in 2003-12-01 18:34:08.000000000 +0300 +@@ -304,6 +304,29 @@ + if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then + define_bool CONFIG_HAVE_DEC_LOCK y + fi ++ ++choice 'Bigger Stack Size Support' \ ++ "off CONFIG_NOBIGSTACK \ ++ 16KB CONFIG_STACK_SIZE_16KB \ ++ 32KB CONFIG_STACK_SIZE_32KB \ ++ 64KB CONFIG_STACK_SIZE_64KB" off ++ ++if [ "$CONFIG_NOBIGSTACK" = "y" ]; then ++ define_int CONFIG_STACK_SIZE_SHIFT 1 ++else ++ 
if [ "$CONFIG_STACK_SIZE_16KB" = "y" ]; then ++ define_int CONFIG_STACK_SIZE_SHIFT 2 ++ else ++ if [ "$CONFIG_STACK_SIZE_32KB" = "y" ]; then ++ define_int CONFIG_STACK_SIZE_SHIFT 3 ++ else ++ if [ "$CONFIG_STACK_SIZE_64KB" = "y" ]; then ++ define_int CONFIG_STACK_SIZE_SHIFT 4 ++ fi ++ fi ++ fi ++fi ++ + endmenu + + mainmenu_option next_comment +Index: linux-2.4.22-ac1/arch/i386/vmlinux.lds +=================================================================== +--- linux-2.4.22-ac1.orig/arch/i386/vmlinux.lds 2003-09-25 14:16:28.000000000 +0400 ++++ linux-2.4.22-ac1/arch/i386/vmlinux.lds 2003-12-01 18:34:08.000000000 +0300 +@@ -38,7 +38,8 @@ + + _edata = .; /* End of data section */ + +- . = ALIGN(8192); /* init_task */ ++/* chose the biggest of the possible stack sizes here? */ ++ . = ALIGN(65536); /* init_task */ + .data.init_task : { *(.data.init_task) } + + . = ALIGN(4096); /* Init code and data */ +Index: linux-2.4.22-ac1/include/asm-i386/current.h +=================================================================== +--- linux-2.4.22-ac1.orig/include/asm-i386/current.h 1998-08-15 03:35:22.000000000 +0400 ++++ linux-2.4.22-ac1/include/asm-i386/current.h 2003-12-01 18:34:16.000000000 +0300 +@@ -1,15 +1,43 @@ + #ifndef _I386_CURRENT_H + #define _I386_CURRENT_H ++#include ++ ++/* ++ * Configurable page sizes on i386, mainly for debugging purposes. 
++ * (c) Balbir Singh ++ */ ++ ++#ifdef __ASSEMBLY__ ++ ++#define PAGE_SIZE 4096 /* as cannot handle 1UL << 12 */ ++#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE) ++ ++#define GET_CURRENT(reg) \ ++ movl $-THREAD_SIZE, reg; \ ++ andl %esp, reg ++ ++#else /* __ASSEMBLY__ */ ++ ++#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE) ++#define __alloc_task_struct() \ ++ ((struct task_struct *) __get_free_pages(GFP_KERNEL,CONFIG_STACK_SIZE_SHIFT)) ++ ++#define __free_task_struct(p) \ ++ free_pages((unsigned long) (p), CONFIG_STACK_SIZE_SHIFT) ++ ++#define INIT_TASK_SIZE THREAD_SIZE + + struct task_struct; + + static inline struct task_struct * get_current(void) + { + struct task_struct *current; +- __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~8191UL)); ++ __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~(THREAD_SIZE - 1))); + return current; + } + + #define current get_current() + ++#endif /* __ASSEMBLY__ */ ++ + #endif /* !(_I386_CURRENT_H) */ +Index: linux-2.4.22-ac1/include/asm-i386/hw_irq.h +=================================================================== +--- linux-2.4.22-ac1.orig/include/asm-i386/hw_irq.h 2003-09-26 00:54:45.000000000 +0400 ++++ linux-2.4.22-ac1/include/asm-i386/hw_irq.h 2003-12-01 18:34:08.000000000 +0300 +@@ -114,10 +114,6 @@ + #define IRQ_NAME2(nr) nr##_interrupt(void) + #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) + +-#define GET_CURRENT \ +- "movl %esp, %ebx\n\t" \ +- "andl $-8192, %ebx\n\t" +- + /* + * SMP has a few special interrupts for IPI messages + */ +Index: linux-2.4.22-ac1/include/asm-i386/processor.h +=================================================================== +--- linux-2.4.22-ac1.orig/include/asm-i386/processor.h 2003-09-26 00:54:44.000000000 +0400 ++++ linux-2.4.22-ac1/include/asm-i386/processor.h 2003-12-01 18:34:08.000000000 +0300 +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -465,10 +466,6 @@ + #define KSTK_EIP(tsk) (((unsigned long 
*)(4096+(unsigned long)(tsk)))[1019]) + #define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022]) + +-#define THREAD_SIZE (2*PAGE_SIZE) +-#define __alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1)) +-#define __free_task_struct(p) do { BUG_ON((p)->state < TASK_ZOMBIE); free_pages((unsigned long) (p), 1); } while (0) +- + #define init_task (init_task_union.task) + #define init_stack (init_task_union.stack) + +Index: linux-2.4.22-ac1/include/linux/sched.h +=================================================================== +--- linux-2.4.22-ac1.orig/include/linux/sched.h 2003-11-13 18:21:42.000000000 +0300 ++++ linux-2.4.22-ac1/include/linux/sched.h 2003-12-01 18:34:08.000000000 +0300 +@@ -2,6 +2,7 @@ + #define _LINUX_SCHED_H + + #include /* for HZ */ ++#include /* maybe for INIT_TASK_SIZE */ + + extern unsigned long event; + diff --git a/lustre/kernel_patches/patches/gfp_memalloc-2.4.18-chaos.patch b/lustre/kernel_patches/patches/gfp_memalloc-2.4.18-chaos.patch index a8489e6..97cd9dc 100644 --- a/lustre/kernel_patches/patches/gfp_memalloc-2.4.18-chaos.patch +++ b/lustre/kernel_patches/patches/gfp_memalloc-2.4.18-chaos.patch @@ -1,7 +1,7 @@ Index: linux-2.4.18-chaos/include/linux/mm.h =================================================================== ---- linux-2.4.18-chaos.orig/include/linux/mm.h 2003-11-13 17:06:48.000000000 +0300 -+++ linux-2.4.18-chaos/include/linux/mm.h 2003-11-17 15:46:32.000000000 +0300 +--- linux-2.4.18-chaos.orig/include/linux/mm.h 2003-11-23 00:07:20.000000000 +0300 ++++ linux-2.4.18-chaos/include/linux/mm.h 2003-11-23 00:07:23.000000000 +0300 @@ -677,6 +677,7 @@ #define __GFP_IO 0x40 /* Can start low memory physical IO? */ #define __GFP_HIGHIO 0x80 /* Can start high mem physical IO? 
*/ @@ -20,8 +20,8 @@ Index: linux-2.4.18-chaos/include/linux/mm.h platforms, used as appropriate on others */ Index: linux-2.4.18-chaos/mm/page_alloc.c =================================================================== ---- linux-2.4.18-chaos.orig/mm/page_alloc.c 2003-11-13 17:06:47.000000000 +0300 -+++ linux-2.4.18-chaos/mm/page_alloc.c 2003-11-17 15:49:11.000000000 +0300 +--- linux-2.4.18-chaos.orig/mm/page_alloc.c 2003-11-23 00:07:20.000000000 +0300 ++++ linux-2.4.18-chaos/mm/page_alloc.c 2003-12-02 23:12:31.000000000 +0300 @@ -554,7 +554,7 @@ /* * Oh well, we didn't succeed. @@ -31,10 +31,21 @@ Index: linux-2.4.18-chaos/mm/page_alloc.c /* * Are we dealing with a higher order allocation? * +@@ -628,7 +628,9 @@ + + /* XXX: is pages_min/4 a good amount to reserve for this? */ + min += z->pages_min / 4; +- if (z->free_pages > min || ((current->flags & PF_MEMALLOC) && !in_interrupt())) { ++ if (z->free_pages > min || ++ (((current->flags & PF_MEMALLOC) || (gfp_mask & __GFP_MEMALLOC)) ++ && !in_interrupt())) { + page = rmqueue(z, order); + if (page) + return page; Index: linux-2.4.18-chaos/include/linux/slab.h =================================================================== --- linux-2.4.18-chaos.orig/include/linux/slab.h 2003-07-28 17:52:18.000000000 +0400 -+++ linux-2.4.18-chaos/include/linux/slab.h 2003-11-17 15:46:32.000000000 +0300 ++++ linux-2.4.18-chaos/include/linux/slab.h 2003-11-23 00:07:23.000000000 +0300 @@ -23,6 +23,7 @@ #define SLAB_KERNEL GFP_KERNEL #define SLAB_NFS GFP_NFS @@ -46,7 +57,7 @@ Index: linux-2.4.18-chaos/include/linux/slab.h Index: linux-2.4.18-chaos/mm/slab.c =================================================================== --- linux-2.4.18-chaos.orig/mm/slab.c 2003-07-28 17:52:20.000000000 +0400 -+++ linux-2.4.18-chaos/mm/slab.c 2003-11-17 15:46:32.000000000 +0300 ++++ linux-2.4.18-chaos/mm/slab.c 2003-11-23 00:07:23.000000000 +0300 @@ -1116,7 +1116,7 @@ /* Be lazy and only check for valid flags here, * keeping it out of the 
critical path in kmem_cache_alloc(). diff --git a/lustre/kernel_patches/patches/gfp_memalloc-2.4.20-rh.patch b/lustre/kernel_patches/patches/gfp_memalloc-2.4.20-rh.patch index 3f37e44..79caa76 100644 --- a/lustre/kernel_patches/patches/gfp_memalloc-2.4.20-rh.patch +++ b/lustre/kernel_patches/patches/gfp_memalloc-2.4.20-rh.patch @@ -32,6 +32,17 @@ Index: linux-2.4.20-rh-20.9/mm/page_alloc.c /* * Are we dealing with a higher order allocation? * +@@ -583,7 +583,9 @@ + + /* XXX: is pages_min/4 a good amount to reserve for this? */ + min += z->pages_min / 4; +- if (z->free_pages > min || ((current->flags & PF_MEMALLOC) && !in_interrupt())) { ++ if (z->free_pages > min || ++ (((current->flags & PF_MEMALLOC) || (gfp_mask & __GFP_MEMALLOC)) ++ && !in_interrupt())) { + page = rmqueue(z, order); + if (page) + return page; Index: linux-2.4.20-rh-20.9/include/linux/slab.h =================================================================== --- linux-2.4.20-rh-20.9.orig/include/linux/slab.h 2003-11-13 17:35:48.000000000 +0300 diff --git a/lustre/kernel_patches/patches/gfp_memalloc-2.4.22.patch b/lustre/kernel_patches/patches/gfp_memalloc-2.4.22.patch index 015bfc8..92e79c8 100644 --- a/lustre/kernel_patches/patches/gfp_memalloc-2.4.22.patch +++ b/lustre/kernel_patches/patches/gfp_memalloc-2.4.22.patch @@ -1,8 +1,8 @@ -Index: linux-2.4.22-vanilla/include/linux/mm.h +Index: linux-2.4.20/include/linux/mm.h =================================================================== ---- linux-2.4.22-vanilla.orig/include/linux/mm.h 2003-11-17 15:26:32.000000000 +0300 -+++ linux-2.4.22-vanilla/include/linux/mm.h 2003-11-17 15:40:32.000000000 +0300 -@@ -612,6 +612,7 @@ +--- linux-2.4.20.orig/include/linux/mm.h 2003-12-01 17:07:14.000000000 +0300 ++++ linux-2.4.20/include/linux/mm.h 2003-12-02 23:17:06.000000000 +0300 +@@ -614,6 +614,7 @@ #define __GFP_IO 0x40 /* Can start low memory physical IO? */ #define __GFP_HIGHIO 0x80 /* Can start high mem physical IO? 
*/ #define __GFP_FS 0x100 /* Can call down to low-level FS? */ @@ -10,7 +10,7 @@ Index: linux-2.4.22-vanilla/include/linux/mm.h #define GFP_NOHIGHIO (__GFP_HIGH | __GFP_WAIT | __GFP_IO) #define GFP_NOIO (__GFP_HIGH | __GFP_WAIT) -@@ -622,6 +623,7 @@ +@@ -624,6 +625,7 @@ #define GFP_KERNEL (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS) #define GFP_NFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS) #define GFP_KSWAPD ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS) @@ -18,24 +18,24 @@ Index: linux-2.4.22-vanilla/include/linux/mm.h /* Flag - indicates that the buffer will be suitable for DMA. Ignored on some platforms, used as appropriate on others */ -Index: linux-2.4.22-vanilla/mm/page_alloc.c +Index: linux-2.4.20/mm/page_alloc.c =================================================================== ---- linux-2.4.22-vanilla.orig/mm/page_alloc.c 2003-11-13 18:19:51.000000000 +0300 -+++ linux-2.4.22-vanilla/mm/page_alloc.c 2003-11-17 15:40:32.000000000 +0300 +--- linux-2.4.20.orig/mm/page_alloc.c 2003-12-01 17:02:43.000000000 +0300 ++++ linux-2.4.20/mm/page_alloc.c 2003-12-02 23:21:56.000000000 +0300 @@ -377,7 +377,8 @@ /* here we're in the low on memory slow path */ rebalance: - if (current->flags & (PF_MEMALLOC | PF_MEMDIE)) { + if (current->flags & (PF_MEMALLOC | PF_MEMDIE) || -+ gfp_mask & __GFP_MEMALLOC) { ++ (gfp_mask & __GFP_MEMALLOC)) { zone = zonelist->zones; for (;;) { zone_t *z = *(zone++); -Index: linux-2.4.22-vanilla/include/linux/slab.h +Index: linux-2.4.20/include/linux/slab.h =================================================================== ---- linux-2.4.22-vanilla.orig/include/linux/slab.h 2003-11-17 14:58:37.000000000 +0300 -+++ linux-2.4.22-vanilla/include/linux/slab.h 2003-11-17 15:42:13.000000000 +0300 +--- linux-2.4.20.orig/include/linux/slab.h 2003-12-01 17:07:14.000000000 +0300 ++++ linux-2.4.20/include/linux/slab.h 2003-12-02 23:17:06.000000000 +0300 @@ -23,6 +23,7 @@ #define SLAB_KERNEL GFP_KERNEL #define 
SLAB_NFS GFP_NFS @@ -44,11 +44,11 @@ Index: linux-2.4.22-vanilla/include/linux/slab.h #define SLAB_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_HIGHIO|__GFP_FS) #define SLAB_NO_GROW 0x00001000UL /* don't grow a cache */ -Index: linux-2.4.22-vanilla/mm/slab.c +Index: linux-2.4.20/mm/slab.c =================================================================== ---- linux-2.4.22-vanilla.orig/mm/slab.c 2003-11-13 17:39:29.000000000 +0300 -+++ linux-2.4.22-vanilla/mm/slab.c 2003-11-17 15:42:13.000000000 +0300 -@@ -1115,7 +1115,7 @@ +--- linux-2.4.20.orig/mm/slab.c 2003-12-01 17:02:34.000000000 +0300 ++++ linux-2.4.20/mm/slab.c 2003-12-02 23:17:06.000000000 +0300 +@@ -1113,7 +1113,7 @@ /* Be lazy and only check for valid flags here, * keeping it out of the critical path in kmem_cache_alloc(). */ diff --git a/lustre/kernel_patches/patches/linux-2.4.22-xattr-0.8.54.patch b/lustre/kernel_patches/patches/linux-2.4.22-xattr-0.8.54.patch index c5abbf3..937aa40 100644 --- a/lustre/kernel_patches/patches/linux-2.4.22-xattr-0.8.54.patch +++ b/lustre/kernel_patches/patches/linux-2.4.22-xattr-0.8.54.patch @@ -67,7 +67,7 @@ Index: linux-2.4.22-vanilla/Documentation/Configure.help =================================================================== --- linux-2.4.22-vanilla.orig/Documentation/Configure.help 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/Documentation/Configure.help 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/Documentation/Configure.help 2003-12-02 23:55:38.000000000 +0300 @@ -15613,6 +15613,39 @@ be compiled as a module, and so this could be dangerous. Most everyone wants to say Y here. 
@@ -151,7 +151,7 @@ Index: linux-2.4.22-vanilla/Documentation/Configure.help Index: linux-2.4.22-vanilla/arch/alpha/defconfig =================================================================== --- linux-2.4.22-vanilla.orig/arch/alpha/defconfig 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/alpha/defconfig 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/arch/alpha/defconfig 2003-12-02 23:55:38.000000000 +0300 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -169,7 +169,7 @@ Index: linux-2.4.22-vanilla/arch/alpha/defconfig Index: linux-2.4.22-vanilla/arch/alpha/kernel/entry.S =================================================================== --- linux-2.4.22-vanilla.orig/arch/alpha/kernel/entry.S 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/alpha/kernel/entry.S 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/arch/alpha/kernel/entry.S 2003-12-02 23:55:38.000000000 +0300 @@ -1154,6 +1154,18 @@ .quad sys_readahead .quad sys_ni_syscall /* 380, sys_security */ @@ -192,7 +192,7 @@ Index: linux-2.4.22-vanilla/arch/alpha/kernel/entry.S Index: linux-2.4.22-vanilla/arch/arm/defconfig =================================================================== --- linux-2.4.22-vanilla.orig/arch/arm/defconfig 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/arm/defconfig 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/arch/arm/defconfig 2003-12-02 23:55:38.000000000 +0300 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -210,7 +210,7 @@ Index: linux-2.4.22-vanilla/arch/arm/defconfig Index: linux-2.4.22-vanilla/arch/arm/kernel/calls.S =================================================================== --- linux-2.4.22-vanilla.orig/arch/arm/kernel/calls.S 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/arm/kernel/calls.S 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/arch/arm/kernel/calls.S 2003-12-02 
23:55:38.000000000 +0300 @@ -240,18 +240,18 @@ .long SYMBOL_NAME(sys_ni_syscall) /* Security */ .long SYMBOL_NAME(sys_gettid) @@ -245,7 +245,7 @@ Index: linux-2.4.22-vanilla/arch/arm/kernel/calls.S Index: linux-2.4.22-vanilla/arch/i386/defconfig =================================================================== --- linux-2.4.22-vanilla.orig/arch/i386/defconfig 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/i386/defconfig 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/arch/i386/defconfig 2003-12-02 23:55:38.000000000 +0300 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -263,7 +263,7 @@ Index: linux-2.4.22-vanilla/arch/i386/defconfig Index: linux-2.4.22-vanilla/arch/ia64/defconfig =================================================================== --- linux-2.4.22-vanilla.orig/arch/ia64/defconfig 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/ia64/defconfig 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/arch/ia64/defconfig 2003-12-02 23:55:38.000000000 +0300 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -281,7 +281,7 @@ Index: linux-2.4.22-vanilla/arch/ia64/defconfig Index: linux-2.4.22-vanilla/arch/m68k/defconfig =================================================================== --- linux-2.4.22-vanilla.orig/arch/m68k/defconfig 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/m68k/defconfig 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/arch/m68k/defconfig 2003-12-02 23:55:38.000000000 +0300 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -299,7 +299,7 @@ Index: linux-2.4.22-vanilla/arch/m68k/defconfig Index: linux-2.4.22-vanilla/arch/mips/defconfig =================================================================== --- linux-2.4.22-vanilla.orig/arch/mips/defconfig 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/mips/defconfig 2003-11-03 23:41:29.000000000 +0300 ++++ 
linux-2.4.22-vanilla/arch/mips/defconfig 2003-12-02 23:55:38.000000000 +0300 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -317,7 +317,7 @@ Index: linux-2.4.22-vanilla/arch/mips/defconfig Index: linux-2.4.22-vanilla/arch/mips64/defconfig =================================================================== --- linux-2.4.22-vanilla.orig/arch/mips64/defconfig 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/mips64/defconfig 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/arch/mips64/defconfig 2003-12-02 23:55:38.000000000 +0300 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -335,7 +335,7 @@ Index: linux-2.4.22-vanilla/arch/mips64/defconfig Index: linux-2.4.22-vanilla/arch/s390/defconfig =================================================================== --- linux-2.4.22-vanilla.orig/arch/s390/defconfig 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/s390/defconfig 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/arch/s390/defconfig 2003-12-02 23:55:38.000000000 +0300 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -353,7 +353,7 @@ Index: linux-2.4.22-vanilla/arch/s390/defconfig Index: linux-2.4.22-vanilla/arch/s390/kernel/entry.S =================================================================== --- linux-2.4.22-vanilla.orig/arch/s390/kernel/entry.S 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/s390/kernel/entry.S 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/arch/s390/kernel/entry.S 2003-12-02 23:55:38.000000000 +0300 @@ -558,18 +558,18 @@ .long sys_fcntl64 .long sys_readahead @@ -388,7 +388,7 @@ Index: linux-2.4.22-vanilla/arch/s390/kernel/entry.S Index: linux-2.4.22-vanilla/arch/s390x/defconfig =================================================================== --- linux-2.4.22-vanilla.orig/arch/s390x/defconfig 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/s390x/defconfig 
2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/arch/s390x/defconfig 2003-12-02 23:55:38.000000000 +0300 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -406,7 +406,7 @@ Index: linux-2.4.22-vanilla/arch/s390x/defconfig Index: linux-2.4.22-vanilla/arch/s390x/kernel/entry.S =================================================================== --- linux-2.4.22-vanilla.orig/arch/s390x/kernel/entry.S 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/s390x/kernel/entry.S 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/arch/s390x/kernel/entry.S 2003-12-02 23:55:38.000000000 +0300 @@ -591,18 +591,18 @@ .long SYSCALL(sys_ni_syscall,sys32_fcntl64_wrapper) .long SYSCALL(sys_readahead,sys32_readahead) @@ -441,7 +441,7 @@ Index: linux-2.4.22-vanilla/arch/s390x/kernel/entry.S Index: linux-2.4.22-vanilla/arch/s390x/kernel/wrapper32.S =================================================================== --- linux-2.4.22-vanilla.orig/arch/s390x/kernel/wrapper32.S 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/s390x/kernel/wrapper32.S 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/arch/s390x/kernel/wrapper32.S 2003-12-02 23:55:38.000000000 +0300 @@ -1098,6 +1098,98 @@ llgfr %r4,%r4 # long jg sys32_fstat64 # branch to system call @@ -544,7 +544,7 @@ Index: linux-2.4.22-vanilla/arch/s390x/kernel/wrapper32.S Index: linux-2.4.22-vanilla/arch/sparc64/defconfig =================================================================== --- linux-2.4.22-vanilla.orig/arch/sparc64/defconfig 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/arch/sparc64/defconfig 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/arch/sparc64/defconfig 2003-12-02 23:55:38.000000000 +0300 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -562,7 +562,7 @@ Index: linux-2.4.22-vanilla/arch/sparc64/defconfig Index: linux-2.4.22-vanilla/fs/Config.in 
=================================================================== --- linux-2.4.22-vanilla.orig/fs/Config.in 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/Config.in 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/Config.in 2003-12-02 23:55:38.000000000 +0300 @@ -29,6 +29,11 @@ dep_tristate 'BFS file system support (EXPERIMENTAL)' CONFIG_BFS_FS $CONFIG_EXPERIMENTAL @@ -600,8 +600,8 @@ Index: linux-2.4.22-vanilla/fs/Config.in source fs/partitions/Config.in Index: linux-2.4.22-vanilla/fs/Makefile =================================================================== ---- linux-2.4.22-vanilla.orig/fs/Makefile 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/Makefile 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/fs/Makefile 2003-12-02 23:55:36.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/Makefile 2003-12-02 23:55:38.000000000 +0300 @@ -77,6 +77,9 @@ obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o @@ -615,7 +615,7 @@ Index: linux-2.4.22-vanilla/fs/Makefile Index: linux-2.4.22-vanilla/fs/ext2/Makefile =================================================================== --- linux-2.4.22-vanilla.orig/fs/ext2/Makefile 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext2/Makefile 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext2/Makefile 2003-12-02 23:55:38.000000000 +0300 @@ -13,4 +13,8 @@ ioctl.o namei.o super.o symlink.o obj-m := $(O_TARGET) @@ -628,7 +628,7 @@ Index: linux-2.4.22-vanilla/fs/ext2/Makefile Index: linux-2.4.22-vanilla/fs/ext2/file.c =================================================================== --- linux-2.4.22-vanilla.orig/fs/ext2/file.c 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext2/file.c 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext2/file.c 2003-12-02 23:55:38.000000000 +0300 @@ -20,6 +20,7 @@ #include @@ -649,7 +649,7 @@ Index: linux-2.4.22-vanilla/fs/ext2/file.c Index: 
linux-2.4.22-vanilla/fs/ext2/ialloc.c =================================================================== --- linux-2.4.22-vanilla.orig/fs/ext2/ialloc.c 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext2/ialloc.c 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext2/ialloc.c 2003-12-02 23:55:38.000000000 +0300 @@ -15,6 +15,7 @@ #include #include @@ -669,7 +669,7 @@ Index: linux-2.4.22-vanilla/fs/ext2/ialloc.c Index: linux-2.4.22-vanilla/fs/ext2/inode.c =================================================================== --- linux-2.4.22-vanilla.orig/fs/ext2/inode.c 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext2/inode.c 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext2/inode.c 2003-12-02 23:55:38.000000000 +0300 @@ -39,6 +39,18 @@ static int ext2_update_inode(struct inode * inode, int do_sync); @@ -755,7 +755,7 @@ Index: linux-2.4.22-vanilla/fs/ext2/inode.c Index: linux-2.4.22-vanilla/fs/ext2/namei.c =================================================================== --- linux-2.4.22-vanilla.orig/fs/ext2/namei.c 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext2/namei.c 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext2/namei.c 2003-12-02 23:55:38.000000000 +0300 @@ -31,6 +31,7 @@ #include @@ -792,7 +792,7 @@ Index: linux-2.4.22-vanilla/fs/ext2/namei.c Index: linux-2.4.22-vanilla/fs/ext2/super.c =================================================================== --- linux-2.4.22-vanilla.orig/fs/ext2/super.c 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext2/super.c 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext2/super.c 2003-12-02 23:55:38.000000000 +0300 @@ -21,6 +21,7 @@ #include #include @@ -865,7 +865,7 @@ Index: linux-2.4.22-vanilla/fs/ext2/super.c Index: linux-2.4.22-vanilla/fs/ext2/symlink.c =================================================================== --- linux-2.4.22-vanilla.orig/fs/ext2/symlink.c 
2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext2/symlink.c 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext2/symlink.c 2003-12-02 23:55:38.000000000 +0300 @@ -19,6 +19,7 @@ #include @@ -897,8 +897,8 @@ Index: linux-2.4.22-vanilla/fs/ext2/symlink.c }; Index: linux-2.4.22-vanilla/fs/ext2/xattr.c =================================================================== ---- linux-2.4.22-vanilla.orig/fs/ext2/xattr.c 2003-11-03 23:41:29.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext2/xattr.c 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/fs/ext2/xattr.c 2003-12-02 23:55:38.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext2/xattr.c 2003-12-02 23:55:38.000000000 +0300 @@ -0,0 +1,1212 @@ +/* + * linux/fs/ext2/xattr.c @@ -2114,8 +2114,8 @@ Index: linux-2.4.22-vanilla/fs/ext2/xattr.c +#endif /* CONFIG_EXT2_FS_XATTR_SHARING */ Index: linux-2.4.22-vanilla/fs/ext2/xattr_user.c =================================================================== ---- linux-2.4.22-vanilla.orig/fs/ext2/xattr_user.c 2003-11-03 23:41:29.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext2/xattr_user.c 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/fs/ext2/xattr_user.c 2003-12-02 23:55:38.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext2/xattr_user.c 2003-12-02 23:55:38.000000000 +0300 @@ -0,0 +1,103 @@ +/* + * linux/fs/ext2/xattr_user.c @@ -2222,8 +2222,8 @@ Index: linux-2.4.22-vanilla/fs/ext2/xattr_user.c +} Index: linux-2.4.22-vanilla/fs/ext3/Makefile =================================================================== ---- linux-2.4.22-vanilla.orig/fs/ext3/Makefile 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext3/Makefile 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/fs/ext3/Makefile 2003-12-02 23:55:37.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext3/Makefile 2003-12-02 23:55:38.000000000 +0300 @@ -1,5 +1,5 @@ # -# Makefile for the linux ext2-filesystem routines. 
@@ -2250,8 +2250,8 @@ Index: linux-2.4.22-vanilla/fs/ext3/Makefile include $(TOPDIR)/Rules.make Index: linux-2.4.22-vanilla/fs/ext3/file.c =================================================================== ---- linux-2.4.22-vanilla.orig/fs/ext3/file.c 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext3/file.c 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/fs/ext3/file.c 2003-12-02 23:55:37.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext3/file.c 2003-12-02 23:55:38.000000000 +0300 @@ -23,6 +23,7 @@ #include #include @@ -2273,7 +2273,7 @@ Index: linux-2.4.22-vanilla/fs/ext3/file.c Index: linux-2.4.22-vanilla/fs/ext3/ialloc.c =================================================================== --- linux-2.4.22-vanilla.orig/fs/ext3/ialloc.c 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext3/ialloc.c 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext3/ialloc.c 2003-12-02 23:55:38.000000000 +0300 @@ -17,6 +17,7 @@ #include #include @@ -2293,7 +2293,7 @@ Index: linux-2.4.22-vanilla/fs/ext3/ialloc.c Index: linux-2.4.22-vanilla/fs/ext3/inode.c =================================================================== --- linux-2.4.22-vanilla.orig/fs/ext3/inode.c 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext3/inode.c 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext3/inode.c 2003-12-02 23:55:38.000000000 +0300 @@ -39,6 +39,18 @@ */ #undef SEARCH_FROM_ZERO @@ -2386,8 +2386,8 @@ Index: linux-2.4.22-vanilla/fs/ext3/inode.c return; Index: linux-2.4.22-vanilla/fs/ext3/namei.c =================================================================== ---- linux-2.4.22-vanilla.orig/fs/ext3/namei.c 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext3/namei.c 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/fs/ext3/namei.c 2003-12-02 23:55:37.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext3/namei.c 2003-12-02 23:55:38.000000000 +0300 @@ -29,6 
+29,7 @@ #include #include @@ -2451,8 +2451,8 @@ Index: linux-2.4.22-vanilla/fs/ext3/namei.c + Index: linux-2.4.22-vanilla/fs/ext3/super.c =================================================================== ---- linux-2.4.22-vanilla.orig/fs/ext3/super.c 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext3/super.c 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/fs/ext3/super.c 2003-12-02 23:55:37.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext3/super.c 2003-12-02 23:56:03.000000000 +0300 @@ -24,6 +24,7 @@ #include #include @@ -2504,12 +2504,18 @@ Index: linux-2.4.22-vanilla/fs/ext3/super.c if (!parse_options ((char *) data, &sb_block, sbi, &journal_inum, 0)) { sb->s_dev = 0; goto out_fail; -@@ -1827,17 +1843,29 @@ +@@ -1822,22 +1838,35 @@ + + static int __init init_ext3_fs(void) + { ++ int error; + #ifdef CONFIG_QUOTA + init_dquot_operations(&ext3_qops); old_sync_dquot = ext3_qops.sync_dquot; ext3_qops.sync_dquot = ext3_sync_dquot; #endif - return register_filesystem(&ext3_fs_type); -+ int error = init_ext3_xattr(); ++ error = init_ext3_xattr(); + if (error) + return error; + error = init_ext3_xattr_user(); @@ -2541,7 +2547,7 @@ Index: linux-2.4.22-vanilla/fs/ext3/super.c Index: linux-2.4.22-vanilla/fs/ext3/symlink.c =================================================================== --- linux-2.4.22-vanilla.orig/fs/ext3/symlink.c 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext3/symlink.c 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext3/symlink.c 2003-12-02 23:55:38.000000000 +0300 @@ -20,6 +20,7 @@ #include #include @@ -2573,8 +2579,8 @@ Index: linux-2.4.22-vanilla/fs/ext3/symlink.c }; Index: linux-2.4.22-vanilla/fs/ext3/xattr.c =================================================================== ---- linux-2.4.22-vanilla.orig/fs/ext3/xattr.c 2003-11-03 23:41:29.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext3/xattr.c 2003-11-03 23:41:29.000000000 +0300 +--- 
linux-2.4.22-vanilla.orig/fs/ext3/xattr.c 2003-12-02 23:55:38.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext3/xattr.c 2003-12-02 23:55:38.000000000 +0300 @@ -0,0 +1,1225 @@ +/* + * linux/fs/ext3/xattr.c @@ -3803,8 +3809,8 @@ Index: linux-2.4.22-vanilla/fs/ext3/xattr.c +#endif /* CONFIG_EXT3_FS_XATTR_SHARING */ Index: linux-2.4.22-vanilla/fs/ext3/xattr_user.c =================================================================== ---- linux-2.4.22-vanilla.orig/fs/ext3/xattr_user.c 2003-11-03 23:41:29.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext3/xattr_user.c 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/fs/ext3/xattr_user.c 2003-12-02 23:55:38.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext3/xattr_user.c 2003-12-02 23:55:39.000000000 +0300 @@ -0,0 +1,111 @@ +/* + * linux/fs/ext3/xattr_user.c @@ -3920,7 +3926,7 @@ Index: linux-2.4.22-vanilla/fs/ext3/xattr_user.c Index: linux-2.4.22-vanilla/fs/jfs/jfs_xattr.h =================================================================== --- linux-2.4.22-vanilla.orig/fs/jfs/jfs_xattr.h 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/jfs/jfs_xattr.h 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/jfs/jfs_xattr.h 2003-12-02 23:55:39.000000000 +0300 @@ -52,8 +52,10 @@ #define END_EALIST(ealist) \ ((struct jfs_ea *) (((char *) (ealist)) + EALIST_SIZE(ealist))) @@ -3937,7 +3943,7 @@ Index: linux-2.4.22-vanilla/fs/jfs/jfs_xattr.h Index: linux-2.4.22-vanilla/fs/jfs/xattr.c =================================================================== --- linux-2.4.22-vanilla.orig/fs/jfs/xattr.c 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/jfs/xattr.c 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/jfs/xattr.c 2003-12-02 23:55:39.000000000 +0300 @@ -641,7 +641,7 @@ } @@ -3967,8 +3973,8 @@ Index: linux-2.4.22-vanilla/fs/jfs/xattr.c if (value == NULL) { /* empty EA, do not remove */ Index: linux-2.4.22-vanilla/fs/mbcache.c 
=================================================================== ---- linux-2.4.22-vanilla.orig/fs/mbcache.c 2003-11-03 23:41:29.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/mbcache.c 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/fs/mbcache.c 2003-12-02 23:55:38.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/mbcache.c 2003-12-02 23:55:39.000000000 +0300 @@ -0,0 +1,648 @@ +/* + * linux/fs/mbcache.c @@ -4621,7 +4627,7 @@ Index: linux-2.4.22-vanilla/fs/mbcache.c Index: linux-2.4.22-vanilla/include/asm-arm/unistd.h =================================================================== --- linux-2.4.22-vanilla.orig/include/asm-arm/unistd.h 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/include/asm-arm/unistd.h 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/include/asm-arm/unistd.h 2003-12-02 23:55:39.000000000 +0300 @@ -250,7 +250,6 @@ #define __NR_security (__NR_SYSCALL_BASE+223) #define __NR_gettid (__NR_SYSCALL_BASE+224) @@ -4641,7 +4647,7 @@ Index: linux-2.4.22-vanilla/include/asm-arm/unistd.h Index: linux-2.4.22-vanilla/include/asm-ppc64/unistd.h =================================================================== --- linux-2.4.22-vanilla.orig/include/asm-ppc64/unistd.h 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/include/asm-ppc64/unistd.h 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/include/asm-ppc64/unistd.h 2003-12-02 23:55:39.000000000 +0300 @@ -218,6 +218,7 @@ #define __NR_mincore 206 #define __NR_gettid 207 @@ -4661,7 +4667,7 @@ Index: linux-2.4.22-vanilla/include/asm-ppc64/unistd.h Index: linux-2.4.22-vanilla/include/asm-s390/unistd.h =================================================================== --- linux-2.4.22-vanilla.orig/include/asm-s390/unistd.h 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/include/asm-s390/unistd.h 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/include/asm-s390/unistd.h 2003-12-02 23:55:39.000000000 +0300 @@ -213,9 
+213,18 @@ #define __NR_getdents64 220 #define __NR_fcntl64 221 @@ -4687,7 +4693,7 @@ Index: linux-2.4.22-vanilla/include/asm-s390/unistd.h Index: linux-2.4.22-vanilla/include/asm-s390x/unistd.h =================================================================== --- linux-2.4.22-vanilla.orig/include/asm-s390x/unistd.h 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/include/asm-s390x/unistd.h 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/include/asm-s390x/unistd.h 2003-12-02 23:55:39.000000000 +0300 @@ -181,9 +181,18 @@ #define __NR_mincore 218 #define __NR_madvise 219 @@ -4712,8 +4718,8 @@ Index: linux-2.4.22-vanilla/include/asm-s390x/unistd.h Index: linux-2.4.22-vanilla/include/linux/cache_def.h =================================================================== ---- linux-2.4.22-vanilla.orig/include/linux/cache_def.h 2003-11-03 23:41:29.000000000 +0300 -+++ linux-2.4.22-vanilla/include/linux/cache_def.h 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/include/linux/cache_def.h 2003-12-02 23:55:38.000000000 +0300 ++++ linux-2.4.22-vanilla/include/linux/cache_def.h 2003-12-02 23:55:39.000000000 +0300 @@ -0,0 +1,15 @@ +/* + * linux/cache_def.h @@ -4733,7 +4739,7 @@ Index: linux-2.4.22-vanilla/include/linux/cache_def.h Index: linux-2.4.22-vanilla/include/linux/errno.h =================================================================== --- linux-2.4.22-vanilla.orig/include/linux/errno.h 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/include/linux/errno.h 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/include/linux/errno.h 2003-12-02 23:55:39.000000000 +0300 @@ -23,4 +23,8 @@ #endif @@ -4746,7 +4752,7 @@ Index: linux-2.4.22-vanilla/include/linux/errno.h Index: linux-2.4.22-vanilla/include/linux/ext2_fs.h =================================================================== --- linux-2.4.22-vanilla.orig/include/linux/ext2_fs.h 2003-11-03 23:41:26.000000000 +0300 -+++ 
linux-2.4.22-vanilla/include/linux/ext2_fs.h 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/include/linux/ext2_fs.h 2003-12-02 23:55:39.000000000 +0300 @@ -57,8 +57,6 @@ */ #define EXT2_BAD_INO 1 /* Bad blocks inode */ @@ -4831,8 +4837,8 @@ Index: linux-2.4.22-vanilla/include/linux/ext2_fs.h #endif /* __KERNEL__ */ Index: linux-2.4.22-vanilla/include/linux/ext2_xattr.h =================================================================== ---- linux-2.4.22-vanilla.orig/include/linux/ext2_xattr.h 2003-11-03 23:41:29.000000000 +0300 -+++ linux-2.4.22-vanilla/include/linux/ext2_xattr.h 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/include/linux/ext2_xattr.h 2003-12-02 23:55:38.000000000 +0300 ++++ linux-2.4.22-vanilla/include/linux/ext2_xattr.h 2003-12-02 23:55:39.000000000 +0300 @@ -0,0 +1,157 @@ +/* + File: linux/ext2_xattr.h @@ -4993,8 +4999,8 @@ Index: linux-2.4.22-vanilla/include/linux/ext2_xattr.h + Index: linux-2.4.22-vanilla/include/linux/ext3_fs.h =================================================================== ---- linux-2.4.22-vanilla.orig/include/linux/ext3_fs.h 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/include/linux/ext3_fs.h 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/include/linux/ext3_fs.h 2003-12-02 23:55:37.000000000 +0300 ++++ linux-2.4.22-vanilla/include/linux/ext3_fs.h 2003-12-02 23:55:39.000000000 +0300 @@ -63,8 +63,6 @@ */ #define EXT3_BAD_INO 1 /* Bad blocks inode */ @@ -5079,8 +5085,8 @@ Index: linux-2.4.22-vanilla/include/linux/ext3_fs.h Index: linux-2.4.22-vanilla/include/linux/ext3_jbd.h =================================================================== ---- linux-2.4.22-vanilla.orig/include/linux/ext3_jbd.h 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/include/linux/ext3_jbd.h 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/include/linux/ext3_jbd.h 2003-12-02 23:55:37.000000000 +0300 ++++ 
linux-2.4.22-vanilla/include/linux/ext3_jbd.h 2003-12-02 23:55:39.000000000 +0300 @@ -30,13 +30,19 @@ #define EXT3_SINGLEDATA_TRANS_BLOCKS 8U @@ -5104,8 +5110,8 @@ Index: linux-2.4.22-vanilla/include/linux/ext3_jbd.h Index: linux-2.4.22-vanilla/include/linux/ext3_xattr.h =================================================================== ---- linux-2.4.22-vanilla.orig/include/linux/ext3_xattr.h 2003-11-03 23:41:29.000000000 +0300 -+++ linux-2.4.22-vanilla/include/linux/ext3_xattr.h 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/include/linux/ext3_xattr.h 2003-12-02 23:55:38.000000000 +0300 ++++ linux-2.4.22-vanilla/include/linux/ext3_xattr.h 2003-12-02 23:55:39.000000000 +0300 @@ -0,0 +1,157 @@ +/* + File: linux/ext3_xattr.h @@ -5266,8 +5272,8 @@ Index: linux-2.4.22-vanilla/include/linux/ext3_xattr.h + Index: linux-2.4.22-vanilla/include/linux/fs.h =================================================================== ---- linux-2.4.22-vanilla.orig/include/linux/fs.h 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/include/linux/fs.h 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/include/linux/fs.h 2003-12-02 23:55:35.000000000 +0300 ++++ linux-2.4.22-vanilla/include/linux/fs.h 2003-12-02 23:55:39.000000000 +0300 @@ -913,7 +913,7 @@ int (*setattr) (struct dentry *, struct iattr *); int (*setattr_raw) (struct inode *, struct iattr *); @@ -5279,8 +5285,8 @@ Index: linux-2.4.22-vanilla/include/linux/fs.h int (*removexattr) (struct dentry *, const char *); Index: linux-2.4.22-vanilla/include/linux/mbcache.h =================================================================== ---- linux-2.4.22-vanilla.orig/include/linux/mbcache.h 2003-11-03 23:41:29.000000000 +0300 -+++ linux-2.4.22-vanilla/include/linux/mbcache.h 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/include/linux/mbcache.h 2003-12-02 23:55:38.000000000 +0300 ++++ linux-2.4.22-vanilla/include/linux/mbcache.h 2003-12-02 23:55:39.000000000 
+0300 @@ -0,0 +1,69 @@ +/* + File: linux/mbcache.h @@ -5353,8 +5359,8 @@ Index: linux-2.4.22-vanilla/include/linux/mbcache.h +#endif Index: linux-2.4.22-vanilla/kernel/ksyms.c =================================================================== ---- linux-2.4.22-vanilla.orig/kernel/ksyms.c 2003-11-03 23:41:26.000000000 +0300 -+++ linux-2.4.22-vanilla/kernel/ksyms.c 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/kernel/ksyms.c 2003-12-02 23:55:34.000000000 +0300 ++++ linux-2.4.22-vanilla/kernel/ksyms.c 2003-12-02 23:55:39.000000000 +0300 @@ -11,6 +11,7 @@ #include @@ -5371,7 +5377,7 @@ Index: linux-2.4.22-vanilla/kernel/ksyms.c /* internal kernel memory management */ EXPORT_SYMBOL(_alloc_pages); -@@ -109,6 +111,8 @@ +@@ -108,6 +110,8 @@ EXPORT_SYMBOL(kmem_cache_alloc); EXPORT_SYMBOL(kmem_cache_free); EXPORT_SYMBOL(kmem_cache_size); @@ -5383,7 +5389,7 @@ Index: linux-2.4.22-vanilla/kernel/ksyms.c Index: linux-2.4.22-vanilla/mm/vmscan.c =================================================================== --- linux-2.4.22-vanilla.orig/mm/vmscan.c 2003-11-03 23:41:27.000000000 +0300 -+++ linux-2.4.22-vanilla/mm/vmscan.c 2003-11-03 23:41:29.000000000 +0300 ++++ linux-2.4.22-vanilla/mm/vmscan.c 2003-12-02 23:55:39.000000000 +0300 @@ -18,6 +18,7 @@ #include #include @@ -5442,8 +5448,8 @@ Index: linux-2.4.22-vanilla/mm/vmscan.c #endif Index: linux-2.4.22-vanilla/fs/ext3/ext3-exports.c =================================================================== ---- linux-2.4.22-vanilla.orig/fs/ext3/ext3-exports.c 2003-11-03 23:41:29.000000000 +0300 -+++ linux-2.4.22-vanilla/fs/ext3/ext3-exports.c 2003-11-03 23:41:29.000000000 +0300 +--- linux-2.4.22-vanilla.orig/fs/ext3/ext3-exports.c 2003-12-02 23:55:38.000000000 +0300 ++++ linux-2.4.22-vanilla/fs/ext3/ext3-exports.c 2003-12-02 23:55:39.000000000 +0300 @@ -0,0 +1,13 @@ +#include +#include diff --git a/lustre/kernel_patches/series/hp-pnnl-2.4.20 b/lustre/kernel_patches/series/hp-pnnl-2.4.20 index 5b6876f..704cc29 
100644 --- a/lustre/kernel_patches/series/hp-pnnl-2.4.20 +++ b/lustre/kernel_patches/series/hp-pnnl-2.4.20 @@ -1,3 +1,4 @@ +configurable-x86-stack-2.4.20.patch dev_read_only_hp_2.4.20.patch exports_2.4.20-rh-hp.patch lustre_version.patch diff --git a/lustre/kernel_patches/series/rh-2.4.20 b/lustre/kernel_patches/series/rh-2.4.20 index 46052d9..9034dd7 100644 --- a/lustre/kernel_patches/series/rh-2.4.20 +++ b/lustre/kernel_patches/series/rh-2.4.20 @@ -1,3 +1,4 @@ +configurable-x86-stack-2.4.20-rh.patch mcore-2.4.20-8.patch dsp.patch dev_read_only_2.4.20-rh.patch diff --git a/lustre/kernel_patches/series/rh-2.4.22 b/lustre/kernel_patches/series/rh-2.4.22 index a3bd2b9..a4ce33d 100644 --- a/lustre/kernel_patches/series/rh-2.4.22 +++ b/lustre/kernel_patches/series/rh-2.4.22 @@ -1,3 +1,4 @@ +configurable-x86-stack-2.4.22-rh.patch dev_read_only_2.4.20-rh.patch exports_2.4.20-rh-hp.patch lustre_version.patch diff --git a/lustre/kernel_patches/series/suse-2.4.21 b/lustre/kernel_patches/series/suse-2.4.21 index b3c5d0d..715ff2c 100644 --- a/lustre/kernel_patches/series/suse-2.4.21 +++ b/lustre/kernel_patches/series/suse-2.4.21 @@ -1,3 +1,4 @@ +configurable-x86-stack-2.4.20.patch dev_read_only_2.4.20-rh.patch exports_2.4.20-rh-hp.patch lustre_version.patch diff --git a/lustre/kernel_patches/series/vanilla-2.4.19-pre1 b/lustre/kernel_patches/series/vanilla-2.4.19-pre1 index b6ded90..9551189 100644 --- a/lustre/kernel_patches/series/vanilla-2.4.19-pre1 +++ b/lustre/kernel_patches/series/vanilla-2.4.19-pre1 @@ -1,3 +1,4 @@ +configurable-x86-stack-2.4.19-pre1.patch dev_read_only_2.4.20.patch exports_2.4.19-pre1.patch lustre_version.patch diff --git a/lustre/kernel_patches/series/vanilla-2.4.20 b/lustre/kernel_patches/series/vanilla-2.4.20 index 46f5fd8..6e715ad 100644 --- a/lustre/kernel_patches/series/vanilla-2.4.20 +++ b/lustre/kernel_patches/series/vanilla-2.4.20 @@ -1,3 +1,4 @@ +configurable-x86-stack-2.4.20.patch uml-patch-2.4.20-6.patch uml-2.4.20-do_mmap_pgoff-fix.patch 
uml-2.4.20-fixes-1.patch @@ -45,4 +46,5 @@ ext3-ea-in-inode-2.4.20.patch listman-2.4.20.patch ext3-trusted_ea-2.4.20.patch kernel_text_address-2.4.20-vanilla.patch -ext3-xattr-ptr-arith-fix.patch +ext3-xattr-ptr-arith-fix.patch +gfp_memalloc-2.4.22.patch diff --git a/lustre/kernel_patches/series/vanilla-2.4.22 b/lustre/kernel_patches/series/vanilla-2.4.22 index d3b7123..1e91487 100644 --- a/lustre/kernel_patches/series/vanilla-2.4.22 +++ b/lustre/kernel_patches/series/vanilla-2.4.22 @@ -1,3 +1,4 @@ +configurable-x86-stack-2.4.20.patch dev_read_only_2.4.20-rh.patch exports_2.4.20-rh-hp.patch lustre_version.patch diff --git a/lustre/ldlm/ldlm_internal.h b/lustre/ldlm/ldlm_internal.h index 8dc312e..abd0f2e 100644 --- a/lustre/ldlm/ldlm_internal.h +++ b/lustre/ldlm/ldlm_internal.h @@ -46,6 +46,6 @@ struct ldlm_state { struct ldlm_bl_pool *ldlm_bl_pool; }; -int __init ldlm_init(void); -void __exit ldlm_exit(void); +int ldlm_init(void); +void ldlm_exit(void); diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 2d2196a..9245d91 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -1006,7 +1006,7 @@ static void ptlrpc_abort_reply (struct ptlrpc_request *req) * has finished. Note that if the ACK does arrive, its * callback wakes us in short order. 
--eeb */ lwi = LWI_TIMEOUT (HZ/4, NULL, NULL); - rc = l_wait_event(req->rq_wait_for_rep, !req->rq_want_ack, + rc = l_wait_event(req->rq_reply_waitq, !req->rq_want_ack, &lwi); CDEBUG (D_HA, "Retrying req %p: %d\n", req, rc); /* NB go back and test rq_want_ack with locking, to ensure @@ -1062,7 +1062,7 @@ void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id) OBD_FREE(req->rq_repmsg, req->rq_replen); req->rq_repmsg = NULL; } - init_waitqueue_head(&req->rq_wait_for_rep); + init_waitqueue_head(&req->rq_reply_waitq); netrc = 0; } @@ -1076,7 +1076,7 @@ void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id) init_waitqueue_entry(&commit_wait, current); add_wait_queue(&obd->obd_commit_waitq, &commit_wait); - rc = l_wait_event(req->rq_wait_for_rep, + rc = l_wait_event(req->rq_reply_waitq, !req->rq_want_ack || req->rq_resent || req->rq_transno <= obd->obd_last_committed, &lwi); remove_wait_queue(&obd->obd_commit_waitq, &commit_wait); diff --git a/lustre/lvfs/fsfilt_ext3.c b/lustre/lvfs/fsfilt_ext3.c index 7c21ba4..dd70aa5 100644 --- a/lustre/lvfs/fsfilt_ext3.c +++ b/lustre/lvfs/fsfilt_ext3.c @@ -671,11 +671,12 @@ static int fsfilt_ext3_prep_san_write(struct inode *inode, long *blocks, static int fsfilt_ext3_read_record(struct file * file, void *buf, int size, loff_t *offs) { - struct buffer_head *bh; - unsigned long block, boffs; struct inode *inode = file->f_dentry->d_inode; - int err; + unsigned long block; + struct buffer_head *bh; + int err, blocksize, csize, boffs; + /* prevent reading after eof */ if (inode->i_size < *offs + size) { size = inode->i_size - *offs; if (size < 0) { @@ -686,87 +687,85 @@ static int fsfilt_ext3_read_record(struct file * file, void *buf, return 0; } - block = *offs >> inode->i_blkbits; - bh = ext3_bread(NULL, inode, block, 0, &err); - if (!bh) { - CERROR("can't read block: %d\n", err); - return err; - } + blocksize = 1 << inode->i_blkbits; + + while (size > 0) { + block = *offs >> inode->i_blkbits; + boffs = 
*offs & (blocksize - 1); + csize = min(blocksize - boffs, size); + bh = ext3_bread(NULL, inode, block, 0, &err); + if (!bh) { + CERROR("can't read block: %d\n", err); + return err; + } - boffs = (unsigned)*offs % bh->b_size; - if (boffs + size > bh->b_size) { - CERROR("request crosses block's border. offset %llu, size %u\n", - *offs, size); + memcpy(buf, bh->b_data + boffs, csize); brelse(bh); - return -EIO; - } - memcpy(buf, bh->b_data + boffs, size); - brelse(bh); - *offs += size; + *offs += csize; + buf += csize; + size -= csize; + } return 0; } -static int fsfilt_ext3_write_record(struct file *file, void *buf, int size, +static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize, loff_t *offs, int force_sync) { - struct buffer_head *bh; - unsigned long block, boffs; + struct buffer_head *bh = NULL; + unsigned long block; struct inode *inode = file->f_dentry->d_inode; - loff_t old_size = inode->i_size; + loff_t old_size = inode->i_size, offset = *offs; + loff_t new_size = inode->i_size; journal_t *journal; handle_t *handle; - int err; + int err, block_count = 0, blocksize, size, boffs; + /* Determine how many transaction credits are needed */ + blocksize = 1 << inode->i_blkbits; + block_count = (*offs & (blocksize - 1)) + bufsize; + block_count = (block_count + blocksize - 1) >> inode->i_blkbits; + journal = EXT3_SB(inode->i_sb)->s_journal; - handle = journal_start(journal, EXT3_DATA_TRANS_BLOCKS + 2); + handle = journal_start(journal, + block_count * EXT3_DATA_TRANS_BLOCKS + 2); if (IS_ERR(handle)) { CERROR("can't start transaction\n"); return PTR_ERR(handle); } - block = *offs >> inode->i_blkbits; - if (*offs + size > inode->i_size) { - down(&inode->i_sem); - if (*offs + size > inode->i_size) - inode->i_size = *offs + size; - if (inode->i_size > EXT3_I(inode)->i_disksize) - EXT3_I(inode)->i_disksize = inode->i_size; - up(&inode->i_sem); - } - - bh = ext3_bread(handle, inode, block, 1, &err); - if (!bh) { - CERROR("can't read/create block: 
%d\n", err); - goto out; - } - - /* This is a hack only needed because ext3_get_block_handle() updates - * i_disksize after marking the inode dirty in ext3_splice_branch(). - * We will fix that when we get a chance, as ext3_mark_inode_dirty() - * is not without cost, nor is it even exported. - */ - if (inode->i_size > old_size) - mark_inode_dirty(inode); - - boffs = (unsigned)*offs % bh->b_size; - if (boffs + size > bh->b_size) { - CERROR("request crosses block's border. offset %llu, size %u\n", - *offs, size); - err = -EIO; - goto out; - } + while (bufsize > 0) { + if (bh != NULL) + brelse(bh); + + block = offset >> inode->i_blkbits; + boffs = offset & (blocksize - 1); + size = min(blocksize - boffs, bufsize); + bh = ext3_bread(handle, inode, block, 1, &err); + if (!bh) { + CERROR("can't read/create block: %d\n", err); + goto out; + } - err = ext3_journal_get_write_access(handle, bh); - if (err) { - CERROR("journal_get_write_access() returned error %d\n", err); - goto out; - } - memcpy(bh->b_data + boffs, buf, size); - err = ext3_journal_dirty_metadata(handle, bh); - if (err) { - CERROR("journal_dirty_metadata() returned error %d\n", err); - goto out; + err = ext3_journal_get_write_access(handle, bh); + if (err) { + CERROR("journal_get_write_access() returned error %d\n", + err); + goto out; + } + LASSERT(bh->b_data + boffs + size <= bh->b_data + bh->b_size); + memcpy(bh->b_data + boffs, buf, size); + err = ext3_journal_dirty_metadata(handle, bh); + if (err) { + CERROR("journal_dirty_metadata() returned error %d\n", + err); + goto out; + } + if (offset + size > new_size) + new_size = offset + size; + offset += size; + bufsize -= size; + buf += size; } if (force_sync) @@ -774,9 +773,22 @@ static int fsfilt_ext3_write_record(struct file *file, void *buf, int size, out: if (bh) brelse(bh); + + /* correct in-core and on-disk sizes */ + if (new_size > inode->i_size) { + down(&inode->i_sem); + if (new_size > inode->i_size) + inode->i_size = new_size; + if (inode->i_size 
> EXT3_I(inode)->i_disksize) + EXT3_I(inode)->i_disksize = inode->i_size; + up(&inode->i_sem); + if (inode->i_size > old_size) + mark_inode_dirty(inode); + } + journal_stop(handle); if (err == 0) - *offs += size; + *offs = offset; return err; } diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 67b3eb2..e6cb437 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -1015,7 +1015,7 @@ void mds_steal_ack_locks(struct obd_export *exp, sizeof req->rq_ack_locks); spin_lock_irqsave (&req->rq_lock, flags); oldrep->rq_resent = 1; - wake_up(&oldrep->rq_wait_for_rep); + wake_up(&oldrep->rq_reply_waitq); spin_unlock_irqrestore (&req->rq_lock, flags); DEBUG_REQ(D_HA, oldrep, "stole locks from"); DEBUG_REQ(D_HA, req, "stole locks for"); @@ -1031,8 +1031,6 @@ int mds_handle(struct ptlrpc_request *req) OBD_FAIL_RETURN(OBD_FAIL_MDS_ALL_REQUEST_NET | OBD_FAIL_ONCE, 0); - LASSERT(!strcmp(req->rq_obd->obd_type->typ_name, LUSTRE_MDT_NAME)); - LASSERT(current->journal_info == NULL); /* XXX identical to OST */ if (req->rq_reqmsg->opc != MDS_CONNECT) { diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 048112a..62e0f44 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -522,16 +522,6 @@ static int filter_cleanup_groups(struct obd_device *obd) int i; ENTRY; - if (filter->fo_subdir_count) { - for (i = 0; i < filter->fo_subdir_count; i++) { - struct dentry *dentry = filter->fo_dentry_O_sub[i]; - f_dput(dentry); - filter->fo_dentry_O_sub[i] = NULL; - } - OBD_FREE(filter->fo_dentry_O_sub, - filter->fo_subdir_count * - sizeof(*filter->fo_dentry_O_sub)); - } if (filter->fo_dentry_O_groups != NULL && filter->fo_last_objids != NULL && filter->fo_last_objid_files != NULL) { @@ -548,6 +538,18 @@ static int filter_cleanup_groups(struct obd_device *obd) } } } + if (filter->fo_dentry_O_sub != NULL && filter->fo_subdir_count) { + for (i = 0; i < filter->fo_subdir_count; i++) { + struct dentry *dentry = filter->fo_dentry_O_sub[i]; + if 
(dentry != NULL) { + f_dput(dentry); + filter->fo_dentry_O_sub[i] = NULL; + } + } + OBD_FREE(filter->fo_dentry_O_sub, + filter->fo_subdir_count * + sizeof(*filter->fo_dentry_O_sub)); + } if (filter->fo_dentry_O_groups != NULL) OBD_FREE(filter->fo_dentry_O_groups, FILTER_GROUPS * sizeof(struct dentry *)); @@ -648,7 +650,8 @@ static int filter_prep_groups(struct obd_device *obd) CDEBUG(D_INODE, "got/created O/%s: %p\n", name, dentry); if (IS_ERR(dentry)) { rc = PTR_ERR(dentry); - CERROR("cannot create O/%s: rc = %d\n", name, rc); + CERROR("cannot lookup/create O/%s: rc = %d\n", + name, rc); GOTO(cleanup, rc); } filter->fo_dentry_O_groups[i] = dentry; @@ -706,7 +709,8 @@ static int filter_prep_groups(struct obd_device *obd) CDEBUG(D_INODE, "got/created O/0/%s: %p\n", dir,dentry); if (IS_ERR(dentry)) { rc = PTR_ERR(dentry); - CERROR("can't create O/0/%s: rc = %d\n",dir,rc); + CERROR("can't lookup/create O/0/%s: rc = %d\n", + dir, rc); GOTO(cleanup, rc); } filter->fo_dentry_O_sub[i] = dentry; @@ -1858,14 +1862,14 @@ static int filter_sync(struct obd_export *exp, struct obdo *oa, filter = &exp->exp_obd->u.filter; /* an objid of zero is taken to mean "sync whole filesystem" */ - if (!oa || !oa->o_valid & OBD_MD_FLID) { + if (!oa || !(oa->o_valid & OBD_MD_FLID)) { rc = fsfilt_sync(exp->exp_obd, filter->fo_sb); - GOTO(out_exp, rc); + RETURN(rc); } dentry = filter_oa2dentry(exp->exp_obd, oa); if (IS_ERR(dentry)) - GOTO(out_exp, rc = PTR_ERR(dentry)); + RETURN(PTR_ERR(dentry)); push_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL); @@ -1890,7 +1894,6 @@ static int filter_sync(struct obd_export *exp, struct obdo *oa, pop_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL); f_dput(dentry); -out_exp: RETURN(rc); } diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index 74a6d1d..a83592f 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -405,7 +405,6 @@ static int ost_brw_read(struct ptlrpc_request *req) lustre_swab_niobuf_remote (&remote_nb[i]); } - 
size[0] = sizeof(*body); rc = lustre_pack_reply(req, 1, size, NULL); if (rc) GOTO(out, rc); @@ -944,9 +943,6 @@ static int ost_handle(struct ptlrpc_request *req) } } - if (strcmp(req->rq_obd->obd_type->typ_name, "ost") != 0) - GOTO(out, rc = -EINVAL); - oti_init(oti, req); switch (req->rq_reqmsg->opc) { diff --git a/lustre/portals/include/cygwin-ioctl.h b/lustre/portals/include/cygwin-ioctl.h index 3ecefff..900f0a4 100644 --- a/lustre/portals/include/cygwin-ioctl.h +++ b/lustre/portals/include/cygwin-ioctl.h @@ -1,4 +1,4 @@ -/* $Id: cygwin-ioctl.h,v 1.2 2003/12/03 03:14:43 phil Exp $ +/* $Id: cygwin-ioctl.h,v 1.3 2003/12/03 05:12:41 phil Exp $ * * linux/ioctl.h for Linux by H.H. Bergman. */ diff --git a/lustre/portals/include/linux/kp30.h b/lustre/portals/include/linux/kp30.h index 3e6d5e3..3d60631 100644 --- a/lustre/portals/include/linux/kp30.h +++ b/lustre/portals/include/linux/kp30.h @@ -115,7 +115,7 @@ do { \ if (portal_cerror == 0) \ break; \ CHECK_STACK(CDEBUG_STACK); \ - if (!(mask) || ((mask) & (D_ERROR | D_EMERG)) || \ + if (!(mask) || ((mask) & (D_ERROR | D_EMERG | D_WARNING)) || \ (portal_debug & (mask) && \ portal_subsystem_debug & DEBUG_SUBSYSTEM)) \ portals_debug_msg(DEBUG_SUBSYSTEM, mask, \ @@ -283,17 +283,19 @@ do { \ #define GFP_MEMALLOC 0 #endif -#define PORTAL_ALLOC(ptr, size) \ +#define PORTAL_ALLOC_GFP(ptr, size, mask) \ do { \ LASSERT (!in_interrupt()); \ if ((size) > PORTAL_VMALLOC_SIZE) \ (ptr) = vmalloc(size); \ else \ - (ptr) = kmalloc((size), (GFP_KERNEL | GFP_MEMALLOC)); \ - if ((ptr) == NULL) \ + (ptr) = kmalloc((size), (mask)); \ + if ((ptr) == NULL) { \ CERROR("PORTALS: out of memory at %s:%d (tried to alloc '"\ #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size));\ - else { \ + CERROR("PORTALS: %d total bytes allocated by portals\n", \ + atomic_read(&portal_kmemory)); \ + } else { \ portal_kmem_inc((ptr), (size)); \ memset((ptr), 0, (size)); \ } \ @@ -301,6 +303,12 @@ do { \ (int)(size), (ptr), atomic_read (&portal_kmemory)); \ } 
while (0) +#define PORTAL_ALLOC(ptr, size) \ + PORTAL_ALLOC_GFP(ptr, size, (GFP_KERNEL | GFP_MEMALLOC)) + +#define PORTAL_ALLOC_ATOMIC(ptr, size) \ + PORTAL_ALLOC_GFP(ptr, size, (GFP_ATOMIC | GFP_MEMALLOC)) + #define PORTAL_FREE(ptr, size) \ do { \ int s = (size); \ @@ -330,11 +338,13 @@ do { \ CERROR("PORTALS: out of memory at %s:%d (tried to alloc" \ " '" #ptr "' from slab '" #slab "')\n", __FILE__, \ __LINE__); \ + CERROR("PORTALS: %d total bytes allocated by portals\n", \ + atomic_read(&portal_kmemory)); \ } else { \ portal_kmem_inc((ptr), (size)); \ memset((ptr), 0, (size)); \ } \ - CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %ld at %p (tot %d).\n", \ + CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d).\n", \ (int)(size), (ptr), atomic_read(&portal_kmemory)); \ } while (0) @@ -690,7 +700,10 @@ char *portals_nid2str(int nal, ptl_nid_t nid, char *str); /******************************************************************************/ /* Light-weight trace * Support for temporary event tracing with minimal Heisenberg effect. */ -#define LWT_SUPPORT 1 +#define LWT_SUPPORT 0 + +#define LWT_MEMORY (64<<20) +#define LWT_MAX_CPUS 4 typedef struct { cycles_t lwte_when; @@ -728,7 +741,7 @@ extern void lwt_fini (void); extern int lwt_lookup_string (int *size, char *knlptr, char *usrptr, int usrsize); extern int lwt_control (int enable, int clear); -extern int lwt_snapshot (int *ncpu, int *total_size, +extern int lwt_snapshot (cycles_t *now, int *ncpu, int *total_size, void *user_ptr, int user_size); /* Note that we _don't_ define LWT_EVENT at all if LWT_SUPPORT isn't set. 
@@ -775,6 +788,11 @@ do { \ #endif /* __KERNEL__ */ #endif /* LWT_SUPPORT */ +struct portals_device_userstate +{ + int pdu_memhog_pages; + struct page *pdu_memhog_root_page; +}; #include @@ -1044,7 +1062,8 @@ static inline int portal_ioctl_getdata(char *buf, char *end, void *arg) #define IOC_PORTAL_LWT_CONTROL _IOWR('e', 39, long) #define IOC_PORTAL_LWT_SNAPSHOT _IOWR('e', 40, long) #define IOC_PORTAL_LWT_LOOKUP_STRING _IOWR('e', 41, long) -#define IOC_PORTAL_MAX_NR 41 +#define IOC_PORTAL_MEMHOG _IOWR('e', 42, long) +#define IOC_PORTAL_MAX_NR 42 enum { QSWNAL = 1, diff --git a/lustre/portals/include/portals/lib-p30.h b/lustre/portals/include/portals/lib-p30.h index 55fd720..c402828 100644 --- a/lustre/portals/include/portals/lib-p30.h +++ b/lustre/portals/include/portals/lib-p30.h @@ -168,7 +168,8 @@ static inline lib_eq_t * lib_eq_alloc (nal_cb_t *nal) { /* NEVER called with statelock held */ - lib_eq_t *eq = kmem_cache_alloc(ptl_eq_slab, GFP_NOFS); + lib_eq_t *eq; + PORTAL_SLAB_ALLOC(eq, ptl_eq_slab, sizeof(*eq)); if (eq == NULL) return (NULL); @@ -182,14 +183,15 @@ lib_eq_free (nal_cb_t *nal, lib_eq_t *eq) { /* ALWAYS called with statelock held */ atomic_dec (&eq_in_use_count); - kmem_cache_free(ptl_eq_slab, eq); + PORTAL_SLAB_FREE(eq, ptl_eq_slab, sizeof(*eq)); } static inline lib_md_t * lib_md_alloc (nal_cb_t *nal) { /* NEVER called with statelock held */ - lib_md_t *md = kmem_cache_alloc(ptl_md_slab, GFP_NOFS); + lib_md_t *md; + PORTAL_SLAB_ALLOC(md, ptl_md_slab, sizeof(*md)); if (md == NULL) return (NULL); @@ -203,14 +205,15 @@ lib_md_free (nal_cb_t *nal, lib_md_t *md) { /* ALWAYS called with statelock held */ atomic_dec (&md_in_use_count); - kmem_cache_free(ptl_md_slab, md); + PORTAL_SLAB_FREE(md, ptl_md_slab, sizeof(*md)); } static inline lib_me_t * lib_me_alloc (nal_cb_t *nal) { /* NEVER called with statelock held */ - lib_me_t *me = kmem_cache_alloc(ptl_me_slab, GFP_NOFS); + lib_me_t *me; + PORTAL_SLAB_ALLOC(me, ptl_me_slab, sizeof(*me)); if (me == NULL) 
return (NULL); @@ -224,14 +227,15 @@ lib_me_free(nal_cb_t *nal, lib_me_t *me) { /* ALWAYS called with statelock held */ atomic_dec (&me_in_use_count); - kmem_cache_free(ptl_me_slab, me); + PORTAL_SLAB_FREE(me, ptl_me_slab, sizeof(*me)); } static inline lib_msg_t * lib_msg_alloc(nal_cb_t *nal) { /* ALWAYS called with statelock held */ - lib_msg_t *msg = kmem_cache_alloc(ptl_msg_slab, GFP_ATOMIC); + lib_msg_t *msg; + PORTAL_SLAB_ALLOC(msg, ptl_msg_slab, sizeof(*msg)); if (msg == NULL) return (NULL); @@ -245,7 +249,7 @@ lib_msg_free(nal_cb_t *nal, lib_msg_t *msg) { /* ALWAYS called with statelock held */ atomic_dec (&msg_in_use_count); - kmem_cache_free(ptl_msg_slab, msg); + PORTAL_SLAB_FREE(msg, ptl_msg_slab, sizeof(*msg)); } #endif diff --git a/lustre/portals/include/portals/ptlctl.h b/lustre/portals/include/portals/ptlctl.h index f581e72..12ef47a 100644 --- a/lustre/portals/include/portals/ptlctl.h +++ b/lustre/portals/include/portals/ptlctl.h @@ -59,6 +59,7 @@ int jt_ptl_notify_router (int argc, char **argv); int jt_ptl_print_routes (int argc, char **argv); int jt_ptl_fail_nid (int argc, char **argv); int jt_ptl_lwt(int argc, char **argv); +int jt_ptl_memhog(int argc, char **argv); int dbg_initialize(int argc, char **argv); int jt_dbg_filter(int argc, char **argv); diff --git a/lustre/portals/knals/socknal/socknal.c b/lustre/portals/knals/socknal/socknal.c index 6f6fa7e..6de511c 100644 --- a/lustre/portals/knals/socknal/socknal.c +++ b/lustre/portals/knals/socknal/socknal.c @@ -1395,30 +1395,35 @@ ksocknal_cmd(struct portals_cfg *pcfg, void * private) } void -ksocknal_free_buffers (void) +ksocknal_free_fmbs (ksock_fmb_pool_t *p) { - if (ksocknal_data.ksnd_fmbs != NULL) { - ksock_fmb_t *fmb = (ksock_fmb_t *)ksocknal_data.ksnd_fmbs; - int i; - int j; - - for (i = 0; - i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); - i++, fmb++) - for (j = 0; j < fmb->fmb_npages; j++) - if (fmb->fmb_pages[j] != NULL) - __free_page (fmb->fmb_pages[j]); - - PORTAL_FREE 
(ksocknal_data.ksnd_fmbs, - sizeof (ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + - SOCKNAL_LARGE_FWD_NMSGS)); + ksock_fmb_t *fmb; + int i; + + LASSERT (list_empty(&p->fmp_blocked_conns)); + LASSERT (p->fmp_nactive_fmbs == 0); + + while (!list_empty(&p->fmp_idle_fmbs)) { + + fmb = list_entry(p->fmp_idle_fmbs.next, + ksock_fmb_t, fmb_list); + + for (i = 0; i < fmb->fmb_npages; i++) + if (fmb->fmb_pages[i] != NULL) + __free_page(fmb->fmb_pages[i]); + + list_del(&fmb->fmb_list); + PORTAL_FREE(fmb, sizeof(*fmb)); } +} + +void +ksocknal_free_buffers (void) +{ + ksocknal_free_fmbs(&ksocknal_data.ksnd_small_fmp); + ksocknal_free_fmbs(&ksocknal_data.ksnd_large_fmp); - LASSERT (ksocknal_data.ksnd_active_ltxs == 0); - if (ksocknal_data.ksnd_ltxs != NULL) - PORTAL_FREE (ksocknal_data.ksnd_ltxs, - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + - SOCKNAL_NNBLK_LTXS)); + LASSERT (atomic_read(&ksocknal_data.ksnd_nactive_ltxs) == 0); if (ksocknal_data.ksnd_schedulers != NULL) PORTAL_FREE (ksocknal_data.ksnd_schedulers, @@ -1572,7 +1577,7 @@ ksocknal_module_init (void) PORTAL_ALLOC (ksocknal_data.ksnd_peers, sizeof (struct list_head) * ksocknal_data.ksnd_peer_hash_size); if (ksocknal_data.ksnd_peers == NULL) - RETURN (-ENOMEM); + return (-ENOMEM); for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]); @@ -1590,11 +1595,6 @@ ksocknal_module_init (void) INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_idle_fmbs); INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns); - spin_lock_init(&ksocknal_data.ksnd_idle_ltx_lock); - INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_nblk_ltx_list); - INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_ltx_list); - init_waitqueue_head(&ksocknal_data.ksnd_idle_ltx_waitq); - spin_lock_init (&ksocknal_data.ksnd_reaper_lock); INIT_LIST_HEAD (&ksocknal_data.ksnd_zombie_conns); INIT_LIST_HEAD (&ksocknal_data.ksnd_deathrow_conns); @@ -1614,7 +1614,7 @@ ksocknal_module_init (void) sizeof(ksock_sched_t) * SOCKNAL_N_SCHED); if 
(ksocknal_data.ksnd_schedulers == NULL) { ksocknal_module_fini (); - RETURN(-ENOMEM); + return (-ENOMEM); } for (i = 0; i < SOCKNAL_N_SCHED; i++) { @@ -1629,35 +1629,11 @@ ksocknal_module_init (void) init_waitqueue_head (&kss->kss_waitq); } - CDEBUG (D_MALLOC, "ltx "LPSZ", total "LPSZ"\n", sizeof (ksock_ltx_t), - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS)); - - PORTAL_ALLOC(ksocknal_data.ksnd_ltxs, - sizeof(ksock_ltx_t) * (SOCKNAL_NLTXS +SOCKNAL_NNBLK_LTXS)); - if (ksocknal_data.ksnd_ltxs == NULL) { - ksocknal_module_fini (); - return (-ENOMEM); - } - - /* Deterministic bugs please */ - memset (ksocknal_data.ksnd_ltxs, 0xeb, - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS)); - - for (i = 0; i < SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS; i++) { - ksock_ltx_t *ltx = &((ksock_ltx_t *)ksocknal_data.ksnd_ltxs)[i]; - - ltx->ltx_tx.tx_hdr = <x->ltx_hdr; - ltx->ltx_idle = i < SOCKNAL_NLTXS ? - &ksocknal_data.ksnd_idle_ltx_list : - &ksocknal_data.ksnd_idle_nblk_ltx_list; - list_add (<x->ltx_tx.tx_list, ltx->ltx_idle); - } - rc = PtlNIInit(ksocknal_init, 32, 4, 0, &ksocknal_ni); if (rc != 0) { CERROR("ksocknal: PtlNIInit failed: error %d\n", rc); ksocknal_module_fini (); - RETURN (rc); + return (rc); } PtlNIDebug(ksocknal_ni, ~0); @@ -1670,7 +1646,7 @@ ksocknal_module_init (void) CERROR("Can't spawn socknal scheduler[%d]: %d\n", i, rc); ksocknal_module_fini (); - RETURN (rc); + return (rc); } } @@ -1679,7 +1655,7 @@ ksocknal_module_init (void) if (rc != 0) { CERROR("Can't spawn socknal autoconnectd: %d\n", rc); ksocknal_module_fini (); - RETURN (rc); + return (rc); } } @@ -1687,7 +1663,7 @@ ksocknal_module_init (void) if (rc != 0) { CERROR ("Can't spawn socknal reaper: %d\n", rc); ksocknal_module_fini (); - RETURN (rc); + return (rc); } rc = kpr_register(&ksocknal_data.ksnd_router, @@ -1698,23 +1674,15 @@ ksocknal_module_init (void) } else { /* Only allocate forwarding buffers if I'm on a gateway */ - PORTAL_ALLOC(ksocknal_data.ksnd_fmbs, - 
sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + - SOCKNAL_LARGE_FWD_NMSGS)); - if (ksocknal_data.ksnd_fmbs == NULL) { - ksocknal_module_fini (); - RETURN(-ENOMEM); - } - - /* NULL out buffer pointers etc */ - memset(ksocknal_data.ksnd_fmbs, 0, - sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + - SOCKNAL_LARGE_FWD_NMSGS)); - for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); i++) { - ksock_fmb_t *fmb = - &((ksock_fmb_t *)ksocknal_data.ksnd_fmbs)[i]; + ksock_fmb_t *fmb; + + PORTAL_ALLOC(fmb, sizeof(*fmb)); + if (fmb == NULL) { + ksocknal_module_fini(); + return (-ENOMEM); + } if (i < SOCKNAL_SMALL_FWD_NMSGS) { fmb->fmb_npages = SOCKNAL_SMALL_FWD_PAGES; @@ -1724,7 +1692,6 @@ ksocknal_module_init (void) fmb->fmb_pool = &ksocknal_data.ksnd_large_fmp; } - LASSERT (fmb->fmb_npages > 0); for (j = 0; j < fmb->fmb_npages; j++) { fmb->fmb_pages[j] = alloc_page(GFP_KERNEL); @@ -1733,8 +1700,7 @@ ksocknal_module_init (void) return (-ENOMEM); } - LASSERT(page_address (fmb->fmb_pages[j]) != - NULL); + LASSERT(page_address(fmb->fmb_pages[j]) != NULL); } list_add(&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs); diff --git a/lustre/portals/knals/socknal/socknal.h b/lustre/portals/knals/socknal/socknal.h index 227a24f..9dbe415 100644 --- a/lustre/portals/knals/socknal/socknal.h +++ b/lustre/portals/knals/socknal/socknal.h @@ -82,9 +82,6 @@ #define SOCKNAL_PEER_HASH_SIZE 101 /* # peer lists */ -#define SOCKNAL_NLTXS 128 /* # normal transmit messages */ -#define SOCKNAL_NNBLK_LTXS 128 /* # transmit messages reserved if can't block */ - #define SOCKNAL_SMALL_FWD_NMSGS 128 /* # small messages I can be forwarding at any time */ #define SOCKNAL_LARGE_FWD_NMSGS 64 /* # large messages I can be forwarding at any time */ @@ -113,8 +110,9 @@ typedef struct /* pool of forwarding buffers */ { spinlock_t fmp_lock; /* serialise */ - struct list_head fmp_idle_fmbs; /* buffers waiting for a connection */ + struct list_head fmp_idle_fmbs; /* free buffers */ struct list_head 
fmp_blocked_conns; /* connections waiting for a buffer */ + int fmp_nactive_fmbs; /* # buffers in use */ } ksock_fmb_pool_t; @@ -164,16 +162,10 @@ typedef struct { kpr_router_t ksnd_router; /* THE router */ - void *ksnd_fmbs; /* all the pre-allocated FMBs */ ksock_fmb_pool_t ksnd_small_fmp; /* small message forwarding buffers */ ksock_fmb_pool_t ksnd_large_fmp; /* large message forwarding buffers */ - void *ksnd_ltxs; /* all the pre-allocated LTXs */ - spinlock_t ksnd_idle_ltx_lock; /* serialise ltx alloc/free */ - struct list_head ksnd_idle_ltx_list; /* where to get an idle LTX */ - struct list_head ksnd_idle_nblk_ltx_list; /* where to get an idle LTX if you can't block */ - wait_queue_head_t ksnd_idle_ltx_waitq; /* where to block for an idle LTX */ - int ksnd_active_ltxs; /* #active ltxs */ + atomic_t ksnd_nactive_ltxs; /* #active ltxs */ struct list_head ksnd_deathrow_conns; /* conns to be closed */ struct list_head ksnd_zombie_conns; /* conns to be freed */ @@ -233,25 +225,15 @@ typedef struct /* transmit packet */ #define KSOCK_ZCCD_2_TX(ptr) list_entry (ptr, ksock_tx_t, tx_zccd) /* network zero copy callback descriptor embedded in ksock_tx_t */ -/* space for the tx frag descriptors: hdr is always 1 iovec - * and payload is PTL_MD_MAX of either type. 
*/ -typedef struct -{ - struct iovec hdr; - union { - struct iovec iov[PTL_MD_MAX_IOV]; - ptl_kiov_t kiov[PTL_MD_MAX_IOV]; - } payload; -} ksock_txiovspace_t; - typedef struct /* locally transmitted packet */ { ksock_tx_t ltx_tx; /* send info */ - struct list_head *ltx_idle; /* where to put when idle */ void *ltx_private; /* lib_finalize() callback arg */ void *ltx_cookie; /* lib_finalize() callback arg */ - ksock_txiovspace_t ltx_iov_space; /* where to stash frag descriptors */ ptl_hdr_t ltx_hdr; /* buffer for packet header */ + int ltx_desc_size; /* bytes allocated for this desc */ + struct iovec ltx_iov[1]; /* iov for hdr + payload */ + ptl_kiov_t ltx_kiov[0]; /* kiov for payload */ } ksock_ltx_t; #define KSOCK_TX_2_KPR_FWD_DESC(ptr) list_entry ((kprfd_scratch_t *)ptr, kpr_fwd_desc_t, kprfd_scratch) diff --git a/lustre/portals/knals/socknal/socknal_cb.c b/lustre/portals/knals/socknal/socknal_cb.c index 6ea4fa8..22345fe 100644 --- a/lustre/portals/knals/socknal/socknal_cb.c +++ b/lustre/portals/knals/socknal/socknal_cb.c @@ -129,60 +129,11 @@ ksocknal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist) return 0; } -ksock_ltx_t * -ksocknal_get_ltx (int may_block) -{ - unsigned long flags; - ksock_ltx_t *ltx = NULL; - - for (;;) { - spin_lock_irqsave (&ksocknal_data.ksnd_idle_ltx_lock, flags); - - if (!list_empty (&ksocknal_data.ksnd_idle_ltx_list)) { - ltx = list_entry(ksocknal_data.ksnd_idle_ltx_list.next, - ksock_ltx_t, ltx_tx.tx_list); - list_del (&ltx->ltx_tx.tx_list); - ksocknal_data.ksnd_active_ltxs++; - break; - } - - if (!may_block) { - if (!list_empty(&ksocknal_data.ksnd_idle_nblk_ltx_list)) { - ltx = list_entry(ksocknal_data.ksnd_idle_nblk_ltx_list.next, - ksock_ltx_t, ltx_tx.tx_list); - list_del (&ltx->ltx_tx.tx_list); - ksocknal_data.ksnd_active_ltxs++; - } - break; - } - - spin_unlock_irqrestore(&ksocknal_data.ksnd_idle_ltx_lock, - flags); - - wait_event (ksocknal_data.ksnd_idle_ltx_waitq, - !list_empty (&ksocknal_data.ksnd_idle_ltx_list)); - } - - 
spin_unlock_irqrestore (&ksocknal_data.ksnd_idle_ltx_lock, flags); - - return (ltx); -} - void -ksocknal_put_ltx (ksock_ltx_t *ltx) +ksocknal_free_ltx (ksock_ltx_t *ltx) { - unsigned long flags; - - spin_lock_irqsave (&ksocknal_data.ksnd_idle_ltx_lock, flags); - - ksocknal_data.ksnd_active_ltxs--; - list_add_tail (&ltx->ltx_tx.tx_list, ltx->ltx_idle); - - /* normal tx desc => wakeup anyone blocking for one */ - if (ltx->ltx_idle == &ksocknal_data.ksnd_idle_ltx_list) - wake_up (&ksocknal_data.ksnd_idle_ltx_waitq); - - spin_unlock_irqrestore (&ksocknal_data.ksnd_idle_ltx_lock, flags); + atomic_dec(&ksocknal_data.ksnd_nactive_ltxs); + PORTAL_FREE(ltx, ltx->ltx_desc_size); } #if SOCKNAL_ZC @@ -364,7 +315,7 @@ ksocknal_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) } int -ksocknal_sendmsg (ksock_conn_t *conn, ksock_tx_t *tx) +ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx) { /* Return 0 on success, < 0 on error. * caller checks tx_resid to determine progress/completion */ @@ -377,17 +328,14 @@ ksocknal_sendmsg (ksock_conn_t *conn, ksock_tx_t *tx) } rc = ksocknal_getconnsock (conn); - if (rc != 0) + if (rc != 0) { + LASSERT (conn->ksnc_closing); return (rc); + } for (;;) { LASSERT (tx->tx_resid != 0); - if (conn->ksnc_closing) { - rc = -ESHUTDOWN; - break; - } - if (tx->tx_niov != 0) rc = ksocknal_send_iov (conn, tx); else @@ -554,7 +502,7 @@ ksocknal_recv_kiov (ksock_conn_t *conn) } int -ksocknal_recvmsg (ksock_conn_t *conn) +ksocknal_receive (ksock_conn_t *conn) { /* Return 1 on success, 0 on EOF, < 0 on error. 
* Caller checks ksnc_rx_nob_wanted to determine @@ -568,15 +516,12 @@ ksocknal_recvmsg (ksock_conn_t *conn) } rc = ksocknal_getconnsock (conn); - if (rc != 0) + if (rc != 0) { + LASSERT (conn->ksnc_closing); return (rc); + } for (;;) { - if (conn->ksnc_closing) { - rc = -ESHUTDOWN; - break; - } - if (conn->ksnc_rx_niov != 0) rc = ksocknal_recv_iov (conn); else @@ -665,7 +610,7 @@ ksocknal_tx_done (ksock_tx_t *tx, int asynch) lib_finalize (&ksocknal_lib, ltx->ltx_private, ltx->ltx_cookie); - ksocknal_put_ltx (ltx); + ksocknal_free_ltx (ltx); EXIT; } @@ -696,7 +641,7 @@ ksocknal_process_transmit (ksock_conn_t *conn, ksock_tx_t *tx) { int rc; - rc = ksocknal_sendmsg (conn, tx); + rc = ksocknal_transmit (conn, tx); CDEBUG (D_NET, "send(%d) %d\n", tx->tx_resid, rc); LASSERT (rc != -EAGAIN); @@ -840,13 +785,17 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn) unsigned long flags; ksock_sched_t *sched = conn->ksnc_scheduler; - /* called holding global lock (read or irq-write) */ - + /* called holding global lock (read or irq-write) and caller may + * not have dropped this lock between finding conn and calling me, + * so we don't need the {get,put}connsock dance to deref + * ksnc_sock... */ + LASSERT(!conn->ksnc_closing); + LASSERT(tx->tx_resid == tx->tx_nob); + CDEBUG (D_NET, "Sending to "LPX64" on port %d\n", conn->ksnc_peer->ksnp_nid, conn->ksnc_port); atomic_add (tx->tx_nob, &conn->ksnc_tx_nob); - tx->tx_resid = tx->tx_nob; tx->tx_conn = conn; #if SOCKNAL_ZC @@ -854,7 +803,6 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn) /* NB this sets 1 ref on zccd, so the callback can only occur after * I've released this ref. 
*/ #endif - spin_lock_irqsave (&sched->kss_lock, flags); conn->ksnc_tx_deadline = jiffies + @@ -960,6 +908,8 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid) tx->tx_nob, tx->tx_niov, tx->tx_nkiov); tx->tx_conn = NULL; /* only set when assigned a conn */ + tx->tx_resid = tx->tx_nob; + tx->tx_hdr = (ptl_hdr_t *)tx->tx_iov[0].iov_base; g_lock = &ksocknal_data.ksnd_global_lock; read_lock (g_lock); @@ -1024,115 +974,125 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid) return (-EHOSTUNREACH); } -ksock_ltx_t * -ksocknal_setup_hdr (nal_cb_t *nal, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type) +int +ksocknal_sendmsg(nal_cb_t *nal, + void *private, + lib_msg_t *cookie, + ptl_hdr_t *hdr, + int type, + ptl_nid_t nid, + ptl_pid_t pid, + unsigned int payload_niov, + struct iovec *payload_iov, + ptl_kiov_t *payload_kiov, + size_t payload_nob) { ksock_ltx_t *ltx; + int desc_size; + int rc; + + /* NB 'private' is different depending on what we're sending. + * Just ignore it... */ + + CDEBUG(D_NET, "sending "LPSZ" bytes in %d frags to nid:"LPX64 + " pid %d\n", payload_nob, payload_niov, nid , pid); - /* I may not block for a transmit descriptor if I might block the - * receiver, or an interrupt handler. 
*/ - ltx = ksocknal_get_ltx (!(type == PTL_MSG_ACK || - type == PTL_MSG_REPLY || - in_interrupt ())); + LASSERT (payload_nob == 0 || payload_niov > 0); + LASSERT (payload_niov <= PTL_MD_MAX_IOV); + + /* It must be OK to kmap() if required */ + LASSERT (payload_kiov == NULL || !in_interrupt ()); + /* payload is either all vaddrs or all pages */ + LASSERT (!(payload_kiov != NULL && payload_iov != NULL)); + + if (payload_iov != NULL) + desc_size = offsetof(ksock_ltx_t, ltx_iov[1 + payload_niov]); + else + desc_size = offsetof(ksock_ltx_t, ltx_kiov[payload_niov]); + + if (in_interrupt() || + type == PTL_MSG_ACK || + type == PTL_MSG_REPLY) { + /* Can't block if in interrupt or responding to an incoming + * message */ + PORTAL_ALLOC_ATOMIC(ltx, desc_size); + } else { + PORTAL_ALLOC(ltx, desc_size); + } + if (ltx == NULL) { - CERROR ("Can't allocate tx desc\n"); - return (NULL); + CERROR("Can't allocate tx desc type %d size %d %s\n", + type, desc_size, in_interrupt() ? "(intr)" : ""); + return (PTL_NOSPACE); } - /* Init local send packet (storage for hdr, finalize() args) */ + atomic_inc(&ksocknal_data.ksnd_nactive_ltxs); + + ltx->ltx_desc_size = desc_size; + + /* We always have 1 mapped frag for the header */ + ltx->ltx_tx.tx_iov = ltx->ltx_iov; + ltx->ltx_iov[0].iov_base = &ltx->ltx_hdr; + ltx->ltx_iov[0].iov_len = sizeof(*hdr); ltx->ltx_hdr = *hdr; + ltx->ltx_private = private; ltx->ltx_cookie = cookie; - /* Init common ltx_tx */ ltx->ltx_tx.tx_isfwd = 0; - ltx->ltx_tx.tx_nob = sizeof (*hdr); - - /* We always have 1 mapped frag for the header */ - ltx->ltx_tx.tx_niov = 1; - ltx->ltx_tx.tx_iov = &ltx->ltx_iov_space.hdr; - ltx->ltx_tx.tx_iov[0].iov_base = &ltx->ltx_hdr; - ltx->ltx_tx.tx_iov[0].iov_len = sizeof (ltx->ltx_hdr); - - ltx->ltx_tx.tx_kiov = NULL; - ltx->ltx_tx.tx_nkiov = 0; + ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_nob; - return (ltx); -} - -int -ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t 
pid, - unsigned int payload_niov, struct iovec *payload_iov, - size_t payload_len) -{ - ksock_ltx_t *ltx; - int rc; + if (payload_iov != NULL) { + /* payload is all mapped */ + ltx->ltx_tx.tx_kiov = NULL; + ltx->ltx_tx.tx_nkiov = 0; - /* NB 'private' is different depending on what we're sending. - * Just ignore it until we can rely on it - */ + ltx->ltx_tx.tx_niov = 1 + payload_niov; - CDEBUG(D_NET, - "sending "LPSZ" bytes in %d mapped frags to nid: "LPX64 - " pid %d\n", payload_len, payload_niov, nid, pid); + memcpy(ltx->ltx_iov + 1, payload_iov, + payload_niov * sizeof (*payload_iov)); - ltx = ksocknal_setup_hdr (nal, private, cookie, hdr, type); - if (ltx == NULL) - return (PTL_FAIL); + } else { + /* payload is all pages */ + ltx->ltx_tx.tx_kiov = ltx->ltx_kiov; + ltx->ltx_tx.tx_nkiov = payload_niov; - /* append the payload_iovs to the one pointing at the header */ - LASSERT (ltx->ltx_tx.tx_niov == 1 && ltx->ltx_tx.tx_nkiov == 0); - LASSERT (payload_niov <= PTL_MD_MAX_IOV); + ltx->ltx_tx.tx_niov = 1; - memcpy (ltx->ltx_tx.tx_iov + 1, payload_iov, - payload_niov * sizeof (*payload_iov)); - ltx->ltx_tx.tx_niov = 1 + payload_niov; - ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len; + memcpy(ltx->ltx_kiov, payload_kiov, + payload_niov * sizeof (*payload_kiov)); + } - rc = ksocknal_launch_packet (&ltx->ltx_tx, nid); + rc = ksocknal_launch_packet(&ltx->ltx_tx, nid); if (rc == 0) return (PTL_OK); - ksocknal_put_ltx (ltx); + ksocknal_free_ltx(ltx); return (PTL_FAIL); } int +ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie, + ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int payload_niov, struct iovec *payload_iov, + size_t payload_len) +{ + return (ksocknal_sendmsg(nal, private, cookie, + hdr, type, nid, pid, + payload_niov, payload_iov, NULL, + payload_len)); +} + +int ksocknal_send_pages (nal_cb_t *nal, void *private, lib_msg_t *cookie, ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int payload_niov, ptl_kiov_t 
*payload_iov, size_t payload_len) + unsigned int payload_niov, ptl_kiov_t *payload_kiov, + size_t payload_len) { - ksock_ltx_t *ltx; - int rc; - - /* NB 'private' is different depending on what we're sending. - * Just ignore it until we can rely on it */ - - CDEBUG(D_NET, - "sending "LPSZ" bytes in %d mapped frags to nid: "LPX64" pid %d\n", - payload_len, payload_niov, nid, pid); - - ltx = ksocknal_setup_hdr (nal, private, cookie, hdr, type); - if (ltx == NULL) - return (PTL_FAIL); - - LASSERT (ltx->ltx_tx.tx_niov == 1 && ltx->ltx_tx.tx_nkiov == 0); - LASSERT (payload_niov <= PTL_MD_MAX_IOV); - - ltx->ltx_tx.tx_kiov = ltx->ltx_iov_space.payload.kiov; - memcpy (ltx->ltx_tx.tx_kiov, payload_iov, - payload_niov * sizeof (*payload_iov)); - ltx->ltx_tx.tx_nkiov = payload_niov; - ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len; - - rc = ksocknal_launch_packet (&ltx->ltx_tx, nid); - if (rc == 0) - return (PTL_OK); - - ksocknal_put_ltx (ltx); - return (PTL_FAIL); + return (ksocknal_sendmsg(nal, private, cookie, + hdr, type, nid, pid, + payload_niov, NULL, payload_kiov, + payload_len)); } void @@ -1155,7 +1115,6 @@ ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) tx->tx_iov = fwd->kprfd_iov; tx->tx_nkiov = 0; tx->tx_kiov = NULL; - tx->tx_hdr = (ptl_hdr_t *)fwd->kprfd_iov[0].iov_base; rc = ksocknal_launch_packet (tx, nid); if (rc != 0) @@ -1204,6 +1163,7 @@ ksocknal_fmb_callback (void *arg, int error) spin_lock_irqsave (&fmp->fmp_lock, flags); list_add (&fmb->fmb_list, &fmp->fmp_idle_fmbs); + fmp->fmp_nactive_fmbs--; if (!list_empty (&fmp->fmp_blocked_conns)) { conn = list_entry (fmb->fmb_pool->fmp_blocked_conns.next, @@ -1242,7 +1202,7 @@ ksocknal_get_idle_fmb (ksock_conn_t *conn) ksock_fmb_t *fmb; LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB); - LASSERT (ksocknal_data.ksnd_fmbs != NULL); + LASSERT (kpr_routing(&ksocknal_data.ksnd_router)); if (packet_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE) pool = &ksocknal_data.ksnd_small_fmp; @@ -1255,6 +1215,7 @@ 
ksocknal_get_idle_fmb (ksock_conn_t *conn) fmb = list_entry(pool->fmp_idle_fmbs.next, ksock_fmb_t, fmb_list); list_del (&fmb->fmb_list); + pool->fmp_nactive_fmbs++; spin_unlock_irqrestore (&pool->fmp_lock, flags); return (fmb); @@ -1397,7 +1358,7 @@ ksocknal_fwd_parse (ksock_conn_t *conn) return; } - if (ksocknal_data.ksnd_fmbs == NULL) { /* not forwarding */ + if (!kpr_routing(&ksocknal_data.ksnd_router)) { /* not forwarding */ CERROR("dropping packet from "LPX64" (%s) for "LPX64 " (%s): not forwarding\n", src_nid, portals_nid2str(TCPNAL, src_nid, str), @@ -1525,9 +1486,11 @@ ksocknal_process_receive (ksock_conn_t *conn) LASSERT (conn->ksnc_rx_nob_wanted > 0); - rc = ksocknal_recvmsg(conn); + rc = ksocknal_receive(conn); if (rc <= 0) { + LASSERT (rc != -EAGAIN); + if (rc == 0) CWARN ("[%p] EOF from "LPX64" ip %08x:%d\n", conn, conn->ksnc_peer->ksnp_nid, @@ -1766,9 +1729,9 @@ int ksocknal_scheduler (void *arg) * kss_lock. */ conn->ksnc_tx_ready = 0; spin_unlock_irqrestore (&sched->kss_lock, flags); - + rc = ksocknal_process_transmit(conn, tx); - + spin_lock_irqsave (&sched->kss_lock, flags); if (rc != -EAGAIN) { @@ -1851,7 +1814,7 @@ ksocknal_data_ready (struct sock *sk, int n) read_lock (&ksocknal_data.ksnd_global_lock); conn = sk->sk_user_data; - if (conn == NULL) { /* raced with ksocknal_close_sock */ + if (conn == NULL) { /* raced with ksocknal_terminate_conn */ LASSERT (sk->sk_data_ready != &ksocknal_data_ready); sk->sk_data_ready (sk, n); } else { @@ -1900,7 +1863,7 @@ ksocknal_write_space (struct sock *sk) (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ? 
" empty" : " queued")); - if (conn == NULL) { /* raced with ksocknal_close_sock */ + if (conn == NULL) { /* raced with ksocknal_terminate_conn */ LASSERT (sk->sk_write_space != &ksocknal_write_space); sk->sk_write_space (sk); @@ -2136,7 +2099,7 @@ ksocknal_setup_sock (struct socket *sock) int option; struct linger linger; - sock->sk->allocation = GFP_NOFS; + sock->sk->allocation = GFP_MEMALLOC; /* Ensure this socket aborts active sends immediately when we close * it. */ @@ -2421,6 +2384,8 @@ ksocknal_autoconnectd (void *arg) kportal_daemonize (name); kportal_blockallsigs (); + current->flags |= PF_MEMALLOC; + spin_lock_irqsave (&ksocknal_data.ksnd_autoconnectd_lock, flags); while (!ksocknal_data.ksnd_shuttingdown) { @@ -2548,6 +2513,8 @@ ksocknal_reaper (void *arg) init_waitqueue_entry (&wait, current); + current->flags |= PF_MEMALLOC; + spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags); while (!ksocknal_data.ksnd_shuttingdown) { diff --git a/lustre/portals/libcfs/debug.c b/lustre/portals/libcfs/debug.c index 6e2c1ca..ad2c966 100644 --- a/lustre/portals/libcfs/debug.c +++ b/lustre/portals/libcfs/debug.c @@ -974,18 +974,14 @@ char *portals_debug_dumpstack(void) return buf; } -#elif defined(CONFIG_X86) +#elif defined(__i386__) extern int is_kernel_text_address(unsigned long addr); extern int lookup_symbol(unsigned long address, char *buf, int buflen); char *portals_debug_dumpstack(void) { -#if defined(__x86_64__) - unsigned long esp = current->thread.rsp; -#else unsigned long esp = current->thread.esp; -#endif unsigned long *stack = (unsigned long *)&esp; int size; unsigned long addr; diff --git a/lustre/portals/libcfs/lwt.c b/lustre/portals/libcfs/lwt.c index 89fe8f7..a24423e 100644 --- a/lustre/portals/libcfs/lwt.c +++ b/lustre/portals/libcfs/lwt.c @@ -45,9 +45,6 @@ #if LWT_SUPPORT -#define LWT_MEMORY (1<<20) /* 1Mb of trace memory */ -#define LWT_MAX_CPUS 4 - int lwt_enabled; int lwt_pages_per_cpu; lwt_cpu_t lwt_cpus[LWT_MAX_CPUS]; @@ -123,7 +120,8 @@ 
lwt_control (int enable, int clear) } int -lwt_snapshot (int *ncpu, int *total_size, void *user_ptr, int user_size) +lwt_snapshot (cycles_t *now, int *ncpu, int *total_size, + void *user_ptr, int user_size) { const int events_per_page = PAGE_SIZE / sizeof(lwt_event_t); const int bytes_per_page = events_per_page * sizeof(lwt_event_t); @@ -136,7 +134,8 @@ lwt_snapshot (int *ncpu, int *total_size, void *user_ptr, int user_size) *ncpu = num_online_cpus(); *total_size = num_online_cpus() * lwt_pages_per_cpu * bytes_per_page; - + *now = get_cycles(); + if (user_ptr == NULL) return (0); diff --git a/lustre/portals/libcfs/module.c b/lustre/portals/libcfs/module.c index 7c0cafc..55e1935 100644 --- a/lustre/portals/libcfs/module.c +++ b/lustre/portals/libcfs/module.c @@ -83,6 +83,115 @@ kportal_daemonize (char *str) } void +kportal_memhog_free (struct portals_device_userstate *pdu) +{ + struct page **level0p = &pdu->pdu_memhog_root_page; + struct page **level1p; + struct page **level2p; + int count1; + int count2; + + if (*level0p != NULL) { + + level1p = (struct page **)page_address(*level0p); + count1 = 0; + + while (count1 < PAGE_SIZE/sizeof(struct page *) && + *level1p != NULL) { + + level2p = (struct page **)page_address(*level1p); + count2 = 0; + + while (count2 < PAGE_SIZE/sizeof(struct page *) && + *level2p != NULL) { + + __free_page(*level2p); + pdu->pdu_memhog_pages--; + level2p++; + count2++; + } + + __free_page(*level1p); + pdu->pdu_memhog_pages--; + level1p++; + count1++; + } + + __free_page(*level0p); + pdu->pdu_memhog_pages--; + + *level0p = NULL; + } + + LASSERT (pdu->pdu_memhog_pages == 0); +} + +int +kportal_memhog_alloc (struct portals_device_userstate *pdu, int npages, int flags) +{ + struct page **level0p; + struct page **level1p; + struct page **level2p; + int count1; + int count2; + + LASSERT (pdu->pdu_memhog_pages == 0); + LASSERT (pdu->pdu_memhog_root_page == NULL); + + if (npages < 0) + return -EINVAL; + + if (npages == 0) + return 0; + + level0p = 
&pdu->pdu_memhog_root_page; + *level0p = alloc_page(flags); + if (*level0p == NULL) + return -ENOMEM; + pdu->pdu_memhog_pages++; + + level1p = (struct page **)page_address(*level0p); + count1 = 0; + memset(level1p, 0, PAGE_SIZE); + + while (pdu->pdu_memhog_pages < npages && + count1 < PAGE_SIZE/sizeof(struct page *)) { + + if (signal_pending(current)) + return (-EINTR); + + *level1p = alloc_page(flags); + if (*level1p == NULL) + return -ENOMEM; + pdu->pdu_memhog_pages++; + + level2p = (struct page **)page_address(*level1p); + count2 = 0; + memset(level2p, 0, PAGE_SIZE); + + while (pdu->pdu_memhog_pages < npages && + count2 < PAGE_SIZE/sizeof(struct page *)) { + + if (signal_pending(current)) + return (-EINTR); + + *level2p = alloc_page(flags); + if (*level2p == NULL) + return (-ENOMEM); + pdu->pdu_memhog_pages++; + + level2p++; + count2++; + } + + level1p++; + count1++; + } + + return 0; +} + +void kportal_blockallsigs () { unsigned long flags; @@ -96,22 +205,39 @@ kportal_blockallsigs () /* called when opening /dev/device */ static int kportal_psdev_open(struct inode * inode, struct file * file) { + struct portals_device_userstate *pdu; ENTRY; - + if (!inode) RETURN(-EINVAL); + PORTAL_MODULE_USE; + + PORTAL_ALLOC(pdu, sizeof(*pdu)); + if (pdu != NULL) { + pdu->pdu_memhog_pages = 0; + pdu->pdu_memhog_root_page = NULL; + } + file->private_data = pdu; + RETURN(0); } /* called when closing /dev/device */ static int kportal_psdev_release(struct inode * inode, struct file * file) { + struct portals_device_userstate *pdu; ENTRY; if (!inode) RETURN(-EINVAL); + pdu = file->private_data; + if (pdu != NULL) { + kportal_memhog_free(pdu); + PORTAL_FREE(pdu, sizeof(*pdu)); + } + PORTAL_MODULE_UNUSE; RETURN(0); } @@ -514,7 +640,8 @@ static int kportal_ioctl(struct inode *inode, struct file *file, break; case IOC_PORTAL_LWT_SNAPSHOT: - err = lwt_snapshot (&data->ioc_count, &data->ioc_misc, + err = lwt_snapshot (&data->ioc_nid, + &data->ioc_count, &data->ioc_misc, data->ioc_pbuf1, 
data->ioc_plen1); if (err == 0 && copy_to_user((char *)arg, data, sizeof (*data))) @@ -528,7 +655,22 @@ static int kportal_ioctl(struct inode *inode, struct file *file, copy_to_user((char *)arg, data, sizeof (*data))) err = -EFAULT; break; -#endif +#endif + case IOC_PORTAL_MEMHOG: + if (!capable (CAP_SYS_ADMIN)) + err = -EPERM; + else if (file->private_data == NULL) { + err = -EINVAL; + } else { + kportal_memhog_free(file->private_data); + err = kportal_memhog_alloc(file->private_data, + data->ioc_count, + data->ioc_flags); + if (err != 0) + kportal_memhog_free(file->private_data); + } + break; + default: err = -EINVAL; break; @@ -612,8 +754,8 @@ static int init_kportals_module(void) cleanup_lwt: #if LWT_SUPPORT lwt_fini(); -#endif cleanup_debug: +#endif portals_debug_cleanup(); return rc; } diff --git a/lustre/portals/utils/portals.c b/lustre/portals/utils/portals.c index 3325892..b46ee16 100644 --- a/lustre/portals/utils/portals.c +++ b/lustre/portals/utils/portals.c @@ -1371,7 +1371,8 @@ lwt_control(int enable, int clear) } static int -lwt_snapshot(int *ncpu, int *totalsize, lwt_event_t *events, int size) +lwt_snapshot(cycles_t *now, int *ncpu, int *totalsize, + lwt_event_t *events, int size) { struct portal_ioctl_data data; int rc; @@ -1390,6 +1391,9 @@ lwt_snapshot(int *ncpu, int *totalsize, lwt_event_t *events, int size) LASSERT (data.ioc_count != 0); LASSERT (data.ioc_misc != 0); + if (now != NULL) + *now = data.ioc_nid; + if (ncpu != NULL) *ncpu = data.ioc_count; @@ -1499,14 +1503,13 @@ get_cycles_per_usec () int jt_ptl_lwt(int argc, char **argv) { -#define MAX_CPUS 8 int ncpus; int totalspace; int nevents_per_cpu; lwt_event_t *events; - lwt_event_t *cpu_event[MAX_CPUS + 1]; - lwt_event_t *next_event[MAX_CPUS]; - lwt_event_t *first_event[MAX_CPUS]; + lwt_event_t *cpu_event[LWT_MAX_CPUS + 1]; + lwt_event_t *next_event[LWT_MAX_CPUS]; + lwt_event_t *first_event[LWT_MAX_CPUS]; int cpu; lwt_event_t *e; int rc; @@ -1514,6 +1517,9 @@ jt_ptl_lwt(int argc, char 
**argv) double mhz; cycles_t t0; cycles_t tlast; + cycles_t tnow; + struct timeval tvnow; + int printed_date = 0; FILE *f = stdout; if (argc < 2 || @@ -1541,11 +1547,12 @@ jt_ptl_lwt(int argc, char **argv) return (0); } - if (lwt_snapshot(&ncpus, &totalspace, NULL, 0) != 0) + if (lwt_snapshot(NULL, &ncpus, &totalspace, NULL, 0) != 0) return (-1); - if (ncpus > MAX_CPUS) { - fprintf(stderr, "Too many cpus: %d (%d)\n", ncpus, MAX_CPUS); + if (ncpus > LWT_MAX_CPUS) { + fprintf(stderr, "Too many cpus: %d (%d)\n", + ncpus, LWT_MAX_CPUS); return (-1); } @@ -1560,11 +1567,14 @@ jt_ptl_lwt(int argc, char **argv) return (-1); } - if (lwt_snapshot(NULL, NULL, events, totalspace)) { + if (lwt_snapshot(&tnow, NULL, NULL, events, totalspace)) { free(events); return (-1); } + /* we want this time to be sampled at snapshot time */ + gettimeofday(&tvnow, NULL); + if (argc > 2) { f = fopen (argv[2], "w"); if (f == NULL) { @@ -1645,6 +1655,17 @@ jt_ptl_lwt(int argc, char **argv) if (t0 <= next_event[cpu]->lwte_when) { /* on or after the first event */ + if (!printed_date) { + cycles_t du = (tnow - t0) / mhz; + time_t then = tvnow.tv_sec - du/1000000; + + if (du % 1000000 > tvnow.tv_usec) + then--; + + fprintf(f, "%s", ctime(&then)); + printed_date = 1; + } + rc = lwt_print(f, t0, tlast, mhz, cpu, next_event[cpu]); if (rc != 0) break; @@ -1666,5 +1687,48 @@ jt_ptl_lwt(int argc, char **argv) free(events); return (0); -#undef MAX_CPUS } + +int jt_ptl_memhog(int argc, char **argv) +{ + static int gfp = 0; /* sticky! 
*/ + + struct portal_ioctl_data data; + int rc; + int count; + char *end; + + if (argc < 2) { + fprintf(stderr, "usage: %s <npages> [<GFP flags>]\n", argv[0]); + return 0; + } + + count = strtol(argv[1], &end, 0); + if (count < 0 || *end != 0) { + fprintf(stderr, "Can't parse page count '%s'\n", argv[1]); + return -1; + } + + if (argc >= 3) { + rc = strtol(argv[2], &end, 0); + if (*end != 0) { + fprintf(stderr, "Can't parse gfp flags '%s'\n", argv[2]); + return -1; + } + gfp = rc; + } + + PORTAL_IOC_INIT(data); + data.ioc_count = count; + data.ioc_flags = gfp; + rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_MEMHOG, &data); + + if (rc != 0) { + fprintf(stderr, "memhog %d failed: %s\n", count, strerror(errno)); + return -1; + } + + printf("memhog %d OK\n", count); + return 0; +} + diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 60f2d4e..1d17038 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -250,7 +250,7 @@ struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, int opcode, spin_lock_init(&request->rq_lock); INIT_LIST_HEAD(&request->rq_list); - init_waitqueue_head(&request->rq_wait_for_rep); + init_waitqueue_head(&request->rq_reply_waitq); request->rq_xid = ptlrpc_next_xid(); atomic_set(&request->rq_refcount, 1); @@ -1127,7 +1127,7 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request) * the timeout lets us CERROR for visibility */ struct l_wait_info lwi = LWI_TIMEOUT(10*HZ, NULL, NULL); - rc = l_wait_event (request->rq_wait_for_rep, + rc = l_wait_event (request->rq_reply_waitq, request->rq_replied, &lwi); LASSERT(rc == 0 || rc == -ETIMEDOUT); if (rc == 0) { @@ -1228,7 +1228,7 @@ void ptlrpc_resend_req(struct ptlrpc_request *req) if (req->rq_set != NULL) wake_up (&req->rq_set->set_waitq); else - wake_up(&req->rq_wait_for_rep); + wake_up(&req->rq_reply_waitq); spin_unlock_irqrestore (&req->rq_lock, flags); } @@ -1246,7 +1246,7 @@ void ptlrpc_restart_req(struct ptlrpc_request *req) if (req->rq_set != NULL) wake_up 
(&req->rq_set->set_waitq); else - wake_up(&req->rq_wait_for_rep); + wake_up(&req->rq_reply_waitq); spin_unlock_irqrestore (&req->rq_lock, flags); } @@ -1354,7 +1354,7 @@ restart: DEBUG_REQ(D_HA, req, "\"%s\" waiting for recovery: (%d > %d)", current->comm, req->rq_send_state, imp->imp_state); lwi = LWI_INTR(interrupted_request, req); - rc = l_wait_event(req->rq_wait_for_rep, + rc = l_wait_event(req->rq_reply_waitq, (req->rq_send_state == imp->imp_state || req->rq_err), &lwi); @@ -1398,7 +1398,7 @@ restart: } lwi = LWI_TIMEOUT_INTR(timeout, expired_request, interrupted_request, req); - l_wait_event(req->rq_wait_for_rep, ptlrpc_check_reply(req), &lwi); + l_wait_event(req->rq_reply_waitq, ptlrpc_check_reply(req), &lwi); DEBUG_REQ(D_NET, req, "-- done sleeping"); CDEBUG(D_RPCTRACE, "Completed RPC pname:cluuid:pid:xid:ni:nid:opc " @@ -1472,7 +1472,7 @@ restart: if (req->rq_bulk != NULL) { if (rc >= 0) { /* success so far */ lwi = LWI_TIMEOUT(timeout, NULL, NULL); - brc = l_wait_event(req->rq_wait_for_rep, + brc = l_wait_event(req->rq_reply_waitq, ptlrpc_bulk_complete(req->rq_bulk), &lwi); if (brc != 0) { @@ -1535,7 +1535,7 @@ int ptlrpc_replay_req(struct ptlrpc_request *req) CDEBUG(D_OTHER, "-- sleeping\n"); lwi = LWI_INTR(NULL, NULL); /* XXX needs timeout, nested recovery */ - l_wait_event(req->rq_wait_for_rep, ptlrpc_check_reply(req), &lwi); + l_wait_event(req->rq_reply_waitq, ptlrpc_check_reply(req), &lwi); CDEBUG(D_OTHER, "-- done\n"); // up(&cli->cli_rpc_sem); @@ -1626,7 +1626,7 @@ void ptlrpc_abort_inflight(struct obd_import *imp) if (req->rq_set != NULL) wake_up(&req->rq_set->set_waitq); else - wake_up(&req->rq_wait_for_rep); + wake_up(&req->rq_reply_waitq); } spin_unlock (&req->rq_lock); } @@ -1643,7 +1643,7 @@ void ptlrpc_abort_inflight(struct obd_import *imp) if (req->rq_set != NULL) wake_up(&req->rq_set->set_waitq); else - wake_up(&req->rq_wait_for_rep); + wake_up(&req->rq_reply_waitq); } spin_unlock (&req->rq_lock); } diff --git a/lustre/ptlrpc/events.c 
b/lustre/ptlrpc/events.c index 7807dcc..01cbce0 100644 --- a/lustre/ptlrpc/events.c +++ b/lustre/ptlrpc/events.c @@ -76,7 +76,7 @@ static int reply_out_callback(ptl_event_t *ev) LASSERT(req->rq_want_ack); spin_lock_irqsave(&req->rq_lock, flags); req->rq_want_ack = 0; - wake_up(&req->rq_wait_for_rep); + wake_up(&req->rq_reply_waitq); spin_unlock_irqrestore(&req->rq_lock, flags); } else { // XXX make sure we understand all events @@ -122,7 +122,7 @@ int reply_in_callback(ptl_event_t *ev) if (req->rq_set != NULL) wake_up(&req->rq_set->set_waitq); else - wake_up(&req->rq_wait_for_rep); + wake_up(&req->rq_reply_waitq); spin_unlock_irqrestore (&req->rq_lock, flags); } else { // XXX make sure we understand all events, including ACKs @@ -254,7 +254,7 @@ static int bulk_put_sink_callback(ptl_event_t *ev) if (desc->bd_req->rq_set != NULL) wake_up (&desc->bd_req->rq_set->set_waitq); else - wake_up (&desc->bd_req->rq_wait_for_rep); + wake_up (&desc->bd_req->rq_reply_waitq); spin_unlock_irqrestore (&desc->bd_lock, flags); RETURN(1); @@ -304,7 +304,7 @@ static int bulk_get_source_callback(ptl_event_t *ev) if (desc->bd_req->rq_set != NULL) wake_up (&desc->bd_req->rq_set->set_waitq); else - wake_up (&desc->bd_req->rq_wait_for_rep); + wake_up (&desc->bd_req->rq_reply_waitq); spin_unlock_irqrestore (&desc->bd_lock, flags); RETURN(1); diff --git a/lustre/ptlrpc/lproc_ptlrpc.c b/lustre/ptlrpc/lproc_ptlrpc.c index 43e650e..1559403b 100644 --- a/lustre/ptlrpc/lproc_ptlrpc.c +++ b/lustre/ptlrpc/lproc_ptlrpc.c @@ -159,8 +159,8 @@ void ptlrpc_lprocfs_register_service(struct proc_dir_entry *entry, struct ptlrpc_service *svc) { ptlrpc_lprocfs_register(entry, svc->srv_name, - "stats", &svc->svc_procroot, - &svc->svc_stats); + "stats", &svc->srv_procroot, + &svc->srv_stats); } void ptlrpc_lprocfs_register_obd(struct obd_device *obddev) @@ -184,13 +184,13 @@ void ptlrpc_lprocfs_rpc_sent(struct ptlrpc_request *req) void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc) { - if 
(svc->svc_procroot) { - lprocfs_remove(svc->svc_procroot); - svc->svc_procroot = NULL; + if (svc->srv_procroot != NULL) { + lprocfs_remove(svc->srv_procroot); + svc->srv_procroot = NULL; } - if (svc->svc_stats) { - lprocfs_free_stats(svc->svc_stats); - svc->svc_stats = NULL; + if (svc->srv_stats) { + lprocfs_free_stats(svc->srv_stats); + svc->srv_stats = NULL; } } void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd) diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index fe8a4cd..fd523a4 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -529,7 +529,7 @@ void ptlrpc_unregister_bulk (struct ptlrpc_request *req) if (desc->bd_req->rq_set != NULL) wq = &req->rq_set->set_waitq; else - wq = &req->rq_wait_for_rep; + wq = &req->rq_reply_waitq; lwi = LWI_TIMEOUT (10 * HZ, NULL, NULL); rc = l_wait_event(*wq, ptlrpc_bulk_complete(desc), &lwi); LASSERT (rc == 0 || rc == -ETIMEDOUT); @@ -565,7 +565,7 @@ int ptlrpc_reply(struct ptlrpc_request *req) req->rq_repmsg->status = req->rq_status; req->rq_repmsg->opc = req->rq_reqmsg->opc; - init_waitqueue_head(&req->rq_wait_for_rep); + init_waitqueue_head(&req->rq_reply_waitq); rc = ptl_send_buf(req, req->rq_connection, req->rq_svc->srv_rep_portal); if (rc != 0) { /* Do what the callback handler would have done */ diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c index 3d5c1ec..ed969fe 100644 --- a/lustre/ptlrpc/recover.c +++ b/lustre/ptlrpc/recover.c @@ -208,7 +208,7 @@ void ptlrpc_wake_delayed(struct obd_import *imp) wake_up(&req->rq_set->set_waitq); } else { DEBUG_REQ(D_HA, req, "waking:"); - wake_up(&req->rq_wait_for_rep); + wake_up(&req->rq_reply_waitq); } } spin_unlock_irqrestore(&imp->imp_lock, flags); diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 526b35c..9d3ff82 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -206,7 +206,6 @@ static int handle_incoming_request(struct obd_device *obddev, spin_lock_init (&request->rq_lock); 
INIT_LIST_HEAD(&request->rq_list); request->rq_svc = svc; - request->rq_obd = obddev; request->rq_xid = event->match_bits; request->rq_reqmsg = event->mem_desc.start + event->offset; request->rq_reqlen = event->mlength; @@ -375,15 +374,15 @@ static int ptlrpc_main(void *arg) do_gettimeofday(&start_time); total = timeval_sub(&start_time, &event->arrival_time); - if (svc->svc_stats != NULL) { - lprocfs_counter_add(svc->svc_stats, PTLRPC_REQWAIT_CNTR, + if (svc->srv_stats != NULL) { + lprocfs_counter_add(svc->srv_stats, PTLRPC_REQWAIT_CNTR, total); - lprocfs_counter_add(svc->svc_stats, + lprocfs_counter_add(svc->srv_stats, PTLRPC_SVCIDLETIME_CNTR, timeval_sub(&start_time, &finish_time)); #if 0 /* Wait for b_eq branch */ - lprocfs_counter_add(svc->svc_stats, + lprocfs_counter_add(svc->srv_stats, PTLRPC_SVCEQDEPTH_CNTR, 0); #endif } @@ -406,11 +405,11 @@ static int ptlrpc_main(void *arg) "(%ldus total)\n", request->rq_xid, event->initiator.nid, total, timeval_sub(&finish_time, &event->arrival_time)); - if (svc->svc_stats != NULL) { + if (svc->srv_stats != NULL) { int opc = opcode_offset(request->rq_reqmsg->opc); if (opc > 0) { LASSERT(opc < LUSTRE_MAX_OPCODES); - lprocfs_counter_add(svc->svc_stats, + lprocfs_counter_add(svc->srv_stats, opc + PTLRPC_LAST_CNTR, total); } diff --git a/lustre/utils/lctl.c b/lustre/utils/lctl.c index 0e8c3a7..3c4c0ad 100644 --- a/lustre/utils/lctl.c +++ b/lustre/utils/lctl.c @@ -285,6 +285,9 @@ command_t cmdlist[] = { "light-weight tracing\n" "usage: lwt start\n" " lwt stop [file]"}, + {"memhog", jt_ptl_memhog, 0, + "memory pressure testing\n" + "usage: memhog []"}, /* User interface commands */ {"======= control ========", jt_noop, 0, "control commands"}, diff --git a/lustre/utils/lmc b/lustre/utils/lmc index f06a5bd..f8375b9 100755 --- a/lustre/utils/lmc +++ b/lustre/utils/lmc @@ -148,7 +148,7 @@ lmc_options = [ ('route', "Add a new route for the cluster.", PARAM), ('router', "Optional flag to mark a node as router."), ('gw', "Specify the 
nid of the gateway for a route.", PARAM), - ('gw_cluster_id', "", PARAM, "0"), + ('gateway_cluster_id', "", PARAM, "0"), ('target_cluster_id', "", PARAM, "0"), ('lo', "For a range route, this is the low value nid.", PARAM), ('hi', "For a range route, this is a hi value nid.", PARAM,""), @@ -624,7 +624,7 @@ def add_route(gen, lustre, options): node_name = get_option(options, 'node') gw_net_type = get_option(options, 'nettype') gw = get_option(options, 'gw') - gw_cluster_id = get_option(options, 'gw_cluster_id') + gw_cluster_id = get_option(options, 'gateway_cluster_id') tgt_cluster_id = get_option(options, 'target_cluster_id') lo = get_option(options, 'lo') hi = get_option(options, 'hi') -- 1.8.3.1