Whamcloud - gitweb
land 1.0.1 fixes on main development branch (head)
author phil <phil>
Wed, 3 Dec 2003 05:12:52 +0000 (05:12 +0000)
committer phil <phil>
Wed, 3 Dec 2003 05:12:52 +0000 (05:12 +0000)
60 files changed:
lnet/include/cygwin-ioctl.h
lnet/include/linux/kp30.h
lnet/include/lnet/lib-lnet.h
lnet/include/lnet/lib-p30.h
lnet/include/lnet/lnetctl.h
lnet/include/lnet/ptlctl.h
lnet/klnds/socklnd/socklnd.c
lnet/klnds/socklnd/socklnd.h
lnet/klnds/socklnd/socklnd_cb.c
lnet/libcfs/debug.c
lnet/libcfs/lwt.c
lnet/libcfs/module.c
lnet/utils/portals.c
lustre/ChangeLog
lustre/autogen.sh
lustre/doc/lfs.lyx
lustre/include/linux/lustre_net.h
lustre/include/linux/obd_support.h
lustre/kernel_patches/patches/2.6.0-test6-mm4.patch
lustre/kernel_patches/patches/bproc-patch-2.4.20
lustre/kernel_patches/patches/configurable-x86-stack-2.4.19-pre1.patch [new file with mode: 0644]
lustre/kernel_patches/patches/configurable-x86-stack-2.4.20-rh.patch [new file with mode: 0644]
lustre/kernel_patches/patches/configurable-x86-stack-2.4.20.patch [new file with mode: 0644]
lustre/kernel_patches/patches/configurable-x86-stack-2.4.22-rh.patch [new file with mode: 0644]
lustre/kernel_patches/patches/gfp_memalloc-2.4.18-chaos.patch
lustre/kernel_patches/patches/gfp_memalloc-2.4.20-rh.patch
lustre/kernel_patches/patches/gfp_memalloc-2.4.22.patch
lustre/kernel_patches/patches/linux-2.4.22-xattr-0.8.54.patch
lustre/kernel_patches/series/hp-pnnl-2.4.20
lustre/kernel_patches/series/rh-2.4.20
lustre/kernel_patches/series/rh-2.4.22
lustre/kernel_patches/series/suse-2.4.21
lustre/kernel_patches/series/vanilla-2.4.19-pre1
lustre/kernel_patches/series/vanilla-2.4.20
lustre/kernel_patches/series/vanilla-2.4.22
lustre/ldlm/ldlm_internal.h
lustre/ldlm/ldlm_lib.c
lustre/lvfs/fsfilt_ext3.c
lustre/mds/handler.c
lustre/obdfilter/filter.c
lustre/ost/ost_handler.c
lustre/portals/include/cygwin-ioctl.h
lustre/portals/include/linux/kp30.h
lustre/portals/include/portals/lib-p30.h
lustre/portals/include/portals/ptlctl.h
lustre/portals/knals/socknal/socknal.c
lustre/portals/knals/socknal/socknal.h
lustre/portals/knals/socknal/socknal_cb.c
lustre/portals/libcfs/debug.c
lustre/portals/libcfs/lwt.c
lustre/portals/libcfs/module.c
lustre/portals/utils/portals.c
lustre/ptlrpc/client.c
lustre/ptlrpc/events.c
lustre/ptlrpc/lproc_ptlrpc.c
lustre/ptlrpc/niobuf.c
lustre/ptlrpc/recover.c
lustre/ptlrpc/service.c
lustre/utils/lctl.c
lustre/utils/lmc

index 3ecefff..900f0a4 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: cygwin-ioctl.h,v 1.2 2003/12/03 03:14:43 phil Exp $
+/* $Id: cygwin-ioctl.h,v 1.3 2003/12/03 05:12:41 phil Exp $
  *
  * linux/ioctl.h for Linux by H.H. Bergman.
  */
index 3e6d5e3..3d60631 100644 (file)
@@ -115,7 +115,7 @@ do {                                                                          \
         if (portal_cerror == 0)                                               \
                 break;                                                        \
         CHECK_STACK(CDEBUG_STACK);                                            \
-        if (!(mask) || ((mask) & (D_ERROR | D_EMERG)) ||                      \
+        if (!(mask) || ((mask) & (D_ERROR | D_EMERG | D_WARNING)) ||          \
             (portal_debug & (mask) &&                                         \
              portal_subsystem_debug & DEBUG_SUBSYSTEM))                       \
                 portals_debug_msg(DEBUG_SUBSYSTEM, mask,                      \
@@ -283,17 +283,19 @@ do {                                                                          \
 #define GFP_MEMALLOC 0
 #endif
 
-#define PORTAL_ALLOC(ptr, size)                                           \
+#define PORTAL_ALLOC_GFP(ptr, size, mask)                                 \
 do {                                                                      \
         LASSERT (!in_interrupt());                                        \
         if ((size) > PORTAL_VMALLOC_SIZE)                                 \
                 (ptr) = vmalloc(size);                                    \
         else                                                              \
-                (ptr) = kmalloc((size), (GFP_KERNEL | GFP_MEMALLOC));     \
-        if ((ptr) == NULL)                                                \
+                (ptr) = kmalloc((size), (mask));                          \
+        if ((ptr) == NULL) {                                              \
                 CERROR("PORTALS: out of memory at %s:%d (tried to alloc '"\
                        #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size));\
-        else {                                                            \
+                CERROR("PORTALS: %d total bytes allocated by portals\n",  \
+                       atomic_read(&portal_kmemory));                     \
+        } else {                                                          \
                 portal_kmem_inc((ptr), (size));                           \
                 memset((ptr), 0, (size));                                 \
         }                                                                 \
@@ -301,6 +303,12 @@ do {                                                                      \
                (int)(size), (ptr), atomic_read (&portal_kmemory));        \
 } while (0)
 
+#define PORTAL_ALLOC(ptr, size) \
+        PORTAL_ALLOC_GFP(ptr, size, (GFP_KERNEL | GFP_MEMALLOC))
+
+#define PORTAL_ALLOC_ATOMIC(ptr, size) \
+        PORTAL_ALLOC_GFP(ptr, size, (GFP_ATOMIC | GFP_MEMALLOC))
+
 #define PORTAL_FREE(ptr, size)                                          \
 do {                                                                    \
         int s = (size);                                                 \
@@ -330,11 +338,13 @@ do {                                                                      \
                 CERROR("PORTALS: out of memory at %s:%d (tried to alloc"  \
                        " '" #ptr "' from slab '" #slab "')\n", __FILE__,  \
                        __LINE__);                                         \
+                CERROR("PORTALS: %d total bytes allocated by portals\n",  \
+                       atomic_read(&portal_kmemory));                     \
         } else {                                                          \
                 portal_kmem_inc((ptr), (size));                           \
                 memset((ptr), 0, (size));                                 \
         }                                                                 \
-        CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %ld at %p (tot %d).\n",   \
+        CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d).\n",    \
                (int)(size), (ptr), atomic_read(&portal_kmemory));         \
 } while (0)
 
@@ -690,7 +700,10 @@ char *portals_nid2str(int nal, ptl_nid_t nid, char *str);
 /******************************************************************************/
 /* Light-weight trace 
  * Support for temporary event tracing with minimal Heisenberg effect. */
-#define LWT_SUPPORT  1
+#define LWT_SUPPORT  0
+
+#define LWT_MEMORY   (64<<20)
+#define LWT_MAX_CPUS 4
 
 typedef struct {
         cycles_t    lwte_when;
@@ -728,7 +741,7 @@ extern void lwt_fini (void);
 extern int  lwt_lookup_string (int *size, char *knlptr,
                                char *usrptr, int usrsize);
 extern int  lwt_control (int enable, int clear);
-extern int  lwt_snapshot (int *ncpu, int *total_size,
+extern int  lwt_snapshot (cycles_t *now, int *ncpu, int *total_size,
                           void *user_ptr, int user_size);
 
 /* Note that we _don't_ define LWT_EVENT at all if LWT_SUPPORT isn't set.
@@ -775,6 +788,11 @@ do {                                                                    \
 #endif /* __KERNEL__ */
 #endif /* LWT_SUPPORT */
 
+struct portals_device_userstate
+{
+        int          pdu_memhog_pages;
+        struct page *pdu_memhog_root_page;
+};
 
 #include <linux/portals_lib.h>
 
@@ -1044,7 +1062,8 @@ static inline int portal_ioctl_getdata(char *buf, char *end, void *arg)
 #define IOC_PORTAL_LWT_CONTROL             _IOWR('e', 39, long)
 #define IOC_PORTAL_LWT_SNAPSHOT            _IOWR('e', 40, long)
 #define IOC_PORTAL_LWT_LOOKUP_STRING       _IOWR('e', 41, long)
-#define IOC_PORTAL_MAX_NR                             41
+#define IOC_PORTAL_MEMHOG                  _IOWR('e', 42, long)
+#define IOC_PORTAL_MAX_NR                             42
 
 enum {
         QSWNAL  =  1,
index 55fd720..c402828 100644 (file)
@@ -168,7 +168,8 @@ static inline lib_eq_t *
 lib_eq_alloc (nal_cb_t *nal)
 {
         /* NEVER called with statelock held */
-        lib_eq_t *eq = kmem_cache_alloc(ptl_eq_slab, GFP_NOFS);
+        lib_eq_t *eq;
+        PORTAL_SLAB_ALLOC(eq, ptl_eq_slab, sizeof(*eq));
 
         if (eq == NULL)
                 return (NULL);
@@ -182,14 +183,15 @@ lib_eq_free (nal_cb_t *nal, lib_eq_t *eq)
 {
         /* ALWAYS called with statelock held */
         atomic_dec (&eq_in_use_count);
-        kmem_cache_free(ptl_eq_slab, eq);
+        PORTAL_SLAB_FREE(eq, ptl_eq_slab, sizeof(*eq));
 }
 
 static inline lib_md_t *
 lib_md_alloc (nal_cb_t *nal)
 {
         /* NEVER called with statelock held */
-        lib_md_t *md = kmem_cache_alloc(ptl_md_slab, GFP_NOFS);
+        lib_md_t *md;
+        PORTAL_SLAB_ALLOC(md, ptl_md_slab, sizeof(*md));
 
         if (md == NULL)
                 return (NULL);
@@ -203,14 +205,15 @@ lib_md_free (nal_cb_t *nal, lib_md_t *md)
 {
         /* ALWAYS called with statelock held */
         atomic_dec (&md_in_use_count);
-        kmem_cache_free(ptl_md_slab, md); 
+        PORTAL_SLAB_FREE(md, ptl_md_slab, sizeof(*md));
 }
 
 static inline lib_me_t *
 lib_me_alloc (nal_cb_t *nal)
 {
         /* NEVER called with statelock held */
-        lib_me_t *me = kmem_cache_alloc(ptl_me_slab, GFP_NOFS);
+        lib_me_t *me;
+        PORTAL_SLAB_ALLOC(me, ptl_me_slab, sizeof(*me));
 
         if (me == NULL)
                 return (NULL);
@@ -224,14 +227,15 @@ lib_me_free(nal_cb_t *nal, lib_me_t *me)
 {
         /* ALWAYS called with statelock held */
         atomic_dec (&me_in_use_count);
-        kmem_cache_free(ptl_me_slab, me);
+        PORTAL_SLAB_FREE(me, ptl_me_slab, sizeof(*me));
 }
 
 static inline lib_msg_t *
 lib_msg_alloc(nal_cb_t *nal)
 {
         /* ALWAYS called with statelock held */
-        lib_msg_t *msg = kmem_cache_alloc(ptl_msg_slab, GFP_ATOMIC);
+        lib_msg_t *msg;
+        PORTAL_SLAB_ALLOC(msg, ptl_msg_slab, sizeof(*msg));
 
         if (msg == NULL)
                 return (NULL);
@@ -245,7 +249,7 @@ lib_msg_free(nal_cb_t *nal, lib_msg_t *msg)
 {
         /* ALWAYS called with statelock held */
         atomic_dec (&msg_in_use_count);
-        kmem_cache_free(ptl_msg_slab, msg); 
+        PORTAL_SLAB_FREE(msg, ptl_msg_slab, sizeof(*msg));
 }
 #endif
 
index 55fd720..c402828 100644 (file)
@@ -168,7 +168,8 @@ static inline lib_eq_t *
 lib_eq_alloc (nal_cb_t *nal)
 {
         /* NEVER called with statelock held */
-        lib_eq_t *eq = kmem_cache_alloc(ptl_eq_slab, GFP_NOFS);
+        lib_eq_t *eq;
+        PORTAL_SLAB_ALLOC(eq, ptl_eq_slab, sizeof(*eq));
 
         if (eq == NULL)
                 return (NULL);
@@ -182,14 +183,15 @@ lib_eq_free (nal_cb_t *nal, lib_eq_t *eq)
 {
         /* ALWAYS called with statelock held */
         atomic_dec (&eq_in_use_count);
-        kmem_cache_free(ptl_eq_slab, eq);
+        PORTAL_SLAB_FREE(eq, ptl_eq_slab, sizeof(*eq));
 }
 
 static inline lib_md_t *
 lib_md_alloc (nal_cb_t *nal)
 {
         /* NEVER called with statelock held */
-        lib_md_t *md = kmem_cache_alloc(ptl_md_slab, GFP_NOFS);
+        lib_md_t *md;
+        PORTAL_SLAB_ALLOC(md, ptl_md_slab, sizeof(*md));
 
         if (md == NULL)
                 return (NULL);
@@ -203,14 +205,15 @@ lib_md_free (nal_cb_t *nal, lib_md_t *md)
 {
         /* ALWAYS called with statelock held */
         atomic_dec (&md_in_use_count);
-        kmem_cache_free(ptl_md_slab, md); 
+        PORTAL_SLAB_FREE(md, ptl_md_slab, sizeof(*md));
 }
 
 static inline lib_me_t *
 lib_me_alloc (nal_cb_t *nal)
 {
         /* NEVER called with statelock held */
-        lib_me_t *me = kmem_cache_alloc(ptl_me_slab, GFP_NOFS);
+        lib_me_t *me;
+        PORTAL_SLAB_ALLOC(me, ptl_me_slab, sizeof(*me));
 
         if (me == NULL)
                 return (NULL);
@@ -224,14 +227,15 @@ lib_me_free(nal_cb_t *nal, lib_me_t *me)
 {
         /* ALWAYS called with statelock held */
         atomic_dec (&me_in_use_count);
-        kmem_cache_free(ptl_me_slab, me);
+        PORTAL_SLAB_FREE(me, ptl_me_slab, sizeof(*me));
 }
 
 static inline lib_msg_t *
 lib_msg_alloc(nal_cb_t *nal)
 {
         /* ALWAYS called with statelock held */
-        lib_msg_t *msg = kmem_cache_alloc(ptl_msg_slab, GFP_ATOMIC);
+        lib_msg_t *msg;
+        PORTAL_SLAB_ALLOC(msg, ptl_msg_slab, sizeof(*msg));
 
         if (msg == NULL)
                 return (NULL);
@@ -245,7 +249,7 @@ lib_msg_free(nal_cb_t *nal, lib_msg_t *msg)
 {
         /* ALWAYS called with statelock held */
         atomic_dec (&msg_in_use_count);
-        kmem_cache_free(ptl_msg_slab, msg); 
+        PORTAL_SLAB_FREE(msg, ptl_msg_slab, sizeof(*msg));
 }
 #endif
 
index f581e72..12ef47a 100644 (file)
@@ -59,6 +59,7 @@ int jt_ptl_notify_router (int argc, char **argv);
 int jt_ptl_print_routes (int argc, char **argv);
 int jt_ptl_fail_nid (int argc, char **argv);
 int jt_ptl_lwt(int argc, char **argv);
+int jt_ptl_memhog(int argc, char **argv);
 
 int dbg_initialize(int argc, char **argv);
 int jt_dbg_filter(int argc, char **argv);
index f581e72..12ef47a 100644 (file)
@@ -59,6 +59,7 @@ int jt_ptl_notify_router (int argc, char **argv);
 int jt_ptl_print_routes (int argc, char **argv);
 int jt_ptl_fail_nid (int argc, char **argv);
 int jt_ptl_lwt(int argc, char **argv);
+int jt_ptl_memhog(int argc, char **argv);
 
 int dbg_initialize(int argc, char **argv);
 int jt_dbg_filter(int argc, char **argv);
index 6f6fa7e..6de511c 100644 (file)
@@ -1395,30 +1395,35 @@ ksocknal_cmd(struct portals_cfg *pcfg, void * private)
 }
 
 void
-ksocknal_free_buffers (void)
+ksocknal_free_fmbs (ksock_fmb_pool_t *p)
 {
-        if (ksocknal_data.ksnd_fmbs != NULL) {
-                ksock_fmb_t *fmb = (ksock_fmb_t *)ksocknal_data.ksnd_fmbs;
-                int          i;
-                int          j;
-
-                for (i = 0;
-                     i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS);
-                     i++, fmb++)
-                        for (j = 0; j < fmb->fmb_npages; j++)
-                                if (fmb->fmb_pages[j] != NULL)
-                                        __free_page (fmb->fmb_pages[j]);
-
-                PORTAL_FREE (ksocknal_data.ksnd_fmbs,
-                             sizeof (ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS +
-                                                     SOCKNAL_LARGE_FWD_NMSGS));
+        ksock_fmb_t *fmb;
+        int          i;
+
+        LASSERT (list_empty(&p->fmp_blocked_conns));
+        LASSERT (p->fmp_nactive_fmbs == 0);
+        
+        while (!list_empty(&p->fmp_idle_fmbs)) {
+
+                fmb = list_entry(p->fmp_idle_fmbs.next,
+                                 ksock_fmb_t, fmb_list);
+                
+                for (i = 0; i < fmb->fmb_npages; i++)
+                        if (fmb->fmb_pages[i] != NULL)
+                                __free_page(fmb->fmb_pages[i]);
+                
+                list_del(&fmb->fmb_list);
+                PORTAL_FREE(fmb, sizeof(*fmb));
         }
+}
+
+void
+ksocknal_free_buffers (void)
+{
+        ksocknal_free_fmbs(&ksocknal_data.ksnd_small_fmp);
+        ksocknal_free_fmbs(&ksocknal_data.ksnd_large_fmp);
 
-        LASSERT (ksocknal_data.ksnd_active_ltxs == 0);
-        if (ksocknal_data.ksnd_ltxs != NULL)
-                PORTAL_FREE (ksocknal_data.ksnd_ltxs,
-                             sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS +
-                                                     SOCKNAL_NNBLK_LTXS));
+        LASSERT (atomic_read(&ksocknal_data.ksnd_nactive_ltxs) == 0);
 
         if (ksocknal_data.ksnd_schedulers != NULL)
                 PORTAL_FREE (ksocknal_data.ksnd_schedulers,
@@ -1572,7 +1577,7 @@ ksocknal_module_init (void)
         PORTAL_ALLOC (ksocknal_data.ksnd_peers,
                       sizeof (struct list_head) * ksocknal_data.ksnd_peer_hash_size);
         if (ksocknal_data.ksnd_peers == NULL)
-                RETURN (-ENOMEM);
+                return (-ENOMEM);
 
         for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
                 INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]);
@@ -1590,11 +1595,6 @@ ksocknal_module_init (void)
         INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_idle_fmbs);
         INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns);
 
-        spin_lock_init(&ksocknal_data.ksnd_idle_ltx_lock);
-        INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_nblk_ltx_list);
-        INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_ltx_list);
-        init_waitqueue_head(&ksocknal_data.ksnd_idle_ltx_waitq);
-
         spin_lock_init (&ksocknal_data.ksnd_reaper_lock);
         INIT_LIST_HEAD (&ksocknal_data.ksnd_zombie_conns);
         INIT_LIST_HEAD (&ksocknal_data.ksnd_deathrow_conns);
@@ -1614,7 +1614,7 @@ ksocknal_module_init (void)
                      sizeof(ksock_sched_t) * SOCKNAL_N_SCHED);
         if (ksocknal_data.ksnd_schedulers == NULL) {
                 ksocknal_module_fini ();
-                RETURN(-ENOMEM);
+                return (-ENOMEM);
         }
 
         for (i = 0; i < SOCKNAL_N_SCHED; i++) {
@@ -1629,35 +1629,11 @@ ksocknal_module_init (void)
                 init_waitqueue_head (&kss->kss_waitq);
         }
 
-        CDEBUG (D_MALLOC, "ltx "LPSZ", total "LPSZ"\n", sizeof (ksock_ltx_t),
-                sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS));
-
-        PORTAL_ALLOC(ksocknal_data.ksnd_ltxs,
-                     sizeof(ksock_ltx_t) * (SOCKNAL_NLTXS +SOCKNAL_NNBLK_LTXS));
-        if (ksocknal_data.ksnd_ltxs == NULL) {
-                ksocknal_module_fini ();
-                return (-ENOMEM);
-        }
-
-        /* Deterministic bugs please */
-        memset (ksocknal_data.ksnd_ltxs, 0xeb,
-                sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS));
-
-        for (i = 0; i < SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS; i++) {
-                ksock_ltx_t *ltx = &((ksock_ltx_t *)ksocknal_data.ksnd_ltxs)[i];
-
-                ltx->ltx_tx.tx_hdr = &ltx->ltx_hdr;
-                ltx->ltx_idle = i < SOCKNAL_NLTXS ?
-                                &ksocknal_data.ksnd_idle_ltx_list :
-                                &ksocknal_data.ksnd_idle_nblk_ltx_list;
-                list_add (&ltx->ltx_tx.tx_list, ltx->ltx_idle);
-        }
-
         rc = PtlNIInit(ksocknal_init, 32, 4, 0, &ksocknal_ni);
         if (rc != 0) {
                 CERROR("ksocknal: PtlNIInit failed: error %d\n", rc);
                 ksocknal_module_fini ();
-                RETURN (rc);
+                return (rc);
         }
         PtlNIDebug(ksocknal_ni, ~0);
 
@@ -1670,7 +1646,7 @@ ksocknal_module_init (void)
                         CERROR("Can't spawn socknal scheduler[%d]: %d\n",
                                i, rc);
                         ksocknal_module_fini ();
-                        RETURN (rc);
+                        return (rc);
                 }
         }
 
@@ -1679,7 +1655,7 @@ ksocknal_module_init (void)
                 if (rc != 0) {
                         CERROR("Can't spawn socknal autoconnectd: %d\n", rc);
                         ksocknal_module_fini ();
-                        RETURN (rc);
+                        return (rc);
                 }
         }
 
@@ -1687,7 +1663,7 @@ ksocknal_module_init (void)
         if (rc != 0) {
                 CERROR ("Can't spawn socknal reaper: %d\n", rc);
                 ksocknal_module_fini ();
-                RETURN (rc);
+                return (rc);
         }
 
         rc = kpr_register(&ksocknal_data.ksnd_router,
@@ -1698,23 +1674,15 @@ ksocknal_module_init (void)
         } else {
                 /* Only allocate forwarding buffers if I'm on a gateway */
 
-                PORTAL_ALLOC(ksocknal_data.ksnd_fmbs,
-                             sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS +
-                                                    SOCKNAL_LARGE_FWD_NMSGS));
-                if (ksocknal_data.ksnd_fmbs == NULL) {
-                        ksocknal_module_fini ();
-                        RETURN(-ENOMEM);
-                }
-
-                /* NULL out buffer pointers etc */
-                memset(ksocknal_data.ksnd_fmbs, 0,
-                       sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS +
-                                              SOCKNAL_LARGE_FWD_NMSGS));
-
                 for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS +
                                  SOCKNAL_LARGE_FWD_NMSGS); i++) {
-                        ksock_fmb_t *fmb =
-                                &((ksock_fmb_t *)ksocknal_data.ksnd_fmbs)[i];
+                        ksock_fmb_t *fmb;
+                        
+                        PORTAL_ALLOC(fmb, sizeof(*fmb));
+                        if (fmb == NULL) {
+                                ksocknal_module_fini();
+                                return (-ENOMEM);
+                        }
 
                         if (i < SOCKNAL_SMALL_FWD_NMSGS) {
                                 fmb->fmb_npages = SOCKNAL_SMALL_FWD_PAGES;
@@ -1724,7 +1692,6 @@ ksocknal_module_init (void)
                                 fmb->fmb_pool = &ksocknal_data.ksnd_large_fmp;
                         }
 
-                        LASSERT (fmb->fmb_npages > 0);
                         for (j = 0; j < fmb->fmb_npages; j++) {
                                 fmb->fmb_pages[j] = alloc_page(GFP_KERNEL);
 
@@ -1733,8 +1700,7 @@ ksocknal_module_init (void)
                                         return (-ENOMEM);
                                 }
 
-                                LASSERT(page_address (fmb->fmb_pages[j]) !=
-                                        NULL);
+                                LASSERT(page_address(fmb->fmb_pages[j]) != NULL);
                         }
 
                         list_add(&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs);
index 227a24f..9dbe415 100644 (file)
@@ -82,9 +82,6 @@
 
 #define SOCKNAL_PEER_HASH_SIZE   101            /* # peer lists */
 
-#define SOCKNAL_NLTXS           128             /* # normal transmit messages */
-#define SOCKNAL_NNBLK_LTXS     128             /* # transmit messages reserved if can't block */
-
 #define SOCKNAL_SMALL_FWD_NMSGS        128             /* # small messages I can be forwarding at any time */
 #define SOCKNAL_LARGE_FWD_NMSGS 64              /* # large messages I can be forwarding at any time */
 
 typedef struct                                  /* pool of forwarding buffers */
 {
         spinlock_t        fmp_lock;             /* serialise */
-        struct list_head  fmp_idle_fmbs;        /* buffers waiting for a connection */
+        struct list_head  fmp_idle_fmbs;        /* free buffers */
         struct list_head  fmp_blocked_conns;    /* connections waiting for a buffer */
+        int               fmp_nactive_fmbs;     /* # buffers in use */
 } ksock_fmb_pool_t;
 
 
@@ -164,16 +162,10 @@ typedef struct {
 
         kpr_router_t      ksnd_router;          /* THE router */
 
-        void             *ksnd_fmbs;            /* all the pre-allocated FMBs */
         ksock_fmb_pool_t  ksnd_small_fmp;       /* small message forwarding buffers */
         ksock_fmb_pool_t  ksnd_large_fmp;       /* large message forwarding buffers */
 
-        void             *ksnd_ltxs;            /* all the pre-allocated LTXs */
-        spinlock_t        ksnd_idle_ltx_lock;   /* serialise ltx alloc/free */
-        struct list_head  ksnd_idle_ltx_list;   /* where to get an idle LTX */
-        struct list_head  ksnd_idle_nblk_ltx_list; /* where to get an idle LTX if you can't block */
-        wait_queue_head_t ksnd_idle_ltx_waitq;  /* where to block for an idle LTX */
-        int               ksnd_active_ltxs;     /* #active ltxs */
+        atomic_t          ksnd_nactive_ltxs;    /* #active ltxs */
 
         struct list_head  ksnd_deathrow_conns;  /* conns to be closed */
         struct list_head  ksnd_zombie_conns;    /* conns to be freed */
@@ -233,25 +225,15 @@ typedef struct                                  /* transmit packet */
 #define KSOCK_ZCCD_2_TX(ptr)   list_entry (ptr, ksock_tx_t, tx_zccd)
 /* network zero copy callback descriptor embedded in ksock_tx_t */
 
-/* space for the tx frag descriptors: hdr is always 1 iovec
- * and payload is PTL_MD_MAX of either type. */
-typedef struct
-{
-        struct iovec            hdr;
-        union {
-                struct iovec    iov[PTL_MD_MAX_IOV];
-                ptl_kiov_t      kiov[PTL_MD_MAX_IOV];
-        }                       payload;
-} ksock_txiovspace_t;
-
 typedef struct                                  /* locally transmitted packet */
 {
         ksock_tx_t              ltx_tx;         /* send info */
-        struct list_head       *ltx_idle;       /* where to put when idle */
         void                   *ltx_private;    /* lib_finalize() callback arg */
         void                   *ltx_cookie;     /* lib_finalize() callback arg */
-        ksock_txiovspace_t      ltx_iov_space;  /* where to stash frag descriptors */
         ptl_hdr_t               ltx_hdr;        /* buffer for packet header */
+        int                     ltx_desc_size;  /* bytes allocated for this desc */
+        struct iovec            ltx_iov[1];     /* iov for hdr + payload */
+        ptl_kiov_t              ltx_kiov[0];    /* kiov for payload */
 } ksock_ltx_t;
 
 #define KSOCK_TX_2_KPR_FWD_DESC(ptr)    list_entry ((kprfd_scratch_t *)ptr, kpr_fwd_desc_t, kprfd_scratch)
index 6ea4fa8..22345fe 100644 (file)
@@ -129,60 +129,11 @@ ksocknal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
         return 0;
 }
 
-ksock_ltx_t *
-ksocknal_get_ltx (int may_block)
-{
-        unsigned long flags;
-        ksock_ltx_t *ltx = NULL;
-
-        for (;;) {
-                spin_lock_irqsave (&ksocknal_data.ksnd_idle_ltx_lock, flags);
-
-                if (!list_empty (&ksocknal_data.ksnd_idle_ltx_list)) {
-                        ltx = list_entry(ksocknal_data.ksnd_idle_ltx_list.next,
-                                         ksock_ltx_t, ltx_tx.tx_list);
-                        list_del (&ltx->ltx_tx.tx_list);
-                        ksocknal_data.ksnd_active_ltxs++;
-                        break;
-                }
-
-                if (!may_block) {
-                        if (!list_empty(&ksocknal_data.ksnd_idle_nblk_ltx_list)) {
-                                ltx = list_entry(ksocknal_data.ksnd_idle_nblk_ltx_list.next,
-                                                 ksock_ltx_t, ltx_tx.tx_list);
-                                list_del (&ltx->ltx_tx.tx_list);
-                                ksocknal_data.ksnd_active_ltxs++;
-                        }
-                        break;
-                }
-
-                spin_unlock_irqrestore(&ksocknal_data.ksnd_idle_ltx_lock,
-                                       flags);
-
-                wait_event (ksocknal_data.ksnd_idle_ltx_waitq,
-                            !list_empty (&ksocknal_data.ksnd_idle_ltx_list));
-        }
-
-        spin_unlock_irqrestore (&ksocknal_data.ksnd_idle_ltx_lock, flags);
-
-        return (ltx);
-}
-
 void
-ksocknal_put_ltx (ksock_ltx_t *ltx)
+ksocknal_free_ltx (ksock_ltx_t *ltx)
 {
-        unsigned long   flags;
-        
-        spin_lock_irqsave (&ksocknal_data.ksnd_idle_ltx_lock, flags);
-
-        ksocknal_data.ksnd_active_ltxs--;
-        list_add_tail (&ltx->ltx_tx.tx_list, ltx->ltx_idle);
-
-        /* normal tx desc => wakeup anyone blocking for one */
-        if (ltx->ltx_idle == &ksocknal_data.ksnd_idle_ltx_list)
-                wake_up (&ksocknal_data.ksnd_idle_ltx_waitq);
-
-        spin_unlock_irqrestore (&ksocknal_data.ksnd_idle_ltx_lock, flags);
+        atomic_dec(&ksocknal_data.ksnd_nactive_ltxs);
+        PORTAL_FREE(ltx, ltx->ltx_desc_size);
 }
 
 #if SOCKNAL_ZC
@@ -364,7 +315,7 @@ ksocknal_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
 }
 
 int
-ksocknal_sendmsg (ksock_conn_t *conn, ksock_tx_t *tx)
+ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx)
 {
         /* Return 0 on success, < 0 on error.
          * caller checks tx_resid to determine progress/completion */
@@ -377,17 +328,14 @@ ksocknal_sendmsg (ksock_conn_t *conn, ksock_tx_t *tx)
         }
 
         rc = ksocknal_getconnsock (conn);
-        if (rc != 0)
+        if (rc != 0) {
+                LASSERT (conn->ksnc_closing);
                 return (rc);
+        }
 
         for (;;) {
                 LASSERT (tx->tx_resid != 0);
 
-                if (conn->ksnc_closing) {
-                        rc = -ESHUTDOWN;
-                        break;
-                }
-
                 if (tx->tx_niov != 0)
                         rc = ksocknal_send_iov (conn, tx);
                 else
@@ -554,7 +502,7 @@ ksocknal_recv_kiov (ksock_conn_t *conn)
 }
 
 int
-ksocknal_recvmsg (ksock_conn_t *conn) 
+ksocknal_receive (ksock_conn_t *conn) 
 {
         /* Return 1 on success, 0 on EOF, < 0 on error.
          * Caller checks ksnc_rx_nob_wanted to determine
@@ -568,15 +516,12 @@ ksocknal_recvmsg (ksock_conn_t *conn)
         }
 
         rc = ksocknal_getconnsock (conn);
-        if (rc != 0)
+        if (rc != 0) {
+                LASSERT (conn->ksnc_closing);
                 return (rc);
+        }
 
         for (;;) {
-                if (conn->ksnc_closing) {
-                        rc = -ESHUTDOWN;
-                        break;
-                }
-
                 if (conn->ksnc_rx_niov != 0)
                         rc = ksocknal_recv_iov (conn);
                 else
@@ -665,7 +610,7 @@ ksocknal_tx_done (ksock_tx_t *tx, int asynch)
 
         lib_finalize (&ksocknal_lib, ltx->ltx_private, ltx->ltx_cookie);
 
-        ksocknal_put_ltx (ltx);
+        ksocknal_free_ltx (ltx);
         EXIT;
 }
 
@@ -696,7 +641,7 @@ ksocknal_process_transmit (ksock_conn_t *conn, ksock_tx_t *tx)
 {
         int            rc;
        
-        rc = ksocknal_sendmsg (conn, tx);
+        rc = ksocknal_transmit (conn, tx);
 
         CDEBUG (D_NET, "send(%d) %d\n", tx->tx_resid, rc);
         LASSERT (rc != -EAGAIN);
@@ -840,13 +785,17 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn)
         unsigned long  flags;
         ksock_sched_t *sched = conn->ksnc_scheduler;
 
-        /* called holding global lock (read or irq-write) */
-
+        /* called holding global lock (read or irq-write) and caller may
+         * not have dropped this lock between finding conn and calling me,
+         * so we don't need the {get,put}connsock dance to deref
+         * ksnc_sock... */
+        LASSERT(!conn->ksnc_closing);
+        LASSERT(tx->tx_resid == tx->tx_nob);
+        
         CDEBUG (D_NET, "Sending to "LPX64" on port %d\n", 
                 conn->ksnc_peer->ksnp_nid, conn->ksnc_port);
 
         atomic_add (tx->tx_nob, &conn->ksnc_tx_nob);
-        tx->tx_resid = tx->tx_nob;
         tx->tx_conn = conn;
 
 #if SOCKNAL_ZC
@@ -854,7 +803,6 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn)
         /* NB this sets 1 ref on zccd, so the callback can only occur after
          * I've released this ref. */
 #endif
-
         spin_lock_irqsave (&sched->kss_lock, flags);
 
         conn->ksnc_tx_deadline = jiffies + 
@@ -960,6 +908,8 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid)
                 tx->tx_nob, tx->tx_niov, tx->tx_nkiov);
 
         tx->tx_conn = NULL;                     /* only set when assigned a conn */
+        tx->tx_resid = tx->tx_nob;
+        tx->tx_hdr = (ptl_hdr_t *)tx->tx_iov[0].iov_base;
 
         g_lock = &ksocknal_data.ksnd_global_lock;
         read_lock (g_lock);
@@ -1024,115 +974,125 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid)
         return (-EHOSTUNREACH);
 }
 
-ksock_ltx_t *
-ksocknal_setup_hdr (nal_cb_t *nal, void *private, lib_msg_t *cookie, 
-                    ptl_hdr_t *hdr, int type)
+int
+ksocknal_sendmsg(nal_cb_t     *nal, 
+                 void         *private, 
+                 lib_msg_t    *cookie,
+                 ptl_hdr_t    *hdr, 
+                 int           type, 
+                 ptl_nid_t     nid, 
+                 ptl_pid_t     pid,
+                 unsigned int  payload_niov, 
+                 struct iovec *payload_iov, 
+                 ptl_kiov_t   *payload_kiov,
+                 size_t        payload_nob)
 {
         ksock_ltx_t  *ltx;
+        int           desc_size;
+        int           rc;
+
+        /* NB 'private' is different depending on what we're sending.
+         * Just ignore it... */
+
+        CDEBUG(D_NET, "sending "LPSZ" bytes in %d frags to nid:"LPX64
+               " pid %d\n", payload_nob, payload_niov, nid , pid);
 
-        /* I may not block for a transmit descriptor if I might block the
-         * receiver, or an interrupt handler. */
-        ltx = ksocknal_get_ltx (!(type == PTL_MSG_ACK ||
-                                  type == PTL_MSG_REPLY ||
-                                  in_interrupt ()));
+        LASSERT (payload_nob == 0 || payload_niov > 0);
+        LASSERT (payload_niov <= PTL_MD_MAX_IOV);
+
+        /* It must be OK to kmap() if required */
+        LASSERT (payload_kiov == NULL || !in_interrupt ());
+        /* payload is either all vaddrs or all pages */
+        LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
+        
+        if (payload_iov != NULL)
+                desc_size = offsetof(ksock_ltx_t, ltx_iov[1 + payload_niov]);
+        else
+                desc_size = offsetof(ksock_ltx_t, ltx_kiov[payload_niov]);
+        
+        if (in_interrupt() ||
+            type == PTL_MSG_ACK ||
+            type == PTL_MSG_REPLY) {
+                /* Can't block if in interrupt or responding to an incoming
+                 * message */
+                PORTAL_ALLOC_ATOMIC(ltx, desc_size);
+        } else {
+                PORTAL_ALLOC(ltx, desc_size);
+        }
+        
         if (ltx == NULL) {
-                CERROR ("Can't allocate tx desc\n");
-                return (NULL);
+                CERROR("Can't allocate tx desc type %d size %d %s\n",
+                       type, desc_size, in_interrupt() ? "(intr)" : "");
+                return (PTL_NOSPACE);
         }
 
-        /* Init local send packet (storage for hdr, finalize() args) */
+        atomic_inc(&ksocknal_data.ksnd_nactive_ltxs);
+
+        ltx->ltx_desc_size = desc_size;
+        
+        /* We always have 1 mapped frag for the header */
+        ltx->ltx_tx.tx_iov = ltx->ltx_iov;
+        ltx->ltx_iov[0].iov_base = &ltx->ltx_hdr;
+        ltx->ltx_iov[0].iov_len = sizeof(*hdr);
         ltx->ltx_hdr = *hdr;
+        
         ltx->ltx_private = private;
         ltx->ltx_cookie = cookie;
         
-        /* Init common ltx_tx */
         ltx->ltx_tx.tx_isfwd = 0;
-        ltx->ltx_tx.tx_nob = sizeof (*hdr);
-
-        /* We always have 1 mapped frag for the header */
-        ltx->ltx_tx.tx_niov = 1;
-        ltx->ltx_tx.tx_iov = &ltx->ltx_iov_space.hdr;
-        ltx->ltx_tx.tx_iov[0].iov_base = &ltx->ltx_hdr;
-        ltx->ltx_tx.tx_iov[0].iov_len = sizeof (ltx->ltx_hdr);
-
-        ltx->ltx_tx.tx_kiov  = NULL;
-        ltx->ltx_tx.tx_nkiov = 0;
+        ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_nob;
 
-        return (ltx);
-}
-
-int
-ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie,
-               ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
-               unsigned int payload_niov, struct iovec *payload_iov,
-               size_t payload_len)
-{
-        ksock_ltx_t  *ltx;
-        int           rc;
+        if (payload_iov != NULL) {
+                /* payload is all mapped */
+                ltx->ltx_tx.tx_kiov  = NULL;
+                ltx->ltx_tx.tx_nkiov = 0;
 
-        /* NB 'private' is different depending on what we're sending.
-         * Just ignore it until we can rely on it
-         */
+                ltx->ltx_tx.tx_niov = 1 + payload_niov;
 
-        CDEBUG(D_NET,
-               "sending "LPSZ" bytes in %d mapped frags to nid: "LPX64
-               " pid %d\n", payload_len, payload_niov, nid, pid);
+                memcpy(ltx->ltx_iov + 1, payload_iov,
+                       payload_niov * sizeof (*payload_iov));
 
-        ltx = ksocknal_setup_hdr (nal, private, cookie, hdr, type);
-        if (ltx == NULL)
-                return (PTL_FAIL);
+        } else {
+                /* payload is all pages */
+                ltx->ltx_tx.tx_kiov = ltx->ltx_kiov;
+                ltx->ltx_tx.tx_nkiov = payload_niov;
 
-        /* append the payload_iovs to the one pointing at the header */
-        LASSERT (ltx->ltx_tx.tx_niov == 1 && ltx->ltx_tx.tx_nkiov == 0);
-        LASSERT (payload_niov <= PTL_MD_MAX_IOV);
+                ltx->ltx_tx.tx_niov = 1;
 
-        memcpy (ltx->ltx_tx.tx_iov + 1, payload_iov,
-                payload_niov * sizeof (*payload_iov));
-        ltx->ltx_tx.tx_niov = 1 + payload_niov;
-        ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len;
+                memcpy(ltx->ltx_kiov, payload_kiov, 
+                       payload_niov * sizeof (*payload_kiov));
+        }
 
-        rc = ksocknal_launch_packet (&ltx->ltx_tx, nid);
+        rc = ksocknal_launch_packet(&ltx->ltx_tx, nid);
         if (rc == 0)
                 return (PTL_OK);
         
-        ksocknal_put_ltx (ltx);
+        ksocknal_free_ltx(ltx);
         return (PTL_FAIL);
 }
 
 int
+ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie,
+               ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
+               unsigned int payload_niov, struct iovec *payload_iov,
+               size_t payload_len)
+{
+        return (ksocknal_sendmsg(nal, private, cookie,
+                                 hdr, type, nid, pid,
+                                 payload_niov, payload_iov, NULL,
+                                 payload_len));
+}
+
+int
 ksocknal_send_pages (nal_cb_t *nal, void *private, lib_msg_t *cookie, 
                      ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
-                     unsigned int payload_niov, ptl_kiov_t *payload_iov, size_t payload_len)
+                     unsigned int payload_niov, ptl_kiov_t *payload_kiov, 
+                     size_t payload_len)
 {
-        ksock_ltx_t *ltx;
-        int          rc;
-
-        /* NB 'private' is different depending on what we're sending.
-         * Just ignore it until we can rely on it */
-
-        CDEBUG(D_NET,
-               "sending "LPSZ" bytes in %d mapped frags to nid: "LPX64" pid %d\n",
-               payload_len, payload_niov, nid, pid);
-
-        ltx = ksocknal_setup_hdr (nal, private, cookie, hdr, type);
-        if (ltx == NULL)
-                return (PTL_FAIL);
-
-        LASSERT (ltx->ltx_tx.tx_niov == 1 && ltx->ltx_tx.tx_nkiov == 0);
-        LASSERT (payload_niov <= PTL_MD_MAX_IOV);
-        
-        ltx->ltx_tx.tx_kiov = ltx->ltx_iov_space.payload.kiov;
-        memcpy (ltx->ltx_tx.tx_kiov, payload_iov, 
-                payload_niov * sizeof (*payload_iov));
-        ltx->ltx_tx.tx_nkiov = payload_niov;
-        ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len;
-
-        rc = ksocknal_launch_packet (&ltx->ltx_tx, nid);
-        if (rc == 0)
-                return (PTL_OK);
-
-        ksocknal_put_ltx (ltx);
-        return (PTL_FAIL);
+        return (ksocknal_sendmsg(nal, private, cookie,
+                                 hdr, type, nid, pid,
+                                 payload_niov, NULL, payload_kiov,
+                                 payload_len));
 }
 
 void
@@ -1155,7 +1115,6 @@ ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
         tx->tx_iov   = fwd->kprfd_iov;
         tx->tx_nkiov = 0;
         tx->tx_kiov  = NULL;
-        tx->tx_hdr   = (ptl_hdr_t *)fwd->kprfd_iov[0].iov_base;
 
         rc = ksocknal_launch_packet (tx, nid);
         if (rc != 0)
@@ -1204,6 +1163,7 @@ ksocknal_fmb_callback (void *arg, int error)
         spin_lock_irqsave (&fmp->fmp_lock, flags);
 
         list_add (&fmb->fmb_list, &fmp->fmp_idle_fmbs);
+        fmp->fmp_nactive_fmbs--;
 
         if (!list_empty (&fmp->fmp_blocked_conns)) {
                 conn = list_entry (fmb->fmb_pool->fmp_blocked_conns.next,
@@ -1242,7 +1202,7 @@ ksocknal_get_idle_fmb (ksock_conn_t *conn)
         ksock_fmb_t      *fmb;
 
         LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
-        LASSERT (ksocknal_data.ksnd_fmbs != NULL);
+        LASSERT (kpr_routing(&ksocknal_data.ksnd_router));
 
         if (packet_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE)
                 pool = &ksocknal_data.ksnd_small_fmp;
@@ -1255,6 +1215,7 @@ ksocknal_get_idle_fmb (ksock_conn_t *conn)
                 fmb = list_entry(pool->fmp_idle_fmbs.next,
                                  ksock_fmb_t, fmb_list);
                 list_del (&fmb->fmb_list);
+                pool->fmp_nactive_fmbs++;
                 spin_unlock_irqrestore (&pool->fmp_lock, flags);
 
                 return (fmb);
@@ -1397,7 +1358,7 @@ ksocknal_fwd_parse (ksock_conn_t *conn)
                 return;
         }
 
-        if (ksocknal_data.ksnd_fmbs == NULL) {        /* not forwarding */
+        if (!kpr_routing(&ksocknal_data.ksnd_router)) {    /* not forwarding */
                 CERROR("dropping packet from "LPX64" (%s) for "LPX64
                        " (%s): not forwarding\n",
                        src_nid, portals_nid2str(TCPNAL, src_nid, str),
@@ -1525,9 +1486,11 @@ ksocknal_process_receive (ksock_conn_t *conn)
 
         LASSERT (conn->ksnc_rx_nob_wanted > 0);
 
-        rc = ksocknal_recvmsg(conn);
+        rc = ksocknal_receive(conn);
 
         if (rc <= 0) {
+                LASSERT (rc != -EAGAIN);
+
                 if (rc == 0)
                         CWARN ("[%p] EOF from "LPX64" ip %08x:%d\n",
                                conn, conn->ksnc_peer->ksnp_nid,
@@ -1766,9 +1729,9 @@ int ksocknal_scheduler (void *arg)
                          * kss_lock. */
                         conn->ksnc_tx_ready = 0;
                         spin_unlock_irqrestore (&sched->kss_lock, flags);
-                        
+
                         rc = ksocknal_process_transmit(conn, tx);
-                        
+
                         spin_lock_irqsave (&sched->kss_lock, flags);
 
                         if (rc != -EAGAIN) {
@@ -1851,7 +1814,7 @@ ksocknal_data_ready (struct sock *sk, int n)
         read_lock (&ksocknal_data.ksnd_global_lock);
 
         conn = sk->sk_user_data;
-        if (conn == NULL) {             /* raced with ksocknal_close_sock */
+        if (conn == NULL) {             /* raced with ksocknal_terminate_conn */
                 LASSERT (sk->sk_data_ready != &ksocknal_data_ready);
                 sk->sk_data_ready (sk, n);
         } else {
@@ -1900,7 +1863,7 @@ ksocknal_write_space (struct sock *sk)
                (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ?
                                       " empty" : " queued"));
 
-        if (conn == NULL) {             /* raced with ksocknal_close_sock */
+        if (conn == NULL) {             /* raced with ksocknal_terminate_conn */
                 LASSERT (sk->sk_write_space != &ksocknal_write_space);
                 sk->sk_write_space (sk);
 
@@ -2136,7 +2099,7 @@ ksocknal_setup_sock (struct socket *sock)
         int             option;
         struct linger   linger;
 
-        sock->sk->allocation = GFP_NOFS;
+        sock->sk->allocation = GFP_MEMALLOC;
 
         /* Ensure this socket aborts active sends immediately when we close
          * it. */
@@ -2421,6 +2384,8 @@ ksocknal_autoconnectd (void *arg)
         kportal_daemonize (name);
         kportal_blockallsigs ();
 
+        current->flags |= PF_MEMALLOC;
+
         spin_lock_irqsave (&ksocknal_data.ksnd_autoconnectd_lock, flags);
 
         while (!ksocknal_data.ksnd_shuttingdown) {
@@ -2548,6 +2513,8 @@ ksocknal_reaper (void *arg)
 
         init_waitqueue_entry (&wait, current);
 
+        current->flags |= PF_MEMALLOC;
+
         spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags);
 
         while (!ksocknal_data.ksnd_shuttingdown) {
index 6e2c1ca..ad2c966 100644 (file)
@@ -974,18 +974,14 @@ char *portals_debug_dumpstack(void)
         return buf;
 }
 
-#elif defined(CONFIG_X86)
+#elif defined(__i386__)
 
 extern int is_kernel_text_address(unsigned long addr);
 extern int lookup_symbol(unsigned long address, char *buf, int buflen);
 
 char *portals_debug_dumpstack(void)
 {
-#if defined(__x86_64__)
-        unsigned long esp = current->thread.rsp;
-#else
         unsigned long esp = current->thread.esp;
-#endif
         unsigned long *stack = (unsigned long *)&esp;
         int size;
         unsigned long addr;
index 89fe8f7..a24423e 100644 (file)
@@ -45,9 +45,6 @@
 
 #if LWT_SUPPORT
 
-#define LWT_MEMORY              (1<<20)         /* 1Mb of trace memory */
-#define LWT_MAX_CPUS             4
-
 int         lwt_enabled;
 int         lwt_pages_per_cpu;
 lwt_cpu_t   lwt_cpus[LWT_MAX_CPUS];
@@ -123,7 +120,8 @@ lwt_control (int enable, int clear)
 }
 
 int
-lwt_snapshot (int *ncpu, int *total_size, void *user_ptr, int user_size)
+lwt_snapshot (cycles_t *now, int *ncpu, int *total_size, 
+              void *user_ptr, int user_size)
 {
         const int    events_per_page = PAGE_SIZE / sizeof(lwt_event_t);
         const int    bytes_per_page = events_per_page * sizeof(lwt_event_t);
@@ -136,7 +134,8 @@ lwt_snapshot (int *ncpu, int *total_size, void *user_ptr, int user_size)
 
         *ncpu = num_online_cpus();
         *total_size = num_online_cpus() * lwt_pages_per_cpu * bytes_per_page;
-
+        *now = get_cycles();
+        
         if (user_ptr == NULL)
                 return (0);
 
index 7c0cafc..55e1935 100644 (file)
@@ -83,6 +83,115 @@ kportal_daemonize (char *str)
 }
 
 void
+kportal_memhog_free (struct portals_device_userstate *pdu)
+{
+        struct page **level0p = &pdu->pdu_memhog_root_page;
+        struct page **level1p;
+        struct page **level2p;
+        int           count1;
+        int           count2;
+        
+        if (*level0p != NULL) {
+
+                level1p = (struct page **)page_address(*level0p);
+                count1 = 0;
+                
+                while (count1 < PAGE_SIZE/sizeof(struct page *) &&
+                       *level1p != NULL) {
+
+                        level2p = (struct page **)page_address(*level1p);
+                        count2 = 0;
+                        
+                        while (count2 < PAGE_SIZE/sizeof(struct page *) &&
+                               *level2p != NULL) {
+                                
+                                __free_page(*level2p);
+                                pdu->pdu_memhog_pages--;
+                                level2p++;
+                                count2++;
+                        }
+                        
+                        __free_page(*level1p);
+                        pdu->pdu_memhog_pages--;
+                        level1p++;
+                        count1++;
+                }
+                
+                __free_page(*level0p);
+                pdu->pdu_memhog_pages--;
+
+                *level0p = NULL;
+        }
+        
+        LASSERT (pdu->pdu_memhog_pages == 0);
+}
+
+int
+kportal_memhog_alloc (struct portals_device_userstate *pdu, int npages, int flags)
+{
+        struct page **level0p;
+        struct page **level1p;
+        struct page **level2p;
+        int           count1;
+        int           count2;
+        
+        LASSERT (pdu->pdu_memhog_pages == 0);
+        LASSERT (pdu->pdu_memhog_root_page == NULL);
+
+        if (npages < 0)
+                return -EINVAL;
+
+        if (npages == 0)
+                return 0;
+
+        level0p = &pdu->pdu_memhog_root_page;
+        *level0p = alloc_page(flags);
+        if (*level0p == NULL)
+                return -ENOMEM;
+        pdu->pdu_memhog_pages++;
+
+        level1p = (struct page **)page_address(*level0p);
+        count1 = 0;
+        memset(level1p, 0, PAGE_SIZE);
+        
+        while (pdu->pdu_memhog_pages < npages &&
+               count1 < PAGE_SIZE/sizeof(struct page *)) {
+
+                if (signal_pending(current))
+                        return (-EINTR);
+                
+                *level1p = alloc_page(flags);
+                if (*level1p == NULL)
+                        return -ENOMEM;
+                pdu->pdu_memhog_pages++;
+
+                level2p = (struct page **)page_address(*level1p);
+                count2 = 0;
+                memset(level2p, 0, PAGE_SIZE);
+                
+                while (pdu->pdu_memhog_pages < npages &&
+                       count2 < PAGE_SIZE/sizeof(struct page *)) {
+                        
+                        if (signal_pending(current))
+                                return (-EINTR);
+
+                        *level2p = alloc_page(flags);
+                        if (*level2p == NULL)
+                                return (-ENOMEM);
+                        pdu->pdu_memhog_pages++;
+                        
+                        level2p++;
+                        count2++;
+                }
+                
+                level1p++;
+                count1++;
+        }
+
+        return 0;
+}
+
+void
 kportal_blockallsigs ()
 {
         unsigned long  flags;
@@ -96,22 +205,39 @@ kportal_blockallsigs ()
 /* called when opening /dev/device */
 static int kportal_psdev_open(struct inode * inode, struct file * file)
 {
+        struct portals_device_userstate *pdu;
         ENTRY;
-
+        
         if (!inode)
                 RETURN(-EINVAL);
+
         PORTAL_MODULE_USE;
+
+        PORTAL_ALLOC(pdu, sizeof(*pdu));
+        if (pdu != NULL) {
+                pdu->pdu_memhog_pages = 0;
+                pdu->pdu_memhog_root_page = NULL;
+        }
+        file->private_data = pdu;
+        
         RETURN(0);
 }
 
 /* called when closing /dev/device */
 static int kportal_psdev_release(struct inode * inode, struct file * file)
 {
+        struct portals_device_userstate *pdu;
         ENTRY;
 
         if (!inode)
                 RETURN(-EINVAL);
 
+        pdu = file->private_data;
+        if (pdu != NULL) {
+                kportal_memhog_free(pdu);
+                PORTAL_FREE(pdu, sizeof(*pdu));
+        }
+        
         PORTAL_MODULE_UNUSE;
         RETURN(0);
 }
@@ -514,7 +640,8 @@ static int kportal_ioctl(struct inode *inode, struct file *file,
                 break;
                 
         case IOC_PORTAL_LWT_SNAPSHOT:
-                err = lwt_snapshot (&data->ioc_count, &data->ioc_misc,
+                err = lwt_snapshot (&data->ioc_nid,
+                                    &data->ioc_count, &data->ioc_misc,
                                     data->ioc_pbuf1, data->ioc_plen1);
                 if (err == 0 &&
                     copy_to_user((char *)arg, data, sizeof (*data)))
@@ -528,7 +655,22 @@ static int kportal_ioctl(struct inode *inode, struct file *file,
                     copy_to_user((char *)arg, data, sizeof (*data)))
                         err = -EFAULT;
                 break;
-#endif                        
+#endif
+        case IOC_PORTAL_MEMHOG:
+                if (!capable (CAP_SYS_ADMIN))
+                        err = -EPERM;
+                else if (file->private_data == NULL) {
+                        err = -EINVAL;
+                } else {
+                        kportal_memhog_free(file->private_data);
+                        err = kportal_memhog_alloc(file->private_data,
+                                                   data->ioc_count,
+                                                   data->ioc_flags);
+                        if (err != 0)
+                                kportal_memhog_free(file->private_data);
+                }
+                break;
+
         default:
                 err = -EINVAL;
                 break;
@@ -612,8 +754,8 @@ static int init_kportals_module(void)
  cleanup_lwt:
 #if LWT_SUPPORT
         lwt_fini();
-#endif
  cleanup_debug:
+#endif
         portals_debug_cleanup();
         return rc;
 }
index 3325892..b46ee16 100644 (file)
@@ -1371,7 +1371,8 @@ lwt_control(int enable, int clear)
 }
 
 static int
-lwt_snapshot(int *ncpu, int *totalsize, lwt_event_t *events, int size)
+lwt_snapshot(cycles_t *now, int *ncpu, int *totalsize, 
+             lwt_event_t *events, int size)
 {
         struct portal_ioctl_data data;
         int                      rc;
@@ -1390,6 +1391,9 @@ lwt_snapshot(int *ncpu, int *totalsize, lwt_event_t *events, int size)
         LASSERT (data.ioc_count != 0);
         LASSERT (data.ioc_misc != 0);
         
+        if (now != NULL)
+                *now = data.ioc_nid;
+
         if (ncpu != NULL)
                 *ncpu = data.ioc_count;
 
@@ -1499,14 +1503,13 @@ get_cycles_per_usec ()
 int
 jt_ptl_lwt(int argc, char **argv)
 {
-#define MAX_CPUS 8
         int             ncpus;
         int             totalspace;
         int             nevents_per_cpu;
         lwt_event_t    *events;
-        lwt_event_t    *cpu_event[MAX_CPUS + 1];
-        lwt_event_t    *next_event[MAX_CPUS];
-        lwt_event_t    *first_event[MAX_CPUS];
+        lwt_event_t    *cpu_event[LWT_MAX_CPUS + 1];
+        lwt_event_t    *next_event[LWT_MAX_CPUS];
+        lwt_event_t    *first_event[LWT_MAX_CPUS];
         int             cpu;
         lwt_event_t    *e;
         int             rc;
@@ -1514,6 +1517,9 @@ jt_ptl_lwt(int argc, char **argv)
         double          mhz;
         cycles_t        t0;
         cycles_t        tlast;
+        cycles_t        tnow;
+        struct timeval  tvnow;
+        int             printed_date = 0;
         FILE           *f = stdout;
 
         if (argc < 2 ||
@@ -1541,11 +1547,12 @@ jt_ptl_lwt(int argc, char **argv)
                 return (0);
         }
                 
-        if (lwt_snapshot(&ncpus, &totalspace, NULL, 0) != 0)
+        if (lwt_snapshot(NULL, &ncpus, &totalspace, NULL, 0) != 0)
                 return (-1);
 
-        if (ncpus > MAX_CPUS) {
-                fprintf(stderr, "Too many cpus: %d (%d)\n", ncpus, MAX_CPUS);
+        if (ncpus > LWT_MAX_CPUS) {
+                fprintf(stderr, "Too many cpus: %d (%d)\n", 
+                        ncpus, LWT_MAX_CPUS);
                 return (-1);
         }
 
@@ -1560,11 +1567,14 @@ jt_ptl_lwt(int argc, char **argv)
                 return (-1);
         }
 
-        if (lwt_snapshot(NULL, NULL, events, totalspace)) {
+        if (lwt_snapshot(&tnow, NULL, NULL, events, totalspace)) {
                 free(events);
                 return (-1);
         }
 
+        /* we want this time to be sampled at snapshot time */
+        gettimeofday(&tvnow, NULL);
+
         if (argc > 2) {
                 f = fopen (argv[2], "w");
                 if (f == NULL) {
@@ -1645,6 +1655,17 @@ jt_ptl_lwt(int argc, char **argv)
                 
                 if (t0 <= next_event[cpu]->lwte_when) {
                         /* on or after the first event */
+                        if (!printed_date) {
+                                cycles_t du = (tnow - t0) / mhz;
+                                time_t   then = tvnow.tv_sec - du/1000000;
+                                
+                                if (du % 1000000 > tvnow.tv_usec)
+                                        then--;
+
+                                fprintf(f, "%s", ctime(&then));
+                                printed_date = 1;
+                        }
+                        
                         rc = lwt_print(f, t0, tlast, mhz, cpu, next_event[cpu]);
                         if (rc != 0)
                                 break;
@@ -1666,5 +1687,48 @@ jt_ptl_lwt(int argc, char **argv)
 
         free(events);
         return (0);
-#undef MAX_CPUS
 }
+
+int jt_ptl_memhog(int argc, char **argv)
+{
+        static int                gfp = 0;        /* sticky! */
+
+        struct portal_ioctl_data  data;
+        int                       rc;
+        int                       count;
+        char                     *end;
+        
+        if (argc < 2)  {
+                fprintf(stderr, "usage: %s <npages> [<GFP flags>]\n", argv[0]);
+                return 0;
+        }
+
+        count = strtol(argv[1], &end, 0);
+        if (count < 0 || *end != 0) {
+                fprintf(stderr, "Can't parse page count '%s'\n", argv[1]);
+                return -1;
+        }
+
+        if (argc >= 3) {
+                rc = strtol(argv[2], &end, 0);
+                if (*end != 0) {
+                        fprintf(stderr, "Can't parse gfp flags '%s'\n", argv[2]);
+                        return -1;
+                }
+                gfp = rc;
+        }
+        
+        PORTAL_IOC_INIT(data);
+        data.ioc_count = count;
+        data.ioc_flags = gfp;
+        rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_MEMHOG, &data);
+
+        if (rc != 0) {
+                fprintf(stderr, "memhog %d failed: %s\n", count, strerror(errno));
+                return -1;
+        }
+        
+        printf("memhog %d OK\n", count);
+        return 0;
+}
+
index 22a6196..872df60 100644 (file)
@@ -1,3 +1,24 @@
+tbd         Cluster File Systems, Inc. <info@clusterfs.com>
+       * version 1.0.1
+       * bug fixes
+       - remove now-unused request->rq_obd (278)
+       - if an allocation fails, print out how much memory we've used (1933)
+       - use PORTAL_SLAB_ALLOC for structures, to get GFP_MEMALLOC (1933)
+       - add the "configurable stack size" patch to most series files (1256)
+       - ability to write large log records, for 100+ OST configs (2306)
+       - fix NULL deref when filter_prep fails (2314)
+       - fix operator precedence error in filter_sync
+       - dynamic allocation of socknal TX descriptors (2315)
+       - fix a missed case in the GFP_MEMALLOC patch, can cause deadlock (2310)
+       - fix gcc 2.96 compilation problem in xattr kernel patch (2294)
+       - ensure that CWARN messages in Portals always get to the syslog
+       - __init/__exit are not for prototype decls (ldlm_init/exit)
+       - x86-64 compile warning fixes
+       - fix gateway LMC keyword conflict (2318)
+       * miscellanea
+       - allow configurable automake binary, for testing new versions
+       - small update to the lfs documentation
+
 2003-12-03  Cluster File Systems, Inc. <info@clusterfs.com>
        * version 1.0.0
        * fix negative export reference count in fsfilt_sync (2312)
index 9deed73..be0d42d 100644 (file)
@@ -1,5 +1,5 @@
 #!/bin/sh
 
 aclocal &&
-automake --add-missing &&
+${AUTOMAKE:-automake} --add-missing &&
 ${AUTOCONF:-autoconf}
index b8568da..28890e0 100644 (file)
@@ -59,6 +59,12 @@ getstripe <file_name>
 \series bold 
 lfs\SpecialChar ~
 setstripe <filename> <stripe_size> <start_ost> <stripe_cnt>
+\layout Standard
+
+
+\series bold 
+lfs\SpecialChar ~
+check <mds| osts| servers>
 \layout Subsection
 
 DESCRIPTION
@@ -101,6 +107,23 @@ getstripe
 
 
 \series bold 
+check 
+\series default 
+Display the status of MDS or OSTs (as specified in the command) or all the
+ servers (MDS and OSTs)
+\layout List
+\labelwidthstring 00.00.0000
+
+
+\series bold 
+osts
+\series default 
+ List all the OSTs for the filesystem
+\layout List
+\labelwidthstring 00.00.0000
+
+
+\series bold 
 help
 \series default 
  Provides brief help on the various arguments
@@ -167,7 +190,22 @@ given
 file
 \layout LyX-Code
 
-   $lfs find /mnt/lustre/file1
+   $lfs find /mnt/lustre/foo1
+\layout LyX-Code
+
+   OBDS:    
+\layout LyX-Code
+
+      0: OST_localhost_UUID 
+\layout LyX-Code
+
+   /mnt/lustre/foo1         
+\layout LyX-Code
+
+   obdidx           objid          objid            group              
+\layout LyX-Code
+
+   0                 1            0x1                0 
 \layout Description
 
 Listing\SpecialChar ~
@@ -218,8 +256,50 @@ OST
 \layout LyX-Code
 
    $lfs find -r --obd OST2_UUID /mnt/lustre/
+\layout Description
+
+Check\SpecialChar ~
+the\SpecialChar ~
+status\SpecialChar ~
+of\SpecialChar ~
+all\SpecialChar ~
+servers(mds,\SpecialChar ~
+osts)
+\layout LyX-Code
+
+   $lfs check servers 
+\layout LyX-Code
+
+   OSC_localhost.localdomain_OST_localhost_mds1 active.
+\layout LyX-Code
+
+   OSC_localhost.localdomain_OST_localhost_MNT_localhost active.
+\layout LyX-Code
+
+   MDC_localhost.localdomain_mds1_MNT_localhost active.
+\layout LyX-Code
+
+   $
+\layout Description
+
+List\SpecialChar ~
+all\SpecialChar ~
+the\SpecialChar ~
+OSTs
+\layout LyX-Code
+
+   $lfs osts 
+\layout LyX-Code
+
+   OBDS:    
+\layout LyX-Code
+
+   0: OST_localhost_UUID   
 \layout LyX-Code
 
+   $
 \layout Subsection
 
 BUGS
index 7fe649b..d95ae9c 100644 (file)
@@ -201,7 +201,6 @@ struct ptlrpc_bulk_desc;
 struct ptlrpc_request {
         int rq_type; /* one of PTL_RPC_MSG_* */
         struct list_head rq_list;
-        struct obd_device *rq_obd;
         int rq_status;
         spinlock_t rq_lock;
         unsigned int rq_intr:1, rq_replied:1, rq_want_ack:1, rq_err:1,
@@ -230,7 +229,7 @@ struct ptlrpc_request {
 
         int rq_import_generation;
         enum lustre_imp_state rq_send_state;
-        wait_queue_head_t rq_wait_for_rep; /* XXX also _for_ack */
+        wait_queue_head_t rq_reply_waitq; /* XXX also _for_ack */
 
         /* incoming reply */
         ptl_md_t rq_reply_md;
@@ -413,8 +412,8 @@ struct ptlrpc_service {
         struct list_head srv_threads;
         int (*srv_handler)(struct ptlrpc_request *req);
         char *srv_name;  /* only statically allocated strings here; we don't clean them */
-        struct proc_dir_entry *svc_procroot;
-        struct lprocfs_stats  *svc_stats;
+        struct proc_dir_entry *srv_procroot;
+        struct lprocfs_stats  *srv_stats;
 
         int                  srv_interface_rover;
         struct ptlrpc_srv_ni srv_interfaces[0];
index 003daad..90b9612 100644 (file)
@@ -28,8 +28,6 @@
 #include <linux/autoconf.h>
 #include <linux/slab.h>
 #include <linux/highmem.h>
-#else
-
 #endif
 #include <linux/kp30.h>
 #include <linux/lustre_compat25.h>
@@ -173,23 +171,23 @@ do {                                                                         \
 #define fixme() CDEBUG(D_OTHER, "FIXME\n");
 
 #ifdef __KERNEL__
-#include <linux/types.h>
-#include <linux/blkdev.h>
+# include <linux/types.h>
+# include <linux/blkdev.h>
 
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#define BDEVNAME_DECLARE_STORAGE(foo) char foo[BDEVNAME_SIZE]
-#define ll_bdevname(SB, STORAGE) __bdevname(kdev_t_to_nr(SB->s_dev), STORAGE)
-#define ll_lock_kernel lock_kernel()
-#define ll_sbdev(SB)    ((SB)->s_bdev)
+# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#  define BDEVNAME_DECLARE_STORAGE(foo) char foo[BDEVNAME_SIZE]
+#  define ll_bdevname(SB, STORAGE) __bdevname(kdev_t_to_nr(SB->s_dev), STORAGE)
+#  define ll_lock_kernel lock_kernel()
+#  define ll_sbdev(SB)    ((SB)->s_bdev)
 void dev_set_rdonly(struct block_device *, int);
-#else
-#define BDEVNAME_DECLARE_STORAGE(foo) char __unused_##foo
-#define ll_sbdev(SB)    (kdev_t_to_nr((SB)->s_dev))
-#define ll_bdevname(SB,STORAGE) ((void)__unused_##STORAGE,bdevname(ll_sbdev(SB)))
-#define ll_lock_kernel
+# else
+#  define BDEVNAME_DECLARE_STORAGE(foo) char __unused_##foo
+#  define ll_sbdev(SB)    (kdev_t_to_nr((SB)->s_dev))
+#  define ll_bdevname(SB,STORAGE) ((void)__unused_##STORAGE,bdevname(ll_sbdev(SB)))
+#  define ll_lock_kernel
 void dev_set_rdonly(kdev_t, int);
-#endif
+# endif
 
 void dev_clear_rdonly(int);
 
@@ -205,23 +203,27 @@ static inline void OBD_FAIL_WRITE(int id, struct super_block *sb)
         }
 }
 #else /* !__KERNEL__ */
-#define LTIME_S(time) (time)
+# define LTIME_S(time) (time)
 /* for obd_class.h */
-#ifndef ERR_PTR
-# define ERR_PTR(a) ((void *)(a))
-#endif
+# ifndef ERR_PTR
+#  define ERR_PTR(a) ((void *)(a))
+# endif
 #endif  /* __KERNEL__ */
 
 #ifndef GFP_MEMALLOC
 #define GFP_MEMALLOC 0
 #endif
 
+extern atomic_t portal_kmemory;
+
 #define OBD_ALLOC_GFP(ptr, size, gfp_mask)                                    \
 do {                                                                          \
         (ptr) = kmalloc(size, (gfp_mask));                                    \
         if ((ptr) == NULL) {                                                  \
                 CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n",  \
                        (int)(size), __FILE__, __LINE__);                      \
+                CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \
+                       atomic_read(&obd_memory), atomic_read(&portal_kmemory));\
         } else {                                                              \
                 memset(ptr, 0, size);                                         \
                 atomic_add(size, &obd_memory);                                \
@@ -248,6 +250,8 @@ do {                                                                          \
         if ((ptr) == NULL) {                                                  \
                 CERROR("vmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n",  \
                        (int)(size), __FILE__, __LINE__);                      \
+                CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \
+                       atomic_read(&obd_memory), atomic_read(&portal_kmemory));\
         } else {                                                              \
                 memset(ptr, 0, size);                                         \
                 atomic_add(size, &obd_memory);                                \
@@ -312,6 +316,8 @@ do {                                                                          \
         if ((ptr) == NULL) {                                                  \
                 CERROR("slab-alloc of '"#ptr"' (%d bytes) failed at %s:%d\n", \
                        (int)(size), __FILE__, __LINE__);                      \
+                CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \
+                       atomic_read(&obd_memory), atomic_read(&portal_kmemory));\
         } else {                                                              \
                 memset(ptr, 0, size);                                         \
                 atomic_add(size, &obd_memory);                                \
index 320311e..6293972 100644 (file)
 +++ 25/arch/parisc/lib/checksum.c      2003-10-05 00:33:23.000000000 -0700
 @@ -16,8 +16,10 @@
   *
-  * $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $
+  * $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $
   */
 -#include <net/checksum.h>
 +#include <linux/module.h>
 --- linux-2.6.0-test6/drivers/char/ftape/compressor/zftape-compress.c  2003-06-14 12:18:32.000000000 -0700
 +++ 25/drivers/char/ftape/compressor/zftape-compress.c 2003-10-05 00:33:24.000000000 -0700
 @@ -31,6 +31,7 @@
-  char zftc_rev[] = "$Revision: 1.2 $";
-  char zftc_dat[] = "$Date: 2003/12/03 03:13:28 $";
+  char zftc_rev[] = "$Revision: 1.3 $";
+  char zftc_dat[] = "$Date: 2003/12/03 05:12:20 $";
  
 +#include <linux/version.h>
  #include <linux/errno.h>
 --- linux-2.6.0-test6/drivers/isdn/hardware/eicon/divamnt.c    2003-09-27 18:57:44.000000000 -0700
 +++ 25/drivers/isdn/hardware/eicon/divamnt.c   2003-10-05 00:33:24.000000000 -0700
 @@ -1,4 +1,4 @@
--/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $
-+/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $
+-/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $
++/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $
   *
   * Driver for Eicon DIVA Server ISDN cards.
   * Maint module
 -#include "di_defs.h"
  #include "debug_if.h"
  
--static char *main_revision = "$Revision: 1.2 $";
-+static char *main_revision = "$Revision: 1.2 $";
+-static char *main_revision = "$Revision: 1.3 $";
++static char *main_revision = "$Revision: 1.3 $";
  
  static int major;
  
 --- linux-2.6.0-test6/drivers/isdn/hardware/eicon/divasmain.c  2003-09-27 18:57:44.000000000 -0700
 +++ 25/drivers/isdn/hardware/eicon/divasmain.c 2003-10-05 00:33:24.000000000 -0700
 @@ -1,4 +1,4 @@
--/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $
-+/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $
+-/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $
++/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $
   *
   * Low level driver for Eicon DIVA Server ISDN cards.
   *
  #include "diva_dma.h"
  #include "diva_pci.h"
  
--static char *main_revision = "$Revision: 1.2 $";
-+static char *main_revision = "$Revision: 1.2 $";
+-static char *main_revision = "$Revision: 1.3 $";
++static char *main_revision = "$Revision: 1.3 $";
  
  static int major;
  
 --- linux-2.6.0-test6/drivers/isdn/hardware/eicon/dqueue.c     2003-06-14 12:18:22.000000000 -0700
 +++ 25/drivers/isdn/hardware/eicon/dqueue.c    2003-10-05 00:33:24.000000000 -0700
 @@ -1,10 +1,10 @@
--/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $
-+/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $
+-/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $
++/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $
   *
   * Driver for Eicon DIVA Server ISDN cards.
   * User Mode IDI Interface
 --- linux-2.6.0-test6/drivers/isdn/hardware/eicon/mntfunc.c    2003-09-27 18:57:44.000000000 -0700
 +++ 25/drivers/isdn/hardware/eicon/mntfunc.c   2003-10-05 00:33:24.000000000 -0700
 @@ -1,4 +1,4 @@
--/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $
-+/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $
+-/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $
++/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $
   *
   * Driver for Eicon DIVA Server ISDN cards.
   * Maint module
 --- linux-2.6.0-test6/drivers/isdn/hardware/eicon/os_capi.h    2003-06-14 12:18:25.000000000 -0700
 +++ 25/drivers/isdn/hardware/eicon/os_capi.h   2003-10-05 00:33:24.000000000 -0700
 @@ -1,10 +1,10 @@
--/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $
-+/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $
+-/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $
++/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $
   *
   * ISDN interface module for Eicon active cards DIVA.
   * CAPI Interface OS include files 
 --- linux-2.6.0-test6/drivers/isdn/hardware/eicon/platform.h   2003-09-27 18:57:44.000000000 -0700
 +++ 25/drivers/isdn/hardware/eicon/platform.h  2003-10-05 00:33:24.000000000 -0700
 @@ -1,4 +1,4 @@
--/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $
-+/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $
+-/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $
++/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $
   *
   * platform.h
   * 
 +++ 25/drivers/media/video/planb.c     2003-10-05 00:33:24.000000000 -0700
 @@ -27,7 +27,6 @@
  
- /* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $ */
+ /* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $ */
  
 -#include <linux/version.h>
  #include <linux/init.h>
 --- linux-2.6.0-test6/drivers/mtd/chips/map_rom.c      2003-06-14 12:18:24.000000000 -0700
 +++ 25/drivers/mtd/chips/map_rom.c     2003-10-05 00:33:24.000000000 -0700
 @@ -4,7 +4,6 @@
-  * $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $
+  * $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $
   */
  
 -#include <linux/version.h>
  #include <linux/hdlc.h>
  
  /* Version */
--static const char version[] = "$Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $ for Linux\n";
-+static const char version[] = "$Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $ for Linux\n";
+-static const char version[] = "$Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $ for Linux\n";
++static const char version[] = "$Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $ for Linux\n";
  static int debug;
  static int quartz;
  
index 00bb337..5411d9c 100644 (file)
@@ -1,4 +1,4 @@
-$Id: bproc-patch-2.4.20,v 1.2 2003/12/03 03:13:30 phil Exp $
+$Id: bproc-patch-2.4.20,v 1.3 2003/12/03 05:12:25 phil Exp $
 
 Index: linux/fs/exec.c
 ===================================================================
@@ -764,7 +764,7 @@ Index: linux/kernel/bproc_hook.c
 + *  along with this program; if not, write to the Free Software
 + *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 + *
-+ * $Id: bproc-patch-2.4.20,v 1.2 2003/12/03 03:13:30 phil Exp $
++ * $Id: bproc-patch-2.4.20,v 1.3 2003/12/03 05:12:25 phil Exp $
 + *-----------------------------------------------------------------------*/
 +#include <linux/kernel.h>
 +#include <linux/sched.h>
@@ -832,7 +832,7 @@ Index: linux/include/linux/bproc.h
 + *  along with this program; if not, write to the Free Software
 + *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 + *
-+ * $Id: bproc-patch-2.4.20,v 1.2 2003/12/03 03:13:30 phil Exp $
++ * $Id: bproc-patch-2.4.20,v 1.3 2003/12/03 05:12:25 phil Exp $
 + *-----------------------------------------------------------------------*/
 +#ifndef _LINUX_BPROC_H
 +#define _LINUX_BPROC_H
diff --git a/lustre/kernel_patches/patches/configurable-x86-stack-2.4.19-pre1.patch b/lustre/kernel_patches/patches/configurable-x86-stack-2.4.19-pre1.patch
new file mode 100644 (file)
index 0000000..3f79b5b
--- /dev/null
@@ -0,0 +1,302 @@
+Index: linux-2.4.19-pre1/arch/i386/kernel/entry.S
+===================================================================
+--- linux-2.4.19-pre1.orig/arch/i386/kernel/entry.S    2003-11-21 03:38:55.000000000 +0300
++++ linux-2.4.19-pre1/arch/i386/kernel/entry.S 2003-12-01 18:14:32.000000000 +0300
+@@ -45,6 +45,7 @@
+ #include <linux/linkage.h>
+ #include <asm/segment.h>
+ #include <asm/smp.h>
++#include <asm/current.h>
+ EBX           = 0x00
+ ECX           = 0x04
+@@ -128,10 +129,6 @@
+       .long 3b,6b;    \
+ .previous
+-#define GET_CURRENT(reg) \
+-      movl $-8192, reg; \
+-      andl %esp, reg
+-
+ ENTRY(lcall7)
+       pushfl                  # We get a different stack layout with call gates,
+       pushl %eax              # which has to be cleaned up later..
+@@ -144,7 +141,7 @@
+       movl %ecx,CS(%esp)      #
+       movl %esp,%ebx
+       pushl %ebx
+-      andl $-8192,%ebx        # GET_CURRENT
++      andl $-THREAD_SIZE,%ebx # GET_CURRENT
+       movl exec_domain(%ebx),%edx     # Get the execution domain
+       movl 4(%edx),%edx       # Get the lcall7 handler for the domain
+       pushl $0x7
+@@ -165,7 +162,7 @@
+       movl %ecx,CS(%esp)      #
+       movl %esp,%ebx
+       pushl %ebx
+-      andl $-8192,%ebx        # GET_CURRENT
++      andl $-THREAD_SIZE,%ebx # GET_CURRENT
+       movl exec_domain(%ebx),%edx     # Get the execution domain
+       movl 4(%edx),%edx       # Get the lcall7 handler for the domain
+       pushl $0x27
+Index: linux-2.4.19-pre1/arch/i386/kernel/smpboot.c
+===================================================================
+--- linux-2.4.19-pre1.orig/arch/i386/kernel/smpboot.c  2001-12-21 20:41:53.000000000 +0300
++++ linux-2.4.19-pre1/arch/i386/kernel/smpboot.c       2003-12-01 18:14:32.000000000 +0300
+@@ -819,7 +819,7 @@
+       /* So we see what's up   */
+       printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
+-      stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle);
++      stack_start.esp = (void *)idle->thread.esp;
+       /*
+        * This grunge runs the startup process for
+@@ -892,7 +892,7 @@
+                       Dprintk("CPU has booted.\n");
+               } else {
+                       boot_error= 1;
+-                      if (*((volatile unsigned char *)phys_to_virt(8192))
++                      if (*((volatile unsigned char *)phys_to_virt(THREAD_SIZE))
+                                       == 0xA5)
+                               /* trampoline started but...? */
+                               printk("Stuck ??\n");
+@@ -915,7 +915,7 @@
+       }
+       /* mark "stuck" area as not stuck */
+-      *((volatile unsigned long *)phys_to_virt(8192)) = 0;
++      *((volatile unsigned long *)phys_to_virt(THREAD_SIZE)) = 0;
+       if(clustered_apic_mode) {
+               printk("Restoring NMI vector\n");
+Index: linux-2.4.19-pre1/arch/i386/kernel/traps.c
+===================================================================
+--- linux-2.4.19-pre1.orig/arch/i386/kernel/traps.c    2003-12-01 18:11:31.000000000 +0300
++++ linux-2.4.19-pre1/arch/i386/kernel/traps.c 2003-12-01 18:14:32.000000000 +0300
+@@ -158,7 +158,7 @@
+       unsigned long esp = tsk->thread.esp;
+       /* User space on another CPU? */
+-      if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1))
++      if ((esp ^ (unsigned long)tsk) & ~(THREAD_SIZE - 1))
+               return;
+       show_trace((unsigned long *)esp);
+ }
+Index: linux-2.4.19-pre1/arch/i386/kernel/head.S
+===================================================================
+--- linux-2.4.19-pre1.orig/arch/i386/kernel/head.S     2003-11-20 19:01:35.000000000 +0300
++++ linux-2.4.19-pre1/arch/i386/kernel/head.S  2003-12-01 18:14:32.000000000 +0300
+@@ -15,6 +15,7 @@
+ #include <asm/page.h>
+ #include <asm/pgtable.h>
+ #include <asm/desc.h>
++#include <asm/current.h>
+ #define OLD_CL_MAGIC_ADDR     0x90020
+ #define OLD_CL_MAGIC          0xA33F
+@@ -320,7 +321,7 @@
+       ret
+ ENTRY(stack_start)
+-      .long SYMBOL_NAME(init_task_union)+8192
++      .long SYMBOL_NAME(init_task_union)+THREAD_SIZE
+       .long __KERNEL_DS
+ /* This is the default interrupt "handler" :-) */
+Index: linux-2.4.19-pre1/arch/i386/lib/getuser.S
+===================================================================
+--- linux-2.4.19-pre1.orig/arch/i386/lib/getuser.S     1998-01-13 00:42:52.000000000 +0300
++++ linux-2.4.19-pre1/arch/i386/lib/getuser.S  2003-12-01 18:14:32.000000000 +0300
+@@ -21,6 +21,10 @@
+  * as they get called from within inline assembly.
+  */
++/* Duplicated from asm/processor.h */
++#include <asm/current.h>
++#include <linux/config.h>
++
+ addr_limit = 12
+ .text
+@@ -28,7 +32,7 @@
+ .globl __get_user_1
+ __get_user_1:
+       movl %esp,%edx
+-      andl $0xffffe000,%edx
++      andl $~(THREAD_SIZE - 1),%edx
+       cmpl addr_limit(%edx),%eax
+       jae bad_get_user
+ 1:    movzbl (%eax),%edx
+@@ -41,7 +45,7 @@
+       addl $1,%eax
+       movl %esp,%edx
+       jc bad_get_user
+-      andl $0xffffe000,%edx
++      andl $~(THREAD_SIZE - 1),%edx
+       cmpl addr_limit(%edx),%eax
+       jae bad_get_user
+ 2:    movzwl -1(%eax),%edx
+@@ -54,7 +58,7 @@
+       addl $3,%eax
+       movl %esp,%edx
+       jc bad_get_user
+-      andl $0xffffe000,%edx
++      andl $~(THREAD_SIZE - 1),%edx
+       cmpl addr_limit(%edx),%eax
+       jae bad_get_user
+ 3:    movl -3(%eax),%edx
+Index: linux-2.4.19-pre1/arch/i386/config.in
+===================================================================
+--- linux-2.4.19-pre1.orig/arch/i386/config.in 2003-11-20 19:01:35.000000000 +0300
++++ linux-2.4.19-pre1/arch/i386/config.in      2003-12-01 18:14:32.000000000 +0300
+@@ -201,6 +201,29 @@
+ if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then
+    define_bool CONFIG_HAVE_DEC_LOCK y
+ fi
++
++choice 'Bigger Stack Size Support' \
++     "off    CONFIG_NOBIGSTACK \
++      16KB   CONFIG_STACK_SIZE_16KB \
++      32KB   CONFIG_STACK_SIZE_32KB \
++      64KB   CONFIG_STACK_SIZE_64KB" off
++
++if [ "$CONFIG_NOBIGSTACK" = "y" ]; then
++   define_int CONFIG_STACK_SIZE_SHIFT 1
++else
++  if [ "$CONFIG_STACK_SIZE_16KB" = "y" ]; then
++     define_int CONFIG_STACK_SIZE_SHIFT 2
++  else
++    if [ "$CONFIG_STACK_SIZE_32KB" = "y" ]; then
++      define_int CONFIG_STACK_SIZE_SHIFT 3
++    else
++      if [ "$CONFIG_STACK_SIZE_64KB" = "y" ]; then
++        define_int CONFIG_STACK_SIZE_SHIFT 4
++      fi
++    fi
++  fi
++fi
++ 
+ endmenu
+ mainmenu_option next_comment
+Index: linux-2.4.19-pre1/arch/i386/vmlinux.lds
+===================================================================
+--- linux-2.4.19-pre1.orig/arch/i386/vmlinux.lds       2003-11-20 19:01:35.000000000 +0300
++++ linux-2.4.19-pre1/arch/i386/vmlinux.lds    2003-12-01 18:14:32.000000000 +0300
+@@ -35,7 +35,8 @@
+   _edata = .;                 /* End of data section */
+-  . = ALIGN(8192);            /* init_task */
++/* choose the biggest of the possible stack sizes here? */
++  . = ALIGN(65536);           /* init_task */
+   .data.init_task : { *(.data.init_task) }
+   . = ALIGN(4096);            /* Init code and data */
+Index: linux-2.4.19-pre1/include/asm-i386/current.h
+===================================================================
+--- linux-2.4.19-pre1.orig/include/asm-i386/current.h  1998-08-15 03:35:22.000000000 +0400
++++ linux-2.4.19-pre1/include/asm-i386/current.h       2003-12-01 18:14:32.000000000 +0300
+@@ -1,15 +1,43 @@
+ #ifndef _I386_CURRENT_H
+ #define _I386_CURRENT_H
++#include <asm/page.h>
++
++/*
++ * Configurable stack sizes on i386, mainly for debugging purposes.
++ * (c) Balbir Singh
++ */
++
++#ifdef __ASSEMBLY__
++
++#define PAGE_SIZE      4096    /* as cannot handle 1UL << 12 */
++#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE)
++
++#define GET_CURRENT(reg) \
++        movl $-THREAD_SIZE, reg; \
++        andl %esp, reg
++
++#else  /* __ASSEMBLY__ */
++
++#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE)
++#define alloc_task_struct() \
++  ((struct task_struct *) __get_free_pages(GFP_KERNEL,CONFIG_STACK_SIZE_SHIFT))
++
++#define free_task_struct(p) \
++  free_pages((unsigned long) (p), CONFIG_STACK_SIZE_SHIFT)
++
++#define INIT_TASK_SIZE THREAD_SIZE
+ struct task_struct;
+ static inline struct task_struct * get_current(void)
+ {
+       struct task_struct *current;
+-      __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~8191UL));
++      __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~(THREAD_SIZE - 1)));
+       return current;
+  }
+  
+ #define current get_current()
++#endif /* __ASSEMBLY__ */
++
+ #endif /* !(_I386_CURRENT_H) */
+Index: linux-2.4.19-pre1/include/asm-i386/hw_irq.h
+===================================================================
+--- linux-2.4.19-pre1.orig/include/asm-i386/hw_irq.h   2003-11-21 02:59:05.000000000 +0300
++++ linux-2.4.19-pre1/include/asm-i386/hw_irq.h        2003-12-01 18:14:32.000000000 +0300
+@@ -15,6 +15,7 @@
+ #include <linux/config.h>
+ #include <asm/atomic.h>
+ #include <asm/irq.h>
++#include <asm/current.h>
+ /*
+  * IDT vectors usable for external interrupt sources start
+@@ -113,10 +114,6 @@
+ #define IRQ_NAME2(nr) nr##_interrupt(void)
+ #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
+-#define GET_CURRENT \
+-      "movl %esp, %ebx\n\t" \
+-      "andl $-8192, %ebx\n\t"
+-
+ /*
+  *    SMP has a few special interrupts for IPI messages
+  */
+Index: linux-2.4.19-pre1/include/asm-i386/processor.h
+===================================================================
+--- linux-2.4.19-pre1.orig/include/asm-i386/processor.h        2003-11-21 02:59:05.000000000 +0300
++++ linux-2.4.19-pre1/include/asm-i386/processor.h     2003-12-01 18:14:32.000000000 +0300
+@@ -14,6 +14,7 @@
+ #include <asm/types.h>
+ #include <asm/sigcontext.h>
+ #include <asm/cpufeature.h>
++#include <asm/current.h>
+ #include <linux/cache.h>
+ #include <linux/config.h>
+ #include <linux/threads.h>
+@@ -447,9 +448,6 @@
+ #define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019])
+ #define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022])
+-#define THREAD_SIZE (2*PAGE_SIZE)
+-#define alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1))
+-#define free_task_struct(p) free_pages((unsigned long) (p), 1)
+ #define get_task_struct(tsk)      atomic_inc(&virt_to_page(tsk)->count)
+ #define init_task     (init_task_union.task)
+Index: linux-2.4.19-pre1/include/linux/sched.h
+===================================================================
+--- linux-2.4.19-pre1.orig/include/linux/sched.h       2003-12-01 18:11:28.000000000 +0300
++++ linux-2.4.19-pre1/include/linux/sched.h    2003-12-01 18:14:32.000000000 +0300
+@@ -2,6 +2,7 @@
+ #define _LINUX_SCHED_H
+ #include <asm/param.h>        /* for HZ */
++#include <asm/current.h>      /* maybe for INIT_TASK_SIZE */
+ extern unsigned long event;
diff --git a/lustre/kernel_patches/patches/configurable-x86-stack-2.4.20-rh.patch b/lustre/kernel_patches/patches/configurable-x86-stack-2.4.20-rh.patch
new file mode 100644 (file)
index 0000000..f70b0d4
--- /dev/null
@@ -0,0 +1,311 @@
+Index: linux-2.4.20-rh-20.9/arch/i386/kernel/entry.S
+===================================================================
+--- linux-2.4.20-rh-20.9.orig/arch/i386/kernel/entry.S 2003-09-13 19:34:35.000000000 +0400
++++ linux-2.4.20-rh-20.9/arch/i386/kernel/entry.S      2003-12-01 18:02:14.000000000 +0300
+@@ -45,6 +45,7 @@
+ #include <linux/linkage.h>
+ #include <asm/segment.h>
+ #include <asm/smp.h>
++#include <asm/current.h>
+ EBX           = 0x00
+ ECX           = 0x04
+@@ -130,10 +131,6 @@
+       .long 3b,6b;    \
+ .previous
+-#define GET_CURRENT(reg) \
+-      movl $-8192, reg; \
+-      andl %esp, reg
+-
+ ENTRY(lcall7)
+       pushfl                  # We get a different stack layout with call gates,
+       pushl %eax              # which has to be cleaned up later..
+@@ -149,7 +146,7 @@
+       movl %ecx,CS(%esp)      #
+       movl %esp,%ebx
+       pushl %ebx
+-      andl $-8192,%ebx        # GET_CURRENT
++      andl $-THREAD_SIZE,%ebx # GET_CURRENT
+       movl exec_domain(%ebx),%edx     # Get the execution domain
+       movl 4(%edx),%edx       # Get the lcall7 handler for the domain
+       pushl $0x7
+@@ -173,7 +170,7 @@
+       movl %ecx,CS(%esp)      #
+       movl %esp,%ebx
+       pushl %ebx
+-      andl $-8192,%ebx        # GET_CURRENT
++      andl $-THREAD_SIZE,%ebx # GET_CURRENT
+       movl exec_domain(%ebx),%edx     # Get the execution domain
+       movl 4(%edx),%edx       # Get the lcall7 handler for the domain
+       pushl $0x27
+Index: linux-2.4.20-rh-20.9/arch/i386/kernel/smpboot.c
+===================================================================
+--- linux-2.4.20-rh-20.9.orig/arch/i386/kernel/smpboot.c       2003-09-13 19:34:35.000000000 +0400
++++ linux-2.4.20-rh-20.9/arch/i386/kernel/smpboot.c    2003-12-01 18:02:14.000000000 +0300
+@@ -811,7 +811,7 @@
+       /* So we see what's up   */
+       printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
+-      stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle);
++      stack_start.esp = (void *)idle->thread.esp;
+       /*
+        * This grunge runs the startup process for
+@@ -884,7 +884,7 @@
+                       Dprintk("CPU has booted.\n");
+               } else {
+                       boot_error= 1;
+-                      if (*((volatile unsigned char *)phys_to_virt(8192))
++                      if (*((volatile unsigned char *)phys_to_virt(THREAD_SIZE))
+                                       == 0xA5)
+                               /* trampoline started but...? */
+                               printk("Stuck ??\n");
+@@ -907,7 +907,7 @@
+       }
+       /* mark "stuck" area as not stuck */
+-      *((volatile unsigned long *)phys_to_virt(8192)) = 0;
++      *((volatile unsigned long *)phys_to_virt(THREAD_SIZE)) = 0;
+       if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ) {
+               printk("Restoring NMI vector\n");
+Index: linux-2.4.20-rh-20.9/arch/i386/kernel/traps.c
+===================================================================
+--- linux-2.4.20-rh-20.9.orig/arch/i386/kernel/traps.c 2003-09-13 19:34:35.000000000 +0400
++++ linux-2.4.20-rh-20.9/arch/i386/kernel/traps.c      2003-12-01 18:02:14.000000000 +0300
+@@ -161,7 +161,7 @@
+       unsigned long esp = tsk->thread.esp;
+       /* User space on another CPU? */
+-      if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1))
++      if ((esp ^ (unsigned long)tsk) & ~(THREAD_SIZE - 1))
+               return;
+       show_trace((unsigned long *)esp);
+ }
+Index: linux-2.4.20-rh-20.9/arch/i386/kernel/head.S
+===================================================================
+--- linux-2.4.20-rh-20.9.orig/arch/i386/kernel/head.S  2003-09-13 19:34:35.000000000 +0400
++++ linux-2.4.20-rh-20.9/arch/i386/kernel/head.S       2003-12-01 18:02:14.000000000 +0300
+@@ -15,6 +15,7 @@
+ #include <asm/page.h>
+ #include <asm/pgtable.h>
+ #include <asm/desc.h>
++#include <asm/current.h>
+ #define OLD_CL_MAGIC_ADDR     0x90020
+ #define OLD_CL_MAGIC          0xA33F
+@@ -315,7 +316,7 @@
+       ret
+ ENTRY(stack_start)
+-      .long SYMBOL_NAME(init_task_union)+8192
++      .long SYMBOL_NAME(init_task_union)+THREAD_SIZE
+       .long __KERNEL_DS
+ /* This is the default interrupt "handler" :-) */
+Index: linux-2.4.20-rh-20.9/arch/i386/kernel/irq.c
+===================================================================
+--- linux-2.4.20-rh-20.9.orig/arch/i386/kernel/irq.c   2003-12-01 17:42:59.000000000 +0300
++++ linux-2.4.20-rh-20.9/arch/i386/kernel/irq.c        2003-12-01 18:02:14.000000000 +0300
+@@ -581,7 +581,10 @@
+       long esp;
+       /* Debugging check for stack overflow: is there less than 1KB free? */
+-      __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (8191));
++      __asm__ __volatile__(
++              "andl %%esp,%0" 
++              : "=r" (esp) : "0" (THREAD_SIZE-1));
++
+       if (unlikely(esp < (sizeof(struct task_struct) + 1024))) {
+               extern void show_stack(unsigned long *);
+Index: linux-2.4.20-rh-20.9/arch/i386/lib/getuser.S
+===================================================================
+--- linux-2.4.20-rh-20.9.orig/arch/i386/lib/getuser.S  1998-01-13 00:42:52.000000000 +0300
++++ linux-2.4.20-rh-20.9/arch/i386/lib/getuser.S       2003-12-01 18:02:14.000000000 +0300
+@@ -21,6 +21,10 @@
+  * as they get called from within inline assembly.
+  */
++/* Duplicated from asm/processor.h */
++#include <asm/current.h>
++#include <linux/config.h>
++
+ addr_limit = 12
+ .text
+@@ -28,7 +32,7 @@
+ .globl __get_user_1
+ __get_user_1:
+       movl %esp,%edx
+-      andl $0xffffe000,%edx
++      andl $~(THREAD_SIZE - 1),%edx
+       cmpl addr_limit(%edx),%eax
+       jae bad_get_user
+ 1:    movzbl (%eax),%edx
+@@ -41,7 +45,7 @@
+       addl $1,%eax
+       movl %esp,%edx
+       jc bad_get_user
+-      andl $0xffffe000,%edx
++      andl $~(THREAD_SIZE - 1),%edx
+       cmpl addr_limit(%edx),%eax
+       jae bad_get_user
+ 2:    movzwl -1(%eax),%edx
+@@ -54,7 +58,7 @@
+       addl $3,%eax
+       movl %esp,%edx
+       jc bad_get_user
+-      andl $0xffffe000,%edx
++      andl $~(THREAD_SIZE - 1),%edx
+       cmpl addr_limit(%edx),%eax
+       jae bad_get_user
+ 3:    movl -3(%eax),%edx
+Index: linux-2.4.20-rh-20.9/arch/i386/config.in
+===================================================================
+--- linux-2.4.20-rh-20.9.orig/arch/i386/config.in      2003-09-13 19:34:34.000000000 +0400
++++ linux-2.4.20-rh-20.9/arch/i386/config.in   2003-12-01 18:02:14.000000000 +0300
+@@ -266,6 +266,29 @@
+ if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then
+    define_bool CONFIG_HAVE_DEC_LOCK y
+ fi
++
++choice 'Bigger Stack Size Support' \
++     "off    CONFIG_NOBIGSTACK \
++      16KB   CONFIG_STACK_SIZE_16KB \
++      32KB   CONFIG_STACK_SIZE_32KB \
++      64KB   CONFIG_STACK_SIZE_64KB" off
++
++if [ "$CONFIG_NOBIGSTACK" = "y" ]; then
++   define_int CONFIG_STACK_SIZE_SHIFT 1
++else
++  if [ "$CONFIG_STACK_SIZE_16KB" = "y" ]; then
++     define_int CONFIG_STACK_SIZE_SHIFT 2
++  else
++    if [ "$CONFIG_STACK_SIZE_32KB" = "y" ]; then
++      define_int CONFIG_STACK_SIZE_SHIFT 3
++    else
++      if [ "$CONFIG_STACK_SIZE_64KB" = "y" ]; then
++        define_int CONFIG_STACK_SIZE_SHIFT 4
++      fi
++    fi
++  fi
++fi
++ 
+ endmenu
+ mainmenu_option next_comment
+Index: linux-2.4.20-rh-20.9/arch/i386/vmlinux.lds
+===================================================================
+--- linux-2.4.20-rh-20.9.orig/arch/i386/vmlinux.lds    2003-09-13 19:34:24.000000000 +0400
++++ linux-2.4.20-rh-20.9/arch/i386/vmlinux.lds 2003-12-01 18:02:14.000000000 +0300
+@@ -38,7 +38,8 @@
+   _edata = .;                 /* End of data section */
+-  . = ALIGN(8192);            /* init_task */
++/* choose the biggest of the possible stack sizes here? */
++  . = ALIGN(65536);           /* init_task */
+   .data.init_task : { *(.data.init_task) }
+   . = ALIGN(4096);            /* Init code and data */
+Index: linux-2.4.20-rh-20.9/include/asm-i386/current.h
+===================================================================
+--- linux-2.4.20-rh-20.9.orig/include/asm-i386/current.h       1998-08-15 03:35:22.000000000 +0400
++++ linux-2.4.20-rh-20.9/include/asm-i386/current.h    2003-12-01 18:03:28.000000000 +0300
+@@ -1,15 +1,43 @@
+ #ifndef _I386_CURRENT_H
+ #define _I386_CURRENT_H
++#include <asm/page.h>
++
++/*
++ * Configurable stack sizes on i386, mainly for debugging purposes.
++ * (c) Balbir Singh
++ */
++
++#ifdef __ASSEMBLY__
++
++#define PAGE_SIZE      4096    /* as cannot handle 1UL << 12 */
++#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE)
++
++#define GET_CURRENT(reg) \
++        movl $-THREAD_SIZE, reg; \
++        andl %esp, reg
++
++#else  /* __ASSEMBLY__ */
++
++#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE)
++#define __alloc_task_struct() \
++  ((struct task_struct *) __get_free_pages(GFP_KERNEL,CONFIG_STACK_SIZE_SHIFT))
++
++#define __free_task_struct(p) \
++  free_pages((unsigned long) (p), CONFIG_STACK_SIZE_SHIFT)
++
++#define INIT_TASK_SIZE THREAD_SIZE
+ struct task_struct;
+ static inline struct task_struct * get_current(void)
+ {
+       struct task_struct *current;
+-      __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~8191UL));
++      __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~(THREAD_SIZE - 1)));
+       return current;
+  }
+  
+ #define current get_current()
++#endif /* __ASSEMBLY__ */
++
+ #endif /* !(_I386_CURRENT_H) */
+Index: linux-2.4.20-rh-20.9/include/asm-i386/hw_irq.h
+===================================================================
+--- linux-2.4.20-rh-20.9.orig/include/asm-i386/hw_irq.h        2003-11-13 17:35:48.000000000 +0300
++++ linux-2.4.20-rh-20.9/include/asm-i386/hw_irq.h     2003-12-01 18:02:14.000000000 +0300
+@@ -116,10 +116,6 @@
+ #define IRQ_NAME2(nr) nr##_interrupt(void)
+ #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
+-#define GET_CURRENT \
+-      "movl %esp, %ebx\n\t" \
+-      "andl $-8192, %ebx\n\t"
+-
+ /*
+  *    SMP has a few special interrupts for IPI messages
+  */
+Index: linux-2.4.20-rh-20.9/include/asm-i386/processor.h
+===================================================================
+--- linux-2.4.20-rh-20.9.orig/include/asm-i386/processor.h     2003-10-08 12:29:57.000000000 +0400
++++ linux-2.4.20-rh-20.9/include/asm-i386/processor.h  2003-12-01 18:02:14.000000000 +0300
+@@ -14,6 +14,7 @@
+ #include <asm/types.h>
+ #include <asm/sigcontext.h>
+ #include <asm/cpufeature.h>
++#include <asm/current.h>
+ #include <linux/cache.h>
+ #include <linux/config.h>
+ #include <linux/threads.h>
+@@ -469,10 +470,6 @@
+ #define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019])
+ #define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022])
+-#define THREAD_SIZE (2*PAGE_SIZE)
+-#define __alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1))
+-#define __free_task_struct(p) do { BUG_ON((p)->state < TASK_ZOMBIE); free_pages((unsigned long) (p), 1); } while (0)
+-
+ #define init_task     (init_task_union.task)
+ #define init_stack    (init_task_union.stack)
+Index: linux-2.4.20-rh-20.9/include/linux/sched.h
+===================================================================
+--- linux-2.4.20-rh-20.9.orig/include/linux/sched.h    2003-11-13 17:35:48.000000000 +0300
++++ linux-2.4.20-rh-20.9/include/linux/sched.h 2003-12-01 18:02:14.000000000 +0300
+@@ -2,6 +2,7 @@
+ #define _LINUX_SCHED_H
+ #include <asm/param.h>        /* for HZ */
++#include <asm/current.h>      /* maybe for INIT_TASK_SIZE */
+ extern unsigned long event;
diff --git a/lustre/kernel_patches/patches/configurable-x86-stack-2.4.20.patch b/lustre/kernel_patches/patches/configurable-x86-stack-2.4.20.patch
new file mode 100644 (file)
index 0000000..4fc4938
--- /dev/null
@@ -0,0 +1,318 @@
+Index: linux-2.4.20/arch/i386/kernel/entry.S
+===================================================================
+--- linux-2.4.20.orig/arch/i386/kernel/entry.S 2003-05-16 05:28:59.000000000 +0400
++++ linux-2.4.20/arch/i386/kernel/entry.S      2003-12-01 16:54:50.000000000 +0300
+@@ -45,6 +45,7 @@
+ #include <linux/linkage.h>
+ #include <asm/segment.h>
+ #include <asm/smp.h>
++#include <asm/current.h>
+ EBX           = 0x00
+ ECX           = 0x04
+@@ -130,10 +131,6 @@
+       .long 3b,6b;    \
+ .previous
+-#define GET_CURRENT(reg) \
+-      movl $-8192, reg; \
+-      andl %esp, reg
+-
+ ENTRY(lcall7)
+       pushfl                  # We get a different stack layout with call gates,
+       pushl %eax              # which has to be cleaned up later..
+@@ -149,7 +146,7 @@
+       movl %ecx,CS(%esp)      #
+       movl %esp,%ebx
+       pushl %ebx
+-      andl $-8192,%ebx        # GET_CURRENT
++      andl $-THREAD_SIZE,%ebx # GET_CURRENT
+       movl exec_domain(%ebx),%edx     # Get the execution domain
+       movl 4(%edx),%edx       # Get the lcall7 handler for the domain
+       pushl $0x7
+@@ -173,7 +170,7 @@
+       movl %ecx,CS(%esp)      #
+       movl %esp,%ebx
+       pushl %ebx
+-      andl $-8192,%ebx        # GET_CURRENT
++      andl $-THREAD_SIZE,%ebx # GET_CURRENT
+       movl exec_domain(%ebx),%edx     # Get the execution domain
+       movl 4(%edx),%edx       # Get the lcall7 handler for the domain
+       pushl $0x27
+Index: linux-2.4.20/arch/i386/kernel/smpboot.c
+===================================================================
+--- linux-2.4.20.orig/arch/i386/kernel/smpboot.c       2003-05-16 05:28:59.000000000 +0400
++++ linux-2.4.20/arch/i386/kernel/smpboot.c    2003-12-01 16:54:50.000000000 +0300
+@@ -819,7 +819,7 @@
+       /* So we see what's up   */
+       printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
+-      stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle);
++      stack_start.esp = (void *)idle->thread.esp;
+       /*
+        * This grunge runs the startup process for
+@@ -892,7 +892,7 @@
+                       Dprintk("CPU has booted.\n");
+               } else {
+                       boot_error= 1;
+-                      if (*((volatile unsigned char *)phys_to_virt(8192))
++                      if (*((volatile unsigned char *)phys_to_virt(THREAD_SIZE))
+                                       == 0xA5)
+                               /* trampoline started but...? */
+                               printk("Stuck ??\n");
+@@ -915,7 +915,7 @@
+       }
+       /* mark "stuck" area as not stuck */
+-      *((volatile unsigned long *)phys_to_virt(8192)) = 0;
++      *((volatile unsigned long *)phys_to_virt(THREAD_SIZE)) = 0;
+       if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ) {
+               printk("Restoring NMI vector\n");
+Index: linux-2.4.20/arch/i386/kernel/traps.c
+===================================================================
+--- linux-2.4.20.orig/arch/i386/kernel/traps.c 2003-12-01 16:53:23.000000000 +0300
++++ linux-2.4.20/arch/i386/kernel/traps.c      2003-12-01 16:54:50.000000000 +0300
+@@ -158,7 +158,7 @@
+       unsigned long esp = tsk->thread.esp;
+       /* User space on another CPU? */
+-      if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1))
++      if ((esp ^ (unsigned long)tsk) & ~(THREAD_SIZE - 1))
+               return;
+       show_trace((unsigned long *)esp);
+ }
+Index: linux-2.4.20/arch/i386/kernel/head.S
+===================================================================
+--- linux-2.4.20.orig/arch/i386/kernel/head.S  2003-05-16 05:28:28.000000000 +0400
++++ linux-2.4.20/arch/i386/kernel/head.S       2003-12-01 16:54:50.000000000 +0300
+@@ -15,6 +15,7 @@
+ #include <asm/page.h>
+ #include <asm/pgtable.h>
+ #include <asm/desc.h>
++#include <asm/current.h>
+ #define OLD_CL_MAGIC_ADDR     0x90020
+ #define OLD_CL_MAGIC          0xA33F
+@@ -320,7 +321,7 @@
+       ret
+ ENTRY(stack_start)
+-      .long SYMBOL_NAME(init_task_union)+8192
++      .long SYMBOL_NAME(init_task_union)+THREAD_SIZE
+       .long __KERNEL_DS
+ /* This is the default interrupt "handler" :-) */
+Index: linux-2.4.20/arch/i386/kernel/irq.c
+===================================================================
+--- linux-2.4.20.orig/arch/i386/kernel/irq.c   2003-05-16 05:28:59.000000000 +0400
++++ linux-2.4.20/arch/i386/kernel/irq.c        2003-12-01 16:57:05.000000000 +0300
+@@ -581,7 +581,10 @@
+       long esp;
+       /* Debugging check for stack overflow: is there less than 1KB free? */
+-      __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (8191));
++      __asm__ __volatile__(
++              "andl %%esp,%0" 
++              : "=r" (esp) : "0" (THREAD_SIZE-1));
++
+       if (unlikely(esp < (sizeof(struct task_struct) + 1024))) {
+               extern void show_stack(unsigned long *);
+Index: linux-2.4.20/arch/i386/lib/getuser.S
+===================================================================
+--- linux-2.4.20.orig/arch/i386/lib/getuser.S  1998-01-13 00:42:52.000000000 +0300
++++ linux-2.4.20/arch/i386/lib/getuser.S       2003-12-01 16:54:50.000000000 +0300
+@@ -21,6 +21,10 @@
+  * as they get called from within inline assembly.
+  */
++/* THREAD_SIZE is provided by asm/current.h */
++#include <asm/current.h>
++#include <linux/config.h>
++
+ addr_limit = 12
+ .text
+@@ -28,7 +32,7 @@
+ .globl __get_user_1
+ __get_user_1:
+       movl %esp,%edx
+-      andl $0xffffe000,%edx
++      andl $~(THREAD_SIZE - 1),%edx
+       cmpl addr_limit(%edx),%eax
+       jae bad_get_user
+ 1:    movzbl (%eax),%edx
+@@ -41,7 +45,7 @@
+       addl $1,%eax
+       movl %esp,%edx
+       jc bad_get_user
+-      andl $0xffffe000,%edx
++      andl $~(THREAD_SIZE - 1),%edx
+       cmpl addr_limit(%edx),%eax
+       jae bad_get_user
+ 2:    movzwl -1(%eax),%edx
+@@ -54,7 +58,7 @@
+       addl $3,%eax
+       movl %esp,%edx
+       jc bad_get_user
+-      andl $0xffffe000,%edx
++      andl $~(THREAD_SIZE - 1),%edx
+       cmpl addr_limit(%edx),%eax
+       jae bad_get_user
+ 3:    movl -3(%eax),%edx
+Index: linux-2.4.20/arch/i386/config.in
+===================================================================
+--- linux-2.4.20.orig/arch/i386/config.in      2003-05-16 05:28:59.000000000 +0400
++++ linux-2.4.20/arch/i386/config.in   2003-12-01 17:01:56.000000000 +0300
+@@ -227,6 +227,29 @@
+ if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then
+    define_bool CONFIG_HAVE_DEC_LOCK y
+ fi
++
++choice 'Bigger Stack Size Support' \
++     "off    CONFIG_NOBIGSTACK \
++      16KB   CONFIG_STACK_SIZE_16KB \
++      32KB   CONFIG_STACK_SIZE_32KB \
++      64KB   CONFIG_STACK_SIZE_64KB" off
++
++if [ "$CONFIG_NOBIGSTACK" = "y" ]; then
++   define_int CONFIG_STACK_SIZE_SHIFT 1
++else
++  if [ "$CONFIG_STACK_SIZE_16KB" = "y" ]; then
++     define_int CONFIG_STACK_SIZE_SHIFT 2
++  else
++    if [ "$CONFIG_STACK_SIZE_32KB" = "y" ]; then
++      define_int CONFIG_STACK_SIZE_SHIFT 3
++    else
++      if [ "$CONFIG_STACK_SIZE_64KB" = "y" ]; then
++        define_int CONFIG_STACK_SIZE_SHIFT 4
++      fi
++    fi
++  fi
++fi
++ 
+ endmenu
+ mainmenu_option next_comment
+Index: linux-2.4.20/arch/i386/vmlinux.lds
+===================================================================
+--- linux-2.4.20.orig/arch/i386/vmlinux.lds    2003-05-16 05:28:09.000000000 +0400
++++ linux-2.4.20/arch/i386/vmlinux.lds 2003-12-01 16:54:50.000000000 +0300
+@@ -35,7 +35,8 @@
+   _edata = .;                 /* End of data section */
+-  . = ALIGN(8192);            /* init_task */
+/* align to the biggest of the possible stack sizes (64KB) */
++  . = ALIGN(65536);           /* init_task */
+   .data.init_task : { *(.data.init_task) }
+   . = ALIGN(4096);            /* Init code and data */
+Index: linux-2.4.20/include/asm-i386/current.h
+===================================================================
+--- linux-2.4.20.orig/include/asm-i386/current.h       1998-08-15 03:35:22.000000000 +0400
++++ linux-2.4.20/include/asm-i386/current.h    2003-12-01 16:54:50.000000000 +0300
+@@ -1,15 +1,43 @@
+ #ifndef _I386_CURRENT_H
+ #define _I386_CURRENT_H
++#include <asm/page.h>
++
++/*
++ * Configurable stack sizes on i386, mainly for debugging purposes.
++ * (c) Balbir Singh
++ */
++
++#ifdef __ASSEMBLY__
++
++#define PAGE_SIZE      4096    /* the assembler (as) cannot handle 1UL << 12 */
++#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE)
++
++#define GET_CURRENT(reg) \
++        movl $-THREAD_SIZE, reg; \
++        andl %esp, reg
++
++#else  /* __ASSEMBLY__ */
++
++#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE)
++#define alloc_task_struct() \
++  ((struct task_struct *) __get_free_pages(GFP_KERNEL,CONFIG_STACK_SIZE_SHIFT))
++
++#define free_task_struct(p) \
++  free_pages((unsigned long) (p), CONFIG_STACK_SIZE_SHIFT)
++
++#define INIT_TASK_SIZE THREAD_SIZE
+ struct task_struct;
+ static inline struct task_struct * get_current(void)
+ {
+       struct task_struct *current;
+-      __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~8191UL));
++      __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~(THREAD_SIZE - 1)));
+       return current;
+  }
+  
+ #define current get_current()
++#endif /* __ASSEMBLY__ */
++
+ #endif /* !(_I386_CURRENT_H) */
+Index: linux-2.4.20/include/asm-i386/hw_irq.h
+===================================================================
+--- linux-2.4.20.orig/include/asm-i386/hw_irq.h        2003-11-13 17:17:28.000000000 +0300
++++ linux-2.4.20/include/asm-i386/hw_irq.h     2003-12-01 16:54:50.000000000 +0300
+@@ -15,6 +15,7 @@
+ #include <linux/config.h>
+ #include <asm/atomic.h>
+ #include <asm/irq.h>
++#include <asm/current.h>
+ /*
+  * IDT vectors usable for external interrupt sources start
+@@ -113,10 +114,6 @@
+ #define IRQ_NAME2(nr) nr##_interrupt(void)
+ #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
+-#define GET_CURRENT \
+-      "movl %esp, %ebx\n\t" \
+-      "andl $-8192, %ebx\n\t"
+-
+ /*
+  *    SMP has a few special interrupts for IPI messages
+  */
+Index: linux-2.4.20/include/asm-i386/processor.h
+===================================================================
+--- linux-2.4.20.orig/include/asm-i386/processor.h     2003-11-21 17:39:47.000000000 +0300
++++ linux-2.4.20/include/asm-i386/processor.h  2003-12-01 16:54:50.000000000 +0300
+@@ -14,6 +14,7 @@
+ #include <asm/types.h>
+ #include <asm/sigcontext.h>
+ #include <asm/cpufeature.h>
++#include <asm/current.h>
+ #include <linux/cache.h>
+ #include <linux/config.h>
+ #include <linux/threads.h>
+@@ -451,9 +452,6 @@
+ #define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019])
+ #define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022])
+-#define THREAD_SIZE (2*PAGE_SIZE)
+-#define alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1))
+-#define free_task_struct(p) free_pages((unsigned long) (p), 1)
+ #define get_task_struct(tsk)      atomic_inc(&virt_to_page(tsk)->count)
+ #define init_task     (init_task_union.task)
+Index: linux-2.4.20/include/linux/sched.h
+===================================================================
+--- linux-2.4.20.orig/include/linux/sched.h    2003-11-21 17:39:47.000000000 +0300
++++ linux-2.4.20/include/linux/sched.h 2003-12-01 16:54:50.000000000 +0300
+@@ -2,6 +2,7 @@
+ #define _LINUX_SCHED_H
+ #include <asm/param.h>        /* for HZ */
++#include <asm/current.h>      /* maybe for INIT_TASK_SIZE */
+ extern unsigned long event;
diff --git a/lustre/kernel_patches/patches/configurable-x86-stack-2.4.22-rh.patch b/lustre/kernel_patches/patches/configurable-x86-stack-2.4.22-rh.patch
new file mode 100644 (file)
index 0000000..856425a
--- /dev/null
@@ -0,0 +1,311 @@
+Index: linux-2.4.22-ac1/arch/i386/kernel/entry.S
+===================================================================
+--- linux-2.4.22-ac1.orig/arch/i386/kernel/entry.S     2003-09-25 14:16:34.000000000 +0400
++++ linux-2.4.22-ac1/arch/i386/kernel/entry.S  2003-12-01 18:34:08.000000000 +0300
+@@ -46,6 +46,7 @@
+ #include <asm/segment.h>
+ #include <asm/smp.h>
+ #include <asm/unistd.h>
++#include <asm/current.h>
+ EBX           = 0x00
+ ECX           = 0x04
+@@ -131,10 +132,6 @@
+       .long 3b,6b;    \
+ .previous
+-#define GET_CURRENT(reg) \
+-      movl $-8192, reg; \
+-      andl %esp, reg
+-
+ ENTRY(lcall7)
+       pushfl                  # We get a different stack layout with call gates,
+       pushl %eax              # which has to be cleaned up later..
+@@ -150,7 +147,7 @@
+       movl %ecx,CS(%esp)      #
+       movl %esp,%ebx
+       pushl %ebx
+-      andl $-8192,%ebx        # GET_CURRENT
++      andl $-THREAD_SIZE,%ebx # GET_CURRENT
+       movl exec_domain(%ebx),%edx     # Get the execution domain
+       movl 4(%edx),%edx       # Get the lcall7 handler for the domain
+       pushl $0x7
+@@ -174,7 +171,7 @@
+       movl %ecx,CS(%esp)      #
+       movl %esp,%ebx
+       pushl %ebx
+-      andl $-8192,%ebx        # GET_CURRENT
++      andl $-THREAD_SIZE,%ebx # GET_CURRENT
+       movl exec_domain(%ebx),%edx     # Get the execution domain
+       movl 4(%edx),%edx       # Get the lcall7 handler for the domain
+       pushl $0x27
+Index: linux-2.4.22-ac1/arch/i386/kernel/smpboot.c
+===================================================================
+--- linux-2.4.22-ac1.orig/arch/i386/kernel/smpboot.c   2003-09-25 14:16:28.000000000 +0400
++++ linux-2.4.22-ac1/arch/i386/kernel/smpboot.c        2003-12-01 18:34:08.000000000 +0300
+@@ -814,7 +814,7 @@
+       /* So we see what's up   */
+       printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
+-      stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle);
++      stack_start.esp = (void *)idle->thread.esp;
+       /*
+        * This grunge runs the startup process for
+@@ -887,7 +887,7 @@
+                       Dprintk("CPU has booted.\n");
+               } else {
+                       boot_error= 1;
+-                      if (*((volatile unsigned char *)phys_to_virt(8192))
++                      if (*((volatile unsigned char *)phys_to_virt(THREAD_SIZE))
+                                       == 0xA5)
+                               /* trampoline started but...? */
+                               printk("Stuck ??\n");
+@@ -910,7 +910,7 @@
+       }
+       /* mark "stuck" area as not stuck */
+-      *((volatile unsigned long *)phys_to_virt(8192)) = 0;
++      *((volatile unsigned long *)phys_to_virt(THREAD_SIZE)) = 0;
+       if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ) {
+               printk("Restoring NMI vector\n");
+Index: linux-2.4.22-ac1/arch/i386/kernel/traps.c
+===================================================================
+--- linux-2.4.22-ac1.orig/arch/i386/kernel/traps.c     2003-09-25 14:16:29.000000000 +0400
++++ linux-2.4.22-ac1/arch/i386/kernel/traps.c  2003-12-01 18:34:08.000000000 +0300
+@@ -161,7 +161,7 @@
+       unsigned long esp = tsk->thread.esp;
+       /* User space on another CPU? */
+-      if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1))
++      if ((esp ^ (unsigned long)tsk) & ~(THREAD_SIZE - 1))
+               return;
+       show_trace((unsigned long *)esp);
+ }
+Index: linux-2.4.22-ac1/arch/i386/kernel/head.S
+===================================================================
+--- linux-2.4.22-ac1.orig/arch/i386/kernel/head.S      2003-09-25 14:16:27.000000000 +0400
++++ linux-2.4.22-ac1/arch/i386/kernel/head.S   2003-12-01 18:34:08.000000000 +0300
+@@ -15,6 +15,7 @@
+ #include <asm/page.h>
+ #include <asm/pgtable.h>
+ #include <asm/desc.h>
++#include <asm/current.h>
+ #define OLD_CL_MAGIC_ADDR     0x90020
+ #define OLD_CL_MAGIC          0xA33F
+@@ -315,7 +316,7 @@
+       ret
+ ENTRY(stack_start)
+-      .long SYMBOL_NAME(init_task_union)+8192
++      .long SYMBOL_NAME(init_task_union)+THREAD_SIZE
+       .long __KERNEL_DS
+ /* This is the default interrupt "handler" :-) */
+Index: linux-2.4.22-ac1/arch/i386/kernel/irq.c
+===================================================================
+--- linux-2.4.22-ac1.orig/arch/i386/kernel/irq.c       2003-09-25 14:16:18.000000000 +0400
++++ linux-2.4.22-ac1/arch/i386/kernel/irq.c    2003-12-01 18:34:08.000000000 +0300
+@@ -581,7 +581,10 @@
+       long esp;
+       /* Debugging check for stack overflow: is there less than 1KB free? */
+-      __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (8191));
++      __asm__ __volatile__(
++              "andl %%esp,%0" 
++              : "=r" (esp) : "0" (THREAD_SIZE-1));
++
+       if (unlikely(esp < (sizeof(struct task_struct) + 1024))) {
+               extern void show_stack(unsigned long *);
+Index: linux-2.4.22-ac1/arch/i386/lib/getuser.S
+===================================================================
+--- linux-2.4.22-ac1.orig/arch/i386/lib/getuser.S      1998-01-13 00:42:52.000000000 +0300
++++ linux-2.4.22-ac1/arch/i386/lib/getuser.S   2003-12-01 18:34:08.000000000 +0300
+@@ -21,6 +21,10 @@
+  * as they get called from within inline assembly.
+  */
++/* THREAD_SIZE is provided by asm/current.h */
++#include <asm/current.h>
++#include <linux/config.h>
++
+ addr_limit = 12
+ .text
+@@ -28,7 +32,7 @@
+ .globl __get_user_1
+ __get_user_1:
+       movl %esp,%edx
+-      andl $0xffffe000,%edx
++      andl $~(THREAD_SIZE - 1),%edx
+       cmpl addr_limit(%edx),%eax
+       jae bad_get_user
+ 1:    movzbl (%eax),%edx
+@@ -41,7 +45,7 @@
+       addl $1,%eax
+       movl %esp,%edx
+       jc bad_get_user
+-      andl $0xffffe000,%edx
++      andl $~(THREAD_SIZE - 1),%edx
+       cmpl addr_limit(%edx),%eax
+       jae bad_get_user
+ 2:    movzwl -1(%eax),%edx
+@@ -54,7 +58,7 @@
+       addl $3,%eax
+       movl %esp,%edx
+       jc bad_get_user
+-      andl $0xffffe000,%edx
++      andl $~(THREAD_SIZE - 1),%edx
+       cmpl addr_limit(%edx),%eax
+       jae bad_get_user
+ 3:    movl -3(%eax),%edx
+Index: linux-2.4.22-ac1/arch/i386/config.in
+===================================================================
+--- linux-2.4.22-ac1.orig/arch/i386/config.in  2003-09-25 14:16:34.000000000 +0400
++++ linux-2.4.22-ac1/arch/i386/config.in       2003-12-01 18:34:08.000000000 +0300
+@@ -304,6 +304,29 @@
+ if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then
+    define_bool CONFIG_HAVE_DEC_LOCK y
+ fi
++
++choice 'Bigger Stack Size Support' \
++     "off    CONFIG_NOBIGSTACK \
++      16KB   CONFIG_STACK_SIZE_16KB \
++      32KB   CONFIG_STACK_SIZE_32KB \
++      64KB   CONFIG_STACK_SIZE_64KB" off
++
++if [ "$CONFIG_NOBIGSTACK" = "y" ]; then
++   define_int CONFIG_STACK_SIZE_SHIFT 1
++else
++  if [ "$CONFIG_STACK_SIZE_16KB" = "y" ]; then
++     define_int CONFIG_STACK_SIZE_SHIFT 2
++  else
++    if [ "$CONFIG_STACK_SIZE_32KB" = "y" ]; then
++      define_int CONFIG_STACK_SIZE_SHIFT 3
++    else
++      if [ "$CONFIG_STACK_SIZE_64KB" = "y" ]; then
++        define_int CONFIG_STACK_SIZE_SHIFT 4
++      fi
++    fi
++  fi
++fi
++ 
+ endmenu
+ mainmenu_option next_comment
+Index: linux-2.4.22-ac1/arch/i386/vmlinux.lds
+===================================================================
+--- linux-2.4.22-ac1.orig/arch/i386/vmlinux.lds        2003-09-25 14:16:28.000000000 +0400
++++ linux-2.4.22-ac1/arch/i386/vmlinux.lds     2003-12-01 18:34:08.000000000 +0300
+@@ -38,7 +38,8 @@
+   _edata = .;                 /* End of data section */
+-  . = ALIGN(8192);            /* init_task */
+/* align to the biggest of the possible stack sizes (64KB) */
++  . = ALIGN(65536);           /* init_task */
+   .data.init_task : { *(.data.init_task) }
+   . = ALIGN(4096);            /* Init code and data */
+Index: linux-2.4.22-ac1/include/asm-i386/current.h
+===================================================================
+--- linux-2.4.22-ac1.orig/include/asm-i386/current.h   1998-08-15 03:35:22.000000000 +0400
++++ linux-2.4.22-ac1/include/asm-i386/current.h        2003-12-01 18:34:16.000000000 +0300
+@@ -1,15 +1,43 @@
+ #ifndef _I386_CURRENT_H
+ #define _I386_CURRENT_H
++#include <asm/page.h>
++
++/*
++ * Configurable stack sizes on i386, mainly for debugging purposes.
++ * (c) Balbir Singh
++ */
++
++#ifdef __ASSEMBLY__
++
++#define PAGE_SIZE      4096    /* the assembler (as) cannot handle 1UL << 12 */
++#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE)
++
++#define GET_CURRENT(reg) \
++        movl $-THREAD_SIZE, reg; \
++        andl %esp, reg
++
++#else  /* __ASSEMBLY__ */
++
++#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE)
++#define __alloc_task_struct() \
++  ((struct task_struct *) __get_free_pages(GFP_KERNEL,CONFIG_STACK_SIZE_SHIFT))
++
++#define __free_task_struct(p) \
++  free_pages((unsigned long) (p), CONFIG_STACK_SIZE_SHIFT)
++
++#define INIT_TASK_SIZE THREAD_SIZE
+ struct task_struct;
+ static inline struct task_struct * get_current(void)
+ {
+       struct task_struct *current;
+-      __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~8191UL));
++      __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~(THREAD_SIZE - 1)));
+       return current;
+  }
+  
+ #define current get_current()
++#endif /* __ASSEMBLY__ */
++
+ #endif /* !(_I386_CURRENT_H) */
+Index: linux-2.4.22-ac1/include/asm-i386/hw_irq.h
+===================================================================
+--- linux-2.4.22-ac1.orig/include/asm-i386/hw_irq.h    2003-09-26 00:54:45.000000000 +0400
++++ linux-2.4.22-ac1/include/asm-i386/hw_irq.h 2003-12-01 18:34:08.000000000 +0300
+@@ -114,10 +114,6 @@
+ #define IRQ_NAME2(nr) nr##_interrupt(void)
+ #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
+-#define GET_CURRENT \
+-      "movl %esp, %ebx\n\t" \
+-      "andl $-8192, %ebx\n\t"
+-
+ /*
+  *    SMP has a few special interrupts for IPI messages
+  */
+Index: linux-2.4.22-ac1/include/asm-i386/processor.h
+===================================================================
+--- linux-2.4.22-ac1.orig/include/asm-i386/processor.h 2003-09-26 00:54:44.000000000 +0400
++++ linux-2.4.22-ac1/include/asm-i386/processor.h      2003-12-01 18:34:08.000000000 +0300
+@@ -14,6 +14,7 @@
+ #include <asm/types.h>
+ #include <asm/sigcontext.h>
+ #include <asm/cpufeature.h>
++#include <asm/current.h>
+ #include <linux/cache.h>
+ #include <linux/config.h>
+ #include <linux/threads.h>
+@@ -465,10 +466,6 @@
+ #define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019])
+ #define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022])
+-#define THREAD_SIZE (2*PAGE_SIZE)
+-#define __alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1))
+-#define __free_task_struct(p) do { BUG_ON((p)->state < TASK_ZOMBIE); free_pages((unsigned long) (p), 1); } while (0)
+-
+ #define init_task     (init_task_union.task)
+ #define init_stack    (init_task_union.stack)
+Index: linux-2.4.22-ac1/include/linux/sched.h
+===================================================================
+--- linux-2.4.22-ac1.orig/include/linux/sched.h        2003-11-13 18:21:42.000000000 +0300
++++ linux-2.4.22-ac1/include/linux/sched.h     2003-12-01 18:34:08.000000000 +0300
+@@ -2,6 +2,7 @@
+ #define _LINUX_SCHED_H
+ #include <asm/param.h>        /* for HZ */
++#include <asm/current.h>      /* maybe for INIT_TASK_SIZE */
+ extern unsigned long event;
index a8489e6..97cd9dc 100644 (file)
@@ -1,7 +1,7 @@
 Index: linux-2.4.18-chaos/include/linux/mm.h
 ===================================================================
---- linux-2.4.18-chaos.orig/include/linux/mm.h 2003-11-13 17:06:48.000000000 +0300
-+++ linux-2.4.18-chaos/include/linux/mm.h      2003-11-17 15:46:32.000000000 +0300
+--- linux-2.4.18-chaos.orig/include/linux/mm.h 2003-11-23 00:07:20.000000000 +0300
++++ linux-2.4.18-chaos/include/linux/mm.h      2003-11-23 00:07:23.000000000 +0300
 @@ -677,6 +677,7 @@
  #define __GFP_IO      0x40    /* Can start low memory physical IO? */
  #define __GFP_HIGHIO  0x80    /* Can start high mem physical IO? */
@@ -20,8 +20,8 @@ Index: linux-2.4.18-chaos/include/linux/mm.h
     platforms, used as appropriate on others */
 Index: linux-2.4.18-chaos/mm/page_alloc.c
 ===================================================================
---- linux-2.4.18-chaos.orig/mm/page_alloc.c    2003-11-13 17:06:47.000000000 +0300
-+++ linux-2.4.18-chaos/mm/page_alloc.c 2003-11-17 15:49:11.000000000 +0300
+--- linux-2.4.18-chaos.orig/mm/page_alloc.c    2003-11-23 00:07:20.000000000 +0300
++++ linux-2.4.18-chaos/mm/page_alloc.c 2003-12-02 23:12:31.000000000 +0300
 @@ -554,7 +554,7 @@
        /*
         * Oh well, we didn't succeed.
@@ -31,10 +31,21 @@ Index: linux-2.4.18-chaos/mm/page_alloc.c
                /*
                 * Are we dealing with a higher order allocation?
                 *
+@@ -628,7 +628,9 @@
+               /* XXX: is pages_min/4 a good amount to reserve for this? */
+               min += z->pages_min / 4;
+-              if (z->free_pages > min || ((current->flags & PF_MEMALLOC) && !in_interrupt())) {
++              if (z->free_pages > min ||
++                      (((current->flags & PF_MEMALLOC) || (gfp_mask & __GFP_MEMALLOC))
++                       && !in_interrupt())) {
+                       page = rmqueue(z, order);
+                       if (page)
+                               return page;
 Index: linux-2.4.18-chaos/include/linux/slab.h
 ===================================================================
 --- linux-2.4.18-chaos.orig/include/linux/slab.h       2003-07-28 17:52:18.000000000 +0400
-+++ linux-2.4.18-chaos/include/linux/slab.h    2003-11-17 15:46:32.000000000 +0300
++++ linux-2.4.18-chaos/include/linux/slab.h    2003-11-23 00:07:23.000000000 +0300
 @@ -23,6 +23,7 @@
  #define       SLAB_KERNEL             GFP_KERNEL
  #define       SLAB_NFS                GFP_NFS
@@ -46,7 +57,7 @@ Index: linux-2.4.18-chaos/include/linux/slab.h
 Index: linux-2.4.18-chaos/mm/slab.c
 ===================================================================
 --- linux-2.4.18-chaos.orig/mm/slab.c  2003-07-28 17:52:20.000000000 +0400
-+++ linux-2.4.18-chaos/mm/slab.c       2003-11-17 15:46:32.000000000 +0300
++++ linux-2.4.18-chaos/mm/slab.c       2003-11-23 00:07:23.000000000 +0300
 @@ -1116,7 +1116,7 @@
        /* Be lazy and only check for valid flags here,
         * keeping it out of the critical path in kmem_cache_alloc().
index 3f37e44..79caa76 100644 (file)
@@ -32,6 +32,17 @@ Index: linux-2.4.20-rh-20.9/mm/page_alloc.c
                /*
                 * Are we dealing with a higher order allocation?
                 *
+@@ -583,7 +583,9 @@
+               /* XXX: is pages_min/4 a good amount to reserve for this? */
+               min += z->pages_min / 4;
+-              if (z->free_pages > min || ((current->flags & PF_MEMALLOC) && !in_interrupt())) {
++              if (z->free_pages > min ||
++                  (((current->flags & PF_MEMALLOC) || (gfp_mask & __GFP_MEMALLOC))
++                   && !in_interrupt())) {
+                       page = rmqueue(z, order);
+                       if (page)
+                               return page;
 Index: linux-2.4.20-rh-20.9/include/linux/slab.h
 ===================================================================
 --- linux-2.4.20-rh-20.9.orig/include/linux/slab.h     2003-11-13 17:35:48.000000000 +0300
index 015bfc8..92e79c8 100644 (file)
@@ -1,8 +1,8 @@
-Index: linux-2.4.22-vanilla/include/linux/mm.h
+Index: linux-2.4.20/include/linux/mm.h
 ===================================================================
---- linux-2.4.22-vanilla.orig/include/linux/mm.h       2003-11-17 15:26:32.000000000 +0300
-+++ linux-2.4.22-vanilla/include/linux/mm.h    2003-11-17 15:40:32.000000000 +0300
-@@ -612,6 +612,7 @@
+--- linux-2.4.20.orig/include/linux/mm.h       2003-12-01 17:07:14.000000000 +0300
++++ linux-2.4.20/include/linux/mm.h    2003-12-02 23:17:06.000000000 +0300
+@@ -614,6 +614,7 @@
  #define __GFP_IO      0x40    /* Can start low memory physical IO? */
  #define __GFP_HIGHIO  0x80    /* Can start high mem physical IO? */
  #define __GFP_FS      0x100   /* Can call down to low-level FS? */
@@ -10,7 +10,7 @@ Index: linux-2.4.22-vanilla/include/linux/mm.h
  
  #define GFP_NOHIGHIO  (__GFP_HIGH | __GFP_WAIT | __GFP_IO)
  #define GFP_NOIO      (__GFP_HIGH | __GFP_WAIT)
-@@ -622,6 +623,7 @@
+@@ -624,6 +625,7 @@
  #define GFP_KERNEL    (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
  #define GFP_NFS               (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
  #define GFP_KSWAPD    (             __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
@@ -18,24 +18,24 @@ Index: linux-2.4.22-vanilla/include/linux/mm.h
  
  /* Flag - indicates that the buffer will be suitable for DMA.  Ignored on some
     platforms, used as appropriate on others */
-Index: linux-2.4.22-vanilla/mm/page_alloc.c
+Index: linux-2.4.20/mm/page_alloc.c
 ===================================================================
---- linux-2.4.22-vanilla.orig/mm/page_alloc.c  2003-11-13 18:19:51.000000000 +0300
-+++ linux-2.4.22-vanilla/mm/page_alloc.c       2003-11-17 15:40:32.000000000 +0300
+--- linux-2.4.20.orig/mm/page_alloc.c  2003-12-01 17:02:43.000000000 +0300
++++ linux-2.4.20/mm/page_alloc.c       2003-12-02 23:21:56.000000000 +0300
 @@ -377,7 +377,8 @@
        /* here we're in the low on memory slow path */
  
  rebalance:
 -      if (current->flags & (PF_MEMALLOC | PF_MEMDIE)) {
 +      if (current->flags & (PF_MEMALLOC | PF_MEMDIE) || 
-+                      gfp_mask & __GFP_MEMALLOC) {
++                      (gfp_mask & __GFP_MEMALLOC)) {
                zone = zonelist->zones;
                for (;;) {
                        zone_t *z = *(zone++);
-Index: linux-2.4.22-vanilla/include/linux/slab.h
+Index: linux-2.4.20/include/linux/slab.h
 ===================================================================
---- linux-2.4.22-vanilla.orig/include/linux/slab.h     2003-11-17 14:58:37.000000000 +0300
-+++ linux-2.4.22-vanilla/include/linux/slab.h  2003-11-17 15:42:13.000000000 +0300
+--- linux-2.4.20.orig/include/linux/slab.h     2003-12-01 17:07:14.000000000 +0300
++++ linux-2.4.20/include/linux/slab.h  2003-12-02 23:17:06.000000000 +0300
 @@ -23,6 +23,7 @@
  #define       SLAB_KERNEL             GFP_KERNEL
  #define       SLAB_NFS                GFP_NFS
@@ -44,11 +44,11 @@ Index: linux-2.4.22-vanilla/include/linux/slab.h
  
  #define SLAB_LEVEL_MASK               (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_HIGHIO|__GFP_FS)
  #define       SLAB_NO_GROW            0x00001000UL    /* don't grow a cache */
-Index: linux-2.4.22-vanilla/mm/slab.c
+Index: linux-2.4.20/mm/slab.c
 ===================================================================
---- linux-2.4.22-vanilla.orig/mm/slab.c        2003-11-13 17:39:29.000000000 +0300
-+++ linux-2.4.22-vanilla/mm/slab.c     2003-11-17 15:42:13.000000000 +0300
-@@ -1115,7 +1115,7 @@
+--- linux-2.4.20.orig/mm/slab.c        2003-12-01 17:02:34.000000000 +0300
++++ linux-2.4.20/mm/slab.c     2003-12-02 23:17:06.000000000 +0300
+@@ -1113,7 +1113,7 @@
        /* Be lazy and only check for valid flags here,
         * keeping it out of the critical path in kmem_cache_alloc().
         */
index c5abbf3..937aa40 100644 (file)
@@ -67,7 +67,7 @@
 Index: linux-2.4.22-vanilla/Documentation/Configure.help
 ===================================================================
 --- linux-2.4.22-vanilla.orig/Documentation/Configure.help     2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/Documentation/Configure.help  2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/Documentation/Configure.help  2003-12-02 23:55:38.000000000 +0300
 @@ -15613,6 +15613,39 @@
    be compiled as a module, and so this could be dangerous.  Most
    everyone wants to say Y here.
@@ -151,7 +151,7 @@ Index: linux-2.4.22-vanilla/Documentation/Configure.help
 Index: linux-2.4.22-vanilla/arch/alpha/defconfig
 ===================================================================
 --- linux-2.4.22-vanilla.orig/arch/alpha/defconfig     2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/alpha/defconfig  2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/alpha/defconfig  2003-12-02 23:55:38.000000000 +0300
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
@@ -169,7 +169,7 @@ Index: linux-2.4.22-vanilla/arch/alpha/defconfig
 Index: linux-2.4.22-vanilla/arch/alpha/kernel/entry.S
 ===================================================================
 --- linux-2.4.22-vanilla.orig/arch/alpha/kernel/entry.S        2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/alpha/kernel/entry.S     2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/alpha/kernel/entry.S     2003-12-02 23:55:38.000000000 +0300
 @@ -1154,6 +1154,18 @@
        .quad sys_readahead
        .quad sys_ni_syscall                    /* 380, sys_security */
@@ -192,7 +192,7 @@ Index: linux-2.4.22-vanilla/arch/alpha/kernel/entry.S
 Index: linux-2.4.22-vanilla/arch/arm/defconfig
 ===================================================================
 --- linux-2.4.22-vanilla.orig/arch/arm/defconfig       2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/arm/defconfig    2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/arm/defconfig    2003-12-02 23:55:38.000000000 +0300
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
@@ -210,7 +210,7 @@ Index: linux-2.4.22-vanilla/arch/arm/defconfig
 Index: linux-2.4.22-vanilla/arch/arm/kernel/calls.S
 ===================================================================
 --- linux-2.4.22-vanilla.orig/arch/arm/kernel/calls.S  2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/arm/kernel/calls.S       2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/arm/kernel/calls.S       2003-12-02 23:55:38.000000000 +0300
 @@ -240,18 +240,18 @@
                .long   SYMBOL_NAME(sys_ni_syscall) /* Security */
                .long   SYMBOL_NAME(sys_gettid)
@@ -245,7 +245,7 @@ Index: linux-2.4.22-vanilla/arch/arm/kernel/calls.S
 Index: linux-2.4.22-vanilla/arch/i386/defconfig
 ===================================================================
 --- linux-2.4.22-vanilla.orig/arch/i386/defconfig      2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/i386/defconfig   2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/i386/defconfig   2003-12-02 23:55:38.000000000 +0300
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
@@ -263,7 +263,7 @@ Index: linux-2.4.22-vanilla/arch/i386/defconfig
 Index: linux-2.4.22-vanilla/arch/ia64/defconfig
 ===================================================================
 --- linux-2.4.22-vanilla.orig/arch/ia64/defconfig      2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/ia64/defconfig   2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/ia64/defconfig   2003-12-02 23:55:38.000000000 +0300
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
@@ -281,7 +281,7 @@ Index: linux-2.4.22-vanilla/arch/ia64/defconfig
 Index: linux-2.4.22-vanilla/arch/m68k/defconfig
 ===================================================================
 --- linux-2.4.22-vanilla.orig/arch/m68k/defconfig      2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/m68k/defconfig   2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/m68k/defconfig   2003-12-02 23:55:38.000000000 +0300
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
@@ -299,7 +299,7 @@ Index: linux-2.4.22-vanilla/arch/m68k/defconfig
 Index: linux-2.4.22-vanilla/arch/mips/defconfig
 ===================================================================
 --- linux-2.4.22-vanilla.orig/arch/mips/defconfig      2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/mips/defconfig   2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/mips/defconfig   2003-12-02 23:55:38.000000000 +0300
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
@@ -317,7 +317,7 @@ Index: linux-2.4.22-vanilla/arch/mips/defconfig
 Index: linux-2.4.22-vanilla/arch/mips64/defconfig
 ===================================================================
 --- linux-2.4.22-vanilla.orig/arch/mips64/defconfig    2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/mips64/defconfig 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/mips64/defconfig 2003-12-02 23:55:38.000000000 +0300
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
@@ -335,7 +335,7 @@ Index: linux-2.4.22-vanilla/arch/mips64/defconfig
 Index: linux-2.4.22-vanilla/arch/s390/defconfig
 ===================================================================
 --- linux-2.4.22-vanilla.orig/arch/s390/defconfig      2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/s390/defconfig   2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/s390/defconfig   2003-12-02 23:55:38.000000000 +0300
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
@@ -353,7 +353,7 @@ Index: linux-2.4.22-vanilla/arch/s390/defconfig
 Index: linux-2.4.22-vanilla/arch/s390/kernel/entry.S
 ===================================================================
 --- linux-2.4.22-vanilla.orig/arch/s390/kernel/entry.S 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/s390/kernel/entry.S      2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/s390/kernel/entry.S      2003-12-02 23:55:38.000000000 +0300
 @@ -558,18 +558,18 @@
          .long  sys_fcntl64 
        .long  sys_readahead
@@ -388,7 +388,7 @@ Index: linux-2.4.22-vanilla/arch/s390/kernel/entry.S
 Index: linux-2.4.22-vanilla/arch/s390x/defconfig
 ===================================================================
 --- linux-2.4.22-vanilla.orig/arch/s390x/defconfig     2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/s390x/defconfig  2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/s390x/defconfig  2003-12-02 23:55:38.000000000 +0300
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
@@ -406,7 +406,7 @@ Index: linux-2.4.22-vanilla/arch/s390x/defconfig
 Index: linux-2.4.22-vanilla/arch/s390x/kernel/entry.S
 ===================================================================
 --- linux-2.4.22-vanilla.orig/arch/s390x/kernel/entry.S        2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/s390x/kernel/entry.S     2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/s390x/kernel/entry.S     2003-12-02 23:55:38.000000000 +0300
 @@ -591,18 +591,18 @@
        .long  SYSCALL(sys_ni_syscall,sys32_fcntl64_wrapper)
        .long  SYSCALL(sys_readahead,sys32_readahead)
@@ -441,7 +441,7 @@ Index: linux-2.4.22-vanilla/arch/s390x/kernel/entry.S
 Index: linux-2.4.22-vanilla/arch/s390x/kernel/wrapper32.S
 ===================================================================
 --- linux-2.4.22-vanilla.orig/arch/s390x/kernel/wrapper32.S    2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/s390x/kernel/wrapper32.S 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/s390x/kernel/wrapper32.S 2003-12-02 23:55:38.000000000 +0300
 @@ -1098,6 +1098,98 @@
        llgfr   %r4,%r4                 # long
        jg      sys32_fstat64           # branch to system call
@@ -544,7 +544,7 @@ Index: linux-2.4.22-vanilla/arch/s390x/kernel/wrapper32.S
 Index: linux-2.4.22-vanilla/arch/sparc64/defconfig
 ===================================================================
 --- linux-2.4.22-vanilla.orig/arch/sparc64/defconfig   2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/sparc64/defconfig        2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/sparc64/defconfig        2003-12-02 23:55:38.000000000 +0300
 @@ -1,6 +1,13 @@
  #
  # Automatically generated make config: don't edit
@@ -562,7 +562,7 @@ Index: linux-2.4.22-vanilla/arch/sparc64/defconfig
 Index: linux-2.4.22-vanilla/fs/Config.in
 ===================================================================
 --- linux-2.4.22-vanilla.orig/fs/Config.in     2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/Config.in  2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/fs/Config.in  2003-12-02 23:55:38.000000000 +0300
 @@ -29,6 +29,11 @@
  dep_tristate 'BFS file system support (EXPERIMENTAL)' CONFIG_BFS_FS $CONFIG_EXPERIMENTAL
  
@@ -600,8 +600,8 @@ Index: linux-2.4.22-vanilla/fs/Config.in
  source fs/partitions/Config.in
 Index: linux-2.4.22-vanilla/fs/Makefile
 ===================================================================
---- linux-2.4.22-vanilla.orig/fs/Makefile      2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/Makefile   2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/fs/Makefile      2003-12-02 23:55:36.000000000 +0300
++++ linux-2.4.22-vanilla/fs/Makefile   2003-12-02 23:55:38.000000000 +0300
 @@ -77,6 +77,9 @@
  
  obj-$(CONFIG_BINFMT_ELF)      += binfmt_elf.o
@@ -615,7 +615,7 @@ Index: linux-2.4.22-vanilla/fs/Makefile
 Index: linux-2.4.22-vanilla/fs/ext2/Makefile
 ===================================================================
 --- linux-2.4.22-vanilla.orig/fs/ext2/Makefile 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext2/Makefile      2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext2/Makefile      2003-12-02 23:55:38.000000000 +0300
 @@ -13,4 +13,8 @@
                ioctl.o namei.o super.o symlink.o
  obj-m    := $(O_TARGET)
@@ -628,7 +628,7 @@ Index: linux-2.4.22-vanilla/fs/ext2/Makefile
 Index: linux-2.4.22-vanilla/fs/ext2/file.c
 ===================================================================
 --- linux-2.4.22-vanilla.orig/fs/ext2/file.c   2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext2/file.c        2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext2/file.c        2003-12-02 23:55:38.000000000 +0300
 @@ -20,6 +20,7 @@
  
  #include <linux/fs.h>
@@ -649,7 +649,7 @@ Index: linux-2.4.22-vanilla/fs/ext2/file.c
 Index: linux-2.4.22-vanilla/fs/ext2/ialloc.c
 ===================================================================
 --- linux-2.4.22-vanilla.orig/fs/ext2/ialloc.c 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext2/ialloc.c      2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext2/ialloc.c      2003-12-02 23:55:38.000000000 +0300
 @@ -15,6 +15,7 @@
  #include <linux/config.h>
  #include <linux/fs.h>
@@ -669,7 +669,7 @@ Index: linux-2.4.22-vanilla/fs/ext2/ialloc.c
 Index: linux-2.4.22-vanilla/fs/ext2/inode.c
 ===================================================================
 --- linux-2.4.22-vanilla.orig/fs/ext2/inode.c  2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext2/inode.c       2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext2/inode.c       2003-12-02 23:55:38.000000000 +0300
 @@ -39,6 +39,18 @@
  static int ext2_update_inode(struct inode * inode, int do_sync);
  
@@ -755,7 +755,7 @@ Index: linux-2.4.22-vanilla/fs/ext2/inode.c
 Index: linux-2.4.22-vanilla/fs/ext2/namei.c
 ===================================================================
 --- linux-2.4.22-vanilla.orig/fs/ext2/namei.c  2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext2/namei.c       2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext2/namei.c       2003-12-02 23:55:38.000000000 +0300
 @@ -31,6 +31,7 @@
  
  #include <linux/fs.h>
@@ -792,7 +792,7 @@ Index: linux-2.4.22-vanilla/fs/ext2/namei.c
 Index: linux-2.4.22-vanilla/fs/ext2/super.c
 ===================================================================
 --- linux-2.4.22-vanilla.orig/fs/ext2/super.c  2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext2/super.c       2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext2/super.c       2003-12-02 23:55:38.000000000 +0300
 @@ -21,6 +21,7 @@
  #include <linux/string.h>
  #include <linux/fs.h>
@@ -865,7 +865,7 @@ Index: linux-2.4.22-vanilla/fs/ext2/super.c
 Index: linux-2.4.22-vanilla/fs/ext2/symlink.c
 ===================================================================
 --- linux-2.4.22-vanilla.orig/fs/ext2/symlink.c        2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext2/symlink.c     2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext2/symlink.c     2003-12-02 23:55:38.000000000 +0300
 @@ -19,6 +19,7 @@
  
  #include <linux/fs.h>
@@ -897,8 +897,8 @@ Index: linux-2.4.22-vanilla/fs/ext2/symlink.c
  };
 Index: linux-2.4.22-vanilla/fs/ext2/xattr.c
 ===================================================================
---- linux-2.4.22-vanilla.orig/fs/ext2/xattr.c  2003-11-03 23:41:29.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext2/xattr.c       2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/fs/ext2/xattr.c  2003-12-02 23:55:38.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext2/xattr.c       2003-12-02 23:55:38.000000000 +0300
 @@ -0,0 +1,1212 @@
 +/*
 + * linux/fs/ext2/xattr.c
@@ -2114,8 +2114,8 @@ Index: linux-2.4.22-vanilla/fs/ext2/xattr.c
 +#endif  /* CONFIG_EXT2_FS_XATTR_SHARING */
 Index: linux-2.4.22-vanilla/fs/ext2/xattr_user.c
 ===================================================================
---- linux-2.4.22-vanilla.orig/fs/ext2/xattr_user.c     2003-11-03 23:41:29.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext2/xattr_user.c  2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/fs/ext2/xattr_user.c     2003-12-02 23:55:38.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext2/xattr_user.c  2003-12-02 23:55:38.000000000 +0300
 @@ -0,0 +1,103 @@
 +/*
 + * linux/fs/ext2/xattr_user.c
@@ -2222,8 +2222,8 @@ Index: linux-2.4.22-vanilla/fs/ext2/xattr_user.c
 +}
 Index: linux-2.4.22-vanilla/fs/ext3/Makefile
 ===================================================================
---- linux-2.4.22-vanilla.orig/fs/ext3/Makefile 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext3/Makefile      2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/fs/ext3/Makefile 2003-12-02 23:55:37.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext3/Makefile      2003-12-02 23:55:38.000000000 +0300
 @@ -1,5 +1,5 @@
  #
 -# Makefile for the linux ext2-filesystem routines.
@@ -2250,8 +2250,8 @@ Index: linux-2.4.22-vanilla/fs/ext3/Makefile
  include $(TOPDIR)/Rules.make
 Index: linux-2.4.22-vanilla/fs/ext3/file.c
 ===================================================================
---- linux-2.4.22-vanilla.orig/fs/ext3/file.c   2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext3/file.c        2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/fs/ext3/file.c   2003-12-02 23:55:37.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext3/file.c        2003-12-02 23:55:38.000000000 +0300
 @@ -23,6 +23,7 @@
  #include <linux/locks.h>
  #include <linux/jbd.h>
@@ -2273,7 +2273,7 @@ Index: linux-2.4.22-vanilla/fs/ext3/file.c
 Index: linux-2.4.22-vanilla/fs/ext3/ialloc.c
 ===================================================================
 --- linux-2.4.22-vanilla.orig/fs/ext3/ialloc.c 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext3/ialloc.c      2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext3/ialloc.c      2003-12-02 23:55:38.000000000 +0300
 @@ -17,6 +17,7 @@
  #include <linux/jbd.h>
  #include <linux/ext3_fs.h>
@@ -2293,7 +2293,7 @@ Index: linux-2.4.22-vanilla/fs/ext3/ialloc.c
 Index: linux-2.4.22-vanilla/fs/ext3/inode.c
 ===================================================================
 --- linux-2.4.22-vanilla.orig/fs/ext3/inode.c  2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext3/inode.c       2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext3/inode.c       2003-12-02 23:55:38.000000000 +0300
 @@ -39,6 +39,18 @@
   */
  #undef SEARCH_FROM_ZERO
@@ -2386,8 +2386,8 @@ Index: linux-2.4.22-vanilla/fs/ext3/inode.c
        return;
 Index: linux-2.4.22-vanilla/fs/ext3/namei.c
 ===================================================================
---- linux-2.4.22-vanilla.orig/fs/ext3/namei.c  2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext3/namei.c       2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/fs/ext3/namei.c  2003-12-02 23:55:37.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext3/namei.c       2003-12-02 23:55:38.000000000 +0300
 @@ -29,6 +29,7 @@
  #include <linux/sched.h>
  #include <linux/ext3_fs.h>
@@ -2451,8 +2451,8 @@ Index: linux-2.4.22-vanilla/fs/ext3/namei.c
 +
 Index: linux-2.4.22-vanilla/fs/ext3/super.c
 ===================================================================
---- linux-2.4.22-vanilla.orig/fs/ext3/super.c  2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext3/super.c       2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/fs/ext3/super.c  2003-12-02 23:55:37.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext3/super.c       2003-12-02 23:56:03.000000000 +0300
 @@ -24,6 +24,7 @@
  #include <linux/jbd.h>
  #include <linux/ext3_fs.h>
@@ -2504,12 +2504,18 @@ Index: linux-2.4.22-vanilla/fs/ext3/super.c
        if (!parse_options ((char *) data, &sb_block, sbi, &journal_inum, 0)) {
                sb->s_dev = 0;
                goto out_fail;
-@@ -1827,17 +1843,29 @@
+@@ -1822,22 +1838,35 @@
+ static int __init init_ext3_fs(void)
+ {
++      int error;
+ #ifdef CONFIG_QUOTA
+       init_dquot_operations(&ext3_qops);
        old_sync_dquot = ext3_qops.sync_dquot;
        ext3_qops.sync_dquot = ext3_sync_dquot;
  #endif
 -        return register_filesystem(&ext3_fs_type);
-+      int error = init_ext3_xattr();
++      error = init_ext3_xattr();
 +      if (error)
 +              return error;
 +      error = init_ext3_xattr_user();
@@ -2541,7 +2547,7 @@ Index: linux-2.4.22-vanilla/fs/ext3/super.c
 Index: linux-2.4.22-vanilla/fs/ext3/symlink.c
 ===================================================================
 --- linux-2.4.22-vanilla.orig/fs/ext3/symlink.c        2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext3/symlink.c     2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext3/symlink.c     2003-12-02 23:55:38.000000000 +0300
 @@ -20,6 +20,7 @@
  #include <linux/fs.h>
  #include <linux/jbd.h>
@@ -2573,8 +2579,8 @@ Index: linux-2.4.22-vanilla/fs/ext3/symlink.c
  };
 Index: linux-2.4.22-vanilla/fs/ext3/xattr.c
 ===================================================================
---- linux-2.4.22-vanilla.orig/fs/ext3/xattr.c  2003-11-03 23:41:29.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext3/xattr.c       2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/fs/ext3/xattr.c  2003-12-02 23:55:38.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext3/xattr.c       2003-12-02 23:55:38.000000000 +0300
 @@ -0,0 +1,1225 @@
 +/*
 + * linux/fs/ext3/xattr.c
@@ -3803,8 +3809,8 @@ Index: linux-2.4.22-vanilla/fs/ext3/xattr.c
 +#endif  /* CONFIG_EXT3_FS_XATTR_SHARING */
 Index: linux-2.4.22-vanilla/fs/ext3/xattr_user.c
 ===================================================================
---- linux-2.4.22-vanilla.orig/fs/ext3/xattr_user.c     2003-11-03 23:41:29.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext3/xattr_user.c  2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/fs/ext3/xattr_user.c     2003-12-02 23:55:38.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext3/xattr_user.c  2003-12-02 23:55:39.000000000 +0300
 @@ -0,0 +1,111 @@
 +/*
 + * linux/fs/ext3/xattr_user.c
@@ -3920,7 +3926,7 @@ Index: linux-2.4.22-vanilla/fs/ext3/xattr_user.c
 Index: linux-2.4.22-vanilla/fs/jfs/jfs_xattr.h
 ===================================================================
 --- linux-2.4.22-vanilla.orig/fs/jfs/jfs_xattr.h       2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/jfs/jfs_xattr.h    2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/fs/jfs/jfs_xattr.h    2003-12-02 23:55:39.000000000 +0300
 @@ -52,8 +52,10 @@
  #define       END_EALIST(ealist) \
        ((struct jfs_ea *) (((char *) (ealist)) + EALIST_SIZE(ealist)))
@@ -3937,7 +3943,7 @@ Index: linux-2.4.22-vanilla/fs/jfs/jfs_xattr.h
 Index: linux-2.4.22-vanilla/fs/jfs/xattr.c
 ===================================================================
 --- linux-2.4.22-vanilla.orig/fs/jfs/xattr.c   2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/jfs/xattr.c        2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/fs/jfs/xattr.c        2003-12-02 23:55:39.000000000 +0300
 @@ -641,7 +641,7 @@
  }
  
@@ -3967,8 +3973,8 @@ Index: linux-2.4.22-vanilla/fs/jfs/xattr.c
        if (value == NULL) {    /* empty EA, do not remove */
 Index: linux-2.4.22-vanilla/fs/mbcache.c
 ===================================================================
---- linux-2.4.22-vanilla.orig/fs/mbcache.c     2003-11-03 23:41:29.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/mbcache.c  2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/fs/mbcache.c     2003-12-02 23:55:38.000000000 +0300
++++ linux-2.4.22-vanilla/fs/mbcache.c  2003-12-02 23:55:39.000000000 +0300
 @@ -0,0 +1,648 @@
 +/*
 + * linux/fs/mbcache.c
@@ -4621,7 +4627,7 @@ Index: linux-2.4.22-vanilla/fs/mbcache.c
 Index: linux-2.4.22-vanilla/include/asm-arm/unistd.h
 ===================================================================
 --- linux-2.4.22-vanilla.orig/include/asm-arm/unistd.h 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/include/asm-arm/unistd.h      2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/include/asm-arm/unistd.h      2003-12-02 23:55:39.000000000 +0300
 @@ -250,7 +250,6 @@
  #define __NR_security                 (__NR_SYSCALL_BASE+223)
  #define __NR_gettid                   (__NR_SYSCALL_BASE+224)
@@ -4641,7 +4647,7 @@ Index: linux-2.4.22-vanilla/include/asm-arm/unistd.h
 Index: linux-2.4.22-vanilla/include/asm-ppc64/unistd.h
 ===================================================================
 --- linux-2.4.22-vanilla.orig/include/asm-ppc64/unistd.h       2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/include/asm-ppc64/unistd.h    2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/include/asm-ppc64/unistd.h    2003-12-02 23:55:39.000000000 +0300
 @@ -218,6 +218,7 @@
  #define __NR_mincore          206
  #define __NR_gettid           207
@@ -4661,7 +4667,7 @@ Index: linux-2.4.22-vanilla/include/asm-ppc64/unistd.h
 Index: linux-2.4.22-vanilla/include/asm-s390/unistd.h
 ===================================================================
 --- linux-2.4.22-vanilla.orig/include/asm-s390/unistd.h        2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/include/asm-s390/unistd.h     2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/include/asm-s390/unistd.h     2003-12-02 23:55:39.000000000 +0300
 @@ -213,9 +213,18 @@
  #define __NR_getdents64               220
  #define __NR_fcntl64          221
@@ -4687,7 +4693,7 @@ Index: linux-2.4.22-vanilla/include/asm-s390/unistd.h
 Index: linux-2.4.22-vanilla/include/asm-s390x/unistd.h
 ===================================================================
 --- linux-2.4.22-vanilla.orig/include/asm-s390x/unistd.h       2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/include/asm-s390x/unistd.h    2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/include/asm-s390x/unistd.h    2003-12-02 23:55:39.000000000 +0300
 @@ -181,9 +181,18 @@
  #define __NR_mincore            218
  #define __NR_madvise            219
@@ -4712,8 +4718,8 @@ Index: linux-2.4.22-vanilla/include/asm-s390x/unistd.h
  
 Index: linux-2.4.22-vanilla/include/linux/cache_def.h
 ===================================================================
---- linux-2.4.22-vanilla.orig/include/linux/cache_def.h        2003-11-03 23:41:29.000000000 +0300
-+++ linux-2.4.22-vanilla/include/linux/cache_def.h     2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/include/linux/cache_def.h        2003-12-02 23:55:38.000000000 +0300
++++ linux-2.4.22-vanilla/include/linux/cache_def.h     2003-12-02 23:55:39.000000000 +0300
 @@ -0,0 +1,15 @@
 +/*
 + * linux/cache_def.h
@@ -4733,7 +4739,7 @@ Index: linux-2.4.22-vanilla/include/linux/cache_def.h
 Index: linux-2.4.22-vanilla/include/linux/errno.h
 ===================================================================
 --- linux-2.4.22-vanilla.orig/include/linux/errno.h    2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/include/linux/errno.h 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/include/linux/errno.h 2003-12-02 23:55:39.000000000 +0300
 @@ -23,4 +23,8 @@
  
  #endif
@@ -4746,7 +4752,7 @@ Index: linux-2.4.22-vanilla/include/linux/errno.h
 Index: linux-2.4.22-vanilla/include/linux/ext2_fs.h
 ===================================================================
 --- linux-2.4.22-vanilla.orig/include/linux/ext2_fs.h  2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/include/linux/ext2_fs.h       2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/include/linux/ext2_fs.h       2003-12-02 23:55:39.000000000 +0300
 @@ -57,8 +57,6 @@
   */
  #define       EXT2_BAD_INO             1      /* Bad blocks inode */
@@ -4831,8 +4837,8 @@ Index: linux-2.4.22-vanilla/include/linux/ext2_fs.h
  #endif        /* __KERNEL__ */
 Index: linux-2.4.22-vanilla/include/linux/ext2_xattr.h
 ===================================================================
---- linux-2.4.22-vanilla.orig/include/linux/ext2_xattr.h       2003-11-03 23:41:29.000000000 +0300
-+++ linux-2.4.22-vanilla/include/linux/ext2_xattr.h    2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/include/linux/ext2_xattr.h       2003-12-02 23:55:38.000000000 +0300
++++ linux-2.4.22-vanilla/include/linux/ext2_xattr.h    2003-12-02 23:55:39.000000000 +0300
 @@ -0,0 +1,157 @@
 +/*
 +  File: linux/ext2_xattr.h
@@ -4993,8 +4999,8 @@ Index: linux-2.4.22-vanilla/include/linux/ext2_xattr.h
 +
 Index: linux-2.4.22-vanilla/include/linux/ext3_fs.h
 ===================================================================
---- linux-2.4.22-vanilla.orig/include/linux/ext3_fs.h  2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/include/linux/ext3_fs.h       2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/include/linux/ext3_fs.h  2003-12-02 23:55:37.000000000 +0300
++++ linux-2.4.22-vanilla/include/linux/ext3_fs.h       2003-12-02 23:55:39.000000000 +0300
 @@ -63,8 +63,6 @@
   */
  #define       EXT3_BAD_INO             1      /* Bad blocks inode */
@@ -5079,8 +5085,8 @@ Index: linux-2.4.22-vanilla/include/linux/ext3_fs.h
  
 Index: linux-2.4.22-vanilla/include/linux/ext3_jbd.h
 ===================================================================
---- linux-2.4.22-vanilla.orig/include/linux/ext3_jbd.h 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/include/linux/ext3_jbd.h      2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/include/linux/ext3_jbd.h 2003-12-02 23:55:37.000000000 +0300
++++ linux-2.4.22-vanilla/include/linux/ext3_jbd.h      2003-12-02 23:55:39.000000000 +0300
 @@ -30,13 +30,19 @@
  
  #define EXT3_SINGLEDATA_TRANS_BLOCKS  8U
@@ -5104,8 +5110,8 @@ Index: linux-2.4.22-vanilla/include/linux/ext3_jbd.h
  
 Index: linux-2.4.22-vanilla/include/linux/ext3_xattr.h
 ===================================================================
---- linux-2.4.22-vanilla.orig/include/linux/ext3_xattr.h       2003-11-03 23:41:29.000000000 +0300
-+++ linux-2.4.22-vanilla/include/linux/ext3_xattr.h    2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/include/linux/ext3_xattr.h       2003-12-02 23:55:38.000000000 +0300
++++ linux-2.4.22-vanilla/include/linux/ext3_xattr.h    2003-12-02 23:55:39.000000000 +0300
 @@ -0,0 +1,157 @@
 +/*
 +  File: linux/ext3_xattr.h
@@ -5266,8 +5272,8 @@ Index: linux-2.4.22-vanilla/include/linux/ext3_xattr.h
 +
 Index: linux-2.4.22-vanilla/include/linux/fs.h
 ===================================================================
---- linux-2.4.22-vanilla.orig/include/linux/fs.h       2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/include/linux/fs.h    2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/include/linux/fs.h       2003-12-02 23:55:35.000000000 +0300
++++ linux-2.4.22-vanilla/include/linux/fs.h    2003-12-02 23:55:39.000000000 +0300
 @@ -913,7 +913,7 @@
        int (*setattr) (struct dentry *, struct iattr *);
        int (*setattr_raw) (struct inode *, struct iattr *);
@@ -5279,8 +5285,8 @@ Index: linux-2.4.22-vanilla/include/linux/fs.h
        int (*removexattr) (struct dentry *, const char *);
 Index: linux-2.4.22-vanilla/include/linux/mbcache.h
 ===================================================================
---- linux-2.4.22-vanilla.orig/include/linux/mbcache.h  2003-11-03 23:41:29.000000000 +0300
-+++ linux-2.4.22-vanilla/include/linux/mbcache.h       2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/include/linux/mbcache.h  2003-12-02 23:55:38.000000000 +0300
++++ linux-2.4.22-vanilla/include/linux/mbcache.h       2003-12-02 23:55:39.000000000 +0300
 @@ -0,0 +1,69 @@
 +/*
 +  File: linux/mbcache.h
@@ -5353,8 +5359,8 @@ Index: linux-2.4.22-vanilla/include/linux/mbcache.h
 +#endif
 Index: linux-2.4.22-vanilla/kernel/ksyms.c
 ===================================================================
---- linux-2.4.22-vanilla.orig/kernel/ksyms.c   2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/kernel/ksyms.c        2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/kernel/ksyms.c   2003-12-02 23:55:34.000000000 +0300
++++ linux-2.4.22-vanilla/kernel/ksyms.c        2003-12-02 23:55:39.000000000 +0300
 @@ -11,6 +11,7 @@
  
  #include <linux/config.h>
@@ -5371,7 +5377,7 @@ Index: linux-2.4.22-vanilla/kernel/ksyms.c
  
  /* internal kernel memory management */
  EXPORT_SYMBOL(_alloc_pages);
-@@ -109,6 +111,8 @@
+@@ -108,6 +110,8 @@
  EXPORT_SYMBOL(kmem_cache_alloc);
  EXPORT_SYMBOL(kmem_cache_free);
  EXPORT_SYMBOL(kmem_cache_size);
@@ -5383,7 +5389,7 @@ Index: linux-2.4.22-vanilla/kernel/ksyms.c
 Index: linux-2.4.22-vanilla/mm/vmscan.c
 ===================================================================
 --- linux-2.4.22-vanilla.orig/mm/vmscan.c      2003-11-03 23:41:27.000000000 +0300
-+++ linux-2.4.22-vanilla/mm/vmscan.c   2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/mm/vmscan.c   2003-12-02 23:55:39.000000000 +0300
 @@ -18,6 +18,7 @@
  #include <linux/kernel_stat.h>
  #include <linux/swap.h>
@@ -5442,8 +5448,8 @@ Index: linux-2.4.22-vanilla/mm/vmscan.c
  #endif
 Index: linux-2.4.22-vanilla/fs/ext3/ext3-exports.c
 ===================================================================
---- linux-2.4.22-vanilla.orig/fs/ext3/ext3-exports.c   2003-11-03 23:41:29.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext3/ext3-exports.c        2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/fs/ext3/ext3-exports.c   2003-12-02 23:55:38.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext3/ext3-exports.c        2003-12-02 23:55:39.000000000 +0300
 @@ -0,0 +1,13 @@
 +#include <linux/config.h>
 +#include <linux/module.h>
index 5b6876f..704cc29 100644 (file)
@@ -1,3 +1,4 @@
+configurable-x86-stack-2.4.20.patch
 dev_read_only_hp_2.4.20.patch
 exports_2.4.20-rh-hp.patch
 lustre_version.patch
index 46052d9..9034dd7 100644 (file)
@@ -1,3 +1,4 @@
+configurable-x86-stack-2.4.20-rh.patch
 mcore-2.4.20-8.patch
 dsp.patch
 dev_read_only_2.4.20-rh.patch
index a3bd2b9..a4ce33d 100644 (file)
@@ -1,3 +1,4 @@
+configurable-x86-stack-2.4.22-rh.patch
 dev_read_only_2.4.20-rh.patch
 exports_2.4.20-rh-hp.patch
 lustre_version.patch
index b3c5d0d..715ff2c 100644 (file)
@@ -1,3 +1,4 @@
+configurable-x86-stack-2.4.20.patch
 dev_read_only_2.4.20-rh.patch
 exports_2.4.20-rh-hp.patch
 lustre_version.patch
index b6ded90..9551189 100644 (file)
@@ -1,3 +1,4 @@
+configurable-x86-stack-2.4.19-pre1.patch
 dev_read_only_2.4.20.patch
 exports_2.4.19-pre1.patch 
 lustre_version.patch
index 46f5fd8..6e715ad 100644 (file)
@@ -1,3 +1,4 @@
+configurable-x86-stack-2.4.20.patch
 uml-patch-2.4.20-6.patch
 uml-2.4.20-do_mmap_pgoff-fix.patch
 uml-2.4.20-fixes-1.patch
@@ -45,4 +46,5 @@ ext3-ea-in-inode-2.4.20.patch
 listman-2.4.20.patch
 ext3-trusted_ea-2.4.20.patch
 kernel_text_address-2.4.20-vanilla.patch 
-ext3-xattr-ptr-arith-fix.patch 
+ext3-xattr-ptr-arith-fix.patch
+gfp_memalloc-2.4.22.patch
index d3b7123..1e91487 100644 (file)
@@ -1,3 +1,4 @@
+configurable-x86-stack-2.4.20.patch
 dev_read_only_2.4.20-rh.patch
 exports_2.4.20-rh-hp.patch
 lustre_version.patch
index 8dc312e..abd0f2e 100644 (file)
@@ -46,6 +46,6 @@ struct ldlm_state {
         struct ldlm_bl_pool *ldlm_bl_pool;
 };
 
-int __init ldlm_init(void);
-void __exit ldlm_exit(void);
+int ldlm_init(void);
+void ldlm_exit(void);
 
index 2d2196a..9245d91 100644 (file)
@@ -1006,7 +1006,7 @@ static void ptlrpc_abort_reply (struct ptlrpc_request *req)
                  * has finished.  Note that if the ACK does arrive, its
                  * callback wakes us in short order. --eeb */
                 lwi = LWI_TIMEOUT (HZ/4, NULL, NULL);
-                rc = l_wait_event(req->rq_wait_for_rep, !req->rq_want_ack,
+                rc = l_wait_event(req->rq_reply_waitq, !req->rq_want_ack,
                                   &lwi);
                 CDEBUG (D_HA, "Retrying req %p: %d\n", req, rc);
                 /* NB go back and test rq_want_ack with locking, to ensure
@@ -1062,7 +1062,7 @@ void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id)
                         OBD_FREE(req->rq_repmsg, req->rq_replen);
                         req->rq_repmsg = NULL;
                 }
-                init_waitqueue_head(&req->rq_wait_for_rep);
+                init_waitqueue_head(&req->rq_reply_waitq);
                 netrc = 0;
         }
 
@@ -1076,7 +1076,7 @@ void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id)
 
         init_waitqueue_entry(&commit_wait, current);
         add_wait_queue(&obd->obd_commit_waitq, &commit_wait);
-        rc = l_wait_event(req->rq_wait_for_rep,
+        rc = l_wait_event(req->rq_reply_waitq,
                           !req->rq_want_ack || req->rq_resent ||
                           req->rq_transno <= obd->obd_last_committed, &lwi);
         remove_wait_queue(&obd->obd_commit_waitq, &commit_wait);
index 7c21ba4..dd70aa5 100644 (file)
@@ -671,11 +671,12 @@ static int fsfilt_ext3_prep_san_write(struct inode *inode, long *blocks,
 static int fsfilt_ext3_read_record(struct file * file, void *buf,
                                    int size, loff_t *offs)
 {
-        struct buffer_head *bh;
-        unsigned long block, boffs;
         struct inode *inode = file->f_dentry->d_inode;
-        int err;
+        unsigned long block;
+        struct buffer_head *bh;
+        int err, blocksize, csize, boffs;
 
+        /* prevent reading after eof */
         if (inode->i_size < *offs + size) {
                 size = inode->i_size - *offs;
                 if (size < 0) {
@@ -686,87 +687,85 @@ static int fsfilt_ext3_read_record(struct file * file, void *buf,
                         return 0;
         }
 
-        block = *offs >> inode->i_blkbits;
-        bh = ext3_bread(NULL, inode, block, 0, &err);
-        if (!bh) {
-                CERROR("can't read block: %d\n", err);
-                return err;
-        }
+        blocksize = 1 << inode->i_blkbits;
+
+        while (size > 0) {
+                block = *offs >> inode->i_blkbits;
+                boffs = *offs & (blocksize - 1);
+                csize = min(blocksize - boffs, size);
+                bh = ext3_bread(NULL, inode, block, 0, &err);
+                if (!bh) {
+                        CERROR("can't read block: %d\n", err);
+                        return err;
+                }
 
-        boffs = (unsigned)*offs % bh->b_size;
-        if (boffs + size > bh->b_size) {
-                CERROR("request crosses block's border. offset %llu, size %u\n",
-                       *offs, size);
+                memcpy(buf, bh->b_data + boffs, csize);
                 brelse(bh);
-                return -EIO;
-        }
 
-        memcpy(buf, bh->b_data + boffs, size);
-        brelse(bh);
-        *offs += size;
+                *offs += csize;
+                buf += csize;
+                size -= csize;
+        }
         return 0;
 }
 
-static int fsfilt_ext3_write_record(struct file *file, void *buf, int size,
+static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize,
                                     loff_t *offs, int force_sync)
 {
-        struct buffer_head *bh;
-        unsigned long block, boffs;
+        struct buffer_head *bh = NULL;
+        unsigned long block;
         struct inode *inode = file->f_dentry->d_inode;
-        loff_t old_size = inode->i_size;
+        loff_t old_size = inode->i_size, offset = *offs;
+        loff_t new_size = inode->i_size;
         journal_t *journal;
         handle_t *handle;
-        int err;
+        int err, block_count = 0, blocksize, size, boffs;
 
+        /* Determine how many transaction credits are needed */
+        blocksize = 1 << inode->i_blkbits;
+        block_count = (*offs & (blocksize - 1)) + bufsize;
+        block_count = (block_count + blocksize - 1) >> inode->i_blkbits;
+        
         journal = EXT3_SB(inode->i_sb)->s_journal;
-        handle = journal_start(journal, EXT3_DATA_TRANS_BLOCKS + 2);
+        handle = journal_start(journal,
+                               block_count * EXT3_DATA_TRANS_BLOCKS + 2);
         if (IS_ERR(handle)) {
                 CERROR("can't start transaction\n");
                 return PTR_ERR(handle);
         }
 
-        block = *offs >> inode->i_blkbits;
-        if (*offs + size > inode->i_size) {
-                down(&inode->i_sem);
-                if (*offs + size > inode->i_size)
-                        inode->i_size = *offs + size;
-                if (inode->i_size > EXT3_I(inode)->i_disksize)
-                        EXT3_I(inode)->i_disksize = inode->i_size;
-                up(&inode->i_sem);
-        }
-
-        bh = ext3_bread(handle, inode, block, 1, &err);
-        if (!bh) {
-                CERROR("can't read/create block: %d\n", err);
-                goto out;
-        }
-
-        /* This is a hack only needed because ext3_get_block_handle() updates
-         * i_disksize after marking the inode dirty in ext3_splice_branch().
-         * We will fix that when we get a chance, as ext3_mark_inode_dirty()
-         * is not without cost, nor is it even exported.
-         */
-        if (inode->i_size > old_size)
-                mark_inode_dirty(inode);
-
-        boffs = (unsigned)*offs % bh->b_size;
-        if (boffs + size > bh->b_size) {
-                CERROR("request crosses block's border. offset %llu, size %u\n",
-                       *offs, size);
-                err = -EIO;
-                goto out;
-        }
+        while (bufsize > 0) {
+                if (bh != NULL)
+                        brelse(bh);
+
+                block = offset >> inode->i_blkbits;
+                boffs = offset & (blocksize - 1);
+                size = min(blocksize - boffs, bufsize);
+                bh = ext3_bread(handle, inode, block, 1, &err);
+                if (!bh) {
+                        CERROR("can't read/create block: %d\n", err);
+                        goto out;
+                }
 
-        err = ext3_journal_get_write_access(handle, bh);
-        if (err) {
-                CERROR("journal_get_write_access() returned error %d\n", err);
-                goto out;
-        }
-        memcpy(bh->b_data + boffs, buf, size);
-        err = ext3_journal_dirty_metadata(handle, bh);
-        if (err) {
-                CERROR("journal_dirty_metadata() returned error %d\n", err);
-                goto out;
+                err = ext3_journal_get_write_access(handle, bh);
+                if (err) {
+                        CERROR("journal_get_write_access() returned error %d\n",
+                               err);
+                        goto out;
+                }
+                LASSERT(bh->b_data + boffs + size <= bh->b_data + bh->b_size);
+                memcpy(bh->b_data + boffs, buf, size);
+                err = ext3_journal_dirty_metadata(handle, bh);
+                if (err) {
+                        CERROR("journal_dirty_metadata() returned error %d\n",
+                               err);
+                        goto out;
+                }
+                if (offset + size > new_size)
+                        new_size = offset + size;
+                offset += size;
+                bufsize -= size;
+                buf += size;
         }
 
         if (force_sync)
@@ -774,9 +773,22 @@ static int fsfilt_ext3_write_record(struct file *file, void *buf, int size,
 out:
         if (bh)
                 brelse(bh);
+
+        /* correct in-core and on-disk sizes */
+        if (new_size > inode->i_size) {
+                down(&inode->i_sem);
+                if (new_size > inode->i_size)
+                        inode->i_size = new_size;
+                if (inode->i_size > EXT3_I(inode)->i_disksize)
+                        EXT3_I(inode)->i_disksize = inode->i_size;
+                up(&inode->i_sem);
+                if (inode->i_size > old_size)
+                        mark_inode_dirty(inode);
+        }
+
         journal_stop(handle);
         if (err == 0)
-                *offs += size;
+                *offs = offset;
         return err;
 }
 
index 67b3eb2..e6cb437 100644 (file)
@@ -1015,7 +1015,7 @@ void mds_steal_ack_locks(struct obd_export *exp,
                sizeof req->rq_ack_locks);
         spin_lock_irqsave (&req->rq_lock, flags);
         oldrep->rq_resent = 1;
-        wake_up(&oldrep->rq_wait_for_rep);
+        wake_up(&oldrep->rq_reply_waitq);
         spin_unlock_irqrestore (&req->rq_lock, flags);
         DEBUG_REQ(D_HA, oldrep, "stole locks from");
         DEBUG_REQ(D_HA, req, "stole locks for");
@@ -1031,8 +1031,6 @@ int mds_handle(struct ptlrpc_request *req)
 
         OBD_FAIL_RETURN(OBD_FAIL_MDS_ALL_REQUEST_NET | OBD_FAIL_ONCE, 0);
 
-        LASSERT(!strcmp(req->rq_obd->obd_type->typ_name, LUSTRE_MDT_NAME));
-
         LASSERT(current->journal_info == NULL);
         /* XXX identical to OST */
         if (req->rq_reqmsg->opc != MDS_CONNECT) {
index 048112a..62e0f44 100644 (file)
@@ -522,16 +522,6 @@ static int filter_cleanup_groups(struct obd_device *obd)
         int i;
         ENTRY;
 
-        if (filter->fo_subdir_count) {
-                for (i = 0; i < filter->fo_subdir_count; i++) {
-                        struct dentry *dentry = filter->fo_dentry_O_sub[i];
-                        f_dput(dentry);
-                        filter->fo_dentry_O_sub[i] = NULL;
-                }
-                OBD_FREE(filter->fo_dentry_O_sub,
-                         filter->fo_subdir_count *
-                         sizeof(*filter->fo_dentry_O_sub));
-        }
         if (filter->fo_dentry_O_groups != NULL &&
             filter->fo_last_objids != NULL &&
             filter->fo_last_objid_files != NULL) {
@@ -548,6 +538,18 @@ static int filter_cleanup_groups(struct obd_device *obd)
                         }
                 }
         }
+        if (filter->fo_dentry_O_sub != NULL && filter->fo_subdir_count) {
+                for (i = 0; i < filter->fo_subdir_count; i++) {
+                        struct dentry *dentry = filter->fo_dentry_O_sub[i];
+                        if (dentry != NULL) {
+                                f_dput(dentry);
+                                filter->fo_dentry_O_sub[i] = NULL;
+                        }
+                }
+                OBD_FREE(filter->fo_dentry_O_sub,
+                         filter->fo_subdir_count *
+                         sizeof(*filter->fo_dentry_O_sub));
+        }
         if (filter->fo_dentry_O_groups != NULL)
                 OBD_FREE(filter->fo_dentry_O_groups,
                          FILTER_GROUPS * sizeof(struct dentry *));
@@ -648,7 +650,8 @@ static int filter_prep_groups(struct obd_device *obd)
                 CDEBUG(D_INODE, "got/created O/%s: %p\n", name, dentry);
                 if (IS_ERR(dentry)) {
                         rc = PTR_ERR(dentry);
-                        CERROR("cannot create O/%s: rc = %d\n", name, rc);
+                        CERROR("cannot lookup/create O/%s: rc = %d\n",
+                               name, rc);
                         GOTO(cleanup, rc);
                 }
                 filter->fo_dentry_O_groups[i] = dentry;
@@ -706,7 +709,8 @@ static int filter_prep_groups(struct obd_device *obd)
                         CDEBUG(D_INODE, "got/created O/0/%s: %p\n", dir,dentry);
                         if (IS_ERR(dentry)) {
                                 rc = PTR_ERR(dentry);
-                                CERROR("can't create O/0/%s: rc = %d\n",dir,rc);
+                                CERROR("can't lookup/create O/0/%s: rc = %d\n",
+                                       dir, rc);
                                 GOTO(cleanup, rc);
                         }
                         filter->fo_dentry_O_sub[i] = dentry;
@@ -1858,14 +1862,14 @@ static int filter_sync(struct obd_export *exp, struct obdo *oa,
         filter = &exp->exp_obd->u.filter;
 
         /* an objid of zero is taken to mean "sync whole filesystem" */
-        if (!oa || !oa->o_valid & OBD_MD_FLID) {
+        if (!oa || !(oa->o_valid & OBD_MD_FLID)) {
                 rc = fsfilt_sync(exp->exp_obd, filter->fo_sb);
-                GOTO(out_exp, rc);
+                RETURN(rc);
         }
 
         dentry = filter_oa2dentry(exp->exp_obd, oa);
         if (IS_ERR(dentry))
-                GOTO(out_exp, rc = PTR_ERR(dentry));
+                RETURN(PTR_ERR(dentry));
 
         push_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL);
 
@@ -1890,7 +1894,6 @@ static int filter_sync(struct obd_export *exp, struct obdo *oa,
         pop_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL);
 
         f_dput(dentry);
-out_exp:
         RETURN(rc);
 }
 
index 74a6d1d..a83592f 100644 (file)
@@ -405,7 +405,6 @@ static int ost_brw_read(struct ptlrpc_request *req)
                         lustre_swab_niobuf_remote (&remote_nb[i]);
         }
 
-        size[0] = sizeof(*body);
         rc = lustre_pack_reply(req, 1, size, NULL);
         if (rc)
                 GOTO(out, rc);
@@ -944,9 +943,6 @@ static int ost_handle(struct ptlrpc_request *req)
                 }
         }
 
-        if (strcmp(req->rq_obd->obd_type->typ_name, "ost") != 0)
-                GOTO(out, rc = -EINVAL);
-
         oti_init(oti, req);
 
         switch (req->rq_reqmsg->opc) {
index 3ecefff..900f0a4 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: cygwin-ioctl.h,v 1.2 2003/12/03 03:14:43 phil Exp $
+/* $Id: cygwin-ioctl.h,v 1.3 2003/12/03 05:12:41 phil Exp $
  *
  * linux/ioctl.h for Linux by H.H. Bergman.
  */
index 3e6d5e3..3d60631 100644 (file)
@@ -115,7 +115,7 @@ do {                                                                          \
         if (portal_cerror == 0)                                               \
                 break;                                                        \
         CHECK_STACK(CDEBUG_STACK);                                            \
-        if (!(mask) || ((mask) & (D_ERROR | D_EMERG)) ||                      \
+        if (!(mask) || ((mask) & (D_ERROR | D_EMERG | D_WARNING)) ||          \
             (portal_debug & (mask) &&                                         \
              portal_subsystem_debug & DEBUG_SUBSYSTEM))                       \
                 portals_debug_msg(DEBUG_SUBSYSTEM, mask,                      \
@@ -283,17 +283,19 @@ do {                                                                          \
 #define GFP_MEMALLOC 0
 #endif
 
-#define PORTAL_ALLOC(ptr, size)                                           \
+#define PORTAL_ALLOC_GFP(ptr, size, mask)                                 \
 do {                                                                      \
         LASSERT (!in_interrupt());                                        \
         if ((size) > PORTAL_VMALLOC_SIZE)                                 \
                 (ptr) = vmalloc(size);                                    \
         else                                                              \
-                (ptr) = kmalloc((size), (GFP_KERNEL | GFP_MEMALLOC));     \
-        if ((ptr) == NULL)                                                \
+                (ptr) = kmalloc((size), (mask));                          \
+        if ((ptr) == NULL) {                                              \
                 CERROR("PORTALS: out of memory at %s:%d (tried to alloc '"\
                        #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size));\
-        else {                                                            \
+                CERROR("PORTALS: %d total bytes allocated by portals\n",  \
+                       atomic_read(&portal_kmemory));                     \
+        } else {                                                          \
                 portal_kmem_inc((ptr), (size));                           \
                 memset((ptr), 0, (size));                                 \
         }                                                                 \
@@ -301,6 +303,12 @@ do {                                                                      \
                (int)(size), (ptr), atomic_read (&portal_kmemory));        \
 } while (0)
 
+#define PORTAL_ALLOC(ptr, size) \
+        PORTAL_ALLOC_GFP(ptr, size, (GFP_KERNEL | GFP_MEMALLOC))
+
+#define PORTAL_ALLOC_ATOMIC(ptr, size) \
+        PORTAL_ALLOC_GFP(ptr, size, (GFP_ATOMIC | GFP_MEMALLOC))
+
 #define PORTAL_FREE(ptr, size)                                          \
 do {                                                                    \
         int s = (size);                                                 \
@@ -330,11 +338,13 @@ do {                                                                      \
                 CERROR("PORTALS: out of memory at %s:%d (tried to alloc"  \
                        " '" #ptr "' from slab '" #slab "')\n", __FILE__,  \
                        __LINE__);                                         \
+                CERROR("PORTALS: %d total bytes allocated by portals\n",  \
+                       atomic_read(&portal_kmemory));                     \
         } else {                                                          \
                 portal_kmem_inc((ptr), (size));                           \
                 memset((ptr), 0, (size));                                 \
         }                                                                 \
-        CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %ld at %p (tot %d).\n",   \
+        CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d).\n",    \
                (int)(size), (ptr), atomic_read(&portal_kmemory));         \
 } while (0)
 
@@ -690,7 +700,10 @@ char *portals_nid2str(int nal, ptl_nid_t nid, char *str);
 /******************************************************************************/
 /* Light-weight trace 
  * Support for temporary event tracing with minimal Heisenberg effect. */
-#define LWT_SUPPORT  1
+#define LWT_SUPPORT  0
+
+#define LWT_MEMORY   (64<<20)
+#define LWT_MAX_CPUS 4
 
 typedef struct {
         cycles_t    lwte_when;
@@ -728,7 +741,7 @@ extern void lwt_fini (void);
 extern int  lwt_lookup_string (int *size, char *knlptr,
                                char *usrptr, int usrsize);
 extern int  lwt_control (int enable, int clear);
-extern int  lwt_snapshot (int *ncpu, int *total_size,
+extern int  lwt_snapshot (cycles_t *now, int *ncpu, int *total_size,
                           void *user_ptr, int user_size);
 
 /* Note that we _don't_ define LWT_EVENT at all if LWT_SUPPORT isn't set.
@@ -775,6 +788,11 @@ do {                                                                    \
 #endif /* __KERNEL__ */
 #endif /* LWT_SUPPORT */
 
+struct portals_device_userstate
+{
+        int          pdu_memhog_pages;
+        struct page *pdu_memhog_root_page;
+};
 
 #include <linux/portals_lib.h>
 
@@ -1044,7 +1062,8 @@ static inline int portal_ioctl_getdata(char *buf, char *end, void *arg)
 #define IOC_PORTAL_LWT_CONTROL             _IOWR('e', 39, long)
 #define IOC_PORTAL_LWT_SNAPSHOT            _IOWR('e', 40, long)
 #define IOC_PORTAL_LWT_LOOKUP_STRING       _IOWR('e', 41, long)
-#define IOC_PORTAL_MAX_NR                             41
+#define IOC_PORTAL_MEMHOG                  _IOWR('e', 42, long)
+#define IOC_PORTAL_MAX_NR                             42
 
 enum {
         QSWNAL  =  1,
index 55fd720..c402828 100644 (file)
@@ -168,7 +168,8 @@ static inline lib_eq_t *
 lib_eq_alloc (nal_cb_t *nal)
 {
         /* NEVER called with statelock held */
-        lib_eq_t *eq = kmem_cache_alloc(ptl_eq_slab, GFP_NOFS);
+        lib_eq_t *eq;
+        PORTAL_SLAB_ALLOC(eq, ptl_eq_slab, sizeof(*eq));
 
         if (eq == NULL)
                 return (NULL);
@@ -182,14 +183,15 @@ lib_eq_free (nal_cb_t *nal, lib_eq_t *eq)
 {
         /* ALWAYS called with statelock held */
         atomic_dec (&eq_in_use_count);
-        kmem_cache_free(ptl_eq_slab, eq);
+        PORTAL_SLAB_FREE(eq, ptl_eq_slab, sizeof(*eq));
 }
 
 static inline lib_md_t *
 lib_md_alloc (nal_cb_t *nal)
 {
         /* NEVER called with statelock held */
-        lib_md_t *md = kmem_cache_alloc(ptl_md_slab, GFP_NOFS);
+        lib_md_t *md;
+        PORTAL_SLAB_ALLOC(md, ptl_md_slab, sizeof(*md));
 
         if (md == NULL)
                 return (NULL);
@@ -203,14 +205,15 @@ lib_md_free (nal_cb_t *nal, lib_md_t *md)
 {
         /* ALWAYS called with statelock held */
         atomic_dec (&md_in_use_count);
-        kmem_cache_free(ptl_md_slab, md); 
+        PORTAL_SLAB_FREE(md, ptl_md_slab, sizeof(*md));
 }
 
 static inline lib_me_t *
 lib_me_alloc (nal_cb_t *nal)
 {
         /* NEVER called with statelock held */
-        lib_me_t *me = kmem_cache_alloc(ptl_me_slab, GFP_NOFS);
+        lib_me_t *me;
+        PORTAL_SLAB_ALLOC(me, ptl_me_slab, sizeof(*me));
 
         if (me == NULL)
                 return (NULL);
@@ -224,14 +227,15 @@ lib_me_free(nal_cb_t *nal, lib_me_t *me)
 {
         /* ALWAYS called with statelock held */
         atomic_dec (&me_in_use_count);
-        kmem_cache_free(ptl_me_slab, me);
+        PORTAL_SLAB_FREE(me, ptl_me_slab, sizeof(*me));
 }
 
 static inline lib_msg_t *
 lib_msg_alloc(nal_cb_t *nal)
 {
         /* ALWAYS called with statelock held */
-        lib_msg_t *msg = kmem_cache_alloc(ptl_msg_slab, GFP_ATOMIC);
+        lib_msg_t *msg;
+        PORTAL_SLAB_ALLOC(msg, ptl_msg_slab, sizeof(*msg));
 
         if (msg == NULL)
                 return (NULL);
@@ -245,7 +249,7 @@ lib_msg_free(nal_cb_t *nal, lib_msg_t *msg)
 {
         /* ALWAYS called with statelock held */
         atomic_dec (&msg_in_use_count);
-        kmem_cache_free(ptl_msg_slab, msg); 
+        PORTAL_SLAB_FREE(msg, ptl_msg_slab, sizeof(*msg));
 }
 #endif
 
index f581e72..12ef47a 100644 (file)
@@ -59,6 +59,7 @@ int jt_ptl_notify_router (int argc, char **argv);
 int jt_ptl_print_routes (int argc, char **argv);
 int jt_ptl_fail_nid (int argc, char **argv);
 int jt_ptl_lwt(int argc, char **argv);
+int jt_ptl_memhog(int argc, char **argv);
 
 int dbg_initialize(int argc, char **argv);
 int jt_dbg_filter(int argc, char **argv);
index 6f6fa7e..6de511c 100644 (file)
@@ -1395,30 +1395,35 @@ ksocknal_cmd(struct portals_cfg *pcfg, void * private)
 }
 
 void
-ksocknal_free_buffers (void)
+ksocknal_free_fmbs (ksock_fmb_pool_t *p)
 {
-        if (ksocknal_data.ksnd_fmbs != NULL) {
-                ksock_fmb_t *fmb = (ksock_fmb_t *)ksocknal_data.ksnd_fmbs;
-                int          i;
-                int          j;
-
-                for (i = 0;
-                     i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS);
-                     i++, fmb++)
-                        for (j = 0; j < fmb->fmb_npages; j++)
-                                if (fmb->fmb_pages[j] != NULL)
-                                        __free_page (fmb->fmb_pages[j]);
-
-                PORTAL_FREE (ksocknal_data.ksnd_fmbs,
-                             sizeof (ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS +
-                                                     SOCKNAL_LARGE_FWD_NMSGS));
+        ksock_fmb_t *fmb;
+        int          i;
+
+        LASSERT (list_empty(&p->fmp_blocked_conns));
+        LASSERT (p->fmp_nactive_fmbs == 0);
+        
+        while (!list_empty(&p->fmp_idle_fmbs)) {
+
+                fmb = list_entry(p->fmp_idle_fmbs.next,
+                                 ksock_fmb_t, fmb_list);
+                
+                for (i = 0; i < fmb->fmb_npages; i++)
+                        if (fmb->fmb_pages[i] != NULL)
+                                __free_page(fmb->fmb_pages[i]);
+                
+                list_del(&fmb->fmb_list);
+                PORTAL_FREE(fmb, sizeof(*fmb));
         }
+}
+
+void
+ksocknal_free_buffers (void)
+{
+        ksocknal_free_fmbs(&ksocknal_data.ksnd_small_fmp);
+        ksocknal_free_fmbs(&ksocknal_data.ksnd_large_fmp);
 
-        LASSERT (ksocknal_data.ksnd_active_ltxs == 0);
-        if (ksocknal_data.ksnd_ltxs != NULL)
-                PORTAL_FREE (ksocknal_data.ksnd_ltxs,
-                             sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS +
-                                                     SOCKNAL_NNBLK_LTXS));
+        LASSERT (atomic_read(&ksocknal_data.ksnd_nactive_ltxs) == 0);
 
         if (ksocknal_data.ksnd_schedulers != NULL)
                 PORTAL_FREE (ksocknal_data.ksnd_schedulers,
@@ -1572,7 +1577,7 @@ ksocknal_module_init (void)
         PORTAL_ALLOC (ksocknal_data.ksnd_peers,
                       sizeof (struct list_head) * ksocknal_data.ksnd_peer_hash_size);
         if (ksocknal_data.ksnd_peers == NULL)
-                RETURN (-ENOMEM);
+                return (-ENOMEM);
 
         for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
                 INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]);
@@ -1590,11 +1595,6 @@ ksocknal_module_init (void)
         INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_idle_fmbs);
         INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns);
 
-        spin_lock_init(&ksocknal_data.ksnd_idle_ltx_lock);
-        INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_nblk_ltx_list);
-        INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_ltx_list);
-        init_waitqueue_head(&ksocknal_data.ksnd_idle_ltx_waitq);
-
         spin_lock_init (&ksocknal_data.ksnd_reaper_lock);
         INIT_LIST_HEAD (&ksocknal_data.ksnd_zombie_conns);
         INIT_LIST_HEAD (&ksocknal_data.ksnd_deathrow_conns);
@@ -1614,7 +1614,7 @@ ksocknal_module_init (void)
                      sizeof(ksock_sched_t) * SOCKNAL_N_SCHED);
         if (ksocknal_data.ksnd_schedulers == NULL) {
                 ksocknal_module_fini ();
-                RETURN(-ENOMEM);
+                return (-ENOMEM);
         }
 
         for (i = 0; i < SOCKNAL_N_SCHED; i++) {
@@ -1629,35 +1629,11 @@ ksocknal_module_init (void)
                 init_waitqueue_head (&kss->kss_waitq);
         }
 
-        CDEBUG (D_MALLOC, "ltx "LPSZ", total "LPSZ"\n", sizeof (ksock_ltx_t),
-                sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS));
-
-        PORTAL_ALLOC(ksocknal_data.ksnd_ltxs,
-                     sizeof(ksock_ltx_t) * (SOCKNAL_NLTXS +SOCKNAL_NNBLK_LTXS));
-        if (ksocknal_data.ksnd_ltxs == NULL) {
-                ksocknal_module_fini ();
-                return (-ENOMEM);
-        }
-
-        /* Deterministic bugs please */
-        memset (ksocknal_data.ksnd_ltxs, 0xeb,
-                sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS));
-
-        for (i = 0; i < SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS; i++) {
-                ksock_ltx_t *ltx = &((ksock_ltx_t *)ksocknal_data.ksnd_ltxs)[i];
-
-                ltx->ltx_tx.tx_hdr = &ltx->ltx_hdr;
-                ltx->ltx_idle = i < SOCKNAL_NLTXS ?
-                                &ksocknal_data.ksnd_idle_ltx_list :
-                                &ksocknal_data.ksnd_idle_nblk_ltx_list;
-                list_add (&ltx->ltx_tx.tx_list, ltx->ltx_idle);
-        }
-
         rc = PtlNIInit(ksocknal_init, 32, 4, 0, &ksocknal_ni);
         if (rc != 0) {
                 CERROR("ksocknal: PtlNIInit failed: error %d\n", rc);
                 ksocknal_module_fini ();
-                RETURN (rc);
+                return (rc);
         }
         PtlNIDebug(ksocknal_ni, ~0);
 
@@ -1670,7 +1646,7 @@ ksocknal_module_init (void)
                         CERROR("Can't spawn socknal scheduler[%d]: %d\n",
                                i, rc);
                         ksocknal_module_fini ();
-                        RETURN (rc);
+                        return (rc);
                 }
         }
 
@@ -1679,7 +1655,7 @@ ksocknal_module_init (void)
                 if (rc != 0) {
                         CERROR("Can't spawn socknal autoconnectd: %d\n", rc);
                         ksocknal_module_fini ();
-                        RETURN (rc);
+                        return (rc);
                 }
         }
 
@@ -1687,7 +1663,7 @@ ksocknal_module_init (void)
         if (rc != 0) {
                 CERROR ("Can't spawn socknal reaper: %d\n", rc);
                 ksocknal_module_fini ();
-                RETURN (rc);
+                return (rc);
         }
 
         rc = kpr_register(&ksocknal_data.ksnd_router,
@@ -1698,23 +1674,15 @@ ksocknal_module_init (void)
         } else {
                 /* Only allocate forwarding buffers if I'm on a gateway */
 
-                PORTAL_ALLOC(ksocknal_data.ksnd_fmbs,
-                             sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS +
-                                                    SOCKNAL_LARGE_FWD_NMSGS));
-                if (ksocknal_data.ksnd_fmbs == NULL) {
-                        ksocknal_module_fini ();
-                        RETURN(-ENOMEM);
-                }
-
-                /* NULL out buffer pointers etc */
-                memset(ksocknal_data.ksnd_fmbs, 0,
-                       sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS +
-                                              SOCKNAL_LARGE_FWD_NMSGS));
-
                 for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS +
                                  SOCKNAL_LARGE_FWD_NMSGS); i++) {
-                        ksock_fmb_t *fmb =
-                                &((ksock_fmb_t *)ksocknal_data.ksnd_fmbs)[i];
+                        ksock_fmb_t *fmb;
+                        
+                        PORTAL_ALLOC(fmb, sizeof(*fmb));
+                        if (fmb == NULL) {
+                                ksocknal_module_fini();
+                                return (-ENOMEM);
+                        }
 
                         if (i < SOCKNAL_SMALL_FWD_NMSGS) {
                                 fmb->fmb_npages = SOCKNAL_SMALL_FWD_PAGES;
@@ -1724,7 +1692,6 @@ ksocknal_module_init (void)
                                 fmb->fmb_pool = &ksocknal_data.ksnd_large_fmp;
                         }
 
-                        LASSERT (fmb->fmb_npages > 0);
                         for (j = 0; j < fmb->fmb_npages; j++) {
                                 fmb->fmb_pages[j] = alloc_page(GFP_KERNEL);
 
@@ -1733,8 +1700,7 @@ ksocknal_module_init (void)
                                         return (-ENOMEM);
                                 }
 
-                                LASSERT(page_address (fmb->fmb_pages[j]) !=
-                                        NULL);
+                                LASSERT(page_address(fmb->fmb_pages[j]) != NULL);
                         }
 
                         list_add(&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs);
index 227a24f..9dbe415 100644 (file)
@@ -82,9 +82,6 @@
 
 #define SOCKNAL_PEER_HASH_SIZE   101            /* # peer lists */
 
-#define SOCKNAL_NLTXS           128             /* # normal transmit messages */
-#define SOCKNAL_NNBLK_LTXS     128             /* # transmit messages reserved if can't block */
-
 #define SOCKNAL_SMALL_FWD_NMSGS        128             /* # small messages I can be forwarding at any time */
 #define SOCKNAL_LARGE_FWD_NMSGS 64              /* # large messages I can be forwarding at any time */
 
 typedef struct                                  /* pool of forwarding buffers */
 {
         spinlock_t        fmp_lock;             /* serialise */
-        struct list_head  fmp_idle_fmbs;        /* buffers waiting for a connection */
+        struct list_head  fmp_idle_fmbs;        /* free buffers */
         struct list_head  fmp_blocked_conns;    /* connections waiting for a buffer */
+        int               fmp_nactive_fmbs;     /* # buffers in use */
 } ksock_fmb_pool_t;
 
 
@@ -164,16 +162,10 @@ typedef struct {
 
         kpr_router_t      ksnd_router;          /* THE router */
 
-        void             *ksnd_fmbs;            /* all the pre-allocated FMBs */
         ksock_fmb_pool_t  ksnd_small_fmp;       /* small message forwarding buffers */
         ksock_fmb_pool_t  ksnd_large_fmp;       /* large message forwarding buffers */
 
-        void             *ksnd_ltxs;            /* all the pre-allocated LTXs */
-        spinlock_t        ksnd_idle_ltx_lock;   /* serialise ltx alloc/free */
-        struct list_head  ksnd_idle_ltx_list;   /* where to get an idle LTX */
-        struct list_head  ksnd_idle_nblk_ltx_list; /* where to get an idle LTX if you can't block */
-        wait_queue_head_t ksnd_idle_ltx_waitq;  /* where to block for an idle LTX */
-        int               ksnd_active_ltxs;     /* #active ltxs */
+        atomic_t          ksnd_nactive_ltxs;    /* #active ltxs */
 
         struct list_head  ksnd_deathrow_conns;  /* conns to be closed */
         struct list_head  ksnd_zombie_conns;    /* conns to be freed */
@@ -233,25 +225,15 @@ typedef struct                                  /* transmit packet */
 #define KSOCK_ZCCD_2_TX(ptr)   list_entry (ptr, ksock_tx_t, tx_zccd)
 /* network zero copy callback descriptor embedded in ksock_tx_t */
 
-/* space for the tx frag descriptors: hdr is always 1 iovec
- * and payload is PTL_MD_MAX of either type. */
-typedef struct
-{
-        struct iovec            hdr;
-        union {
-                struct iovec    iov[PTL_MD_MAX_IOV];
-                ptl_kiov_t      kiov[PTL_MD_MAX_IOV];
-        }                       payload;
-} ksock_txiovspace_t;
-
 typedef struct                                  /* locally transmitted packet */
 {
         ksock_tx_t              ltx_tx;         /* send info */
-        struct list_head       *ltx_idle;       /* where to put when idle */
         void                   *ltx_private;    /* lib_finalize() callback arg */
         void                   *ltx_cookie;     /* lib_finalize() callback arg */
-        ksock_txiovspace_t      ltx_iov_space;  /* where to stash frag descriptors */
         ptl_hdr_t               ltx_hdr;        /* buffer for packet header */
+        int                     ltx_desc_size;  /* bytes allocated for this desc */
+        struct iovec            ltx_iov[1];     /* iov for hdr + payload */
+        ptl_kiov_t              ltx_kiov[0];    /* kiov for payload */
 } ksock_ltx_t;
 
 #define KSOCK_TX_2_KPR_FWD_DESC(ptr)    list_entry ((kprfd_scratch_t *)ptr, kpr_fwd_desc_t, kprfd_scratch)
index 6ea4fa8..22345fe 100644 (file)
@@ -129,60 +129,11 @@ ksocknal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
         return 0;
 }
 
-ksock_ltx_t *
-ksocknal_get_ltx (int may_block)
-{
-        unsigned long flags;
-        ksock_ltx_t *ltx = NULL;
-
-        for (;;) {
-                spin_lock_irqsave (&ksocknal_data.ksnd_idle_ltx_lock, flags);
-
-                if (!list_empty (&ksocknal_data.ksnd_idle_ltx_list)) {
-                        ltx = list_entry(ksocknal_data.ksnd_idle_ltx_list.next,
-                                         ksock_ltx_t, ltx_tx.tx_list);
-                        list_del (&ltx->ltx_tx.tx_list);
-                        ksocknal_data.ksnd_active_ltxs++;
-                        break;
-                }
-
-                if (!may_block) {
-                        if (!list_empty(&ksocknal_data.ksnd_idle_nblk_ltx_list)) {
-                                ltx = list_entry(ksocknal_data.ksnd_idle_nblk_ltx_list.next,
-                                                 ksock_ltx_t, ltx_tx.tx_list);
-                                list_del (&ltx->ltx_tx.tx_list);
-                                ksocknal_data.ksnd_active_ltxs++;
-                        }
-                        break;
-                }
-
-                spin_unlock_irqrestore(&ksocknal_data.ksnd_idle_ltx_lock,
-                                       flags);
-
-                wait_event (ksocknal_data.ksnd_idle_ltx_waitq,
-                            !list_empty (&ksocknal_data.ksnd_idle_ltx_list));
-        }
-
-        spin_unlock_irqrestore (&ksocknal_data.ksnd_idle_ltx_lock, flags);
-
-        return (ltx);
-}
-
 void
-ksocknal_put_ltx (ksock_ltx_t *ltx)
+ksocknal_free_ltx (ksock_ltx_t *ltx)
 {
-        unsigned long   flags;
-        
-        spin_lock_irqsave (&ksocknal_data.ksnd_idle_ltx_lock, flags);
-
-        ksocknal_data.ksnd_active_ltxs--;
-        list_add_tail (&ltx->ltx_tx.tx_list, ltx->ltx_idle);
-
-        /* normal tx desc => wakeup anyone blocking for one */
-        if (ltx->ltx_idle == &ksocknal_data.ksnd_idle_ltx_list)
-                wake_up (&ksocknal_data.ksnd_idle_ltx_waitq);
-
-        spin_unlock_irqrestore (&ksocknal_data.ksnd_idle_ltx_lock, flags);
+        atomic_dec(&ksocknal_data.ksnd_nactive_ltxs);
+        PORTAL_FREE(ltx, ltx->ltx_desc_size);
 }
 
 #if SOCKNAL_ZC
@@ -364,7 +315,7 @@ ksocknal_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
 }
 
 int
-ksocknal_sendmsg (ksock_conn_t *conn, ksock_tx_t *tx)
+ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx)
 {
         /* Return 0 on success, < 0 on error.
          * caller checks tx_resid to determine progress/completion */
@@ -377,17 +328,14 @@ ksocknal_sendmsg (ksock_conn_t *conn, ksock_tx_t *tx)
         }
 
         rc = ksocknal_getconnsock (conn);
-        if (rc != 0)
+        if (rc != 0) {
+                LASSERT (conn->ksnc_closing);
                 return (rc);
+        }
 
         for (;;) {
                 LASSERT (tx->tx_resid != 0);
 
-                if (conn->ksnc_closing) {
-                        rc = -ESHUTDOWN;
-                        break;
-                }
-
                 if (tx->tx_niov != 0)
                         rc = ksocknal_send_iov (conn, tx);
                 else
@@ -554,7 +502,7 @@ ksocknal_recv_kiov (ksock_conn_t *conn)
 }
 
 int
-ksocknal_recvmsg (ksock_conn_t *conn) 
+ksocknal_receive (ksock_conn_t *conn) 
 {
         /* Return 1 on success, 0 on EOF, < 0 on error.
          * Caller checks ksnc_rx_nob_wanted to determine
@@ -568,15 +516,12 @@ ksocknal_recvmsg (ksock_conn_t *conn)
         }
 
         rc = ksocknal_getconnsock (conn);
-        if (rc != 0)
+        if (rc != 0) {
+                LASSERT (conn->ksnc_closing);
                 return (rc);
+        }
 
         for (;;) {
-                if (conn->ksnc_closing) {
-                        rc = -ESHUTDOWN;
-                        break;
-                }
-
                 if (conn->ksnc_rx_niov != 0)
                         rc = ksocknal_recv_iov (conn);
                 else
@@ -665,7 +610,7 @@ ksocknal_tx_done (ksock_tx_t *tx, int asynch)
 
         lib_finalize (&ksocknal_lib, ltx->ltx_private, ltx->ltx_cookie);
 
-        ksocknal_put_ltx (ltx);
+        ksocknal_free_ltx (ltx);
         EXIT;
 }
 
@@ -696,7 +641,7 @@ ksocknal_process_transmit (ksock_conn_t *conn, ksock_tx_t *tx)
 {
         int            rc;
        
-        rc = ksocknal_sendmsg (conn, tx);
+        rc = ksocknal_transmit (conn, tx);
 
         CDEBUG (D_NET, "send(%d) %d\n", tx->tx_resid, rc);
         LASSERT (rc != -EAGAIN);
@@ -840,13 +785,17 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn)
         unsigned long  flags;
         ksock_sched_t *sched = conn->ksnc_scheduler;
 
-        /* called holding global lock (read or irq-write) */
-
+        /* called holding global lock (read or irq-write) and caller may
+         * not have dropped this lock between finding conn and calling me,
+         * so we don't need the {get,put}connsock dance to deref
+         * ksnc_sock... */
+        LASSERT(!conn->ksnc_closing);
+        LASSERT(tx->tx_resid == tx->tx_nob);
+        
         CDEBUG (D_NET, "Sending to "LPX64" on port %d\n", 
                 conn->ksnc_peer->ksnp_nid, conn->ksnc_port);
 
         atomic_add (tx->tx_nob, &conn->ksnc_tx_nob);
-        tx->tx_resid = tx->tx_nob;
         tx->tx_conn = conn;
 
 #if SOCKNAL_ZC
@@ -854,7 +803,6 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn)
         /* NB this sets 1 ref on zccd, so the callback can only occur after
          * I've released this ref. */
 #endif
-
         spin_lock_irqsave (&sched->kss_lock, flags);
 
         conn->ksnc_tx_deadline = jiffies + 
@@ -960,6 +908,8 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid)
                 tx->tx_nob, tx->tx_niov, tx->tx_nkiov);
 
         tx->tx_conn = NULL;                     /* only set when assigned a conn */
+        tx->tx_resid = tx->tx_nob;
+        tx->tx_hdr = (ptl_hdr_t *)tx->tx_iov[0].iov_base;
 
         g_lock = &ksocknal_data.ksnd_global_lock;
         read_lock (g_lock);
@@ -1024,115 +974,125 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid)
         return (-EHOSTUNREACH);
 }
 
-ksock_ltx_t *
-ksocknal_setup_hdr (nal_cb_t *nal, void *private, lib_msg_t *cookie, 
-                    ptl_hdr_t *hdr, int type)
+int
+ksocknal_sendmsg(nal_cb_t     *nal, 
+                 void         *private, 
+                 lib_msg_t    *cookie,
+                 ptl_hdr_t    *hdr, 
+                 int           type, 
+                 ptl_nid_t     nid, 
+                 ptl_pid_t     pid,
+                 unsigned int  payload_niov, 
+                 struct iovec *payload_iov, 
+                 ptl_kiov_t   *payload_kiov,
+                 size_t        payload_nob)
 {
         ksock_ltx_t  *ltx;
+        int           desc_size;
+        int           rc;
+
+        /* NB 'private' is different depending on what we're sending.
+         * Just ignore it... */
+
+        CDEBUG(D_NET, "sending "LPSZ" bytes in %d frags to nid:"LPX64
+               " pid %d\n", payload_nob, payload_niov, nid , pid);
 
-        /* I may not block for a transmit descriptor if I might block the
-         * receiver, or an interrupt handler. */
-        ltx = ksocknal_get_ltx (!(type == PTL_MSG_ACK ||
-                                  type == PTL_MSG_REPLY ||
-                                  in_interrupt ()));
+        LASSERT (payload_nob == 0 || payload_niov > 0);
+        LASSERT (payload_niov <= PTL_MD_MAX_IOV);
+
+        /* It must be OK to kmap() if required */
+        LASSERT (payload_kiov == NULL || !in_interrupt ());
+        /* payload is either all vaddrs or all pages */
+        LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
+        
+        if (payload_iov != NULL)
+                desc_size = offsetof(ksock_ltx_t, ltx_iov[1 + payload_niov]);
+        else
+                desc_size = offsetof(ksock_ltx_t, ltx_kiov[payload_niov]);
+        
+        if (in_interrupt() ||
+            type == PTL_MSG_ACK ||
+            type == PTL_MSG_REPLY) {
+                /* Can't block if in interrupt or responding to an incoming
+                 * message */
+                PORTAL_ALLOC_ATOMIC(ltx, desc_size);
+        } else {
+                PORTAL_ALLOC(ltx, desc_size);
+        }
+        
         if (ltx == NULL) {
-                CERROR ("Can't allocate tx desc\n");
-                return (NULL);
+                CERROR("Can't allocate tx desc type %d size %d %s\n",
+                       type, desc_size, in_interrupt() ? "(intr)" : "");
+                return (PTL_NOSPACE);
         }
 
-        /* Init local send packet (storage for hdr, finalize() args) */
+        atomic_inc(&ksocknal_data.ksnd_nactive_ltxs);
+
+        ltx->ltx_desc_size = desc_size;
+        
+        /* We always have 1 mapped frag for the header */
+        ltx->ltx_tx.tx_iov = ltx->ltx_iov;
+        ltx->ltx_iov[0].iov_base = &ltx->ltx_hdr;
+        ltx->ltx_iov[0].iov_len = sizeof(*hdr);
         ltx->ltx_hdr = *hdr;
+        
         ltx->ltx_private = private;
         ltx->ltx_cookie = cookie;
         
-        /* Init common ltx_tx */
         ltx->ltx_tx.tx_isfwd = 0;
-        ltx->ltx_tx.tx_nob = sizeof (*hdr);
-
-        /* We always have 1 mapped frag for the header */
-        ltx->ltx_tx.tx_niov = 1;
-        ltx->ltx_tx.tx_iov = &ltx->ltx_iov_space.hdr;
-        ltx->ltx_tx.tx_iov[0].iov_base = &ltx->ltx_hdr;
-        ltx->ltx_tx.tx_iov[0].iov_len = sizeof (ltx->ltx_hdr);
-
-        ltx->ltx_tx.tx_kiov  = NULL;
-        ltx->ltx_tx.tx_nkiov = 0;
+        ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_nob;
 
-        return (ltx);
-}
-
-int
-ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie,
-               ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
-               unsigned int payload_niov, struct iovec *payload_iov,
-               size_t payload_len)
-{
-        ksock_ltx_t  *ltx;
-        int           rc;
+        if (payload_iov != NULL) {
+                /* payload is all mapped */
+                ltx->ltx_tx.tx_kiov  = NULL;
+                ltx->ltx_tx.tx_nkiov = 0;
 
-        /* NB 'private' is different depending on what we're sending.
-         * Just ignore it until we can rely on it
-         */
+                ltx->ltx_tx.tx_niov = 1 + payload_niov;
 
-        CDEBUG(D_NET,
-               "sending "LPSZ" bytes in %d mapped frags to nid: "LPX64
-               " pid %d\n", payload_len, payload_niov, nid, pid);
+                memcpy(ltx->ltx_iov + 1, payload_iov,
+                       payload_niov * sizeof (*payload_iov));
 
-        ltx = ksocknal_setup_hdr (nal, private, cookie, hdr, type);
-        if (ltx == NULL)
-                return (PTL_FAIL);
+        } else {
+                /* payload is all pages */
+                ltx->ltx_tx.tx_kiov = ltx->ltx_kiov;
+                ltx->ltx_tx.tx_nkiov = payload_niov;
 
-        /* append the payload_iovs to the one pointing at the header */
-        LASSERT (ltx->ltx_tx.tx_niov == 1 && ltx->ltx_tx.tx_nkiov == 0);
-        LASSERT (payload_niov <= PTL_MD_MAX_IOV);
+                ltx->ltx_tx.tx_niov = 1;
 
-        memcpy (ltx->ltx_tx.tx_iov + 1, payload_iov,
-                payload_niov * sizeof (*payload_iov));
-        ltx->ltx_tx.tx_niov = 1 + payload_niov;
-        ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len;
+                memcpy(ltx->ltx_kiov, payload_kiov, 
+                       payload_niov * sizeof (*payload_kiov));
+        }
 
-        rc = ksocknal_launch_packet (&ltx->ltx_tx, nid);
+        rc = ksocknal_launch_packet(&ltx->ltx_tx, nid);
         if (rc == 0)
                 return (PTL_OK);
         
-        ksocknal_put_ltx (ltx);
+        ksocknal_free_ltx(ltx);
         return (PTL_FAIL);
 }
 
 int
+ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie,
+               ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
+               unsigned int payload_niov, struct iovec *payload_iov,
+               size_t payload_len)
+{
+        return (ksocknal_sendmsg(nal, private, cookie,
+                                 hdr, type, nid, pid,
+                                 payload_niov, payload_iov, NULL,
+                                 payload_len));
+}
+
+int
 ksocknal_send_pages (nal_cb_t *nal, void *private, lib_msg_t *cookie, 
                      ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
-                     unsigned int payload_niov, ptl_kiov_t *payload_iov, size_t payload_len)
+                     unsigned int payload_niov, ptl_kiov_t *payload_kiov, 
+                     size_t payload_len)
 {
-        ksock_ltx_t *ltx;
-        int          rc;
-
-        /* NB 'private' is different depending on what we're sending.
-         * Just ignore it until we can rely on it */
-
-        CDEBUG(D_NET,
-               "sending "LPSZ" bytes in %d mapped frags to nid: "LPX64" pid %d\n",
-               payload_len, payload_niov, nid, pid);
-
-        ltx = ksocknal_setup_hdr (nal, private, cookie, hdr, type);
-        if (ltx == NULL)
-                return (PTL_FAIL);
-
-        LASSERT (ltx->ltx_tx.tx_niov == 1 && ltx->ltx_tx.tx_nkiov == 0);
-        LASSERT (payload_niov <= PTL_MD_MAX_IOV);
-        
-        ltx->ltx_tx.tx_kiov = ltx->ltx_iov_space.payload.kiov;
-        memcpy (ltx->ltx_tx.tx_kiov, payload_iov, 
-                payload_niov * sizeof (*payload_iov));
-        ltx->ltx_tx.tx_nkiov = payload_niov;
-        ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len;
-
-        rc = ksocknal_launch_packet (&ltx->ltx_tx, nid);
-        if (rc == 0)
-                return (PTL_OK);
-
-        ksocknal_put_ltx (ltx);
-        return (PTL_FAIL);
+        return (ksocknal_sendmsg(nal, private, cookie,
+                                 hdr, type, nid, pid,
+                                 payload_niov, NULL, payload_kiov,
+                                 payload_len));
 }
 
 void
@@ -1155,7 +1115,6 @@ ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
         tx->tx_iov   = fwd->kprfd_iov;
         tx->tx_nkiov = 0;
         tx->tx_kiov  = NULL;
-        tx->tx_hdr   = (ptl_hdr_t *)fwd->kprfd_iov[0].iov_base;
 
         rc = ksocknal_launch_packet (tx, nid);
         if (rc != 0)
@@ -1204,6 +1163,7 @@ ksocknal_fmb_callback (void *arg, int error)
         spin_lock_irqsave (&fmp->fmp_lock, flags);
 
         list_add (&fmb->fmb_list, &fmp->fmp_idle_fmbs);
+        fmp->fmp_nactive_fmbs--;
 
         if (!list_empty (&fmp->fmp_blocked_conns)) {
                 conn = list_entry (fmb->fmb_pool->fmp_blocked_conns.next,
@@ -1242,7 +1202,7 @@ ksocknal_get_idle_fmb (ksock_conn_t *conn)
         ksock_fmb_t      *fmb;
 
         LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
-        LASSERT (ksocknal_data.ksnd_fmbs != NULL);
+        LASSERT (kpr_routing(&ksocknal_data.ksnd_router));
 
         if (packet_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE)
                 pool = &ksocknal_data.ksnd_small_fmp;
@@ -1255,6 +1215,7 @@ ksocknal_get_idle_fmb (ksock_conn_t *conn)
                 fmb = list_entry(pool->fmp_idle_fmbs.next,
                                  ksock_fmb_t, fmb_list);
                 list_del (&fmb->fmb_list);
+                pool->fmp_nactive_fmbs++;
                 spin_unlock_irqrestore (&pool->fmp_lock, flags);
 
                 return (fmb);
@@ -1397,7 +1358,7 @@ ksocknal_fwd_parse (ksock_conn_t *conn)
                 return;
         }
 
-        if (ksocknal_data.ksnd_fmbs == NULL) {        /* not forwarding */
+        if (!kpr_routing(&ksocknal_data.ksnd_router)) {    /* not forwarding */
                 CERROR("dropping packet from "LPX64" (%s) for "LPX64
                        " (%s): not forwarding\n",
                        src_nid, portals_nid2str(TCPNAL, src_nid, str),
@@ -1525,9 +1486,11 @@ ksocknal_process_receive (ksock_conn_t *conn)
 
         LASSERT (conn->ksnc_rx_nob_wanted > 0);
 
-        rc = ksocknal_recvmsg(conn);
+        rc = ksocknal_receive(conn);
 
         if (rc <= 0) {
+                LASSERT (rc != -EAGAIN);
+
                 if (rc == 0)
                         CWARN ("[%p] EOF from "LPX64" ip %08x:%d\n",
                                conn, conn->ksnc_peer->ksnp_nid,
@@ -1766,9 +1729,9 @@ int ksocknal_scheduler (void *arg)
                          * kss_lock. */
                         conn->ksnc_tx_ready = 0;
                         spin_unlock_irqrestore (&sched->kss_lock, flags);
-                        
+
                         rc = ksocknal_process_transmit(conn, tx);
-                        
+
                         spin_lock_irqsave (&sched->kss_lock, flags);
 
                         if (rc != -EAGAIN) {
@@ -1851,7 +1814,7 @@ ksocknal_data_ready (struct sock *sk, int n)
         read_lock (&ksocknal_data.ksnd_global_lock);
 
         conn = sk->sk_user_data;
-        if (conn == NULL) {             /* raced with ksocknal_close_sock */
+        if (conn == NULL) {             /* raced with ksocknal_terminate_conn */
                 LASSERT (sk->sk_data_ready != &ksocknal_data_ready);
                 sk->sk_data_ready (sk, n);
         } else {
@@ -1900,7 +1863,7 @@ ksocknal_write_space (struct sock *sk)
                (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ?
                                       " empty" : " queued"));
 
-        if (conn == NULL) {             /* raced with ksocknal_close_sock */
+        if (conn == NULL) {             /* raced with ksocknal_terminate_conn */
                 LASSERT (sk->sk_write_space != &ksocknal_write_space);
                 sk->sk_write_space (sk);
 
@@ -2136,7 +2099,7 @@ ksocknal_setup_sock (struct socket *sock)
         int             option;
         struct linger   linger;
 
-        sock->sk->allocation = GFP_NOFS;
+        sock->sk->allocation = GFP_MEMALLOC;
 
         /* Ensure this socket aborts active sends immediately when we close
          * it. */
@@ -2421,6 +2384,8 @@ ksocknal_autoconnectd (void *arg)
         kportal_daemonize (name);
         kportal_blockallsigs ();
 
+        current->flags |= PF_MEMALLOC;
+
         spin_lock_irqsave (&ksocknal_data.ksnd_autoconnectd_lock, flags);
 
         while (!ksocknal_data.ksnd_shuttingdown) {
@@ -2548,6 +2513,8 @@ ksocknal_reaper (void *arg)
 
         init_waitqueue_entry (&wait, current);
 
+        current->flags |= PF_MEMALLOC;
+
         spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags);
 
         while (!ksocknal_data.ksnd_shuttingdown) {
index 6e2c1ca..ad2c966 100644 (file)
@@ -974,18 +974,14 @@ char *portals_debug_dumpstack(void)
         return buf;
 }
 
-#elif defined(CONFIG_X86)
+#elif defined(__i386__)
 
 extern int is_kernel_text_address(unsigned long addr);
 extern int lookup_symbol(unsigned long address, char *buf, int buflen);
 
 char *portals_debug_dumpstack(void)
 {
-#if defined(__x86_64__)
-        unsigned long esp = current->thread.rsp;
-#else
         unsigned long esp = current->thread.esp;
-#endif
         unsigned long *stack = (unsigned long *)&esp;
         int size;
         unsigned long addr;
index 89fe8f7..a24423e 100644 (file)
@@ -45,9 +45,6 @@
 
 #if LWT_SUPPORT
 
-#define LWT_MEMORY              (1<<20)         /* 1Mb of trace memory */
-#define LWT_MAX_CPUS             4
-
 int         lwt_enabled;
 int         lwt_pages_per_cpu;
 lwt_cpu_t   lwt_cpus[LWT_MAX_CPUS];
@@ -123,7 +120,8 @@ lwt_control (int enable, int clear)
 }
 
 int
-lwt_snapshot (int *ncpu, int *total_size, void *user_ptr, int user_size)
+lwt_snapshot (cycles_t *now, int *ncpu, int *total_size, 
+              void *user_ptr, int user_size)
 {
         const int    events_per_page = PAGE_SIZE / sizeof(lwt_event_t);
         const int    bytes_per_page = events_per_page * sizeof(lwt_event_t);
@@ -136,7 +134,8 @@ lwt_snapshot (int *ncpu, int *total_size, void *user_ptr, int user_size)
 
         *ncpu = num_online_cpus();
         *total_size = num_online_cpus() * lwt_pages_per_cpu * bytes_per_page;
-
+        *now = get_cycles();
+        
         if (user_ptr == NULL)
                 return (0);
 
index 7c0cafc..55e1935 100644 (file)
@@ -83,6 +83,115 @@ kportal_daemonize (char *str)
 }
 
 void
+kportal_memhog_free (struct portals_device_userstate *pdu)
+{
+        struct page **level0p = &pdu->pdu_memhog_root_page;
+        struct page **level1p;
+        struct page **level2p;
+        int           count1;
+        int           count2;
+        
+        if (*level0p != NULL) {
+
+                level1p = (struct page **)page_address(*level0p);
+                count1 = 0;
+                
+                while (count1 < PAGE_SIZE/sizeof(struct page *) &&
+                       *level1p != NULL) {
+
+                        level2p = (struct page **)page_address(*level1p);
+                        count2 = 0;
+                        
+                        while (count2 < PAGE_SIZE/sizeof(struct page *) &&
+                               *level2p != NULL) {
+                                
+                                __free_page(*level2p);
+                                pdu->pdu_memhog_pages--;
+                                level2p++;
+                                count2++;
+                        }
+                        
+                        __free_page(*level1p);
+                        pdu->pdu_memhog_pages--;
+                        level1p++;
+                        count1++;
+                }
+                
+                __free_page(*level0p);
+                pdu->pdu_memhog_pages--;
+
+                *level0p = NULL;
+        }
+        
+        LASSERT (pdu->pdu_memhog_pages == 0);
+}
+
+int
+kportal_memhog_alloc (struct portals_device_userstate *pdu, int npages, int flags)
+{
+        struct page **level0p;
+        struct page **level1p;
+        struct page **level2p;
+        int           count1;
+        int           count2;
+        
+        LASSERT (pdu->pdu_memhog_pages == 0);
+        LASSERT (pdu->pdu_memhog_root_page == NULL);
+
+        if (npages < 0)
+                return -EINVAL;
+
+        if (npages == 0)
+                return 0;
+
+        level0p = &pdu->pdu_memhog_root_page;
+        *level0p = alloc_page(flags);
+        if (*level0p == NULL)
+                return -ENOMEM;
+        pdu->pdu_memhog_pages++;
+
+        level1p = (struct page **)page_address(*level0p);
+        count1 = 0;
+        memset(level1p, 0, PAGE_SIZE);
+        
+        while (pdu->pdu_memhog_pages < npages &&
+               count1 < PAGE_SIZE/sizeof(struct page *)) {
+
+                if (signal_pending(current))
+                        return (-EINTR);
+                
+                *level1p = alloc_page(flags);
+                if (*level1p == NULL)
+                        return -ENOMEM;
+                pdu->pdu_memhog_pages++;
+
+                level2p = (struct page **)page_address(*level1p);
+                count2 = 0;
+                memset(level2p, 0, PAGE_SIZE);
+                
+                while (pdu->pdu_memhog_pages < npages &&
+                       count2 < PAGE_SIZE/sizeof(struct page *)) {
+                        
+                        if (signal_pending(current))
+                                return (-EINTR);
+
+                        *level2p = alloc_page(flags);
+                        if (*level2p == NULL)
+                                return (-ENOMEM);
+                        pdu->pdu_memhog_pages++;
+                        
+                        level2p++;
+                        count2++;
+                }
+                
+                level1p++;
+                count1++;
+        }
+
+        return 0;
+}
+
+void
 kportal_blockallsigs ()
 {
         unsigned long  flags;
@@ -96,22 +205,39 @@ kportal_blockallsigs ()
 /* called when opening /dev/device */
 static int kportal_psdev_open(struct inode * inode, struct file * file)
 {
+        struct portals_device_userstate *pdu;
         ENTRY;
-
+        
         if (!inode)
                 RETURN(-EINVAL);
+
         PORTAL_MODULE_USE;
+
+        PORTAL_ALLOC(pdu, sizeof(*pdu));
+        if (pdu != NULL) {
+                pdu->pdu_memhog_pages = 0;
+                pdu->pdu_memhog_root_page = NULL;
+        }
+        file->private_data = pdu;
+        
         RETURN(0);
 }
 
 /* called when closing /dev/device */
 static int kportal_psdev_release(struct inode * inode, struct file * file)
 {
+        struct portals_device_userstate *pdu;
         ENTRY;
 
         if (!inode)
                 RETURN(-EINVAL);
 
+        pdu = file->private_data;
+        if (pdu != NULL) {
+                kportal_memhog_free(pdu);
+                PORTAL_FREE(pdu, sizeof(*pdu));
+        }
+        
         PORTAL_MODULE_UNUSE;
         RETURN(0);
 }
@@ -514,7 +640,8 @@ static int kportal_ioctl(struct inode *inode, struct file *file,
                 break;
                 
         case IOC_PORTAL_LWT_SNAPSHOT:
-                err = lwt_snapshot (&data->ioc_count, &data->ioc_misc,
+                err = lwt_snapshot (&data->ioc_nid,
+                                    &data->ioc_count, &data->ioc_misc,
                                     data->ioc_pbuf1, data->ioc_plen1);
                 if (err == 0 &&
                     copy_to_user((char *)arg, data, sizeof (*data)))
@@ -528,7 +655,22 @@ static int kportal_ioctl(struct inode *inode, struct file *file,
                     copy_to_user((char *)arg, data, sizeof (*data)))
                         err = -EFAULT;
                 break;
-#endif                        
+#endif
+        case IOC_PORTAL_MEMHOG:
+                if (!capable (CAP_SYS_ADMIN))
+                        err = -EPERM;
+                else if (file->private_data == NULL) {
+                        err = -EINVAL;
+                } else {
+                        kportal_memhog_free(file->private_data);
+                        err = kportal_memhog_alloc(file->private_data,
+                                                   data->ioc_count,
+                                                   data->ioc_flags);
+                        if (err != 0)
+                                kportal_memhog_free(file->private_data);
+                }
+                break;
+
         default:
                 err = -EINVAL;
                 break;
@@ -612,8 +754,8 @@ static int init_kportals_module(void)
  cleanup_lwt:
 #if LWT_SUPPORT
         lwt_fini();
-#endif
  cleanup_debug:
+#endif
         portals_debug_cleanup();
         return rc;
 }
index 3325892..b46ee16 100644 (file)
@@ -1371,7 +1371,8 @@ lwt_control(int enable, int clear)
 }
 
 static int
-lwt_snapshot(int *ncpu, int *totalsize, lwt_event_t *events, int size)
+lwt_snapshot(cycles_t *now, int *ncpu, int *totalsize, 
+             lwt_event_t *events, int size)
 {
         struct portal_ioctl_data data;
         int                      rc;
@@ -1390,6 +1391,9 @@ lwt_snapshot(int *ncpu, int *totalsize, lwt_event_t *events, int size)
         LASSERT (data.ioc_count != 0);
         LASSERT (data.ioc_misc != 0);
         
+        if (now != NULL)
+                *now = data.ioc_nid;
+
         if (ncpu != NULL)
                 *ncpu = data.ioc_count;
 
@@ -1499,14 +1503,13 @@ get_cycles_per_usec ()
 int
 jt_ptl_lwt(int argc, char **argv)
 {
-#define MAX_CPUS 8
         int             ncpus;
         int             totalspace;
         int             nevents_per_cpu;
         lwt_event_t    *events;
-        lwt_event_t    *cpu_event[MAX_CPUS + 1];
-        lwt_event_t    *next_event[MAX_CPUS];
-        lwt_event_t    *first_event[MAX_CPUS];
+        lwt_event_t    *cpu_event[LWT_MAX_CPUS + 1];
+        lwt_event_t    *next_event[LWT_MAX_CPUS];
+        lwt_event_t    *first_event[LWT_MAX_CPUS];
         int             cpu;
         lwt_event_t    *e;
         int             rc;
@@ -1514,6 +1517,9 @@ jt_ptl_lwt(int argc, char **argv)
         double          mhz;
         cycles_t        t0;
         cycles_t        tlast;
+        cycles_t        tnow;
+        struct timeval  tvnow;
+        int             printed_date = 0;
         FILE           *f = stdout;
 
         if (argc < 2 ||
@@ -1541,11 +1547,12 @@ jt_ptl_lwt(int argc, char **argv)
                 return (0);
         }
                 
-        if (lwt_snapshot(&ncpus, &totalspace, NULL, 0) != 0)
+        if (lwt_snapshot(NULL, &ncpus, &totalspace, NULL, 0) != 0)
                 return (-1);
 
-        if (ncpus > MAX_CPUS) {
-                fprintf(stderr, "Too many cpus: %d (%d)\n", ncpus, MAX_CPUS);
+        if (ncpus > LWT_MAX_CPUS) {
+                fprintf(stderr, "Too many cpus: %d (%d)\n", 
+                        ncpus, LWT_MAX_CPUS);
                 return (-1);
         }
 
@@ -1560,11 +1567,14 @@ jt_ptl_lwt(int argc, char **argv)
                 return (-1);
         }
 
-        if (lwt_snapshot(NULL, NULL, events, totalspace)) {
+        if (lwt_snapshot(&tnow, NULL, NULL, events, totalspace)) {
                 free(events);
                 return (-1);
         }
 
+        /* we want this time to be sampled at snapshot time */
+        gettimeofday(&tvnow, NULL);
+
         if (argc > 2) {
                 f = fopen (argv[2], "w");
                 if (f == NULL) {
@@ -1645,6 +1655,17 @@ jt_ptl_lwt(int argc, char **argv)
                 
                 if (t0 <= next_event[cpu]->lwte_when) {
                         /* on or after the first event */
+                        if (!printed_date) {
+                                cycles_t du = (tnow - t0) / mhz;
+                                time_t   then = tvnow.tv_sec - du/1000000;
+                                
+                                if (du % 1000000 > tvnow.tv_usec)
+                                        then--;
+
+                                fprintf(f, "%s", ctime(&then));
+                                printed_date = 1;
+                        }
+                        
                         rc = lwt_print(f, t0, tlast, mhz, cpu, next_event[cpu]);
                         if (rc != 0)
                                 break;
@@ -1666,5 +1687,48 @@ jt_ptl_lwt(int argc, char **argv)
 
         free(events);
         return (0);
-#undef MAX_CPUS
 }
+
+int jt_ptl_memhog(int argc, char **argv)
+{       /* Command handler for "memhog": parses a page count (argv[1]) and
+         * optional GFP flags (argv[2]), then issues IOC_PORTAL_MEMHOG on
+         * PORTALS_DEV_ID with them in ioc_count / ioc_flags.
+         * Returns 0 on success or after printing usage, and -1 when an
+         * argument fails to parse or the ioctl reports failure. */
+        static int                gfp = 0;        /* sticky! (static, so a call
+                                                   * without argv[2] reuses the
+                                                   * flags of the last call
+                                                   * that supplied them) */
+
+        struct portal_ioctl_data  data;
+        int                       rc;     /* ioctl result; also scratch for the parsed flags */
+        int                       count;
+        char                     *end;
+        
+        if (argc < 2)  {
+                fprintf(stderr, "usage: %s <npages> [<GFP flags>]\n", argv[0]);
+                return 0;       /* missing arguments are not reported as an error */
+        }
+
+        count = strtol(argv[1], &end, 0);       /* base 0: decimal, 0x hex or leading-0 octal */
+        if (count < 0 || *end != 0) {   /* reject negatives and trailing junk.
+                                         * NOTE(review): strtol() returns long and is
+                                         * narrowed into an int here with no range
+                                         * check, and an empty argv[1] passes as 0 --
+                                         * confirm both are acceptable */
+                fprintf(stderr, "Can't parse page count '%s'\n", argv[1]);
+                return -1;
+        }
+
+        if (argc >= 3) {        /* flags are optional; when absent the sticky value is kept */
+                rc = strtol(argv[2], &end, 0);
+                if (*end != 0) {
+                        fprintf(stderr, "Can't parse gfp flags '%s'\n", argv[2]);
+                        return -1;
+                }
+                gfp = rc;       /* only updated once the whole string has parsed */
+        }
+        
+        PORTAL_IOC_INIT(data);
+        data.ioc_count = count;
+        data.ioc_flags = gfp;
+        rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_MEMHOG, &data);
+
+        if (rc != 0) {  /* message below reads errno, so presumably l_ioctl()
+                         * leaves errno set on failure -- TODO confirm */
+                fprintf(stderr, "memhog %d failed: %s\n", count, strerror(errno));
+                return -1;
+        }
+        
+        printf("memhog %d OK\n", count);
+        return 0;
+}
+
index 60f2d4e..1d17038 100644 (file)
@@ -250,7 +250,7 @@ struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, int opcode,
 
         spin_lock_init(&request->rq_lock);
         INIT_LIST_HEAD(&request->rq_list);
-        init_waitqueue_head(&request->rq_wait_for_rep);
+        init_waitqueue_head(&request->rq_reply_waitq);
         request->rq_xid = ptlrpc_next_xid();
         atomic_set(&request->rq_refcount, 1);
 
@@ -1127,7 +1127,7 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request)
                          * the timeout lets us CERROR for visibility */
                         struct l_wait_info lwi = LWI_TIMEOUT(10*HZ, NULL, NULL);
 
-                        rc = l_wait_event (request->rq_wait_for_rep,
+                        rc = l_wait_event (request->rq_reply_waitq,
                                            request->rq_replied, &lwi);
                         LASSERT(rc == 0 || rc == -ETIMEDOUT);
                         if (rc == 0) {
@@ -1228,7 +1228,7 @@ void ptlrpc_resend_req(struct ptlrpc_request *req)
         if (req->rq_set != NULL)
                 wake_up (&req->rq_set->set_waitq);
         else
-                wake_up(&req->rq_wait_for_rep);
+                wake_up(&req->rq_reply_waitq);
         spin_unlock_irqrestore (&req->rq_lock, flags);
 }
 
@@ -1246,7 +1246,7 @@ void ptlrpc_restart_req(struct ptlrpc_request *req)
         if (req->rq_set != NULL)
                 wake_up (&req->rq_set->set_waitq);
         else
-                wake_up(&req->rq_wait_for_rep);
+                wake_up(&req->rq_reply_waitq);
         spin_unlock_irqrestore (&req->rq_lock, flags);
 }
 
@@ -1354,7 +1354,7 @@ restart:
                 DEBUG_REQ(D_HA, req, "\"%s\" waiting for recovery: (%d > %d)",
                           current->comm, req->rq_send_state, imp->imp_state);
                 lwi = LWI_INTR(interrupted_request, req);
-                rc = l_wait_event(req->rq_wait_for_rep,
+                rc = l_wait_event(req->rq_reply_waitq,
                                   (req->rq_send_state == imp->imp_state ||
                                    req->rq_err),
                                   &lwi);
@@ -1398,7 +1398,7 @@ restart:
         }
         lwi = LWI_TIMEOUT_INTR(timeout, expired_request, interrupted_request,
                                req);
-        l_wait_event(req->rq_wait_for_rep, ptlrpc_check_reply(req), &lwi);
+        l_wait_event(req->rq_reply_waitq, ptlrpc_check_reply(req), &lwi);
         DEBUG_REQ(D_NET, req, "-- done sleeping");
 
         CDEBUG(D_RPCTRACE, "Completed RPC pname:cluuid:pid:xid:ni:nid:opc "
@@ -1472,7 +1472,7 @@ restart:
         if (req->rq_bulk != NULL) {
                 if (rc >= 0) {                  /* success so far */
                         lwi = LWI_TIMEOUT(timeout, NULL, NULL);
-                        brc = l_wait_event(req->rq_wait_for_rep,
+                        brc = l_wait_event(req->rq_reply_waitq,
                                            ptlrpc_bulk_complete(req->rq_bulk),
                                            &lwi);
                         if (brc != 0) {
@@ -1535,7 +1535,7 @@ int ptlrpc_replay_req(struct ptlrpc_request *req)
 
         CDEBUG(D_OTHER, "-- sleeping\n");
         lwi = LWI_INTR(NULL, NULL); /* XXX needs timeout, nested recovery */
-        l_wait_event(req->rq_wait_for_rep, ptlrpc_check_reply(req), &lwi);
+        l_wait_event(req->rq_reply_waitq, ptlrpc_check_reply(req), &lwi);
         CDEBUG(D_OTHER, "-- done\n");
 
         // up(&cli->cli_rpc_sem);
@@ -1626,7 +1626,7 @@ void ptlrpc_abort_inflight(struct obd_import *imp)
                         if (req->rq_set != NULL)
                                 wake_up(&req->rq_set->set_waitq);
                         else
-                                wake_up(&req->rq_wait_for_rep);
+                                wake_up(&req->rq_reply_waitq);
                 }
                 spin_unlock (&req->rq_lock);
         }
@@ -1643,7 +1643,7 @@ void ptlrpc_abort_inflight(struct obd_import *imp)
                         if (req->rq_set != NULL)
                                 wake_up(&req->rq_set->set_waitq);
                         else
-                                wake_up(&req->rq_wait_for_rep);
+                                wake_up(&req->rq_reply_waitq);
                 }
                 spin_unlock (&req->rq_lock);
         }
index 7807dcc..01cbce0 100644 (file)
@@ -76,7 +76,7 @@ static int reply_out_callback(ptl_event_t *ev)
                 LASSERT(req->rq_want_ack);
                 spin_lock_irqsave(&req->rq_lock, flags);
                 req->rq_want_ack = 0;
-                wake_up(&req->rq_wait_for_rep);
+                wake_up(&req->rq_reply_waitq);
                 spin_unlock_irqrestore(&req->rq_lock, flags);
         } else {
                 // XXX make sure we understand all events
@@ -122,7 +122,7 @@ int reply_in_callback(ptl_event_t *ev)
                 if (req->rq_set != NULL)
                         wake_up(&req->rq_set->set_waitq);
                 else
-                        wake_up(&req->rq_wait_for_rep);
+                        wake_up(&req->rq_reply_waitq);
                 spin_unlock_irqrestore (&req->rq_lock, flags);
         } else {
                 // XXX make sure we understand all events, including ACKs
@@ -254,7 +254,7 @@ static int bulk_put_sink_callback(ptl_event_t *ev)
         if (desc->bd_req->rq_set != NULL)
                 wake_up (&desc->bd_req->rq_set->set_waitq);
         else
-                wake_up (&desc->bd_req->rq_wait_for_rep);
+                wake_up (&desc->bd_req->rq_reply_waitq);
         spin_unlock_irqrestore (&desc->bd_lock, flags);
 
         RETURN(1);
@@ -304,7 +304,7 @@ static int bulk_get_source_callback(ptl_event_t *ev)
         if (desc->bd_req->rq_set != NULL)
                 wake_up (&desc->bd_req->rq_set->set_waitq);
         else
-                wake_up (&desc->bd_req->rq_wait_for_rep);
+                wake_up (&desc->bd_req->rq_reply_waitq);
         spin_unlock_irqrestore (&desc->bd_lock, flags);
 
         RETURN(1);
index 43e650e..1559403 100644 (file)
@@ -159,8 +159,8 @@ void ptlrpc_lprocfs_register_service(struct proc_dir_entry *entry,
                                      struct ptlrpc_service *svc)
 {
         ptlrpc_lprocfs_register(entry, svc->srv_name,
-                                "stats", &svc->svc_procroot, 
-                                &svc->svc_stats);
+                                "stats", &svc->srv_procroot, 
+                                &svc->srv_stats);
 }
 
 void ptlrpc_lprocfs_register_obd(struct obd_device *obddev)
@@ -184,13 +184,13 @@ void ptlrpc_lprocfs_rpc_sent(struct ptlrpc_request *req)
 
 void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc)
 {
-        if (svc->svc_procroot) {
-                lprocfs_remove(svc->svc_procroot);
-                svc->svc_procroot = NULL;
+        if (svc->srv_procroot != NULL) {
+                lprocfs_remove(svc->srv_procroot);
+                svc->srv_procroot = NULL;
         }
-        if (svc->svc_stats) {
-                lprocfs_free_stats(svc->svc_stats);
-                svc->svc_stats = NULL;
+        if (svc->srv_stats) {
+                lprocfs_free_stats(svc->srv_stats);
+                svc->srv_stats = NULL;
         }
 }
 void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd)
index fe8a4cd..fd523a4 100644 (file)
@@ -529,7 +529,7 @@ void ptlrpc_unregister_bulk (struct ptlrpc_request *req)
                         if (desc->bd_req->rq_set != NULL)
                                 wq = &req->rq_set->set_waitq;
                         else
-                                wq = &req->rq_wait_for_rep;
+                                wq = &req->rq_reply_waitq;
                         lwi = LWI_TIMEOUT (10 * HZ, NULL, NULL);
                         rc = l_wait_event(*wq, ptlrpc_bulk_complete(desc), &lwi);
                         LASSERT (rc == 0 || rc == -ETIMEDOUT);
@@ -565,7 +565,7 @@ int ptlrpc_reply(struct ptlrpc_request *req)
         req->rq_repmsg->status = req->rq_status;
         req->rq_repmsg->opc = req->rq_reqmsg->opc;
 
-        init_waitqueue_head(&req->rq_wait_for_rep);
+        init_waitqueue_head(&req->rq_reply_waitq);
         rc = ptl_send_buf(req, req->rq_connection, req->rq_svc->srv_rep_portal);
         if (rc != 0) {
                 /* Do what the callback handler would have done */
index 3d5c1ec..ed969fe 100644 (file)
@@ -208,7 +208,7 @@ void ptlrpc_wake_delayed(struct obd_import *imp)
                         wake_up(&req->rq_set->set_waitq);
                 } else {
                         DEBUG_REQ(D_HA, req, "waking:");
-                        wake_up(&req->rq_wait_for_rep);
+                        wake_up(&req->rq_reply_waitq);
                 }
         }
         spin_unlock_irqrestore(&imp->imp_lock, flags);
index 526b35c..9d3ff82 100644 (file)
@@ -206,7 +206,6 @@ static int handle_incoming_request(struct obd_device *obddev,
         spin_lock_init (&request->rq_lock);
         INIT_LIST_HEAD(&request->rq_list);
         request->rq_svc = svc;
-        request->rq_obd = obddev;
         request->rq_xid = event->match_bits;
         request->rq_reqmsg = event->mem_desc.start + event->offset;
         request->rq_reqlen = event->mlength;
@@ -375,15 +374,15 @@ static int ptlrpc_main(void *arg)
 
                 do_gettimeofday(&start_time);
                 total = timeval_sub(&start_time, &event->arrival_time);
-                if (svc->svc_stats != NULL) {
-                        lprocfs_counter_add(svc->svc_stats, PTLRPC_REQWAIT_CNTR,
+                if (svc->srv_stats != NULL) {
+                        lprocfs_counter_add(svc->srv_stats, PTLRPC_REQWAIT_CNTR,
                                             total);
-                        lprocfs_counter_add(svc->svc_stats,
+                        lprocfs_counter_add(svc->srv_stats,
                                             PTLRPC_SVCIDLETIME_CNTR,
                                             timeval_sub(&start_time,
                                                         &finish_time));
 #if 0 /* Wait for b_eq branch */
-                        lprocfs_counter_add(svc->svc_stats,
+                        lprocfs_counter_add(svc->srv_stats,
                                             PTLRPC_SVCEQDEPTH_CNTR, 0);
 #endif
                 }
@@ -406,11 +405,11 @@ static int ptlrpc_main(void *arg)
                        "(%ldus total)\n", request->rq_xid, event->initiator.nid,
                        total, timeval_sub(&finish_time, &event->arrival_time));
 
-                if (svc->svc_stats != NULL) {
+                if (svc->srv_stats != NULL) {
                         int opc = opcode_offset(request->rq_reqmsg->opc);
                         if (opc > 0) {
                                 LASSERT(opc < LUSTRE_MAX_OPCODES);
-                                lprocfs_counter_add(svc->svc_stats,
+                                lprocfs_counter_add(svc->srv_stats,
                                                     opc + PTLRPC_LAST_CNTR,
                                                     total);
                         }
index 0e8c3a7..3c4c0ad 100644 (file)
@@ -285,6 +285,9 @@ command_t cmdlist[] = {
          "light-weight tracing\n"
          "usage: lwt start\n"
          "       lwt stop [file]"},
+        {"memhog", jt_ptl_memhog, 0,
+         "memory pressure testing\n"
+         "usage: memhog <page count> [<gfp flags>]"},
                 
         /* User interface commands */
         {"======= control ========", jt_noop, 0, "control commands"},
index f06a5bd..f8375b9 100755 (executable)
@@ -148,7 +148,7 @@ lmc_options = [
     ('route', "Add a new route for the cluster.", PARAM),
     ('router', "Optional flag to mark a node as router."),
     ('gw', "Specify the nid of the gateway for a route.", PARAM),
-    ('gw_cluster_id', "", PARAM, "0"),
+    ('gateway_cluster_id', "", PARAM, "0"),
     ('target_cluster_id', "", PARAM, "0"),
     ('lo', "For a range route, this is the low value nid.", PARAM),
     ('hi', "For a range route, this is a hi value nid.", PARAM,""),
@@ -624,7 +624,7 @@ def add_route(gen, lustre, options):
     node_name = get_option(options, 'node')
     gw_net_type = get_option(options, 'nettype')
     gw = get_option(options, 'gw')
-    gw_cluster_id = get_option(options, 'gw_cluster_id')
+    gw_cluster_id = get_option(options, 'gateway_cluster_id')
     tgt_cluster_id = get_option(options, 'target_cluster_id')
     lo = get_option(options, 'lo')
     hi = get_option(options, 'hi')