-/* $Id: cygwin-ioctl.h,v 1.2 2003/12/03 03:14:43 phil Exp $
+/* $Id: cygwin-ioctl.h,v 1.3 2003/12/03 05:12:41 phil Exp $
*
* linux/ioctl.h for Linux by H.H. Bergman.
*/
if (portal_cerror == 0) \
break; \
CHECK_STACK(CDEBUG_STACK); \
- if (!(mask) || ((mask) & (D_ERROR | D_EMERG)) || \
+ if (!(mask) || ((mask) & (D_ERROR | D_EMERG | D_WARNING)) || \
(portal_debug & (mask) && \
portal_subsystem_debug & DEBUG_SUBSYSTEM)) \
portals_debug_msg(DEBUG_SUBSYSTEM, mask, \
#define GFP_MEMALLOC 0
#endif
-#define PORTAL_ALLOC(ptr, size) \
+#define PORTAL_ALLOC_GFP(ptr, size, mask) \
do { \
LASSERT (!in_interrupt()); \
if ((size) > PORTAL_VMALLOC_SIZE) \
(ptr) = vmalloc(size); \
else \
- (ptr) = kmalloc((size), (GFP_KERNEL | GFP_MEMALLOC)); \
- if ((ptr) == NULL) \
+ (ptr) = kmalloc((size), (mask)); \
+ if ((ptr) == NULL) { \
CERROR("PORTALS: out of memory at %s:%d (tried to alloc '"\
#ptr "' = %d)\n", __FILE__, __LINE__, (int)(size));\
- else { \
+ CERROR("PORTALS: %d total bytes allocated by portals\n", \
+ atomic_read(&portal_kmemory)); \
+ } else { \
portal_kmem_inc((ptr), (size)); \
memset((ptr), 0, (size)); \
} \
(int)(size), (ptr), atomic_read (&portal_kmemory)); \
} while (0)
+#define PORTAL_ALLOC(ptr, size) \
+ PORTAL_ALLOC_GFP(ptr, size, (GFP_KERNEL | GFP_MEMALLOC))
+
+#define PORTAL_ALLOC_ATOMIC(ptr, size) \
+ PORTAL_ALLOC_GFP(ptr, size, (GFP_ATOMIC | GFP_MEMALLOC))
+
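With the GFP mask hoisted into a parameter, each call site now chooses its own blocking behaviour: PORTAL_ALLOC keeps the old GFP_KERNEL semantics, while PORTAL_ALLOC_ATOMIC serves callers that must not sleep (as ksocknal_sendmsg() does later in this patch). A minimal usage sketch, with a hypothetical descriptor type:

/* Sketch only: ksock_foo_t is an invented type for illustration. */
ksock_foo_t *desc;

PORTAL_ALLOC(desc, sizeof(*desc));       /* process context; may sleep     */
if (desc == NULL)
        return -ENOMEM;                  /* the macro has already CERROR'd */

/* ... desc arrives zeroed and accounted in portal_kmemory ... */

PORTAL_FREE(desc, sizeof(*desc));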
#define PORTAL_FREE(ptr, size) \
do { \
int s = (size); \
CERROR("PORTALS: out of memory at %s:%d (tried to alloc" \
" '" #ptr "' from slab '" #slab "')\n", __FILE__, \
__LINE__); \
+ CERROR("PORTALS: %d total bytes allocated by portals\n", \
+ atomic_read(&portal_kmemory)); \
} else { \
portal_kmem_inc((ptr), (size)); \
memset((ptr), 0, (size)); \
} \
- CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %ld at %p (tot %d).\n", \
+ CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d).\n", \
(int)(size), (ptr), atomic_read(&portal_kmemory)); \
} while (0)
/******************************************************************************/
/* Light-weight trace
* Support for temporary event tracing with minimal Heisenberg effect. */
-#define LWT_SUPPORT 1
+#define LWT_SUPPORT 0
+
+#define LWT_MEMORY (64<<20)
+#define LWT_MAX_CPUS 4
typedef struct {
cycles_t lwte_when;
extern int lwt_lookup_string (int *size, char *knlptr,
char *usrptr, int usrsize);
extern int lwt_control (int enable, int clear);
-extern int lwt_snapshot (int *ncpu, int *total_size,
+extern int lwt_snapshot (cycles_t *now, int *ncpu, int *total_size,
void *user_ptr, int user_size);
/* Note that we _don't_ define LWT_EVENT at all if LWT_SUPPORT isn't set.
#endif /* __KERNEL__ */
#endif /* LWT_SUPPORT */
+struct portals_device_userstate
+{
+ int pdu_memhog_pages;
+ struct page *pdu_memhog_root_page;
+};
#include <linux/portals_lib.h>
#define IOC_PORTAL_LWT_CONTROL _IOWR('e', 39, long)
#define IOC_PORTAL_LWT_SNAPSHOT _IOWR('e', 40, long)
#define IOC_PORTAL_LWT_LOOKUP_STRING _IOWR('e', 41, long)
-#define IOC_PORTAL_MAX_NR 41
+#define IOC_PORTAL_MEMHOG _IOWR('e', 42, long)
+#define IOC_PORTAL_MAX_NR 42
enum {
QSWNAL = 1,
lib_eq_alloc (nal_cb_t *nal)
{
/* NEVER called with statelock held */
- lib_eq_t *eq = kmem_cache_alloc(ptl_eq_slab, GFP_NOFS);
+ lib_eq_t *eq;
+ PORTAL_SLAB_ALLOC(eq, ptl_eq_slab, sizeof(*eq));
if (eq == NULL)
return (NULL);
{
/* ALWAYS called with statelock held */
atomic_dec (&eq_in_use_count);
- kmem_cache_free(ptl_eq_slab, eq);
+ PORTAL_SLAB_FREE(eq, ptl_eq_slab, sizeof(*eq));
}
static inline lib_md_t *
lib_md_alloc (nal_cb_t *nal)
{
/* NEVER called with statelock held */
- lib_md_t *md = kmem_cache_alloc(ptl_md_slab, GFP_NOFS);
+ lib_md_t *md;
+ PORTAL_SLAB_ALLOC(md, ptl_md_slab, sizeof(*md));
if (md == NULL)
return (NULL);
{
/* ALWAYS called with statelock held */
atomic_dec (&md_in_use_count);
- kmem_cache_free(ptl_md_slab, md);
+ PORTAL_SLAB_FREE(md, ptl_md_slab, sizeof(*md));
}
static inline lib_me_t *
lib_me_alloc (nal_cb_t *nal)
{
/* NEVER called with statelock held */
- lib_me_t *me = kmem_cache_alloc(ptl_me_slab, GFP_NOFS);
+ lib_me_t *me;
+ PORTAL_SLAB_ALLOC(me, ptl_me_slab, sizeof(*me));
if (me == NULL)
return (NULL);
{
/* ALWAYS called with statelock held */
atomic_dec (&me_in_use_count);
- kmem_cache_free(ptl_me_slab, me);
+ PORTAL_SLAB_FREE(me, ptl_me_slab, sizeof(*me));
}
static inline lib_msg_t *
lib_msg_alloc(nal_cb_t *nal)
{
/* ALWAYS called with statelock held */
- lib_msg_t *msg = kmem_cache_alloc(ptl_msg_slab, GFP_ATOMIC);
+ lib_msg_t *msg;
+ PORTAL_SLAB_ALLOC(msg, ptl_msg_slab, sizeof(*msg));
if (msg == NULL)
return (NULL);
{
/* ALWAYS called with statelock held */
atomic_dec (&msg_in_use_count);
- kmem_cache_free(ptl_msg_slab, msg);
+ PORTAL_SLAB_FREE(msg, ptl_msg_slab, sizeof(*msg));
}
#endif
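The PORTAL_SLAB_ALLOC/FREE wrappers themselves are not part of this hunk. Going by the ChangeLog entry further down ("use PORTAL_SLAB_ALLOC for structures, to get GFP_MEMALLOC"), a minimal sketch of what such wrappers could look like -- an illustrative assumption, not the committed definition -- is:

/* Illustrative sketch only; the real macros are defined elsewhere and may
 * differ.  Since lib_msg_alloc() is called with the statelock held, a
 * non-sleeping mask is used here; GFP_MEMALLOC comes from the ChangeLog
 * note, and accounting mirrors PORTAL_ALLOC_GFP above. */
#define PORTAL_SLAB_ALLOC(ptr, slab, size)                              \
do {                                                                    \
        (ptr) = kmem_cache_alloc((slab), GFP_ATOMIC | GFP_MEMALLOC);    \
        if ((ptr) != NULL) {                                            \
                portal_kmem_inc((ptr), (size));                         \
                memset((ptr), 0, (size));                               \
        }                                                               \
} while (0)

#define PORTAL_SLAB_FREE(ptr, slab, size)                               \
do {                                                                    \
        atomic_sub((size), &portal_kmemory);                            \
        kmem_cache_free((slab), (ptr));                                 \
} while (0)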
lib_eq_alloc (nal_cb_t *nal)
{
/* NEVER called with statelock held */
- lib_eq_t *eq = kmem_cache_alloc(ptl_eq_slab, GFP_NOFS);
+ lib_eq_t *eq;
+ PORTAL_SLAB_ALLOC(eq, ptl_eq_slab, sizeof(*eq));
if (eq == NULL)
return (NULL);
{
/* ALWAYS called with statelock held */
atomic_dec (&eq_in_use_count);
- kmem_cache_free(ptl_eq_slab, eq);
+ PORTAL_SLAB_FREE(eq, ptl_eq_slab, sizeof(*eq));
}
static inline lib_md_t *
lib_md_alloc (nal_cb_t *nal)
{
/* NEVER called with statelock held */
- lib_md_t *md = kmem_cache_alloc(ptl_md_slab, GFP_NOFS);
+ lib_md_t *md;
+ PORTAL_SLAB_ALLOC(md, ptl_md_slab, sizeof(*md));
if (md == NULL)
return (NULL);
{
/* ALWAYS called with statelock held */
atomic_dec (&md_in_use_count);
- kmem_cache_free(ptl_md_slab, md);
+ PORTAL_SLAB_FREE(md, ptl_md_slab, sizeof(*md));
}
static inline lib_me_t *
lib_me_alloc (nal_cb_t *nal)
{
/* NEVER called with statelock held */
- lib_me_t *me = kmem_cache_alloc(ptl_me_slab, GFP_NOFS);
+ lib_me_t *me;
+ PORTAL_SLAB_ALLOC(me, ptl_me_slab, sizeof(*me));
if (me == NULL)
return (NULL);
{
/* ALWAYS called with statelock held */
atomic_dec (&me_in_use_count);
- kmem_cache_free(ptl_me_slab, me);
+ PORTAL_SLAB_FREE(me, ptl_me_slab, sizeof(*me));
}
static inline lib_msg_t *
lib_msg_alloc(nal_cb_t *nal)
{
/* ALWAYS called with statelock held */
- lib_msg_t *msg = kmem_cache_alloc(ptl_msg_slab, GFP_ATOMIC);
+ lib_msg_t *msg;
+ PORTAL_SLAB_ALLOC(msg, ptl_msg_slab, sizeof(*msg));
if (msg == NULL)
return (NULL);
{
/* ALWAYS called with statelock held */
atomic_dec (&msg_in_use_count);
- kmem_cache_free(ptl_msg_slab, msg);
+ PORTAL_SLAB_FREE(msg, ptl_msg_slab, sizeof(*msg));
}
#endif
int jt_ptl_print_routes (int argc, char **argv);
int jt_ptl_fail_nid (int argc, char **argv);
int jt_ptl_lwt(int argc, char **argv);
+int jt_ptl_memhog(int argc, char **argv);
int dbg_initialize(int argc, char **argv);
int jt_dbg_filter(int argc, char **argv);
int jt_ptl_print_routes (int argc, char **argv);
int jt_ptl_fail_nid (int argc, char **argv);
int jt_ptl_lwt(int argc, char **argv);
+int jt_ptl_memhog(int argc, char **argv);
int dbg_initialize(int argc, char **argv);
int jt_dbg_filter(int argc, char **argv);
}
void
-ksocknal_free_buffers (void)
+ksocknal_free_fmbs (ksock_fmb_pool_t *p)
{
- if (ksocknal_data.ksnd_fmbs != NULL) {
- ksock_fmb_t *fmb = (ksock_fmb_t *)ksocknal_data.ksnd_fmbs;
- int i;
- int j;
-
- for (i = 0;
- i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS);
- i++, fmb++)
- for (j = 0; j < fmb->fmb_npages; j++)
- if (fmb->fmb_pages[j] != NULL)
- __free_page (fmb->fmb_pages[j]);
-
- PORTAL_FREE (ksocknal_data.ksnd_fmbs,
- sizeof (ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS +
- SOCKNAL_LARGE_FWD_NMSGS));
+ ksock_fmb_t *fmb;
+ int i;
+
+ LASSERT (list_empty(&p->fmp_blocked_conns));
+ LASSERT (p->fmp_nactive_fmbs == 0);
+
+ while (!list_empty(&p->fmp_idle_fmbs)) {
+
+ fmb = list_entry(p->fmp_idle_fmbs.next,
+ ksock_fmb_t, fmb_list);
+
+ for (i = 0; i < fmb->fmb_npages; i++)
+ if (fmb->fmb_pages[i] != NULL)
+ __free_page(fmb->fmb_pages[i]);
+
+ list_del(&fmb->fmb_list);
+ PORTAL_FREE(fmb, sizeof(*fmb));
}
+}
+
+void
+ksocknal_free_buffers (void)
+{
+ ksocknal_free_fmbs(&ksocknal_data.ksnd_small_fmp);
+ ksocknal_free_fmbs(&ksocknal_data.ksnd_large_fmp);
- LASSERT (ksocknal_data.ksnd_active_ltxs == 0);
- if (ksocknal_data.ksnd_ltxs != NULL)
- PORTAL_FREE (ksocknal_data.ksnd_ltxs,
- sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS +
- SOCKNAL_NNBLK_LTXS));
+ LASSERT (atomic_read(&ksocknal_data.ksnd_nactive_ltxs) == 0);
if (ksocknal_data.ksnd_schedulers != NULL)
PORTAL_FREE (ksocknal_data.ksnd_schedulers,
PORTAL_ALLOC (ksocknal_data.ksnd_peers,
sizeof (struct list_head) * ksocknal_data.ksnd_peer_hash_size);
if (ksocknal_data.ksnd_peers == NULL)
- RETURN (-ENOMEM);
+ return (-ENOMEM);
for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]);
INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_idle_fmbs);
INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns);
- spin_lock_init(&ksocknal_data.ksnd_idle_ltx_lock);
- INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_nblk_ltx_list);
- INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_ltx_list);
- init_waitqueue_head(&ksocknal_data.ksnd_idle_ltx_waitq);
-
spin_lock_init (&ksocknal_data.ksnd_reaper_lock);
INIT_LIST_HEAD (&ksocknal_data.ksnd_zombie_conns);
INIT_LIST_HEAD (&ksocknal_data.ksnd_deathrow_conns);
sizeof(ksock_sched_t) * SOCKNAL_N_SCHED);
if (ksocknal_data.ksnd_schedulers == NULL) {
ksocknal_module_fini ();
- RETURN(-ENOMEM);
+ return (-ENOMEM);
}
for (i = 0; i < SOCKNAL_N_SCHED; i++) {
init_waitqueue_head (&kss->kss_waitq);
}
- CDEBUG (D_MALLOC, "ltx "LPSZ", total "LPSZ"\n", sizeof (ksock_ltx_t),
- sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS));
-
- PORTAL_ALLOC(ksocknal_data.ksnd_ltxs,
- sizeof(ksock_ltx_t) * (SOCKNAL_NLTXS +SOCKNAL_NNBLK_LTXS));
- if (ksocknal_data.ksnd_ltxs == NULL) {
- ksocknal_module_fini ();
- return (-ENOMEM);
- }
-
- /* Deterministic bugs please */
- memset (ksocknal_data.ksnd_ltxs, 0xeb,
- sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS));
-
- for (i = 0; i < SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS; i++) {
- ksock_ltx_t *ltx = &((ksock_ltx_t *)ksocknal_data.ksnd_ltxs)[i];
-
-                ltx->ltx_tx.tx_hdr = &ltx->ltx_hdr;
- ltx->ltx_idle = i < SOCKNAL_NLTXS ?
- &ksocknal_data.ksnd_idle_ltx_list :
- &ksocknal_data.ksnd_idle_nblk_ltx_list;
-                list_add (&ltx->ltx_tx.tx_list, ltx->ltx_idle);
- }
-
rc = PtlNIInit(ksocknal_init, 32, 4, 0, &ksocknal_ni);
if (rc != 0) {
CERROR("ksocknal: PtlNIInit failed: error %d\n", rc);
ksocknal_module_fini ();
- RETURN (rc);
+ return (rc);
}
PtlNIDebug(ksocknal_ni, ~0);
CERROR("Can't spawn socknal scheduler[%d]: %d\n",
i, rc);
ksocknal_module_fini ();
- RETURN (rc);
+ return (rc);
}
}
if (rc != 0) {
CERROR("Can't spawn socknal autoconnectd: %d\n", rc);
ksocknal_module_fini ();
- RETURN (rc);
+ return (rc);
}
}
if (rc != 0) {
CERROR ("Can't spawn socknal reaper: %d\n", rc);
ksocknal_module_fini ();
- RETURN (rc);
+ return (rc);
}
rc = kpr_register(&ksocknal_data.ksnd_router,
} else {
/* Only allocate forwarding buffers if I'm on a gateway */
- PORTAL_ALLOC(ksocknal_data.ksnd_fmbs,
- sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS +
- SOCKNAL_LARGE_FWD_NMSGS));
- if (ksocknal_data.ksnd_fmbs == NULL) {
- ksocknal_module_fini ();
- RETURN(-ENOMEM);
- }
-
- /* NULL out buffer pointers etc */
- memset(ksocknal_data.ksnd_fmbs, 0,
- sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS +
- SOCKNAL_LARGE_FWD_NMSGS));
-
for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS +
SOCKNAL_LARGE_FWD_NMSGS); i++) {
- ksock_fmb_t *fmb =
- &((ksock_fmb_t *)ksocknal_data.ksnd_fmbs)[i];
+ ksock_fmb_t *fmb;
+
+ PORTAL_ALLOC(fmb, sizeof(*fmb));
+ if (fmb == NULL) {
+ ksocknal_module_fini();
+ return (-ENOMEM);
+ }
if (i < SOCKNAL_SMALL_FWD_NMSGS) {
fmb->fmb_npages = SOCKNAL_SMALL_FWD_PAGES;
fmb->fmb_pool = &ksocknal_data.ksnd_large_fmp;
}
- LASSERT (fmb->fmb_npages > 0);
for (j = 0; j < fmb->fmb_npages; j++) {
fmb->fmb_pages[j] = alloc_page(GFP_KERNEL);
return (-ENOMEM);
}
- LASSERT(page_address (fmb->fmb_pages[j]) !=
- NULL);
+ LASSERT(page_address(fmb->fmb_pages[j]) != NULL);
}
list_add(&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs);
#define SOCKNAL_PEER_HASH_SIZE 101 /* # peer lists */
-#define SOCKNAL_NLTXS 128 /* # normal transmit messages */
-#define SOCKNAL_NNBLK_LTXS 128 /* # transmit messages reserved if can't block */
-
#define SOCKNAL_SMALL_FWD_NMSGS 128 /* # small messages I can be forwarding at any time */
#define SOCKNAL_LARGE_FWD_NMSGS 64 /* # large messages I can be forwarding at any time */
typedef struct /* pool of forwarding buffers */
{
spinlock_t fmp_lock; /* serialise */
- struct list_head fmp_idle_fmbs; /* buffers waiting for a connection */
+ struct list_head fmp_idle_fmbs; /* free buffers */
struct list_head fmp_blocked_conns; /* connections waiting for a buffer */
+ int fmp_nactive_fmbs; /* # buffers in use */
} ksock_fmb_pool_t;
kpr_router_t ksnd_router; /* THE router */
- void *ksnd_fmbs; /* all the pre-allocated FMBs */
ksock_fmb_pool_t ksnd_small_fmp; /* small message forwarding buffers */
ksock_fmb_pool_t ksnd_large_fmp; /* large message forwarding buffers */
- void *ksnd_ltxs; /* all the pre-allocated LTXs */
- spinlock_t ksnd_idle_ltx_lock; /* serialise ltx alloc/free */
- struct list_head ksnd_idle_ltx_list; /* where to get an idle LTX */
- struct list_head ksnd_idle_nblk_ltx_list; /* where to get an idle LTX if you can't block */
- wait_queue_head_t ksnd_idle_ltx_waitq; /* where to block for an idle LTX */
- int ksnd_active_ltxs; /* #active ltxs */
+ atomic_t ksnd_nactive_ltxs; /* #active ltxs */
struct list_head ksnd_deathrow_conns; /* conns to be closed */
struct list_head ksnd_zombie_conns; /* conns to be freed */
#define KSOCK_ZCCD_2_TX(ptr) list_entry (ptr, ksock_tx_t, tx_zccd)
/* network zero copy callback descriptor embedded in ksock_tx_t */
-/* space for the tx frag descriptors: hdr is always 1 iovec
- * and payload is PTL_MD_MAX of either type. */
-typedef struct
-{
- struct iovec hdr;
- union {
- struct iovec iov[PTL_MD_MAX_IOV];
- ptl_kiov_t kiov[PTL_MD_MAX_IOV];
- } payload;
-} ksock_txiovspace_t;
-
typedef struct /* locally transmitted packet */
{
ksock_tx_t ltx_tx; /* send info */
- struct list_head *ltx_idle; /* where to put when idle */
void *ltx_private; /* lib_finalize() callback arg */
void *ltx_cookie; /* lib_finalize() callback arg */
- ksock_txiovspace_t ltx_iov_space; /* where to stash frag descriptors */
ptl_hdr_t ltx_hdr; /* buffer for packet header */
+ int ltx_desc_size; /* bytes allocated for this desc */
+ struct iovec ltx_iov[1]; /* iov for hdr + payload */
+ ptl_kiov_t ltx_kiov[0]; /* kiov for payload */
} ksock_ltx_t;
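With the fixed-size ksock_txiovspace_t union gone, the descriptor is sized per packet: ltx_iov[]/ltx_kiov[] are trailing arrays, and the allocation size follows whichever frag type the payload uses. This is what the ChangeLog's "dynamic allocation of socknal TX descriptors" refers to, and it mirrors the sizing logic in ksocknal_sendmsg() further down in this patch:

/* One iovec slot is always reserved for the header frag. */
if (payload_iov != NULL)
        desc_size = offsetof(ksock_ltx_t, ltx_iov[1 + payload_niov]);
else
        desc_size = offsetof(ksock_ltx_t, ltx_kiov[payload_niov]);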
#define KSOCK_TX_2_KPR_FWD_DESC(ptr) list_entry ((kprfd_scratch_t *)ptr, kpr_fwd_desc_t, kprfd_scratch)
return 0;
}
-ksock_ltx_t *
-ksocknal_get_ltx (int may_block)
-{
- unsigned long flags;
- ksock_ltx_t *ltx = NULL;
-
- for (;;) {
- spin_lock_irqsave (&ksocknal_data.ksnd_idle_ltx_lock, flags);
-
- if (!list_empty (&ksocknal_data.ksnd_idle_ltx_list)) {
- ltx = list_entry(ksocknal_data.ksnd_idle_ltx_list.next,
- ksock_ltx_t, ltx_tx.tx_list);
-                        list_del (&ltx->ltx_tx.tx_list);
- ksocknal_data.ksnd_active_ltxs++;
- break;
- }
-
- if (!may_block) {
- if (!list_empty(&ksocknal_data.ksnd_idle_nblk_ltx_list)) {
- ltx = list_entry(ksocknal_data.ksnd_idle_nblk_ltx_list.next,
- ksock_ltx_t, ltx_tx.tx_list);
-                                list_del (&ltx->ltx_tx.tx_list);
- ksocknal_data.ksnd_active_ltxs++;
- }
- break;
- }
-
- spin_unlock_irqrestore(&ksocknal_data.ksnd_idle_ltx_lock,
- flags);
-
- wait_event (ksocknal_data.ksnd_idle_ltx_waitq,
- !list_empty (&ksocknal_data.ksnd_idle_ltx_list));
- }
-
- spin_unlock_irqrestore (&ksocknal_data.ksnd_idle_ltx_lock, flags);
-
- return (ltx);
-}
-
void
-ksocknal_put_ltx (ksock_ltx_t *ltx)
+ksocknal_free_ltx (ksock_ltx_t *ltx)
{
- unsigned long flags;
-
- spin_lock_irqsave (&ksocknal_data.ksnd_idle_ltx_lock, flags);
-
- ksocknal_data.ksnd_active_ltxs--;
-        list_add_tail (&ltx->ltx_tx.tx_list, ltx->ltx_idle);
-
- /* normal tx desc => wakeup anyone blocking for one */
- if (ltx->ltx_idle == &ksocknal_data.ksnd_idle_ltx_list)
- wake_up (&ksocknal_data.ksnd_idle_ltx_waitq);
-
- spin_unlock_irqrestore (&ksocknal_data.ksnd_idle_ltx_lock, flags);
+ atomic_dec(&ksocknal_data.ksnd_nactive_ltxs);
+ PORTAL_FREE(ltx, ltx->ltx_desc_size);
}
#if SOCKNAL_ZC
}
int
-ksocknal_sendmsg (ksock_conn_t *conn, ksock_tx_t *tx)
+ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx)
{
/* Return 0 on success, < 0 on error.
* caller checks tx_resid to determine progress/completion */
}
rc = ksocknal_getconnsock (conn);
- if (rc != 0)
+ if (rc != 0) {
+ LASSERT (conn->ksnc_closing);
return (rc);
+ }
for (;;) {
LASSERT (tx->tx_resid != 0);
- if (conn->ksnc_closing) {
- rc = -ESHUTDOWN;
- break;
- }
-
if (tx->tx_niov != 0)
rc = ksocknal_send_iov (conn, tx);
else
}
int
-ksocknal_recvmsg (ksock_conn_t *conn)
+ksocknal_receive (ksock_conn_t *conn)
{
/* Return 1 on success, 0 on EOF, < 0 on error.
* Caller checks ksnc_rx_nob_wanted to determine
}
rc = ksocknal_getconnsock (conn);
- if (rc != 0)
+ if (rc != 0) {
+ LASSERT (conn->ksnc_closing);
return (rc);
+ }
for (;;) {
- if (conn->ksnc_closing) {
- rc = -ESHUTDOWN;
- break;
- }
-
if (conn->ksnc_rx_niov != 0)
rc = ksocknal_recv_iov (conn);
else
lib_finalize (&ksocknal_lib, ltx->ltx_private, ltx->ltx_cookie);
- ksocknal_put_ltx (ltx);
+ ksocknal_free_ltx (ltx);
EXIT;
}
{
int rc;
- rc = ksocknal_sendmsg (conn, tx);
+ rc = ksocknal_transmit (conn, tx);
CDEBUG (D_NET, "send(%d) %d\n", tx->tx_resid, rc);
LASSERT (rc != -EAGAIN);
unsigned long flags;
ksock_sched_t *sched = conn->ksnc_scheduler;
- /* called holding global lock (read or irq-write) */
-
+ /* called holding global lock (read or irq-write) and caller may
+ * not have dropped this lock between finding conn and calling me,
+ * so we don't need the {get,put}connsock dance to deref
+ * ksnc_sock... */
+ LASSERT(!conn->ksnc_closing);
+ LASSERT(tx->tx_resid == tx->tx_nob);
+
CDEBUG (D_NET, "Sending to "LPX64" on port %d\n",
conn->ksnc_peer->ksnp_nid, conn->ksnc_port);
atomic_add (tx->tx_nob, &conn->ksnc_tx_nob);
- tx->tx_resid = tx->tx_nob;
tx->tx_conn = conn;
#if SOCKNAL_ZC
/* NB this sets 1 ref on zccd, so the callback can only occur after
* I've released this ref. */
#endif
-
spin_lock_irqsave (&sched->kss_lock, flags);
conn->ksnc_tx_deadline = jiffies +
tx->tx_nob, tx->tx_niov, tx->tx_nkiov);
tx->tx_conn = NULL; /* only set when assigned a conn */
+ tx->tx_resid = tx->tx_nob;
+ tx->tx_hdr = (ptl_hdr_t *)tx->tx_iov[0].iov_base;
g_lock = &ksocknal_data.ksnd_global_lock;
read_lock (g_lock);
return (-EHOSTUNREACH);
}
-ksock_ltx_t *
-ksocknal_setup_hdr (nal_cb_t *nal, void *private, lib_msg_t *cookie,
- ptl_hdr_t *hdr, int type)
+int
+ksocknal_sendmsg(nal_cb_t *nal,
+ void *private,
+ lib_msg_t *cookie,
+ ptl_hdr_t *hdr,
+ int type,
+ ptl_nid_t nid,
+ ptl_pid_t pid,
+ unsigned int payload_niov,
+ struct iovec *payload_iov,
+ ptl_kiov_t *payload_kiov,
+ size_t payload_nob)
{
ksock_ltx_t *ltx;
+ int desc_size;
+ int rc;
+
+ /* NB 'private' is different depending on what we're sending.
+ * Just ignore it... */
+
+ CDEBUG(D_NET, "sending "LPSZ" bytes in %d frags to nid:"LPX64
+               " pid %d\n", payload_nob, payload_niov, nid, pid);
- /* I may not block for a transmit descriptor if I might block the
- * receiver, or an interrupt handler. */
- ltx = ksocknal_get_ltx (!(type == PTL_MSG_ACK ||
- type == PTL_MSG_REPLY ||
- in_interrupt ()));
+ LASSERT (payload_nob == 0 || payload_niov > 0);
+ LASSERT (payload_niov <= PTL_MD_MAX_IOV);
+
+ /* It must be OK to kmap() if required */
+ LASSERT (payload_kiov == NULL || !in_interrupt ());
+ /* payload is either all vaddrs or all pages */
+ LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
+
+ if (payload_iov != NULL)
+ desc_size = offsetof(ksock_ltx_t, ltx_iov[1 + payload_niov]);
+ else
+ desc_size = offsetof(ksock_ltx_t, ltx_kiov[payload_niov]);
+
+ if (in_interrupt() ||
+ type == PTL_MSG_ACK ||
+ type == PTL_MSG_REPLY) {
+ /* Can't block if in interrupt or responding to an incoming
+ * message */
+ PORTAL_ALLOC_ATOMIC(ltx, desc_size);
+ } else {
+ PORTAL_ALLOC(ltx, desc_size);
+ }
+
if (ltx == NULL) {
- CERROR ("Can't allocate tx desc\n");
- return (NULL);
+ CERROR("Can't allocate tx desc type %d size %d %s\n",
+ type, desc_size, in_interrupt() ? "(intr)" : "");
+ return (PTL_NOSPACE);
}
- /* Init local send packet (storage for hdr, finalize() args) */
+ atomic_inc(&ksocknal_data.ksnd_nactive_ltxs);
+
+ ltx->ltx_desc_size = desc_size;
+
+ /* We always have 1 mapped frag for the header */
+ ltx->ltx_tx.tx_iov = ltx->ltx_iov;
+        ltx->ltx_iov[0].iov_base = &ltx->ltx_hdr;
+ ltx->ltx_iov[0].iov_len = sizeof(*hdr);
ltx->ltx_hdr = *hdr;
+
ltx->ltx_private = private;
ltx->ltx_cookie = cookie;
- /* Init common ltx_tx */
ltx->ltx_tx.tx_isfwd = 0;
- ltx->ltx_tx.tx_nob = sizeof (*hdr);
-
- /* We always have 1 mapped frag for the header */
- ltx->ltx_tx.tx_niov = 1;
-        ltx->ltx_tx.tx_iov = &ltx->ltx_iov_space.hdr;
-        ltx->ltx_tx.tx_iov[0].iov_base = &ltx->ltx_hdr;
- ltx->ltx_tx.tx_iov[0].iov_len = sizeof (ltx->ltx_hdr);
-
- ltx->ltx_tx.tx_kiov = NULL;
- ltx->ltx_tx.tx_nkiov = 0;
+ ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_nob;
- return (ltx);
-}
-
-int
-ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie,
- ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
- unsigned int payload_niov, struct iovec *payload_iov,
- size_t payload_len)
-{
- ksock_ltx_t *ltx;
- int rc;
+ if (payload_iov != NULL) {
+ /* payload is all mapped */
+ ltx->ltx_tx.tx_kiov = NULL;
+ ltx->ltx_tx.tx_nkiov = 0;
- /* NB 'private' is different depending on what we're sending.
- * Just ignore it until we can rely on it
- */
+ ltx->ltx_tx.tx_niov = 1 + payload_niov;
- CDEBUG(D_NET,
- "sending "LPSZ" bytes in %d mapped frags to nid: "LPX64
- " pid %d\n", payload_len, payload_niov, nid, pid);
+ memcpy(ltx->ltx_iov + 1, payload_iov,
+ payload_niov * sizeof (*payload_iov));
- ltx = ksocknal_setup_hdr (nal, private, cookie, hdr, type);
- if (ltx == NULL)
- return (PTL_FAIL);
+ } else {
+ /* payload is all pages */
+ ltx->ltx_tx.tx_kiov = ltx->ltx_kiov;
+ ltx->ltx_tx.tx_nkiov = payload_niov;
- /* append the payload_iovs to the one pointing at the header */
- LASSERT (ltx->ltx_tx.tx_niov == 1 && ltx->ltx_tx.tx_nkiov == 0);
- LASSERT (payload_niov <= PTL_MD_MAX_IOV);
+ ltx->ltx_tx.tx_niov = 1;
- memcpy (ltx->ltx_tx.tx_iov + 1, payload_iov,
- payload_niov * sizeof (*payload_iov));
- ltx->ltx_tx.tx_niov = 1 + payload_niov;
- ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len;
+ memcpy(ltx->ltx_kiov, payload_kiov,
+ payload_niov * sizeof (*payload_kiov));
+ }
-        rc = ksocknal_launch_packet (&ltx->ltx_tx, nid);
+        rc = ksocknal_launch_packet(&ltx->ltx_tx, nid);
if (rc == 0)
return (PTL_OK);
- ksocknal_put_ltx (ltx);
+ ksocknal_free_ltx(ltx);
return (PTL_FAIL);
}
int
+ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie,
+ ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
+ unsigned int payload_niov, struct iovec *payload_iov,
+ size_t payload_len)
+{
+ return (ksocknal_sendmsg(nal, private, cookie,
+ hdr, type, nid, pid,
+ payload_niov, payload_iov, NULL,
+ payload_len));
+}
+
+int
ksocknal_send_pages (nal_cb_t *nal, void *private, lib_msg_t *cookie,
ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
- unsigned int payload_niov, ptl_kiov_t *payload_iov, size_t payload_len)
+ unsigned int payload_niov, ptl_kiov_t *payload_kiov,
+ size_t payload_len)
{
- ksock_ltx_t *ltx;
- int rc;
-
- /* NB 'private' is different depending on what we're sending.
- * Just ignore it until we can rely on it */
-
- CDEBUG(D_NET,
- "sending "LPSZ" bytes in %d mapped frags to nid: "LPX64" pid %d\n",
- payload_len, payload_niov, nid, pid);
-
- ltx = ksocknal_setup_hdr (nal, private, cookie, hdr, type);
- if (ltx == NULL)
- return (PTL_FAIL);
-
- LASSERT (ltx->ltx_tx.tx_niov == 1 && ltx->ltx_tx.tx_nkiov == 0);
- LASSERT (payload_niov <= PTL_MD_MAX_IOV);
-
- ltx->ltx_tx.tx_kiov = ltx->ltx_iov_space.payload.kiov;
- memcpy (ltx->ltx_tx.tx_kiov, payload_iov,
- payload_niov * sizeof (*payload_iov));
- ltx->ltx_tx.tx_nkiov = payload_niov;
- ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len;
-
-        rc = ksocknal_launch_packet (&ltx->ltx_tx, nid);
- if (rc == 0)
- return (PTL_OK);
-
- ksocknal_put_ltx (ltx);
- return (PTL_FAIL);
+ return (ksocknal_sendmsg(nal, private, cookie,
+ hdr, type, nid, pid,
+ payload_niov, NULL, payload_kiov,
+ payload_len));
}
void
tx->tx_iov = fwd->kprfd_iov;
tx->tx_nkiov = 0;
tx->tx_kiov = NULL;
- tx->tx_hdr = (ptl_hdr_t *)fwd->kprfd_iov[0].iov_base;
rc = ksocknal_launch_packet (tx, nid);
if (rc != 0)
spin_lock_irqsave (&fmp->fmp_lock, flags);
list_add (&fmb->fmb_list, &fmp->fmp_idle_fmbs);
+ fmp->fmp_nactive_fmbs--;
if (!list_empty (&fmp->fmp_blocked_conns)) {
conn = list_entry (fmb->fmb_pool->fmp_blocked_conns.next,
ksock_fmb_t *fmb;
LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
- LASSERT (ksocknal_data.ksnd_fmbs != NULL);
+ LASSERT (kpr_routing(&ksocknal_data.ksnd_router));
if (packet_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE)
pool = &ksocknal_data.ksnd_small_fmp;
fmb = list_entry(pool->fmp_idle_fmbs.next,
ksock_fmb_t, fmb_list);
list_del (&fmb->fmb_list);
+ pool->fmp_nactive_fmbs++;
spin_unlock_irqrestore (&pool->fmp_lock, flags);
return (fmb);
return;
}
- if (ksocknal_data.ksnd_fmbs == NULL) { /* not forwarding */
+ if (!kpr_routing(&ksocknal_data.ksnd_router)) { /* not forwarding */
CERROR("dropping packet from "LPX64" (%s) for "LPX64
" (%s): not forwarding\n",
src_nid, portals_nid2str(TCPNAL, src_nid, str),
LASSERT (conn->ksnc_rx_nob_wanted > 0);
- rc = ksocknal_recvmsg(conn);
+ rc = ksocknal_receive(conn);
if (rc <= 0) {
+ LASSERT (rc != -EAGAIN);
+
if (rc == 0)
CWARN ("[%p] EOF from "LPX64" ip %08x:%d\n",
conn, conn->ksnc_peer->ksnp_nid,
* kss_lock. */
conn->ksnc_tx_ready = 0;
spin_unlock_irqrestore (&sched->kss_lock, flags);
-
+
rc = ksocknal_process_transmit(conn, tx);
-
+
spin_lock_irqsave (&sched->kss_lock, flags);
if (rc != -EAGAIN) {
read_lock (&ksocknal_data.ksnd_global_lock);
conn = sk->sk_user_data;
- if (conn == NULL) { /* raced with ksocknal_close_sock */
+ if (conn == NULL) { /* raced with ksocknal_terminate_conn */
LASSERT (sk->sk_data_ready != &ksocknal_data_ready);
sk->sk_data_ready (sk, n);
} else {
(conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ?
" empty" : " queued"));
- if (conn == NULL) { /* raced with ksocknal_close_sock */
+ if (conn == NULL) { /* raced with ksocknal_terminate_conn */
LASSERT (sk->sk_write_space != &ksocknal_write_space);
sk->sk_write_space (sk);
int option;
struct linger linger;
- sock->sk->allocation = GFP_NOFS;
+ sock->sk->allocation = GFP_MEMALLOC;
/* Ensure this socket aborts active sends immediately when we close
* it. */
kportal_daemonize (name);
kportal_blockallsigs ();
+ current->flags |= PF_MEMALLOC;
+
spin_lock_irqsave (&ksocknal_data.ksnd_autoconnectd_lock, flags);
while (!ksocknal_data.ksnd_shuttingdown) {
init_waitqueue_entry (&wait, current);
+ current->flags |= PF_MEMALLOC;
+
spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags);
while (!ksocknal_data.ksnd_shuttingdown) {
return buf;
}
-#elif defined(CONFIG_X86)
+#elif defined(__i386__)
extern int is_kernel_text_address(unsigned long addr);
extern int lookup_symbol(unsigned long address, char *buf, int buflen);
char *portals_debug_dumpstack(void)
{
-#if defined(__x86_64__)
- unsigned long esp = current->thread.rsp;
-#else
unsigned long esp = current->thread.esp;
-#endif
unsigned long *stack = (unsigned long *)&esp;
int size;
unsigned long addr;
#if LWT_SUPPORT
-#define LWT_MEMORY (1<<20) /* 1Mb of trace memory */
-#define LWT_MAX_CPUS 4
-
int lwt_enabled;
int lwt_pages_per_cpu;
lwt_cpu_t lwt_cpus[LWT_MAX_CPUS];
}
int
-lwt_snapshot (int *ncpu, int *total_size, void *user_ptr, int user_size)
+lwt_snapshot (cycles_t *now, int *ncpu, int *total_size,
+ void *user_ptr, int user_size)
{
const int events_per_page = PAGE_SIZE / sizeof(lwt_event_t);
const int bytes_per_page = events_per_page * sizeof(lwt_event_t);
*ncpu = num_online_cpus();
*total_size = num_online_cpus() * lwt_pages_per_cpu * bytes_per_page;
-
+ *now = get_cycles();
+
if (user_ptr == NULL)
return (0);
}
void
+kportal_memhog_free (struct portals_device_userstate *pdu)
+{
+ struct page **level0p = &pdu->pdu_memhog_root_page;
+ struct page **level1p;
+ struct page **level2p;
+ int count1;
+ int count2;
+
+ if (*level0p != NULL) {
+
+ level1p = (struct page **)page_address(*level0p);
+ count1 = 0;
+
+ while (count1 < PAGE_SIZE/sizeof(struct page *) &&
+ *level1p != NULL) {
+
+ level2p = (struct page **)page_address(*level1p);
+ count2 = 0;
+
+ while (count2 < PAGE_SIZE/sizeof(struct page *) &&
+ *level2p != NULL) {
+
+ __free_page(*level2p);
+ pdu->pdu_memhog_pages--;
+ level2p++;
+ count2++;
+ }
+
+ __free_page(*level1p);
+ pdu->pdu_memhog_pages--;
+ level1p++;
+ count1++;
+ }
+
+ __free_page(*level0p);
+ pdu->pdu_memhog_pages--;
+
+ *level0p = NULL;
+ }
+
+ LASSERT (pdu->pdu_memhog_pages == 0);
+}
+
+int
+kportal_memhog_alloc (struct portals_device_userstate *pdu, int npages, int flags)
+{
+ struct page **level0p;
+ struct page **level1p;
+ struct page **level2p;
+ int count1;
+ int count2;
+
+ LASSERT (pdu->pdu_memhog_pages == 0);
+ LASSERT (pdu->pdu_memhog_root_page == NULL);
+
+ if (npages < 0)
+ return -EINVAL;
+
+ if (npages == 0)
+ return 0;
+
+ level0p = &pdu->pdu_memhog_root_page;
+ *level0p = alloc_page(flags);
+ if (*level0p == NULL)
+ return -ENOMEM;
+ pdu->pdu_memhog_pages++;
+
+ level1p = (struct page **)page_address(*level0p);
+ count1 = 0;
+ memset(level1p, 0, PAGE_SIZE);
+
+ while (pdu->pdu_memhog_pages < npages &&
+ count1 < PAGE_SIZE/sizeof(struct page *)) {
+
+ if (signal_pending(current))
+ return (-EINTR);
+
+ *level1p = alloc_page(flags);
+ if (*level1p == NULL)
+ return -ENOMEM;
+ pdu->pdu_memhog_pages++;
+
+ level2p = (struct page **)page_address(*level1p);
+ count2 = 0;
+ memset(level2p, 0, PAGE_SIZE);
+
+ while (pdu->pdu_memhog_pages < npages &&
+ count2 < PAGE_SIZE/sizeof(struct page *)) {
+
+ if (signal_pending(current))
+ return (-EINTR);
+
+ *level2p = alloc_page(flags);
+ if (*level2p == NULL)
+ return (-ENOMEM);
+ pdu->pdu_memhog_pages++;
+
+ level2p++;
+ count2++;
+ }
+
+ level1p++;
+ count1++;
+ }
+
+ return 0;
+}
+
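The memhog allocation is a two-level page tree: a root page of pointers to level-1 pages, each of which holds pointers to the level-2 data pages that actually consume memory. A rough, illustrative bound on how many pages the tree can track (not part of the patch; it assumes nothing beyond PAGE_SIZE and the pointer size):

/* Illustrative only.  On ia32 with 4 KB pages this works out to
 * 1 + 1024 + 1024*1024 pages, i.e. roughly 4 GB of memory. */
static inline unsigned long kportal_memhog_max_pages(void)
{
        unsigned long slots = PAGE_SIZE / sizeof(struct page *);

        return 1 + slots + slots * slots;
}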
+void
kportal_blockallsigs ()
{
unsigned long flags;
/* called when opening /dev/device */
static int kportal_psdev_open(struct inode * inode, struct file * file)
{
+ struct portals_device_userstate *pdu;
ENTRY;
-
+
if (!inode)
RETURN(-EINVAL);
+
PORTAL_MODULE_USE;
+
+ PORTAL_ALLOC(pdu, sizeof(*pdu));
+ if (pdu != NULL) {
+ pdu->pdu_memhog_pages = 0;
+ pdu->pdu_memhog_root_page = NULL;
+ }
+ file->private_data = pdu;
+
RETURN(0);
}
/* called when closing /dev/device */
static int kportal_psdev_release(struct inode * inode, struct file * file)
{
+ struct portals_device_userstate *pdu;
ENTRY;
if (!inode)
RETURN(-EINVAL);
+ pdu = file->private_data;
+ if (pdu != NULL) {
+ kportal_memhog_free(pdu);
+ PORTAL_FREE(pdu, sizeof(*pdu));
+ }
+
PORTAL_MODULE_UNUSE;
RETURN(0);
}
break;
case IOC_PORTAL_LWT_SNAPSHOT:
- err = lwt_snapshot (&data->ioc_count, &data->ioc_misc,
+ err = lwt_snapshot (&data->ioc_nid,
+ &data->ioc_count, &data->ioc_misc,
data->ioc_pbuf1, data->ioc_plen1);
if (err == 0 &&
copy_to_user((char *)arg, data, sizeof (*data)))
copy_to_user((char *)arg, data, sizeof (*data)))
err = -EFAULT;
break;
-#endif
+#endif
+ case IOC_PORTAL_MEMHOG:
+ if (!capable (CAP_SYS_ADMIN))
+ err = -EPERM;
+ else if (file->private_data == NULL) {
+ err = -EINVAL;
+ } else {
+ kportal_memhog_free(file->private_data);
+ err = kportal_memhog_alloc(file->private_data,
+ data->ioc_count,
+ data->ioc_flags);
+ if (err != 0)
+ kportal_memhog_free(file->private_data);
+ }
+ break;
+
default:
err = -EINVAL;
break;
cleanup_lwt:
#if LWT_SUPPORT
lwt_fini();
-#endif
cleanup_debug:
+#endif
portals_debug_cleanup();
return rc;
}
}
static int
-lwt_snapshot(int *ncpu, int *totalsize, lwt_event_t *events, int size)
+lwt_snapshot(cycles_t *now, int *ncpu, int *totalsize,
+ lwt_event_t *events, int size)
{
struct portal_ioctl_data data;
int rc;
LASSERT (data.ioc_count != 0);
LASSERT (data.ioc_misc != 0);
+ if (now != NULL)
+ *now = data.ioc_nid;
+
if (ncpu != NULL)
*ncpu = data.ioc_count;
int
jt_ptl_lwt(int argc, char **argv)
{
-#define MAX_CPUS 8
int ncpus;
int totalspace;
int nevents_per_cpu;
lwt_event_t *events;
- lwt_event_t *cpu_event[MAX_CPUS + 1];
- lwt_event_t *next_event[MAX_CPUS];
- lwt_event_t *first_event[MAX_CPUS];
+ lwt_event_t *cpu_event[LWT_MAX_CPUS + 1];
+ lwt_event_t *next_event[LWT_MAX_CPUS];
+ lwt_event_t *first_event[LWT_MAX_CPUS];
int cpu;
lwt_event_t *e;
int rc;
double mhz;
cycles_t t0;
cycles_t tlast;
+ cycles_t tnow;
+ struct timeval tvnow;
+ int printed_date = 0;
FILE *f = stdout;
if (argc < 2 ||
return (0);
}
- if (lwt_snapshot(&ncpus, &totalspace, NULL, 0) != 0)
+ if (lwt_snapshot(NULL, &ncpus, &totalspace, NULL, 0) != 0)
return (-1);
- if (ncpus > MAX_CPUS) {
- fprintf(stderr, "Too many cpus: %d (%d)\n", ncpus, MAX_CPUS);
+ if (ncpus > LWT_MAX_CPUS) {
+ fprintf(stderr, "Too many cpus: %d (%d)\n",
+ ncpus, LWT_MAX_CPUS);
return (-1);
}
return (-1);
}
- if (lwt_snapshot(NULL, NULL, events, totalspace)) {
+ if (lwt_snapshot(&tnow, NULL, NULL, events, totalspace)) {
free(events);
return (-1);
}
+ /* we want this time to be sampled at snapshot time */
+ gettimeofday(&tvnow, NULL);
+
if (argc > 2) {
f = fopen (argv[2], "w");
if (f == NULL) {
if (t0 <= next_event[cpu]->lwte_when) {
/* on or after the first event */
+ if (!printed_date) {
+ cycles_t du = (tnow - t0) / mhz;
+ time_t then = tvnow.tv_sec - du/1000000;
+
+ if (du % 1000000 > tvnow.tv_usec)
+ then--;
+
+ fprintf(f, "%s", ctime(&then));
+ printed_date = 1;
+ }
+
rc = lwt_print(f, t0, tlast, mhz, cpu, next_event[cpu]);
if (rc != 0)
break;
free(events);
return (0);
-#undef MAX_CPUS
}
+
+int jt_ptl_memhog(int argc, char **argv)
+{
+ static int gfp = 0; /* sticky! */
+
+ struct portal_ioctl_data data;
+ int rc;
+ int count;
+ char *end;
+
+ if (argc < 2) {
+ fprintf(stderr, "usage: %s <npages> [<GFP flags>]\n", argv[0]);
+ return 0;
+ }
+
+ count = strtol(argv[1], &end, 0);
+ if (count < 0 || *end != 0) {
+ fprintf(stderr, "Can't parse page count '%s'\n", argv[1]);
+ return -1;
+ }
+
+ if (argc >= 3) {
+ rc = strtol(argv[2], &end, 0);
+ if (*end != 0) {
+ fprintf(stderr, "Can't parse gfp flags '%s'\n", argv[2]);
+ return -1;
+ }
+ gfp = rc;
+ }
+
+ PORTAL_IOC_INIT(data);
+ data.ioc_count = count;
+ data.ioc_flags = gfp;
+ rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_MEMHOG, &data);
+
+ if (rc != 0) {
+ fprintf(stderr, "memhog %d failed: %s\n", count, strerror(errno));
+ return -1;
+ }
+
+ printf("memhog %d OK\n", count);
+ return 0;
+}
+
+tbd Cluster File Systems, Inc. <info@clusterfs.com>
+ * version 1.0.1
+ * bug fixes
+ - remove now-unused request->rq_obd (278)
+ - if an allocation fails, print out how much memory we've used (1933)
+ - use PORTAL_SLAB_ALLOC for structures, to get GFP_MEMALLOC (1933)
+ - add the "configurable stack size" patch to most series files (1256)
+ - ability to write large log records, for 100+ OST configs (2306)
+ - fix NULL deref when filter_prep fails (2314)
+ - fix operator precedence error in filter_sync
+ - dynamic allocation of socknal TX descriptors (2315)
+ - fix a missed case in the GFP_MEMALLOC patch, can cause deadlock (2310)
+ - fix gcc 2.96 compilation problem in xattr kernel patch (2294)
+ - ensure that CWARN messages in Portals always get to the syslog
+ - __init/__exit are not for prototype decls (ldlm_init/exit)
+ - x86-64 compile warning fixes
+ - fix gateway LMC keyword conflict (2318)
+ * miscellania
+ - allow configurable automake binary, for testing new versions
+ - small update to the lfs documentation
+
2003-12-03 Cluster File Systems, Inc. <info@clusterfs.com>
* version 1.0.0
* fix negative export reference count in fsfilt_sync (2312)
#!/bin/sh
aclocal &&
-automake --add-missing &&
+${AUTOMAKE:-automake} --add-missing &&
${AUTOCONF:-autoconf}
\series bold
lfs\SpecialChar ~
setstripe <filename> <stripe_size> <start_ost> <stripe_cnt>
+\layout Standard
+
+
+\series bold
+lfs\SpecialChar ~
+check <mds| osts| servers>
\layout Subsection
DESCRIPTION
\series bold
+check
+\series default
+Display the status of the MDS or OSTs (as specified in the command), or of
+ all the servers (MDS and OSTs)
+\layout List
+\labelwidthstring 00.00.0000
+
+
+\series bold
+osts
+\series default
+ List all the OSTs for the filesystem
+\layout List
+\labelwidthstring 00.00.0000
+
+
+\series bold
help
\series default
Provides brief help on the various arguments
file
\layout LyX-Code
- $lfs find /mnt/lustre/file1
+ $lfs find /mnt/lustre/foo1
+\layout LyX-Code
+
+ OBDS:
+\layout LyX-Code
+
+ 0: OST_localhost_UUID
+\layout LyX-Code
+
+ /mnt/lustre/foo1
+\layout LyX-Code
+
+ obdidx objid objid group
+\layout LyX-Code
+
+ 0 1 0x1 0
\layout Description
Listing\SpecialChar ~
\layout LyX-Code
$lfs find -r --obd OST2_UUID /mnt/lustre/
+\layout Description
+
+Check\SpecialChar ~
+the\SpecialChar ~
+status\SpecialChar ~
+of\SpecialChar ~
+all\SpecialChar ~
+servers(mds,\SpecialChar ~
+osts)
+\layout LyX-Code
+
+ $lfs check servers
+\layout LyX-Code
+
+ OSC_localhost.localdomain_OST_localhost_mds1 active.
+
+\layout LyX-Code
+
+ OSC_localhost.localdomain_OST_localhost_MNT_localhost active.
+
+\layout LyX-Code
+
+ MDC_localhost.localdomain_mds1_MNT_localhost active.
+\layout LyX-Code
+
+ $
+\layout Description
+
+List\SpecialChar ~
+all\SpecialChar ~
+the\SpecialChar ~
+OSTs
+\layout LyX-Code
+
+ $lfs osts
+\layout LyX-Code
+
+ OBDS:
+\layout LyX-Code
+
+ 0: OST_localhost_UUID
\layout LyX-Code
+ $
\layout Subsection
BUGS
struct ptlrpc_request {
int rq_type; /* one of PTL_RPC_MSG_* */
struct list_head rq_list;
- struct obd_device *rq_obd;
int rq_status;
spinlock_t rq_lock;
unsigned int rq_intr:1, rq_replied:1, rq_want_ack:1, rq_err:1,
int rq_import_generation;
enum lustre_imp_state rq_send_state;
- wait_queue_head_t rq_wait_for_rep; /* XXX also _for_ack */
+ wait_queue_head_t rq_reply_waitq; /* XXX also _for_ack */
/* incoming reply */
ptl_md_t rq_reply_md;
struct list_head srv_threads;
int (*srv_handler)(struct ptlrpc_request *req);
char *srv_name; /* only statically allocated strings here; we don't clean them */
- struct proc_dir_entry *svc_procroot;
- struct lprocfs_stats *svc_stats;
+ struct proc_dir_entry *srv_procroot;
+ struct lprocfs_stats *srv_stats;
int srv_interface_rover;
struct ptlrpc_srv_ni srv_interfaces[0];
#include <linux/autoconf.h>
#include <linux/slab.h>
#include <linux/highmem.h>
-#else
-
#endif
#include <linux/kp30.h>
#include <linux/lustre_compat25.h>
#define fixme() CDEBUG(D_OTHER, "FIXME\n");
#ifdef __KERNEL__
-#include <linux/types.h>
-#include <linux/blkdev.h>
+# include <linux/types.h>
+# include <linux/blkdev.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#define BDEVNAME_DECLARE_STORAGE(foo) char foo[BDEVNAME_SIZE]
-#define ll_bdevname(SB, STORAGE) __bdevname(kdev_t_to_nr(SB->s_dev), STORAGE)
-#define ll_lock_kernel lock_kernel()
-#define ll_sbdev(SB) ((SB)->s_bdev)
+# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+# define BDEVNAME_DECLARE_STORAGE(foo) char foo[BDEVNAME_SIZE]
+# define ll_bdevname(SB, STORAGE) __bdevname(kdev_t_to_nr(SB->s_dev), STORAGE)
+# define ll_lock_kernel lock_kernel()
+# define ll_sbdev(SB) ((SB)->s_bdev)
void dev_set_rdonly(struct block_device *, int);
-#else
-#define BDEVNAME_DECLARE_STORAGE(foo) char __unused_##foo
-#define ll_sbdev(SB) (kdev_t_to_nr((SB)->s_dev))
-#define ll_bdevname(SB,STORAGE) ((void)__unused_##STORAGE,bdevname(ll_sbdev(SB)))
-#define ll_lock_kernel
+# else
+# define BDEVNAME_DECLARE_STORAGE(foo) char __unused_##foo
+# define ll_sbdev(SB) (kdev_t_to_nr((SB)->s_dev))
+# define ll_bdevname(SB,STORAGE) ((void)__unused_##STORAGE,bdevname(ll_sbdev(SB)))
+# define ll_lock_kernel
void dev_set_rdonly(kdev_t, int);
-#endif
+# endif
void dev_clear_rdonly(int);
}
}
#else /* !__KERNEL__ */
-#define LTIME_S(time) (time)
+# define LTIME_S(time) (time)
/* for obd_class.h */
-#ifndef ERR_PTR
-# define ERR_PTR(a) ((void *)(a))
-#endif
+# ifndef ERR_PTR
+# define ERR_PTR(a) ((void *)(a))
+# endif
#endif /* __KERNEL__ */
#ifndef GFP_MEMALLOC
#define GFP_MEMALLOC 0
#endif
+extern atomic_t portal_kmemory;
+
#define OBD_ALLOC_GFP(ptr, size, gfp_mask) \
do { \
(ptr) = kmalloc(size, (gfp_mask)); \
if ((ptr) == NULL) { \
CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \
(int)(size), __FILE__, __LINE__); \
+ CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \
+ atomic_read(&obd_memory), atomic_read(&portal_kmemory));\
} else { \
memset(ptr, 0, size); \
atomic_add(size, &obd_memory); \
if ((ptr) == NULL) { \
CERROR("vmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \
(int)(size), __FILE__, __LINE__); \
+ CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \
+ atomic_read(&obd_memory), atomic_read(&portal_kmemory));\
} else { \
memset(ptr, 0, size); \
atomic_add(size, &obd_memory); \
if ((ptr) == NULL) { \
CERROR("slab-alloc of '"#ptr"' (%d bytes) failed at %s:%d\n", \
(int)(size), __FILE__, __LINE__); \
+ CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \
+ atomic_read(&obd_memory), atomic_read(&portal_kmemory));\
} else { \
memset(ptr, 0, size); \
atomic_add(size, &obd_memory); \
+++ 25/arch/parisc/lib/checksum.c 2003-10-05 00:33:23.000000000 -0700
@@ -16,8 +16,10 @@
*
- * $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $
+ * $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $
*/
-#include <net/checksum.h>
+#include <linux/module.h>
--- linux-2.6.0-test6/drivers/char/ftape/compressor/zftape-compress.c 2003-06-14 12:18:32.000000000 -0700
+++ 25/drivers/char/ftape/compressor/zftape-compress.c 2003-10-05 00:33:24.000000000 -0700
@@ -31,6 +31,7 @@
- char zftc_rev[] = "$Revision: 1.2 $";
- char zftc_dat[] = "$Date: 2003/12/03 03:13:28 $";
+ char zftc_rev[] = "$Revision: 1.3 $";
+ char zftc_dat[] = "$Date: 2003/12/03 05:12:20 $";
+#include <linux/version.h>
#include <linux/errno.h>
--- linux-2.6.0-test6/drivers/isdn/hardware/eicon/divamnt.c 2003-09-27 18:57:44.000000000 -0700
+++ 25/drivers/isdn/hardware/eicon/divamnt.c 2003-10-05 00:33:24.000000000 -0700
@@ -1,4 +1,4 @@
--/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $
-+/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $
+-/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $
++/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $
*
* Driver for Eicon DIVA Server ISDN cards.
* Maint module
-#include "di_defs.h"
#include "debug_if.h"
--static char *main_revision = "$Revision: 1.2 $";
-+static char *main_revision = "$Revision: 1.2 $";
+-static char *main_revision = "$Revision: 1.3 $";
++static char *main_revision = "$Revision: 1.3 $";
static int major;
--- linux-2.6.0-test6/drivers/isdn/hardware/eicon/divasmain.c 2003-09-27 18:57:44.000000000 -0700
+++ 25/drivers/isdn/hardware/eicon/divasmain.c 2003-10-05 00:33:24.000000000 -0700
@@ -1,4 +1,4 @@
--/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $
-+/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $
+-/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $
++/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $
*
* Low level driver for Eicon DIVA Server ISDN cards.
*
#include "diva_dma.h"
#include "diva_pci.h"
--static char *main_revision = "$Revision: 1.2 $";
-+static char *main_revision = "$Revision: 1.2 $";
+-static char *main_revision = "$Revision: 1.3 $";
++static char *main_revision = "$Revision: 1.3 $";
static int major;
--- linux-2.6.0-test6/drivers/isdn/hardware/eicon/dqueue.c 2003-06-14 12:18:22.000000000 -0700
+++ 25/drivers/isdn/hardware/eicon/dqueue.c 2003-10-05 00:33:24.000000000 -0700
@@ -1,10 +1,10 @@
--/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $
-+/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $
+-/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $
++/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $
*
* Driver for Eicon DIVA Server ISDN cards.
* User Mode IDI Interface
--- linux-2.6.0-test6/drivers/isdn/hardware/eicon/mntfunc.c 2003-09-27 18:57:44.000000000 -0700
+++ 25/drivers/isdn/hardware/eicon/mntfunc.c 2003-10-05 00:33:24.000000000 -0700
@@ -1,4 +1,4 @@
--/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $
-+/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $
+-/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $
++/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $
*
* Driver for Eicon DIVA Server ISDN cards.
* Maint module
--- linux-2.6.0-test6/drivers/isdn/hardware/eicon/os_capi.h 2003-06-14 12:18:25.000000000 -0700
+++ 25/drivers/isdn/hardware/eicon/os_capi.h 2003-10-05 00:33:24.000000000 -0700
@@ -1,10 +1,10 @@
--/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $
-+/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $
+-/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $
++/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $
*
* ISDN interface module for Eicon active cards DIVA.
* CAPI Interface OS include files
--- linux-2.6.0-test6/drivers/isdn/hardware/eicon/platform.h 2003-09-27 18:57:44.000000000 -0700
+++ 25/drivers/isdn/hardware/eicon/platform.h 2003-10-05 00:33:24.000000000 -0700
@@ -1,4 +1,4 @@
--/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $
-+/* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $
+-/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $
++/* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $
*
* platform.h
*
+++ 25/drivers/media/video/planb.c 2003-10-05 00:33:24.000000000 -0700
@@ -27,7 +27,6 @@
- /* $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $ */
+ /* $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $ */
-#include <linux/version.h>
#include <linux/init.h>
--- linux-2.6.0-test6/drivers/mtd/chips/map_rom.c 2003-06-14 12:18:24.000000000 -0700
+++ 25/drivers/mtd/chips/map_rom.c 2003-10-05 00:33:24.000000000 -0700
@@ -4,7 +4,6 @@
- * $Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $
+ * $Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $
*/
-#include <linux/version.h>
#include <linux/hdlc.h>
/* Version */
--static const char version[] = "$Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $ for Linux\n";
-+static const char version[] = "$Id: 2.6.0-test6-mm4.patch,v 1.2 2003/12/03 03:13:28 phil Exp $ for Linux\n";
+-static const char version[] = "$Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $ for Linux\n";
++static const char version[] = "$Id: 2.6.0-test6-mm4.patch,v 1.3 2003/12/03 05:12:20 phil Exp $ for Linux\n";
static int debug;
static int quartz;
-$Id: bproc-patch-2.4.20,v 1.2 2003/12/03 03:13:30 phil Exp $
+$Id: bproc-patch-2.4.20,v 1.3 2003/12/03 05:12:25 phil Exp $
Index: linux/fs/exec.c
===================================================================
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
-+ * $Id: bproc-patch-2.4.20,v 1.2 2003/12/03 03:13:30 phil Exp $
++ * $Id: bproc-patch-2.4.20,v 1.3 2003/12/03 05:12:25 phil Exp $
+ *-----------------------------------------------------------------------*/
+#include <linux/kernel.h>
+#include <linux/sched.h>
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
-+ * $Id: bproc-patch-2.4.20,v 1.2 2003/12/03 03:13:30 phil Exp $
++ * $Id: bproc-patch-2.4.20,v 1.3 2003/12/03 05:12:25 phil Exp $
+ *-----------------------------------------------------------------------*/
+#ifndef _LINUX_BPROC_H
+#define _LINUX_BPROC_H
--- /dev/null
+Index: linux-2.4.19-pre1/arch/i386/kernel/entry.S
+===================================================================
+--- linux-2.4.19-pre1.orig/arch/i386/kernel/entry.S 2003-11-21 03:38:55.000000000 +0300
++++ linux-2.4.19-pre1/arch/i386/kernel/entry.S 2003-12-01 18:14:32.000000000 +0300
+@@ -45,6 +45,7 @@
+ #include <linux/linkage.h>
+ #include <asm/segment.h>
+ #include <asm/smp.h>
++#include <asm/current.h>
+
+ EBX = 0x00
+ ECX = 0x04
+@@ -128,10 +129,6 @@
+ .long 3b,6b; \
+ .previous
+
+-#define GET_CURRENT(reg) \
+- movl $-8192, reg; \
+- andl %esp, reg
+-
+ ENTRY(lcall7)
+ pushfl # We get a different stack layout with call gates,
+ pushl %eax # which has to be cleaned up later..
+@@ -144,7 +141,7 @@
+ movl %ecx,CS(%esp) #
+ movl %esp,%ebx
+ pushl %ebx
+- andl $-8192,%ebx # GET_CURRENT
++ andl $-THREAD_SIZE,%ebx # GET_CURRENT
+ movl exec_domain(%ebx),%edx # Get the execution domain
+ movl 4(%edx),%edx # Get the lcall7 handler for the domain
+ pushl $0x7
+@@ -165,7 +162,7 @@
+ movl %ecx,CS(%esp) #
+ movl %esp,%ebx
+ pushl %ebx
+- andl $-8192,%ebx # GET_CURRENT
++ andl $-THREAD_SIZE,%ebx # GET_CURRENT
+ movl exec_domain(%ebx),%edx # Get the execution domain
+ movl 4(%edx),%edx # Get the lcall7 handler for the domain
+ pushl $0x27
+Index: linux-2.4.19-pre1/arch/i386/kernel/smpboot.c
+===================================================================
+--- linux-2.4.19-pre1.orig/arch/i386/kernel/smpboot.c 2001-12-21 20:41:53.000000000 +0300
++++ linux-2.4.19-pre1/arch/i386/kernel/smpboot.c 2003-12-01 18:14:32.000000000 +0300
+@@ -819,7 +819,7 @@
+
+ /* So we see what's up */
+ printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
+- stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle);
++ stack_start.esp = (void *)idle->thread.esp;
+
+ /*
+ * This grunge runs the startup process for
+@@ -892,7 +892,7 @@
+ Dprintk("CPU has booted.\n");
+ } else {
+ boot_error= 1;
+- if (*((volatile unsigned char *)phys_to_virt(8192))
++ if (*((volatile unsigned char *)phys_to_virt(THREAD_SIZE))
+ == 0xA5)
+ /* trampoline started but...? */
+ printk("Stuck ??\n");
+@@ -915,7 +915,7 @@
+ }
+
+ /* mark "stuck" area as not stuck */
+- *((volatile unsigned long *)phys_to_virt(8192)) = 0;
++ *((volatile unsigned long *)phys_to_virt(THREAD_SIZE)) = 0;
+
+ if(clustered_apic_mode) {
+ printk("Restoring NMI vector\n");
+Index: linux-2.4.19-pre1/arch/i386/kernel/traps.c
+===================================================================
+--- linux-2.4.19-pre1.orig/arch/i386/kernel/traps.c 2003-12-01 18:11:31.000000000 +0300
++++ linux-2.4.19-pre1/arch/i386/kernel/traps.c 2003-12-01 18:14:32.000000000 +0300
+@@ -158,7 +158,7 @@
+ unsigned long esp = tsk->thread.esp;
+
+ /* User space on another CPU? */
+- if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1))
++ if ((esp ^ (unsigned long)tsk) & ~(THREAD_SIZE - 1))
+ return;
+ show_trace((unsigned long *)esp);
+ }
+Index: linux-2.4.19-pre1/arch/i386/kernel/head.S
+===================================================================
+--- linux-2.4.19-pre1.orig/arch/i386/kernel/head.S 2003-11-20 19:01:35.000000000 +0300
++++ linux-2.4.19-pre1/arch/i386/kernel/head.S 2003-12-01 18:14:32.000000000 +0300
+@@ -15,6 +15,7 @@
+ #include <asm/page.h>
+ #include <asm/pgtable.h>
+ #include <asm/desc.h>
++#include <asm/current.h>
+
+ #define OLD_CL_MAGIC_ADDR 0x90020
+ #define OLD_CL_MAGIC 0xA33F
+@@ -320,7 +321,7 @@
+ ret
+
+ ENTRY(stack_start)
+- .long SYMBOL_NAME(init_task_union)+8192
++ .long SYMBOL_NAME(init_task_union)+THREAD_SIZE
+ .long __KERNEL_DS
+
+ /* This is the default interrupt "handler" :-) */
+Index: linux-2.4.19-pre1/arch/i386/lib/getuser.S
+===================================================================
+--- linux-2.4.19-pre1.orig/arch/i386/lib/getuser.S 1998-01-13 00:42:52.000000000 +0300
++++ linux-2.4.19-pre1/arch/i386/lib/getuser.S 2003-12-01 18:14:32.000000000 +0300
+@@ -21,6 +21,10 @@
+ * as they get called from within inline assembly.
+ */
+
++/* Duplicated from asm/processor.h */
++#include <asm/current.h>
++#include <linux/config.h>
++
+ addr_limit = 12
+
+ .text
+@@ -28,7 +32,7 @@
+ .globl __get_user_1
+ __get_user_1:
+ movl %esp,%edx
+- andl $0xffffe000,%edx
++ andl $~(THREAD_SIZE - 1),%edx
+ cmpl addr_limit(%edx),%eax
+ jae bad_get_user
+ 1: movzbl (%eax),%edx
+@@ -41,7 +45,7 @@
+ addl $1,%eax
+ movl %esp,%edx
+ jc bad_get_user
+- andl $0xffffe000,%edx
++ andl $~(THREAD_SIZE - 1),%edx
+ cmpl addr_limit(%edx),%eax
+ jae bad_get_user
+ 2: movzwl -1(%eax),%edx
+@@ -54,7 +58,7 @@
+ addl $3,%eax
+ movl %esp,%edx
+ jc bad_get_user
+- andl $0xffffe000,%edx
++ andl $~(THREAD_SIZE - 1),%edx
+ cmpl addr_limit(%edx),%eax
+ jae bad_get_user
+ 3: movl -3(%eax),%edx
+Index: linux-2.4.19-pre1/arch/i386/config.in
+===================================================================
+--- linux-2.4.19-pre1.orig/arch/i386/config.in 2003-11-20 19:01:35.000000000 +0300
++++ linux-2.4.19-pre1/arch/i386/config.in 2003-12-01 18:14:32.000000000 +0300
+@@ -201,6 +201,29 @@
+ if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then
+ define_bool CONFIG_HAVE_DEC_LOCK y
+ fi
++
++choice 'Bigger Stack Size Support' \
++ "off CONFIG_NOBIGSTACK \
++ 16KB CONFIG_STACK_SIZE_16KB \
++ 32KB CONFIG_STACK_SIZE_32KB \
++ 64KB CONFIG_STACK_SIZE_64KB" off
++
++if [ "$CONFIG_NOBIGSTACK" = "y" ]; then
++ define_int CONFIG_STACK_SIZE_SHIFT 1
++else
++ if [ "$CONFIG_STACK_SIZE_16KB" = "y" ]; then
++ define_int CONFIG_STACK_SIZE_SHIFT 2
++ else
++ if [ "$CONFIG_STACK_SIZE_32KB" = "y" ]; then
++ define_int CONFIG_STACK_SIZE_SHIFT 3
++ else
++ if [ "$CONFIG_STACK_SIZE_64KB" = "y" ]; then
++ define_int CONFIG_STACK_SIZE_SHIFT 4
++ fi
++ fi
++ fi
++fi
++
+ endmenu
+
+ mainmenu_option next_comment
+Index: linux-2.4.19-pre1/arch/i386/vmlinux.lds
+===================================================================
+--- linux-2.4.19-pre1.orig/arch/i386/vmlinux.lds 2003-11-20 19:01:35.000000000 +0300
++++ linux-2.4.19-pre1/arch/i386/vmlinux.lds 2003-12-01 18:14:32.000000000 +0300
+@@ -35,7 +35,8 @@
+
+ _edata = .; /* End of data section */
+
+- . = ALIGN(8192); /* init_task */
++/* chose the biggest of the possible stack sizes here? */
++ . = ALIGN(65536); /* init_task */
+ .data.init_task : { *(.data.init_task) }
+
+ . = ALIGN(4096); /* Init code and data */
+Index: linux-2.4.19-pre1/include/asm-i386/current.h
+===================================================================
+--- linux-2.4.19-pre1.orig/include/asm-i386/current.h 1998-08-15 03:35:22.000000000 +0400
++++ linux-2.4.19-pre1/include/asm-i386/current.h 2003-12-01 18:14:32.000000000 +0300
+@@ -1,15 +1,43 @@
+ #ifndef _I386_CURRENT_H
+ #define _I386_CURRENT_H
++#include <asm/page.h>
++
++/*
++ * Configurable page sizes on i386, mainly for debugging purposes.
++ * (c) Balbir Singh
++ */
++
++#ifdef __ASSEMBLY__
++
++#define PAGE_SIZE 4096 /* as cannot handle 1UL << 12 */
++#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE)
++
++#define GET_CURRENT(reg) \
++ movl $-THREAD_SIZE, reg; \
++ andl %esp, reg
++
++#else /* __ASSEMBLY__ */
++
++#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE)
++#define alloc_task_struct() \
++ ((struct task_struct *) __get_free_pages(GFP_KERNEL,CONFIG_STACK_SIZE_SHIFT))
++
++#define free_task_struct(p) \
++ free_pages((unsigned long) (p), CONFIG_STACK_SIZE_SHIFT)
++
++#define INIT_TASK_SIZE THREAD_SIZE
+
+ struct task_struct;
+
+ static inline struct task_struct * get_current(void)
+ {
+ struct task_struct *current;
+- __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~8191UL));
++ __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~(THREAD_SIZE - 1)));
+ return current;
+ }
+
+ #define current get_current()
+
++#endif /* __ASSEMBLY__ */
++
+ #endif /* !(_I386_CURRENT_H) */
+Index: linux-2.4.19-pre1/include/asm-i386/hw_irq.h
+===================================================================
+--- linux-2.4.19-pre1.orig/include/asm-i386/hw_irq.h 2003-11-21 02:59:05.000000000 +0300
++++ linux-2.4.19-pre1/include/asm-i386/hw_irq.h 2003-12-01 18:14:32.000000000 +0300
+@@ -15,6 +15,7 @@
+ #include <linux/config.h>
+ #include <asm/atomic.h>
+ #include <asm/irq.h>
++#include <asm/current.h>
+
+ /*
+ * IDT vectors usable for external interrupt sources start
+@@ -113,10 +114,6 @@
+ #define IRQ_NAME2(nr) nr##_interrupt(void)
+ #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
+
+-#define GET_CURRENT \
+- "movl %esp, %ebx\n\t" \
+- "andl $-8192, %ebx\n\t"
+-
+ /*
+ * SMP has a few special interrupts for IPI messages
+ */
+Index: linux-2.4.19-pre1/include/asm-i386/processor.h
+===================================================================
+--- linux-2.4.19-pre1.orig/include/asm-i386/processor.h 2003-11-21 02:59:05.000000000 +0300
++++ linux-2.4.19-pre1/include/asm-i386/processor.h 2003-12-01 18:14:32.000000000 +0300
+@@ -14,6 +14,7 @@
+ #include <asm/types.h>
+ #include <asm/sigcontext.h>
+ #include <asm/cpufeature.h>
++#include <asm/current.h>
+ #include <linux/cache.h>
+ #include <linux/config.h>
+ #include <linux/threads.h>
+@@ -447,9 +448,6 @@
+ #define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019])
+ #define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022])
+
+-#define THREAD_SIZE (2*PAGE_SIZE)
+-#define alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1))
+-#define free_task_struct(p) free_pages((unsigned long) (p), 1)
+ #define get_task_struct(tsk) atomic_inc(&virt_to_page(tsk)->count)
+
+ #define init_task (init_task_union.task)
+Index: linux-2.4.19-pre1/include/linux/sched.h
+===================================================================
+--- linux-2.4.19-pre1.orig/include/linux/sched.h 2003-12-01 18:11:28.000000000 +0300
++++ linux-2.4.19-pre1/include/linux/sched.h 2003-12-01 18:14:32.000000000 +0300
+@@ -2,6 +2,7 @@
+ #define _LINUX_SCHED_H
+
+ #include <asm/param.h> /* for HZ */
++#include <asm/current.h> /* for INIT_TASK_SIZE */
+
+ extern unsigned long event;
+
--- /dev/null
+Index: linux-2.4.20-rh-20.9/arch/i386/kernel/entry.S
+===================================================================
+--- linux-2.4.20-rh-20.9.orig/arch/i386/kernel/entry.S 2003-09-13 19:34:35.000000000 +0400
++++ linux-2.4.20-rh-20.9/arch/i386/kernel/entry.S 2003-12-01 18:02:14.000000000 +0300
+@@ -45,6 +45,7 @@
+ #include <linux/linkage.h>
+ #include <asm/segment.h>
+ #include <asm/smp.h>
++#include <asm/current.h>
+
+ EBX = 0x00
+ ECX = 0x04
+@@ -130,10 +131,6 @@
+ .long 3b,6b; \
+ .previous
+
+-#define GET_CURRENT(reg) \
+- movl $-8192, reg; \
+- andl %esp, reg
+-
+ ENTRY(lcall7)
+ pushfl # We get a different stack layout with call gates,
+ pushl %eax # which has to be cleaned up later..
+@@ -149,7 +146,7 @@
+ movl %ecx,CS(%esp) #
+ movl %esp,%ebx
+ pushl %ebx
+- andl $-8192,%ebx # GET_CURRENT
++ andl $-THREAD_SIZE,%ebx # GET_CURRENT
+ movl exec_domain(%ebx),%edx # Get the execution domain
+ movl 4(%edx),%edx # Get the lcall7 handler for the domain
+ pushl $0x7
+@@ -173,7 +170,7 @@
+ movl %ecx,CS(%esp) #
+ movl %esp,%ebx
+ pushl %ebx
+- andl $-8192,%ebx # GET_CURRENT
++ andl $-THREAD_SIZE,%ebx # GET_CURRENT
+ movl exec_domain(%ebx),%edx # Get the execution domain
+ movl 4(%edx),%edx # Get the lcall7 handler for the domain
+ pushl $0x27
+Index: linux-2.4.20-rh-20.9/arch/i386/kernel/smpboot.c
+===================================================================
+--- linux-2.4.20-rh-20.9.orig/arch/i386/kernel/smpboot.c 2003-09-13 19:34:35.000000000 +0400
++++ linux-2.4.20-rh-20.9/arch/i386/kernel/smpboot.c 2003-12-01 18:02:14.000000000 +0300
+@@ -811,7 +811,7 @@
+
+ /* So we see what's up */
+ printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
+- stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle);
++ stack_start.esp = (void *)idle->thread.esp;
+
+ /*
+ * This grunge runs the startup process for
+@@ -884,7 +884,7 @@
+ Dprintk("CPU has booted.\n");
+ } else {
+ boot_error= 1;
+- if (*((volatile unsigned char *)phys_to_virt(8192))
++ if (*((volatile unsigned char *)phys_to_virt(THREAD_SIZE))
+ == 0xA5)
+ /* trampoline started but...? */
+ printk("Stuck ??\n");
+@@ -907,7 +907,7 @@
+ }
+
+ /* mark "stuck" area as not stuck */
+- *((volatile unsigned long *)phys_to_virt(8192)) = 0;
++ *((volatile unsigned long *)phys_to_virt(THREAD_SIZE)) = 0;
+
+ if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ) {
+ printk("Restoring NMI vector\n");
+Index: linux-2.4.20-rh-20.9/arch/i386/kernel/traps.c
+===================================================================
+--- linux-2.4.20-rh-20.9.orig/arch/i386/kernel/traps.c 2003-09-13 19:34:35.000000000 +0400
++++ linux-2.4.20-rh-20.9/arch/i386/kernel/traps.c 2003-12-01 18:02:14.000000000 +0300
+@@ -161,7 +161,7 @@
+ unsigned long esp = tsk->thread.esp;
+
+ /* User space on another CPU? */
+- if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1))
++ if ((esp ^ (unsigned long)tsk) & ~(THREAD_SIZE - 1))
+ return;
+ show_trace((unsigned long *)esp);
+ }
+Index: linux-2.4.20-rh-20.9/arch/i386/kernel/head.S
+===================================================================
+--- linux-2.4.20-rh-20.9.orig/arch/i386/kernel/head.S 2003-09-13 19:34:35.000000000 +0400
++++ linux-2.4.20-rh-20.9/arch/i386/kernel/head.S 2003-12-01 18:02:14.000000000 +0300
+@@ -15,6 +15,7 @@
+ #include <asm/page.h>
+ #include <asm/pgtable.h>
+ #include <asm/desc.h>
++#include <asm/current.h>
+
+ #define OLD_CL_MAGIC_ADDR 0x90020
+ #define OLD_CL_MAGIC 0xA33F
+@@ -315,7 +316,7 @@
+ ret
+
+ ENTRY(stack_start)
+- .long SYMBOL_NAME(init_task_union)+8192
++ .long SYMBOL_NAME(init_task_union)+THREAD_SIZE
+ .long __KERNEL_DS
+
+ /* This is the default interrupt "handler" :-) */
+Index: linux-2.4.20-rh-20.9/arch/i386/kernel/irq.c
+===================================================================
+--- linux-2.4.20-rh-20.9.orig/arch/i386/kernel/irq.c 2003-12-01 17:42:59.000000000 +0300
++++ linux-2.4.20-rh-20.9/arch/i386/kernel/irq.c 2003-12-01 18:02:14.000000000 +0300
+@@ -581,7 +581,10 @@
+ long esp;
+
+ /* Debugging check for stack overflow: is there less than 1KB free? */
+- __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (8191));
++ __asm__ __volatile__(
++ "andl %%esp,%0"
++ : "=r" (esp) : "0" (THREAD_SIZE-1));
++
+ if (unlikely(esp < (sizeof(struct task_struct) + 1024))) {
+ extern void show_stack(unsigned long *);
+
+Index: linux-2.4.20-rh-20.9/arch/i386/lib/getuser.S
+===================================================================
+--- linux-2.4.20-rh-20.9.orig/arch/i386/lib/getuser.S 1998-01-13 00:42:52.000000000 +0300
++++ linux-2.4.20-rh-20.9/arch/i386/lib/getuser.S 2003-12-01 18:02:14.000000000 +0300
+@@ -21,6 +21,10 @@
+ * as they get called from within inline assembly.
+ */
+
++/* Duplicated from asm/processor.h */
++#include <asm/current.h>
++#include <linux/config.h>
++
+ addr_limit = 12
+
+ .text
+@@ -28,7 +32,7 @@
+ .globl __get_user_1
+ __get_user_1:
+ movl %esp,%edx
+- andl $0xffffe000,%edx
++ andl $~(THREAD_SIZE - 1),%edx
+ cmpl addr_limit(%edx),%eax
+ jae bad_get_user
+ 1: movzbl (%eax),%edx
+@@ -41,7 +45,7 @@
+ addl $1,%eax
+ movl %esp,%edx
+ jc bad_get_user
+- andl $0xffffe000,%edx
++ andl $~(THREAD_SIZE - 1),%edx
+ cmpl addr_limit(%edx),%eax
+ jae bad_get_user
+ 2: movzwl -1(%eax),%edx
+@@ -54,7 +58,7 @@
+ addl $3,%eax
+ movl %esp,%edx
+ jc bad_get_user
+- andl $0xffffe000,%edx
++ andl $~(THREAD_SIZE - 1),%edx
+ cmpl addr_limit(%edx),%eax
+ jae bad_get_user
+ 3: movl -3(%eax),%edx
+Index: linux-2.4.20-rh-20.9/arch/i386/config.in
+===================================================================
+--- linux-2.4.20-rh-20.9.orig/arch/i386/config.in 2003-09-13 19:34:34.000000000 +0400
++++ linux-2.4.20-rh-20.9/arch/i386/config.in 2003-12-01 18:02:14.000000000 +0300
+@@ -266,6 +266,29 @@
+ if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then
+ define_bool CONFIG_HAVE_DEC_LOCK y
+ fi
++
++choice 'Bigger Stack Size Support' \
++ "off CONFIG_NOBIGSTACK \
++ 16KB CONFIG_STACK_SIZE_16KB \
++ 32KB CONFIG_STACK_SIZE_32KB \
++ 64KB CONFIG_STACK_SIZE_64KB" off
++
++if [ "$CONFIG_NOBIGSTACK" = "y" ]; then
++ define_int CONFIG_STACK_SIZE_SHIFT 1
++else
++ if [ "$CONFIG_STACK_SIZE_16KB" = "y" ]; then
++ define_int CONFIG_STACK_SIZE_SHIFT 2
++ else
++ if [ "$CONFIG_STACK_SIZE_32KB" = "y" ]; then
++ define_int CONFIG_STACK_SIZE_SHIFT 3
++ else
++ if [ "$CONFIG_STACK_SIZE_64KB" = "y" ]; then
++ define_int CONFIG_STACK_SIZE_SHIFT 4
++ fi
++ fi
++ fi
++fi
++
+ endmenu
+
+ mainmenu_option next_comment
+Index: linux-2.4.20-rh-20.9/arch/i386/vmlinux.lds
+===================================================================
+--- linux-2.4.20-rh-20.9.orig/arch/i386/vmlinux.lds 2003-09-13 19:34:24.000000000 +0400
++++ linux-2.4.20-rh-20.9/arch/i386/vmlinux.lds 2003-12-01 18:02:14.000000000 +0300
+@@ -38,7 +38,8 @@
+
+ _edata = .; /* End of data section */
+
+- . = ALIGN(8192); /* init_task */
++/* choose the biggest of the possible stack sizes here? */
++ . = ALIGN(65536); /* init_task */
+ .data.init_task : { *(.data.init_task) }
+
+ . = ALIGN(4096); /* Init code and data */
+Index: linux-2.4.20-rh-20.9/include/asm-i386/current.h
+===================================================================
+--- linux-2.4.20-rh-20.9.orig/include/asm-i386/current.h 1998-08-15 03:35:22.000000000 +0400
++++ linux-2.4.20-rh-20.9/include/asm-i386/current.h 2003-12-01 18:03:28.000000000 +0300
+@@ -1,15 +1,43 @@
+ #ifndef _I386_CURRENT_H
+ #define _I386_CURRENT_H
++#include <asm/page.h>
++
++/*
++ * Configurable stack sizes on i386, mainly for debugging purposes.
++ * (c) Balbir Singh
++ */
++
++#ifdef __ASSEMBLY__
++
++#define PAGE_SIZE 4096 /* as cannot handle 1UL << 12 */
++#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE)
++
++#define GET_CURRENT(reg) \
++ movl $-THREAD_SIZE, reg; \
++ andl %esp, reg
++
++#else /* __ASSEMBLY__ */
++
++#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE)
++#define __alloc_task_struct() \
++ ((struct task_struct *) __get_free_pages(GFP_KERNEL,CONFIG_STACK_SIZE_SHIFT))
++
++#define __free_task_struct(p) \
++ free_pages((unsigned long) (p), CONFIG_STACK_SIZE_SHIFT)
++
++#define INIT_TASK_SIZE THREAD_SIZE
+
+ struct task_struct;
+
+ static inline struct task_struct * get_current(void)
+ {
+ struct task_struct *current;
+- __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~8191UL));
++ __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~(THREAD_SIZE - 1)));
+ return current;
+ }
+
+ #define current get_current()
+
++#endif /* __ASSEMBLY__ */
++
+ #endif /* !(_I386_CURRENT_H) */
+Index: linux-2.4.20-rh-20.9/include/asm-i386/hw_irq.h
+===================================================================
+--- linux-2.4.20-rh-20.9.orig/include/asm-i386/hw_irq.h 2003-11-13 17:35:48.000000000 +0300
++++ linux-2.4.20-rh-20.9/include/asm-i386/hw_irq.h 2003-12-01 18:02:14.000000000 +0300
+@@ -116,10 +116,6 @@
+ #define IRQ_NAME2(nr) nr##_interrupt(void)
+ #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
+
+-#define GET_CURRENT \
+- "movl %esp, %ebx\n\t" \
+- "andl $-8192, %ebx\n\t"
+-
+ /*
+ * SMP has a few special interrupts for IPI messages
+ */
+Index: linux-2.4.20-rh-20.9/include/asm-i386/processor.h
+===================================================================
+--- linux-2.4.20-rh-20.9.orig/include/asm-i386/processor.h 2003-10-08 12:29:57.000000000 +0400
++++ linux-2.4.20-rh-20.9/include/asm-i386/processor.h 2003-12-01 18:02:14.000000000 +0300
+@@ -14,6 +14,7 @@
+ #include <asm/types.h>
+ #include <asm/sigcontext.h>
+ #include <asm/cpufeature.h>
++#include <asm/current.h>
+ #include <linux/cache.h>
+ #include <linux/config.h>
+ #include <linux/threads.h>
+@@ -469,10 +470,6 @@
+ #define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019])
+ #define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022])
+
+-#define THREAD_SIZE (2*PAGE_SIZE)
+-#define __alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1))
+-#define __free_task_struct(p) do { BUG_ON((p)->state < TASK_ZOMBIE); free_pages((unsigned long) (p), 1); } while (0)
+-
+ #define init_task (init_task_union.task)
+ #define init_stack (init_task_union.stack)
+
+Index: linux-2.4.20-rh-20.9/include/linux/sched.h
+===================================================================
+--- linux-2.4.20-rh-20.9.orig/include/linux/sched.h 2003-11-13 17:35:48.000000000 +0300
++++ linux-2.4.20-rh-20.9/include/linux/sched.h 2003-12-01 18:02:14.000000000 +0300
+@@ -2,6 +2,7 @@
+ #define _LINUX_SCHED_H
+
+ #include <asm/param.h> /* for HZ */
++#include <asm/current.h> /* for INIT_TASK_SIZE */
+
+ extern unsigned long event;
+
--- /dev/null
+Index: linux-2.4.20/arch/i386/kernel/entry.S
+===================================================================
+--- linux-2.4.20.orig/arch/i386/kernel/entry.S 2003-05-16 05:28:59.000000000 +0400
++++ linux-2.4.20/arch/i386/kernel/entry.S 2003-12-01 16:54:50.000000000 +0300
+@@ -45,6 +45,7 @@
+ #include <linux/linkage.h>
+ #include <asm/segment.h>
+ #include <asm/smp.h>
++#include <asm/current.h>
+
+ EBX = 0x00
+ ECX = 0x04
+@@ -130,10 +131,6 @@
+ .long 3b,6b; \
+ .previous
+
+-#define GET_CURRENT(reg) \
+- movl $-8192, reg; \
+- andl %esp, reg
+-
+ ENTRY(lcall7)
+ pushfl # We get a different stack layout with call gates,
+ pushl %eax # which has to be cleaned up later..
+@@ -149,7 +146,7 @@
+ movl %ecx,CS(%esp) #
+ movl %esp,%ebx
+ pushl %ebx
+- andl $-8192,%ebx # GET_CURRENT
++ andl $-THREAD_SIZE,%ebx # GET_CURRENT
+ movl exec_domain(%ebx),%edx # Get the execution domain
+ movl 4(%edx),%edx # Get the lcall7 handler for the domain
+ pushl $0x7
+@@ -173,7 +170,7 @@
+ movl %ecx,CS(%esp) #
+ movl %esp,%ebx
+ pushl %ebx
+- andl $-8192,%ebx # GET_CURRENT
++ andl $-THREAD_SIZE,%ebx # GET_CURRENT
+ movl exec_domain(%ebx),%edx # Get the execution domain
+ movl 4(%edx),%edx # Get the lcall7 handler for the domain
+ pushl $0x27
+Index: linux-2.4.20/arch/i386/kernel/smpboot.c
+===================================================================
+--- linux-2.4.20.orig/arch/i386/kernel/smpboot.c 2003-05-16 05:28:59.000000000 +0400
++++ linux-2.4.20/arch/i386/kernel/smpboot.c 2003-12-01 16:54:50.000000000 +0300
+@@ -819,7 +819,7 @@
+
+ /* So we see what's up */
+ printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
+- stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle);
++ stack_start.esp = (void *)idle->thread.esp;
+
+ /*
+ * This grunge runs the startup process for
+@@ -892,7 +892,7 @@
+ Dprintk("CPU has booted.\n");
+ } else {
+ boot_error= 1;
+- if (*((volatile unsigned char *)phys_to_virt(8192))
++ if (*((volatile unsigned char *)phys_to_virt(THREAD_SIZE))
+ == 0xA5)
+ /* trampoline started but...? */
+ printk("Stuck ??\n");
+@@ -915,7 +915,7 @@
+ }
+
+ /* mark "stuck" area as not stuck */
+- *((volatile unsigned long *)phys_to_virt(8192)) = 0;
++ *((volatile unsigned long *)phys_to_virt(THREAD_SIZE)) = 0;
+
+ if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ) {
+ printk("Restoring NMI vector\n");
+Index: linux-2.4.20/arch/i386/kernel/traps.c
+===================================================================
+--- linux-2.4.20.orig/arch/i386/kernel/traps.c 2003-12-01 16:53:23.000000000 +0300
++++ linux-2.4.20/arch/i386/kernel/traps.c 2003-12-01 16:54:50.000000000 +0300
+@@ -158,7 +158,7 @@
+ unsigned long esp = tsk->thread.esp;
+
+ /* User space on another CPU? */
+- if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1))
++ if ((esp ^ (unsigned long)tsk) & ~(THREAD_SIZE - 1))
+ return;
+ show_trace((unsigned long *)esp);
+ }
+Index: linux-2.4.20/arch/i386/kernel/head.S
+===================================================================
+--- linux-2.4.20.orig/arch/i386/kernel/head.S 2003-05-16 05:28:28.000000000 +0400
++++ linux-2.4.20/arch/i386/kernel/head.S 2003-12-01 16:54:50.000000000 +0300
+@@ -15,6 +15,7 @@
+ #include <asm/page.h>
+ #include <asm/pgtable.h>
+ #include <asm/desc.h>
++#include <asm/current.h>
+
+ #define OLD_CL_MAGIC_ADDR 0x90020
+ #define OLD_CL_MAGIC 0xA33F
+@@ -320,7 +321,7 @@
+ ret
+
+ ENTRY(stack_start)
+- .long SYMBOL_NAME(init_task_union)+8192
++ .long SYMBOL_NAME(init_task_union)+THREAD_SIZE
+ .long __KERNEL_DS
+
+ /* This is the default interrupt "handler" :-) */
+Index: linux-2.4.20/arch/i386/kernel/irq.c
+===================================================================
+--- linux-2.4.20.orig/arch/i386/kernel/irq.c 2003-05-16 05:28:59.000000000 +0400
++++ linux-2.4.20/arch/i386/kernel/irq.c 2003-12-01 16:57:05.000000000 +0300
+@@ -581,7 +581,10 @@
+ long esp;
+
+ /* Debugging check for stack overflow: is there less than 1KB free? */
+- __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (8191));
++ __asm__ __volatile__(
++ "andl %%esp,%0"
++ : "=r" (esp) : "0" (THREAD_SIZE-1));
++
+ if (unlikely(esp < (sizeof(struct task_struct) + 1024))) {
+ extern void show_stack(unsigned long *);
+
+Index: linux-2.4.20/arch/i386/lib/getuser.S
+===================================================================
+--- linux-2.4.20.orig/arch/i386/lib/getuser.S 1998-01-13 00:42:52.000000000 +0300
++++ linux-2.4.20/arch/i386/lib/getuser.S 2003-12-01 16:54:50.000000000 +0300
+@@ -21,6 +21,10 @@
+ * as they get called from within inline assembly.
+ */
+
++/* Duplicated from asm/processor.h */
++#include <asm/current.h>
++#include <linux/config.h>
++
+ addr_limit = 12
+
+ .text
+@@ -28,7 +32,7 @@
+ .globl __get_user_1
+ __get_user_1:
+ movl %esp,%edx
+- andl $0xffffe000,%edx
++ andl $~(THREAD_SIZE - 1),%edx
+ cmpl addr_limit(%edx),%eax
+ jae bad_get_user
+ 1: movzbl (%eax),%edx
+@@ -41,7 +45,7 @@
+ addl $1,%eax
+ movl %esp,%edx
+ jc bad_get_user
+- andl $0xffffe000,%edx
++ andl $~(THREAD_SIZE - 1),%edx
+ cmpl addr_limit(%edx),%eax
+ jae bad_get_user
+ 2: movzwl -1(%eax),%edx
+@@ -54,7 +58,7 @@
+ addl $3,%eax
+ movl %esp,%edx
+ jc bad_get_user
+- andl $0xffffe000,%edx
++ andl $~(THREAD_SIZE - 1),%edx
+ cmpl addr_limit(%edx),%eax
+ jae bad_get_user
+ 3: movl -3(%eax),%edx
+Index: linux-2.4.20/arch/i386/config.in
+===================================================================
+--- linux-2.4.20.orig/arch/i386/config.in 2003-05-16 05:28:59.000000000 +0400
++++ linux-2.4.20/arch/i386/config.in 2003-12-01 17:01:56.000000000 +0300
+@@ -227,6 +227,29 @@
+ if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then
+ define_bool CONFIG_HAVE_DEC_LOCK y
+ fi
++
++choice 'Bigger Stack Size Support' \
++ "off CONFIG_NOBIGSTACK \
++ 16KB CONFIG_STACK_SIZE_16KB \
++ 32KB CONFIG_STACK_SIZE_32KB \
++ 64KB CONFIG_STACK_SIZE_64KB" off
++
++if [ "$CONFIG_NOBIGSTACK" = "y" ]; then
++ define_int CONFIG_STACK_SIZE_SHIFT 1
++else
++ if [ "$CONFIG_STACK_SIZE_16KB" = "y" ]; then
++ define_int CONFIG_STACK_SIZE_SHIFT 2
++ else
++ if [ "$CONFIG_STACK_SIZE_32KB" = "y" ]; then
++ define_int CONFIG_STACK_SIZE_SHIFT 3
++ else
++ if [ "$CONFIG_STACK_SIZE_64KB" = "y" ]; then
++ define_int CONFIG_STACK_SIZE_SHIFT 4
++ fi
++ fi
++ fi
++fi
++
+ endmenu
+
+ mainmenu_option next_comment
+Index: linux-2.4.20/arch/i386/vmlinux.lds
+===================================================================
+--- linux-2.4.20.orig/arch/i386/vmlinux.lds 2003-05-16 05:28:09.000000000 +0400
++++ linux-2.4.20/arch/i386/vmlinux.lds 2003-12-01 16:54:50.000000000 +0300
+@@ -35,7 +35,8 @@
+
+ _edata = .; /* End of data section */
+
+- . = ALIGN(8192); /* init_task */
++/* choose the biggest of the possible stack sizes here? */
++ . = ALIGN(65536); /* init_task */
+ .data.init_task : { *(.data.init_task) }
+
+ . = ALIGN(4096); /* Init code and data */
+Index: linux-2.4.20/include/asm-i386/current.h
+===================================================================
+--- linux-2.4.20.orig/include/asm-i386/current.h 1998-08-15 03:35:22.000000000 +0400
++++ linux-2.4.20/include/asm-i386/current.h 2003-12-01 16:54:50.000000000 +0300
+@@ -1,15 +1,43 @@
+ #ifndef _I386_CURRENT_H
+ #define _I386_CURRENT_H
++#include <asm/page.h>
++
++/*
++ * Configurable stack sizes on i386, mainly for debugging purposes.
++ * (c) Balbir Singh
++ */
++
++#ifdef __ASSEMBLY__
++
++#define PAGE_SIZE 4096 /* as cannot handle 1UL << 12 */
++#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE)
++
++#define GET_CURRENT(reg) \
++ movl $-THREAD_SIZE, reg; \
++ andl %esp, reg
++
++#else /* __ASSEMBLY__ */
++
++#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE)
++#define alloc_task_struct() \
++ ((struct task_struct *) __get_free_pages(GFP_KERNEL,CONFIG_STACK_SIZE_SHIFT))
++
++#define free_task_struct(p) \
++ free_pages((unsigned long) (p), CONFIG_STACK_SIZE_SHIFT)
++
++#define INIT_TASK_SIZE THREAD_SIZE
+
+ struct task_struct;
+
+ static inline struct task_struct * get_current(void)
+ {
+ struct task_struct *current;
+- __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~8191UL));
++ __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~(THREAD_SIZE - 1)));
+ return current;
+ }
+
+ #define current get_current()
+
++#endif /* __ASSEMBLY__ */
++
+ #endif /* !(_I386_CURRENT_H) */
+Index: linux-2.4.20/include/asm-i386/hw_irq.h
+===================================================================
+--- linux-2.4.20.orig/include/asm-i386/hw_irq.h 2003-11-13 17:17:28.000000000 +0300
++++ linux-2.4.20/include/asm-i386/hw_irq.h 2003-12-01 16:54:50.000000000 +0300
+@@ -15,6 +15,7 @@
+ #include <linux/config.h>
+ #include <asm/atomic.h>
+ #include <asm/irq.h>
++#include <asm/current.h>
+
+ /*
+ * IDT vectors usable for external interrupt sources start
+@@ -113,10 +114,6 @@
+ #define IRQ_NAME2(nr) nr##_interrupt(void)
+ #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
+
+-#define GET_CURRENT \
+- "movl %esp, %ebx\n\t" \
+- "andl $-8192, %ebx\n\t"
+-
+ /*
+ * SMP has a few special interrupts for IPI messages
+ */
+Index: linux-2.4.20/include/asm-i386/processor.h
+===================================================================
+--- linux-2.4.20.orig/include/asm-i386/processor.h 2003-11-21 17:39:47.000000000 +0300
++++ linux-2.4.20/include/asm-i386/processor.h 2003-12-01 16:54:50.000000000 +0300
+@@ -14,6 +14,7 @@
+ #include <asm/types.h>
+ #include <asm/sigcontext.h>
+ #include <asm/cpufeature.h>
++#include <asm/current.h>
+ #include <linux/cache.h>
+ #include <linux/config.h>
+ #include <linux/threads.h>
+@@ -451,9 +452,6 @@
+ #define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019])
+ #define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022])
+
+-#define THREAD_SIZE (2*PAGE_SIZE)
+-#define alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1))
+-#define free_task_struct(p) free_pages((unsigned long) (p), 1)
+ #define get_task_struct(tsk) atomic_inc(&virt_to_page(tsk)->count)
+
+ #define init_task (init_task_union.task)
+Index: linux-2.4.20/include/linux/sched.h
+===================================================================
+--- linux-2.4.20.orig/include/linux/sched.h 2003-11-21 17:39:47.000000000 +0300
++++ linux-2.4.20/include/linux/sched.h 2003-12-01 16:54:50.000000000 +0300
+@@ -2,6 +2,7 @@
+ #define _LINUX_SCHED_H
+
+ #include <asm/param.h> /* for HZ */
++#include <asm/current.h> /* for INIT_TASK_SIZE */
+
+ extern unsigned long event;
+
--- /dev/null
+Index: linux-2.4.22-ac1/arch/i386/kernel/entry.S
+===================================================================
+--- linux-2.4.22-ac1.orig/arch/i386/kernel/entry.S 2003-09-25 14:16:34.000000000 +0400
++++ linux-2.4.22-ac1/arch/i386/kernel/entry.S 2003-12-01 18:34:08.000000000 +0300
+@@ -46,6 +46,7 @@
+ #include <asm/segment.h>
+ #include <asm/smp.h>
+ #include <asm/unistd.h>
++#include <asm/current.h>
+
+ EBX = 0x00
+ ECX = 0x04
+@@ -131,10 +132,6 @@
+ .long 3b,6b; \
+ .previous
+
+-#define GET_CURRENT(reg) \
+- movl $-8192, reg; \
+- andl %esp, reg
+-
+ ENTRY(lcall7)
+ pushfl # We get a different stack layout with call gates,
+ pushl %eax # which has to be cleaned up later..
+@@ -150,7 +147,7 @@
+ movl %ecx,CS(%esp) #
+ movl %esp,%ebx
+ pushl %ebx
+- andl $-8192,%ebx # GET_CURRENT
++ andl $-THREAD_SIZE,%ebx # GET_CURRENT
+ movl exec_domain(%ebx),%edx # Get the execution domain
+ movl 4(%edx),%edx # Get the lcall7 handler for the domain
+ pushl $0x7
+@@ -174,7 +171,7 @@
+ movl %ecx,CS(%esp) #
+ movl %esp,%ebx
+ pushl %ebx
+- andl $-8192,%ebx # GET_CURRENT
++ andl $-THREAD_SIZE,%ebx # GET_CURRENT
+ movl exec_domain(%ebx),%edx # Get the execution domain
+ movl 4(%edx),%edx # Get the lcall7 handler for the domain
+ pushl $0x27
+Index: linux-2.4.22-ac1/arch/i386/kernel/smpboot.c
+===================================================================
+--- linux-2.4.22-ac1.orig/arch/i386/kernel/smpboot.c 2003-09-25 14:16:28.000000000 +0400
++++ linux-2.4.22-ac1/arch/i386/kernel/smpboot.c 2003-12-01 18:34:08.000000000 +0300
+@@ -814,7 +814,7 @@
+
+ /* So we see what's up */
+ printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
+- stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle);
++ stack_start.esp = (void *)idle->thread.esp;
+
+ /*
+ * This grunge runs the startup process for
+@@ -887,7 +887,7 @@
+ Dprintk("CPU has booted.\n");
+ } else {
+ boot_error= 1;
+- if (*((volatile unsigned char *)phys_to_virt(8192))
++ if (*((volatile unsigned char *)phys_to_virt(THREAD_SIZE))
+ == 0xA5)
+ /* trampoline started but...? */
+ printk("Stuck ??\n");
+@@ -910,7 +910,7 @@
+ }
+
+ /* mark "stuck" area as not stuck */
+- *((volatile unsigned long *)phys_to_virt(8192)) = 0;
++ *((volatile unsigned long *)phys_to_virt(THREAD_SIZE)) = 0;
+
+ if(clustered_apic_mode == CLUSTERED_APIC_NUMAQ) {
+ printk("Restoring NMI vector\n");
+Index: linux-2.4.22-ac1/arch/i386/kernel/traps.c
+===================================================================
+--- linux-2.4.22-ac1.orig/arch/i386/kernel/traps.c 2003-09-25 14:16:29.000000000 +0400
++++ linux-2.4.22-ac1/arch/i386/kernel/traps.c 2003-12-01 18:34:08.000000000 +0300
+@@ -161,7 +161,7 @@
+ unsigned long esp = tsk->thread.esp;
+
+ /* User space on another CPU? */
+- if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1))
++ if ((esp ^ (unsigned long)tsk) & ~(THREAD_SIZE - 1))
+ return;
+ show_trace((unsigned long *)esp);
+ }
+Index: linux-2.4.22-ac1/arch/i386/kernel/head.S
+===================================================================
+--- linux-2.4.22-ac1.orig/arch/i386/kernel/head.S 2003-09-25 14:16:27.000000000 +0400
++++ linux-2.4.22-ac1/arch/i386/kernel/head.S 2003-12-01 18:34:08.000000000 +0300
+@@ -15,6 +15,7 @@
+ #include <asm/page.h>
+ #include <asm/pgtable.h>
+ #include <asm/desc.h>
++#include <asm/current.h>
+
+ #define OLD_CL_MAGIC_ADDR 0x90020
+ #define OLD_CL_MAGIC 0xA33F
+@@ -315,7 +316,7 @@
+ ret
+
+ ENTRY(stack_start)
+- .long SYMBOL_NAME(init_task_union)+8192
++ .long SYMBOL_NAME(init_task_union)+THREAD_SIZE
+ .long __KERNEL_DS
+
+ /* This is the default interrupt "handler" :-) */
+Index: linux-2.4.22-ac1/arch/i386/kernel/irq.c
+===================================================================
+--- linux-2.4.22-ac1.orig/arch/i386/kernel/irq.c 2003-09-25 14:16:18.000000000 +0400
++++ linux-2.4.22-ac1/arch/i386/kernel/irq.c 2003-12-01 18:34:08.000000000 +0300
+@@ -581,7 +581,10 @@
+ long esp;
+
+ /* Debugging check for stack overflow: is there less than 1KB free? */
+- __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (8191));
++ __asm__ __volatile__(
++ "andl %%esp,%0"
++ : "=r" (esp) : "0" (THREAD_SIZE-1));
++
+ if (unlikely(esp < (sizeof(struct task_struct) + 1024))) {
+ extern void show_stack(unsigned long *);
+
+Index: linux-2.4.22-ac1/arch/i386/lib/getuser.S
+===================================================================
+--- linux-2.4.22-ac1.orig/arch/i386/lib/getuser.S 1998-01-13 00:42:52.000000000 +0300
++++ linux-2.4.22-ac1/arch/i386/lib/getuser.S 2003-12-01 18:34:08.000000000 +0300
+@@ -21,6 +21,10 @@
+ * as they get called from within inline assembly.
+ */
+
++/* Duplicated from asm/processor.h */
++#include <asm/current.h>
++#include <linux/config.h>
++
+ addr_limit = 12
+
+ .text
+@@ -28,7 +32,7 @@
+ .globl __get_user_1
+ __get_user_1:
+ movl %esp,%edx
+- andl $0xffffe000,%edx
++ andl $~(THREAD_SIZE - 1),%edx
+ cmpl addr_limit(%edx),%eax
+ jae bad_get_user
+ 1: movzbl (%eax),%edx
+@@ -41,7 +45,7 @@
+ addl $1,%eax
+ movl %esp,%edx
+ jc bad_get_user
+- andl $0xffffe000,%edx
++ andl $~(THREAD_SIZE - 1),%edx
+ cmpl addr_limit(%edx),%eax
+ jae bad_get_user
+ 2: movzwl -1(%eax),%edx
+@@ -54,7 +58,7 @@
+ addl $3,%eax
+ movl %esp,%edx
+ jc bad_get_user
+- andl $0xffffe000,%edx
++ andl $~(THREAD_SIZE - 1),%edx
+ cmpl addr_limit(%edx),%eax
+ jae bad_get_user
+ 3: movl -3(%eax),%edx
+Index: linux-2.4.22-ac1/arch/i386/config.in
+===================================================================
+--- linux-2.4.22-ac1.orig/arch/i386/config.in 2003-09-25 14:16:34.000000000 +0400
++++ linux-2.4.22-ac1/arch/i386/config.in 2003-12-01 18:34:08.000000000 +0300
+@@ -304,6 +304,29 @@
+ if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then
+ define_bool CONFIG_HAVE_DEC_LOCK y
+ fi
++
++choice 'Bigger Stack Size Support' \
++ "off CONFIG_NOBIGSTACK \
++ 16KB CONFIG_STACK_SIZE_16KB \
++ 32KB CONFIG_STACK_SIZE_32KB \
++ 64KB CONFIG_STACK_SIZE_64KB" off
++
++if [ "$CONFIG_NOBIGSTACK" = "y" ]; then
++ define_int CONFIG_STACK_SIZE_SHIFT 1
++else
++ if [ "$CONFIG_STACK_SIZE_16KB" = "y" ]; then
++ define_int CONFIG_STACK_SIZE_SHIFT 2
++ else
++ if [ "$CONFIG_STACK_SIZE_32KB" = "y" ]; then
++ define_int CONFIG_STACK_SIZE_SHIFT 3
++ else
++ if [ "$CONFIG_STACK_SIZE_64KB" = "y" ]; then
++ define_int CONFIG_STACK_SIZE_SHIFT 4
++ fi
++ fi
++ fi
++fi
++
+ endmenu
+
+ mainmenu_option next_comment
+Index: linux-2.4.22-ac1/arch/i386/vmlinux.lds
+===================================================================
+--- linux-2.4.22-ac1.orig/arch/i386/vmlinux.lds 2003-09-25 14:16:28.000000000 +0400
++++ linux-2.4.22-ac1/arch/i386/vmlinux.lds 2003-12-01 18:34:08.000000000 +0300
+@@ -38,7 +38,8 @@
+
+ _edata = .; /* End of data section */
+
+- . = ALIGN(8192); /* init_task */
++/* choose the biggest of the possible stack sizes here? */
++ . = ALIGN(65536); /* init_task */
+ .data.init_task : { *(.data.init_task) }
+
+ . = ALIGN(4096); /* Init code and data */
+Index: linux-2.4.22-ac1/include/asm-i386/current.h
+===================================================================
+--- linux-2.4.22-ac1.orig/include/asm-i386/current.h 1998-08-15 03:35:22.000000000 +0400
++++ linux-2.4.22-ac1/include/asm-i386/current.h 2003-12-01 18:34:16.000000000 +0300
+@@ -1,15 +1,43 @@
+ #ifndef _I386_CURRENT_H
+ #define _I386_CURRENT_H
++#include <asm/page.h>
++
++/*
++ * Configurable stack sizes on i386, mainly for debugging purposes.
++ * (c) Balbir Singh
++ */
++
++#ifdef __ASSEMBLY__
++
++#define PAGE_SIZE 4096 /* as cannot handle 1UL << 12 */
++#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE)
++
++#define GET_CURRENT(reg) \
++ movl $-THREAD_SIZE, reg; \
++ andl %esp, reg
++
++#else /* __ASSEMBLY__ */
++
++#define THREAD_SIZE ((1 << CONFIG_STACK_SIZE_SHIFT) * PAGE_SIZE)
++#define __alloc_task_struct() \
++ ((struct task_struct *) __get_free_pages(GFP_KERNEL,CONFIG_STACK_SIZE_SHIFT))
++
++#define __free_task_struct(p) \
++ free_pages((unsigned long) (p), CONFIG_STACK_SIZE_SHIFT)
++
++#define INIT_TASK_SIZE THREAD_SIZE
+
+ struct task_struct;
+
+ static inline struct task_struct * get_current(void)
+ {
+ struct task_struct *current;
+- __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~8191UL));
++ __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~(THREAD_SIZE - 1)));
+ return current;
+ }
+
+ #define current get_current()
+
++#endif /* __ASSEMBLY__ */
++
+ #endif /* !(_I386_CURRENT_H) */
+Index: linux-2.4.22-ac1/include/asm-i386/hw_irq.h
+===================================================================
+--- linux-2.4.22-ac1.orig/include/asm-i386/hw_irq.h 2003-09-26 00:54:45.000000000 +0400
++++ linux-2.4.22-ac1/include/asm-i386/hw_irq.h 2003-12-01 18:34:08.000000000 +0300
+@@ -114,10 +114,6 @@
+ #define IRQ_NAME2(nr) nr##_interrupt(void)
+ #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
+
+-#define GET_CURRENT \
+- "movl %esp, %ebx\n\t" \
+- "andl $-8192, %ebx\n\t"
+-
+ /*
+ * SMP has a few special interrupts for IPI messages
+ */
+Index: linux-2.4.22-ac1/include/asm-i386/processor.h
+===================================================================
+--- linux-2.4.22-ac1.orig/include/asm-i386/processor.h 2003-09-26 00:54:44.000000000 +0400
++++ linux-2.4.22-ac1/include/asm-i386/processor.h 2003-12-01 18:34:08.000000000 +0300
+@@ -14,6 +14,7 @@
+ #include <asm/types.h>
+ #include <asm/sigcontext.h>
+ #include <asm/cpufeature.h>
++#include <asm/current.h>
+ #include <linux/cache.h>
+ #include <linux/config.h>
+ #include <linux/threads.h>
+@@ -465,10 +466,6 @@
+ #define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019])
+ #define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022])
+
+-#define THREAD_SIZE (2*PAGE_SIZE)
+-#define __alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1))
+-#define __free_task_struct(p) do { BUG_ON((p)->state < TASK_ZOMBIE); free_pages((unsigned long) (p), 1); } while (0)
+-
+ #define init_task (init_task_union.task)
+ #define init_stack (init_task_union.stack)
+
+Index: linux-2.4.22-ac1/include/linux/sched.h
+===================================================================
+--- linux-2.4.22-ac1.orig/include/linux/sched.h 2003-11-13 18:21:42.000000000 +0300
++++ linux-2.4.22-ac1/include/linux/sched.h 2003-12-01 18:34:08.000000000 +0300
+@@ -2,6 +2,7 @@
+ #define _LINUX_SCHED_H
+
+ #include <asm/param.h> /* for HZ */
++#include <asm/current.h> /* for INIT_TASK_SIZE */
+
+ extern unsigned long event;
+
Index: linux-2.4.18-chaos/include/linux/mm.h
===================================================================
---- linux-2.4.18-chaos.orig/include/linux/mm.h 2003-11-13 17:06:48.000000000 +0300
-+++ linux-2.4.18-chaos/include/linux/mm.h 2003-11-17 15:46:32.000000000 +0300
+--- linux-2.4.18-chaos.orig/include/linux/mm.h 2003-11-23 00:07:20.000000000 +0300
++++ linux-2.4.18-chaos/include/linux/mm.h 2003-11-23 00:07:23.000000000 +0300
@@ -677,6 +677,7 @@
#define __GFP_IO 0x40 /* Can start low memory physical IO? */
#define __GFP_HIGHIO 0x80 /* Can start high mem physical IO? */
platforms, used as appropriate on others */
Index: linux-2.4.18-chaos/mm/page_alloc.c
===================================================================
---- linux-2.4.18-chaos.orig/mm/page_alloc.c 2003-11-13 17:06:47.000000000 +0300
-+++ linux-2.4.18-chaos/mm/page_alloc.c 2003-11-17 15:49:11.000000000 +0300
+--- linux-2.4.18-chaos.orig/mm/page_alloc.c 2003-11-23 00:07:20.000000000 +0300
++++ linux-2.4.18-chaos/mm/page_alloc.c 2003-12-02 23:12:31.000000000 +0300
@@ -554,7 +554,7 @@
/*
* Oh well, we didn't succeed.
/*
* Are we dealing with a higher order allocation?
*
+@@ -628,7 +628,9 @@
+
+ /* XXX: is pages_min/4 a good amount to reserve for this? */
+ min += z->pages_min / 4;
+- if (z->free_pages > min || ((current->flags & PF_MEMALLOC) && !in_interrupt())) {
++ if (z->free_pages > min ||
++ (((current->flags & PF_MEMALLOC) || (gfp_mask & __GFP_MEMALLOC))
++ && !in_interrupt())) {
+ page = rmqueue(z, order);
+ if (page)
+ return page;
Index: linux-2.4.18-chaos/include/linux/slab.h
===================================================================
--- linux-2.4.18-chaos.orig/include/linux/slab.h 2003-07-28 17:52:18.000000000 +0400
-+++ linux-2.4.18-chaos/include/linux/slab.h 2003-11-17 15:46:32.000000000 +0300
++++ linux-2.4.18-chaos/include/linux/slab.h 2003-11-23 00:07:23.000000000 +0300
@@ -23,6 +23,7 @@
#define SLAB_KERNEL GFP_KERNEL
#define SLAB_NFS GFP_NFS
Index: linux-2.4.18-chaos/mm/slab.c
===================================================================
--- linux-2.4.18-chaos.orig/mm/slab.c 2003-07-28 17:52:20.000000000 +0400
-+++ linux-2.4.18-chaos/mm/slab.c 2003-11-17 15:46:32.000000000 +0300
++++ linux-2.4.18-chaos/mm/slab.c 2003-11-23 00:07:23.000000000 +0300
@@ -1116,7 +1116,7 @@
/* Be lazy and only check for valid flags here,
* keeping it out of the critical path in kmem_cache_alloc().
/*
* Are we dealing with a higher order allocation?
*
+@@ -583,7 +583,9 @@
+
+ /* XXX: is pages_min/4 a good amount to reserve for this? */
+ min += z->pages_min / 4;
+- if (z->free_pages > min || ((current->flags & PF_MEMALLOC) && !in_interrupt())) {
++ if (z->free_pages > min ||
++ (((current->flags & PF_MEMALLOC) || (gfp_mask & __GFP_MEMALLOC))
++ && !in_interrupt())) {
+ page = rmqueue(z, order);
+ if (page)
+ return page;
Index: linux-2.4.20-rh-20.9/include/linux/slab.h
===================================================================
--- linux-2.4.20-rh-20.9.orig/include/linux/slab.h 2003-11-13 17:35:48.000000000 +0300
-Index: linux-2.4.22-vanilla/include/linux/mm.h
+Index: linux-2.4.20/include/linux/mm.h
===================================================================
---- linux-2.4.22-vanilla.orig/include/linux/mm.h 2003-11-17 15:26:32.000000000 +0300
-+++ linux-2.4.22-vanilla/include/linux/mm.h 2003-11-17 15:40:32.000000000 +0300
-@@ -612,6 +612,7 @@
+--- linux-2.4.20.orig/include/linux/mm.h 2003-12-01 17:07:14.000000000 +0300
++++ linux-2.4.20/include/linux/mm.h 2003-12-02 23:17:06.000000000 +0300
+@@ -614,6 +614,7 @@
#define __GFP_IO 0x40 /* Can start low memory physical IO? */
#define __GFP_HIGHIO 0x80 /* Can start high mem physical IO? */
#define __GFP_FS 0x100 /* Can call down to low-level FS? */
#define GFP_NOHIGHIO (__GFP_HIGH | __GFP_WAIT | __GFP_IO)
#define GFP_NOIO (__GFP_HIGH | __GFP_WAIT)
-@@ -622,6 +623,7 @@
+@@ -624,6 +625,7 @@
#define GFP_KERNEL (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
#define GFP_NFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
#define GFP_KSWAPD ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
/* Flag - indicates that the buffer will be suitable for DMA. Ignored on some
platforms, used as appropriate on others */
-Index: linux-2.4.22-vanilla/mm/page_alloc.c
+Index: linux-2.4.20/mm/page_alloc.c
===================================================================
---- linux-2.4.22-vanilla.orig/mm/page_alloc.c 2003-11-13 18:19:51.000000000 +0300
-+++ linux-2.4.22-vanilla/mm/page_alloc.c 2003-11-17 15:40:32.000000000 +0300
+--- linux-2.4.20.orig/mm/page_alloc.c 2003-12-01 17:02:43.000000000 +0300
++++ linux-2.4.20/mm/page_alloc.c 2003-12-02 23:21:56.000000000 +0300
@@ -377,7 +377,8 @@
/* here we're in the low on memory slow path */
rebalance:
- if (current->flags & (PF_MEMALLOC | PF_MEMDIE)) {
+ if (current->flags & (PF_MEMALLOC | PF_MEMDIE) ||
-+ gfp_mask & __GFP_MEMALLOC) {
++ (gfp_mask & __GFP_MEMALLOC)) {
zone = zonelist->zones;
for (;;) {
zone_t *z = *(zone++);
-Index: linux-2.4.22-vanilla/include/linux/slab.h
+Index: linux-2.4.20/include/linux/slab.h
===================================================================
---- linux-2.4.22-vanilla.orig/include/linux/slab.h 2003-11-17 14:58:37.000000000 +0300
-+++ linux-2.4.22-vanilla/include/linux/slab.h 2003-11-17 15:42:13.000000000 +0300
+--- linux-2.4.20.orig/include/linux/slab.h 2003-12-01 17:07:14.000000000 +0300
++++ linux-2.4.20/include/linux/slab.h 2003-12-02 23:17:06.000000000 +0300
@@ -23,6 +23,7 @@
#define SLAB_KERNEL GFP_KERNEL
#define SLAB_NFS GFP_NFS
#define SLAB_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_HIGHIO|__GFP_FS)
#define SLAB_NO_GROW 0x00001000UL /* don't grow a cache */
-Index: linux-2.4.22-vanilla/mm/slab.c
+Index: linux-2.4.20/mm/slab.c
===================================================================
---- linux-2.4.22-vanilla.orig/mm/slab.c 2003-11-13 17:39:29.000000000 +0300
-+++ linux-2.4.22-vanilla/mm/slab.c 2003-11-17 15:42:13.000000000 +0300
-@@ -1115,7 +1115,7 @@
+--- linux-2.4.20.orig/mm/slab.c 2003-12-01 17:02:34.000000000 +0300
++++ linux-2.4.20/mm/slab.c 2003-12-02 23:17:06.000000000 +0300
+@@ -1113,7 +1113,7 @@
/* Be lazy and only check for valid flags here,
* keeping it out of the critical path in kmem_cache_alloc().
*/
Index: linux-2.4.22-vanilla/Documentation/Configure.help
===================================================================
--- linux-2.4.22-vanilla.orig/Documentation/Configure.help 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/Documentation/Configure.help 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/Documentation/Configure.help 2003-12-02 23:55:38.000000000 +0300
@@ -15613,6 +15613,39 @@
be compiled as a module, and so this could be dangerous. Most
everyone wants to say Y here.
Index: linux-2.4.22-vanilla/arch/alpha/defconfig
===================================================================
--- linux-2.4.22-vanilla.orig/arch/alpha/defconfig 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/alpha/defconfig 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/alpha/defconfig 2003-12-02 23:55:38.000000000 +0300
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
Index: linux-2.4.22-vanilla/arch/alpha/kernel/entry.S
===================================================================
--- linux-2.4.22-vanilla.orig/arch/alpha/kernel/entry.S 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/alpha/kernel/entry.S 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/alpha/kernel/entry.S 2003-12-02 23:55:38.000000000 +0300
@@ -1154,6 +1154,18 @@
.quad sys_readahead
.quad sys_ni_syscall /* 380, sys_security */
Index: linux-2.4.22-vanilla/arch/arm/defconfig
===================================================================
--- linux-2.4.22-vanilla.orig/arch/arm/defconfig 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/arm/defconfig 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/arm/defconfig 2003-12-02 23:55:38.000000000 +0300
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
Index: linux-2.4.22-vanilla/arch/arm/kernel/calls.S
===================================================================
--- linux-2.4.22-vanilla.orig/arch/arm/kernel/calls.S 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/arm/kernel/calls.S 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/arm/kernel/calls.S 2003-12-02 23:55:38.000000000 +0300
@@ -240,18 +240,18 @@
.long SYMBOL_NAME(sys_ni_syscall) /* Security */
.long SYMBOL_NAME(sys_gettid)
Index: linux-2.4.22-vanilla/arch/i386/defconfig
===================================================================
--- linux-2.4.22-vanilla.orig/arch/i386/defconfig 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/i386/defconfig 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/i386/defconfig 2003-12-02 23:55:38.000000000 +0300
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
Index: linux-2.4.22-vanilla/arch/ia64/defconfig
===================================================================
--- linux-2.4.22-vanilla.orig/arch/ia64/defconfig 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/ia64/defconfig 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/ia64/defconfig 2003-12-02 23:55:38.000000000 +0300
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
Index: linux-2.4.22-vanilla/arch/m68k/defconfig
===================================================================
--- linux-2.4.22-vanilla.orig/arch/m68k/defconfig 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/m68k/defconfig 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/m68k/defconfig 2003-12-02 23:55:38.000000000 +0300
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
Index: linux-2.4.22-vanilla/arch/mips/defconfig
===================================================================
--- linux-2.4.22-vanilla.orig/arch/mips/defconfig 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/mips/defconfig 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/mips/defconfig 2003-12-02 23:55:38.000000000 +0300
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
Index: linux-2.4.22-vanilla/arch/mips64/defconfig
===================================================================
--- linux-2.4.22-vanilla.orig/arch/mips64/defconfig 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/mips64/defconfig 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/mips64/defconfig 2003-12-02 23:55:38.000000000 +0300
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
Index: linux-2.4.22-vanilla/arch/s390/defconfig
===================================================================
--- linux-2.4.22-vanilla.orig/arch/s390/defconfig 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/s390/defconfig 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/s390/defconfig 2003-12-02 23:55:38.000000000 +0300
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
Index: linux-2.4.22-vanilla/arch/s390/kernel/entry.S
===================================================================
--- linux-2.4.22-vanilla.orig/arch/s390/kernel/entry.S 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/s390/kernel/entry.S 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/s390/kernel/entry.S 2003-12-02 23:55:38.000000000 +0300
@@ -558,18 +558,18 @@
.long sys_fcntl64
.long sys_readahead
Index: linux-2.4.22-vanilla/arch/s390x/defconfig
===================================================================
--- linux-2.4.22-vanilla.orig/arch/s390x/defconfig 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/s390x/defconfig 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/s390x/defconfig 2003-12-02 23:55:38.000000000 +0300
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
Index: linux-2.4.22-vanilla/arch/s390x/kernel/entry.S
===================================================================
--- linux-2.4.22-vanilla.orig/arch/s390x/kernel/entry.S 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/s390x/kernel/entry.S 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/s390x/kernel/entry.S 2003-12-02 23:55:38.000000000 +0300
@@ -591,18 +591,18 @@
.long SYSCALL(sys_ni_syscall,sys32_fcntl64_wrapper)
.long SYSCALL(sys_readahead,sys32_readahead)
Index: linux-2.4.22-vanilla/arch/s390x/kernel/wrapper32.S
===================================================================
--- linux-2.4.22-vanilla.orig/arch/s390x/kernel/wrapper32.S 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/s390x/kernel/wrapper32.S 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/s390x/kernel/wrapper32.S 2003-12-02 23:55:38.000000000 +0300
@@ -1098,6 +1098,98 @@
llgfr %r4,%r4 # long
jg sys32_fstat64 # branch to system call
Index: linux-2.4.22-vanilla/arch/sparc64/defconfig
===================================================================
--- linux-2.4.22-vanilla.orig/arch/sparc64/defconfig 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/arch/sparc64/defconfig 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/arch/sparc64/defconfig 2003-12-02 23:55:38.000000000 +0300
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
Index: linux-2.4.22-vanilla/fs/Config.in
===================================================================
--- linux-2.4.22-vanilla.orig/fs/Config.in 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/Config.in 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/fs/Config.in 2003-12-02 23:55:38.000000000 +0300
@@ -29,6 +29,11 @@
dep_tristate 'BFS file system support (EXPERIMENTAL)' CONFIG_BFS_FS $CONFIG_EXPERIMENTAL
source fs/partitions/Config.in
Index: linux-2.4.22-vanilla/fs/Makefile
===================================================================
---- linux-2.4.22-vanilla.orig/fs/Makefile 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/Makefile 2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/fs/Makefile 2003-12-02 23:55:36.000000000 +0300
++++ linux-2.4.22-vanilla/fs/Makefile 2003-12-02 23:55:38.000000000 +0300
@@ -77,6 +77,9 @@
obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o
Index: linux-2.4.22-vanilla/fs/ext2/Makefile
===================================================================
--- linux-2.4.22-vanilla.orig/fs/ext2/Makefile 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext2/Makefile 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext2/Makefile 2003-12-02 23:55:38.000000000 +0300
@@ -13,4 +13,8 @@
ioctl.o namei.o super.o symlink.o
obj-m := $(O_TARGET)
Index: linux-2.4.22-vanilla/fs/ext2/file.c
===================================================================
--- linux-2.4.22-vanilla.orig/fs/ext2/file.c 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext2/file.c 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext2/file.c 2003-12-02 23:55:38.000000000 +0300
@@ -20,6 +20,7 @@
#include <linux/fs.h>
Index: linux-2.4.22-vanilla/fs/ext2/ialloc.c
===================================================================
--- linux-2.4.22-vanilla.orig/fs/ext2/ialloc.c 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext2/ialloc.c 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext2/ialloc.c 2003-12-02 23:55:38.000000000 +0300
@@ -15,6 +15,7 @@
#include <linux/config.h>
#include <linux/fs.h>
Index: linux-2.4.22-vanilla/fs/ext2/inode.c
===================================================================
--- linux-2.4.22-vanilla.orig/fs/ext2/inode.c 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext2/inode.c 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext2/inode.c 2003-12-02 23:55:38.000000000 +0300
@@ -39,6 +39,18 @@
static int ext2_update_inode(struct inode * inode, int do_sync);
Index: linux-2.4.22-vanilla/fs/ext2/namei.c
===================================================================
--- linux-2.4.22-vanilla.orig/fs/ext2/namei.c 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext2/namei.c 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext2/namei.c 2003-12-02 23:55:38.000000000 +0300
@@ -31,6 +31,7 @@
#include <linux/fs.h>
Index: linux-2.4.22-vanilla/fs/ext2/super.c
===================================================================
--- linux-2.4.22-vanilla.orig/fs/ext2/super.c 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext2/super.c 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext2/super.c 2003-12-02 23:55:38.000000000 +0300
@@ -21,6 +21,7 @@
#include <linux/string.h>
#include <linux/fs.h>
Index: linux-2.4.22-vanilla/fs/ext2/symlink.c
===================================================================
--- linux-2.4.22-vanilla.orig/fs/ext2/symlink.c 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext2/symlink.c 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext2/symlink.c 2003-12-02 23:55:38.000000000 +0300
@@ -19,6 +19,7 @@
#include <linux/fs.h>
};
Index: linux-2.4.22-vanilla/fs/ext2/xattr.c
===================================================================
---- linux-2.4.22-vanilla.orig/fs/ext2/xattr.c 2003-11-03 23:41:29.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext2/xattr.c 2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/fs/ext2/xattr.c 2003-12-02 23:55:38.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext2/xattr.c 2003-12-02 23:55:38.000000000 +0300
@@ -0,0 +1,1212 @@
+/*
+ * linux/fs/ext2/xattr.c
+#endif /* CONFIG_EXT2_FS_XATTR_SHARING */
Index: linux-2.4.22-vanilla/fs/ext2/xattr_user.c
===================================================================
---- linux-2.4.22-vanilla.orig/fs/ext2/xattr_user.c 2003-11-03 23:41:29.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext2/xattr_user.c 2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/fs/ext2/xattr_user.c 2003-12-02 23:55:38.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext2/xattr_user.c 2003-12-02 23:55:38.000000000 +0300
@@ -0,0 +1,103 @@
+/*
+ * linux/fs/ext2/xattr_user.c
+}
Index: linux-2.4.22-vanilla/fs/ext3/Makefile
===================================================================
---- linux-2.4.22-vanilla.orig/fs/ext3/Makefile 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext3/Makefile 2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/fs/ext3/Makefile 2003-12-02 23:55:37.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext3/Makefile 2003-12-02 23:55:38.000000000 +0300
@@ -1,5 +1,5 @@
#
-# Makefile for the linux ext2-filesystem routines.
include $(TOPDIR)/Rules.make
Index: linux-2.4.22-vanilla/fs/ext3/file.c
===================================================================
---- linux-2.4.22-vanilla.orig/fs/ext3/file.c 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext3/file.c 2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/fs/ext3/file.c 2003-12-02 23:55:37.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext3/file.c 2003-12-02 23:55:38.000000000 +0300
@@ -23,6 +23,7 @@
#include <linux/locks.h>
#include <linux/jbd.h>
Index: linux-2.4.22-vanilla/fs/ext3/ialloc.c
===================================================================
--- linux-2.4.22-vanilla.orig/fs/ext3/ialloc.c 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext3/ialloc.c 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext3/ialloc.c 2003-12-02 23:55:38.000000000 +0300
@@ -17,6 +17,7 @@
#include <linux/jbd.h>
#include <linux/ext3_fs.h>
Index: linux-2.4.22-vanilla/fs/ext3/inode.c
===================================================================
--- linux-2.4.22-vanilla.orig/fs/ext3/inode.c 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext3/inode.c 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext3/inode.c 2003-12-02 23:55:38.000000000 +0300
@@ -39,6 +39,18 @@
*/
#undef SEARCH_FROM_ZERO
return;
Index: linux-2.4.22-vanilla/fs/ext3/namei.c
===================================================================
---- linux-2.4.22-vanilla.orig/fs/ext3/namei.c 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext3/namei.c 2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/fs/ext3/namei.c 2003-12-02 23:55:37.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext3/namei.c 2003-12-02 23:55:38.000000000 +0300
@@ -29,6 +29,7 @@
#include <linux/sched.h>
#include <linux/ext3_fs.h>
+
Index: linux-2.4.22-vanilla/fs/ext3/super.c
===================================================================
---- linux-2.4.22-vanilla.orig/fs/ext3/super.c 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext3/super.c 2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/fs/ext3/super.c 2003-12-02 23:55:37.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext3/super.c 2003-12-02 23:56:03.000000000 +0300
@@ -24,6 +24,7 @@
#include <linux/jbd.h>
#include <linux/ext3_fs.h>
if (!parse_options ((char *) data, &sb_block, sbi, &journal_inum, 0)) {
sb->s_dev = 0;
goto out_fail;
-@@ -1827,17 +1843,29 @@
+@@ -1822,22 +1838,35 @@
+
+ static int __init init_ext3_fs(void)
+ {
++ int error;
+ #ifdef CONFIG_QUOTA
+ init_dquot_operations(&ext3_qops);
old_sync_dquot = ext3_qops.sync_dquot;
ext3_qops.sync_dquot = ext3_sync_dquot;
#endif
- return register_filesystem(&ext3_fs_type);
-+ int error = init_ext3_xattr();
++ error = init_ext3_xattr();
+ if (error)
+ return error;
+ error = init_ext3_xattr_user();
Index: linux-2.4.22-vanilla/fs/ext3/symlink.c
===================================================================
--- linux-2.4.22-vanilla.orig/fs/ext3/symlink.c 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext3/symlink.c 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext3/symlink.c 2003-12-02 23:55:38.000000000 +0300
@@ -20,6 +20,7 @@
#include <linux/fs.h>
#include <linux/jbd.h>
};
Index: linux-2.4.22-vanilla/fs/ext3/xattr.c
===================================================================
---- linux-2.4.22-vanilla.orig/fs/ext3/xattr.c 2003-11-03 23:41:29.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext3/xattr.c 2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/fs/ext3/xattr.c 2003-12-02 23:55:38.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext3/xattr.c 2003-12-02 23:55:38.000000000 +0300
@@ -0,0 +1,1225 @@
+/*
+ * linux/fs/ext3/xattr.c
+#endif /* CONFIG_EXT3_FS_XATTR_SHARING */
Index: linux-2.4.22-vanilla/fs/ext3/xattr_user.c
===================================================================
---- linux-2.4.22-vanilla.orig/fs/ext3/xattr_user.c 2003-11-03 23:41:29.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext3/xattr_user.c 2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/fs/ext3/xattr_user.c 2003-12-02 23:55:38.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext3/xattr_user.c 2003-12-02 23:55:39.000000000 +0300
@@ -0,0 +1,111 @@
+/*
+ * linux/fs/ext3/xattr_user.c
Index: linux-2.4.22-vanilla/fs/jfs/jfs_xattr.h
===================================================================
--- linux-2.4.22-vanilla.orig/fs/jfs/jfs_xattr.h 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/jfs/jfs_xattr.h 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/fs/jfs/jfs_xattr.h 2003-12-02 23:55:39.000000000 +0300
@@ -52,8 +52,10 @@
#define END_EALIST(ealist) \
((struct jfs_ea *) (((char *) (ealist)) + EALIST_SIZE(ealist)))
Index: linux-2.4.22-vanilla/fs/jfs/xattr.c
===================================================================
--- linux-2.4.22-vanilla.orig/fs/jfs/xattr.c 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/jfs/xattr.c 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/fs/jfs/xattr.c 2003-12-02 23:55:39.000000000 +0300
@@ -641,7 +641,7 @@
}
if (value == NULL) { /* empty EA, do not remove */
Index: linux-2.4.22-vanilla/fs/mbcache.c
===================================================================
---- linux-2.4.22-vanilla.orig/fs/mbcache.c 2003-11-03 23:41:29.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/mbcache.c 2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/fs/mbcache.c 2003-12-02 23:55:38.000000000 +0300
++++ linux-2.4.22-vanilla/fs/mbcache.c 2003-12-02 23:55:39.000000000 +0300
@@ -0,0 +1,648 @@
+/*
+ * linux/fs/mbcache.c
Index: linux-2.4.22-vanilla/include/asm-arm/unistd.h
===================================================================
--- linux-2.4.22-vanilla.orig/include/asm-arm/unistd.h 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/include/asm-arm/unistd.h 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/include/asm-arm/unistd.h 2003-12-02 23:55:39.000000000 +0300
@@ -250,7 +250,6 @@
#define __NR_security (__NR_SYSCALL_BASE+223)
#define __NR_gettid (__NR_SYSCALL_BASE+224)
Index: linux-2.4.22-vanilla/include/asm-ppc64/unistd.h
===================================================================
--- linux-2.4.22-vanilla.orig/include/asm-ppc64/unistd.h 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/include/asm-ppc64/unistd.h 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/include/asm-ppc64/unistd.h 2003-12-02 23:55:39.000000000 +0300
@@ -218,6 +218,7 @@
#define __NR_mincore 206
#define __NR_gettid 207
Index: linux-2.4.22-vanilla/include/asm-s390/unistd.h
===================================================================
--- linux-2.4.22-vanilla.orig/include/asm-s390/unistd.h 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/include/asm-s390/unistd.h 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/include/asm-s390/unistd.h 2003-12-02 23:55:39.000000000 +0300
@@ -213,9 +213,18 @@
#define __NR_getdents64 220
#define __NR_fcntl64 221
Index: linux-2.4.22-vanilla/include/asm-s390x/unistd.h
===================================================================
--- linux-2.4.22-vanilla.orig/include/asm-s390x/unistd.h 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/include/asm-s390x/unistd.h 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/include/asm-s390x/unistd.h 2003-12-02 23:55:39.000000000 +0300
@@ -181,9 +181,18 @@
#define __NR_mincore 218
#define __NR_madvise 219
Index: linux-2.4.22-vanilla/include/linux/cache_def.h
===================================================================
---- linux-2.4.22-vanilla.orig/include/linux/cache_def.h 2003-11-03 23:41:29.000000000 +0300
-+++ linux-2.4.22-vanilla/include/linux/cache_def.h 2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/include/linux/cache_def.h 2003-12-02 23:55:38.000000000 +0300
++++ linux-2.4.22-vanilla/include/linux/cache_def.h 2003-12-02 23:55:39.000000000 +0300
@@ -0,0 +1,15 @@
+/*
+ * linux/cache_def.h
Index: linux-2.4.22-vanilla/include/linux/errno.h
===================================================================
--- linux-2.4.22-vanilla.orig/include/linux/errno.h 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/include/linux/errno.h 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/include/linux/errno.h 2003-12-02 23:55:39.000000000 +0300
@@ -23,4 +23,8 @@
#endif
Index: linux-2.4.22-vanilla/include/linux/ext2_fs.h
===================================================================
--- linux-2.4.22-vanilla.orig/include/linux/ext2_fs.h 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/include/linux/ext2_fs.h 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/include/linux/ext2_fs.h 2003-12-02 23:55:39.000000000 +0300
@@ -57,8 +57,6 @@
*/
#define EXT2_BAD_INO 1 /* Bad blocks inode */
#endif /* __KERNEL__ */
Index: linux-2.4.22-vanilla/include/linux/ext2_xattr.h
===================================================================
---- linux-2.4.22-vanilla.orig/include/linux/ext2_xattr.h 2003-11-03 23:41:29.000000000 +0300
-+++ linux-2.4.22-vanilla/include/linux/ext2_xattr.h 2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/include/linux/ext2_xattr.h 2003-12-02 23:55:38.000000000 +0300
++++ linux-2.4.22-vanilla/include/linux/ext2_xattr.h 2003-12-02 23:55:39.000000000 +0300
@@ -0,0 +1,157 @@
+/*
+ File: linux/ext2_xattr.h
+
Index: linux-2.4.22-vanilla/include/linux/ext3_fs.h
===================================================================
---- linux-2.4.22-vanilla.orig/include/linux/ext3_fs.h 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/include/linux/ext3_fs.h 2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/include/linux/ext3_fs.h 2003-12-02 23:55:37.000000000 +0300
++++ linux-2.4.22-vanilla/include/linux/ext3_fs.h 2003-12-02 23:55:39.000000000 +0300
@@ -63,8 +63,6 @@
*/
#define EXT3_BAD_INO 1 /* Bad blocks inode */
Index: linux-2.4.22-vanilla/include/linux/ext3_jbd.h
===================================================================
---- linux-2.4.22-vanilla.orig/include/linux/ext3_jbd.h 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/include/linux/ext3_jbd.h 2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/include/linux/ext3_jbd.h 2003-12-02 23:55:37.000000000 +0300
++++ linux-2.4.22-vanilla/include/linux/ext3_jbd.h 2003-12-02 23:55:39.000000000 +0300
@@ -30,13 +30,19 @@
#define EXT3_SINGLEDATA_TRANS_BLOCKS 8U
Index: linux-2.4.22-vanilla/include/linux/ext3_xattr.h
===================================================================
---- linux-2.4.22-vanilla.orig/include/linux/ext3_xattr.h 2003-11-03 23:41:29.000000000 +0300
-+++ linux-2.4.22-vanilla/include/linux/ext3_xattr.h 2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/include/linux/ext3_xattr.h 2003-12-02 23:55:38.000000000 +0300
++++ linux-2.4.22-vanilla/include/linux/ext3_xattr.h 2003-12-02 23:55:39.000000000 +0300
@@ -0,0 +1,157 @@
+/*
+ File: linux/ext3_xattr.h
+
Index: linux-2.4.22-vanilla/include/linux/fs.h
===================================================================
---- linux-2.4.22-vanilla.orig/include/linux/fs.h 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/include/linux/fs.h 2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/include/linux/fs.h 2003-12-02 23:55:35.000000000 +0300
++++ linux-2.4.22-vanilla/include/linux/fs.h 2003-12-02 23:55:39.000000000 +0300
@@ -913,7 +913,7 @@
int (*setattr) (struct dentry *, struct iattr *);
int (*setattr_raw) (struct inode *, struct iattr *);
int (*removexattr) (struct dentry *, const char *);
Index: linux-2.4.22-vanilla/include/linux/mbcache.h
===================================================================
---- linux-2.4.22-vanilla.orig/include/linux/mbcache.h 2003-11-03 23:41:29.000000000 +0300
-+++ linux-2.4.22-vanilla/include/linux/mbcache.h 2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/include/linux/mbcache.h 2003-12-02 23:55:38.000000000 +0300
++++ linux-2.4.22-vanilla/include/linux/mbcache.h 2003-12-02 23:55:39.000000000 +0300
@@ -0,0 +1,69 @@
+/*
+ File: linux/mbcache.h
+#endif
Index: linux-2.4.22-vanilla/kernel/ksyms.c
===================================================================
---- linux-2.4.22-vanilla.orig/kernel/ksyms.c 2003-11-03 23:41:26.000000000 +0300
-+++ linux-2.4.22-vanilla/kernel/ksyms.c 2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/kernel/ksyms.c 2003-12-02 23:55:34.000000000 +0300
++++ linux-2.4.22-vanilla/kernel/ksyms.c 2003-12-02 23:55:39.000000000 +0300
@@ -11,6 +11,7 @@
#include <linux/config.h>
/* internal kernel memory management */
EXPORT_SYMBOL(_alloc_pages);
-@@ -109,6 +111,8 @@
+@@ -108,6 +110,8 @@
EXPORT_SYMBOL(kmem_cache_alloc);
EXPORT_SYMBOL(kmem_cache_free);
EXPORT_SYMBOL(kmem_cache_size);
Index: linux-2.4.22-vanilla/mm/vmscan.c
===================================================================
--- linux-2.4.22-vanilla.orig/mm/vmscan.c 2003-11-03 23:41:27.000000000 +0300
-+++ linux-2.4.22-vanilla/mm/vmscan.c 2003-11-03 23:41:29.000000000 +0300
++++ linux-2.4.22-vanilla/mm/vmscan.c 2003-12-02 23:55:39.000000000 +0300
@@ -18,6 +18,7 @@
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#endif
Index: linux-2.4.22-vanilla/fs/ext3/ext3-exports.c
===================================================================
---- linux-2.4.22-vanilla.orig/fs/ext3/ext3-exports.c 2003-11-03 23:41:29.000000000 +0300
-+++ linux-2.4.22-vanilla/fs/ext3/ext3-exports.c 2003-11-03 23:41:29.000000000 +0300
+--- linux-2.4.22-vanilla.orig/fs/ext3/ext3-exports.c 2003-12-02 23:55:38.000000000 +0300
++++ linux-2.4.22-vanilla/fs/ext3/ext3-exports.c 2003-12-02 23:55:39.000000000 +0300
@@ -0,0 +1,13 @@
+#include <linux/config.h>
+#include <linux/module.h>
+configurable-x86-stack-2.4.20.patch
dev_read_only_hp_2.4.20.patch
exports_2.4.20-rh-hp.patch
lustre_version.patch
+configurable-x86-stack-2.4.20-rh.patch
mcore-2.4.20-8.patch
dsp.patch
dev_read_only_2.4.20-rh.patch
+configurable-x86-stack-2.4.22-rh.patch
dev_read_only_2.4.20-rh.patch
exports_2.4.20-rh-hp.patch
lustre_version.patch
+configurable-x86-stack-2.4.20.patch
dev_read_only_2.4.20-rh.patch
exports_2.4.20-rh-hp.patch
lustre_version.patch
+configurable-x86-stack-2.4.19-pre1.patch
dev_read_only_2.4.20.patch
exports_2.4.19-pre1.patch
lustre_version.patch
+configurable-x86-stack-2.4.20.patch
uml-patch-2.4.20-6.patch
uml-2.4.20-do_mmap_pgoff-fix.patch
uml-2.4.20-fixes-1.patch
listman-2.4.20.patch
ext3-trusted_ea-2.4.20.patch
kernel_text_address-2.4.20-vanilla.patch
-ext3-xattr-ptr-arith-fix.patch
+ext3-xattr-ptr-arith-fix.patch
+gfp_memalloc-2.4.22.patch
+configurable-x86-stack-2.4.20.patch
dev_read_only_2.4.20-rh.patch
exports_2.4.20-rh-hp.patch
lustre_version.patch
struct ldlm_bl_pool *ldlm_bl_pool;
};
-int __init ldlm_init(void);
-void __exit ldlm_exit(void);
+int ldlm_init(void);
+void ldlm_exit(void);
* has finished. Note that if the ACK does arrive, its
* callback wakes us in short order. --eeb */
lwi = LWI_TIMEOUT (HZ/4, NULL, NULL);
- rc = l_wait_event(req->rq_wait_for_rep, !req->rq_want_ack,
+ rc = l_wait_event(req->rq_reply_waitq, !req->rq_want_ack,
&lwi);
CDEBUG (D_HA, "Retrying req %p: %d\n", req, rc);
/* NB go back and test rq_want_ack with locking, to ensure
OBD_FREE(req->rq_repmsg, req->rq_replen);
req->rq_repmsg = NULL;
}
- init_waitqueue_head(&req->rq_wait_for_rep);
+ init_waitqueue_head(&req->rq_reply_waitq);
netrc = 0;
}
init_waitqueue_entry(&commit_wait, current);
add_wait_queue(&obd->obd_commit_waitq, &commit_wait);
- rc = l_wait_event(req->rq_wait_for_rep,
+ rc = l_wait_event(req->rq_reply_waitq,
!req->rq_want_ack || req->rq_resent ||
req->rq_transno <= obd->obd_last_committed, &lwi);
remove_wait_queue(&obd->obd_commit_waitq, &commit_wait);
static int fsfilt_ext3_read_record(struct file * file, void *buf,
int size, loff_t *offs)
{
- struct buffer_head *bh;
- unsigned long block, boffs;
struct inode *inode = file->f_dentry->d_inode;
- int err;
+ unsigned long block;
+ struct buffer_head *bh;
+ int err, blocksize, csize, boffs;
+ /* prevent reading after eof */
if (inode->i_size < *offs + size) {
size = inode->i_size - *offs;
if (size < 0) {
return 0;
}
- block = *offs >> inode->i_blkbits;
- bh = ext3_bread(NULL, inode, block, 0, &err);
- if (!bh) {
- CERROR("can't read block: %d\n", err);
- return err;
- }
+ blocksize = 1 << inode->i_blkbits;
+
+ while (size > 0) {
+ block = *offs >> inode->i_blkbits;
+ boffs = *offs & (blocksize - 1);
+ csize = min(blocksize - boffs, size);
+ bh = ext3_bread(NULL, inode, block, 0, &err);
+ if (!bh) {
+ CERROR("can't read block: %d\n", err);
+ return err;
+ }
- boffs = (unsigned)*offs % bh->b_size;
- if (boffs + size > bh->b_size) {
- CERROR("request crosses block's border. offset %llu, size %u\n",
- *offs, size);
+ memcpy(buf, bh->b_data + boffs, csize);
brelse(bh);
- return -EIO;
- }
- memcpy(buf, bh->b_data + boffs, size);
- brelse(bh);
- *offs += size;
+ *offs += csize;
+ buf += csize;
+ size -= csize;
+ }
return 0;
}
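
For reference, a minimal user-space sketch (not part of the patch) of the chunking arithmetic the reworked fsfilt_ext3_read_record() loop performs: each pass copies min(blocksize - boffs, size) bytes and advances, so a read that straddles block boundaries no longer fails. All values below are made up for the demo.

/* illustrative sketch only; offsets and sizes are hypothetical */
#include <stdio.h>

int main(void)
{
        long long offs = 3000;          /* hypothetical starting file offset */
        int size = 6000;                /* hypothetical bytes requested      */
        int blocksize = 4096;           /* 1 << inode->i_blkbits             */

        while (size > 0) {
                long long block = offs / blocksize;
                int boffs = (int)(offs & (blocksize - 1));
                int csize = blocksize - boffs < size ? blocksize - boffs : size;

                printf("block %lld, offset-in-block %d, copy %d bytes\n",
                       block, boffs, csize);
                offs += csize;
                size -= csize;
        }
        return 0;
}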
-static int fsfilt_ext3_write_record(struct file *file, void *buf, int size,
+static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize,
loff_t *offs, int force_sync)
{
- struct buffer_head *bh;
- unsigned long block, boffs;
+ struct buffer_head *bh = NULL;
+ unsigned long block;
struct inode *inode = file->f_dentry->d_inode;
- loff_t old_size = inode->i_size;
+ loff_t old_size = inode->i_size, offset = *offs;
+ loff_t new_size = inode->i_size;
journal_t *journal;
handle_t *handle;
- int err;
+ int err, block_count = 0, blocksize, size, boffs;
+ /* Determine how many transaction credits are needed */
+ blocksize = 1 << inode->i_blkbits;
+ block_count = (*offs & (blocksize - 1)) + bufsize;
+ block_count = (block_count + blocksize - 1) >> inode->i_blkbits;
+
journal = EXT3_SB(inode->i_sb)->s_journal;
- handle = journal_start(journal, EXT3_DATA_TRANS_BLOCKS + 2);
+ handle = journal_start(journal,
+ block_count * EXT3_DATA_TRANS_BLOCKS + 2);
if (IS_ERR(handle)) {
CERROR("can't start transaction\n");
return PTR_ERR(handle);
}
- block = *offs >> inode->i_blkbits;
- if (*offs + size > inode->i_size) {
- down(&inode->i_sem);
- if (*offs + size > inode->i_size)
- inode->i_size = *offs + size;
- if (inode->i_size > EXT3_I(inode)->i_disksize)
- EXT3_I(inode)->i_disksize = inode->i_size;
- up(&inode->i_sem);
- }
-
- bh = ext3_bread(handle, inode, block, 1, &err);
- if (!bh) {
- CERROR("can't read/create block: %d\n", err);
- goto out;
- }
-
- /* This is a hack only needed because ext3_get_block_handle() updates
- * i_disksize after marking the inode dirty in ext3_splice_branch().
- * We will fix that when we get a chance, as ext3_mark_inode_dirty()
- * is not without cost, nor is it even exported.
- */
- if (inode->i_size > old_size)
- mark_inode_dirty(inode);
-
- boffs = (unsigned)*offs % bh->b_size;
- if (boffs + size > bh->b_size) {
- CERROR("request crosses block's border. offset %llu, size %u\n",
- *offs, size);
- err = -EIO;
- goto out;
- }
+ while (bufsize > 0) {
+ if (bh != NULL)
+ brelse(bh);
+
+ block = offset >> inode->i_blkbits;
+ boffs = offset & (blocksize - 1);
+ size = min(blocksize - boffs, bufsize);
+ bh = ext3_bread(handle, inode, block, 1, &err);
+ if (!bh) {
+ CERROR("can't read/create block: %d\n", err);
+ goto out;
+ }
- err = ext3_journal_get_write_access(handle, bh);
- if (err) {
- CERROR("journal_get_write_access() returned error %d\n", err);
- goto out;
- }
- memcpy(bh->b_data + boffs, buf, size);
- err = ext3_journal_dirty_metadata(handle, bh);
- if (err) {
- CERROR("journal_dirty_metadata() returned error %d\n", err);
- goto out;
+ err = ext3_journal_get_write_access(handle, bh);
+ if (err) {
+ CERROR("journal_get_write_access() returned error %d\n",
+ err);
+ goto out;
+ }
+ LASSERT(bh->b_data + boffs + size <= bh->b_data + bh->b_size);
+ memcpy(bh->b_data + boffs, buf, size);
+ err = ext3_journal_dirty_metadata(handle, bh);
+ if (err) {
+ CERROR("journal_dirty_metadata() returned error %d\n",
+ err);
+ goto out;
+ }
+ if (offset + size > new_size)
+ new_size = offset + size;
+ offset += size;
+ bufsize -= size;
+ buf += size;
}
if (force_sync)
out:
if (bh)
brelse(bh);
+
+ /* correct in-core and on-disk sizes */
+ if (new_size > inode->i_size) {
+ down(&inode->i_sem);
+ if (new_size > inode->i_size)
+ inode->i_size = new_size;
+ if (inode->i_size > EXT3_I(inode)->i_disksize)
+ EXT3_I(inode)->i_disksize = inode->i_size;
+ up(&inode->i_sem);
+ if (inode->i_size > old_size)
+ mark_inode_dirty(inode);
+ }
+
journal_stop(handle);
if (err == 0)
- *offs += size;
+ *offs = offset;
return err;
}
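
A minimal sketch (not part of the patch) of the credit arithmetic the new fsfilt_ext3_write_record() uses: a write of bufsize bytes at offset offs may touch several blocks, and each block needs its own EXT3_DATA_TRANS_BLOCKS worth of journal credits, so the transaction is sized from the block count instead of assuming a single block. The 8-credit figure and the I/O values are assumptions for the demo.

/* illustrative sketch only */
#include <stdio.h>

static int blocks_touched(long long offs, int bufsize, int blocksize)
{
        int span = (int)(offs & (blocksize - 1)) + bufsize;  /* lead-in + data */
        return (span + blocksize - 1) / blocksize;           /* round up       */
}

int main(void)
{
        int data_trans_blocks = 8;      /* stand-in for EXT3_DATA_TRANS_BLOCKS */
        int blocks = blocks_touched(3000, 6000, 4096);       /* hypothetical IO */

        printf("blocks touched: %d, journal credits: %d\n",
               blocks, blocks * data_trans_blocks + 2);
        return 0;
}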
sizeof req->rq_ack_locks);
spin_lock_irqsave (&req->rq_lock, flags);
oldrep->rq_resent = 1;
- wake_up(&oldrep->rq_wait_for_rep);
+ wake_up(&oldrep->rq_reply_waitq);
spin_unlock_irqrestore (&req->rq_lock, flags);
DEBUG_REQ(D_HA, oldrep, "stole locks from");
DEBUG_REQ(D_HA, req, "stole locks for");
OBD_FAIL_RETURN(OBD_FAIL_MDS_ALL_REQUEST_NET | OBD_FAIL_ONCE, 0);
- LASSERT(!strcmp(req->rq_obd->obd_type->typ_name, LUSTRE_MDT_NAME));
-
LASSERT(current->journal_info == NULL);
/* XXX identical to OST */
if (req->rq_reqmsg->opc != MDS_CONNECT) {
int i;
ENTRY;
- if (filter->fo_subdir_count) {
- for (i = 0; i < filter->fo_subdir_count; i++) {
- struct dentry *dentry = filter->fo_dentry_O_sub[i];
- f_dput(dentry);
- filter->fo_dentry_O_sub[i] = NULL;
- }
- OBD_FREE(filter->fo_dentry_O_sub,
- filter->fo_subdir_count *
- sizeof(*filter->fo_dentry_O_sub));
- }
if (filter->fo_dentry_O_groups != NULL &&
filter->fo_last_objids != NULL &&
filter->fo_last_objid_files != NULL) {
}
}
}
+ if (filter->fo_dentry_O_sub != NULL && filter->fo_subdir_count) {
+ for (i = 0; i < filter->fo_subdir_count; i++) {
+ struct dentry *dentry = filter->fo_dentry_O_sub[i];
+ if (dentry != NULL) {
+ f_dput(dentry);
+ filter->fo_dentry_O_sub[i] = NULL;
+ }
+ }
+ OBD_FREE(filter->fo_dentry_O_sub,
+ filter->fo_subdir_count *
+ sizeof(*filter->fo_dentry_O_sub));
+ }
if (filter->fo_dentry_O_groups != NULL)
OBD_FREE(filter->fo_dentry_O_groups,
FILTER_GROUPS * sizeof(struct dentry *));
CDEBUG(D_INODE, "got/created O/%s: %p\n", name, dentry);
if (IS_ERR(dentry)) {
rc = PTR_ERR(dentry);
- CERROR("cannot create O/%s: rc = %d\n", name, rc);
+ CERROR("cannot lookup/create O/%s: rc = %d\n",
+ name, rc);
GOTO(cleanup, rc);
}
filter->fo_dentry_O_groups[i] = dentry;
CDEBUG(D_INODE, "got/created O/0/%s: %p\n", dir,dentry);
if (IS_ERR(dentry)) {
rc = PTR_ERR(dentry);
- CERROR("can't create O/0/%s: rc = %d\n",dir,rc);
+ CERROR("can't lookup/create O/0/%s: rc = %d\n",
+ dir, rc);
GOTO(cleanup, rc);
}
filter->fo_dentry_O_sub[i] = dentry;
filter = &exp->exp_obd->u.filter;
/* an objid of zero is taken to mean "sync whole filesystem" */
- if (!oa || !oa->o_valid & OBD_MD_FLID) {
+ if (!oa || !(oa->o_valid & OBD_MD_FLID)) {
rc = fsfilt_sync(exp->exp_obd, filter->fo_sb);
- GOTO(out_exp, rc);
+ RETURN(rc);
}
dentry = filter_oa2dentry(exp->exp_obd, oa);
if (IS_ERR(dentry))
- GOTO(out_exp, rc = PTR_ERR(dentry));
+ RETURN(PTR_ERR(dentry));
push_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL);
pop_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL);
f_dput(dentry);
-out_exp:
RETURN(rc);
}
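
A small stand-alone demo (not part of the patch) of the operator-precedence bug the filter sync hunk fixes: '!' binds tighter than '&', so "!oa->o_valid & OBD_MD_FLID" evaluates "(!oa->o_valid) & OBD_MD_FLID", which is false whenever any flag at all is set. The flag values below are hypothetical.

/* illustrative sketch only; flag bits are made up */
#include <stdio.h>

#define OBD_MD_FLID   (1ULL << 0)
#define OBD_MD_FLSIZE (1ULL << 1)

int main(void)
{
        unsigned long long o_valid = OBD_MD_FLSIZE;  /* some flag set, but not FLID */

        /* old test: 0, so the whole-filesystem sync is skipped incorrectly */
        printf("!o_valid & OBD_MD_FLID   = %d\n", (int)(!o_valid & OBD_MD_FLID));
        /* new test: 1, so the whole-filesystem sync runs as intended */
        printf("!(o_valid & OBD_MD_FLID) = %d\n", (int)!(o_valid & OBD_MD_FLID));
        return 0;
}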
lustre_swab_niobuf_remote (&remote_nb[i]);
}
- size[0] = sizeof(*body);
rc = lustre_pack_reply(req, 1, size, NULL);
if (rc)
GOTO(out, rc);
}
}
- if (strcmp(req->rq_obd->obd_type->typ_name, "ost") != 0)
- GOTO(out, rc = -EINVAL);
-
oti_init(oti, req);
switch (req->rq_reqmsg->opc) {
-/* $Id: cygwin-ioctl.h,v 1.2 2003/12/03 03:14:43 phil Exp $
+/* $Id: cygwin-ioctl.h,v 1.3 2003/12/03 05:12:41 phil Exp $
*
* linux/ioctl.h for Linux by H.H. Bergman.
*/
if (portal_cerror == 0) \
break; \
CHECK_STACK(CDEBUG_STACK); \
- if (!(mask) || ((mask) & (D_ERROR | D_EMERG)) || \
+ if (!(mask) || ((mask) & (D_ERROR | D_EMERG | D_WARNING)) || \
(portal_debug & (mask) && \
portal_subsystem_debug & DEBUG_SUBSYSTEM)) \
portals_debug_msg(DEBUG_SUBSYSTEM, mask, \
#define GFP_MEMALLOC 0
#endif
-#define PORTAL_ALLOC(ptr, size) \
+#define PORTAL_ALLOC_GFP(ptr, size, mask) \
do { \
LASSERT (!in_interrupt()); \
if ((size) > PORTAL_VMALLOC_SIZE) \
(ptr) = vmalloc(size); \
else \
- (ptr) = kmalloc((size), (GFP_KERNEL | GFP_MEMALLOC)); \
- if ((ptr) == NULL) \
+ (ptr) = kmalloc((size), (mask)); \
+ if ((ptr) == NULL) { \
CERROR("PORTALS: out of memory at %s:%d (tried to alloc '"\
#ptr "' = %d)\n", __FILE__, __LINE__, (int)(size));\
- else { \
+ CERROR("PORTALS: %d total bytes allocated by portals\n", \
+ atomic_read(&portal_kmemory)); \
+ } else { \
portal_kmem_inc((ptr), (size)); \
memset((ptr), 0, (size)); \
} \
(int)(size), (ptr), atomic_read (&portal_kmemory)); \
} while (0)
+#define PORTAL_ALLOC(ptr, size) \
+ PORTAL_ALLOC_GFP(ptr, size, (GFP_KERNEL | GFP_MEMALLOC))
+
+#define PORTAL_ALLOC_ATOMIC(ptr, size) \
+ PORTAL_ALLOC_GFP(ptr, size, (GFP_ATOMIC | GFP_MEMALLOC))
+
#define PORTAL_FREE(ptr, size) \
do { \
int s = (size); \
CERROR("PORTALS: out of memory at %s:%d (tried to alloc" \
" '" #ptr "' from slab '" #slab "')\n", __FILE__, \
__LINE__); \
+ CERROR("PORTALS: %d total bytes allocated by portals\n", \
+ atomic_read(&portal_kmemory)); \
} else { \
portal_kmem_inc((ptr), (size)); \
memset((ptr), 0, (size)); \
} \
- CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %ld at %p (tot %d).\n", \
+ CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d).\n", \
(int)(size), (ptr), atomic_read(&portal_kmemory)); \
} while (0)
/******************************************************************************/
/* Light-weight trace
* Support for temporary event tracing with minimal Heisenberg effect. */
-#define LWT_SUPPORT 1
+#define LWT_SUPPORT 0
+
+#define LWT_MEMORY (64<<20)
+#define LWT_MAX_CPUS 4
typedef struct {
cycles_t lwte_when;
extern int lwt_lookup_string (int *size, char *knlptr,
char *usrptr, int usrsize);
extern int lwt_control (int enable, int clear);
-extern int lwt_snapshot (int *ncpu, int *total_size,
+extern int lwt_snapshot (cycles_t *now, int *ncpu, int *total_size,
void *user_ptr, int user_size);
/* Note that we _don't_ define LWT_EVENT at all if LWT_SUPPORT isn't set.
#endif /* __KERNEL__ */
#endif /* LWT_SUPPORT */
+struct portals_device_userstate
+{
+ int pdu_memhog_pages;
+ struct page *pdu_memhog_root_page;
+};
#include <linux/portals_lib.h>
#define IOC_PORTAL_LWT_CONTROL _IOWR('e', 39, long)
#define IOC_PORTAL_LWT_SNAPSHOT _IOWR('e', 40, long)
#define IOC_PORTAL_LWT_LOOKUP_STRING _IOWR('e', 41, long)
-#define IOC_PORTAL_MAX_NR 41
+#define IOC_PORTAL_MEMHOG _IOWR('e', 42, long)
+#define IOC_PORTAL_MAX_NR 42
enum {
QSWNAL = 1,
lib_eq_alloc (nal_cb_t *nal)
{
/* NEVER called with statelock held */
- lib_eq_t *eq = kmem_cache_alloc(ptl_eq_slab, GFP_NOFS);
+ lib_eq_t *eq;
+ PORTAL_SLAB_ALLOC(eq, ptl_eq_slab, sizeof(*eq));
if (eq == NULL)
return (NULL);
{
/* ALWAYS called with statelock held */
atomic_dec (&eq_in_use_count);
- kmem_cache_free(ptl_eq_slab, eq);
+ PORTAL_SLAB_FREE(eq, ptl_eq_slab, sizeof(*eq));
}
static inline lib_md_t *
lib_md_alloc (nal_cb_t *nal)
{
/* NEVER called with statelock held */
- lib_md_t *md = kmem_cache_alloc(ptl_md_slab, GFP_NOFS);
+ lib_md_t *md;
+ PORTAL_SLAB_ALLOC(md, ptl_md_slab, sizeof(*md));
if (md == NULL)
return (NULL);
{
/* ALWAYS called with statelock held */
atomic_dec (&md_in_use_count);
- kmem_cache_free(ptl_md_slab, md);
+ PORTAL_SLAB_FREE(md, ptl_md_slab, sizeof(*md));
}
static inline lib_me_t *
lib_me_alloc (nal_cb_t *nal)
{
/* NEVER called with statelock held */
- lib_me_t *me = kmem_cache_alloc(ptl_me_slab, GFP_NOFS);
+ lib_me_t *me;
+ PORTAL_SLAB_ALLOC(me, ptl_me_slab, sizeof(*me));
if (me == NULL)
return (NULL);
{
/* ALWAYS called with statelock held */
atomic_dec (&me_in_use_count);
- kmem_cache_free(ptl_me_slab, me);
+ PORTAL_SLAB_FREE(me, ptl_me_slab, sizeof(*me));
}
static inline lib_msg_t *
lib_msg_alloc(nal_cb_t *nal)
{
/* ALWAYS called with statelock held */
- lib_msg_t *msg = kmem_cache_alloc(ptl_msg_slab, GFP_ATOMIC);
+ lib_msg_t *msg;
+ PORTAL_SLAB_ALLOC(msg, ptl_msg_slab, sizeof(*msg));
if (msg == NULL)
return (NULL);
{
/* ALWAYS called with statelock held */
atomic_dec (&msg_in_use_count);
- kmem_cache_free(ptl_msg_slab, msg);
+ PORTAL_SLAB_FREE(msg, ptl_msg_slab, sizeof(*msg));
}
#endif
int jt_ptl_print_routes (int argc, char **argv);
int jt_ptl_fail_nid (int argc, char **argv);
int jt_ptl_lwt(int argc, char **argv);
+int jt_ptl_memhog(int argc, char **argv);
int dbg_initialize(int argc, char **argv);
int jt_dbg_filter(int argc, char **argv);
}
void
-ksocknal_free_buffers (void)
+ksocknal_free_fmbs (ksock_fmb_pool_t *p)
{
- if (ksocknal_data.ksnd_fmbs != NULL) {
- ksock_fmb_t *fmb = (ksock_fmb_t *)ksocknal_data.ksnd_fmbs;
- int i;
- int j;
-
- for (i = 0;
- i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS);
- i++, fmb++)
- for (j = 0; j < fmb->fmb_npages; j++)
- if (fmb->fmb_pages[j] != NULL)
- __free_page (fmb->fmb_pages[j]);
-
- PORTAL_FREE (ksocknal_data.ksnd_fmbs,
- sizeof (ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS +
- SOCKNAL_LARGE_FWD_NMSGS));
+ ksock_fmb_t *fmb;
+ int i;
+
+ LASSERT (list_empty(&p->fmp_blocked_conns));
+ LASSERT (p->fmp_nactive_fmbs == 0);
+
+ while (!list_empty(&p->fmp_idle_fmbs)) {
+
+ fmb = list_entry(p->fmp_idle_fmbs.next,
+ ksock_fmb_t, fmb_list);
+
+ for (i = 0; i < fmb->fmb_npages; i++)
+ if (fmb->fmb_pages[i] != NULL)
+ __free_page(fmb->fmb_pages[i]);
+
+ list_del(&fmb->fmb_list);
+ PORTAL_FREE(fmb, sizeof(*fmb));
}
+}
+
+void
+ksocknal_free_buffers (void)
+{
+ ksocknal_free_fmbs(&ksocknal_data.ksnd_small_fmp);
+ ksocknal_free_fmbs(&ksocknal_data.ksnd_large_fmp);
- LASSERT (ksocknal_data.ksnd_active_ltxs == 0);
- if (ksocknal_data.ksnd_ltxs != NULL)
- PORTAL_FREE (ksocknal_data.ksnd_ltxs,
- sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS +
- SOCKNAL_NNBLK_LTXS));
+ LASSERT (atomic_read(&ksocknal_data.ksnd_nactive_ltxs) == 0);
if (ksocknal_data.ksnd_schedulers != NULL)
PORTAL_FREE (ksocknal_data.ksnd_schedulers,
PORTAL_ALLOC (ksocknal_data.ksnd_peers,
sizeof (struct list_head) * ksocknal_data.ksnd_peer_hash_size);
if (ksocknal_data.ksnd_peers == NULL)
- RETURN (-ENOMEM);
+ return (-ENOMEM);
for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]);
INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_idle_fmbs);
INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns);
- spin_lock_init(&ksocknal_data.ksnd_idle_ltx_lock);
- INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_nblk_ltx_list);
- INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_ltx_list);
- init_waitqueue_head(&ksocknal_data.ksnd_idle_ltx_waitq);
-
spin_lock_init (&ksocknal_data.ksnd_reaper_lock);
INIT_LIST_HEAD (&ksocknal_data.ksnd_zombie_conns);
INIT_LIST_HEAD (&ksocknal_data.ksnd_deathrow_conns);
sizeof(ksock_sched_t) * SOCKNAL_N_SCHED);
if (ksocknal_data.ksnd_schedulers == NULL) {
ksocknal_module_fini ();
- RETURN(-ENOMEM);
+ return (-ENOMEM);
}
for (i = 0; i < SOCKNAL_N_SCHED; i++) {
init_waitqueue_head (&kss->kss_waitq);
}
- CDEBUG (D_MALLOC, "ltx "LPSZ", total "LPSZ"\n", sizeof (ksock_ltx_t),
- sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS));
-
- PORTAL_ALLOC(ksocknal_data.ksnd_ltxs,
- sizeof(ksock_ltx_t) * (SOCKNAL_NLTXS +SOCKNAL_NNBLK_LTXS));
- if (ksocknal_data.ksnd_ltxs == NULL) {
- ksocknal_module_fini ();
- return (-ENOMEM);
- }
-
- /* Deterministic bugs please */
- memset (ksocknal_data.ksnd_ltxs, 0xeb,
- sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS));
-
- for (i = 0; i < SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS; i++) {
- ksock_ltx_t *ltx = &((ksock_ltx_t *)ksocknal_data.ksnd_ltxs)[i];
-
- ltx->ltx_tx.tx_hdr = <x->ltx_hdr;
- ltx->ltx_idle = i < SOCKNAL_NLTXS ?
- &ksocknal_data.ksnd_idle_ltx_list :
- &ksocknal_data.ksnd_idle_nblk_ltx_list;
- list_add (<x->ltx_tx.tx_list, ltx->ltx_idle);
- }
-
rc = PtlNIInit(ksocknal_init, 32, 4, 0, &ksocknal_ni);
if (rc != 0) {
CERROR("ksocknal: PtlNIInit failed: error %d\n", rc);
ksocknal_module_fini ();
- RETURN (rc);
+ return (rc);
}
PtlNIDebug(ksocknal_ni, ~0);
CERROR("Can't spawn socknal scheduler[%d]: %d\n",
i, rc);
ksocknal_module_fini ();
- RETURN (rc);
+ return (rc);
}
}
if (rc != 0) {
CERROR("Can't spawn socknal autoconnectd: %d\n", rc);
ksocknal_module_fini ();
- RETURN (rc);
+ return (rc);
}
}
if (rc != 0) {
CERROR ("Can't spawn socknal reaper: %d\n", rc);
ksocknal_module_fini ();
- RETURN (rc);
+ return (rc);
}
rc = kpr_register(&ksocknal_data.ksnd_router,
} else {
/* Only allocate forwarding buffers if I'm on a gateway */
- PORTAL_ALLOC(ksocknal_data.ksnd_fmbs,
- sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS +
- SOCKNAL_LARGE_FWD_NMSGS));
- if (ksocknal_data.ksnd_fmbs == NULL) {
- ksocknal_module_fini ();
- RETURN(-ENOMEM);
- }
-
- /* NULL out buffer pointers etc */
- memset(ksocknal_data.ksnd_fmbs, 0,
- sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS +
- SOCKNAL_LARGE_FWD_NMSGS));
-
for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS +
SOCKNAL_LARGE_FWD_NMSGS); i++) {
- ksock_fmb_t *fmb =
- &((ksock_fmb_t *)ksocknal_data.ksnd_fmbs)[i];
+ ksock_fmb_t *fmb;
+
+ PORTAL_ALLOC(fmb, sizeof(*fmb));
+ if (fmb == NULL) {
+ ksocknal_module_fini();
+ return (-ENOMEM);
+ }
if (i < SOCKNAL_SMALL_FWD_NMSGS) {
fmb->fmb_npages = SOCKNAL_SMALL_FWD_PAGES;
fmb->fmb_pool = &ksocknal_data.ksnd_large_fmp;
}
- LASSERT (fmb->fmb_npages > 0);
for (j = 0; j < fmb->fmb_npages; j++) {
fmb->fmb_pages[j] = alloc_page(GFP_KERNEL);
return (-ENOMEM);
}
- LASSERT(page_address (fmb->fmb_pages[j]) !=
- NULL);
+ LASSERT(page_address(fmb->fmb_pages[j]) != NULL);
}
list_add(&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs);
#define SOCKNAL_PEER_HASH_SIZE 101 /* # peer lists */
-#define SOCKNAL_NLTXS 128 /* # normal transmit messages */
-#define SOCKNAL_NNBLK_LTXS 128 /* # transmit messages reserved if can't block */
-
#define SOCKNAL_SMALL_FWD_NMSGS 128 /* # small messages I can be forwarding at any time */
#define SOCKNAL_LARGE_FWD_NMSGS 64 /* # large messages I can be forwarding at any time */
typedef struct /* pool of forwarding buffers */
{
spinlock_t fmp_lock; /* serialise */
- struct list_head fmp_idle_fmbs; /* buffers waiting for a connection */
+ struct list_head fmp_idle_fmbs; /* free buffers */
struct list_head fmp_blocked_conns; /* connections waiting for a buffer */
+ int fmp_nactive_fmbs; /* # buffers in use */
} ksock_fmb_pool_t;
kpr_router_t ksnd_router; /* THE router */
- void *ksnd_fmbs; /* all the pre-allocated FMBs */
ksock_fmb_pool_t ksnd_small_fmp; /* small message forwarding buffers */
ksock_fmb_pool_t ksnd_large_fmp; /* large message forwarding buffers */
- void *ksnd_ltxs; /* all the pre-allocated LTXs */
- spinlock_t ksnd_idle_ltx_lock; /* serialise ltx alloc/free */
- struct list_head ksnd_idle_ltx_list; /* where to get an idle LTX */
- struct list_head ksnd_idle_nblk_ltx_list; /* where to get an idle LTX if you can't block */
- wait_queue_head_t ksnd_idle_ltx_waitq; /* where to block for an idle LTX */
- int ksnd_active_ltxs; /* #active ltxs */
+ atomic_t ksnd_nactive_ltxs; /* #active ltxs */
struct list_head ksnd_deathrow_conns; /* conns to be closed */
struct list_head ksnd_zombie_conns; /* conns to be freed */
#define KSOCK_ZCCD_2_TX(ptr) list_entry (ptr, ksock_tx_t, tx_zccd)
/* network zero copy callback descriptor embedded in ksock_tx_t */
-/* space for the tx frag descriptors: hdr is always 1 iovec
- * and payload is PTL_MD_MAX of either type. */
-typedef struct
-{
- struct iovec hdr;
- union {
- struct iovec iov[PTL_MD_MAX_IOV];
- ptl_kiov_t kiov[PTL_MD_MAX_IOV];
- } payload;
-} ksock_txiovspace_t;
-
typedef struct /* locally transmitted packet */
{
ksock_tx_t ltx_tx; /* send info */
- struct list_head *ltx_idle; /* where to put when idle */
void *ltx_private; /* lib_finalize() callback arg */
void *ltx_cookie; /* lib_finalize() callback arg */
- ksock_txiovspace_t ltx_iov_space; /* where to stash frag descriptors */
ptl_hdr_t ltx_hdr; /* buffer for packet header */
+ int ltx_desc_size; /* bytes allocated for this desc */
+ struct iovec ltx_iov[1]; /* iov for hdr + payload */
+ ptl_kiov_t ltx_kiov[0]; /* kiov for payload */
} ksock_ltx_t;
#define KSOCK_TX_2_KPR_FWD_DESC(ptr) list_entry ((kprfd_scratch_t *)ptr, kpr_fwd_desc_t, kprfd_scratch)
return 0;
}
-ksock_ltx_t *
-ksocknal_get_ltx (int may_block)
-{
- unsigned long flags;
- ksock_ltx_t *ltx = NULL;
-
- for (;;) {
- spin_lock_irqsave (&ksocknal_data.ksnd_idle_ltx_lock, flags);
-
- if (!list_empty (&ksocknal_data.ksnd_idle_ltx_list)) {
- ltx = list_entry(ksocknal_data.ksnd_idle_ltx_list.next,
- ksock_ltx_t, ltx_tx.tx_list);
- list_del (<x->ltx_tx.tx_list);
- ksocknal_data.ksnd_active_ltxs++;
- break;
- }
-
- if (!may_block) {
- if (!list_empty(&ksocknal_data.ksnd_idle_nblk_ltx_list)) {
- ltx = list_entry(ksocknal_data.ksnd_idle_nblk_ltx_list.next,
- ksock_ltx_t, ltx_tx.tx_list);
- list_del (<x->ltx_tx.tx_list);
- ksocknal_data.ksnd_active_ltxs++;
- }
- break;
- }
-
- spin_unlock_irqrestore(&ksocknal_data.ksnd_idle_ltx_lock,
- flags);
-
- wait_event (ksocknal_data.ksnd_idle_ltx_waitq,
- !list_empty (&ksocknal_data.ksnd_idle_ltx_list));
- }
-
- spin_unlock_irqrestore (&ksocknal_data.ksnd_idle_ltx_lock, flags);
-
- return (ltx);
-}
-
void
-ksocknal_put_ltx (ksock_ltx_t *ltx)
+ksocknal_free_ltx (ksock_ltx_t *ltx)
{
- unsigned long flags;
-
- spin_lock_irqsave (&ksocknal_data.ksnd_idle_ltx_lock, flags);
-
- ksocknal_data.ksnd_active_ltxs--;
- list_add_tail (<x->ltx_tx.tx_list, ltx->ltx_idle);
-
- /* normal tx desc => wakeup anyone blocking for one */
- if (ltx->ltx_idle == &ksocknal_data.ksnd_idle_ltx_list)
- wake_up (&ksocknal_data.ksnd_idle_ltx_waitq);
-
- spin_unlock_irqrestore (&ksocknal_data.ksnd_idle_ltx_lock, flags);
+ atomic_dec(&ksocknal_data.ksnd_nactive_ltxs);
+ PORTAL_FREE(ltx, ltx->ltx_desc_size);
}
#if SOCKNAL_ZC
}
int
-ksocknal_sendmsg (ksock_conn_t *conn, ksock_tx_t *tx)
+ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx)
{
/* Return 0 on success, < 0 on error.
* caller checks tx_resid to determine progress/completion */
}
rc = ksocknal_getconnsock (conn);
- if (rc != 0)
+ if (rc != 0) {
+ LASSERT (conn->ksnc_closing);
return (rc);
+ }
for (;;) {
LASSERT (tx->tx_resid != 0);
- if (conn->ksnc_closing) {
- rc = -ESHUTDOWN;
- break;
- }
-
if (tx->tx_niov != 0)
rc = ksocknal_send_iov (conn, tx);
else
}
int
-ksocknal_recvmsg (ksock_conn_t *conn)
+ksocknal_receive (ksock_conn_t *conn)
{
/* Return 1 on success, 0 on EOF, < 0 on error.
* Caller checks ksnc_rx_nob_wanted to determine
}
rc = ksocknal_getconnsock (conn);
- if (rc != 0)
+ if (rc != 0) {
+ LASSERT (conn->ksnc_closing);
return (rc);
+ }
for (;;) {
- if (conn->ksnc_closing) {
- rc = -ESHUTDOWN;
- break;
- }
-
if (conn->ksnc_rx_niov != 0)
rc = ksocknal_recv_iov (conn);
else
lib_finalize (&ksocknal_lib, ltx->ltx_private, ltx->ltx_cookie);
- ksocknal_put_ltx (ltx);
+ ksocknal_free_ltx (ltx);
EXIT;
}
{
int rc;
- rc = ksocknal_sendmsg (conn, tx);
+ rc = ksocknal_transmit (conn, tx);
CDEBUG (D_NET, "send(%d) %d\n", tx->tx_resid, rc);
LASSERT (rc != -EAGAIN);
unsigned long flags;
ksock_sched_t *sched = conn->ksnc_scheduler;
- /* called holding global lock (read or irq-write) */
-
+ /* called holding global lock (read or irq-write) and caller may
+ * not have dropped this lock between finding conn and calling me,
+ * so we don't need the {get,put}connsock dance to deref
+ * ksnc_sock... */
+ LASSERT(!conn->ksnc_closing);
+ LASSERT(tx->tx_resid == tx->tx_nob);
+
CDEBUG (D_NET, "Sending to "LPX64" on port %d\n",
conn->ksnc_peer->ksnp_nid, conn->ksnc_port);
atomic_add (tx->tx_nob, &conn->ksnc_tx_nob);
- tx->tx_resid = tx->tx_nob;
tx->tx_conn = conn;
#if SOCKNAL_ZC
/* NB this sets 1 ref on zccd, so the callback can only occur after
* I've released this ref. */
#endif
-
spin_lock_irqsave (&sched->kss_lock, flags);
conn->ksnc_tx_deadline = jiffies +
tx->tx_nob, tx->tx_niov, tx->tx_nkiov);
tx->tx_conn = NULL; /* only set when assigned a conn */
+ tx->tx_resid = tx->tx_nob;
+ tx->tx_hdr = (ptl_hdr_t *)tx->tx_iov[0].iov_base;
g_lock = &ksocknal_data.ksnd_global_lock;
read_lock (g_lock);
return (-EHOSTUNREACH);
}
-ksock_ltx_t *
-ksocknal_setup_hdr (nal_cb_t *nal, void *private, lib_msg_t *cookie,
- ptl_hdr_t *hdr, int type)
+int
+ksocknal_sendmsg(nal_cb_t *nal,
+ void *private,
+ lib_msg_t *cookie,
+ ptl_hdr_t *hdr,
+ int type,
+ ptl_nid_t nid,
+ ptl_pid_t pid,
+ unsigned int payload_niov,
+ struct iovec *payload_iov,
+ ptl_kiov_t *payload_kiov,
+ size_t payload_nob)
{
ksock_ltx_t *ltx;
+ int desc_size;
+ int rc;
+
+ /* NB 'private' is different depending on what we're sending.
+ * Just ignore it... */
+
+ CDEBUG(D_NET, "sending "LPSZ" bytes in %d frags to nid:"LPX64
+ " pid %d\n", payload_nob, payload_niov, nid , pid);
- /* I may not block for a transmit descriptor if I might block the
- * receiver, or an interrupt handler. */
- ltx = ksocknal_get_ltx (!(type == PTL_MSG_ACK ||
- type == PTL_MSG_REPLY ||
- in_interrupt ()));
+ LASSERT (payload_nob == 0 || payload_niov > 0);
+ LASSERT (payload_niov <= PTL_MD_MAX_IOV);
+
+ /* It must be OK to kmap() if required */
+ LASSERT (payload_kiov == NULL || !in_interrupt ());
+ /* payload is either all vaddrs or all pages */
+ LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
+
+ if (payload_iov != NULL)
+ desc_size = offsetof(ksock_ltx_t, ltx_iov[1 + payload_niov]);
+ else
+ desc_size = offsetof(ksock_ltx_t, ltx_kiov[payload_niov]);
+
+ if (in_interrupt() ||
+ type == PTL_MSG_ACK ||
+ type == PTL_MSG_REPLY) {
+ /* Can't block if in interrupt or responding to an incoming
+ * message */
+ PORTAL_ALLOC_ATOMIC(ltx, desc_size);
+ } else {
+ PORTAL_ALLOC(ltx, desc_size);
+ }
+
if (ltx == NULL) {
- CERROR ("Can't allocate tx desc\n");
- return (NULL);
+ CERROR("Can't allocate tx desc type %d size %d %s\n",
+ type, desc_size, in_interrupt() ? "(intr)" : "");
+ return (PTL_NOSPACE);
}
- /* Init local send packet (storage for hdr, finalize() args) */
+ atomic_inc(&ksocknal_data.ksnd_nactive_ltxs);
+
+ ltx->ltx_desc_size = desc_size;
+
+ /* We always have 1 mapped frag for the header */
+ ltx->ltx_tx.tx_iov = ltx->ltx_iov;
+ ltx->ltx_iov[0].iov_base = <x->ltx_hdr;
+ ltx->ltx_iov[0].iov_len = sizeof(*hdr);
ltx->ltx_hdr = *hdr;
+
ltx->ltx_private = private;
ltx->ltx_cookie = cookie;
- /* Init common ltx_tx */
ltx->ltx_tx.tx_isfwd = 0;
- ltx->ltx_tx.tx_nob = sizeof (*hdr);
-
- /* We always have 1 mapped frag for the header */
- ltx->ltx_tx.tx_niov = 1;
- ltx->ltx_tx.tx_iov = <x->ltx_iov_space.hdr;
- ltx->ltx_tx.tx_iov[0].iov_base = <x->ltx_hdr;
- ltx->ltx_tx.tx_iov[0].iov_len = sizeof (ltx->ltx_hdr);
-
- ltx->ltx_tx.tx_kiov = NULL;
- ltx->ltx_tx.tx_nkiov = 0;
+ ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_nob;
- return (ltx);
-}
-
-int
-ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie,
- ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
- unsigned int payload_niov, struct iovec *payload_iov,
- size_t payload_len)
-{
- ksock_ltx_t *ltx;
- int rc;
+ if (payload_iov != NULL) {
+ /* payload is all mapped */
+ ltx->ltx_tx.tx_kiov = NULL;
+ ltx->ltx_tx.tx_nkiov = 0;
- /* NB 'private' is different depending on what we're sending.
- * Just ignore it until we can rely on it
- */
+ ltx->ltx_tx.tx_niov = 1 + payload_niov;
- CDEBUG(D_NET,
- "sending "LPSZ" bytes in %d mapped frags to nid: "LPX64
- " pid %d\n", payload_len, payload_niov, nid, pid);
+ memcpy(ltx->ltx_iov + 1, payload_iov,
+ payload_niov * sizeof (*payload_iov));
- ltx = ksocknal_setup_hdr (nal, private, cookie, hdr, type);
- if (ltx == NULL)
- return (PTL_FAIL);
+ } else {
+ /* payload is all pages */
+ ltx->ltx_tx.tx_kiov = ltx->ltx_kiov;
+ ltx->ltx_tx.tx_nkiov = payload_niov;
- /* append the payload_iovs to the one pointing at the header */
- LASSERT (ltx->ltx_tx.tx_niov == 1 && ltx->ltx_tx.tx_nkiov == 0);
- LASSERT (payload_niov <= PTL_MD_MAX_IOV);
+ ltx->ltx_tx.tx_niov = 1;
- memcpy (ltx->ltx_tx.tx_iov + 1, payload_iov,
- payload_niov * sizeof (*payload_iov));
- ltx->ltx_tx.tx_niov = 1 + payload_niov;
- ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len;
+ memcpy(ltx->ltx_kiov, payload_kiov,
+ payload_niov * sizeof (*payload_kiov));
+ }
- rc = ksocknal_launch_packet (<x->ltx_tx, nid);
+ rc = ksocknal_launch_packet(<x->ltx_tx, nid);
if (rc == 0)
return (PTL_OK);
- ksocknal_put_ltx (ltx);
+ ksocknal_free_ltx(ltx);
return (PTL_FAIL);
}
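
A minimal user-space sketch (not part of the patch) of the offsetof() sizing trick the new ksocknal_sendmsg() uses: the descriptor ends in ltx_iov[1]/ltx_kiov[0], so offsetof() over the trailing array gives exactly the bytes needed for the header frag plus the payload frags. demo_ltx is a trimmed stand-in for ksock_ltx_t; like the patch, this relies on GCC accepting a runtime array index inside offsetof().

/* illustrative sketch only */
#include <stddef.h>
#include <stdio.h>
#include <sys/uio.h>

struct demo_ltx {
        int          ltx_desc_size;
        struct iovec ltx_iov[1];        /* iov[0] is always the header frag */
};

int main(void)
{
        unsigned int payload_niov = 4;  /* hypothetical fragment count */
        size_t desc_size = offsetof(struct demo_ltx, ltx_iov[1 + payload_niov]);

        printf("descriptor bytes for %u payload frags: %zu\n",
               payload_niov, desc_size);
        return 0;
}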
int
+ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie,
+ ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
+ unsigned int payload_niov, struct iovec *payload_iov,
+ size_t payload_len)
+{
+ return (ksocknal_sendmsg(nal, private, cookie,
+ hdr, type, nid, pid,
+ payload_niov, payload_iov, NULL,
+ payload_len));
+}
+
+int
ksocknal_send_pages (nal_cb_t *nal, void *private, lib_msg_t *cookie,
ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
- unsigned int payload_niov, ptl_kiov_t *payload_iov, size_t payload_len)
+ unsigned int payload_niov, ptl_kiov_t *payload_kiov,
+ size_t payload_len)
{
- ksock_ltx_t *ltx;
- int rc;
-
- /* NB 'private' is different depending on what we're sending.
- * Just ignore it until we can rely on it */
-
- CDEBUG(D_NET,
- "sending "LPSZ" bytes in %d mapped frags to nid: "LPX64" pid %d\n",
- payload_len, payload_niov, nid, pid);
-
- ltx = ksocknal_setup_hdr (nal, private, cookie, hdr, type);
- if (ltx == NULL)
- return (PTL_FAIL);
-
- LASSERT (ltx->ltx_tx.tx_niov == 1 && ltx->ltx_tx.tx_nkiov == 0);
- LASSERT (payload_niov <= PTL_MD_MAX_IOV);
-
- ltx->ltx_tx.tx_kiov = ltx->ltx_iov_space.payload.kiov;
- memcpy (ltx->ltx_tx.tx_kiov, payload_iov,
- payload_niov * sizeof (*payload_iov));
- ltx->ltx_tx.tx_nkiov = payload_niov;
- ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len;
-
- rc = ksocknal_launch_packet (<x->ltx_tx, nid);
- if (rc == 0)
- return (PTL_OK);
-
- ksocknal_put_ltx (ltx);
- return (PTL_FAIL);
+ return (ksocknal_sendmsg(nal, private, cookie,
+ hdr, type, nid, pid,
+ payload_niov, NULL, payload_kiov,
+ payload_len));
}
void
tx->tx_iov = fwd->kprfd_iov;
tx->tx_nkiov = 0;
tx->tx_kiov = NULL;
- tx->tx_hdr = (ptl_hdr_t *)fwd->kprfd_iov[0].iov_base;
rc = ksocknal_launch_packet (tx, nid);
if (rc != 0)
spin_lock_irqsave (&fmp->fmp_lock, flags);
list_add (&fmb->fmb_list, &fmp->fmp_idle_fmbs);
+ fmp->fmp_nactive_fmbs--;
if (!list_empty (&fmp->fmp_blocked_conns)) {
conn = list_entry (fmb->fmb_pool->fmp_blocked_conns.next,
ksock_fmb_t *fmb;
LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
- LASSERT (ksocknal_data.ksnd_fmbs != NULL);
+ LASSERT (kpr_routing(&ksocknal_data.ksnd_router));
if (packet_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE)
pool = &ksocknal_data.ksnd_small_fmp;
fmb = list_entry(pool->fmp_idle_fmbs.next,
ksock_fmb_t, fmb_list);
list_del (&fmb->fmb_list);
+ pool->fmp_nactive_fmbs++;
spin_unlock_irqrestore (&pool->fmp_lock, flags);
return (fmb);
return;
}
- if (ksocknal_data.ksnd_fmbs == NULL) { /* not forwarding */
+ if (!kpr_routing(&ksocknal_data.ksnd_router)) { /* not forwarding */
CERROR("dropping packet from "LPX64" (%s) for "LPX64
" (%s): not forwarding\n",
src_nid, portals_nid2str(TCPNAL, src_nid, str),
LASSERT (conn->ksnc_rx_nob_wanted > 0);
- rc = ksocknal_recvmsg(conn);
+ rc = ksocknal_receive(conn);
if (rc <= 0) {
+ LASSERT (rc != -EAGAIN);
+
if (rc == 0)
CWARN ("[%p] EOF from "LPX64" ip %08x:%d\n",
conn, conn->ksnc_peer->ksnp_nid,
* kss_lock. */
conn->ksnc_tx_ready = 0;
spin_unlock_irqrestore (&sched->kss_lock, flags);
-
+
rc = ksocknal_process_transmit(conn, tx);
-
+
spin_lock_irqsave (&sched->kss_lock, flags);
if (rc != -EAGAIN) {
read_lock (&ksocknal_data.ksnd_global_lock);
conn = sk->sk_user_data;
- if (conn == NULL) { /* raced with ksocknal_close_sock */
+ if (conn == NULL) { /* raced with ksocknal_terminate_conn */
LASSERT (sk->sk_data_ready != &ksocknal_data_ready);
sk->sk_data_ready (sk, n);
} else {
(conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ?
" empty" : " queued"));
- if (conn == NULL) { /* raced with ksocknal_close_sock */
+ if (conn == NULL) { /* raced with ksocknal_terminate_conn */
LASSERT (sk->sk_write_space != &ksocknal_write_space);
sk->sk_write_space (sk);
int option;
struct linger linger;
- sock->sk->allocation = GFP_NOFS;
+ sock->sk->allocation = GFP_MEMALLOC;
/* Ensure this socket aborts active sends immediately when we close
* it. */
kportal_daemonize (name);
kportal_blockallsigs ();
+ current->flags |= PF_MEMALLOC;
+
spin_lock_irqsave (&ksocknal_data.ksnd_autoconnectd_lock, flags);
while (!ksocknal_data.ksnd_shuttingdown) {
init_waitqueue_entry (&wait, current);
+ current->flags |= PF_MEMALLOC;
+
spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags);
while (!ksocknal_data.ksnd_shuttingdown) {
return buf;
}
-#elif defined(CONFIG_X86)
+#elif defined(__i386__)
extern int is_kernel_text_address(unsigned long addr);
extern int lookup_symbol(unsigned long address, char *buf, int buflen);
char *portals_debug_dumpstack(void)
{
-#if defined(__x86_64__)
- unsigned long esp = current->thread.rsp;
-#else
unsigned long esp = current->thread.esp;
-#endif
unsigned long *stack = (unsigned long *)&esp;
int size;
unsigned long addr;
#if LWT_SUPPORT
-#define LWT_MEMORY (1<<20) /* 1Mb of trace memory */
-#define LWT_MAX_CPUS 4
-
int lwt_enabled;
int lwt_pages_per_cpu;
lwt_cpu_t lwt_cpus[LWT_MAX_CPUS];
}
int
-lwt_snapshot (int *ncpu, int *total_size, void *user_ptr, int user_size)
+lwt_snapshot (cycles_t *now, int *ncpu, int *total_size,
+ void *user_ptr, int user_size)
{
const int events_per_page = PAGE_SIZE / sizeof(lwt_event_t);
const int bytes_per_page = events_per_page * sizeof(lwt_event_t);
*ncpu = num_online_cpus();
*total_size = num_online_cpus() * lwt_pages_per_cpu * bytes_per_page;
-
+ *now = get_cycles();
+
if (user_ptr == NULL)
return (0);
}
void
+kportal_memhog_free (struct portals_device_userstate *pdu)
+{
+ struct page **level0p = &pdu->pdu_memhog_root_page;
+ struct page **level1p;
+ struct page **level2p;
+ int count1;
+ int count2;
+
+ if (*level0p != NULL) {
+
+ level1p = (struct page **)page_address(*level0p);
+ count1 = 0;
+
+ while (count1 < PAGE_SIZE/sizeof(struct page *) &&
+ *level1p != NULL) {
+
+ level2p = (struct page **)page_address(*level1p);
+ count2 = 0;
+
+ while (count2 < PAGE_SIZE/sizeof(struct page *) &&
+ *level2p != NULL) {
+
+ __free_page(*level2p);
+ pdu->pdu_memhog_pages--;
+ level2p++;
+ count2++;
+ }
+
+ __free_page(*level1p);
+ pdu->pdu_memhog_pages--;
+ level1p++;
+ count1++;
+ }
+
+ __free_page(*level0p);
+ pdu->pdu_memhog_pages--;
+
+ *level0p = NULL;
+ }
+
+ LASSERT (pdu->pdu_memhog_pages == 0);
+}
+
+int
+kportal_memhog_alloc (struct portals_device_userstate *pdu, int npages, int flags)
+{
+ struct page **level0p;
+ struct page **level1p;
+ struct page **level2p;
+ int count1;
+ int count2;
+
+ LASSERT (pdu->pdu_memhog_pages == 0);
+ LASSERT (pdu->pdu_memhog_root_page == NULL);
+
+ if (npages < 0)
+ return -EINVAL;
+
+ if (npages == 0)
+ return 0;
+
+ level0p = &pdu->pdu_memhog_root_page;
+ *level0p = alloc_page(flags);
+ if (*level0p == NULL)
+ return -ENOMEM;
+ pdu->pdu_memhog_pages++;
+
+ level1p = (struct page **)page_address(*level0p);
+ count1 = 0;
+ memset(level1p, 0, PAGE_SIZE);
+
+ while (pdu->pdu_memhog_pages < npages &&
+ count1 < PAGE_SIZE/sizeof(struct page *)) {
+
+ if (signal_pending(current))
+ return (-EINTR);
+
+ *level1p = alloc_page(flags);
+ if (*level1p == NULL)
+ return -ENOMEM;
+ pdu->pdu_memhog_pages++;
+
+ level2p = (struct page **)page_address(*level1p);
+ count2 = 0;
+ memset(level2p, 0, PAGE_SIZE);
+
+ while (pdu->pdu_memhog_pages < npages &&
+ count2 < PAGE_SIZE/sizeof(struct page *)) {
+
+ if (signal_pending(current))
+ return (-EINTR);
+
+ *level2p = alloc_page(flags);
+ if (*level2p == NULL)
+ return (-ENOMEM);
+ pdu->pdu_memhog_pages++;
+
+ level2p++;
+ count2++;
+ }
+
+ level1p++;
+ count1++;
+ }
+
+ return 0;
+}
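+
+/* For reference only (not part of the patch): the memhog tree above is a
+ * root page whose slots point at level-1 pages, each of which points at
+ * level-2 data pages, so with n = PAGE_SIZE/sizeof(struct page *) slots per
+ * page it can pin at most 1 + n + n*n pages.  The 4096-byte page size below
+ * is an assumption for the demo.
+ *
+ *     #include <stdio.h>
+ *     int main(void)
+ *     {
+ *             long page_size = 4096;            // assumed page size
+ *             long n = page_size / sizeof(void *); // child pointers per page
+ *             printf("max pages pinned: %ld\n", 1 + n + n * n);
+ *             return 0;
+ *     }
+ */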
+
+void
kportal_blockallsigs ()
{
unsigned long flags;
/* called when opening /dev/device */
static int kportal_psdev_open(struct inode * inode, struct file * file)
{
+ struct portals_device_userstate *pdu;
ENTRY;
-
+
if (!inode)
RETURN(-EINVAL);
+
PORTAL_MODULE_USE;
+
+ PORTAL_ALLOC(pdu, sizeof(*pdu));
+ if (pdu != NULL) {
+ pdu->pdu_memhog_pages = 0;
+ pdu->pdu_memhog_root_page = NULL;
+ }
+ file->private_data = pdu;
+
RETURN(0);
}
/* called when closing /dev/device */
static int kportal_psdev_release(struct inode * inode, struct file * file)
{
+ struct portals_device_userstate *pdu;
ENTRY;
if (!inode)
RETURN(-EINVAL);
+ pdu = file->private_data;
+ if (pdu != NULL) {
+ kportal_memhog_free(pdu);
+ PORTAL_FREE(pdu, sizeof(*pdu));
+ }
+
PORTAL_MODULE_UNUSE;
RETURN(0);
}
break;
case IOC_PORTAL_LWT_SNAPSHOT:
- err = lwt_snapshot (&data->ioc_count, &data->ioc_misc,
+ err = lwt_snapshot (&data->ioc_nid,
+ &data->ioc_count, &data->ioc_misc,
data->ioc_pbuf1, data->ioc_plen1);
if (err == 0 &&
copy_to_user((char *)arg, data, sizeof (*data)))
copy_to_user((char *)arg, data, sizeof (*data)))
err = -EFAULT;
break;
-#endif
+#endif
+ case IOC_PORTAL_MEMHOG:
+ if (!capable (CAP_SYS_ADMIN))
+ err = -EPERM;
+ else if (file->private_data == NULL) {
+ err = -EINVAL;
+ } else {
+ kportal_memhog_free(file->private_data);
+ err = kportal_memhog_alloc(file->private_data,
+ data->ioc_count,
+ data->ioc_flags);
+ if (err != 0)
+ kportal_memhog_free(file->private_data);
+ }
+ break;
+
default:
err = -EINVAL;
break;
cleanup_lwt:
#if LWT_SUPPORT
lwt_fini();
-#endif
cleanup_debug:
+#endif
portals_debug_cleanup();
return rc;
}
}
static int
-lwt_snapshot(int *ncpu, int *totalsize, lwt_event_t *events, int size)
+lwt_snapshot(cycles_t *now, int *ncpu, int *totalsize,
+ lwt_event_t *events, int size)
{
struct portal_ioctl_data data;
int rc;
LASSERT (data.ioc_count != 0);
LASSERT (data.ioc_misc != 0);
+ if (now != NULL)
+ *now = data.ioc_nid;
+
if (ncpu != NULL)
*ncpu = data.ioc_count;
int
jt_ptl_lwt(int argc, char **argv)
{
-#define MAX_CPUS 8
int ncpus;
int totalspace;
int nevents_per_cpu;
lwt_event_t *events;
- lwt_event_t *cpu_event[MAX_CPUS + 1];
- lwt_event_t *next_event[MAX_CPUS];
- lwt_event_t *first_event[MAX_CPUS];
+ lwt_event_t *cpu_event[LWT_MAX_CPUS + 1];
+ lwt_event_t *next_event[LWT_MAX_CPUS];
+ lwt_event_t *first_event[LWT_MAX_CPUS];
int cpu;
lwt_event_t *e;
int rc;
double mhz;
cycles_t t0;
cycles_t tlast;
+ cycles_t tnow;
+ struct timeval tvnow;
+ int printed_date = 0;
FILE *f = stdout;
if (argc < 2 ||
return (0);
}
- if (lwt_snapshot(&ncpus, &totalspace, NULL, 0) != 0)
+ if (lwt_snapshot(NULL, &ncpus, &totalspace, NULL, 0) != 0)
return (-1);
- if (ncpus > MAX_CPUS) {
- fprintf(stderr, "Too many cpus: %d (%d)\n", ncpus, MAX_CPUS);
+ if (ncpus > LWT_MAX_CPUS) {
+ fprintf(stderr, "Too many cpus: %d (%d)\n",
+ ncpus, LWT_MAX_CPUS);
return (-1);
}
return (-1);
}
- if (lwt_snapshot(NULL, NULL, events, totalspace)) {
+ if (lwt_snapshot(&tnow, NULL, NULL, events, totalspace)) {
free(events);
return (-1);
}
+ /* we want this time to be sampled at snapshot time */
+ gettimeofday(&tvnow, NULL);
+
if (argc > 2) {
f = fopen (argv[2], "w");
if (f == NULL) {
if (t0 <= next_event[cpu]->lwte_when) {
/* on or after the first event */
+ if (!printed_date) {
+ cycles_t du = (tnow - t0) / mhz;
+ time_t then = tvnow.tv_sec - du/1000000;
+
+ if (du % 1000000 > tvnow.tv_usec)
+ then--;
+
+ fprintf(f, "%s", ctime(&then));
+ printed_date = 1;
+ }
+
rc = lwt_print(f, t0, tlast, mhz, cpu, next_event[cpu]);
if (rc != 0)
break;
free(events);
return (0);
-#undef MAX_CPUS
}
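
A small sketch (not part of the patch) of the wall-clock arithmetic behind the new date stamp in jt_ptl_lwt(): (tnow - t0) / mhz is the age of the first event in microseconds at snapshot time, and subtracting it from the snapshot's gettimeofday() recovers roughly when tracing started. The cycle counts and clock speed below are hypothetical.

/* illustrative sketch only */
#include <stdio.h>
#include <time.h>
#include <sys/time.h>

int main(void)
{
        unsigned long long t0   = 1000000000ULL;   /* cycle count of first event   */
        unsigned long long tnow = 5000000000ULL;   /* cycle count at snapshot time */
        double mhz = 2000.0;                       /* CPU clock in MHz             */
        struct timeval tvnow;

        gettimeofday(&tvnow, NULL);                /* the tool samples this at snapshot time */

        unsigned long long du = (unsigned long long)((tnow - t0) / mhz);
        time_t then = tvnow.tv_sec - (time_t)(du / 1000000);

        if (du % 1000000 > (unsigned long long)tvnow.tv_usec)
                then--;                            /* borrow a second, as the tool does */

        printf("trace started around %s", ctime(&then));
        return 0;
}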
+
+int jt_ptl_memhog(int argc, char **argv)
+{
+ static int gfp = 0; /* sticky! */
+
+ struct portal_ioctl_data data;
+ int rc;
+ int count;
+ char *end;
+
+ if (argc < 2) {
+ fprintf(stderr, "usage: %s <npages> [<GFP flags>]\n", argv[0]);
+ return 0;
+ }
+
+ count = strtol(argv[1], &end, 0);
+ if (count < 0 || *end != 0) {
+ fprintf(stderr, "Can't parse page count '%s'\n", argv[1]);
+ return -1;
+ }
+
+ if (argc >= 3) {
+ rc = strtol(argv[2], &end, 0);
+ if (*end != 0) {
+ fprintf(stderr, "Can't parse gfp flags '%s'\n", argv[2]);
+ return -1;
+ }
+ gfp = rc;
+ }
+
+ PORTAL_IOC_INIT(data);
+ data.ioc_count = count;
+ data.ioc_flags = gfp;
+ rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_MEMHOG, &data);
+
+ if (rc != 0) {
+ fprintf(stderr, "memhog %d failed: %s\n", count, strerror(errno));
+ return -1;
+ }
+
+ printf("memhog %d OK\n", count);
+ return 0;
+}
+
spin_lock_init(&request->rq_lock);
INIT_LIST_HEAD(&request->rq_list);
- init_waitqueue_head(&request->rq_wait_for_rep);
+ init_waitqueue_head(&request->rq_reply_waitq);
request->rq_xid = ptlrpc_next_xid();
atomic_set(&request->rq_refcount, 1);
* the timeout lets us CERROR for visibility */
struct l_wait_info lwi = LWI_TIMEOUT(10*HZ, NULL, NULL);
- rc = l_wait_event (request->rq_wait_for_rep,
+ rc = l_wait_event (request->rq_reply_waitq,
request->rq_replied, &lwi);
LASSERT(rc == 0 || rc == -ETIMEDOUT);
if (rc == 0) {
if (req->rq_set != NULL)
wake_up (&req->rq_set->set_waitq);
else
- wake_up(&req->rq_wait_for_rep);
+ wake_up(&req->rq_reply_waitq);
spin_unlock_irqrestore (&req->rq_lock, flags);
}
if (req->rq_set != NULL)
wake_up (&req->rq_set->set_waitq);
else
- wake_up(&req->rq_wait_for_rep);
+ wake_up(&req->rq_reply_waitq);
spin_unlock_irqrestore (&req->rq_lock, flags);
}
DEBUG_REQ(D_HA, req, "\"%s\" waiting for recovery: (%d > %d)",
current->comm, req->rq_send_state, imp->imp_state);
lwi = LWI_INTR(interrupted_request, req);
- rc = l_wait_event(req->rq_wait_for_rep,
+ rc = l_wait_event(req->rq_reply_waitq,
(req->rq_send_state == imp->imp_state ||
req->rq_err),
&lwi);
}
lwi = LWI_TIMEOUT_INTR(timeout, expired_request, interrupted_request,
req);
- l_wait_event(req->rq_wait_for_rep, ptlrpc_check_reply(req), &lwi);
+ l_wait_event(req->rq_reply_waitq, ptlrpc_check_reply(req), &lwi);
DEBUG_REQ(D_NET, req, "-- done sleeping");
CDEBUG(D_RPCTRACE, "Completed RPC pname:cluuid:pid:xid:ni:nid:opc "
if (req->rq_bulk != NULL) {
if (rc >= 0) { /* success so far */
lwi = LWI_TIMEOUT(timeout, NULL, NULL);
- brc = l_wait_event(req->rq_wait_for_rep,
+ brc = l_wait_event(req->rq_reply_waitq,
ptlrpc_bulk_complete(req->rq_bulk),
&lwi);
if (brc != 0) {
CDEBUG(D_OTHER, "-- sleeping\n");
lwi = LWI_INTR(NULL, NULL); /* XXX needs timeout, nested recovery */
- l_wait_event(req->rq_wait_for_rep, ptlrpc_check_reply(req), &lwi);
+ l_wait_event(req->rq_reply_waitq, ptlrpc_check_reply(req), &lwi);
CDEBUG(D_OTHER, "-- done\n");
// up(&cli->cli_rpc_sem);
if (req->rq_set != NULL)
wake_up(&req->rq_set->set_waitq);
else
- wake_up(&req->rq_wait_for_rep);
+ wake_up(&req->rq_reply_waitq);
}
spin_unlock (&req->rq_lock);
}
if (req->rq_set != NULL)
wake_up(&req->rq_set->set_waitq);
else
- wake_up(&req->rq_wait_for_rep);
+ wake_up(&req->rq_reply_waitq);
}
spin_unlock (&req->rq_lock);
}
LASSERT(req->rq_want_ack);
spin_lock_irqsave(&req->rq_lock, flags);
req->rq_want_ack = 0;
- wake_up(&req->rq_wait_for_rep);
+ wake_up(&req->rq_reply_waitq);
spin_unlock_irqrestore(&req->rq_lock, flags);
} else {
// XXX make sure we understand all events
if (req->rq_set != NULL)
wake_up(&req->rq_set->set_waitq);
else
- wake_up(&req->rq_wait_for_rep);
+ wake_up(&req->rq_reply_waitq);
spin_unlock_irqrestore (&req->rq_lock, flags);
} else {
// XXX make sure we understand all events, including ACKs
if (desc->bd_req->rq_set != NULL)
wake_up (&desc->bd_req->rq_set->set_waitq);
else
- wake_up (&desc->bd_req->rq_wait_for_rep);
+ wake_up (&desc->bd_req->rq_reply_waitq);
spin_unlock_irqrestore (&desc->bd_lock, flags);
RETURN(1);
if (desc->bd_req->rq_set != NULL)
wake_up (&desc->bd_req->rq_set->set_waitq);
else
- wake_up (&desc->bd_req->rq_wait_for_rep);
+ wake_up (&desc->bd_req->rq_reply_waitq);
spin_unlock_irqrestore (&desc->bd_lock, flags);
RETURN(1);
struct ptlrpc_service *svc)
{
ptlrpc_lprocfs_register(entry, svc->srv_name,
- "stats", &svc->svc_procroot,
- &svc->svc_stats);
+ "stats", &svc->srv_procroot,
+ &svc->srv_stats);
}
void ptlrpc_lprocfs_register_obd(struct obd_device *obddev)
void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc)
{
- if (svc->svc_procroot) {
- lprocfs_remove(svc->svc_procroot);
- svc->svc_procroot = NULL;
+ if (svc->srv_procroot != NULL) {
+ lprocfs_remove(svc->srv_procroot);
+ svc->srv_procroot = NULL;
}
- if (svc->svc_stats) {
- lprocfs_free_stats(svc->svc_stats);
- svc->svc_stats = NULL;
+ if (svc->srv_stats) {
+ lprocfs_free_stats(svc->srv_stats);
+ svc->srv_stats = NULL;
}
}
void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd)
if (desc->bd_req->rq_set != NULL)
wq = &req->rq_set->set_waitq;
else
- wq = &req->rq_wait_for_rep;
+ wq = &req->rq_reply_waitq;
lwi = LWI_TIMEOUT (10 * HZ, NULL, NULL);
rc = l_wait_event(*wq, ptlrpc_bulk_complete(desc), &lwi);
LASSERT (rc == 0 || rc == -ETIMEDOUT);
req->rq_repmsg->status = req->rq_status;
req->rq_repmsg->opc = req->rq_reqmsg->opc;
- init_waitqueue_head(&req->rq_wait_for_rep);
+ init_waitqueue_head(&req->rq_reply_waitq);
rc = ptl_send_buf(req, req->rq_connection, req->rq_svc->srv_rep_portal);
if (rc != 0) {
/* Do what the callback handler would have done */
wake_up(&req->rq_set->set_waitq);
} else {
DEBUG_REQ(D_HA, req, "waking:");
- wake_up(&req->rq_wait_for_rep);
+ wake_up(&req->rq_reply_waitq);
}
}
spin_unlock_irqrestore(&imp->imp_lock, flags);
spin_lock_init (&request->rq_lock);
INIT_LIST_HEAD(&request->rq_list);
request->rq_svc = svc;
- request->rq_obd = obddev;
request->rq_xid = event->match_bits;
request->rq_reqmsg = event->mem_desc.start + event->offset;
request->rq_reqlen = event->mlength;
do_gettimeofday(&start_time);
total = timeval_sub(&start_time, &event->arrival_time);
- if (svc->svc_stats != NULL) {
- lprocfs_counter_add(svc->svc_stats, PTLRPC_REQWAIT_CNTR,
+ if (svc->srv_stats != NULL) {
+ lprocfs_counter_add(svc->srv_stats, PTLRPC_REQWAIT_CNTR,
total);
- lprocfs_counter_add(svc->svc_stats,
+ lprocfs_counter_add(svc->srv_stats,
PTLRPC_SVCIDLETIME_CNTR,
timeval_sub(&start_time,
&finish_time));
#if 0 /* Wait for b_eq branch */
- lprocfs_counter_add(svc->svc_stats,
+ lprocfs_counter_add(svc->srv_stats,
PTLRPC_SVCEQDEPTH_CNTR, 0);
#endif
}
"(%ldus total)\n", request->rq_xid, event->initiator.nid,
total, timeval_sub(&finish_time, &event->arrival_time));
- if (svc->svc_stats != NULL) {
+ if (svc->srv_stats != NULL) {
int opc = opcode_offset(request->rq_reqmsg->opc);
if (opc > 0) {
LASSERT(opc < LUSTRE_MAX_OPCODES);
- lprocfs_counter_add(svc->svc_stats,
+ lprocfs_counter_add(svc->srv_stats,
opc + PTLRPC_LAST_CNTR,
total);
}
"light-weight tracing\n"
"usage: lwt start\n"
" lwt stop [file]"},
+ {"memhog", jt_ptl_memhog, 0,
+ "memory pressure testing\n"
+ "usage: memhog <page count> [<gfp flags>]"},
/* User interface commands */
{"======= control ========", jt_noop, 0, "control commands"},
('route', "Add a new route for the cluster.", PARAM),
('router', "Optional flag to mark a node as router."),
('gw', "Specify the nid of the gateway for a route.", PARAM),
- ('gw_cluster_id', "", PARAM, "0"),
+ ('gateway_cluster_id', "", PARAM, "0"),
('target_cluster_id', "", PARAM, "0"),
('lo', "For a range route, this is the low value nid.", PARAM),
('hi', "For a range route, this is a hi value nid.", PARAM,""),
node_name = get_option(options, 'node')
gw_net_type = get_option(options, 'nettype')
gw = get_option(options, 'gw')
- gw_cluster_id = get_option(options, 'gw_cluster_id')
+ gw_cluster_id = get_option(options, 'gateway_cluster_id')
tgt_cluster_id = get_option(options, 'target_cluster_id')
lo = get_option(options, 'lo')
hi = get_option(options, 'hi')