AC_MSG_RESULT(no)
fi
-AC_ARG_ENABLE(zerocopy, [ --enable-zerocopy enable socknal zerocopy],enable_zerocopy=$enable_zerocopy_temp, enable_zerocopy="")
+dnl ACTION-IF-GIVEN runs for both --enable-zerocopy and --disable-zerocopy,
+dnl so check enableval: only an explicit "no" switches zerocopy off.
+AC_ARG_ENABLE(zerocopy, [ --disable-zerocopy disable socknal zerocopy],
+ [if test "x$enableval" = xno; then enable_zerocopy=""; else enable_zerocopy=$enable_zerocopy_temp; fi],
+ enable_zerocopy=$enable_zerocopy_temp)
-AC_ARG_ENABLE(affinity, [ --enable-affinity enable process/irq affinity],enable_affinity="-DCPU_AFFINITY=1", enable_affinity=$enable_affinity_temp)
+dnl ACTION-IF-GIVEN runs for both --enable-affinity and --disable-affinity,
+dnl so check enableval: only an explicit "no" switches affinity off.
+AC_ARG_ENABLE(affinity, [ --disable-affinity disable process/irq affinity],
+ [if test "x$enableval" = xno; then enable_affinity=""; else enable_affinity=$enable_affinity_temp; fi],
+ enable_affinity=$enable_affinity_temp)
#####################################
AC_MSG_CHECKING(if quadrics kernel headers are present)
AC_OUTPUT([Makefile Kernelenv libcfs/Makefile portals/Makefile \
unals/Makefile knals/Makefile router/Makefile \
knals/socknal/Makefile knals/gmnal/Makefile knals/qswnal/Makefile \
- knals/scimacnal/Makefile knals/toenal/Makefile knals/ibnal/Makefile\
+ knals/scimacnal/Makefile knals/ibnal/Makefile\
utils/Makefile tests/Makefile doc/Makefile ])
LYX2HTML = lyx --export html
SUFFIXES = .lin .lyx .pdf .sgml .html .txt .fig .eps
-DOCS = portals3.pdf
+if DOC
+ DOCS = portals3.pdf
+else
+ DOCS =
+endif
+
IMAGES = file.eps flow_new.eps get.eps mpi.eps portals.eps put.eps
LYXFILES= portals3.lyx
#define PORTAL_ALLOC_GFP(ptr, size, mask) \
do { \
- LASSERT (!in_interrupt()); \
+ /* interrupt context: only non-vmalloc GFP_ATOMIC requests allowed */ \
+ LASSERT(!in_interrupt() || \
+ ((size) <= PORTAL_VMALLOC_SIZE && (mask) == GFP_ATOMIC)); \
if ((size) > PORTAL_VMALLOC_SIZE) \
(ptr) = vmalloc(size); \
else \
#define IOC_PORTAL_MAX_NR 42
enum {
- QSWNAL = 1,
- SOCKNAL,
- GMNAL,
- TOENAL,
- TCPNAL,
- SCIMACNAL,
- ROUTER,
- IBNAL,
+ QSWNAL = 1,
+ SOCKNAL = 2,
+ GMNAL = 3,
+ /* 4 unused (formerly TOENAL); left vacant so the values below do not shift */
+ TCPNAL = 5,
+ SCIMACNAL = 6,
+ ROUTER = 7,
+ IBNAL = 8,
 NAL_ENUM_END_MARKER
};
#ifdef __KERNEL__
extern ptl_handle_ni_t kqswnal_ni;
extern ptl_handle_ni_t ksocknal_ni;
-extern ptl_handle_ni_t ktoenal_ni;
extern ptl_handle_ni_t kgmnal_ni;
extern ptl_handle_ni_t kibnal_ni;
extern ptl_handle_ni_t kscimacnal_ni;
#ifndef _LINUX_LIST_H
-
/*
* Simple doubly linked list implementation.
*
# This code is issued under the GNU General Public License.
# See the file COPYING in this distribution
-DIST_SUBDIRS= socknal toenal qswnal gmnal scimacnal ibnal
-SUBDIRS= socknal toenal @QSWNAL@ @GMNAL@ @SCIMACNAL@ @IBNAL@
+DIST_SUBDIRS= socknal qswnal gmnal scimacnal ibnal
+SUBDIRS= socknal @QSWNAL@ @GMNAL@ @SCIMACNAL@ @IBNAL@
#include <asm/system.h>
#include <asm/uaccess.h>
+#include <linux/init.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/stat.h>
int option;
struct linger linger;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+ sock->sk->sk_allocation = GFP_NOFS;
+#else
sock->sk->allocation = GFP_NOFS;
+#endif
/* Ensure this socket aborts active sends immediately when we close
* it. */
+++ /dev/null
-.deps
-Makefile
-Makefile.in
+++ /dev/null
-# Copyright (C) 2001 Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-include ../../Rules.linux
-
-MODULE = ktoenal
-modulenet_DATA = ktoenal.o
-EXTRA_PROGRAMS = ktoenal
-
-DEFS =
-ktoenal_SOURCES = toenal.c toenal_cb.c toenal.h
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- * Author: Zach Brown <zab@zabbo.net>
- * Author: Peter J. Braam <braam@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- * Author: Eric Barton <eric@bartonsoftware.com>
- * Author: Kedar Sovani <kedar@calsoftinc.com>
- * Author: Amey Inamdar <amey@calsoftinc.com>
- *
- * This file is part of Portals, http://www.sf.net/projects/lustre/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-#include <linux/poll.h>
-#include "toenal.h"
-
-ptl_handle_ni_t ktoenal_ni;
-static nal_t ktoenal_api;
-static ksock_nal_data_t ktoenal_data;
-
-/*
-ksocknal_interface_t ktoenal_interface = {
- ksni_add_sock: ktoenal_add_sock,
- ksni_close_sock: ktoenal_close_sock,
- ksni_set_mynid: ktoenal_set_mynid,
-};
-*/
-
-kpr_nal_interface_t ktoenal_router_interface = {
- kprni_nalid: TOENAL,
- kprni_arg: &ktoenal_data,
- kprni_fwd: ktoenal_fwd_packet,
-};
-
-
-int
-ktoenal_api_forward(nal_t *nal, int id, void *args, size_t args_len,
- void *ret, size_t ret_len)
-{
- ksock_nal_data_t *k;
- nal_cb_t *nal_cb;
-
- k = nal->nal_data;
- nal_cb = k->ksnd_nal_cb;
-
- lib_dispatch(nal_cb, k, id, args, ret); /* ktoenal_send needs k */
- return PTL_OK;
-}
-
-int
-ktoenal_api_shutdown(nal_t *nal, int ni)
-{
- CDEBUG (D_NET, "closing all connections\n");
-
- return ktoenal_close_sock(0); /* close all sockets */
-}
-
-void
-ktoenal_api_yield(nal_t *nal)
-{
- our_cond_resched();
- return;
-}
-
-void
-ktoenal_api_lock(nal_t *nal, unsigned long *flags)
-{
- ksock_nal_data_t *k;
- nal_cb_t *nal_cb;
-
- k = nal->nal_data;
- nal_cb = k->ksnd_nal_cb;
- nal_cb->cb_cli(nal_cb,flags);
-}
-
-void
-ktoenal_api_unlock(nal_t *nal, unsigned long *flags)
-{
- ksock_nal_data_t *k;
- nal_cb_t *nal_cb;
-
- k = nal->nal_data;
- nal_cb = k->ksnd_nal_cb;
- nal_cb->cb_sti(nal_cb,flags);
-}
-
-nal_t *
-ktoenal_init(int interface, ptl_pt_index_t ptl_size,
- ptl_ac_index_t ac_size, ptl_pid_t requested_pid)
-{
- CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n",
- ktoenal_data.ksnd_mynid);
- lib_init(&ktoenal_lib, ktoenal_data.ksnd_mynid, 0, 10, ptl_size,
- ac_size);
- return (&ktoenal_api);
-}
-
-/*
- * EXTRA functions follow
- */
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-#define SOCKET_I(inode) (&(inode)->u.socket_i)
-#endif
-static __inline__ struct socket *
-socki_lookup(struct inode *inode)
-{
- return SOCKET_I(inode);
-}
-
-int
-ktoenal_set_mynid(ptl_nid_t nid)
-{
- lib_ni_t *ni = &ktoenal_lib.ni;
-
- /* FIXME: we have to do this because we call lib_init() at module
- * insertion time, which is before we have 'mynid' available. lib_init
- * sets the NAL's nid, which it uses to tell other nodes where packets
- * are coming from. This is not a very graceful solution to this
- * problem. */
-
- CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n", nid, ni->nid);
-
- ktoenal_data.ksnd_mynid = nid;
- ni->nid = nid;
- return (0);
-}
-
-int
-ktoenal_add_sock (ptl_nid_t nid, int fd)
-{
- unsigned long flags;
- ksock_conn_t *conn;
- struct file *file = NULL;
- struct socket *sock = NULL;
- int ret;
- ENTRY;
-
- file = fget(fd);
- if (file == NULL)
- RETURN(-EINVAL);
-
- ret = -EINVAL;
- sock = socki_lookup(file->f_dentry->d_inode);
- if (sock == NULL)
- GOTO(error, ret);
-
- ret = -ENOMEM;
- PORTAL_ALLOC(conn, sizeof(*conn));
- if (!conn)
- GOTO(error, ret);
-
- memset (conn, 0, sizeof (conn)); /* zero for consistency */
- file->f_flags |= O_NONBLOCK; /* Does this have any conflicts */
- conn->ksnc_file = file;
- conn->ksnc_sock = sock;
- conn->ksnc_peernid = nid;
- atomic_set (&conn->ksnc_refcount, 1); /* 1 ref for socklist */
-
- conn->ksnc_rx_ready = 0;
- conn->ksnc_rx_scheduled = 0;
- ktoenal_new_packet (conn, 0);
-
- INIT_LIST_HEAD (&conn->ksnc_tx_queue);
- conn->ksnc_tx_ready = 0;
- conn->ksnc_tx_scheduled = 0;
-
- LASSERT (!in_interrupt());
- write_lock_irqsave (&ktoenal_data.ksnd_socklist_lock, flags);
-
- list_add(&conn->ksnc_list, &ktoenal_data.ksnd_socklist);
- write_unlock_irqrestore (&ktoenal_data.ksnd_socklist_lock, flags);
-
- ktoenal_data_ready(conn);
- ktoenal_write_space(conn);
-
- ktoenal_data.ksnd_slistchange = 1;
- wake_up_process(ktoenal_data.ksnd_pollthread_tsk);
- /* Schedule pollthread so that it will poll
- * for newly created socket
- */
-
-
- CDEBUG(D_IOCTL, "conn [%p] registered for nid "LPX64"\n",
- conn, conn->ksnc_peernid);
-
- /* Can't unload while connection active */
- PORTAL_MODULE_USE;
- RETURN(0);
-
-error:
- fput(file);
- return (ret);
-}
-
-/* Passing in a zero nid will close all connections */
-int
-ktoenal_close_sock(ptl_nid_t nid)
-{
- unsigned long flags;
- ksock_conn_t *conn;
- LIST_HEAD (death_row);
- struct list_head *tmp;
-
- LASSERT (!in_interrupt());
- write_lock_irqsave (&ktoenal_data.ksnd_socklist_lock, flags);
-
- if (nid == 0) /* close ALL connections */
- {
- /* insert 'death row' into the socket list... */
- list_add (&death_row, &ktoenal_data.ksnd_socklist);
- /* ...extract and reinitialise the socket list itself... */
- list_del_init (&ktoenal_data.ksnd_socklist);
- /* ...and voila, death row is the proud owner of all conns */
- } else list_for_each (tmp, &ktoenal_data.ksnd_socklist) {
-
- conn = list_entry (tmp, ksock_conn_t, ksnc_list);
-
- if (conn->ksnc_peernid == nid)
- {
- list_del (&conn->ksnc_list);
- list_add (&conn->ksnc_list, &death_row);
- break;
- }
- }
-
-
- write_unlock_irqrestore (&ktoenal_data.ksnd_socklist_lock, flags);
-
- if (list_empty (&death_row))
- return (-ENOENT);
-
- do {
- conn = list_entry (death_row.next, ksock_conn_t, ksnc_list);
- list_del (&conn->ksnc_list);
- ktoenal_put_conn (conn); /* drop ref for ksnd_socklist */
- } while (!list_empty (&death_row));
-
- ktoenal_data.ksnd_slistchange = 1;
- wake_up_process(ktoenal_data.ksnd_pollthread_tsk);
-
- return (0);
-}
-
-
-ksock_conn_t *
-ktoenal_get_conn (ptl_nid_t nid)
-{
- struct list_head *tmp;
- ksock_conn_t *conn;
-
- PROF_START(conn_list_walk);
-
- read_lock (&ktoenal_data.ksnd_socklist_lock);
-
- list_for_each(tmp, &ktoenal_data.ksnd_socklist) {
-
- conn = list_entry(tmp, ksock_conn_t, ksnc_list);
-
- if (conn->ksnc_peernid == nid)
- {
- /* caller is referencing */
- atomic_inc (&conn->ksnc_refcount);
-
- read_unlock (&ktoenal_data.ksnd_socklist_lock);
-
- CDEBUG(D_NET, "got conn [%p] -> "LPX64" (%d)\n",
- conn, nid, atomic_read (&conn->ksnc_refcount));
-
- PROF_FINISH(conn_list_walk);
- return (conn);
- }
- }
-
- read_unlock (&ktoenal_data.ksnd_socklist_lock);
-
- CDEBUG(D_NET, "No connection found when looking for nid "LPX64"\n", nid);
- PROF_FINISH(conn_list_walk);
- return (NULL);
-}
-
-void
-ktoenal_close_conn (ksock_conn_t *conn)
-{
- CDEBUG (D_NET, "connection [%p] closed \n", conn);
-
- fput (conn->ksnc_file);
- PORTAL_FREE (conn, sizeof (*conn));
- /* One less connection keeping us hanging on */
- PORTAL_MODULE_UNUSE;
-}
-
-void
-_ktoenal_put_conn (ksock_conn_t *conn)
-{
- unsigned long flags;
-
- CDEBUG (D_NET, "connection [%p] handed the black spot\n", conn);
-
- /* "But what is the black spot, captain?" I asked.
- * "That's a summons, mate..." */
-
- LASSERT (atomic_read (&conn->ksnc_refcount) == 0);
- LASSERT (!conn->ksnc_rx_scheduled);
-
- if (!in_interrupt())
- {
- ktoenal_close_conn (conn);
- return;
- }
-
- spin_lock_irqsave (&ktoenal_data.ksnd_reaper_lock, flags);
-
- list_add (&conn->ksnc_list, &ktoenal_data.ksnd_reaper_list);
- wake_up (&ktoenal_data.ksnd_reaper_waitq);
-
- spin_unlock_irqrestore (&ktoenal_data.ksnd_reaper_lock, flags);
-}
-
-void
-ktoenal_free_buffers (void)
-{
- if (ktoenal_data.ksnd_fmbs != NULL)
- {
- ksock_fmb_t *fmb = (ksock_fmb_t *)ktoenal_data.ksnd_fmbs;
- int i;
- int j;
-
- for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); i++, fmb++)
- for (j = 0; j < fmb->fmb_npages; j++)
- if (fmb->fmb_pages[j] != NULL)
- __free_page (fmb->fmb_pages[j]);
-
- PORTAL_FREE (ktoenal_data.ksnd_fmbs,
- sizeof (ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS));
- }
-
- if (ktoenal_data.ksnd_ltxs != NULL)
- PORTAL_FREE (ktoenal_data.ksnd_ltxs,
- sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS));
-}
-
-int
-ktoenal_cmd(struct portals_cfg *pcfg, void * private)
-{
- int rc = -EINVAL;
-
- LASSERT (pcfg != NULL);
-
- switch(pcfg->pcfg_command) {
- case NAL_CMD_REGISTER_PEER_FD: {
- rc = ktoenal_add_sock(pcfg->pcfg_nid, pcfg->pcfg_fd);
- break;
- }
- case NAL_CMD_CLOSE_CONNECTION: {
- rc = ktoenal_close_sock(pcfg->pcfg_nid);
- break;
- }
- case NAL_CMD_REGISTER_MYNID: {
- rc = ktoenal_set_mynid (pcfg->pcfg_nid);
- break;
- }
- }
-
- return rc;
-}
-
-
-void /*__exit*/
-ktoenal_module_fini (void)
-{
- CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
- atomic_read (&portal_kmemory));
-
- switch (ktoenal_data.ksnd_init)
- {
- default:
- LASSERT (0);
-
- case SOCKNAL_INIT_ALL:
- kportal_nal_unregister(TOENAL);
- PORTAL_SYMBOL_UNREGISTER (ktoenal_ni);
- /* fall through */
-
- case SOCKNAL_INIT_PTL:
- PtlNIFini(ktoenal_ni);
- lib_fini(&ktoenal_lib);
- /* fall through */
-
- case SOCKNAL_INIT_DATA:
- /* Module refcount only gets to zero when all connections
- * have been closed so all lists must be empty */
- LASSERT (list_empty (&ktoenal_data.ksnd_socklist));
- LASSERT (list_empty (&ktoenal_data.ksnd_reaper_list));
- LASSERT (list_empty (&ktoenal_data.ksnd_rx_conns));
- LASSERT (list_empty (&ktoenal_data.ksnd_tx_conns));
- LASSERT (list_empty (&ktoenal_data.ksnd_small_fmp.fmp_blocked_conns));
- LASSERT (list_empty (&ktoenal_data.ksnd_large_fmp.fmp_blocked_conns));
-
- kpr_shutdown (&ktoenal_data.ksnd_router); /* stop router calling me */
-
- /* flag threads to terminate; wake and wait for them to die */
- ktoenal_data.ksnd_shuttingdown = 1;
- wake_up_all (&ktoenal_data.ksnd_reaper_waitq);
- wake_up_all (&ktoenal_data.ksnd_sched_waitq);
- wake_up_process(ktoenal_data.ksnd_pollthread_tsk);
-
- while (atomic_read (&ktoenal_data.ksnd_nthreads) != 0)
- {
- CDEBUG (D_NET, "waitinf for %d threads to terminate\n",
- atomic_read (&ktoenal_data.ksnd_nthreads));
- set_current_state (TASK_UNINTERRUPTIBLE);
- schedule_timeout (HZ);
- }
-
- kpr_deregister (&ktoenal_data.ksnd_router);
-
- ktoenal_free_buffers();
- /* fall through */
-
- case SOCKNAL_INIT_NOTHING:
- break;
- }
-
- CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
- atomic_read (&portal_kmemory));
-
- printk(KERN_INFO "Lustre: Routing socket NAL unloaded (final mem %d)\n",
- atomic_read(&portal_kmemory));
-}
-
-int __init
-ktoenal_module_init (void)
-{
- int pkmem = atomic_read(&portal_kmemory);
- int rc;
- int i;
- int j;
-
- /* packet descriptor must fit in a router descriptor's scratchpad */
- LASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t));
-
- LASSERT (ktoenal_data.ksnd_init == SOCKNAL_INIT_NOTHING);
-
- ktoenal_api.forward = ktoenal_api_forward;
- ktoenal_api.shutdown = ktoenal_api_shutdown;
- ktoenal_api.yield = ktoenal_api_yield;
- ktoenal_api.validate = NULL; /* our api validate is a NOOP */
- ktoenal_api.lock = ktoenal_api_lock;
- ktoenal_api.unlock = ktoenal_api_unlock;
- ktoenal_api.nal_data = &ktoenal_data;
-
- ktoenal_lib.nal_data = &ktoenal_data;
-
- memset (&ktoenal_data, 0, sizeof (ktoenal_data)); /* zero pointers */
-
- INIT_LIST_HEAD(&ktoenal_data.ksnd_socklist);
- rwlock_init(&ktoenal_data.ksnd_socklist_lock);
-
- ktoenal_data.ksnd_nal_cb = &ktoenal_lib;
- spin_lock_init (&ktoenal_data.ksnd_nal_cb_lock);
-
- spin_lock_init (&ktoenal_data.ksnd_sched_lock);
-
- init_waitqueue_head (&ktoenal_data.ksnd_sched_waitq);
-
- INIT_LIST_HEAD (&ktoenal_data.ksnd_rx_conns);
- INIT_LIST_HEAD (&ktoenal_data.ksnd_tx_conns);
-
- INIT_LIST_HEAD(&ktoenal_data.ksnd_small_fmp.fmp_idle_fmbs);
- INIT_LIST_HEAD(&ktoenal_data.ksnd_small_fmp.fmp_blocked_conns);
- INIT_LIST_HEAD(&ktoenal_data.ksnd_large_fmp.fmp_idle_fmbs);
- INIT_LIST_HEAD(&ktoenal_data.ksnd_large_fmp.fmp_blocked_conns);
-
- INIT_LIST_HEAD(&ktoenal_data.ksnd_idle_nblk_ltx_list);
- INIT_LIST_HEAD(&ktoenal_data.ksnd_idle_ltx_list);
- init_waitqueue_head(&ktoenal_data.ksnd_idle_ltx_waitq);
-
- INIT_LIST_HEAD (&ktoenal_data.ksnd_reaper_list);
- init_waitqueue_head(&ktoenal_data.ksnd_reaper_waitq);
- spin_lock_init (&ktoenal_data.ksnd_reaper_lock);
-
- ktoenal_data.ksnd_init = SOCKNAL_INIT_DATA; /* flag lists/ptrs/locks initialised */
-
- PORTAL_ALLOC(ktoenal_data.ksnd_fmbs,
- sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS));
- if (ktoenal_data.ksnd_fmbs == NULL)
- RETURN(-ENOMEM);
-
- /* NULL out buffer pointers etc */
- memset(ktoenal_data.ksnd_fmbs, 0,
- sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS));
-
- for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); i++)
- {
- ksock_fmb_t *fmb = &((ksock_fmb_t *)ktoenal_data.ksnd_fmbs)[i];
-
- if (i < SOCKNAL_SMALL_FWD_NMSGS)
- {
- fmb->fmb_npages = SOCKNAL_SMALL_FWD_PAGES;
- fmb->fmb_pool = &ktoenal_data.ksnd_small_fmp;
- }
- else
- {
- fmb->fmb_npages = SOCKNAL_LARGE_FWD_PAGES;
- fmb->fmb_pool = &ktoenal_data.ksnd_large_fmp;
- }
-
- LASSERT (fmb->fmb_npages > 0);
- for (j = 0; j < fmb->fmb_npages; j++)
- {
- fmb->fmb_pages[j] = alloc_page(GFP_KERNEL);
-
- if (fmb->fmb_pages[j] == NULL)
- {
- ktoenal_module_fini ();
- return (-ENOMEM);
- }
-
- LASSERT (page_address (fmb->fmb_pages[j]) != NULL);
- }
-
- list_add (&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs);
- }
-
- PORTAL_ALLOC(ktoenal_data.ksnd_ltxs,
- sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS));
- if (ktoenal_data.ksnd_ltxs == NULL)
- {
- ktoenal_module_fini ();
- return (-ENOMEM);
- }
-
- /* Deterministic bugs please */
- memset (ktoenal_data.ksnd_ltxs, 0xeb,
- sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS));
-
- for (i = 0; i < SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS; i++)
- {
- ksock_ltx_t *ltx = &((ksock_ltx_t *)ktoenal_data.ksnd_ltxs)[i];
-
- ltx->ltx_idle = i < SOCKNAL_NLTXS ?
- &ktoenal_data.ksnd_idle_ltx_list :
- &ktoenal_data.ksnd_idle_nblk_ltx_list;
- list_add (&ltx->ltx_tx.tx_list, ltx->ltx_idle);
- }
-
- rc = PtlNIInit(ktoenal_init, 32, 4, 0, &ktoenal_ni);
- if (rc != 0)
- {
- CERROR("ktoenal: PtlNIInit failed: error %d\n", rc);
- ktoenal_module_fini ();
- RETURN (rc);
- }
- PtlNIDebug(ktoenal_ni, ~0);
-
- ktoenal_data.ksnd_init = SOCKNAL_INIT_PTL; /* flag PtlNIInit() called */
-
- ktoenal_data.ksnd_slistchange = 1;
- for (i = 0; i < TOENAL_N_SCHED; i++)
- {
- rc = ktoenal_thread_start (ktoenal_scheduler, NULL);
- if (rc != 0)
- {
- CERROR("Can't spawn socknal scheduler[%d]: %d\n", i, rc);
- ktoenal_module_fini ();
- RETURN (rc);
- }
- }
-
- rc = ktoenal_thread_start (ktoenal_reaper, NULL);
- if (rc != 0)
- {
- CERROR("Can't spawn socknal reaper: %d\n", rc);
- ktoenal_module_fini ();
- RETURN (rc);
- }
-
- rc = ktoenal_thread_start (ktoenal_pollthread, NULL);
- if (rc != 0)
- {
- CERROR("Can't spawn socknal pollthread: %d\n", rc);
- ktoenal_module_fini ();
- RETURN (rc);
- }
-
- rc = kpr_register(&ktoenal_data.ksnd_router,
- &ktoenal_router_interface);
- if (rc != 0)
- CDEBUG (D_NET, "Can't initialise routing interface (rc = %d): not routing\n", rc);
-
- rc = kportal_nal_register(TOENAL, &ktoenal_cmd, NULL);
- if (rc != 0)
- CDEBUG(D_NET, "Can't initialise command interface (rc = %d)\n",
- rc);
-
- PORTAL_SYMBOL_REGISTER(ktoenal_ni);
-
- /* flag everything initialised */
- ktoenal_data.ksnd_init = SOCKNAL_INIT_ALL;
-
- printk(KERN_INFO "Lustre: Routing TOE NAL loaded (Routing %s, initial mem %d)\n",
- kpr_routing(&ktoenal_data.ksnd_router) ? "enabled" : "disabled",
- pkmem);
-
- return (0);
-}
-
-MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Kernel TCP Socket NAL v0.01");
-MODULE_LICENSE("GPL");
-
-module_init(ktoenal_module_init);
-module_exit(ktoenal_module_fini);
-
-EXPORT_SYMBOL (ktoenal_ni);
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- * Author: Zach Brown <zab@zabbo.net>
- * Author: Peter J. Braam <braam@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- * Author: Eric Barton <eric@bartonsoftware.com>
- * Author: Kedar Sovani <kedar@calsoftinc.com>
- * Author: Amey Inamdar <amey@calsoftinc.com>
- *
- * This file is part of Portals, http://www.sf.net/projects/lustre/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#define DEBUG_PORTAL_ALLOC
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-#include <net/tcp.h>
-#include <linux/uio.h>
-#include <linux/sched.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/list.h>
-#include <asm/uaccess.h>
-#include <asm/segment.h>
-
-#define DEBUG_SUBSYSTEM S_SOCKNAL
-
-#include <linux/kp30.h>
-#include <portals/p30.h>
-#include <portals/lib-p30.h>
-
-#define SOCKNAL_NLTXS 128 /* # normal transmit messages */
-#define SOCKNAL_NNBLK_LTXS 128 /* # transmit messages reserved if can't block */
-
-#define SOCKNAL_SMALL_FWD_NMSGS 128 /* # small messages I can be forwarding at any time */
-#define SOCKNAL_LARGE_FWD_NMSGS 32 /* # large messages I can be forwarding at any time */
-
-#define SOCKNAL_SMALL_FWD_PAGES 1 /* # pages in a small message fwd buffer */
-
-#define SOCKNAL_LARGE_FWD_PAGES (PAGE_ALIGN (sizeof (ptl_hdr_t) + PTL_MTU) >> PAGE_SHIFT)
- /* # pages in a large message fwd buffer */
-
-#define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */
-
-#define SOCKNAL_TX_LOW_WATER(sk) (((sk)->sndbuf*8)/10)
-
-#define TOENAL_N_SCHED 1
-
-typedef struct /* pool of forwarding buffers */
-{
- struct list_head fmp_idle_fmbs; /* buffers waiting for a connection */
- struct list_head fmp_blocked_conns; /* connections waiting for a buffer */
-} ksock_fmb_pool_t;
-
-typedef struct {
- int ksnd_init; /* initialisation state */
-
- struct list_head ksnd_socklist; /* all my connections */
- rwlock_t ksnd_socklist_lock; /* stabilise add/find/remove */
-
-
- ptl_nid_t ksnd_mynid;
- nal_cb_t *ksnd_nal_cb;
- spinlock_t ksnd_nal_cb_lock; /* lib cli/sti lock */
-
- atomic_t ksnd_nthreads; /* # live threads */
- int ksnd_shuttingdown; /* tell threads to exit */
-
- kpr_router_t ksnd_router; /* THE router */
-
- spinlock_t ksnd_sched_lock; /* serialise packet scheduling */
- wait_queue_head_t ksnd_sched_waitq; /* where scheduler(s) wait */
-
- struct list_head ksnd_rx_conns; /* conn waiting to be read */
- struct list_head ksnd_tx_conns; /* conn waiting to be written */
-
- void *ksnd_fmbs; /* all the pre-allocated FMBs */
- ksock_fmb_pool_t ksnd_small_fmp; /* small message forwarding buffers */
- ksock_fmb_pool_t ksnd_large_fmp; /* large message forwarding buffers */
-
- void *ksnd_ltxs; /* all the pre-allocated LTXs */
- struct list_head ksnd_idle_ltx_list; /* where to get an idle LTX */
- struct list_head ksnd_idle_nblk_ltx_list; /* where to get an idle LTX if you can't block */
- wait_queue_head_t ksnd_idle_ltx_waitq; /* where to block for an idle LTX */
-
- struct list_head ksnd_reaper_list; /* conn waiting to be reaped */
- wait_queue_head_t ksnd_reaper_waitq; /* reaper sleeps here */
- spinlock_t ksnd_reaper_lock; /* serialise */
-
- struct task_struct *ksnd_pollthread_tsk;/* task_struct for the poll thread */
- poll_table ksnd_pwait; /* poll wait table for the socket */
- int ksnd_slistchange; /* informs the pollthread that
- * the socklist has changed */
-} ksock_nal_data_t;
-
-#define SOCKNAL_INIT_NOTHING 0
-#define SOCKNAL_INIT_DATA 1
-#define SOCKNAL_INIT_PTL 2
-#define SOCKNAL_INIT_ALL 3
-
-typedef struct /* transmit packet */
-{
- struct list_head tx_list; /* queue on conn for transmission etc */
- char tx_isfwd; /* forwarding / sourced here */
- int tx_nob; /* # packet bytes */
- int tx_niov; /* # packet frags */
- struct iovec *tx_iov; /* packet frags */
-} ksock_tx_t;
-
-typedef struct /* locally transmitted packet */
-{
- ksock_tx_t ltx_tx; /* send info */
- struct list_head *ltx_idle; /* where to put when idle */
- void *ltx_private; /* lib_finalize() callback arg */
- void *ltx_cookie; /* lib_finalize() callback arg */
- struct iovec ltx_iov[1 + PTL_MD_MAX_IOV]; /* msg frags */
- ptl_hdr_t ltx_hdr; /* buffer for packet header */
-} ksock_ltx_t;
-
-#define KSOCK_TX_2_KPR_FWD_DESC(ptr) list_entry (ptr, kpr_fwd_desc_t, kprfd_scratch)
-/* forwarded packets (router->socknal) embedded in kpr_fwd_desc_t::kprfd_scratch */
-
-#define KSOCK_TX_2_KSOCK_LTX(ptr) list_entry (ptr, ksock_ltx_t, ltx_tx)
-/* local packets (lib->socknal) embedded in ksock_ltx_t::ltx_tx */
-
-/* NB list_entry() is used here as convenient macro for calculating a
- * pointer to a struct from the addres of a member.
- */
-
-typedef struct /* Kernel portals Socket Forwarding message buffer */
-{ /* (socknal->router) */
- struct list_head fmb_list; /* queue idle */
- kpr_fwd_desc_t fmb_fwd; /* router's descriptor */
- int fmb_npages; /* # pages allocated */
- ksock_fmb_pool_t *fmb_pool; /* owning pool */
- struct page *fmb_pages[SOCKNAL_LARGE_FWD_PAGES];
- struct iovec fmb_iov[SOCKNAL_LARGE_FWD_PAGES];
-} ksock_fmb_t;
-
-#define SOCKNAL_RX_HEADER 1 /* reading header */
-#define SOCKNAL_RX_BODY 2 /* reading body (to deliver here) */
-#define SOCKNAL_RX_BODY_FWD 3 /* reading body (to forward) */
-#define SOCKNAL_RX_SLOP 4 /* skipping body */
-#define SOCKNAL_RX_GET_FMB 5 /* scheduled for forwarding */
-#define SOCKNAL_RX_FMB_SLEEP 6 /* blocked waiting for a fwd desc */
-
-typedef struct
-{
- struct list_head ksnc_list; /* stash on global socket list */
- struct file *ksnc_file; /* socket filp */
- struct socket *ksnc_sock; /* socket */
- ptl_nid_t ksnc_peernid; /* who's on the other end */
- atomic_t ksnc_refcount; /* # users */
-
- /* READER */
- struct list_head ksnc_rx_list; /* where I enq waiting input or a forwarding descriptor */
- unsigned long ksnc_rx_ready; /* data ready to read */
- int ksnc_rx_scheduled; /* being progressed */
- int ksnc_rx_state; /* what is being read */
- int ksnc_rx_nob_left; /* # bytes to next hdr/body */
- int ksnc_rx_nob_wanted; /* bytes actually wanted */
- int ksnc_rx_niov; /* # frags */
- struct iovec ksnc_rx_iov[1 + PTL_MD_MAX_IOV]; /* the frags */
-
- void *ksnc_cookie; /* rx lib_finalize passthru arg */
- ptl_hdr_t ksnc_hdr; /* where I read headers into */
-
- /* WRITER */
- struct list_head ksnc_tx_list; /* where I enq waiting for output space */
- struct list_head ksnc_tx_queue; /* packets waiting to be sent */
- unsigned long ksnc_tx_ready; /* write space */
- int ksnc_tx_scheduled; /* being progressed */
-
-} ksock_conn_t;
-
-extern int ktoenal_add_sock (ptl_nid_t nid, int fd);
-extern int ktoenal_close_sock(ptl_nid_t nid);
-extern int ktoenal_set_mynid(ptl_nid_t nid);
-extern int ktoenal_push_sock(ptl_nid_t nid);
-extern ksock_conn_t *ktoenal_get_conn (ptl_nid_t nid);
-extern void _ktoenal_put_conn (ksock_conn_t *conn);
-extern void ktoenal_close_conn (ksock_conn_t *conn);
-
-static inline void
-ktoenal_put_conn (ksock_conn_t *conn)
-{
- CDEBUG (D_OTHER, "putting conn[%p] -> "LPX64" (%d)\n",
- conn, conn->ksnc_peernid, atomic_read (&conn->ksnc_refcount));
-
- if (atomic_dec_and_test (&conn->ksnc_refcount))
- _ktoenal_put_conn (conn);
-}
-
-extern int ktoenal_thread_start (int (*fn)(void *arg), void *arg);
-extern int ktoenal_new_packet (ksock_conn_t *conn, int skip);
-extern void ktoenal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd);
-extern int ktoenal_scheduler (void *arg);
-extern int ktoenal_reaper (void *arg);
-extern int ktoenal_pollthread (void *arg);
-extern void ktoenal_data_ready(ksock_conn_t *conn);
-extern void ktoenal_write_space(ksock_conn_t *conn);
-
-
-extern nal_cb_t ktoenal_lib;
-extern ksock_nal_data_t ktoenal_data;
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- * Author: Zach Brown <zab@zabbo.net>
- * Author: Peter J. Braam <braam@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- * Author: Eric Barton <eric@bartonsoftware.com>
- * Author: Kedar Sovani <kedar@calsoftinc.com>
- * Author: Amey Inamdar <amey@calsoftinc.com>
- *
- * This file is part of Portals, http://www.sf.net/projects/lustre/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include <linux/poll.h>
-#include "toenal.h"
-
-atomic_t ktoenal_packets_received;
-long ktoenal_packets_launched;
-long ktoenal_packets_transmitted;
-
-/*
- * LIB functions follow
- *
- */
-ptl_err_t
-ktoenal_read(nal_cb_t *nal, void *private, void *dst_addr,
- user_ptr src_addr, size_t len)
-{
- CDEBUG(D_NET, LPX64": reading %ld bytes from %p -> %p\n",
- nal->ni.nid, (long)len, src_addr, dst_addr);
-
- memcpy( dst_addr, src_addr, len );
- return PTL_OK;
-}
-
-ptl_err_t
-ktoenal_write(nal_cb_t *nal, void *private, user_ptr dst_addr,
- void *src_addr, size_t len)
-{
- CDEBUG(D_NET, LPX64": writing %ld bytes from %p -> %p\n",
- nal->ni.nid, (long)len, src_addr, dst_addr);
-
- memcpy( dst_addr, src_addr, len );
- return PTL_OK;
-}
-
-void *
-ktoenal_malloc(nal_cb_t *nal, size_t len)
-{
- void *buf;
-
- PORTAL_ALLOC(buf, len);
-
- if (buf != NULL)
- memset(buf, 0, len);
-
- return (buf);
-}
-
-void
-ktoenal_free(nal_cb_t *nal, void *buf, size_t len)
-{
- PORTAL_FREE(buf, len);
-}
-
-void
-ktoenal_printf(nal_cb_t *nal, const char *fmt, ...)
-{
- va_list ap;
- char msg[256];
-
- va_start (ap, fmt);
- vsnprintf (msg, sizeof (msg), fmt, ap); /* sprint safely */
- va_end (ap);
-
- msg[sizeof (msg) - 1] = 0; /* ensure terminated */
-
- CDEBUG (D_NET, "%s", msg);
-}
-
-void
-ktoenal_cli(nal_cb_t *nal, unsigned long *flags)
-{
- ksock_nal_data_t *data = nal->nal_data;
-
- spin_lock(&data->ksnd_nal_cb_lock);
-}
-
-void
-ktoenal_sti(nal_cb_t *nal, unsigned long *flags)
-{
- ksock_nal_data_t *data;
- data = nal->nal_data;
-
- spin_unlock(&data->ksnd_nal_cb_lock);
-}
-
-int
-ktoenal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
-{
- /* I would guess that if ktoenal_get_conn(nid) == NULL,
- and we're not routing, then 'nid' is very distant :) */
- if ( nal->ni.nid == nid ) {
- *dist = 0;
- } else {
- *dist = 1;
- }
-
- return 0;
-}
-
-ksock_ltx_t *
-ktoenal_get_ltx (int may_block)
-{
- unsigned long flags;
- ksock_ltx_t *ltx = NULL;
-
- for (;;)
- {
- spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags);
-
- if (!list_empty (&ktoenal_data.ksnd_idle_ltx_list))
- {
- ltx = list_entry (ktoenal_data.ksnd_idle_ltx_list.next, ksock_ltx_t, ltx_tx.tx_list);
- list_del (&ltx->ltx_tx.tx_list);
- break;
- }
-
- if (!may_block)
- {
- if (!list_empty (&ktoenal_data.ksnd_idle_nblk_ltx_list))
- {
- ltx = list_entry (ktoenal_data.ksnd_idle_nblk_ltx_list.next,
- ksock_ltx_t, ltx_tx.tx_list);
- list_del (&ltx->ltx_tx.tx_list);
- }
- break;
- }
-
- spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
-
- wait_event (ktoenal_data.ksnd_idle_ltx_waitq,
- !list_empty (&ktoenal_data.ksnd_idle_ltx_list));
- }
-
- spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
-
- return (ltx);
-}
-
-int
-ktoenal_sendmsg (struct file *sock, struct iovec *iov, int niov, int nob, int flags)
-{
- /* NB This procedure "consumes" iov (actually we do, tcp_sendmsg doesn't)
- */
- mm_segment_t oldmm;
- int rc;
-
- LASSERT (niov > 0);
- LASSERT (nob > 0);
-
- oldmm = get_fs();
- set_fs (KERNEL_DS);
-
-#ifdef PORTAL_DEBUG
- {
- int total_nob;
- int i;
-
- for (i = total_nob = 0; i < niov; i++)
- total_nob += iov[i].iov_len;
-
- LASSERT (nob == total_nob);
- }
-#endif
- LASSERT (!in_interrupt());
-
- rc = sock->f_op->writev(sock, iov, niov, NULL);
-
- set_fs (oldmm);
-
- if (rc > 0) /* sent something? */
- {
- nob = rc; /* consume iov */
- for (;;)
- {
- LASSERT (niov > 0);
-
- if (iov->iov_len >= nob)
- {
- iov->iov_len -= nob;
- iov->iov_base = (void *)(((unsigned long)iov->iov_base) + nob);
- break;
- }
- nob -= iov->iov_len;
- iov->iov_len = 0;
- iov++;
- niov--;
- }
- }
-
- return (rc);
-}
-
-int
-ktoenal_recvmsg(struct file *sock, struct iovec *iov, int niov, int toread)
-{
- /* NB This procedure "consumes" iov (actually tcp_recvmsg does)
- */
- mm_segment_t oldmm;
- int ret, i, len = 0, origlen = 0;
-
- PROF_START(our_recvmsg);
- for(i = 0; i < niov; i++) {
- len += iov[i].iov_len;
- if(len >= toread)
- break;
- }
-
- if(len >= toread) {
- origlen = iov[i].iov_len;
- iov[i].iov_len -= (len - toread);
- }
- else { /* i == niov */
- i = niov - 1;
- }
-
- oldmm = get_fs();
- set_fs(KERNEL_DS);
-
- ret = sock->f_op->readv(sock, iov, i + 1, NULL);
-
- set_fs(oldmm);
-
- if(origlen)
- iov[i].iov_len = origlen;
-
- PROF_FINISH(our_recvmsg);
- return ret;
-}
-
-void
-ktoenal_process_transmit (ksock_conn_t *conn, unsigned long *irq_flags)
-{
- ksock_tx_t *tx = list_entry (conn->ksnc_tx_queue.next, ksock_tx_t, tx_list);
- int rc;
-
- LASSERT (conn->ksnc_tx_scheduled);
- LASSERT (conn->ksnc_tx_ready);
- LASSERT (!list_empty (&conn->ksnc_tx_queue));
-
- /* assume transmit will complete now, so dequeue while I've got the lock */
- list_del (&tx->tx_list);
-
- spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, *irq_flags);
-
- LASSERT (tx->tx_nob > 0);
-
- conn->ksnc_tx_ready = 0; /* write_space may race with me and set ready */
- mb(); /* => clear BEFORE trying to write */
-
- rc = ktoenal_sendmsg (conn->ksnc_file,
- tx->tx_iov, tx->tx_niov, tx->tx_nob,
- list_empty (&conn->ksnc_tx_queue) ?
- MSG_DONTWAIT : (MSG_DONTWAIT | MSG_MORE));
-
- CDEBUG (D_NET, "send(%d) %d\n", tx->tx_nob, rc);
-
- if (rc < 0) /* error */
- {
- if (rc == -EAGAIN) /* socket full => */
- rc = 0; /* nothing sent */
- else
- {
- //warning FIXME: handle socket errors properly
- CERROR ("Error socknal send(%d) %p: %d\n", tx->tx_nob, conn, rc);
- rc = tx->tx_nob; /* kid on for now whole packet went */
- }
- }
-
- if (rc == tx->tx_nob) /* everything went */
- {
- conn->ksnc_tx_ready = 1; /* assume more can go (ASAP) */
- ktoenal_put_conn (conn); /* release packet's ref */
-
- if (tx->tx_isfwd) /* was a forwarded packet? */
- {
- kpr_fwd_done (&ktoenal_data.ksnd_router,
- KSOCK_TX_2_KPR_FWD_DESC (tx), 0);
-
- spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
- }
- else /* local send */
- {
- ksock_ltx_t *ltx = KSOCK_TX_2_KSOCK_LTX (tx);
-
- lib_finalize (&ktoenal_lib, ltx->ltx_private,
- ltx->ltx_cookie, PTL_OK);
-
- spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
-
- list_add (&ltx->ltx_tx.tx_list, ltx->ltx_idle);
-
- /* normal tx desc => wakeup anyone blocking for one */
- if (ltx->ltx_idle == &ktoenal_data.ksnd_idle_ltx_list &&
- waitqueue_active (&ktoenal_data.ksnd_idle_ltx_waitq))
- wake_up (&ktoenal_data.ksnd_idle_ltx_waitq);
- }
- ktoenal_packets_transmitted++;
- }
- else
- {
- tx->tx_nob -= rc;
-
- spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
-
- /* back onto HEAD of tx_queue */
- list_add (&tx->tx_list, &conn->ksnc_tx_queue);
- }
-
- if (!conn->ksnc_tx_ready || /* no space to write now */
- list_empty (&conn->ksnc_tx_queue)) /* nothing to write */
- {
- conn->ksnc_tx_scheduled = 0; /* not being scheduled */
- ktoenal_put_conn (conn); /* release scheduler's ref */
- }
- else /* let scheduler call me again */
- list_add_tail (&conn->ksnc_tx_list, &ktoenal_data.ksnd_tx_conns);
-}
-
-void
-ktoenal_launch_packet (ksock_conn_t *conn, ksock_tx_t *tx)
-{
- unsigned long flags;
- int nob = tx->tx_nob;
- struct iovec *iov = tx->tx_iov;
- int niov = 1;
-
- LASSERT (nob >= sizeof (ptl_hdr_t));
-
- /* Truncate iov to exactly match total packet length
- * since socket sendmsg pays no attention to requested length.
- */
- for (;;)
- {
- LASSERT (niov <= tx->tx_niov);
-
- if (iov->iov_len >= nob)
- {
- iov->iov_len = nob;
- break;
- }
- nob -= iov->iov_len;
- iov++;
- niov++;
- }
- tx->tx_niov = niov;
-
- spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags);
- list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue);
-
- if (conn->ksnc_tx_ready && /* able to send */
- !conn->ksnc_tx_scheduled) /* not scheduled to send */
- {
- list_add_tail (&conn->ksnc_tx_list, &ktoenal_data.ksnd_tx_conns);
- conn->ksnc_tx_scheduled = 1;
- atomic_inc (&conn->ksnc_refcount); /* extra ref for scheduler */
- if (waitqueue_active (&ktoenal_data.ksnd_sched_waitq))
- wake_up (&ktoenal_data.ksnd_sched_waitq);
- }
-
- ktoenal_packets_launched++;
- spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
-}
-
-ptl_err_t
-ktoenal_send(nal_cb_t *nal, void *private, lib_msg_t *cookie,
- ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
- unsigned int payload_niov, struct iovec *payload_iov,
- size_t payload_off, size_t payload_len)
-{
- ptl_nid_t gatewaynid;
- ksock_conn_t *conn;
- ksock_ltx_t *ltx;
- int rc;
- int i;
-
- /* By this point, as it happens, we have absolutely no idea what
- * 'private' is. It might be ksock_nal_data or it might be ksock_conn.
- * Ha ha, isn't that a funny joke?
- *
- * FIXME: this is not the right way to fix this; the right way is to
- * always pass in the same kind of structure. This is hard right now.
- * To revisit this issue, set a breakpoint in here and watch for when
- * it's called from lib_finalize. I think this occurs when we send a
- * packet as a side-effect of another packet, such as when an ACK has
- * been requested. -phil */
-
- CDEBUG(D_NET, "sending %d bytes from [%d](%p,%d)... to nid: "
- LPX64" pid %d\n", (int)payload_len, payload_niov,
- payload_niov > 0 ? payload_iov[0].iov_base : NULL,
- (int)(payload_niov > 0 ? payload_iov[0].iov_len : 0), nid, pid);
-
- /* XXX not implemented read-only iov with offset */
- LBUG();
-
- if ((conn = ktoenal_get_conn (nid)) == NULL)
- {
- /* It's not a peer; try to find a gateway */
- rc = kpr_lookup (&ktoenal_data.ksnd_router, nid, payload_niov,
- &gatewaynid);
- if (rc != 0)
- {
- CERROR ("Can't route to "LPX64": router error %d\n", nid, rc);
- return (PTL_FAIL);
- }
-
- if ((conn = ktoenal_get_conn (gatewaynid)) == NULL)
- {
- CERROR ("Can't route to "LPX64": gateway "LPX64" is not a peer\n",
- nid, gatewaynid);
- return (PTL_FAIL);
- }
- }
-
- /* This transmit has now got a ref on conn */
-
- /* I may not block for a transmit descriptor if I might block the
- * receiver, or an interrupt handler. */
- ltx = ktoenal_get_ltx (!(type == PTL_MSG_ACK ||
- type == PTL_MSG_REPLY ||
- in_interrupt ()));
- if (ltx == NULL)
- {
- CERROR ("Can't allocate tx desc\n");
- ktoenal_put_conn (conn);
- return (PTL_FAIL);
- }
-
- /* Init common (to sends and forwards) packet part */
- ltx->ltx_tx.tx_isfwd = 0;
- ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len;
- ltx->ltx_tx.tx_niov = 1 + payload_niov;
- ltx->ltx_tx.tx_iov = ltx->ltx_iov;
-
- /* Init local send packet (storage for hdr, finalize() args, iov) */
- ltx->ltx_hdr = *hdr;
- ltx->ltx_private = private;
- ltx->ltx_cookie = cookie;
-
- ltx->ltx_iov[0].iov_base = &ltx->ltx_hdr;
- ltx->ltx_iov[0].iov_len = sizeof (ltx->ltx_hdr);
-
- LASSERT (payload_niov <= PTL_MD_MAX_IOV);
-
- for (i = 0; i < payload_niov; i++)
- {
- ltx->ltx_iov[1 + i].iov_base = payload_iov[i].iov_base;
- ltx->ltx_iov[1 + i].iov_len = payload_iov[i].iov_len;
- }
-
- ktoenal_launch_packet (conn, &ltx->ltx_tx);
- return (PTL_OK);
-}
-
-void
-ktoenal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
-{
- ksock_conn_t *conn;
- ptl_nid_t nid = fwd->kprfd_gateway_nid;
- ksock_tx_t *tx = (ksock_tx_t *)&fwd->kprfd_scratch;
-
- CDEBUG (D_NET, "Forwarding [%p] -> "LPX64" ("LPX64"))\n", fwd,
- fwd->kprfd_gateway_nid, fwd->kprfd_target_nid);
-
- if (nid == ktoenal_lib.ni.nid) /* I'm the gateway; must be the last hop */
- nid = fwd->kprfd_target_nid;
-
- conn = ktoenal_get_conn (nid);
- if (conn == NULL)
- {
- CERROR ("[%p] fwd to "LPX64" isn't a peer\n", fwd, nid);
- kpr_fwd_done (&ktoenal_data.ksnd_router, fwd, -EHOSTUNREACH);
- return;
- }
-
- /* This forward has now got a ref on conn */
-
- tx->tx_isfwd = 1; /* This is a forwarding packet */
- tx->tx_nob = fwd->kprfd_nob;
- tx->tx_niov = fwd->kprfd_niov;
- tx->tx_iov = fwd->kprfd_iov;
-
- ktoenal_launch_packet (conn, tx);
-}
-
-int
-ktoenal_thread_start (int (*fn)(void *arg), void *arg)
-{
- long pid = kernel_thread (fn, arg, 0);
-
- if (pid < 0)
- return ((int)pid);
-
- atomic_inc (&ktoenal_data.ksnd_nthreads);
- return (0);
-}
-
-void
-ktoenal_thread_fini (void)
-{
- atomic_dec (&ktoenal_data.ksnd_nthreads);
-}
-
-void
-ktoenal_fmb_callback (void *arg, int error)
-{
- ksock_fmb_t *fmb = (ksock_fmb_t *)arg;
- ptl_hdr_t *hdr = (ptl_hdr_t *) page_address(fmb->fmb_pages[0]);
- ksock_conn_t *conn;
- unsigned long flags;
-
- CDEBUG (D_NET, "routed packet from "LPX64" to "LPX64": %d\n",
- hdr->src_nid, hdr->dest_nid, error);
-
- if (error != 0)
- CERROR ("Failed to route packet from "LPX64" to "LPX64": %d\n",
- hdr->src_nid, hdr->dest_nid, error);
-
- spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags);
-
- list_add (&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs);
-
- if (!list_empty (&fmb->fmb_pool->fmp_blocked_conns))
- {
- conn = list_entry (fmb->fmb_pool->fmp_blocked_conns.next, ksock_conn_t, ksnc_rx_list);
- list_del (&conn->ksnc_rx_list);
-
- CDEBUG (D_NET, "Scheduling conn %p\n", conn);
- LASSERT (conn->ksnc_rx_scheduled);
- LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_FMB_SLEEP);
-
- conn->ksnc_rx_state = SOCKNAL_RX_GET_FMB;
- list_add_tail (&conn->ksnc_rx_list, &ktoenal_data.ksnd_rx_conns);
-
- if (waitqueue_active (&ktoenal_data.ksnd_sched_waitq))
- wake_up (&ktoenal_data.ksnd_sched_waitq);
- }
-
- spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
-}
-
-ksock_fmb_t *
-ktoenal_get_idle_fmb (ksock_conn_t *conn)
-{
- /* NB called with sched lock held */
- int payload_nob = conn->ksnc_rx_nob_left;
- int packet_nob = sizeof (ptl_hdr_t) + payload_nob;
- ksock_fmb_pool_t *pool;
- ksock_fmb_t *fmb;
-
- LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
-
- if (packet_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE)
- pool = &ktoenal_data.ksnd_small_fmp;
- else
- pool = &ktoenal_data.ksnd_large_fmp;
-
- if (!list_empty (&pool->fmp_idle_fmbs))
- {
- fmb = list_entry (pool->fmp_idle_fmbs.next, ksock_fmb_t, fmb_list);
- list_del (&fmb->fmb_list);
- return (fmb);
- }
-
- /* deschedule until fmb free */
-
- conn->ksnc_rx_state = SOCKNAL_RX_FMB_SLEEP;
-
- list_add_tail (&conn->ksnc_rx_list,
- &pool->fmp_blocked_conns);
- return (NULL);
-}
-
-
-int
-ktoenal_init_fmb (ksock_conn_t *conn, ksock_fmb_t *fmb)
-{
- int payload_nob = conn->ksnc_rx_nob_left;
- int packet_nob = sizeof (ptl_hdr_t) + payload_nob;
- int niov; /* at least the header */
- int nob;
-
- LASSERT (conn->ksnc_rx_scheduled);
- LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
- LASSERT (conn->ksnc_rx_nob_wanted == conn->ksnc_rx_nob_left);
- LASSERT (payload_nob >= 0);
- LASSERT (packet_nob <= fmb->fmb_npages * PAGE_SIZE);
- LASSERT (sizeof (ptl_hdr_t) < PAGE_SIZE);
-
- /* Got a forwarding buffer; copy the header we just read into the
- * forwarding buffer. If there's payload start reading reading it
- * into the buffer, otherwise the forwarding buffer can be kicked
- * off immediately.
- *
- * NB fmb->fmb_iov spans the WHOLE packet.
- * conn->ksnc_rx_iov spans just the payload.
- */
-
- fmb->fmb_iov[0].iov_base = page_address (fmb->fmb_pages[0]);
-
- memcpy (fmb->fmb_iov[0].iov_base, &conn->ksnc_hdr, sizeof (ptl_hdr_t)); /* copy header */
-
- if (payload_nob == 0) /* got complete packet already */
- {
- atomic_inc (&ktoenal_packets_received);
-
- CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d fwd_start (immediate)\n", conn,
- conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, packet_nob);
-
- fmb->fmb_iov[0].iov_len = sizeof (ptl_hdr_t);
-
- kpr_fwd_init (&fmb->fmb_fwd, conn->ksnc_hdr.dest_nid,
- packet_nob, 1, fmb->fmb_iov,
- ktoenal_fmb_callback, fmb);
-
- kpr_fwd_start (&ktoenal_data.ksnd_router, &fmb->fmb_fwd); /* forward it now */
-
- ktoenal_new_packet (conn, 0); /* on to next packet */
- return (1);
- }
-
- niov = 1;
- if (packet_nob <= PAGE_SIZE) /* whole packet fits in first page */
- fmb->fmb_iov[0].iov_len = packet_nob;
- else
- {
- fmb->fmb_iov[0].iov_len = PAGE_SIZE;
- nob = packet_nob - PAGE_SIZE;
-
- do
- {
- LASSERT (niov < fmb->fmb_npages);
- fmb->fmb_iov[niov].iov_base = page_address (fmb->fmb_pages[niov]);
- fmb->fmb_iov[niov].iov_len = MIN (PAGE_SIZE, nob);
- nob -= PAGE_SIZE;
- niov++;
- } while (nob > 0);
- }
-
- kpr_fwd_init (&fmb->fmb_fwd, conn->ksnc_hdr.dest_nid,
- packet_nob, niov, fmb->fmb_iov,
- ktoenal_fmb_callback, fmb);
-
- /* stash router's descriptor ready for call to kpr_fwd_start */
- conn->ksnc_cookie = &fmb->fmb_fwd;
-
- conn->ksnc_rx_state = SOCKNAL_RX_BODY_FWD; /* read in the payload */
-
- /* payload is desc's iov-ed buffer, but skipping the hdr */
- LASSERT (niov <= sizeof (conn->ksnc_rx_iov) / sizeof (conn->ksnc_rx_iov[0]));
-
- conn->ksnc_rx_iov[0].iov_base = (void *)(((unsigned long)fmb->fmb_iov[0].iov_base) + sizeof (ptl_hdr_t));
- conn->ksnc_rx_iov[0].iov_len = fmb->fmb_iov[0].iov_len - sizeof (ptl_hdr_t);
-
- if (niov > 1)
- memcpy (&conn->ksnc_rx_iov[1], &fmb->fmb_iov[1], (niov - 1) * sizeof (struct iovec));
-
- conn->ksnc_rx_niov = niov;
-
- CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d reading body\n", conn,
- conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, payload_nob);
- return (0);
-}
-
-void
-ktoenal_fwd_parse (ksock_conn_t *conn)
-{
- ksock_conn_t *conn2;
- int body_len;
-
- CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d parsing header\n", conn,
- conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, conn->ksnc_rx_nob_left);
-
- LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_HEADER);
- LASSERT (conn->ksnc_rx_scheduled);
-
- body_len = conn->ksnc_hdr.payload_length;
-
- if (body_len < 0) /* length corrupt */
- {
- CERROR ("dropping packet from "LPX64" for "LPX64": packet size %d illegal\n",
- conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, body_len);
- ktoenal_new_packet (conn, 0); /* on to new packet */
- return;
- }
-
- if (body_len > PTL_MTU) /* too big to forward */
- {
- CERROR ("dropping packet from "LPX64" for "LPX64": packet size %d too big\n",
- conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, body_len);
- ktoenal_new_packet (conn, body_len); /* on to new packet (skip this one's body) */
- return;
- }
-
- conn2 = ktoenal_get_conn (conn->ksnc_hdr.dest_nid); /* should have gone direct */
- if (conn2 != NULL)
- {
- CERROR ("dropping packet from "LPX64" for "LPX64": target is a peer\n",
- conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid);
- ktoenal_put_conn (conn2); /* drop ref from get above */
-
- ktoenal_new_packet (conn, body_len); /* on to next packet (skip this one's body) */
- return;
- }
-
- conn->ksnc_rx_state = SOCKNAL_RX_GET_FMB; /* Getting FMB now */
- conn->ksnc_rx_nob_left = body_len; /* stash packet size */
- conn->ksnc_rx_nob_wanted = body_len; /* (no slop) */
-}
-
-int
-ktoenal_new_packet (ksock_conn_t *conn, int nob_to_skip)
-{
- static char ktoenal_slop_buffer[4096];
-
- int nob;
- int niov;
- int skipped;
-
- if (nob_to_skip == 0) /* right at next packet boundary now */
- {
- conn->ksnc_rx_state = SOCKNAL_RX_HEADER;
- conn->ksnc_rx_nob_wanted = sizeof (ptl_hdr_t);
- conn->ksnc_rx_nob_left = sizeof (ptl_hdr_t);
-
- conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_hdr;
- conn->ksnc_rx_iov[0].iov_len = sizeof (ptl_hdr_t);
- conn->ksnc_rx_niov = 1;
- return (1);
- }
-
- /* set up to skip as much a possible now */
- /* if there's more left (ran out of iov entries) we'll get called again */
-
- conn->ksnc_rx_state = SOCKNAL_RX_SLOP;
- conn->ksnc_rx_nob_left = nob_to_skip;
- skipped = 0;
- niov = 0;
-
- do
- {
- nob = MIN (nob_to_skip, sizeof (ktoenal_slop_buffer));
-
- conn->ksnc_rx_iov[niov].iov_base = ktoenal_slop_buffer;
- conn->ksnc_rx_iov[niov].iov_len = nob;
- niov++;
- skipped += nob;
- nob_to_skip -=nob;
-
- } while (nob_to_skip != 0 && /* mustn't overflow conn's rx iov */
- niov < sizeof (conn->ksnc_rx_iov)/sizeof (conn->ksnc_rx_iov[0]));
-
- conn->ksnc_rx_niov = niov;
- conn->ksnc_rx_nob_wanted = skipped;
- return (0);
-}
-
-void
-ktoenal_process_receive (ksock_conn_t *conn, unsigned long *irq_flags)
-{
- ksock_fmb_t *fmb;
- int len;
- LASSERT (atomic_read (&conn->ksnc_refcount) > 0);
- LASSERT (conn->ksnc_rx_scheduled);
- LASSERT (conn->ksnc_rx_ready);
-
- /* NB: sched lock held */
- CDEBUG(D_NET, "conn %p\n", conn);
-
- if (conn->ksnc_rx_state != SOCKNAL_RX_GET_FMB) /* doesn't need a forwarding buffer */
- {
- spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, *irq_flags);
- goto try_read;
- }
-
- get_fmb:
- /* NB: sched lock held */
- fmb = ktoenal_get_idle_fmb (conn);
- if (fmb == NULL) /* conn descheduled waiting for idle fmb */
- return;
-
- spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, *irq_flags);
-
- if (ktoenal_init_fmb (conn, fmb)) /* packet forwarded ? */
- goto out; /* come back later for next packet */
-
- try_read:
- /* NB: sched lock NOT held */
- LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_HEADER ||
- conn->ksnc_rx_state == SOCKNAL_RX_BODY ||
- conn->ksnc_rx_state == SOCKNAL_RX_BODY_FWD ||
- conn->ksnc_rx_state == SOCKNAL_RX_SLOP);
-
- LASSERT (conn->ksnc_rx_niov > 0);
- LASSERT (conn->ksnc_rx_nob_wanted > 0);
-
- conn->ksnc_rx_ready = 0; /* data ready may race with me and set ready */
- mb(); /* => clear BEFORE trying to read */
-
- /* NB ktoenal_recvmsg "consumes" the iov passed to it */
- len = ktoenal_recvmsg(conn->ksnc_file,
- conn->ksnc_rx_iov, conn->ksnc_rx_niov,
- conn->ksnc_rx_nob_wanted);
- CDEBUG (D_NET, "%p read(%d) %d\n", conn, conn->ksnc_rx_nob_wanted, len);
-
- if (len <= 0) /* nothing ready (EAGAIN) or EOF or error */
- {
- if (len != -EAGAIN && /* ! nothing to read now */
- len != 0) /* ! nothing to read ever */
- {
- // warning FIXME: handle socket errors properly
- CERROR ("Error socknal read(%d) %p: %d\n",
- conn->ksnc_rx_nob_wanted, conn, len);
- }
- goto out; /* come back when there's data ready */
- }
-
- LASSERT (len <= conn->ksnc_rx_nob_wanted);
- conn->ksnc_rx_nob_wanted -= len;
- conn->ksnc_rx_nob_left -= len;
-
- if (conn->ksnc_rx_nob_wanted != 0) /* short read */
- goto out; /* try again later */
-
- conn->ksnc_rx_ready = 1; /* assume there's more to be had */
-
- switch (conn->ksnc_rx_state)
- {
- case SOCKNAL_RX_HEADER:
- if (conn->ksnc_hdr.dest_nid != ktoenal_lib.ni.nid) /* It's not for me */
- {
- ktoenal_fwd_parse (conn);
- switch (conn->ksnc_rx_state)
- {
- case SOCKNAL_RX_HEADER: /* skipped this packet (zero payload) */
- goto out; /* => come back later */
- case SOCKNAL_RX_SLOP: /* skipping this packet's body */
- goto try_read; /* => go read it */
- case SOCKNAL_RX_GET_FMB: /* forwarding */
- spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
- goto get_fmb; /* => go get a fwd msg buffer */
- default:
- break;
- }
- /* Not Reached */
- LBUG ();
- }
-
- PROF_START(lib_parse);
- lib_parse(&ktoenal_lib, &conn->ksnc_hdr, conn); /* sets wanted_len, iovs etc */
- PROF_FINISH(lib_parse);
-
- if (conn->ksnc_rx_nob_wanted != 0) /* need to get some payload? */
- {
- conn->ksnc_rx_state = SOCKNAL_RX_BODY;
- goto try_read; /* go read the payload */
- }
- /* Fall through (completed packet for me) */
-
- case SOCKNAL_RX_BODY:
- atomic_inc (&ktoenal_packets_received);
- lib_finalize(&ktoenal_lib, NULL, conn->ksnc_cookie, PTL_OK); /* packet is done now */
- /* Fall through */
-
- case SOCKNAL_RX_SLOP:
- if (ktoenal_new_packet (conn, conn->ksnc_rx_nob_left)) /* starting new packet? */
- goto out; /* come back later */
- goto try_read; /* try to finish reading slop now */
-
- case SOCKNAL_RX_BODY_FWD:
- CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d fwd_start (got body)\n", conn,
- conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, conn->ksnc_rx_nob_left);
-
- atomic_inc (&ktoenal_packets_received);
-
- /* ktoenal_init_fmb() stashed router descriptor in conn->ksnc_cookie */
- kpr_fwd_start (&ktoenal_data.ksnd_router, (kpr_fwd_desc_t *)conn->ksnc_cookie);
-
- LASSERT (conn->ksnc_rx_nob_left == 0); /* no slop in forwarded packets */
-
- ktoenal_new_packet (conn, 0); /* on to next packet */
- goto out; /* (later) */
-
- default:
- break;
- }
-
- /* Not Reached */
- LBUG ();
-
- out:
- spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
-
- if (!conn->ksnc_rx_ready) /* no data there to read? */
- {
- conn->ksnc_rx_scheduled = 0; /* let socket callback schedule again */
- ktoenal_put_conn (conn); /* release scheduler's ref */
- }
- else /* let scheduler call me again */
- list_add_tail (&conn->ksnc_rx_list, &ktoenal_data.ksnd_rx_conns);
-}
-
-ptl_err_t
-ktoenal_recv(nal_cb_t *nal, void *private, lib_msg_t *msg,
- unsigned int niov, struct iovec *iov,
- size_t offset, size_t mlen, size_t rlen)
-{
- ksock_conn_t *conn = (ksock_conn_t *)private;
- int i;
-
- /* XXX not implemented read-only iov with offset */
- LBUG();
-
- conn->ksnc_cookie = msg;
-
- LASSERT (niov <= PTL_MD_MAX_IOV);
- for (i = 0; i < niov; i++)
- {
- conn->ksnc_rx_iov[i].iov_len = iov[i].iov_len;
- conn->ksnc_rx_iov[i].iov_base = iov[i].iov_base;
- }
-
- conn->ksnc_rx_niov = niov;
- conn->ksnc_rx_nob_wanted = mlen;
- conn->ksnc_rx_nob_left = rlen;
-
- return (PTL_OK);
-}
-
-int
-ktoenal_scheduler (void *arg)
-{
- unsigned long flags;
- ksock_conn_t *conn;
- int rc;
- int nloops = 0;
-
- kportal_daemonize ("ktoenal_sched");
- kportal_blockallsigs ();
-
- spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags);
-
- while (!ktoenal_data.ksnd_shuttingdown)
- {
- int did_something = 0;
-
- /* Ensure I progress everything semi-fairly */
-
- if (!list_empty (&ktoenal_data.ksnd_rx_conns))
- {
- did_something = 1;
- conn = list_entry (ktoenal_data.ksnd_rx_conns.next,
- ksock_conn_t, ksnc_rx_list);
- list_del (&conn->ksnc_rx_list);
-
- ktoenal_process_receive (conn, &flags); /* drops & regains ksnd_sched_lock */
- }
-
- if (!list_empty (&ktoenal_data.ksnd_tx_conns))
- {
- did_something = 1;
- conn = list_entry (ktoenal_data.ksnd_tx_conns.next,
- ksock_conn_t, ksnc_tx_list);
-
- list_del (&conn->ksnc_tx_list);
- ktoenal_process_transmit (conn, &flags); /* drops and regains ksnd_sched_lock */
- }
-
- if (!did_something || /* nothing to do */
- ++nloops == SOCKNAL_RESCHED) /* hogging CPU? */
- {
- spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
-
- nloops = 0;
-
- if (!did_something) { /* wait for something to do */
- rc = wait_event_interruptible (ktoenal_data.ksnd_sched_waitq,
- ktoenal_data.ksnd_shuttingdown ||
- !list_empty (&ktoenal_data.ksnd_rx_conns) ||
- !list_empty (&ktoenal_data.ksnd_tx_conns));
- LASSERT (rc == 0);
- } else
- our_cond_resched();
-
- spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags);
- }
- }
-
- spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
- ktoenal_thread_fini ();
- return (0);
-}
-
-
-int
-ktoenal_reaper (void *arg)
-{
- unsigned long flags;
- ksock_conn_t *conn;
- int rc;
-
- kportal_daemonize ("ktoenal_reaper");
- kportal_blockallsigs ();
-
- while (!ktoenal_data.ksnd_shuttingdown)
- {
- spin_lock_irqsave (&ktoenal_data.ksnd_reaper_lock, flags);
-
- if (list_empty (&ktoenal_data.ksnd_reaper_list))
- conn = NULL;
- else
- {
- conn = list_entry (ktoenal_data.ksnd_reaper_list.next,
- ksock_conn_t, ksnc_list);
- list_del (&conn->ksnc_list);
- }
-
- spin_unlock_irqrestore (&ktoenal_data.ksnd_reaper_lock, flags);
-
- if (conn != NULL)
- ktoenal_close_conn (conn);
- else {
- rc = wait_event_interruptible (ktoenal_data.ksnd_reaper_waitq,
- ktoenal_data.ksnd_shuttingdown ||
- !list_empty(&ktoenal_data.ksnd_reaper_list));
- LASSERT (rc == 0);
- }
- }
-
- ktoenal_thread_fini ();
- return (0);
-}
-
-#define POLLREAD (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)
-#define POLLWRITE (POLLOUT | POLLWRNORM | POLLWRBAND)
-
-int
-ktoenal_pollthread(void *arg)
-{
- unsigned int mask;
- struct list_head *tmp;
- ksock_conn_t *conn;
-
- /* Save the task struct for waking it up */
- ktoenal_data.ksnd_pollthread_tsk = current;
-
- kportal_daemonize ("ktoenal_pollthread");
- kportal_blockallsigs ();
-
- poll_initwait(&ktoenal_data.ksnd_pwait);
-
- while(!ktoenal_data.ksnd_shuttingdown) {
-
- set_current_state(TASK_INTERRUPTIBLE);
-
- read_lock (&ktoenal_data.ksnd_socklist_lock);
- list_for_each(tmp, &ktoenal_data.ksnd_socklist) {
-
- conn = list_entry(tmp, ksock_conn_t, ksnc_list);
- atomic_inc(&conn->ksnc_refcount);
- read_unlock (&ktoenal_data.ksnd_socklist_lock);
-
- mask = conn->ksnc_file->f_op->poll(conn->ksnc_file,
- ktoenal_data.ksnd_slistchange ?
- &ktoenal_data.ksnd_pwait : NULL);
-
- if(mask & POLLREAD) {
- ktoenal_data_ready(conn);
-
- }
- if (mask & POLLWRITE) {
- ktoenal_write_space(conn);
-
- }
- if (mask & (POLLERR | POLLHUP)) {
- /* Do error processing */
- }
-
- read_lock (&ktoenal_data.ksnd_socklist_lock);
- if(atomic_dec_and_test(&conn->ksnc_refcount))
- _ktoenal_put_conn(conn);
- }
- ktoenal_data.ksnd_slistchange = 0;
- read_unlock (&ktoenal_data.ksnd_socklist_lock);
-
- schedule_timeout(MAX_SCHEDULE_TIMEOUT);
- if(ktoenal_data.ksnd_slistchange) {
- poll_freewait(&ktoenal_data.ksnd_pwait);
- poll_initwait(&ktoenal_data.ksnd_pwait);
- }
- }
- poll_freewait(&ktoenal_data.ksnd_pwait);
- ktoenal_thread_fini();
- return (0);
-}
-
-void
-ktoenal_data_ready (ksock_conn_t *conn)
-{
- unsigned long flags;
- ENTRY;
-
- if (!test_and_set_bit (0, &conn->ksnc_rx_ready)) {
- spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags);
-
- if (!conn->ksnc_rx_scheduled) { /* not being progressed */
- list_add_tail (&conn->ksnc_rx_list,
- &ktoenal_data.ksnd_rx_conns);
- conn->ksnc_rx_scheduled = 1;
- /* extra ref for scheduler */
- atomic_inc (&conn->ksnc_refcount);
-
- /* This is done to avoid the effects of a sequence
- * of events in which the rx_ready is lost
- */
- conn->ksnc_rx_ready=1;
-
- if (waitqueue_active (&ktoenal_data.ksnd_sched_waitq))
- wake_up (&ktoenal_data.ksnd_sched_waitq);
- }
-
- spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
- }
-
- EXIT;
-}
-
-void
-ktoenal_write_space (ksock_conn_t *conn)
-{
- unsigned long flags;
-
- CDEBUG (D_NET, "conn %p%s%s%s\n",
- conn,
- (conn == NULL) ? "" : (test_bit (0, &conn->ksnc_tx_ready) ? " ready" : " blocked"),
- (conn == NULL) ? "" : (conn->ksnc_tx_scheduled ? " scheduled" : " idle"),
- (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ? " empty" : " queued"));
-
-
- if (!test_and_set_bit (0, &conn->ksnc_tx_ready)) {
- spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags);
-
- if (!list_empty (&conn->ksnc_tx_queue) && /* packets to send */
- !conn->ksnc_tx_scheduled) { /* not being progressed */
-
- list_add_tail (&conn->ksnc_tx_list,
- &ktoenal_data.ksnd_tx_conns);
- conn->ksnc_tx_scheduled = 1;
- /* extra ref for scheduler */
- atomic_inc (&conn->ksnc_refcount);
-
- if (waitqueue_active (&ktoenal_data.ksnd_sched_waitq))
- wake_up (&ktoenal_data.ksnd_sched_waitq);
- }
- spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
- }
-}
-
-nal_cb_t ktoenal_lib = {
- nal_data: &ktoenal_data, /* NAL private data */
- cb_send: ktoenal_send,
- cb_recv: ktoenal_recv,
- cb_read: ktoenal_read,
- cb_write: ktoenal_write,
- cb_malloc: ktoenal_malloc,
- cb_free: ktoenal_free,
- cb_printf: ktoenal_printf,
- cb_cli: ktoenal_cli,
- cb_sti: ktoenal_sti,
- cb_dist: ktoenal_dist
-};
copied = 0;
list_for_each(pos, &my_pages) {
unsigned long to_copy;
- page = list_entry(pos, struct page, list);
+ void *addr;
+ page = list_entry(pos, struct page, list);
to_copy = min(total - off, PAGE_SIZE);
if (to_copy == 0) {
off = 0;
to_copy = min(debug_size - off, PAGE_SIZE);
}
finish_partial:
- memcpy(kmap(page), debug_buf + off, to_copy);
- kunmap(page);
+ addr = kmap_atomic(page, KM_USER0);
+ memcpy(addr, debug_buf + off, to_copy);
+ kunmap_atomic(addr, KM_USER0);
copied += to_copy;
if (copied >= total)
break;
case QSWNAL:
case GMNAL:
case IBNAL:
- case TOENAL:
case SCIMACNAL:
sprintf(str, "%u:%u", (__u32)(nid >> 32), (__u32)nid);
break;
char *portals_debug_dumpstack(void)
{
- int size;
- unsigned long addr;
- char *buf = stack_backtrace;
- char *pbuf = buf;
- unsigned long *stack = (unsigned long *)&buf;
-
- size = sprintf(pbuf, " Call Trace: ");
- pbuf += size;
- while (((long) stack & (THREAD_SIZE-1)) != 0) {
- addr = *stack++;
- if (is_kernel_text_address(addr)) {
- size = sprintf(pbuf, "[<%08lx>] ", addr);
- pbuf += size;
- if (buf + LUSTRE_TRACE_SIZE <= pbuf + 12)
- break;
- }
- }
-
- return buf;
+ asm("int $3");
+ return "dump stack";
}
#elif defined(__i386__)
return (PORTAL_SYMBOL_GET(kqswnal_ni));
case SOCKNAL:
return (PORTAL_SYMBOL_GET(ksocknal_ni));
- case TOENAL:
- return (PORTAL_SYMBOL_GET(ktoenal_ni));
case GMNAL:
return (PORTAL_SYMBOL_GET(kgmnal_ni));
case IBNAL:
case SOCKNAL:
PORTAL_SYMBOL_PUT(ksocknal_ni);
break;
- case TOENAL:
- PORTAL_SYMBOL_PUT(ktoenal_ni);
- break;
case GMNAL:
PORTAL_SYMBOL_PUT(kgmnal_ni);
break;
reply.msg.reply.dst_wmd = hdr->msg.get.return_wmd;
+ /* NB call lib_send() _BEFORE_ lib_recv() completes the incoming
+ * message. Some NALs _require_ this to implement optimized GET */
+
rc = lib_send (nal, private, msg, &reply, PTL_MSG_REPLY,
hdr->src_nid, hdr->src_pid, md, offset, mlength);
if (rc != PTL_OK)
MODULE_PARM(nal, "i");
MODULE_PARM_DESC(nal, "Use the specified NAL "
- "(6-kscimacnal, 4-toenal, 2-ksocknal, 1-kqswnal)");
+ "(6-kscimacnal, 2-ksocknal, 1-kqswnal)");
MODULE_AUTHOR("Brian Behlendorf (LLNL)");
MODULE_DESCRIPTION("A kernel space ping server for portals testing");
MODULE_PARM(nal, "i");
MODULE_PARM_DESC(nal, "Use the specified NAL "
- "(6-kscimacnal, 4-toenal, 2-ksocknal, 1-kqswnal)");
+ "(6-kscimacnal, 2-ksocknal, 1-kqswnal)");
MODULE_AUTHOR("Brian Behlendorf (LLNL)");
MODULE_DESCRIPTION("A kernel space ping server for portals testing");
fi
case "$1" in
- toe)
- /sbin/insmod ../oslib/portals.o
- /sbin/insmod ../toenal/ktoenal.o
- /sbin/insmod ./$PING
- echo ktoenal > /tmp/nal
- ;;
-
tcp)
/sbin/insmod ../oslib/portals.o
/sbin/insmod ../socknal/ksocknal.o
;;
*)
- echo "Usage : ${0} < tcp | toe | elan | gm>"
+ echo "Usage : ${0} < tcp | elan | gm>"
exit 1;
esac
exit 0;
fi
case "$1" in
- toe)
- /sbin/insmod ../oslib/portals.o
- /sbin/insmod ../toenal/ktoenal.o
- /sbin/insmod ./$PING nal=4
- echo ktoenal > /tmp/nal
- ;;
-
tcp)
/sbin/insmod ../oslib/portals.o
/sbin/insmod ../socknal/ksocknal.o
;;
*)
- echo "Usage : ${0} < tcp | toe | elan | gm>"
+ echo "Usage : ${0} < tcp | elan | gm>"
exit 1;
esac
../utils/acceptor 9999&
/* Assume sufficient socket buffering for this message */
rc = syscall(SYS_write, sockfd, &hdr, sizeof(hdr));
if (rc <= 0) {
- CERROR ("Error %d sending HELLO to %llx\n", rc, *nid);
+ CERROR ("Error %d sending HELLO to "LPX64"\n", rc, *nid);
return (rc);
}
rc = syscall(SYS_read, sockfd, hmv, sizeof(*hmv));
if (rc <= 0) {
- CERROR ("Error %d reading HELLO from %llx\n", rc, *nid);
+ CERROR ("Error %d reading HELLO from "LPX64"\n", rc, *nid);
return (rc);
}
if (hmv->magic != __le32_to_cpu (PORTALS_PROTO_MAGIC)) {
- CERROR ("Bad magic %#08x (%#08x expected) from %llx\n",
+ CERROR ("Bad magic %#08x (%#08x expected) from "LPX64"\n",
__cpu_to_le32 (hmv->magic), PORTALS_PROTO_MAGIC, *nid);
return (-EPROTO);
}
if (hmv->version_major != __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR) ||
hmv->version_minor != __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR)) {
CERROR ("Incompatible protocol version %d.%d (%d.%d expected)"
- " from %llx\n",
+ " from "LPX64"\n",
__le16_to_cpu (hmv->version_major),
__le16_to_cpu (hmv->version_minor),
PORTALS_PROTO_VERSION_MAJOR,
rc = syscall(SYS_read, sockfd, hmv + 1, sizeof(hdr) - sizeof(*hmv));
if (rc <= 0) {
- CERROR ("Error %d reading rest of HELLO hdr from %llx\n",
+ CERROR ("Error %d reading rest of HELLO hdr from "LPX64"\n",
rc, *nid);
return (rc);
}
if (hdr.type != __cpu_to_le32 (PTL_MSG_HELLO) ||
hdr.payload_length != __cpu_to_le32 (0)) {
CERROR ("Expecting a HELLO hdr with 0 payload,"
- " but got type %d with %d payload from %llx\n",
+ " but got type %d with %d payload from "LPX64"\n",
__le32_to_cpu (hdr.type),
__le32_to_cpu (hdr.payload_length), *nid);
return (-EPROTO);
if (*nid == PTL_NID_ANY) { /* don't know peer's nid yet */
*nid = __le64_to_cpu(hdr.src_nid);
} else if (*nid != __le64_to_cpu (hdr.src_nid)) {
- CERROR ("Connected to nid %llx, but expecting %llx\n",
+ CERROR ("Connected to nid "LPX64", but expecting "LPX64"\n",
__le64_to_cpu (hdr.src_nid), *nid);
return (-EPROTO);
}
/* Assume sufficient socket buffering for this message */
rc = syscall(SYS_write, sockfd, &hdr, sizeof(hdr));
if (rc <= 0) {
- CERROR ("Error %d sending HELLO to %llx\n", rc, *nid);
+ CERROR ("Error %d sending HELLO to "LPX64"\n", rc, *nid);
return (rc);
}
rc = syscall(SYS_read, sockfd, hmv, sizeof(*hmv));
if (rc <= 0) {
- CERROR ("Error %d reading HELLO from %llx\n", rc, *nid);
+ CERROR ("Error %d reading HELLO from "LPX64"\n", rc, *nid);
return (rc);
}
if (hmv->magic != __le32_to_cpu (PORTALS_PROTO_MAGIC)) {
- CERROR ("Bad magic %#08x (%#08x expected) from %llx\n",
+ CERROR ("Bad magic %#08x (%#08x expected) from "LPX64"\n",
__cpu_to_le32 (hmv->magic), PORTALS_PROTO_MAGIC, *nid);
return (-EPROTO);
}
if (hmv->version_major != __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR) ||
hmv->version_minor != __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR)) {
CERROR ("Incompatible protocol version %d.%d (%d.%d expected)"
- " from %llx\n",
+ " from "LPX64"\n",
__le16_to_cpu (hmv->version_major),
__le16_to_cpu (hmv->version_minor),
PORTALS_PROTO_VERSION_MAJOR,
rc = syscall(SYS_read, sockfd, hmv + 1, sizeof(hdr) - sizeof(*hmv));
if (rc <= 0) {
- CERROR ("Error %d reading rest of HELLO hdr from %llx\n",
+ CERROR ("Error %d reading rest of HELLO hdr from "LPX64"\n",
rc, *nid);
return (rc);
}
if (hdr.type != __cpu_to_le32 (PTL_MSG_HELLO) ||
hdr.payload_length != __cpu_to_le32 (0)) {
CERROR ("Expecting a HELLO hdr with 0 payload,"
- " but got type %d with %d payload from %llx\n",
+ " but got type %d with %d payload from "LPX64"\n",
__le32_to_cpu (hdr.type),
__le32_to_cpu (hdr.payload_length), *nid);
return (-EPROTO);
if (*nid == PTL_NID_ANY) { /* don't know peer's nid yet */
*nid = __le64_to_cpu(hdr.src_nid);
} else if (*nid != __le64_to_cpu (hdr.src_nid)) {
- CERROR ("Connected to nid %llx, but expecting %llx\n",
+ CERROR ("Connected to nid "LPX64", but expecting "LPX64"\n",
__le64_to_cpu (hdr.src_nid), *nid);
return (-EPROTO);
}
static name2num_t nalnames[] = {
{"any", 0},
{"tcp", SOCKNAL},
- {"toe", TOENAL},
{"elan", QSWNAL},
{"gm", GMNAL},
{"ib", IBNAL},
return 0;
}
- if (!g_nal_is_compatible (argv[0], SOCKNAL, TOENAL, 0))
+ if (!g_nal_is_compatible (argv[0], SOCKNAL, 0))
return -1;
rc = ptl_parse_ipaddr (&ipaddr, argv[1]);
return 0;
}
- if (!g_nal_is_compatible (NULL, SOCKNAL, TOENAL, 0))
+ if (!g_nal_is_compatible (NULL, SOCKNAL, 0))
return 0;
if (argc >= 2 &&
return 0;
}
- if (!g_nal_is_compatible (argv[0], SOCKNAL, TOENAL, 0))
+ if (!g_nal_is_compatible (argv[0], SOCKNAL, 0))
return -1;
if (argc > 1 &&
Index: linux-2.4.21-chaos/fs/ext3/super.c
===================================================================
---- linux-2.4.21-chaos.orig/fs/ext3/super.c 2003-12-12 16:18:41.000000000 +0300
-+++ linux-2.4.21-chaos/fs/ext3/super.c 2003-12-12 16:18:43.000000000 +0300
-@@ -425,6 +425,220 @@
+--- linux-2.4.21-chaos.orig/fs/ext3/super.c 2004-01-12 19:20:07.000000000 +0300
++++ linux-2.4.21-chaos/fs/ext3/super.c 2004-01-13 17:25:49.000000000 +0300
+@@ -425,6 +425,221 @@
}
}
+
+ clear_opt(sbi->s_mount_opt, ASYNCDEL);
+ wake_up(&sbi->s_delete_thread_queue);
-+ wait_event(sbi->s_delete_waiter_queue, list_empty(&sbi->s_delete_list));
++ wait_event(sbi->s_delete_waiter_queue,
++ sbi->s_delete_list.next == 0 && sbi->s_delete_inodes == 0);
+}
+
+/* Instead of playing games with the inode flags, destruction, etc we just
void ext3_put_super (struct super_block * sb)
{
struct ext3_sb_info *sbi = EXT3_SB(sb);
-@@ -432,6 +646,7 @@
+@@ -432,6 +647,7 @@
kdev_t j_dev = sbi->s_journal->j_dev;
int i;
-+ ext3_stop_delete_thread(sbi);
++ J_ASSERT(sbi->s_delete_inodes == 0);
ext3_xattr_put_super(sb);
journal_destroy(sbi->s_journal);
if (!(sb->s_flags & MS_RDONLY)) {
-@@ -501,7 +716,11 @@
+@@ -501,7 +717,11 @@
write_inode: ext3_write_inode, /* BKL not held. Don't need */
dirty_inode: ext3_dirty_inode, /* BKL not held. We take it */
put_inode: ext3_put_inode, /* BKL not held. Don't need */
put_super: ext3_put_super, /* BKL held */
write_super: ext3_write_super, /* BKL held */
sync_fs: ext3_sync_fs,
-@@ -579,6 +798,13 @@
+@@ -579,6 +799,13 @@
*mount_flags &= ~MS_POSIXACL;
else
#endif
if (!strcmp (this_char, "bsddf"))
clear_opt (*mount_options, MINIX_DF);
else if (!strcmp (this_char, "nouid32")) {
-@@ -1283,6 +1509,7 @@
+@@ -1283,6 +1510,7 @@
}
ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
/*
* akpm: core read_super() calls in here with the superblock locked.
* That deadlocks, because orphan cleanup needs to lock the superblock
-@@ -1746,6 +1973,9 @@
+@@ -1676,7 +1904,12 @@
+ static int ext3_sync_fs(struct super_block *sb)
+ {
+ tid_t target;
+-
++
++ if (atomic_read(&sb->s_active) == 0) {
++ /* fs is being umounted: time to stop delete thread */
++ ext3_stop_delete_thread(EXT3_SB(sb));
++ }
++
+ sb->s_dirt = 0;
+ target = log_start_commit(EXT3_SB(sb)->s_journal, NULL);
+ log_wait_commit(EXT3_SB(sb)->s_journal, target);
+@@ -1746,6 +1979,9 @@
if (!parse_options(data, &tmp, sbi, &mount_flags, &tmp, 1))
return -EINVAL;
Index: linux-2.4.21-chaos/fs/ext3/inode.c
===================================================================
---- linux-2.4.21-chaos.orig/fs/ext3/inode.c 2003-12-12 16:18:40.000000000 +0300
-+++ linux-2.4.21-chaos/fs/ext3/inode.c 2003-12-12 16:18:43.000000000 +0300
+--- linux-2.4.21-chaos.orig/fs/ext3/inode.c 2004-01-12 19:20:06.000000000 +0300
++++ linux-2.4.21-chaos/fs/ext3/inode.c 2004-01-12 19:20:07.000000000 +0300
@@ -2179,6 +2179,118 @@
return; /* AKPM: return what? */
}
* inode's underlying buffer_head on success.
Index: linux-2.4.21-chaos/fs/ext3/file.c
===================================================================
---- linux-2.4.21-chaos.orig/fs/ext3/file.c 2003-12-12 16:18:27.000000000 +0300
-+++ linux-2.4.21-chaos/fs/ext3/file.c 2003-12-12 16:18:43.000000000 +0300
+--- linux-2.4.21-chaos.orig/fs/ext3/file.c 2004-01-12 19:20:06.000000000 +0300
++++ linux-2.4.21-chaos/fs/ext3/file.c 2004-01-12 19:20:07.000000000 +0300
@@ -132,7 +132,11 @@
};
getxattr: ext3_getxattr, /* BKL held */
Index: linux-2.4.21-chaos/include/linux/ext3_fs.h
===================================================================
---- linux-2.4.21-chaos.orig/include/linux/ext3_fs.h 2003-12-12 16:18:40.000000000 +0300
-+++ linux-2.4.21-chaos/include/linux/ext3_fs.h 2003-12-12 16:18:43.000000000 +0300
+--- linux-2.4.21-chaos.orig/include/linux/ext3_fs.h 2004-01-12 19:20:06.000000000 +0300
++++ linux-2.4.21-chaos/include/linux/ext3_fs.h 2004-01-12 19:20:07.000000000 +0300
@@ -195,6 +195,7 @@
*/
#define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */
extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
Index: linux-2.4.21-chaos/include/linux/ext3_fs_sb.h
===================================================================
---- linux-2.4.21-chaos.orig/include/linux/ext3_fs_sb.h 2003-12-12 16:18:41.000000000 +0300
-+++ linux-2.4.21-chaos/include/linux/ext3_fs_sb.h 2003-12-12 16:18:43.000000000 +0300
+--- linux-2.4.21-chaos.orig/include/linux/ext3_fs_sb.h 2004-01-12 19:20:07.000000000 +0300
++++ linux-2.4.21-chaos/include/linux/ext3_fs_sb.h 2004-01-12 20:53:51.000000000 +0300
@@ -29,6 +29,8 @@
#define EXT3_MAX_GROUP_LOADED 32
Index: linux-2.4.21-suse/fs/ext3/super.c
===================================================================
---- linux-2.4.21-suse.orig/fs/ext3/super.c 2003-10-30 02:03:04.000000000 +0300
-+++ linux-2.4.21-suse/fs/ext3/super.c 2003-10-30 02:05:38.000000000 +0300
-@@ -400,6 +400,220 @@
+--- linux-2.4.21-suse.orig/fs/ext3/super.c 2004-01-12 19:49:25.000000000 +0300
++++ linux-2.4.21-suse/fs/ext3/super.c 2004-01-13 17:39:59.000000000 +0300
+@@ -400,6 +400,221 @@
}
}
+
+ clear_opt(sbi->s_mount_opt, ASYNCDEL);
+ wake_up(&sbi->s_delete_thread_queue);
-+ wait_event(sbi->s_delete_waiter_queue, list_empty(&sbi->s_delete_list));
++ wait_event(sbi->s_delete_waiter_queue,
++ sbi->s_delete_list.next == 0 && sbi->s_delete_inodes == 0);
+}
+
+/* Instead of playing games with the inode flags, destruction, etc we just
void ext3_put_super (struct super_block * sb)
{
struct ext3_sb_info *sbi = EXT3_SB(sb);
-@@ -407,6 +621,7 @@
+@@ -407,6 +622,7 @@
kdev_t j_dev = sbi->s_journal->j_dev;
int i;
-+ ext3_stop_delete_thread(sbi);
++ J_ASSERT(sbi->s_delete_inodes == 0);
ext3_xattr_put_super(sb);
journal_destroy(sbi->s_journal);
if (!(sb->s_flags & MS_RDONLY)) {
-@@ -455,7 +670,11 @@
+@@ -455,7 +671,11 @@
write_inode: ext3_write_inode, /* BKL not held. Don't need */
dirty_inode: ext3_dirty_inode, /* BKL not held. We take it */
put_inode: ext3_put_inode, /* BKL not held. Don't need */
put_super: ext3_put_super, /* BKL held */
write_super: ext3_write_super, /* BKL held */
sync_fs: ext3_sync_fs,
-@@ -524,6 +743,13 @@
+@@ -524,6 +744,13 @@
clear_opt (*mount_options, XATTR_USER);
else
#endif
if (!strcmp (this_char, "bsddf"))
clear_opt (*mount_options, MINIX_DF);
else if (!strcmp (this_char, "nouid32")) {
-@@ -1223,6 +1449,7 @@
+@@ -1223,6 +1450,7 @@
}
ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
/*
* akpm: core read_super() calls in here with the superblock locked.
* That deadlocks, because orphan cleanup needs to lock the superblock
-@@ -1678,6 +1905,9 @@
+@@ -1614,7 +1842,12 @@
+ static int ext3_sync_fs(struct super_block *sb)
+ {
+ tid_t target;
+-
++
++ if (atomic_read(&sb->s_active) == 0) {
++ /* fs is being umounted: time to stop delete thread */
++ ext3_stop_delete_thread(EXT3_SB(sb));
++ }
++
+ sb->s_dirt = 0;
+ target = log_start_commit(EXT3_SB(sb)->s_journal, NULL);
+ log_wait_commit(EXT3_SB(sb)->s_journal, target);
+@@ -1678,6 +1911,9 @@
if (!parse_options(data, &tmp, sbi, &tmp, 1))
return -EINVAL;
Index: linux-2.4.21-suse/fs/ext3/inode.c
===================================================================
---- linux-2.4.21-suse.orig/fs/ext3/inode.c 2003-10-30 02:03:57.000000000 +0300
-+++ linux-2.4.21-suse/fs/ext3/inode.c 2003-10-30 02:05:38.000000000 +0300
+--- linux-2.4.21-suse.orig/fs/ext3/inode.c 2004-01-13 17:38:09.000000000 +0300
++++ linux-2.4.21-suse/fs/ext3/inode.c 2004-01-13 17:38:10.000000000 +0300
@@ -2552,6 +2552,118 @@
return err;
}
* iloc->bh. This _must_ be cleaned up later.
Index: linux-2.4.21-suse/fs/ext3/file.c
===================================================================
---- linux-2.4.21-suse.orig/fs/ext3/file.c 2003-10-30 01:40:33.000000000 +0300
-+++ linux-2.4.21-suse/fs/ext3/file.c 2003-10-30 02:05:38.000000000 +0300
+--- linux-2.4.21-suse.orig/fs/ext3/file.c 2004-01-12 19:49:25.000000000 +0300
++++ linux-2.4.21-suse/fs/ext3/file.c 2004-01-13 17:38:10.000000000 +0300
@@ -125,7 +125,11 @@
};
getxattr: ext3_getxattr, /* BKL held */
Index: linux-2.4.21-suse/include/linux/ext3_fs.h
===================================================================
---- linux-2.4.21-suse.orig/include/linux/ext3_fs.h 2003-10-30 02:03:57.000000000 +0300
-+++ linux-2.4.21-suse/include/linux/ext3_fs.h 2003-10-30 02:06:05.000000000 +0300
+--- linux-2.4.21-suse.orig/include/linux/ext3_fs.h 2004-01-13 17:38:09.000000000 +0300
++++ linux-2.4.21-suse/include/linux/ext3_fs.h 2004-01-13 17:38:10.000000000 +0300
@@ -193,6 +193,7 @@
*/
#define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */
/* ioctl.c */
Index: linux-2.4.21-suse/include/linux/ext3_fs_sb.h
===================================================================
---- linux-2.4.21-suse.orig/include/linux/ext3_fs_sb.h 2003-10-30 02:03:04.000000000 +0300
-+++ linux-2.4.21-suse/include/linux/ext3_fs_sb.h 2003-10-30 02:05:38.000000000 +0300
+--- linux-2.4.21-suse.orig/include/linux/ext3_fs_sb.h 2004-01-12 19:49:25.000000000 +0300
++++ linux-2.4.21-suse/include/linux/ext3_fs_sb.h 2004-01-13 17:38:10.000000000 +0300
@@ -29,6 +29,8 @@
#define EXT3_MAX_GROUP_LOADED 8
include/linux/ext3_fs.h | 2
7 files changed, 304 insertions(+), 1 deletion(-)
---- linux-2.6.0-test6/Documentation/filesystems/ext2.txt~iopen-2.6.0-test6 2002-11-11 06:28:06.000000000 +0300
-+++ linux-2.6.0-test6-alexey/Documentation/filesystems/ext2.txt 2003-10-14 17:03:48.000000000 +0400
-@@ -35,6 +35,22 @@ resgid=n The group ID which may use th
+Index: linux-2.6.0/Documentation/filesystems/ext2.txt
+===================================================================
+--- linux-2.6.0.orig/Documentation/filesystems/ext2.txt 2002-11-11 06:28:06.000000000 +0300
++++ linux-2.6.0/Documentation/filesystems/ext2.txt 2004-01-07 17:12:07.000000000 +0300
+@@ -35,6 +35,22 @@
sb=n Use alternate superblock at this location.
grpquota,noquota,quota,usrquota Quota options are silently ignored by ext2.
---- linux-2.6.0-test6/fs/ext3/inode.c~iopen-2.6.0-test6 2003-10-14 17:03:47.000000000 +0400
-+++ linux-2.6.0-test6-alexey/fs/ext3/inode.c 2003-10-14 17:03:48.000000000 +0400
+Index: linux-2.6.0/fs/ext3/inode.c
+===================================================================
+--- linux-2.6.0.orig/fs/ext3/inode.c 2004-01-04 15:27:40.000000000 +0300
++++ linux-2.6.0/fs/ext3/inode.c 2004-01-07 17:12:07.000000000 +0300
@@ -37,6 +37,7 @@
#include <linux/mpage.h>
#include <linux/uio.h>
#include "acl.h"
/*
-@@ -2479,6 +2480,8 @@ void ext3_read_inode(struct inode * inod
+@@ -2472,6 +2473,8 @@
ei->i_acl = EXT3_ACL_NOT_CACHED;
ei->i_default_acl = EXT3_ACL_NOT_CACHED;
#endif
if (ext3_get_inode_loc(inode, &iloc, 0))
goto bad_inode;
bh = iloc.bh;
---- /dev/null 2003-01-30 13:24:37.000000000 +0300
-+++ linux-2.6.0-test6-alexey/fs/ext3/iopen.c 2003-10-14 17:03:48.000000000 +0400
-@@ -0,0 +1,239 @@
+Index: linux-2.6.0/fs/ext3/iopen.c
+===================================================================
+--- linux-2.6.0.orig/fs/ext3/iopen.c 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.6.0/fs/ext3/iopen.c 2004-01-07 17:14:22.000000000 +0300
+@@ -0,0 +1,223 @@
+
+
+/*
+ break;
+ }
+ }
++ spin_unlock(&dcache_lock);
+
-+ if (!goal) {
-+ spin_unlock(&dcache_lock);
++ if (!goal)
+ return NULL;
-+ }
+
-+ /* Move the goal to the de hash queue */
+ goal->d_flags &= ~DCACHE_DISCONNECTED;
-+ hlist_add_before(&goal->d_hash, &de->d_hash);
-+ hlist_del(&goal->d_hash);
-+
-+ list_del(&goal->d_child);
-+ list_del(&de->d_child);
++ d_rehash(de);
++ d_move(goal, de);
+
-+ /* Switch the parents and the names.. */
-+ switch_names(goal, de);
-+ do_switch(goal->d_parent, de->d_parent);
-+ do_switch(goal->d_name.len, de->d_name.len);
-+ do_switch(goal->d_name.hash, de->d_name.hash);
-+
-+ /* And add them back to the (new) parent lists */
-+ list_add(&goal->d_child, &goal->d_parent->d_subdirs);
-+ list_add(&de->d_child, &de->d_parent->d_subdirs);
-+
-+ spin_unlock(&dcache_lock);
+ return goal;
+}
+
+
+ return 1;
+}
---- /dev/null 2003-01-30 13:24:37.000000000 +0300
-+++ linux-2.6.0-test6-alexey/fs/ext3/iopen.h 2003-10-14 17:03:48.000000000 +0400
+Index: linux-2.6.0/fs/ext3/iopen.h
+===================================================================
+--- linux-2.6.0.orig/fs/ext3/iopen.h 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.6.0/fs/ext3/iopen.h 2004-01-07 17:12:07.000000000 +0300
@@ -0,0 +1,15 @@
+/*
+ * iopen.h
+extern int ext3_iopen_get_inode(struct inode * inode);
+
+
---- linux-2.6.0-test6/fs/ext3/namei.c~iopen-2.6.0-test6 2003-10-14 17:03:47.000000000 +0400
-+++ linux-2.6.0-test6-alexey/fs/ext3/namei.c 2003-10-14 17:03:48.000000000 +0400
+Index: linux-2.6.0/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.0.orig/fs/ext3/namei.c 2004-01-04 15:27:40.000000000 +0300
++++ linux-2.6.0/fs/ext3/namei.c 2004-01-07 17:12:45.000000000 +0300
@@ -37,6 +37,7 @@
#include <linux/buffer_head.h>
#include <linux/smp_lock.h>
#include "acl.h"
/*
-@@ -970,15 +971,21 @@ errout:
+@@ -970,15 +971,21 @@
}
#endif
bh = ext3_find_entry(dentry, &de);
inode = NULL;
if (bh) {
-@@ -991,6 +998,12 @@ static struct dentry *ext3_lookup(struct
+@@ -989,8 +996,14 @@
+ if (!inode)
+ return ERR_PTR(-EACCES);
}
- if (inode)
- return d_splice_alias(inode, dentry);
-+
+ if (inode && (alternate = iopen_connect_dentry(dentry, inode))) {
+ iput(inode);
+ return alternate;
+ }
+
+ if (inode)
+ return d_splice_alias(inode, dentry);
++
d_add(dentry, inode);
return NULL;
}
---- linux-2.6.0-test6/fs/ext3/super.c~iopen-2.6.0-test6 2003-10-14 17:03:47.000000000 +0400
-+++ linux-2.6.0-test6-alexey/fs/ext3/super.c 2003-10-14 17:03:48.000000000 +0400
-@@ -534,7 +534,7 @@ enum {
+Index: linux-2.6.0/fs/ext3/super.c
+===================================================================
+--- linux-2.6.0.orig/fs/ext3/super.c 2004-01-04 15:27:40.000000000 +0300
++++ linux-2.6.0/fs/ext3/super.c 2004-01-07 17:12:07.000000000 +0300
+@@ -535,7 +535,7 @@
Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_noload,
Opt_commit, Opt_journal_update, Opt_journal_inum,
Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
};
static match_table_t tokens = {
-@@ -573,6 +573,9 @@ static match_table_t tokens = {
+@@ -574,6 +574,9 @@
{Opt_ignore, "noquota"},
{Opt_ignore, "quota"},
{Opt_ignore, "usrquota"},
{Opt_err, NULL}
};
-@@ -760,6 +763,18 @@ static int parse_options (char * options
+@@ -761,6 +764,18 @@
case Opt_abort:
set_opt(sbi->s_mount_opt, ABORT);
break;
case Opt_ignore:
break;
default:
---- linux-2.6.0-test6/include/linux/ext3_fs.h~iopen-2.6.0-test6 2003-10-14 17:03:47.000000000 +0400
-+++ linux-2.6.0-test6-alexey/include/linux/ext3_fs.h 2003-10-14 17:03:48.000000000 +0400
-@@ -325,6 +325,8 @@ struct ext3_inode {
+Index: linux-2.6.0/fs/ext3/Makefile
+===================================================================
+--- linux-2.6.0.orig/fs/ext3/Makefile 2003-09-19 18:00:24.000000000 +0400
++++ linux-2.6.0/fs/ext3/Makefile 2004-01-07 17:12:07.000000000 +0300
+@@ -5,7 +5,7 @@
+ obj-$(CONFIG_EXT3_FS) += ext3.o
+
+ ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+- ioctl.o namei.o super.o symlink.o hash.o
++ ioctl.o namei.o super.o symlink.o hash.o iopen.o
+
+ ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
+ ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
+Index: linux-2.6.0/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.6.0.orig/include/linux/ext3_fs.h 2004-01-04 15:27:40.000000000 +0300
++++ linux-2.6.0/include/linux/ext3_fs.h 2004-01-07 17:12:07.000000000 +0300
+@@ -325,6 +325,8 @@
#define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */
#define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */
#define EXT3_MOUNT_POSIX_ACL 0x8000 /* POSIX Access Control Lists */
/* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
#ifndef _LINUX_EXT2_FS_H
-
-_
Index: linux-2.4.19-pre1/Documentation/Configure.help
===================================================================
---- linux-2.4.19-pre1.orig/Documentation/Configure.help 2003-11-20 19:01:44.000000000 +0300
-+++ linux-2.4.19-pre1/Documentation/Configure.help 2003-11-21 03:51:05.000000000 +0300
+--- linux-2.4.19-pre1.orig/Documentation/Configure.help 2004-01-14 01:11:29.000000000 +0300
++++ linux-2.4.19-pre1/Documentation/Configure.help 2004-01-14 01:11:49.000000000 +0300
@@ -14035,6 +14035,39 @@
be compiled as a module, and so this could be dangerous. Most
everyone wants to say Y here.
Index: linux-2.4.19-pre1/arch/alpha/defconfig
===================================================================
--- linux-2.4.19-pre1.orig/arch/alpha/defconfig 2001-11-20 02:19:42.000000000 +0300
-+++ linux-2.4.19-pre1/arch/alpha/defconfig 2003-11-21 03:51:05.000000000 +0300
++++ linux-2.4.19-pre1/arch/alpha/defconfig 2004-01-14 01:11:49.000000000 +0300
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
Index: linux-2.4.19-pre1/arch/alpha/kernel/entry.S
===================================================================
--- linux-2.4.19-pre1.orig/arch/alpha/kernel/entry.S 2001-11-10 00:45:35.000000000 +0300
-+++ linux-2.4.19-pre1/arch/alpha/kernel/entry.S 2003-11-21 03:51:05.000000000 +0300
++++ linux-2.4.19-pre1/arch/alpha/kernel/entry.S 2004-01-14 01:11:49.000000000 +0300
@@ -1148,3 +1148,16 @@
.quad sys_gettid
.quad sys_readahead
Index: linux-2.4.19-pre1/arch/arm/defconfig
===================================================================
--- linux-2.4.19-pre1.orig/arch/arm/defconfig 2001-05-20 04:43:05.000000000 +0400
-+++ linux-2.4.19-pre1/arch/arm/defconfig 2003-11-21 03:51:05.000000000 +0300
++++ linux-2.4.19-pre1/arch/arm/defconfig 2004-01-14 01:11:49.000000000 +0300
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
Index: linux-2.4.19-pre1/arch/arm/kernel/calls.S
===================================================================
--- linux-2.4.19-pre1.orig/arch/arm/kernel/calls.S 2001-10-08 21:39:18.000000000 +0400
-+++ linux-2.4.19-pre1/arch/arm/kernel/calls.S 2003-11-21 03:51:05.000000000 +0300
++++ linux-2.4.19-pre1/arch/arm/kernel/calls.S 2004-01-14 01:11:49.000000000 +0300
@@ -236,6 +236,22 @@
.long SYMBOL_NAME(sys_mincore)
/* 220 */ .long SYMBOL_NAME(sys_madvise)
.rept NR_syscalls - (__syscall_end - __syscall_start) / 4
Index: linux-2.4.19-pre1/arch/i386/defconfig
===================================================================
---- linux-2.4.19-pre1.orig/arch/i386/defconfig 2003-11-20 19:01:35.000000000 +0300
-+++ linux-2.4.19-pre1/arch/i386/defconfig 2003-11-21 03:51:05.000000000 +0300
+--- linux-2.4.19-pre1.orig/arch/i386/defconfig 2004-01-14 01:10:36.000000000 +0300
++++ linux-2.4.19-pre1/arch/i386/defconfig 2004-01-14 01:11:49.000000000 +0300
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
CONFIG_X86=y
CONFIG_ISA=y
# CONFIG_SBUS is not set
+Index: linux-2.4.19-pre1/arch/i386/kernel/entry.S
+===================================================================
+--- linux-2.4.19-pre1.orig/arch/i386/kernel/entry.S 2004-01-14 01:11:46.000000000 +0300
++++ linux-2.4.19-pre1/arch/i386/kernel/entry.S 2004-01-14 01:11:49.000000000 +0300
+@@ -619,18 +619,18 @@
+ .long SYMBOL_NAME(sys_ni_syscall) /* Reserved for Security */
+ .long SYMBOL_NAME(sys_gettid)
+ .long SYMBOL_NAME(sys_readahead) /* 225 */
+- .long SYMBOL_NAME(sys_ni_syscall) /* reserved for setxattr */
+- .long SYMBOL_NAME(sys_ni_syscall) /* reserved for lsetxattr */
+- .long SYMBOL_NAME(sys_ni_syscall) /* reserved for fsetxattr */
+- .long SYMBOL_NAME(sys_ni_syscall) /* reserved for getxattr */
+- .long SYMBOL_NAME(sys_ni_syscall) /* 230 reserved for lgetxattr */
+- .long SYMBOL_NAME(sys_ni_syscall) /* reserved for fgetxattr */
+- .long SYMBOL_NAME(sys_ni_syscall) /* reserved for listxattr */
+- .long SYMBOL_NAME(sys_ni_syscall) /* reserved for llistxattr */
+- .long SYMBOL_NAME(sys_ni_syscall) /* reserved for flistxattr */
+- .long SYMBOL_NAME(sys_ni_syscall) /* 235 reserved for removexattr */
+- .long SYMBOL_NAME(sys_ni_syscall) /* reserved for lremovexattr */
+- .long SYMBOL_NAME(sys_ni_syscall) /* reserved for fremovexattr */
++ .long SYMBOL_NAME(sys_setxattr)
++ .long SYMBOL_NAME(sys_lsetxattr)
++ .long SYMBOL_NAME(sys_fsetxattr)
++ .long SYMBOL_NAME(sys_getxattr)
++ .long SYMBOL_NAME(sys_lgetxattr) /* 230 */
++ .long SYMBOL_NAME(sys_fgetxattr)
++ .long SYMBOL_NAME(sys_listxattr)
++ .long SYMBOL_NAME(sys_llistxattr)
++ .long SYMBOL_NAME(sys_flistxattr)
++ .long SYMBOL_NAME(sys_removexattr) /* 235 */
++ .long SYMBOL_NAME(sys_lremovexattr)
++ .long SYMBOL_NAME(sys_fremovexattr)
+
+ .rept NR_syscalls-(.-sys_call_table)/4
+ .long SYMBOL_NAME(sys_ni_syscall)
Index: linux-2.4.19-pre1/arch/ia64/defconfig
===================================================================
--- linux-2.4.19-pre1.orig/arch/ia64/defconfig 2001-11-10 01:26:17.000000000 +0300
-+++ linux-2.4.19-pre1/arch/ia64/defconfig 2003-11-21 03:51:05.000000000 +0300
++++ linux-2.4.19-pre1/arch/ia64/defconfig 2004-01-14 01:11:49.000000000 +0300
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
Index: linux-2.4.19-pre1/arch/ia64/kernel/entry.S
===================================================================
--- linux-2.4.19-pre1.orig/arch/ia64/kernel/entry.S 2001-11-10 01:26:17.000000000 +0300
-+++ linux-2.4.19-pre1/arch/ia64/kernel/entry.S 2003-11-21 03:51:05.000000000 +0300
++++ linux-2.4.19-pre1/arch/ia64/kernel/entry.S 2004-01-14 01:11:49.000000000 +0300
@@ -1130,18 +1130,18 @@
data8 sys_getdents64
data8 sys_getunwind // 1215
Index: linux-2.4.19-pre1/arch/m68k/defconfig
===================================================================
--- linux-2.4.19-pre1.orig/arch/m68k/defconfig 2000-06-19 23:56:08.000000000 +0400
-+++ linux-2.4.19-pre1/arch/m68k/defconfig 2003-11-21 03:51:05.000000000 +0300
++++ linux-2.4.19-pre1/arch/m68k/defconfig 2004-01-14 01:11:49.000000000 +0300
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
Index: linux-2.4.19-pre1/arch/mips/defconfig
===================================================================
--- linux-2.4.19-pre1.orig/arch/mips/defconfig 2001-09-09 21:43:02.000000000 +0400
-+++ linux-2.4.19-pre1/arch/mips/defconfig 2003-11-21 03:51:05.000000000 +0300
++++ linux-2.4.19-pre1/arch/mips/defconfig 2004-01-14 01:11:49.000000000 +0300
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
Index: linux-2.4.19-pre1/arch/mips64/defconfig
===================================================================
--- linux-2.4.19-pre1.orig/arch/mips64/defconfig 2001-09-09 21:43:02.000000000 +0400
-+++ linux-2.4.19-pre1/arch/mips64/defconfig 2003-11-21 03:51:05.000000000 +0300
++++ linux-2.4.19-pre1/arch/mips64/defconfig 2004-01-14 01:11:49.000000000 +0300
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
# Code maturity level options
Index: linux-2.4.19-pre1/arch/ppc/defconfig
===================================================================
---- linux-2.4.19-pre1.orig/arch/ppc/defconfig 2003-11-20 19:01:35.000000000 +0300
-+++ linux-2.4.19-pre1/arch/ppc/defconfig 2003-11-21 03:51:05.000000000 +0300
+--- linux-2.4.19-pre1.orig/arch/ppc/defconfig 2004-01-14 01:10:36.000000000 +0300
++++ linux-2.4.19-pre1/arch/ppc/defconfig 2004-01-14 01:11:49.000000000 +0300
@@ -1,6 +1,20 @@
#
# Automatically generated make config: don't edit
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
Index: linux-2.4.19-pre1/arch/s390/defconfig
===================================================================
---- linux-2.4.19-pre1.orig/arch/s390/defconfig 2003-11-20 19:01:35.000000000 +0300
-+++ linux-2.4.19-pre1/arch/s390/defconfig 2003-11-21 03:51:05.000000000 +0300
+--- linux-2.4.19-pre1.orig/arch/s390/defconfig 2004-01-14 01:10:36.000000000 +0300
++++ linux-2.4.19-pre1/arch/s390/defconfig 2004-01-14 01:11:49.000000000 +0300
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
# CONFIG_MCA is not set
Index: linux-2.4.19-pre1/arch/s390/kernel/entry.S
===================================================================
---- linux-2.4.19-pre1.orig/arch/s390/kernel/entry.S 2003-11-20 19:01:35.000000000 +0300
-+++ linux-2.4.19-pre1/arch/s390/kernel/entry.S 2003-11-21 03:51:05.000000000 +0300
+--- linux-2.4.19-pre1.orig/arch/s390/kernel/entry.S 2004-01-14 01:10:36.000000000 +0300
++++ linux-2.4.19-pre1/arch/s390/kernel/entry.S 2004-01-14 01:11:49.000000000 +0300
@@ -599,8 +599,19 @@
.long sys_fcntl64
.long sys_ni_syscall /* 222 - reserved for posix_acl */
Index: linux-2.4.19-pre1/arch/s390x/defconfig
===================================================================
---- linux-2.4.19-pre1.orig/arch/s390x/defconfig 2003-11-20 19:01:35.000000000 +0300
-+++ linux-2.4.19-pre1/arch/s390x/defconfig 2003-11-21 03:51:05.000000000 +0300
+--- linux-2.4.19-pre1.orig/arch/s390x/defconfig 2004-01-14 01:10:36.000000000 +0300
++++ linux-2.4.19-pre1/arch/s390x/defconfig 2004-01-14 01:11:49.000000000 +0300
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
# CONFIG_MCA is not set
Index: linux-2.4.19-pre1/arch/s390x/kernel/entry.S
===================================================================
---- linux-2.4.19-pre1.orig/arch/s390x/kernel/entry.S 2003-11-20 19:01:35.000000000 +0300
-+++ linux-2.4.19-pre1/arch/s390x/kernel/entry.S 2003-11-21 03:51:05.000000000 +0300
+--- linux-2.4.19-pre1.orig/arch/s390x/kernel/entry.S 2004-01-14 01:10:36.000000000 +0300
++++ linux-2.4.19-pre1/arch/s390x/kernel/entry.S 2004-01-14 01:11:49.000000000 +0300
@@ -632,8 +632,19 @@
.long SYSCALL(sys_ni_syscall,sys32_fcntl64_wrapper)
.long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 222 - reserved for posix_acl */
Index: linux-2.4.19-pre1/arch/s390x/kernel/wrapper32.S
===================================================================
---- linux-2.4.19-pre1.orig/arch/s390x/kernel/wrapper32.S 2003-11-20 19:01:35.000000000 +0300
-+++ linux-2.4.19-pre1/arch/s390x/kernel/wrapper32.S 2003-11-21 03:51:05.000000000 +0300
+--- linux-2.4.19-pre1.orig/arch/s390x/kernel/wrapper32.S 2004-01-14 01:10:36.000000000 +0300
++++ linux-2.4.19-pre1/arch/s390x/kernel/wrapper32.S 2004-01-14 01:11:49.000000000 +0300
@@ -1091,3 +1091,95 @@
llgtr %r3,%r3 # struct stat64 *
llgfr %r4,%r4 # long
+
Index: linux-2.4.19-pre1/arch/sparc/defconfig
===================================================================
---- linux-2.4.19-pre1.orig/arch/sparc/defconfig 2003-11-20 19:01:35.000000000 +0300
-+++ linux-2.4.19-pre1/arch/sparc/defconfig 2003-11-21 03:51:05.000000000 +0300
+--- linux-2.4.19-pre1.orig/arch/sparc/defconfig 2004-01-14 01:10:36.000000000 +0300
++++ linux-2.4.19-pre1/arch/sparc/defconfig 2004-01-14 01:11:49.000000000 +0300
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
Index: linux-2.4.19-pre1/arch/sparc/kernel/systbls.S
===================================================================
--- linux-2.4.19-pre1.orig/arch/sparc/kernel/systbls.S 2001-10-21 21:36:54.000000000 +0400
-+++ linux-2.4.19-pre1/arch/sparc/kernel/systbls.S 2003-11-21 03:51:05.000000000 +0300
++++ linux-2.4.19-pre1/arch/sparc/kernel/systbls.S 2004-01-14 01:11:49.000000000 +0300
@@ -51,11 +51,11 @@
/*150*/ .long sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_poll, sys_getdents64
/*155*/ .long sys_fcntl64, sys_nis_syscall, sys_statfs, sys_fstatfs, sys_oldumount
/*200*/ .long sys_ssetmask, sys_sigsuspend, sys_newlstat, sys_uselib, old_readdir
Index: linux-2.4.19-pre1/arch/sparc64/defconfig
===================================================================
---- linux-2.4.19-pre1.orig/arch/sparc64/defconfig 2003-11-20 19:01:35.000000000 +0300
-+++ linux-2.4.19-pre1/arch/sparc64/defconfig 2003-11-21 03:51:05.000000000 +0300
+--- linux-2.4.19-pre1.orig/arch/sparc64/defconfig 2004-01-14 01:10:36.000000000 +0300
++++ linux-2.4.19-pre1/arch/sparc64/defconfig 2004-01-14 01:11:49.000000000 +0300
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
Index: linux-2.4.19-pre1/arch/sparc64/kernel/systbls.S
===================================================================
--- linux-2.4.19-pre1.orig/arch/sparc64/kernel/systbls.S 2001-10-21 21:36:54.000000000 +0400
-+++ linux-2.4.19-pre1/arch/sparc64/kernel/systbls.S 2003-11-21 03:51:05.000000000 +0300
++++ linux-2.4.19-pre1/arch/sparc64/kernel/systbls.S 2004-01-14 01:11:49.000000000 +0300
@@ -52,11 +52,11 @@
/*150*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_poll, sys_getdents64
.word sys32_fcntl64, sys_nis_syscall, sys32_statfs, sys32_fstatfs, sys_oldumount
/*200*/ .word sys_ssetmask, sys_nis_syscall, sys_newlstat, sys_uselib, sys_nis_syscall
Index: linux-2.4.19-pre1/fs/Config.in
===================================================================
---- linux-2.4.19-pre1.orig/fs/Config.in 2003-11-20 19:01:36.000000000 +0300
-+++ linux-2.4.19-pre1/fs/Config.in 2003-11-21 03:51:05.000000000 +0300
+--- linux-2.4.19-pre1.orig/fs/Config.in 2004-01-14 01:10:37.000000000 +0300
++++ linux-2.4.19-pre1/fs/Config.in 2004-01-14 01:11:49.000000000 +0300
@@ -22,6 +22,11 @@
dep_tristate 'BFS file system support (EXPERIMENTAL)' CONFIG_BFS_FS $CONFIG_EXPERIMENTAL
source fs/partitions/Config.in
Index: linux-2.4.19-pre1/fs/Makefile
===================================================================
---- linux-2.4.19-pre1.orig/fs/Makefile 2003-11-21 03:51:01.000000000 +0300
-+++ linux-2.4.19-pre1/fs/Makefile 2003-11-21 03:51:05.000000000 +0300
+--- linux-2.4.19-pre1.orig/fs/Makefile 2004-01-14 01:11:49.000000000 +0300
++++ linux-2.4.19-pre1/fs/Makefile 2004-01-14 01:11:49.000000000 +0300
@@ -14,7 +14,7 @@
super.o block_dev.o char_dev.o stat.o exec.o pipe.o namei.o \
fcntl.o ioctl.o readdir.o select.o fifo.o locks.o \
Index: linux-2.4.19-pre1/fs/ext2/Makefile
===================================================================
--- linux-2.4.19-pre1.orig/fs/ext2/Makefile 2001-10-11 19:05:18.000000000 +0400
-+++ linux-2.4.19-pre1/fs/ext2/Makefile 2003-11-21 03:51:05.000000000 +0300
++++ linux-2.4.19-pre1/fs/ext2/Makefile 2004-01-14 01:11:49.000000000 +0300
@@ -13,4 +13,8 @@
ioctl.o namei.o super.o symlink.o
obj-m := $(O_TARGET)
Index: linux-2.4.19-pre1/fs/ext2/file.c
===================================================================
--- linux-2.4.19-pre1.orig/fs/ext2/file.c 2001-10-11 19:05:18.000000000 +0400
-+++ linux-2.4.19-pre1/fs/ext2/file.c 2003-11-21 03:51:05.000000000 +0300
++++ linux-2.4.19-pre1/fs/ext2/file.c 2004-01-14 01:11:49.000000000 +0300
@@ -20,6 +20,7 @@
#include <linux/fs.h>
};
Index: linux-2.4.19-pre1/fs/ext2/ialloc.c
===================================================================
---- linux-2.4.19-pre1.orig/fs/ext2/ialloc.c 2003-11-20 19:01:36.000000000 +0300
-+++ linux-2.4.19-pre1/fs/ext2/ialloc.c 2003-11-21 03:51:05.000000000 +0300
+--- linux-2.4.19-pre1.orig/fs/ext2/ialloc.c 2004-01-14 01:10:37.000000000 +0300
++++ linux-2.4.19-pre1/fs/ext2/ialloc.c 2004-01-14 01:11:49.000000000 +0300
@@ -15,6 +15,7 @@
#include <linux/config.h>
#include <linux/fs.h>
}
Index: linux-2.4.19-pre1/fs/ext2/inode.c
===================================================================
---- linux-2.4.19-pre1.orig/fs/ext2/inode.c 2003-11-20 19:01:36.000000000 +0300
-+++ linux-2.4.19-pre1/fs/ext2/inode.c 2003-11-21 03:51:05.000000000 +0300
+--- linux-2.4.19-pre1.orig/fs/ext2/inode.c 2004-01-14 01:10:37.000000000 +0300
++++ linux-2.4.19-pre1/fs/ext2/inode.c 2004-01-14 01:11:49.000000000 +0300
@@ -39,6 +39,18 @@
static int ext2_update_inode(struct inode * inode, int do_sync);
Index: linux-2.4.19-pre1/fs/ext2/namei.c
===================================================================
--- linux-2.4.19-pre1.orig/fs/ext2/namei.c 2001-10-04 09:57:36.000000000 +0400
-+++ linux-2.4.19-pre1/fs/ext2/namei.c 2003-11-21 03:51:05.000000000 +0300
++++ linux-2.4.19-pre1/fs/ext2/namei.c 2004-01-14 01:11:49.000000000 +0300
@@ -31,6 +31,7 @@
#include <linux/fs.h>
};
Index: linux-2.4.19-pre1/fs/ext2/super.c
===================================================================
---- linux-2.4.19-pre1.orig/fs/ext2/super.c 2003-11-20 19:01:36.000000000 +0300
-+++ linux-2.4.19-pre1/fs/ext2/super.c 2003-11-21 03:51:05.000000000 +0300
+--- linux-2.4.19-pre1.orig/fs/ext2/super.c 2004-01-14 01:10:37.000000000 +0300
++++ linux-2.4.19-pre1/fs/ext2/super.c 2004-01-14 01:11:49.000000000 +0300
@@ -21,6 +21,7 @@
#include <linux/string.h>
#include <linux/fs.h>
Index: linux-2.4.19-pre1/fs/ext2/symlink.c
===================================================================
--- linux-2.4.19-pre1.orig/fs/ext2/symlink.c 2000-09-28 00:41:33.000000000 +0400
-+++ linux-2.4.19-pre1/fs/ext2/symlink.c 2003-11-21 03:51:05.000000000 +0300
++++ linux-2.4.19-pre1/fs/ext2/symlink.c 2004-01-14 01:11:49.000000000 +0300
@@ -19,6 +19,7 @@
#include <linux/fs.h>
};
Index: linux-2.4.19-pre1/fs/ext2/xattr.c
===================================================================
---- linux-2.4.19-pre1.orig/fs/ext2/xattr.c 2003-11-21 03:51:05.000000000 +0300
-+++ linux-2.4.19-pre1/fs/ext2/xattr.c 2003-11-21 03:51:05.000000000 +0300
+--- linux-2.4.19-pre1.orig/fs/ext2/xattr.c 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.4.19-pre1/fs/ext2/xattr.c 2004-01-14 01:11:49.000000000 +0300
@@ -0,0 +1,1212 @@
+/*
+ * linux/fs/ext2/xattr.c
+#endif /* CONFIG_EXT2_FS_XATTR_SHARING */
Index: linux-2.4.19-pre1/fs/ext2/xattr_user.c
===================================================================
---- linux-2.4.19-pre1.orig/fs/ext2/xattr_user.c 2003-11-21 03:51:05.000000000 +0300
-+++ linux-2.4.19-pre1/fs/ext2/xattr_user.c 2003-11-21 03:51:05.000000000 +0300
+--- linux-2.4.19-pre1.orig/fs/ext2/xattr_user.c 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.4.19-pre1/fs/ext2/xattr_user.c 2004-01-14 01:11:49.000000000 +0300
@@ -0,0 +1,103 @@
+/*
+ * linux/fs/ext2/xattr_user.c
+}
Index: linux-2.4.19-pre1/fs/ext3/Makefile
===================================================================
---- linux-2.4.19-pre1.orig/fs/ext3/Makefile 2003-11-21 03:51:02.000000000 +0300
-+++ linux-2.4.19-pre1/fs/ext3/Makefile 2003-11-21 03:51:05.000000000 +0300
+--- linux-2.4.19-pre1.orig/fs/ext3/Makefile 2004-01-14 01:11:49.000000000 +0300
++++ linux-2.4.19-pre1/fs/ext3/Makefile 2004-01-14 01:11:49.000000000 +0300
@@ -1,5 +1,5 @@
#
-# Makefile for the linux ext2-filesystem routines.
include $(TOPDIR)/Rules.make
Index: linux-2.4.19-pre1/fs/ext3/file.c
===================================================================
---- linux-2.4.19-pre1.orig/fs/ext3/file.c 2003-11-21 03:51:02.000000000 +0300
-+++ linux-2.4.19-pre1/fs/ext3/file.c 2003-11-21 03:51:05.000000000 +0300
+--- linux-2.4.19-pre1.orig/fs/ext3/file.c 2004-01-14 01:11:49.000000000 +0300
++++ linux-2.4.19-pre1/fs/ext3/file.c 2004-01-14 01:11:49.000000000 +0300
@@ -23,6 +23,7 @@
#include <linux/locks.h>
#include <linux/jbd.h>
Index: linux-2.4.19-pre1/fs/ext3/ialloc.c
===================================================================
---- linux-2.4.19-pre1.orig/fs/ext3/ialloc.c 2003-11-20 19:01:36.000000000 +0300
-+++ linux-2.4.19-pre1/fs/ext3/ialloc.c 2003-11-21 03:51:05.000000000 +0300
+--- linux-2.4.19-pre1.orig/fs/ext3/ialloc.c 2004-01-14 01:10:37.000000000 +0300
++++ linux-2.4.19-pre1/fs/ext3/ialloc.c 2004-01-14 01:11:49.000000000 +0300
@@ -17,6 +17,7 @@
#include <linux/jbd.h>
#include <linux/ext3_fs.h>
Index: linux-2.4.19-pre1/fs/ext3/inode.c
===================================================================
---- linux-2.4.19-pre1.orig/fs/ext3/inode.c 2003-11-20 19:01:36.000000000 +0300
-+++ linux-2.4.19-pre1/fs/ext3/inode.c 2003-11-21 03:51:05.000000000 +0300
+--- linux-2.4.19-pre1.orig/fs/ext3/inode.c 2004-01-14 01:10:37.000000000 +0300
++++ linux-2.4.19-pre1/fs/ext3/inode.c 2004-01-14 01:11:49.000000000 +0300
@@ -39,6 +39,18 @@
*/
#undef SEARCH_FROM_ZERO
/* inode->i_attr_flags |= ATTR_FLAG_SYNCRONOUS; unused */
Index: linux-2.4.19-pre1/fs/ext3/namei.c
===================================================================
---- linux-2.4.19-pre1.orig/fs/ext3/namei.c 2003-11-21 03:51:02.000000000 +0300
-+++ linux-2.4.19-pre1/fs/ext3/namei.c 2003-11-21 03:51:05.000000000 +0300
+--- linux-2.4.19-pre1.orig/fs/ext3/namei.c 2004-01-14 01:11:49.000000000 +0300
++++ linux-2.4.19-pre1/fs/ext3/namei.c 2004-01-14 01:11:49.000000000 +0300
@@ -29,6 +29,7 @@
#include <linux/sched.h>
#include <linux/ext3_fs.h>
+
Index: linux-2.4.19-pre1/fs/ext3/super.c
===================================================================
---- linux-2.4.19-pre1.orig/fs/ext3/super.c 2003-11-21 03:51:02.000000000 +0300
-+++ linux-2.4.19-pre1/fs/ext3/super.c 2003-11-21 03:51:05.000000000 +0300
+--- linux-2.4.19-pre1.orig/fs/ext3/super.c 2004-01-14 01:11:49.000000000 +0300
++++ linux-2.4.19-pre1/fs/ext3/super.c 2004-01-14 01:11:49.000000000 +0300
@@ -24,6 +24,7 @@
#include <linux/jbd.h>
#include <linux/ext3_fs.h>
Index: linux-2.4.19-pre1/fs/ext3/symlink.c
===================================================================
--- linux-2.4.19-pre1.orig/fs/ext3/symlink.c 2001-11-10 01:25:04.000000000 +0300
-+++ linux-2.4.19-pre1/fs/ext3/symlink.c 2003-11-21 03:51:05.000000000 +0300
++++ linux-2.4.19-pre1/fs/ext3/symlink.c 2004-01-14 01:11:49.000000000 +0300
@@ -20,6 +20,7 @@
#include <linux/fs.h>
#include <linux/jbd.h>
};
Index: linux-2.4.19-pre1/fs/ext3/xattr.c
===================================================================
---- linux-2.4.19-pre1.orig/fs/ext3/xattr.c 2003-11-21 03:51:05.000000000 +0300
-+++ linux-2.4.19-pre1/fs/ext3/xattr.c 2003-11-21 03:51:05.000000000 +0300
+--- linux-2.4.19-pre1.orig/fs/ext3/xattr.c 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.4.19-pre1/fs/ext3/xattr.c 2004-01-14 01:11:49.000000000 +0300
@@ -0,0 +1,1225 @@
+/*
+ * linux/fs/ext3/xattr.c
+#endif /* CONFIG_EXT3_FS_XATTR_SHARING */
Index: linux-2.4.19-pre1/fs/ext3/xattr_user.c
===================================================================
---- linux-2.4.19-pre1.orig/fs/ext3/xattr_user.c 2003-11-21 03:51:05.000000000 +0300
-+++ linux-2.4.19-pre1/fs/ext3/xattr_user.c 2003-11-21 03:51:05.000000000 +0300
+--- linux-2.4.19-pre1.orig/fs/ext3/xattr_user.c 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.4.19-pre1/fs/ext3/xattr_user.c 2004-01-14 01:11:49.000000000 +0300
@@ -0,0 +1,111 @@
+/*
+ * linux/fs/ext3/xattr_user.c
+ ext3_xattr_unregister(EXT3_XATTR_INDEX_USER,
+ &ext3_xattr_user_handler);
+}
+Index: linux-2.4.19-pre1/fs/ext3/ext3-exports.c
+===================================================================
+--- linux-2.4.19-pre1.orig/fs/ext3/ext3-exports.c 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.4.19-pre1/fs/ext3/ext3-exports.c 2004-01-14 01:11:49.000000000 +0300
+@@ -0,0 +1,13 @@
++#include <linux/config.h>
++#include <linux/module.h>
++#include <linux/ext3_fs.h>
++#include <linux/ext3_jbd.h>
++#include <linux/ext3_xattr.h>
++
++EXPORT_SYMBOL(ext3_force_commit);
++EXPORT_SYMBOL(ext3_bread);
++EXPORT_SYMBOL(ext3_xattr_register);
++EXPORT_SYMBOL(ext3_xattr_unregister);
++EXPORT_SYMBOL(ext3_xattr_get);
++EXPORT_SYMBOL(ext3_xattr_list);
++EXPORT_SYMBOL(ext3_xattr_set);
Index: linux-2.4.19-pre1/fs/mbcache.c
===================================================================
---- linux-2.4.19-pre1.orig/fs/mbcache.c 2003-11-21 03:51:05.000000000 +0300
-+++ linux-2.4.19-pre1/fs/mbcache.c 2003-11-21 03:51:05.000000000 +0300
+--- linux-2.4.19-pre1.orig/fs/mbcache.c 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.4.19-pre1/fs/mbcache.c 2004-01-14 01:11:49.000000000 +0300
@@ -0,0 +1,648 @@
+/*
+ * linux/fs/mbcache.c
+module_init(init_mbcache)
+module_exit(exit_mbcache)
+
-Index: linux-2.4.19-pre1/include/asm-arm/unistd.h
-===================================================================
---- linux-2.4.19-pre1.orig/include/asm-arm/unistd.h 2001-08-12 22:14:00.000000000 +0400
-+++ linux-2.4.19-pre1/include/asm-arm/unistd.h 2003-11-21 03:51:05.000000000 +0300
-@@ -240,6 +240,18 @@
- #define __NR_mincore (__NR_SYSCALL_BASE+219)
- #define __NR_madvise (__NR_SYSCALL_BASE+220)
- #define __NR_fcntl64 (__NR_SYSCALL_BASE+221)
-+#define __NR_setxattr (__NR_SYSCALL_BASE+226)
-+#define __NR_lsetxattr (__NR_SYSCALL_BASE+227)
-+#define __NR_fsetxattr (__NR_SYSCALL_BASE+228)
-+#define __NR_getxattr (__NR_SYSCALL_BASE+229)
-+#define __NR_lgetxattr (__NR_SYSCALL_BASE+230)
-+#define __NR_fgetxattr (__NR_SYSCALL_BASE+231)
-+#define __NR_listxattr (__NR_SYSCALL_BASE+232)
-+#define __NR_llistxattr (__NR_SYSCALL_BASE+233)
-+#define __NR_flistxattr (__NR_SYSCALL_BASE+234)
-+#define __NR_removexattr (__NR_SYSCALL_BASE+235)
-+#define __NR_lremovexattr (__NR_SYSCALL_BASE+236)
-+#define __NR_fremovexattr (__NR_SYSCALL_BASE+237)
-
- /*
- * The following SWIs are ARM private.
-Index: linux-2.4.19-pre1/include/asm-ia64/unistd.h
-===================================================================
---- linux-2.4.19-pre1.orig/include/asm-ia64/unistd.h 2001-11-10 01:26:17.000000000 +0300
-+++ linux-2.4.19-pre1/include/asm-ia64/unistd.h 2003-11-21 03:51:05.000000000 +0300
-@@ -206,6 +206,18 @@
- #define __NR_getdents64 1214
- #define __NR_getunwind 1215
- #define __NR_readahead 1216
-+#define __NR_setxattr 1217
-+#define __NR_lsetxattr 1218
-+#define __NR_fsetxattr 1219
-+#define __NR_getxattr 1220
-+#define __NR_lgetxattr 1221
-+#define __NR_fgetxattr 1222
-+#define __NR_listxattr 1223
-+#define __NR_llistxattr 1224
-+#define __NR_flistxattr 1225
-+#define __NR_removexattr 1226
-+#define __NR_lremovexattr 1227
-+#define __NR_fremovexattr 1228
-
- #if !defined(__ASSEMBLY__) && !defined(ASSEMBLER)
-
-Index: linux-2.4.19-pre1/include/asm-s390/unistd.h
-===================================================================
---- linux-2.4.19-pre1.orig/include/asm-s390/unistd.h 2001-10-11 20:43:38.000000000 +0400
-+++ linux-2.4.19-pre1/include/asm-s390/unistd.h 2003-11-21 03:51:05.000000000 +0300
-@@ -211,6 +211,18 @@
- #define __NR_mincore 218
- #define __NR_madvise 219
- #define __NR_getdents64 220
-+#define __NR_setxattr 224
-+#define __NR_lsetxattr 225
-+#define __NR_fsetxattr 226
-+#define __NR_getxattr 227
-+#define __NR_lgetxattr 228
-+#define __NR_fgetxattr 229
-+#define __NR_listxattr 230
-+#define __NR_llistxattr 231
-+#define __NR_flistxattr 232
-+#define __NR_removexattr 233
-+#define __NR_lremovexattr 234
-+#define __NR_fremovexattr 235
-
-
- /* user-visible error numbers are in the range -1 - -122: see <asm-s390/errno.h> */
-Index: linux-2.4.19-pre1/include/asm-s390x/unistd.h
-===================================================================
---- linux-2.4.19-pre1.orig/include/asm-s390x/unistd.h 2001-10-11 20:43:38.000000000 +0400
-+++ linux-2.4.19-pre1/include/asm-s390x/unistd.h 2003-11-21 03:51:05.000000000 +0300
-@@ -181,6 +181,18 @@
- #define __NR_mincore 218
- #define __NR_madvise 219
- #define __NR_getdents64 220
-+#define __NR_setxattr 224
-+#define __NR_lsetxattr 225
-+#define __NR_fsetxattr 226
-+#define __NR_getxattr 227
-+#define __NR_lgetxattr 228
-+#define __NR_fgetxattr 229
-+#define __NR_listxattr 230
-+#define __NR_llistxattr 231
-+#define __NR_flistxattr 232
-+#define __NR_removexattr 233
-+#define __NR_lremovexattr 234
-+#define __NR_fremovexattr 235
-
-
- /* user-visible error numbers are in the range -1 - -122: see <asm-s390/errno.h> */
-Index: linux-2.4.19-pre1/include/asm-sparc/unistd.h
-===================================================================
---- linux-2.4.19-pre1.orig/include/asm-sparc/unistd.h 2001-10-21 21:36:54.000000000 +0400
-+++ linux-2.4.19-pre1/include/asm-sparc/unistd.h 2003-11-21 03:51:05.000000000 +0300
-@@ -184,24 +184,24 @@
- /* #define __NR_exportfs 166 SunOS Specific */
- #define __NR_mount 167 /* Common */
- #define __NR_ustat 168 /* Common */
--/* #define __NR_semsys 169 SunOS Specific */
--/* #define __NR_msgsys 170 SunOS Specific */
--/* #define __NR_shmsys 171 SunOS Specific */
--/* #define __NR_auditsys 172 SunOS Specific */
--/* #define __NR_rfssys 173 SunOS Specific */
-+#define __NR_setxattr 169 /* SunOS: semsys */
-+#define __NR_lsetxattr 170 /* SunOS: msgsys */
-+#define __NR_fsetxattr 171 /* SunOS: shmsys */
-+#define __NR_getxattr 172 /* SunOS: auditsys */
-+#define __NR_lgetxattr 173 /* SunOS: rfssys */
- #define __NR_getdents 174 /* Common */
- #define __NR_setsid 175 /* Common */
- #define __NR_fchdir 176 /* Common */
--/* #define __NR_fchroot 177 SunOS Specific */
--/* #define __NR_vpixsys 178 SunOS Specific */
--/* #define __NR_aioread 179 SunOS Specific */
--/* #define __NR_aiowrite 180 SunOS Specific */
--/* #define __NR_aiowait 181 SunOS Specific */
--/* #define __NR_aiocancel 182 SunOS Specific */
-+#define __NR_fgetxattr 177 /* SunOS: fchroot */
-+#define __NR_listxattr 178 /* SunOS: vpixsys */
-+#define __NR_llistxattr 179 /* SunOS: aioread */
-+#define __NR_flistxattr 180 /* SunOS: aiowrite */
-+#define __NR_removexattr 181 /* SunOS: aiowait */
-+#define __NR_lremovexattr 182 /* SunOS: aiocancel */
- #define __NR_sigpending 183 /* Common */
- #define __NR_query_module 184 /* Linux Specific */
- #define __NR_setpgid 185 /* Common */
--/* #define __NR_pathconf 186 SunOS Specific */
-+#define __NR_fremovexattr 186 /* SunOS: pathconf */
- /* #define __NR_fpathconf 187 SunOS Specific */
- /* #define __NR_sysconf 188 SunOS Specific */
- #define __NR_uname 189 /* Linux Specific */
-Index: linux-2.4.19-pre1/include/asm-sparc64/unistd.h
-===================================================================
---- linux-2.4.19-pre1.orig/include/asm-sparc64/unistd.h 2001-10-21 21:36:54.000000000 +0400
-+++ linux-2.4.19-pre1/include/asm-sparc64/unistd.h 2003-11-21 03:51:05.000000000 +0300
-@@ -184,24 +184,24 @@
- /* #define __NR_exportfs 166 SunOS Specific */
- #define __NR_mount 167 /* Common */
- #define __NR_ustat 168 /* Common */
--/* #define __NR_semsys 169 SunOS Specific */
--/* #define __NR_msgsys 170 SunOS Specific */
--/* #define __NR_shmsys 171 SunOS Specific */
--/* #define __NR_auditsys 172 SunOS Specific */
--/* #define __NR_rfssys 173 SunOS Specific */
-+#define __NR_setxattr 169 /* SunOS: semsys */
-+#define __NR_lsetxattr 170 /* SunOS: msgsys */
-+#define __NR_fsetxattr 171 /* SunOS: shmsys */
-+#define __NR_getxattr 172 /* SunOS: auditsys */
-+#define __NR_lgetxattr 173 /* SunOS: rfssys */
- #define __NR_getdents 174 /* Common */
- #define __NR_setsid 175 /* Common */
- #define __NR_fchdir 176 /* Common */
--/* #define __NR_fchroot 177 SunOS Specific */
--/* #define __NR_vpixsys 178 SunOS Specific */
--/* #define __NR_aioread 179 SunOS Specific */
--/* #define __NR_aiowrite 180 SunOS Specific */
--/* #define __NR_aiowait 181 SunOS Specific */
--/* #define __NR_aiocancel 182 SunOS Specific */
-+#define __NR_fgetxattr 177 /* SunOS: fchroot */
-+#define __NR_listxattr 178 /* SunOS: vpixsys */
-+#define __NR_llistxattr 179 /* SunOS: aioread */
-+#define __NR_flistxattr 180 /* SunOS: aiowrite */
-+#define __NR_removexattr 181 /* SunOS: aiowait */
-+#define __NR_lremovexattr 182 /* SunOS: aiocancel */
- #define __NR_sigpending 183 /* Common */
- #define __NR_query_module 184 /* Linux Specific */
- #define __NR_setpgid 185 /* Common */
--/* #define __NR_pathconf 186 SunOS Specific */
-+#define __NR_fremovexattr 186 /* SunOS: pathconf */
- /* #define __NR_fpathconf 187 SunOS Specific */
- /* #define __NR_sysconf 188 SunOS Specific */
- #define __NR_uname 189 /* Linux Specific */
-Index: linux-2.4.19-pre1/include/linux/cache_def.h
+Index: linux-2.4.19-pre1/fs/xattr.c
===================================================================
---- linux-2.4.19-pre1.orig/include/linux/cache_def.h 2003-11-21 03:51:05.000000000 +0300
-+++ linux-2.4.19-pre1/include/linux/cache_def.h 2003-11-21 03:51:05.000000000 +0300
-@@ -0,0 +1,15 @@
+--- linux-2.4.19-pre1.orig/fs/xattr.c 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.4.19-pre1/fs/xattr.c 2004-01-14 01:11:49.000000000 +0300
+@@ -0,0 +1,355 @@
+/*
-+ * linux/cache_def.h
-+ * Handling of caches defined in drivers, filesystems, ...
-+ *
-+ * Copyright (C) 2002 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
-+ */
++ File: fs/xattr.c
+
-+struct cache_definition {
-+ const char *name;
-+ void (*shrink)(int, unsigned int);
-+ struct list_head link;
-+};
++ Extended attribute handling.
+
-+extern void register_cache(struct cache_definition *);
-+extern void unregister_cache(struct cache_definition *);
-Index: linux-2.4.19-pre1/include/linux/errno.h
-===================================================================
---- linux-2.4.19-pre1.orig/include/linux/errno.h 2001-02-10 01:46:13.000000000 +0300
-+++ linux-2.4.19-pre1/include/linux/errno.h 2003-11-21 03:51:05.000000000 +0300
-@@ -23,4 +23,8 @@
-
- #endif
-
-+/* Defined for extended attributes */
-+#define ENOATTR ENODATA /* No such attribute */
-+#define ENOTSUP EOPNOTSUPP /* Operation not supported */
++ Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org>
++ Copyright (C) 2001 SGI - Silicon Graphics, Inc <linux-xfs@oss.sgi.com>
++ */
++#include <linux/fs.h>
++#include <linux/slab.h>
++#include <linux/vmalloc.h>
++#include <linux/smp_lock.h>
++#include <linux/file.h>
++#include <linux/xattr.h>
++#include <asm/uaccess.h>
+
- #endif
-Index: linux-2.4.19-pre1/include/linux/ext2_fs.h
-===================================================================
---- linux-2.4.19-pre1.orig/include/linux/ext2_fs.h 2001-11-22 22:46:52.000000000 +0300
-+++ linux-2.4.19-pre1/include/linux/ext2_fs.h 2003-11-21 03:51:05.000000000 +0300
-@@ -57,8 +57,6 @@
- */
- #define EXT2_BAD_INO 1 /* Bad blocks inode */
- #define EXT2_ROOT_INO 2 /* Root inode */
--#define EXT2_ACL_IDX_INO 3 /* ACL inode */
--#define EXT2_ACL_DATA_INO 4 /* ACL inode */
- #define EXT2_BOOT_LOADER_INO 5 /* Boot loader inode */
- #define EXT2_UNDEL_DIR_INO 6 /* Undelete directory inode */
-
-@@ -86,7 +84,6 @@
- #else
- # define EXT2_BLOCK_SIZE(s) (EXT2_MIN_BLOCK_SIZE << (s)->s_log_block_size)
- #endif
--#define EXT2_ACLE_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (struct ext2_acl_entry))
- #define EXT2_ADDR_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (__u32))
- #ifdef __KERNEL__
- # define EXT2_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
-@@ -121,28 +118,6 @@
- #endif
-
- /*
-- * ACL structures
-- */
--struct ext2_acl_header /* Header of Access Control Lists */
--{
-- __u32 aclh_size;
-- __u32 aclh_file_count;
-- __u32 aclh_acle_count;
-- __u32 aclh_first_acle;
--};
--
--struct ext2_acl_entry /* Access Control List Entry */
--{
-- __u32 acle_size;
-- __u16 acle_perms; /* Access permissions */
-- __u16 acle_type; /* Type of entry */
-- __u16 acle_tag; /* User or group identity */
-- __u16 acle_pad1;
-- __u32 acle_next; /* Pointer on next entry for the */
-- /* same inode or on next free entry */
--};
--
--/*
- * Structure of a blocks group descriptor
- */
- struct ext2_group_desc
-@@ -314,6 +289,7 @@
- #define EXT2_MOUNT_ERRORS_PANIC 0x0040 /* Panic on errors */
- #define EXT2_MOUNT_MINIX_DF 0x0080 /* Mimics the Minix statfs */
- #define EXT2_MOUNT_NO_UID32 0x0200 /* Disable 32-bit UIDs */
-+#define EXT2_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */
-
- #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt
- #define set_opt(o, opt) o |= EXT2_MOUNT_##opt
-@@ -397,6 +373,7 @@
-
- #ifdef __KERNEL__
- #define EXT2_SB(sb) (&((sb)->u.ext2_sb))
-+#define EXT2_I(inode) (&((inode)->u.ext2_i))
- #else
- /* Assume that user mode programs are passing in an ext2fs superblock, not
- * a kernel struct super_block. This will allow us to call the feature-test
-@@ -466,7 +443,7 @@
- #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008
- #define EXT2_FEATURE_INCOMPAT_ANY 0xffffffff
-
--#define EXT2_FEATURE_COMPAT_SUPP 0
-+#define EXT2_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
- #define EXT2_FEATURE_INCOMPAT_SUPP EXT2_FEATURE_INCOMPAT_FILETYPE
- #define EXT2_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \
- EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \
-@@ -623,8 +600,10 @@
-
- /* namei.c */
- extern struct inode_operations ext2_dir_inode_operations;
-+extern struct inode_operations ext2_special_inode_operations;
-
- /* symlink.c */
-+extern struct inode_operations ext2_symlink_inode_operations;
- extern struct inode_operations ext2_fast_symlink_inode_operations;
-
- #endif /* __KERNEL__ */
-Index: linux-2.4.19-pre1/include/linux/ext2_xattr.h
-===================================================================
---- linux-2.4.19-pre1.orig/include/linux/ext2_xattr.h 2003-11-21 03:51:05.000000000 +0300
-+++ linux-2.4.19-pre1/include/linux/ext2_xattr.h 2003-11-21 03:51:05.000000000 +0300
-@@ -0,0 +1,157 @@
+/*
-+ File: linux/ext2_xattr.h
++ * Extended attribute memory allocation wrappers, originally
++ * based on the Intermezzo PRESTO_ALLOC/PRESTO_FREE macros.
++ * The vmalloc use here is very uncommon - extended attributes
++ * are supposed to be small chunks of metadata, and it is quite
++ * unusual to have very many extended attributes, so lists tend
++ * to be quite short as well. The 64K upper limit is derived
++ * from the extended attribute size limit used by XFS.
++ * Intentionally allow zero @size for value/list size requests.
++ */
++static void *
++xattr_alloc(size_t size, size_t limit)
++{
++ void *ptr;
+
-+ On-disk format of extended attributes for the ext2 filesystem.
++ if (size > limit)
++ return ERR_PTR(-E2BIG);
+
-+ (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
-+*/
++ if (!size) /* size request, no buffer is needed */
++ return NULL;
++ else if (size <= PAGE_SIZE)
++ ptr = kmalloc((unsigned long) size, GFP_KERNEL);
++ else
++ ptr = vmalloc((unsigned long) size);
++ if (!ptr)
++ return ERR_PTR(-ENOMEM);
++ return ptr;
++}
+
-+#include <linux/config.h>
-+#include <linux/init.h>
-+#include <linux/xattr.h>
++static void
++xattr_free(void *ptr, size_t size)
++{ /* Release @ptr; @size picks the deallocator, so pass the size used for xattr_alloc(). */
++ if (!size) /* size request, no buffer was needed */
++ return;
++ else if (size <= PAGE_SIZE)
++ kfree(ptr); /* xattr_alloc() uses kmalloc() for sizes up to PAGE_SIZE */
++ else
++ vfree(ptr); /* xattr_alloc() uses vmalloc() above PAGE_SIZE */
++}
+
-+/* Magic value in attribute blocks */
-+#define EXT2_XATTR_MAGIC 0xEA020000
++/*
++ * Extended attribute SET operations
++ */
++static long
++setxattr(struct dentry *d, char *name, void *value, size_t size, int flags)
++{
++ int error;
++ void *kvalue;
++ char kname[XATTR_NAME_MAX + 1];
+
-+/* Maximum number of references to one attribute block */
-+#define EXT2_XATTR_REFCOUNT_MAX 1024
++ if (flags & ~(XATTR_CREATE|XATTR_REPLACE))
++ return -EINVAL;
+
-+/* Name indexes */
-+#define EXT2_XATTR_INDEX_MAX 10
-+#define EXT2_XATTR_INDEX_USER 1
-+#define EXT2_XATTR_INDEX_POSIX_ACL_ACCESS 2
-+#define EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT 3
++ error = strncpy_from_user(kname, name, sizeof(kname));
++ if (error == 0 || error == sizeof(kname))
++ error = -ERANGE;
++ if (error < 0)
++ return error;
+
-+struct ext2_xattr_header {
-+ __u32 h_magic; /* magic number for identification */
-+ __u32 h_refcount; /* reference count */
-+ __u32 h_blocks; /* number of disk blocks used */
-+ __u32 h_hash; /* hash value of all attributes */
-+ __u32 h_reserved[4]; /* zero right now */
-+};
++ kvalue = xattr_alloc(size, XATTR_SIZE_MAX);
++ if (IS_ERR(kvalue))
++ return PTR_ERR(kvalue);
+
-+struct ext2_xattr_entry {
-+ __u8 e_name_len; /* length of name */
-+ __u8 e_name_index; /* attribute name index */
-+ __u16 e_value_offs; /* offset in disk block of value */
-+ __u32 e_value_block; /* disk block attribute is stored on (n/i) */
-+ __u32 e_value_size; /* size of attribute value */
-+ __u32 e_hash; /* hash value of name and value */
-+ char e_name[0]; /* attribute name */
-+};
++ if (size > 0 && copy_from_user(kvalue, value, size)) {
++ xattr_free(kvalue, size);
++ return -EFAULT;
++ }
+
-+#define EXT2_XATTR_PAD_BITS 2
-+#define EXT2_XATTR_PAD (1<<EXT2_XATTR_PAD_BITS)
-+#define EXT2_XATTR_ROUND (EXT2_XATTR_PAD-1)
-+#define EXT2_XATTR_LEN(name_len) \
-+ (((name_len) + EXT2_XATTR_ROUND + \
-+ sizeof(struct ext2_xattr_entry)) & ~EXT2_XATTR_ROUND)
-+#define EXT2_XATTR_NEXT(entry) \
-+ ( (struct ext2_xattr_entry *)( \
-+ (char *)(entry) + EXT2_XATTR_LEN((entry)->e_name_len)) )
-+#define EXT2_XATTR_SIZE(size) \
-+ (((size) + EXT2_XATTR_ROUND) & ~EXT2_XATTR_ROUND)
++ error = -EOPNOTSUPP;
++ if (d->d_inode->i_op && d->d_inode->i_op->setxattr) {
++ down(&d->d_inode->i_sem);
++ lock_kernel();
++ error = d->d_inode->i_op->setxattr(d, kname, kvalue, size, flags);
++ unlock_kernel();
++ up(&d->d_inode->i_sem);
++ }
+
-+#ifdef __KERNEL__
++ xattr_free(kvalue, size);
++ return error;
++}
+
-+# ifdef CONFIG_EXT2_FS_XATTR
++asmlinkage long
++sys_setxattr(char *path, char *name, void *value, size_t size, int flags)
++{ /* Look up @path with user_path_walk(), then set the attribute via setxattr(). */
++ struct nameidata nd; /* filled in by the path walk, released below */
++ int error;
++
++ error = user_path_walk(path, &nd);
++ if (error)
++ return error; /* walk failed: path_release() is not called on this path */
++ error = setxattr(nd.dentry, name, value, size, flags);
++ path_release(&nd); /* runs whatever setxattr() returned */
++ return error; /* setxattr() result passed through unchanged */
++}
++
++asmlinkage long
++sys_lsetxattr(char *path, char *name, void *value, size_t size, int flags)
++{ /* Same flow as sys_setxattr(), but the lookup uses user_path_walk_link(). */
++ struct nameidata nd; /* filled in by the path walk, released below */
++ int error;
++
++ error = user_path_walk_link(path, &nd); /* NOTE(review): presumably does not follow a trailing symlink - confirm */
++ if (error)
++ return error; /* walk failed: path_release() is not called on this path */
++ error = setxattr(nd.dentry, name, value, size, flags);
++ path_release(&nd); /* runs whatever setxattr() returned */
++ return error; /* setxattr() result passed through unchanged */
++}
++
++asmlinkage long
++sys_fsetxattr(int fd, char *name, void *value, size_t size, int flags)
++{ /* Set attribute @name on the dentry of the file that fget() returns for @fd. */
++ struct file *f;
++ int error = -EBADF; /* returned when fget() yields no file for @fd */
++
++ f = fget(fd);
++ if (!f)
++ return error;
++ error = setxattr(f->f_dentry, name, value, size, flags);
++ fput(f); /* pairs with the fget() above */
++ return error; /* setxattr() result passed through unchanged */
++}
++
++/*
++ * Extended attribute GET operations
++ */
++static ssize_t
++getxattr(struct dentry *d, char *name, void *value, size_t size)
++{ /* Read attribute @name of @d into user buffer @value; returns the handler's result or a negative errno. */
++ ssize_t error;
++ void *kvalue; /* kernel-side buffer; NULL when @size is 0 */
++ char kname[XATTR_NAME_MAX + 1]; /* kernel copy of the user-supplied name */
++
++ error = strncpy_from_user(kname, name, sizeof(kname));
++ if (error == 0 || error == sizeof(kname)) /* NOTE(review): presumably empty name / name filled the buffer - confirm */
++ error = -ERANGE;
++ if (error < 0)
++ return error;
++
++ kvalue = xattr_alloc(size, XATTR_SIZE_MAX); /* ERR_PTR(-E2BIG) above the limit, ERR_PTR(-ENOMEM) on failure */
++ if (IS_ERR(kvalue))
++ return PTR_ERR(kvalue);
++
++ error = -EOPNOTSUPP; /* result when the inode has no getxattr operation */
++ if (d->d_inode->i_op && d->d_inode->i_op->getxattr) {
++ down(&d->d_inode->i_sem); /* handler is called with i_sem held ... */
++ lock_kernel(); /* ... and inside lock_kernel() */
++ error = d->d_inode->i_op->getxattr(d, kname, kvalue, size);
++ unlock_kernel();
++ up(&d->d_inode->i_sem);
++ }
++
++ if (kvalue && error > 0) /* a positive result is used as the byte count to copy out */
++ if (copy_to_user(value, kvalue, error))
++ error = -EFAULT;
++ xattr_free(kvalue, size); /* does nothing when @size is 0 */
++ return error;
++}
++
++asmlinkage ssize_t
++sys_getxattr(char *path, char *name, void *value, size_t size)
++{ /* Look up @path with user_path_walk(), then read the attribute via getxattr(). */
++ struct nameidata nd; /* filled in by the path walk, released below */
++ ssize_t error;
++
++ error = user_path_walk(path, &nd);
++ if (error)
++ return error; /* walk failed: path_release() is not called on this path */
++ error = getxattr(nd.dentry, name, value, size);
++ path_release(&nd); /* runs whatever getxattr() returned */
++ return error; /* getxattr() result passed through unchanged */
++}
++
++asmlinkage ssize_t
++sys_lgetxattr(char *path, char *name, void *value, size_t size)
++{ /* Same flow as sys_getxattr(), but the lookup uses user_path_walk_link(). */
++ struct nameidata nd; /* filled in by the path walk, released below */
++ ssize_t error;
++
++ error = user_path_walk_link(path, &nd); /* NOTE(review): presumably does not follow a trailing symlink - confirm */
++ if (error)
++ return error; /* walk failed: path_release() is not called on this path */
++ error = getxattr(nd.dentry, name, value, size);
++ path_release(&nd); /* runs whatever getxattr() returned */
++ return error; /* getxattr() result passed through unchanged */
++}
++
++asmlinkage ssize_t
++sys_fgetxattr(int fd, char *name, void *value, size_t size)
++{ /* Read attribute @name from the dentry of the file that fget() returns for @fd. */
++ struct file *f;
++ ssize_t error = -EBADF; /* returned when fget() yields no file for @fd */
++
++ f = fget(fd);
++ if (!f)
++ return error;
++ error = getxattr(f->f_dentry, name, value, size);
++ fput(f); /* pairs with the fget() above */
++ return error; /* getxattr() result passed through unchanged */
++}
++
++/*
++ * Extended attribute LIST operations
++ */
++static ssize_t
++listxattr(struct dentry *d, char *list, size_t size)
++{ /* Fill user buffer @list via the inode's listxattr handler; returns its result or a negative errno. */
++ ssize_t error;
++ char *klist; /* kernel-side buffer; NULL when @size is 0 */
++
++ klist = (char *)xattr_alloc(size, XATTR_LIST_MAX); /* ERR_PTR(-E2BIG) above the limit, ERR_PTR(-ENOMEM) on failure */
++ if (IS_ERR(klist))
++ return PTR_ERR(klist);
++
++ error = -EOPNOTSUPP; /* result when the inode has no listxattr operation */
++ if (d->d_inode->i_op && d->d_inode->i_op->listxattr) {
++ down(&d->d_inode->i_sem); /* handler is called with i_sem held ... */
++ lock_kernel(); /* ... and inside lock_kernel() */
++ error = d->d_inode->i_op->listxattr(d, klist, size);
++ unlock_kernel();
++ up(&d->d_inode->i_sem);
++ }
++
++ if (klist && error > 0) /* a positive result is used as the byte count to copy out */
++ if (copy_to_user(list, klist, error))
++ error = -EFAULT;
++ xattr_free(klist, size); /* does nothing when @size is 0 */
++ return error;
++}
++
++asmlinkage ssize_t
++sys_listxattr(char *path, char *list, size_t size)
++{ /* Look up @path with user_path_walk(), then list attributes via listxattr(). */
++ struct nameidata nd; /* filled in by the path walk, released below */
++ ssize_t error;
++
++ error = user_path_walk(path, &nd);
++ if (error)
++ return error; /* walk failed: path_release() is not called on this path */
++ error = listxattr(nd.dentry, list, size);
++ path_release(&nd); /* runs whatever listxattr() returned */
++ return error; /* listxattr() result passed through unchanged */
++}
++
++asmlinkage ssize_t
++sys_llistxattr(char *path, char *list, size_t size)
++{ /* Same flow as sys_listxattr(), but the lookup uses user_path_walk_link(). */
++ struct nameidata nd; /* filled in by the path walk, released below */
++ ssize_t error;
++
++ error = user_path_walk_link(path, &nd); /* NOTE(review): presumably does not follow a trailing symlink - confirm */
++ if (error)
++ return error; /* walk failed: path_release() is not called on this path */
++ error = listxattr(nd.dentry, list, size);
++ path_release(&nd); /* runs whatever listxattr() returned */
++ return error; /* listxattr() result passed through unchanged */
++}
++
++asmlinkage ssize_t
++sys_flistxattr(int fd, char *list, size_t size)
++{ /* List attributes of the dentry of the file that fget() returns for @fd. */
++ struct file *f;
++ ssize_t error = -EBADF; /* returned when fget() yields no file for @fd */
++
++ f = fget(fd);
++ if (!f)
++ return error;
++ error = listxattr(f->f_dentry, list, size);
++ fput(f); /* pairs with the fget() above */
++ return error; /* listxattr() result passed through unchanged */
++}
++
++/*
++ * Extended attribute REMOVE operations
++ */
++static long
++removexattr(struct dentry *d, char *name)
++{ /* Remove attribute @name from @d via the inode's removexattr handler; returns its result or a negative errno. */
++ int error;
++ char kname[XATTR_NAME_MAX + 1]; /* kernel copy of the user-supplied name */
++
++ error = strncpy_from_user(kname, name, sizeof(kname));
++ if (error == 0 || error == sizeof(kname)) /* NOTE(review): presumably empty name / name filled the buffer - confirm */
++ error = -ERANGE;
++ if (error < 0)
++ return error;
++
++ error = -EOPNOTSUPP; /* result when the inode has no removexattr operation */
++ if (d->d_inode->i_op && d->d_inode->i_op->removexattr) {
++ down(&d->d_inode->i_sem); /* handler is called with i_sem held ... */
++ lock_kernel(); /* ... and inside lock_kernel() */
++ error = d->d_inode->i_op->removexattr(d, kname);
++ unlock_kernel();
++ up(&d->d_inode->i_sem);
++ }
++ return error;
++}
++
++asmlinkage long
++sys_removexattr(char *path, char *name)
++{ /* Look up @path with user_path_walk(), then remove the attribute via removexattr(). */
++ struct nameidata nd; /* filled in by the path walk, released below */
++ int error;
++
++ error = user_path_walk(path, &nd);
++ if (error)
++ return error; /* walk failed: path_release() is not called on this path */
++ error = removexattr(nd.dentry, name);
++ path_release(&nd); /* runs whatever removexattr() returned */
++ return error; /* removexattr() result passed through unchanged */
++}
++
++asmlinkage long
++sys_lremovexattr(char *path, char *name)
++{ /* Same flow as sys_removexattr(), but the lookup uses user_path_walk_link(). */
++ struct nameidata nd; /* filled in by the path walk, released below */
++ int error;
++
++ error = user_path_walk_link(path, &nd); /* NOTE(review): presumably does not follow a trailing symlink - confirm */
++ if (error)
++ return error; /* walk failed: path_release() is not called on this path */
++ error = removexattr(nd.dentry, name);
++ path_release(&nd); /* runs whatever removexattr() returned */
++ return error; /* removexattr() result passed through unchanged */
++}
++
++asmlinkage long
++sys_fremovexattr(int fd, char *name)
++{ /* Remove attribute @name from the dentry of the file that fget() returns for @fd. */
++ struct file *f;
++ int error = -EBADF; /* returned when fget() yields no file for @fd */
++
++ f = fget(fd);
++ if (!f)
++ return error;
++ error = removexattr(f->f_dentry, name);
++ fput(f); /* pairs with the fget() above */
++ return error; /* removexattr() result passed through unchanged */
++}
+Index: linux-2.4.19-pre1/include/asm-arm/unistd.h
+===================================================================
+--- linux-2.4.19-pre1.orig/include/asm-arm/unistd.h 2001-08-12 22:14:00.000000000 +0400
++++ linux-2.4.19-pre1/include/asm-arm/unistd.h 2004-01-14 01:11:49.000000000 +0300
+@@ -240,6 +240,18 @@
+ #define __NR_mincore (__NR_SYSCALL_BASE+219)
+ #define __NR_madvise (__NR_SYSCALL_BASE+220)
+ #define __NR_fcntl64 (__NR_SYSCALL_BASE+221)
++#define __NR_setxattr (__NR_SYSCALL_BASE+226)
++#define __NR_lsetxattr (__NR_SYSCALL_BASE+227)
++#define __NR_fsetxattr (__NR_SYSCALL_BASE+228)
++#define __NR_getxattr (__NR_SYSCALL_BASE+229)
++#define __NR_lgetxattr (__NR_SYSCALL_BASE+230)
++#define __NR_fgetxattr (__NR_SYSCALL_BASE+231)
++#define __NR_listxattr (__NR_SYSCALL_BASE+232)
++#define __NR_llistxattr (__NR_SYSCALL_BASE+233)
++#define __NR_flistxattr (__NR_SYSCALL_BASE+234)
++#define __NR_removexattr (__NR_SYSCALL_BASE+235)
++#define __NR_lremovexattr (__NR_SYSCALL_BASE+236)
++#define __NR_fremovexattr (__NR_SYSCALL_BASE+237)
+
+ /*
+ * The following SWIs are ARM private.
+Index: linux-2.4.19-pre1/include/asm-ia64/unistd.h
+===================================================================
+--- linux-2.4.19-pre1.orig/include/asm-ia64/unistd.h 2001-11-10 01:26:17.000000000 +0300
++++ linux-2.4.19-pre1/include/asm-ia64/unistd.h 2004-01-14 01:11:49.000000000 +0300
+@@ -206,6 +206,18 @@
+ #define __NR_getdents64 1214
+ #define __NR_getunwind 1215
+ #define __NR_readahead 1216
++#define __NR_setxattr 1217
++#define __NR_lsetxattr 1218
++#define __NR_fsetxattr 1219
++#define __NR_getxattr 1220
++#define __NR_lgetxattr 1221
++#define __NR_fgetxattr 1222
++#define __NR_listxattr 1223
++#define __NR_llistxattr 1224
++#define __NR_flistxattr 1225
++#define __NR_removexattr 1226
++#define __NR_lremovexattr 1227
++#define __NR_fremovexattr 1228
+
+ #if !defined(__ASSEMBLY__) && !defined(ASSEMBLER)
+
+Index: linux-2.4.19-pre1/include/asm-s390/unistd.h
+===================================================================
+--- linux-2.4.19-pre1.orig/include/asm-s390/unistd.h 2001-10-11 20:43:38.000000000 +0400
++++ linux-2.4.19-pre1/include/asm-s390/unistd.h 2004-01-14 01:11:49.000000000 +0300
+@@ -211,6 +211,18 @@
+ #define __NR_mincore 218
+ #define __NR_madvise 219
+ #define __NR_getdents64 220
++#define __NR_setxattr 224
++#define __NR_lsetxattr 225
++#define __NR_fsetxattr 226
++#define __NR_getxattr 227
++#define __NR_lgetxattr 228
++#define __NR_fgetxattr 229
++#define __NR_listxattr 230
++#define __NR_llistxattr 231
++#define __NR_flistxattr 232
++#define __NR_removexattr 233
++#define __NR_lremovexattr 234
++#define __NR_fremovexattr 235
+
+
+ /* user-visible error numbers are in the range -1 - -122: see <asm-s390/errno.h> */
+Index: linux-2.4.19-pre1/include/asm-s390x/unistd.h
+===================================================================
+--- linux-2.4.19-pre1.orig/include/asm-s390x/unistd.h 2001-10-11 20:43:38.000000000 +0400
++++ linux-2.4.19-pre1/include/asm-s390x/unistd.h 2004-01-14 01:11:49.000000000 +0300
+@@ -181,6 +181,18 @@
+ #define __NR_mincore 218
+ #define __NR_madvise 219
+ #define __NR_getdents64 220
++#define __NR_setxattr 224
++#define __NR_lsetxattr 225
++#define __NR_fsetxattr 226
++#define __NR_getxattr 227
++#define __NR_lgetxattr 228
++#define __NR_fgetxattr 229
++#define __NR_listxattr 230
++#define __NR_llistxattr 231
++#define __NR_flistxattr 232
++#define __NR_removexattr 233
++#define __NR_lremovexattr 234
++#define __NR_fremovexattr 235
+
+
+ /* user-visible error numbers are in the range -1 - -122: see <asm-s390/errno.h> */
+Index: linux-2.4.19-pre1/include/asm-sparc/unistd.h
+===================================================================
+--- linux-2.4.19-pre1.orig/include/asm-sparc/unistd.h 2001-10-21 21:36:54.000000000 +0400
++++ linux-2.4.19-pre1/include/asm-sparc/unistd.h 2004-01-14 01:11:49.000000000 +0300
+@@ -184,24 +184,24 @@
+ /* #define __NR_exportfs 166 SunOS Specific */
+ #define __NR_mount 167 /* Common */
+ #define __NR_ustat 168 /* Common */
+-/* #define __NR_semsys 169 SunOS Specific */
+-/* #define __NR_msgsys 170 SunOS Specific */
+-/* #define __NR_shmsys 171 SunOS Specific */
+-/* #define __NR_auditsys 172 SunOS Specific */
+-/* #define __NR_rfssys 173 SunOS Specific */
++#define __NR_setxattr 169 /* SunOS: semsys */
++#define __NR_lsetxattr 170 /* SunOS: msgsys */
++#define __NR_fsetxattr 171 /* SunOS: shmsys */
++#define __NR_getxattr 172 /* SunOS: auditsys */
++#define __NR_lgetxattr 173 /* SunOS: rfssys */
+ #define __NR_getdents 174 /* Common */
+ #define __NR_setsid 175 /* Common */
+ #define __NR_fchdir 176 /* Common */
+-/* #define __NR_fchroot 177 SunOS Specific */
+-/* #define __NR_vpixsys 178 SunOS Specific */
+-/* #define __NR_aioread 179 SunOS Specific */
+-/* #define __NR_aiowrite 180 SunOS Specific */
+-/* #define __NR_aiowait 181 SunOS Specific */
+-/* #define __NR_aiocancel 182 SunOS Specific */
++#define __NR_fgetxattr 177 /* SunOS: fchroot */
++#define __NR_listxattr 178 /* SunOS: vpixsys */
++#define __NR_llistxattr 179 /* SunOS: aioread */
++#define __NR_flistxattr 180 /* SunOS: aiowrite */
++#define __NR_removexattr 181 /* SunOS: aiowait */
++#define __NR_lremovexattr 182 /* SunOS: aiocancel */
+ #define __NR_sigpending 183 /* Common */
+ #define __NR_query_module 184 /* Linux Specific */
+ #define __NR_setpgid 185 /* Common */
+-/* #define __NR_pathconf 186 SunOS Specific */
++#define __NR_fremovexattr 186 /* SunOS: pathconf */
+ /* #define __NR_fpathconf 187 SunOS Specific */
+ /* #define __NR_sysconf 188 SunOS Specific */
+ #define __NR_uname 189 /* Linux Specific */
+Index: linux-2.4.19-pre1/include/asm-sparc64/unistd.h
+===================================================================
+--- linux-2.4.19-pre1.orig/include/asm-sparc64/unistd.h 2001-10-21 21:36:54.000000000 +0400
++++ linux-2.4.19-pre1/include/asm-sparc64/unistd.h 2004-01-14 01:11:49.000000000 +0300
+@@ -184,24 +184,24 @@
+ /* #define __NR_exportfs 166 SunOS Specific */
+ #define __NR_mount 167 /* Common */
+ #define __NR_ustat 168 /* Common */
+-/* #define __NR_semsys 169 SunOS Specific */
+-/* #define __NR_msgsys 170 SunOS Specific */
+-/* #define __NR_shmsys 171 SunOS Specific */
+-/* #define __NR_auditsys 172 SunOS Specific */
+-/* #define __NR_rfssys 173 SunOS Specific */
++#define __NR_setxattr 169 /* SunOS: semsys */
++#define __NR_lsetxattr 170 /* SunOS: msgsys */
++#define __NR_fsetxattr 171 /* SunOS: shmsys */
++#define __NR_getxattr 172 /* SunOS: auditsys */
++#define __NR_lgetxattr 173 /* SunOS: rfssys */
+ #define __NR_getdents 174 /* Common */
+ #define __NR_setsid 175 /* Common */
+ #define __NR_fchdir 176 /* Common */
+-/* #define __NR_fchroot 177 SunOS Specific */
+-/* #define __NR_vpixsys 178 SunOS Specific */
+-/* #define __NR_aioread 179 SunOS Specific */
+-/* #define __NR_aiowrite 180 SunOS Specific */
+-/* #define __NR_aiowait 181 SunOS Specific */
+-/* #define __NR_aiocancel 182 SunOS Specific */
++#define __NR_fgetxattr 177 /* SunOS: fchroot */
++#define __NR_listxattr 178 /* SunOS: vpixsys */
++#define __NR_llistxattr 179 /* SunOS: aioread */
++#define __NR_flistxattr 180 /* SunOS: aiowrite */
++#define __NR_removexattr 181 /* SunOS: aiowait */
++#define __NR_lremovexattr 182 /* SunOS: aiocancel */
+ #define __NR_sigpending 183 /* Common */
+ #define __NR_query_module 184 /* Linux Specific */
+ #define __NR_setpgid 185 /* Common */
+-/* #define __NR_pathconf 186 SunOS Specific */
++#define __NR_fremovexattr 186 /* SunOS: pathconf */
+ /* #define __NR_fpathconf 187 SunOS Specific */
+ /* #define __NR_sysconf 188 SunOS Specific */
+ #define __NR_uname 189 /* Linux Specific */
+Index: linux-2.4.19-pre1/include/linux/cache_def.h
+===================================================================
+--- linux-2.4.19-pre1.orig/include/linux/cache_def.h 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.4.19-pre1/include/linux/cache_def.h 2004-01-14 01:11:49.000000000 +0300
+@@ -0,0 +1,15 @@
++/*
++ * linux/cache_def.h
++ * Handling of caches defined in drivers, filesystems, ...
++ *
++ * Copyright (C) 2002 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
++ */
+
-+struct ext2_xattr_handler {
-+ char *prefix;
-+ size_t (*list)(char *list, struct inode *inode, const char *name,
-+ int name_len);
-+ int (*get)(struct inode *inode, const char *name, void *buffer,
-+ size_t size);
-+ int (*set)(struct inode *inode, const char *name, const void *buffer,
-+ size_t size, int flags);
++struct cache_definition {
++ const char *name;
++ void (*shrink)(int, unsigned int);
++ struct list_head link;
+};
+
-+extern int ext2_xattr_register(int, struct ext2_xattr_handler *);
-+extern void ext2_xattr_unregister(int, struct ext2_xattr_handler *);
-+
-+extern int ext2_setxattr(struct dentry *, const char *, const void *, size_t, int);
-+extern ssize_t ext2_getxattr(struct dentry *, const char *, void *, size_t);
-+extern ssize_t ext2_listxattr(struct dentry *, char *, size_t);
-+extern int ext2_removexattr(struct dentry *, const char *);
-+
-+extern int ext2_xattr_get(struct inode *, int, const char *, void *, size_t);
-+extern int ext2_xattr_list(struct inode *, char *, size_t);
-+extern int ext2_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
-+
-+extern void ext2_xattr_delete_inode(struct inode *);
-+extern void ext2_xattr_put_super(struct super_block *);
-+
-+extern int init_ext2_xattr(void) __init;
-+extern void exit_ext2_xattr(void);
-+
-+# else /* CONFIG_EXT2_FS_XATTR */
-+# define ext2_setxattr NULL
-+# define ext2_getxattr NULL
-+# define ext2_listxattr NULL
-+# define ext2_removexattr NULL
-+
-+static inline int
-+ext2_xattr_get(struct inode *inode, int name_index,
-+ const char *name, void *buffer, size_t size)
-+{
-+ return -ENOTSUP;
-+}
-+
-+static inline int
-+ext2_xattr_list(struct inode *inode, char *buffer, size_t size)
-+{
-+ return -ENOTSUP;
-+}
-+
-+static inline int
-+ext2_xattr_set(struct inode *inode, int name_index, const char *name,
-+ const void *value, size_t size, int flags)
-+{
-+ return -ENOTSUP;
-+}
-+
-+static inline void
-+ext2_xattr_delete_inode(struct inode *inode)
-+{
-+}
-+
-+static inline void
-+ext2_xattr_put_super(struct super_block *sb)
-+{
-+}
-+
-+static inline int
-+init_ext2_xattr(void)
-+{
-+ return 0;
-+}
-+
-+static inline void
-+exit_ext2_xattr(void)
-+{
-+}
-+
-+# endif /* CONFIG_EXT2_FS_XATTR */
-+
-+# ifdef CONFIG_EXT2_FS_XATTR_USER
-+
-+extern int init_ext2_xattr_user(void) __init;
-+extern void exit_ext2_xattr_user(void);
-+
-+# else /* CONFIG_EXT2_FS_XATTR_USER */
-+
-+static inline int
-+init_ext2_xattr_user(void)
-+{
-+ return 0;
-+}
-+
-+static inline void
-+exit_ext2_xattr_user(void)
-+{
-+}
-+
-+# endif /* CONFIG_EXT2_FS_XATTR_USER */
-+
-+#endif /* __KERNEL__ */
++extern void register_cache(struct cache_definition *);
++extern void unregister_cache(struct cache_definition *);
+Index: linux-2.4.19-pre1/include/linux/errno.h
+===================================================================
+--- linux-2.4.19-pre1.orig/include/linux/errno.h 2001-02-10 01:46:13.000000000 +0300
++++ linux-2.4.19-pre1/include/linux/errno.h 2004-01-14 01:11:49.000000000 +0300
+@@ -23,4 +23,8 @@
+
+ #endif
+
++/* Defined for extended attributes */
++#define ENOATTR ENODATA /* No such attribute */
++#define ENOTSUP EOPNOTSUPP /* Operation not supported */
+
-Index: linux-2.4.19-pre1/include/linux/ext3_fs.h
+ #endif
+Index: linux-2.4.19-pre1/include/linux/ext2_fs.h
===================================================================
---- linux-2.4.19-pre1.orig/include/linux/ext3_fs.h 2003-11-21 03:51:02.000000000 +0300
-+++ linux-2.4.19-pre1/include/linux/ext3_fs.h 2003-11-21 03:51:05.000000000 +0300
-@@ -63,8 +63,6 @@
+--- linux-2.4.19-pre1.orig/include/linux/ext2_fs.h 2001-11-22 22:46:52.000000000 +0300
++++ linux-2.4.19-pre1/include/linux/ext2_fs.h 2004-01-14 01:18:00.000000000 +0300
+@@ -57,8 +57,6 @@
*/
- #define EXT3_BAD_INO 1 /* Bad blocks inode */
- #define EXT3_ROOT_INO 2 /* Root inode */
--#define EXT3_ACL_IDX_INO 3 /* ACL inode */
--#define EXT3_ACL_DATA_INO 4 /* ACL inode */
- #define EXT3_BOOT_LOADER_INO 5 /* Boot loader inode */
- #define EXT3_UNDEL_DIR_INO 6 /* Undelete directory inode */
- #define EXT3_RESIZE_INO 7 /* Reserved group descriptors inode */
-@@ -94,7 +92,6 @@
+ #define EXT2_BAD_INO 1 /* Bad blocks inode */
+ #define EXT2_ROOT_INO 2 /* Root inode */
+-#define EXT2_ACL_IDX_INO 3 /* ACL inode */
+-#define EXT2_ACL_DATA_INO 4 /* ACL inode */
+ #define EXT2_BOOT_LOADER_INO 5 /* Boot loader inode */
+ #define EXT2_UNDEL_DIR_INO 6 /* Undelete directory inode */
+
+@@ -86,7 +84,6 @@
#else
- # define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size)
+ # define EXT2_BLOCK_SIZE(s) (EXT2_MIN_BLOCK_SIZE << (s)->s_log_block_size)
#endif
--#define EXT3_ACLE_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_acl_entry))
- #define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32))
+-#define EXT2_ACLE_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (struct ext2_acl_entry))
+ #define EXT2_ADDR_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (__u32))
#ifdef __KERNEL__
- # define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
-@@ -129,28 +126,6 @@
+ # define EXT2_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
+@@ -121,28 +118,6 @@
#endif
/*
- * ACL structures
- */
--struct ext3_acl_header /* Header of Access Control Lists */
+-struct ext2_acl_header /* Header of Access Control Lists */
-{
- __u32 aclh_size;
- __u32 aclh_file_count;
- __u32 aclh_first_acle;
-};
-
--struct ext3_acl_entry /* Access Control List Entry */
+-struct ext2_acl_entry /* Access Control List Entry */
-{
- __u32 acle_size;
- __u16 acle_perms; /* Access permissions */
-/*
* Structure of a blocks group descriptor
*/
- struct ext3_group_desc
-@@ -344,6 +319,7 @@
- #define EXT3_MOUNT_WRITEBACK_DATA 0x0C00 /* No data ordering */
- #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */
- #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */
-+#define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */
-
- /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
- #ifndef _LINUX_EXT2_FS_H
-@@ -520,7 +496,7 @@
- #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */
- #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */
+ struct ext2_group_desc
+@@ -314,6 +289,7 @@
+ #define EXT2_MOUNT_ERRORS_PANIC 0x0040 /* Panic on errors */
+ #define EXT2_MOUNT_MINIX_DF 0x0080 /* Mimics the Minix statfs */
+ #define EXT2_MOUNT_NO_UID32 0x0200 /* Disable 32-bit UIDs */
++#define EXT2_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */
--#define EXT3_FEATURE_COMPAT_SUPP 0
-+#define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
- #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \
- EXT3_FEATURE_INCOMPAT_RECOVER)
- #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \
-@@ -703,6 +679,7 @@
- extern unsigned long ext3_count_free (struct buffer_head *, unsigned);
+ #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt
+ #define set_opt(o, opt) o |= EXT2_MOUNT_##opt
+@@ -397,6 +373,7 @@
- /* inode.c */
-+extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int);
- extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
- extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
+ #ifdef __KERNEL__
+ #define EXT2_SB(sb) (&((sb)->u.ext2_sb))
++#define EXT2_I(inode) (&((inode)->u.ext2_i))
+ #else
+ /* Assume that user mode programs are passing in an ext2fs superblock, not
+ * a kernel struct super_block. This will allow us to call the feature-test
+@@ -466,7 +443,7 @@
+ #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008
+ #define EXT2_FEATURE_INCOMPAT_ANY 0xffffffff
-@@ -771,8 +748,10 @@
+-#define EXT2_FEATURE_COMPAT_SUPP 0
++#define EXT2_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
+ #define EXT2_FEATURE_INCOMPAT_SUPP EXT2_FEATURE_INCOMPAT_FILETYPE
+ #define EXT2_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \
+ EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \
+@@ -623,8 +600,10 @@
/* namei.c */
- extern struct inode_operations ext3_dir_inode_operations;
-+extern struct inode_operations ext3_special_inode_operations;
+ extern struct inode_operations ext2_dir_inode_operations;
++extern struct inode_operations ext2_special_inode_operations;
/* symlink.c */
-+extern struct inode_operations ext3_symlink_inode_operations;
- extern struct inode_operations ext3_fast_symlink_inode_operations;
-
-
-Index: linux-2.4.19-pre1/include/linux/ext3_jbd.h
-===================================================================
---- linux-2.4.19-pre1.orig/include/linux/ext3_jbd.h 2003-11-21 03:51:02.000000000 +0300
-+++ linux-2.4.19-pre1/include/linux/ext3_jbd.h 2003-11-21 03:51:05.000000000 +0300
-@@ -30,13 +30,19 @@
-
- #define EXT3_SINGLEDATA_TRANS_BLOCKS 8
-
-+/* Extended attributes may touch two data buffers, two bitmap buffers,
-+ * and two group and summaries. */
-+
-+#define EXT3_XATTR_TRANS_BLOCKS 8
-+
- /* Define the minimum size for a transaction which modifies data. This
- * needs to take into account the fact that we may end up modifying two
- * quota files too (one for the group, one for the user quota). The
- * superblock only gets updated once, of course, so don't bother
- * counting that again for the quota updates. */
-
--#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS - 2)
-+#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS + \
-+ EXT3_XATTR_TRANS_BLOCKS - 2)
-
- extern int ext3_writepage_trans_blocks(struct inode *inode);
++extern struct inode_operations ext2_symlink_inode_operations;
+ extern struct inode_operations ext2_fast_symlink_inode_operations;
-Index: linux-2.4.19-pre1/include/linux/ext3_xattr.h
+ #endif /* __KERNEL__ */
+Index: linux-2.4.19-pre1/include/linux/ext2_xattr.h
===================================================================
---- linux-2.4.19-pre1.orig/include/linux/ext3_xattr.h 2003-11-21 03:51:05.000000000 +0300
-+++ linux-2.4.19-pre1/include/linux/ext3_xattr.h 2003-11-21 03:51:05.000000000 +0300
+--- linux-2.4.19-pre1.orig/include/linux/ext2_xattr.h 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.4.19-pre1/include/linux/ext2_xattr.h 2004-01-14 01:18:01.000000000 +0300
@@ -0,0 +1,157 @@
+/*
-+ File: linux/ext3_xattr.h
++ File: linux/ext2_xattr.h
+
-+ On-disk format of extended attributes for the ext3 filesystem.
++ On-disk format of extended attributes for the ext2 filesystem.
+
+ (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
+*/
+#include <linux/xattr.h>
+
+/* Magic value in attribute blocks */
-+#define EXT3_XATTR_MAGIC 0xEA020000
++#define EXT2_XATTR_MAGIC 0xEA020000
+
+/* Maximum number of references to one attribute block */
-+#define EXT3_XATTR_REFCOUNT_MAX 1024
++#define EXT2_XATTR_REFCOUNT_MAX 1024
+
+/* Name indexes */
-+#define EXT3_XATTR_INDEX_MAX 10
-+#define EXT3_XATTR_INDEX_USER 1
-+#define EXT3_XATTR_INDEX_POSIX_ACL_ACCESS 2
-+#define EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT 3
++#define EXT2_XATTR_INDEX_MAX 10
++#define EXT2_XATTR_INDEX_USER 1
++#define EXT2_XATTR_INDEX_POSIX_ACL_ACCESS 2
++#define EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT 3
+
-+struct ext3_xattr_header {
++struct ext2_xattr_header {
+ __u32 h_magic; /* magic number for identification */
+ __u32 h_refcount; /* reference count */
+ __u32 h_blocks; /* number of disk blocks used */
+ __u32 h_reserved[4]; /* zero right now */
+};
+
-+struct ext3_xattr_entry {
++struct ext2_xattr_entry {
+ __u8 e_name_len; /* length of name */
+ __u8 e_name_index; /* attribute name index */
+ __u16 e_value_offs; /* offset in disk block of value */
+ char e_name[0]; /* attribute name */
+};
+
-+#define EXT3_XATTR_PAD_BITS 2
-+#define EXT3_XATTR_PAD (1<<EXT3_XATTR_PAD_BITS)
-+#define EXT3_XATTR_ROUND (EXT3_XATTR_PAD-1)
-+#define EXT3_XATTR_LEN(name_len) \
-+ (((name_len) + EXT3_XATTR_ROUND + \
-+ sizeof(struct ext3_xattr_entry)) & ~EXT3_XATTR_ROUND)
-+#define EXT3_XATTR_NEXT(entry) \
-+ ( (struct ext3_xattr_entry *)( \
-+ (char *)(entry) + EXT3_XATTR_LEN((entry)->e_name_len)) )
-+#define EXT3_XATTR_SIZE(size) \
-+ (((size) + EXT3_XATTR_ROUND) & ~EXT3_XATTR_ROUND)
++#define EXT2_XATTR_PAD_BITS 2
++#define EXT2_XATTR_PAD (1<<EXT2_XATTR_PAD_BITS)
++#define EXT2_XATTR_ROUND (EXT2_XATTR_PAD-1)
++#define EXT2_XATTR_LEN(name_len) \
++ (((name_len) + EXT2_XATTR_ROUND + \
++ sizeof(struct ext2_xattr_entry)) & ~EXT2_XATTR_ROUND)
++#define EXT2_XATTR_NEXT(entry) \
++ ( (struct ext2_xattr_entry *)( \
++ (char *)(entry) + EXT2_XATTR_LEN((entry)->e_name_len)) )
++#define EXT2_XATTR_SIZE(size) \
++ (((size) + EXT2_XATTR_ROUND) & ~EXT2_XATTR_ROUND)
+
+#ifdef __KERNEL__
+
-+# ifdef CONFIG_EXT3_FS_XATTR
++# ifdef CONFIG_EXT2_FS_XATTR
+
-+struct ext3_xattr_handler {
++struct ext2_xattr_handler {
+ char *prefix;
+ size_t (*list)(char *list, struct inode *inode, const char *name,
+ int name_len);
+ size_t size, int flags);
+};
+
-+extern int ext3_xattr_register(int, struct ext3_xattr_handler *);
-+extern void ext3_xattr_unregister(int, struct ext3_xattr_handler *);
++extern int ext2_xattr_register(int, struct ext2_xattr_handler *);
++extern void ext2_xattr_unregister(int, struct ext2_xattr_handler *);
+
-+extern int ext3_setxattr(struct dentry *, const char *, const void *, size_t, int);
-+extern ssize_t ext3_getxattr(struct dentry *, const char *, void *, size_t);
-+extern ssize_t ext3_listxattr(struct dentry *, char *, size_t);
-+extern int ext3_removexattr(struct dentry *, const char *);
++extern int ext2_setxattr(struct dentry *, const char *, const void *, size_t, int);
++extern ssize_t ext2_getxattr(struct dentry *, const char *, void *, size_t);
++extern ssize_t ext2_listxattr(struct dentry *, char *, size_t);
++extern int ext2_removexattr(struct dentry *, const char *);
+
-+extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t);
-+extern int ext3_xattr_list(struct inode *, char *, size_t);
-+extern int ext3_xattr_set(handle_t *handle, struct inode *, int, const char *, const void *, size_t, int);
++extern int ext2_xattr_get(struct inode *, int, const char *, void *, size_t);
++extern int ext2_xattr_list(struct inode *, char *, size_t);
++extern int ext2_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
+
-+extern void ext3_xattr_delete_inode(handle_t *, struct inode *);
-+extern void ext3_xattr_put_super(struct super_block *);
++extern void ext2_xattr_delete_inode(struct inode *);
++extern void ext2_xattr_put_super(struct super_block *);
+
-+extern int init_ext3_xattr(void) __init;
-+extern void exit_ext3_xattr(void);
++extern int init_ext2_xattr(void) __init;
++extern void exit_ext2_xattr(void);
+
-+# else /* CONFIG_EXT3_FS_XATTR */
-+# define ext3_setxattr NULL
-+# define ext3_getxattr NULL
-+# define ext3_listxattr NULL
-+# define ext3_removexattr NULL
++# else /* CONFIG_EXT2_FS_XATTR */
++# define ext2_setxattr NULL
++# define ext2_getxattr NULL
++# define ext2_listxattr NULL
++# define ext2_removexattr NULL
+
+static inline int
-+ext3_xattr_get(struct inode *inode, int name_index, const char *name,
-+ void *buffer, size_t size)
++ext2_xattr_get(struct inode *inode, int name_index,
++ const char *name, void *buffer, size_t size)
+{
+ return -ENOTSUP;
+}
+
+static inline int
-+ext3_xattr_list(struct inode *inode, void *buffer, size_t size)
++ext2_xattr_list(struct inode *inode, char *buffer, size_t size)
+{
+ return -ENOTSUP;
+}
+
+static inline int
-+ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index,
-+ const char *name, const void *value, size_t size, int flags)
++ext2_xattr_set(struct inode *inode, int name_index, const char *name,
++ const void *value, size_t size, int flags)
+{
+ return -ENOTSUP;
+}
+
+static inline void
-+ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
++ext2_xattr_delete_inode(struct inode *inode)
+{
+}
+
+static inline void
-+ext3_xattr_put_super(struct super_block *sb)
++ext2_xattr_put_super(struct super_block *sb)
+{
+}
+
+static inline int
-+init_ext3_xattr(void)
++init_ext2_xattr(void)
+{
+ return 0;
+}
+
+static inline void
-+exit_ext3_xattr(void)
++exit_ext2_xattr(void)
+{
+}
+
-+# endif /* CONFIG_EXT3_FS_XATTR */
++# endif /* CONFIG_EXT2_FS_XATTR */
+
-+# ifdef CONFIG_EXT3_FS_XATTR_USER
++# ifdef CONFIG_EXT2_FS_XATTR_USER
+
-+extern int init_ext3_xattr_user(void) __init;
-+extern void exit_ext3_xattr_user(void);
++extern int init_ext2_xattr_user(void) __init;
++extern void exit_ext2_xattr_user(void);
+
-+# else /* CONFIG_EXT3_FS_XATTR_USER */
++# else /* CONFIG_EXT2_FS_XATTR_USER */
+
+static inline int
-+init_ext3_xattr_user(void)
++init_ext2_xattr_user(void)
+{
+ return 0;
+}
+
+static inline void
-+exit_ext3_xattr_user(void)
++exit_ext2_xattr_user(void)
+{
+}
+
-+#endif /* CONFIG_EXT3_FS_XATTR_USER */
++# endif /* CONFIG_EXT2_FS_XATTR_USER */
+
+#endif /* __KERNEL__ */
+
-Index: linux-2.4.19-pre1/include/linux/fs.h
+Index: linux-2.4.19-pre1/include/linux/ext3_fs.h
===================================================================
---- linux-2.4.19-pre1.orig/include/linux/fs.h 2003-11-21 03:51:00.000000000 +0300
-+++ linux-2.4.19-pre1/include/linux/fs.h 2003-11-21 03:51:05.000000000 +0300
-@@ -872,6 +872,10 @@
- int (*setattr) (struct dentry *, struct iattr *);
- int (*setattr_raw) (struct inode *, struct iattr *);
- int (*getattr) (struct dentry *, struct iattr *);
-+ int (*setxattr) (struct dentry *, const char *, const void *, size_t, int);
-+ ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
-+ ssize_t (*listxattr) (struct dentry *, char *, size_t);
-+ int (*removexattr) (struct dentry *, const char *);
- };
+--- linux-2.4.19-pre1.orig/include/linux/ext3_fs.h 2004-01-14 01:11:49.000000000 +0300
++++ linux-2.4.19-pre1/include/linux/ext3_fs.h 2004-01-14 01:11:49.000000000 +0300
+@@ -63,8 +63,6 @@
+ */
+ #define EXT3_BAD_INO 1 /* Bad blocks inode */
+ #define EXT3_ROOT_INO 2 /* Root inode */
+-#define EXT3_ACL_IDX_INO 3 /* ACL inode */
+-#define EXT3_ACL_DATA_INO 4 /* ACL inode */
+ #define EXT3_BOOT_LOADER_INO 5 /* Boot loader inode */
+ #define EXT3_UNDEL_DIR_INO 6 /* Undelete directory inode */
+ #define EXT3_RESIZE_INO 7 /* Reserved group descriptors inode */
+@@ -94,7 +92,6 @@
+ #else
+ # define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size)
+ #endif
+-#define EXT3_ACLE_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_acl_entry))
+ #define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32))
+ #ifdef __KERNEL__
+ # define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
+@@ -129,28 +126,6 @@
+ #endif
- struct seq_file;
-Index: linux-2.4.19-pre1/include/linux/mbcache.h
-===================================================================
---- linux-2.4.19-pre1.orig/include/linux/mbcache.h 2003-11-21 03:51:05.000000000 +0300
-+++ linux-2.4.19-pre1/include/linux/mbcache.h 2003-11-21 03:51:05.000000000 +0300
-@@ -0,0 +1,69 @@
-+/*
-+ File: linux/mbcache.h
-+
-+ (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
-+*/
-+
-+/* Hardwire the number of additional indexes */
-+#define MB_CACHE_INDEXES_COUNT 1
-+
-+struct mb_cache_entry;
-+
-+struct mb_cache_op {
-+ int (*free)(struct mb_cache_entry *, int);
-+};
-+
-+struct mb_cache {
-+ struct list_head c_cache_list;
-+ const char *c_name;
-+ struct mb_cache_op c_op;
-+ atomic_t c_entry_count;
-+ int c_bucket_count;
-+#ifndef MB_CACHE_INDEXES_COUNT
-+ int c_indexes_count;
-+#endif
-+ kmem_cache_t *c_entry_cache;
-+ struct list_head *c_block_hash;
-+ struct list_head *c_indexes_hash[0];
-+};
-+
-+struct mb_cache_entry_index {
-+ struct list_head o_list;
-+ unsigned int o_key;
-+};
-+
-+struct mb_cache_entry {
-+ struct list_head e_lru_list;
-+ struct mb_cache *e_cache;
-+ atomic_t e_used;
-+ kdev_t e_dev;
-+ unsigned long e_block;
-+ struct list_head e_block_list;
-+ struct mb_cache_entry_index e_indexes[0];
-+};
-+
-+/* Functions on caches */
-+
-+struct mb_cache * mb_cache_create(const char *, struct mb_cache_op *, size_t,
-+ int, int);
-+void mb_cache_shrink(struct mb_cache *, kdev_t);
-+void mb_cache_destroy(struct mb_cache *);
-+
-+/* Functions on cache entries */
-+
-+struct mb_cache_entry *mb_cache_entry_alloc(struct mb_cache *);
-+int mb_cache_entry_insert(struct mb_cache_entry *, kdev_t, unsigned long,
-+ unsigned int[]);
-+void mb_cache_entry_rehash(struct mb_cache_entry *, unsigned int[]);
-+void mb_cache_entry_release(struct mb_cache_entry *);
-+void mb_cache_entry_takeout(struct mb_cache_entry *);
-+void mb_cache_entry_free(struct mb_cache_entry *);
-+struct mb_cache_entry *mb_cache_entry_dup(struct mb_cache_entry *);
-+struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *, kdev_t,
-+ unsigned long);
-+#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
-+struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache, int,
-+ kdev_t, unsigned int);
-+struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache_entry *, int,
-+ kdev_t, unsigned int);
-+#endif
-Index: linux-2.4.19-pre1/kernel/ksyms.c
-===================================================================
---- linux-2.4.19-pre1.orig/kernel/ksyms.c 2003-11-21 03:50:59.000000000 +0300
-+++ linux-2.4.19-pre1/kernel/ksyms.c 2003-11-21 03:51:05.000000000 +0300
-@@ -11,6 +11,7 @@
+ /*
+- * ACL structures
+- */
+-struct ext3_acl_header /* Header of Access Control Lists */
+-{
+- __u32 aclh_size;
+- __u32 aclh_file_count;
+- __u32 aclh_acle_count;
+- __u32 aclh_first_acle;
+-};
+-
+-struct ext3_acl_entry /* Access Control List Entry */
+-{
+- __u32 acle_size;
+- __u16 acle_perms; /* Access permissions */
+- __u16 acle_type; /* Type of entry */
+- __u16 acle_tag; /* User or group identity */
+- __u16 acle_pad1;
+- __u32 acle_next; /* Pointer on next entry for the */
+- /* same inode or on next free entry */
+-};
+-
+-/*
+ * Structure of a blocks group descriptor
+ */
+ struct ext3_group_desc
+@@ -344,6 +319,7 @@
+ #define EXT3_MOUNT_WRITEBACK_DATA 0x0C00 /* No data ordering */
+ #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */
+ #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */
++#define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */
+
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef _LINUX_EXT2_FS_H
+@@ -520,7 +496,7 @@
+ #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */
+ #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */
- #include <linux/config.h>
- #include <linux/slab.h>
-+#include <linux/cache_def.h>
- #include <linux/module.h>
- #include <linux/blkdev.h>
- #include <linux/cdrom.h>
-@@ -88,6 +89,7 @@
- EXPORT_SYMBOL(exit_files);
- EXPORT_SYMBOL(exit_fs);
- EXPORT_SYMBOL(exit_sighand);
-+EXPORT_SYMBOL(copy_fs_struct);
+-#define EXT3_FEATURE_COMPAT_SUPP 0
++#define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
+ #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \
+ EXT3_FEATURE_INCOMPAT_RECOVER)
+ #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \
+@@ -703,6 +679,7 @@
+ extern unsigned long ext3_count_free (struct buffer_head *, unsigned);
- /* internal kernel memory management */
- EXPORT_SYMBOL(_alloc_pages);
-@@ -104,6 +106,8 @@
- EXPORT_SYMBOL(kmem_cache_shrink);
- EXPORT_SYMBOL(kmem_cache_alloc);
- EXPORT_SYMBOL(kmem_cache_free);
-+EXPORT_SYMBOL(register_cache);
-+EXPORT_SYMBOL(unregister_cache);
- EXPORT_SYMBOL(kmalloc);
- EXPORT_SYMBOL(kfree);
- EXPORT_SYMBOL(vfree);
-Index: linux-2.4.19-pre1/mm/vmscan.c
+ /* inode.c */
++extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int);
+ extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
+ extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
+
+@@ -771,8 +748,10 @@
+
+ /* namei.c */
+ extern struct inode_operations ext3_dir_inode_operations;
++extern struct inode_operations ext3_special_inode_operations;
+
+ /* symlink.c */
++extern struct inode_operations ext3_symlink_inode_operations;
+ extern struct inode_operations ext3_fast_symlink_inode_operations;
+
+
+Index: linux-2.4.19-pre1/include/linux/ext3_jbd.h
===================================================================
---- linux-2.4.19-pre1.orig/mm/vmscan.c 2003-11-20 19:01:38.000000000 +0300
-+++ linux-2.4.19-pre1/mm/vmscan.c 2003-11-21 03:51:05.000000000 +0300
-@@ -15,6 +15,7 @@
- #include <linux/kernel_stat.h>
- #include <linux/swap.h>
- #include <linux/swapctl.h>
-+#include <linux/cache_def.h>
- #include <linux/smp_lock.h>
- #include <linux/pagemap.h>
- #include <linux/init.h>
-@@ -32,6 +33,39 @@
- */
- #define DEF_PRIORITY (6)
+--- linux-2.4.19-pre1.orig/include/linux/ext3_jbd.h 2004-01-14 01:11:49.000000000 +0300
++++ linux-2.4.19-pre1/include/linux/ext3_jbd.h 2004-01-14 01:18:04.000000000 +0300
+@@ -30,13 +30,19 @@
-+static DECLARE_MUTEX(other_caches_sem);
-+static LIST_HEAD(cache_definitions);
-+
-+void register_cache(struct cache_definition *cache)
-+{
-+ down(&other_caches_sem);
-+ list_add(&cache->link, &cache_definitions);
-+ up(&other_caches_sem);
-+}
-+
-+void unregister_cache(struct cache_definition *cache)
-+{
-+ down(&other_caches_sem);
-+ list_del(&cache->link);
-+ up(&other_caches_sem);
-+}
-+
-+static void shrink_other_caches(unsigned int priority, int gfp_mask)
-+{
-+ struct list_head *p;
-+
-+ if (down_trylock(&other_caches_sem))
-+ return;
-+
-+ list_for_each_prev(p, &cache_definitions) {
-+ struct cache_definition *cache =
-+ list_entry(p, struct cache_definition, link);
+ #define EXT3_SINGLEDATA_TRANS_BLOCKS 8
+
++/* Extended attributes may touch two data buffers, two bitmap buffers,
++ * and two group and summaries. */
+
-+ cache->shrink(priority, gfp_mask);
-+ }
-+ up(&other_caches_sem);
-+}
++#define EXT3_XATTR_TRANS_BLOCKS 8
+
- /*
- * The swap-out function returns 1 if it successfully
- * scanned all the pages it was asked to (`count').
-@@ -578,6 +612,7 @@
+ /* Define the minimum size for a transaction which modifies data. This
+ * needs to take into account the fact that we may end up modifying two
+ * quota files too (one for the group, one for the user quota). The
+ * superblock only gets updated once, of course, so don't bother
+ * counting that again for the quota updates. */
- shrink_dcache_memory(priority, gfp_mask);
- shrink_icache_memory(priority, gfp_mask);
-+ shrink_other_caches(priority, gfp_mask);
- #ifdef CONFIG_QUOTA
- shrink_dqcache_memory(DEF_PRIORITY, gfp_mask);
- #endif
-Index: linux-2.4.19-pre1/fs/ext3/ext3-exports.c
-===================================================================
---- linux-2.4.19-pre1.orig/fs/ext3/ext3-exports.c 2003-11-21 03:51:05.000000000 +0300
-+++ linux-2.4.19-pre1/fs/ext3/ext3-exports.c 2003-11-21 03:51:05.000000000 +0300
-@@ -0,0 +1,13 @@
-+#include <linux/config.h>
-+#include <linux/module.h>
-+#include <linux/ext3_fs.h>
-+#include <linux/ext3_jbd.h>
-+#include <linux/ext3_xattr.h>
-+
-+EXPORT_SYMBOL(ext3_force_commit);
-+EXPORT_SYMBOL(ext3_bread);
-+EXPORT_SYMBOL(ext3_xattr_register);
-+EXPORT_SYMBOL(ext3_xattr_unregister);
-+EXPORT_SYMBOL(ext3_xattr_get);
-+EXPORT_SYMBOL(ext3_xattr_list);
-+EXPORT_SYMBOL(ext3_xattr_set);
-Index: linux-2.4.19-pre1/include/linux/xattr.h
+-#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS - 2)
++#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS + \
++ EXT3_XATTR_TRANS_BLOCKS - 2)
+
+ extern int ext3_writepage_trans_blocks(struct inode *inode);
+
+Index: linux-2.4.19-pre1/include/linux/ext3_xattr.h
===================================================================
---- linux-2.4.19-pre1.orig/include/linux/xattr.h 2003-11-21 03:51:05.000000000 +0300
-+++ linux-2.4.19-pre1/include/linux/xattr.h 2003-11-21 03:51:05.000000000 +0300
-@@ -0,0 +1,15 @@
+--- linux-2.4.19-pre1.orig/include/linux/ext3_xattr.h 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.4.19-pre1/include/linux/ext3_xattr.h 2004-01-14 01:11:49.000000000 +0300
+@@ -0,0 +1,157 @@
+/*
-+ File: linux/xattr.h
++ File: linux/ext3_xattr.h
+
-+ Extended attributes handling.
++ On-disk format of extended attributes for the ext3 filesystem.
+
-+ Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org>
-+ Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved.
++ (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
+*/
-+#ifndef _LINUX_XATTR_H
-+#define _LINUX_XATTR_H
+
-+#define XATTR_CREATE 0x1 /* set the value, fail if attr already exists */
-+#define XATTR_REPLACE 0x2 /* set the value, fail if attr does not exist */
++#include <linux/config.h>
++#include <linux/init.h>
++#include <linux/xattr.h>
+
-+#endif /* _LINUX_XATTR_H */
-Index: linux-2.4.19-pre1/arch/i386/kernel/entry.S
-===================================================================
---- linux-2.4.19-pre1.orig/arch/i386/kernel/entry.S 2003-11-21 03:38:55.000000000 +0300
-+++ linux-2.4.19-pre1/arch/i386/kernel/entry.S 2003-11-21 03:51:05.000000000 +0300
-@@ -622,18 +622,18 @@
- .long SYMBOL_NAME(sys_ni_syscall) /* Reserved for Security */
- .long SYMBOL_NAME(sys_gettid)
- .long SYMBOL_NAME(sys_readahead) /* 225 */
-- .long SYMBOL_NAME(sys_ni_syscall) /* reserved for setxattr */
-- .long SYMBOL_NAME(sys_ni_syscall) /* reserved for lsetxattr */
-- .long SYMBOL_NAME(sys_ni_syscall) /* reserved for fsetxattr */
-- .long SYMBOL_NAME(sys_ni_syscall) /* reserved for getxattr */
-- .long SYMBOL_NAME(sys_ni_syscall) /* 230 reserved for lgetxattr */
-- .long SYMBOL_NAME(sys_ni_syscall) /* reserved for fgetxattr */
-- .long SYMBOL_NAME(sys_ni_syscall) /* reserved for listxattr */
-- .long SYMBOL_NAME(sys_ni_syscall) /* reserved for llistxattr */
-- .long SYMBOL_NAME(sys_ni_syscall) /* reserved for flistxattr */
-- .long SYMBOL_NAME(sys_ni_syscall) /* 235 reserved for removexattr */
-- .long SYMBOL_NAME(sys_ni_syscall) /* reserved for lremovexattr */
-- .long SYMBOL_NAME(sys_ni_syscall) /* reserved for fremovexattr */
-+ .long SYMBOL_NAME(sys_setxattr)
-+ .long SYMBOL_NAME(sys_lsetxattr)
-+ .long SYMBOL_NAME(sys_fsetxattr)
-+ .long SYMBOL_NAME(sys_getxattr)
-+ .long SYMBOL_NAME(sys_lgetxattr) /* 230 */
-+ .long SYMBOL_NAME(sys_fgetxattr)
-+ .long SYMBOL_NAME(sys_listxattr)
-+ .long SYMBOL_NAME(sys_llistxattr)
-+ .long SYMBOL_NAME(sys_flistxattr)
-+ .long SYMBOL_NAME(sys_removexattr) /* 235 */
-+ .long SYMBOL_NAME(sys_lremovexattr)
-+ .long SYMBOL_NAME(sys_fremovexattr)
-
- .rept NR_syscalls-(.-sys_call_table)/4
- .long SYMBOL_NAME(sys_ni_syscall)
-Index: linux-2.4.19-pre1/fs/xattr.c
-===================================================================
---- linux-2.4.19-pre1.orig/fs/xattr.c 2003-11-21 03:51:05.000000000 +0300
-+++ linux-2.4.19-pre1/fs/xattr.c 2003-11-21 03:51:05.000000000 +0300
-@@ -0,0 +1,355 @@
-+/*
-+ File: fs/xattr.c
++/* Magic value in attribute blocks */
++#define EXT3_XATTR_MAGIC 0xEA020000
+
-+ Extended attribute handling.
++/* Maximum number of references to one attribute block */
++#define EXT3_XATTR_REFCOUNT_MAX 1024
+
-+ Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org>
-+ Copyright (C) 2001 SGI - Silicon Graphics, Inc <linux-xfs@oss.sgi.com>
-+ */
-+#include <linux/fs.h>
-+#include <linux/slab.h>
-+#include <linux/vmalloc.h>
-+#include <linux/smp_lock.h>
-+#include <linux/file.h>
-+#include <linux/xattr.h>
-+#include <asm/uaccess.h>
++/* Name indexes */
++#define EXT3_XATTR_INDEX_MAX 10
++#define EXT3_XATTR_INDEX_USER 1
++#define EXT3_XATTR_INDEX_POSIX_ACL_ACCESS 2
++#define EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT 3
+
-+/*
-+ * Extended attribute memory allocation wrappers, originally
-+ * based on the Intermezzo PRESTO_ALLOC/PRESTO_FREE macros.
-+ * The vmalloc use here is very uncommon - extended attributes
-+ * are supposed to be small chunks of metadata, and it is quite
-+ * unusual to have very many extended attributes, so lists tend
-+ * to be quite short as well. The 64K upper limit is derived
-+ * from the extended attribute size limit used by XFS.
-+ * Intentionally allow zero @size for value/list size requests.
-+ */
-+static void *
-+xattr_alloc(size_t size, size_t limit)
-+{
-+ void *ptr;
++struct ext3_xattr_header {
++ __u32 h_magic; /* magic number for identification */
++ __u32 h_refcount; /* reference count */
++ __u32 h_blocks; /* number of disk blocks used */
++ __u32 h_hash; /* hash value of all attributes */
++ __u32 h_reserved[4]; /* zero right now */
++};
+
-+ if (size > limit)
-+ return ERR_PTR(-E2BIG);
++struct ext3_xattr_entry {
++ __u8 e_name_len; /* length of name */
++ __u8 e_name_index; /* attribute name index */
++ __u16 e_value_offs; /* offset in disk block of value */
++ __u32 e_value_block; /* disk block attribute is stored on (n/i) */
++ __u32 e_value_size; /* size of attribute value */
++ __u32 e_hash; /* hash value of name and value */
++ char e_name[0]; /* attribute name */
++};
+
-+ if (!size) /* size request, no buffer is needed */
-+ return NULL;
-+ else if (size <= PAGE_SIZE)
-+ ptr = kmalloc((unsigned long) size, GFP_KERNEL);
-+ else
-+ ptr = vmalloc((unsigned long) size);
-+ if (!ptr)
-+ return ERR_PTR(-ENOMEM);
-+ return ptr;
-+}
++#define EXT3_XATTR_PAD_BITS 2
++#define EXT3_XATTR_PAD (1<<EXT3_XATTR_PAD_BITS)
++#define EXT3_XATTR_ROUND (EXT3_XATTR_PAD-1)
++#define EXT3_XATTR_LEN(name_len) \
++ (((name_len) + EXT3_XATTR_ROUND + \
++ sizeof(struct ext3_xattr_entry)) & ~EXT3_XATTR_ROUND)
++#define EXT3_XATTR_NEXT(entry) \
++ ( (struct ext3_xattr_entry *)( \
++ (char *)(entry) + EXT3_XATTR_LEN((entry)->e_name_len)) )
++#define EXT3_XATTR_SIZE(size) \
++ (((size) + EXT3_XATTR_ROUND) & ~EXT3_XATTR_ROUND)
+
-+static void
-+xattr_free(void *ptr, size_t size)
-+{
-+ if (!size) /* size request, no buffer was needed */
-+ return;
-+ else if (size <= PAGE_SIZE)
-+ kfree(ptr);
-+ else
-+ vfree(ptr);
-+}
++#ifdef __KERNEL__
+
-+/*
-+ * Extended attribute SET operations
-+ */
-+static long
-+setxattr(struct dentry *d, char *name, void *value, size_t size, int flags)
-+{
-+ int error;
-+ void *kvalue;
-+ char kname[XATTR_NAME_MAX + 1];
++# ifdef CONFIG_EXT3_FS_XATTR
+
-+ if (flags & ~(XATTR_CREATE|XATTR_REPLACE))
-+ return -EINVAL;
++struct ext3_xattr_handler {
++ char *prefix;
++ size_t (*list)(char *list, struct inode *inode, const char *name,
++ int name_len);
++ int (*get)(struct inode *inode, const char *name, void *buffer,
++ size_t size);
++ int (*set)(struct inode *inode, const char *name, const void *buffer,
++ size_t size, int flags);
++};
+
-+ error = strncpy_from_user(kname, name, sizeof(kname));
-+ if (error == 0 || error == sizeof(kname))
-+ error = -ERANGE;
-+ if (error < 0)
-+ return error;
++extern int ext3_xattr_register(int, struct ext3_xattr_handler *);
++extern void ext3_xattr_unregister(int, struct ext3_xattr_handler *);
+
-+ kvalue = xattr_alloc(size, XATTR_SIZE_MAX);
-+ if (IS_ERR(kvalue))
-+ return PTR_ERR(kvalue);
++extern int ext3_setxattr(struct dentry *, const char *, const void *, size_t, int);
++extern ssize_t ext3_getxattr(struct dentry *, const char *, void *, size_t);
++extern ssize_t ext3_listxattr(struct dentry *, char *, size_t);
++extern int ext3_removexattr(struct dentry *, const char *);
+
-+ if (size > 0 && copy_from_user(kvalue, value, size)) {
-+ xattr_free(kvalue, size);
-+ return -EFAULT;
-+ }
++extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t);
++extern int ext3_xattr_list(struct inode *, char *, size_t);
++extern int ext3_xattr_set(handle_t *handle, struct inode *, int, const char *, const void *, size_t, int);
+
-+ error = -EOPNOTSUPP;
-+ if (d->d_inode->i_op && d->d_inode->i_op->setxattr) {
-+ down(&d->d_inode->i_sem);
-+ lock_kernel();
-+ error = d->d_inode->i_op->setxattr(d, kname, kvalue, size, flags);
-+ unlock_kernel();
-+ up(&d->d_inode->i_sem);
-+ }
++extern void ext3_xattr_delete_inode(handle_t *, struct inode *);
++extern void ext3_xattr_put_super(struct super_block *);
+
-+ xattr_free(kvalue, size);
-+ return error;
-+}
++extern int init_ext3_xattr(void) __init;
++extern void exit_ext3_xattr(void);
+
-+asmlinkage long
-+sys_setxattr(char *path, char *name, void *value, size_t size, int flags)
-+{
-+ struct nameidata nd;
-+ int error;
++# else /* CONFIG_EXT3_FS_XATTR */
++# define ext3_setxattr NULL
++# define ext3_getxattr NULL
++# define ext3_listxattr NULL
++# define ext3_removexattr NULL
+
-+ error = user_path_walk(path, &nd);
-+ if (error)
-+ return error;
-+ error = setxattr(nd.dentry, name, value, size, flags);
-+ path_release(&nd);
-+ return error;
++static inline int
++ext3_xattr_get(struct inode *inode, int name_index, const char *name,
++ void *buffer, size_t size)
++{
++ return -ENOTSUP;
+}
+
-+asmlinkage long
-+sys_lsetxattr(char *path, char *name, void *value, size_t size, int flags)
++static inline int
++ext3_xattr_list(struct inode *inode, void *buffer, size_t size)
+{
-+ struct nameidata nd;
-+ int error;
-+
-+ error = user_path_walk_link(path, &nd);
-+ if (error)
-+ return error;
-+ error = setxattr(nd.dentry, name, value, size, flags);
-+ path_release(&nd);
-+ return error;
++ return -ENOTSUP;
+}
+
-+asmlinkage long
-+sys_fsetxattr(int fd, char *name, void *value, size_t size, int flags)
++static inline int
++ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index,
++ const char *name, const void *value, size_t size, int flags)
+{
-+ struct file *f;
-+ int error = -EBADF;
-+
-+ f = fget(fd);
-+ if (!f)
-+ return error;
-+ error = setxattr(f->f_dentry, name, value, size, flags);
-+ fput(f);
-+ return error;
++ return -ENOTSUP;
+}
+
-+/*
-+ * Extended attribute GET operations
-+ */
-+static ssize_t
-+getxattr(struct dentry *d, char *name, void *value, size_t size)
++static inline void
++ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
+{
-+ ssize_t error;
-+ void *kvalue;
-+ char kname[XATTR_NAME_MAX + 1];
-+
-+ error = strncpy_from_user(kname, name, sizeof(kname));
-+ if (error == 0 || error == sizeof(kname))
-+ error = -ERANGE;
-+ if (error < 0)
-+ return error;
-+
-+ kvalue = xattr_alloc(size, XATTR_SIZE_MAX);
-+ if (IS_ERR(kvalue))
-+ return PTR_ERR(kvalue);
-+
-+ error = -EOPNOTSUPP;
-+ if (d->d_inode->i_op && d->d_inode->i_op->getxattr) {
-+ down(&d->d_inode->i_sem);
-+ lock_kernel();
-+ error = d->d_inode->i_op->getxattr(d, kname, kvalue, size);
-+ unlock_kernel();
-+ up(&d->d_inode->i_sem);
-+ }
-+
-+ if (kvalue && error > 0)
-+ if (copy_to_user(value, kvalue, error))
-+ error = -EFAULT;
-+ xattr_free(kvalue, size);
-+ return error;
+}
+
-+asmlinkage ssize_t
-+sys_getxattr(char *path, char *name, void *value, size_t size)
++static inline void
++ext3_xattr_put_super(struct super_block *sb)
+{
-+ struct nameidata nd;
-+ ssize_t error;
-+
-+ error = user_path_walk(path, &nd);
-+ if (error)
-+ return error;
-+ error = getxattr(nd.dentry, name, value, size);
-+ path_release(&nd);
-+ return error;
+}
+
-+asmlinkage ssize_t
-+sys_lgetxattr(char *path, char *name, void *value, size_t size)
++static inline int
++init_ext3_xattr(void)
+{
-+ struct nameidata nd;
-+ ssize_t error;
-+
-+ error = user_path_walk_link(path, &nd);
-+ if (error)
-+ return error;
-+ error = getxattr(nd.dentry, name, value, size);
-+ path_release(&nd);
-+ return error;
++ return 0;
+}
+
-+asmlinkage ssize_t
-+sys_fgetxattr(int fd, char *name, void *value, size_t size)
++static inline void
++exit_ext3_xattr(void)
+{
-+ struct file *f;
-+ ssize_t error = -EBADF;
++}
+
-+ f = fget(fd);
-+ if (!f)
-+ return error;
-+ error = getxattr(f->f_dentry, name, value, size);
-+ fput(f);
-+ return error;
++# endif /* CONFIG_EXT3_FS_XATTR */
++
++# ifdef CONFIG_EXT3_FS_XATTR_USER
++
++extern int init_ext3_xattr_user(void) __init;
++extern void exit_ext3_xattr_user(void);
++
++# else /* CONFIG_EXT3_FS_XATTR_USER */
++
++static inline int
++init_ext3_xattr_user(void)
++{
++ return 0;
+}
+
-+/*
-+ * Extended attribute LIST operations
-+ */
-+static ssize_t
-+listxattr(struct dentry *d, char *list, size_t size)
++static inline void
++exit_ext3_xattr_user(void)
+{
-+ ssize_t error;
-+ char *klist;
++}
+
-+ klist = (char *)xattr_alloc(size, XATTR_LIST_MAX);
-+ if (IS_ERR(klist))
-+ return PTR_ERR(klist);
++#endif /* CONFIG_EXT3_FS_XATTR_USER */
+
-+ error = -EOPNOTSUPP;
-+ if (d->d_inode->i_op && d->d_inode->i_op->listxattr) {
-+ down(&d->d_inode->i_sem);
-+ lock_kernel();
-+ error = d->d_inode->i_op->listxattr(d, klist, size);
-+ unlock_kernel();
-+ up(&d->d_inode->i_sem);
-+ }
++#endif /* __KERNEL__ */
+
-+ if (klist && error > 0)
-+ if (copy_to_user(list, klist, error))
-+ error = -EFAULT;
-+ xattr_free(klist, size);
-+ return error;
-+}
+Index: linux-2.4.19-pre1/include/linux/fs.h
+===================================================================
+--- linux-2.4.19-pre1.orig/include/linux/fs.h 2004-01-14 01:11:48.000000000 +0300
++++ linux-2.4.19-pre1/include/linux/fs.h 2004-01-14 01:11:49.000000000 +0300
+@@ -872,6 +872,10 @@
+ int (*setattr) (struct dentry *, struct iattr *);
+ int (*setattr_raw) (struct inode *, struct iattr *);
+ int (*getattr) (struct dentry *, struct iattr *);
++ int (*setxattr) (struct dentry *, const char *, const void *, size_t, int);
++ ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
++ ssize_t (*listxattr) (struct dentry *, char *, size_t);
++ int (*removexattr) (struct dentry *, const char *);
+ };
+
+ struct seq_file;
+Index: linux-2.4.19-pre1/include/linux/mbcache.h
+===================================================================
+--- linux-2.4.19-pre1.orig/include/linux/mbcache.h 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.4.19-pre1/include/linux/mbcache.h 2004-01-14 01:11:49.000000000 +0300
+@@ -0,0 +1,69 @@
++/*
++ File: linux/mbcache.h
+
-+asmlinkage ssize_t
-+sys_listxattr(char *path, char *list, size_t size)
-+{
-+ struct nameidata nd;
-+ ssize_t error;
++ (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
++*/
+
-+ error = user_path_walk(path, &nd);
-+ if (error)
-+ return error;
-+ error = listxattr(nd.dentry, list, size);
-+ path_release(&nd);
-+ return error;
-+}
++/* Hardwire the number of additional indexes */
++#define MB_CACHE_INDEXES_COUNT 1
+
-+asmlinkage ssize_t
-+sys_llistxattr(char *path, char *list, size_t size)
-+{
-+ struct nameidata nd;
-+ ssize_t error;
++struct mb_cache_entry;
+
-+ error = user_path_walk_link(path, &nd);
-+ if (error)
-+ return error;
-+ error = listxattr(nd.dentry, list, size);
-+ path_release(&nd);
-+ return error;
-+}
++struct mb_cache_op {
++ int (*free)(struct mb_cache_entry *, int);
++};
+
-+asmlinkage ssize_t
-+sys_flistxattr(int fd, char *list, size_t size)
-+{
-+ struct file *f;
-+ ssize_t error = -EBADF;
++struct mb_cache {
++ struct list_head c_cache_list;
++ const char *c_name;
++ struct mb_cache_op c_op;
++ atomic_t c_entry_count;
++ int c_bucket_count;
++#ifndef MB_CACHE_INDEXES_COUNT
++ int c_indexes_count;
++#endif
++ kmem_cache_t *c_entry_cache;
++ struct list_head *c_block_hash;
++ struct list_head *c_indexes_hash[0];
++};
+
-+ f = fget(fd);
-+ if (!f)
-+ return error;
-+ error = listxattr(f->f_dentry, list, size);
-+ fput(f);
-+ return error;
-+}
++struct mb_cache_entry_index {
++ struct list_head o_list;
++ unsigned int o_key;
++};
+
-+/*
-+ * Extended attribute REMOVE operations
-+ */
-+static long
-+removexattr(struct dentry *d, char *name)
-+{
-+ int error;
-+ char kname[XATTR_NAME_MAX + 1];
++struct mb_cache_entry {
++ struct list_head e_lru_list;
++ struct mb_cache *e_cache;
++ atomic_t e_used;
++ kdev_t e_dev;
++ unsigned long e_block;
++ struct list_head e_block_list;
++ struct mb_cache_entry_index e_indexes[0];
++};
+
-+ error = strncpy_from_user(kname, name, sizeof(kname));
-+ if (error == 0 || error == sizeof(kname))
-+ error = -ERANGE;
-+ if (error < 0)
-+ return error;
++/* Functions on caches */
+
-+ error = -EOPNOTSUPP;
-+ if (d->d_inode->i_op && d->d_inode->i_op->removexattr) {
-+ down(&d->d_inode->i_sem);
-+ lock_kernel();
-+ error = d->d_inode->i_op->removexattr(d, kname);
-+ unlock_kernel();
-+ up(&d->d_inode->i_sem);
-+ }
-+ return error;
-+}
++struct mb_cache * mb_cache_create(const char *, struct mb_cache_op *, size_t,
++ int, int);
++void mb_cache_shrink(struct mb_cache *, kdev_t);
++void mb_cache_destroy(struct mb_cache *);
+
-+asmlinkage long
-+sys_removexattr(char *path, char *name)
-+{
-+ struct nameidata nd;
-+ int error;
++/* Functions on cache entries */
+
-+ error = user_path_walk(path, &nd);
-+ if (error)
-+ return error;
-+ error = removexattr(nd.dentry, name);
-+ path_release(&nd);
-+ return error;
-+}
++struct mb_cache_entry *mb_cache_entry_alloc(struct mb_cache *);
++int mb_cache_entry_insert(struct mb_cache_entry *, kdev_t, unsigned long,
++ unsigned int[]);
++void mb_cache_entry_rehash(struct mb_cache_entry *, unsigned int[]);
++void mb_cache_entry_release(struct mb_cache_entry *);
++void mb_cache_entry_takeout(struct mb_cache_entry *);
++void mb_cache_entry_free(struct mb_cache_entry *);
++struct mb_cache_entry *mb_cache_entry_dup(struct mb_cache_entry *);
++struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *, kdev_t,
++ unsigned long);
++#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
++struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache, int,
++ kdev_t, unsigned int);
++struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache_entry *, int,
++ kdev_t, unsigned int);
++#endif
+Index: linux-2.4.19-pre1/include/linux/xattr.h
+===================================================================
+--- linux-2.4.19-pre1.orig/include/linux/xattr.h 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.4.19-pre1/include/linux/xattr.h 2004-01-14 01:11:49.000000000 +0300
+@@ -0,0 +1,15 @@
++/*
++ File: linux/xattr.h
+
-+asmlinkage long
-+sys_lremovexattr(char *path, char *name)
-+{
-+ struct nameidata nd;
-+ int error;
++ Extended attributes handling.
+
-+ error = user_path_walk_link(path, &nd);
-+ if (error)
-+ return error;
-+ error = removexattr(nd.dentry, name);
-+ path_release(&nd);
-+ return error;
-+}
++ Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org>
++ Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved.
++*/
++#ifndef _LINUX_XATTR_H
++#define _LINUX_XATTR_H
+
-+asmlinkage long
-+sys_fremovexattr(int fd, char *name)
-+{
-+ struct file *f;
-+ int error = -EBADF;
++#define XATTR_CREATE 0x1 /* set the value, fail if attr already exists */
++#define XATTR_REPLACE 0x2 /* set the value, fail if attr does not exist */
+
-+ f = fget(fd);
-+ if (!f)
-+ return error;
-+ error = removexattr(f->f_dentry, name);
-+ fput(f);
-+ return error;
-+}
++#endif /* _LINUX_XATTR_H */
Index: linux-2.4.19-pre1/include/linux/kernel.h
===================================================================
---- linux-2.4.19-pre1.orig/include/linux/kernel.h 2003-11-21 02:25:34.000000000 +0300
-+++ linux-2.4.19-pre1/include/linux/kernel.h 2003-11-21 03:51:05.000000000 +0300
+--- linux-2.4.19-pre1.orig/include/linux/kernel.h 2004-01-14 01:10:37.000000000 +0300
++++ linux-2.4.19-pre1/include/linux/kernel.h 2004-01-14 01:16:51.000000000 +0300
@@ -11,6 +11,7 @@
#include <linux/linkage.h>
#include <linux/stddef.h>
/* Optimization barrier */
/* The "volatile" is due to gcc bugs */
+Index: linux-2.4.19-pre1/include/linux/limits.h
+===================================================================
+--- linux-2.4.19-pre1.orig/include/linux/limits.h 2004-01-14 01:10:37.000000000 +0300
++++ linux-2.4.19-pre1/include/linux/limits.h 2004-01-14 01:22:08.000000000 +0300
+@@ -13,6 +13,9 @@
+ #define NAME_MAX 255 /* # chars in a file name */
+ #define PATH_MAX 4096 /* # chars in a path name including nul */
+ #define PIPE_BUF 4096 /* # bytes in atomic write to a pipe */
++#define XATTR_NAME_MAX 255 /* # chars in an extended attribute name */
++#define XATTR_SIZE_MAX 65536 /* size of an extended attribute value (64k) */
++#define XATTR_LIST_MAX 65536 /* size of extended attribute namelist (64k) */
+
+ #define RTSIG_MAX 32
+
+Index: linux-2.4.19-pre1/kernel/ksyms.c
+===================================================================
+--- linux-2.4.19-pre1.orig/kernel/ksyms.c 2004-01-14 01:11:48.000000000 +0300
++++ linux-2.4.19-pre1/kernel/ksyms.c 2004-01-14 01:11:49.000000000 +0300
+@@ -11,6 +11,7 @@
+
+ #include <linux/config.h>
+ #include <linux/slab.h>
++#include <linux/cache_def.h>
+ #include <linux/module.h>
+ #include <linux/blkdev.h>
+ #include <linux/cdrom.h>
+@@ -88,6 +89,7 @@
+ EXPORT_SYMBOL(exit_files);
+ EXPORT_SYMBOL(exit_fs);
+ EXPORT_SYMBOL(exit_sighand);
++EXPORT_SYMBOL(copy_fs_struct);
+
+ /* internal kernel memory management */
+ EXPORT_SYMBOL(_alloc_pages);
+@@ -104,6 +106,8 @@
+ EXPORT_SYMBOL(kmem_cache_shrink);
+ EXPORT_SYMBOL(kmem_cache_alloc);
+ EXPORT_SYMBOL(kmem_cache_free);
++EXPORT_SYMBOL(register_cache);
++EXPORT_SYMBOL(unregister_cache);
+ EXPORT_SYMBOL(kmalloc);
+ EXPORT_SYMBOL(kfree);
+ EXPORT_SYMBOL(vfree);
+Index: linux-2.4.19-pre1/mm/vmscan.c
+===================================================================
+--- linux-2.4.19-pre1.orig/mm/vmscan.c 2004-01-14 01:10:37.000000000 +0300
++++ linux-2.4.19-pre1/mm/vmscan.c 2004-01-14 01:11:49.000000000 +0300
+@@ -15,6 +15,7 @@
+ #include <linux/kernel_stat.h>
+ #include <linux/swap.h>
+ #include <linux/swapctl.h>
++#include <linux/cache_def.h>
+ #include <linux/smp_lock.h>
+ #include <linux/pagemap.h>
+ #include <linux/init.h>
+@@ -32,6 +33,39 @@
+ */
+ #define DEF_PRIORITY (6)
+
++static DECLARE_MUTEX(other_caches_sem);
++static LIST_HEAD(cache_definitions);
++
++void register_cache(struct cache_definition *cache)
++{
++ down(&other_caches_sem);
++ list_add(&cache->link, &cache_definitions);
++ up(&other_caches_sem);
++}
++
++void unregister_cache(struct cache_definition *cache)
++{
++ down(&other_caches_sem);
++ list_del(&cache->link);
++ up(&other_caches_sem);
++}
++
++static void shrink_other_caches(unsigned int priority, int gfp_mask)
++{
++ struct list_head *p;
++
++ if (down_trylock(&other_caches_sem))
++ return;
++
++ list_for_each_prev(p, &cache_definitions) {
++ struct cache_definition *cache =
++ list_entry(p, struct cache_definition, link);
++
++ cache->shrink(priority, gfp_mask);
++ }
++ up(&other_caches_sem);
++}
++
+ /*
+ * The swap-out function returns 1 if it successfully
+ * scanned all the pages it was asked to (`count').
+@@ -578,6 +612,7 @@
+
+ shrink_dcache_memory(priority, gfp_mask);
+ shrink_icache_memory(priority, gfp_mask);
++ shrink_other_caches(priority, gfp_mask);
+ #ifdef CONFIG_QUOTA
+ shrink_dqcache_memory(DEF_PRIORITY, gfp_mask);
+ #endif
0 files changed
-Index: linux-2.6.0-test6/fs/namei.c
+Index: linux-2.6.0/fs/namei.c
===================================================================
---- linux-2.6.0-test6.orig/fs/namei.c 2003-10-07 15:33:15.000000000 +0800
-+++ linux-2.6.0-test6/fs/namei.c 2003-10-07 15:39:16.000000000 +0800
+--- linux-2.6.0.orig/fs/namei.c 2004-01-04 15:22:42.000000000 +0300
++++ linux-2.6.0/fs/namei.c 2004-01-04 15:25:04.000000000 +0300
@@ -1270,7 +1270,7 @@
if (!error) {
DQUOT_INIT(inode);
exit5:
dput(new_dentry);
exit4:
-Index: linux-2.6.0-test6/fs/open.c
+Index: linux-2.6.0/fs/open.c
===================================================================
---- linux-2.6.0-test6.orig/fs/open.c 2003-10-07 15:33:15.000000000 +0800
-+++ linux-2.6.0-test6/fs/open.c 2003-10-07 15:40:41.000000000 +0800
-@@ -178,9 +178,10 @@
+--- linux-2.6.0.orig/fs/open.c 2004-01-04 15:21:49.000000000 +0300
++++ linux-2.6.0/fs/open.c 2004-01-04 15:25:04.000000000 +0300
+@@ -180,9 +180,10 @@
return error;
}
struct iattr newattrs;
/* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
-@@ -191,7 +192,14 @@
+@@ -193,7 +194,14 @@
newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
down(&dentry->d_inode->i_sem);
down_write(&dentry->d_inode->i_alloc_sem);
up_write(&dentry->d_inode->i_alloc_sem);
up(&dentry->d_inode->i_sem);
return err;
-@@ -247,7 +255,7 @@
+@@ -249,7 +257,7 @@
error = locks_verify_truncate(inode, NULL, length);
if (!error) {
DQUOT_INIT(inode);
}
put_write_access(inode);
-@@ -299,7 +307,7 @@
+@@ -301,7 +309,7 @@
error = locks_verify_truncate(inode, file, length);
if (!error)
out_putf:
fput(file);
out:
-@@ -378,9 +386,19 @@
+@@ -380,9 +388,19 @@
(error = permission(inode,MAY_WRITE,&nd)) != 0)
goto dput_and_out;
}
dput_and_out:
path_release(&nd);
out:
-@@ -431,9 +449,19 @@
+@@ -433,9 +451,19 @@
(error = permission(inode,MAY_WRITE,&nd)) != 0)
goto dput_and_out;
}
dput_and_out:
path_release(&nd);
out:
-@@ -634,6 +662,18 @@
+@@ -636,6 +664,18 @@
if (IS_RDONLY(inode))
goto dput_and_out;
error = -EPERM;
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
goto dput_and_out;
-@@ -667,6 +707,18 @@
+@@ -669,6 +709,18 @@
if (IS_RDONLY(inode))
goto out;
error = -EPERM;
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
goto out;
newattrs.ia_valid = ATTR_CTIME;
-@@ -680,6 +732,7 @@
+@@ -682,6 +734,7 @@
}
if (!S_ISDIR(inode->i_mode))
newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID;
down(&inode->i_sem);
error = notify_change(dentry, &newattrs);
up(&inode->i_sem);
-Index: linux-2.6.0-test6/include/linux/fs.h
+Index: linux-2.6.0/fs/exec.c
===================================================================
---- linux-2.6.0-test6.orig/include/linux/fs.h 2003-10-07 15:34:10.000000000 +0800
-+++ linux-2.6.0-test6/include/linux/fs.h 2003-10-07 15:39:17.000000000 +0800
-@@ -831,13 +831,20 @@
+--- linux-2.6.0.orig/fs/exec.c 2004-01-04 15:21:49.000000000 +0300
++++ linux-2.6.0/fs/exec.c 2004-01-04 15:25:04.000000000 +0300
+@@ -1405,7 +1405,7 @@
+ goto close_fail;
+ if (!file->f_op->write)
+ goto close_fail;
+- if (do_truncate(file->f_dentry, 0) != 0)
++ if (do_truncate(file->f_dentry, 0, 0) != 0)
+ goto close_fail;
+
+ retval = binfmt->core_dump(signr, regs, file);
+Index: linux-2.6.0/include/linux/fs.h
+===================================================================
+--- linux-2.6.0.orig/include/linux/fs.h 2004-01-04 15:21:49.000000000 +0300
++++ linux-2.6.0/include/linux/fs.h 2004-01-04 15:25:04.000000000 +0300
+@@ -836,13 +836,20 @@
int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *);
int (*link) (struct dentry *,struct inode *,struct dentry *);
int (*readlink) (struct dentry *, char __user *,int);
int (*follow_link) (struct dentry *, struct nameidata *);
void (*truncate) (struct inode *);
-@@ -1122,7 +1129,7 @@
+@@ -1127,7 +1134,7 @@
asmlinkage long sys_open(const char __user *, int, int);
asmlinkage long sys_close(unsigned int); /* yes, it's really unsigned */
extern struct file *filp_open(const char *, int, int);
extern struct file * dentry_open(struct dentry *, struct vfsmount *, int);
-Index: linux-2.6.0-test6/fs/exec.c
+Index: linux-2.6.0/net/unix/af_unix.c
===================================================================
---- linux-2.6.0-test6.orig/fs/exec.c 2003-10-07 15:33:15.000000000 +0800
-+++ linux-2.6.0-test6/fs/exec.c 2003-10-07 15:39:17.000000000 +0800
-@@ -1390,7 +1390,7 @@
- goto close_fail;
- if (!file->f_op->write)
- goto close_fail;
-- if (do_truncate(file->f_dentry, 0) != 0)
-+ if (do_truncate(file->f_dentry, 0, 0) != 0)
- goto close_fail;
-
- retval = binfmt->core_dump(signr, regs, file);
+--- linux-2.6.0.orig/net/unix/af_unix.c 2004-01-04 15:21:21.000000000 +0300
++++ linux-2.6.0/net/unix/af_unix.c 2004-01-04 15:25:18.000000000 +0300
+@@ -592,6 +592,7 @@
+ int err = 0;
+
+ if (sunname->sun_path[0]) {
++ intent_init(&nd.intent, IT_LOOKUP);
+ err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
+ if (err)
+ goto fail;
ext3-xattr-ptr-arith-fix.patch
kernel_text_address-2.4.18-chaos.patch
pagecache-lock-2.4.21-chaos.patch
-quadrics-fixes.patch
ext3-trusted_ea-2.4.20.patch
gfp_memalloc-2.4.22.patch
ext3-xattr-ptr-arith-fix.patch
+procfs-ndynamic-2.4.patch
+kernel_text_address-2.4.18-chaos.patch
ext3-map_inode_page.patch
ext3-error-export.patch
iopen-2.4.20.patch
-tcp-zero-copy.patch
+tcp-zero-copy-2.4.19-pre1.patch
jbd-dont-account-blocks-twice.patch
jbd-commit-tricks.patch
ext3-no-write-super.patch
resched-2.4.19-pre1.patch
ext3-xattr-ptr-arith-fix.patch
gfp_memalloc-2.4.22.patch
+vmalloc_to_page-2.4.19-pre1.patch
kernel_text_address-2.4.22-vanilla.patch
gfp_memalloc-2.4.22.patch
ext3-xattr-ptr-arith-fix.patch
-3.5G-address-space-2.4.22-vanilla.patch
+3.5G-address-space-2.4.22-vanilla.patch
+procfs-ndynamic-2.4.patch
AC_MSG_RESULT(no)
fi
-AC_ARG_ENABLE(zerocopy, [ --enable-zerocopy enable socknal zerocopy],enable_zerocopy=$enable_zerocopy_temp, enable_zerocopy="")
+AC_ARG_ENABLE(zerocopy, [ --disable-zerocopy disable socknal zerocopy],enable_zerocopy="", enable_zerocopy=$enable_zerocopy_temp)
-AC_ARG_ENABLE(affinity, [ --enable-affinity enable process/irq affinity],enable_affinity="-DCPU_AFFINITY=1", enable_affinity=$enable_affinity_temp)
+AC_ARG_ENABLE(affinity, [ --disable-affinity disable process/irq affinity],enable_affinity="", enable_affinity=$enable_affinity_temp)
#####################################
AC_MSG_CHECKING(if quadrics kernel headers are present)
AC_OUTPUT([Makefile Kernelenv libcfs/Makefile portals/Makefile \
unals/Makefile knals/Makefile router/Makefile \
knals/socknal/Makefile knals/gmnal/Makefile knals/qswnal/Makefile \
- knals/scimacnal/Makefile knals/toenal/Makefile knals/ibnal/Makefile\
+ knals/scimacnal/Makefile knals/ibnal/Makefile\
utils/Makefile tests/Makefile doc/Makefile ])
LYX2HTML = lyx --export html
SUFFIXES = .lin .lyx .pdf .sgml .html .txt .fig .eps
-DOCS = portals3.pdf
+if DOC
+ DOCS = portals3.pdf
+else
+ DOCS =
+endif
+
IMAGES = file.eps flow_new.eps get.eps mpi.eps portals.eps put.eps
LYXFILES= portals3.lyx
#define PORTAL_ALLOC_GFP(ptr, size, mask) \
do { \
- LASSERT (!in_interrupt()); \
+ LASSERT(!in_interrupt() || \
+ (size <= PORTAL_VMALLOC_SIZE && mask == GFP_ATOMIC)); \
if ((size) > PORTAL_VMALLOC_SIZE) \
(ptr) = vmalloc(size); \
else \
#define IOC_PORTAL_MAX_NR 42
enum {
- QSWNAL = 1,
- SOCKNAL,
- GMNAL,
- TOENAL,
- TCPNAL,
- SCIMACNAL,
- ROUTER,
- IBNAL,
+ QSWNAL = 1,
+ SOCKNAL = 2,
+ GMNAL = 3,
+ /* 4 unused */
+ TCPNAL = 5,
+ SCIMACNAL = 6,
+ ROUTER = 7,
+ IBNAL = 8,
NAL_ENUM_END_MARKER
};
#ifdef __KERNEL__
extern ptl_handle_ni_t kqswnal_ni;
extern ptl_handle_ni_t ksocknal_ni;
-extern ptl_handle_ni_t ktoenal_ni;
extern ptl_handle_ni_t kgmnal_ni;
extern ptl_handle_ni_t kibnal_ni;
extern ptl_handle_ni_t kscimacnal_ni;
#ifndef _LINUX_LIST_H
-
/*
* Simple doubly linked list implementation.
*
# This code is issued under the GNU General Public License.
# See the file COPYING in this distribution
-DIST_SUBDIRS= socknal toenal qswnal gmnal scimacnal ibnal
-SUBDIRS= socknal toenal @QSWNAL@ @GMNAL@ @SCIMACNAL@ @IBNAL@
+DIST_SUBDIRS= socknal qswnal gmnal scimacnal ibnal
+SUBDIRS= socknal @QSWNAL@ @GMNAL@ @SCIMACNAL@ @IBNAL@
#include <asm/system.h>
#include <asm/uaccess.h>
+#include <linux/init.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/stat.h>
int option;
struct linger linger;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+ sock->sk->sk_allocation = GFP_NOFS;
+#else
sock->sk->allocation = GFP_NOFS;
+#endif
/* Ensure this socket aborts active sends immediately when we close
* it. */
+++ /dev/null
-.deps
-Makefile
-Makefile.in
+++ /dev/null
-# Copyright (C) 2001 Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-include ../../Rules.linux
-
-MODULE = ktoenal
-modulenet_DATA = ktoenal.o
-EXTRA_PROGRAMS = ktoenal
-
-DEFS =
-ktoenal_SOURCES = toenal.c toenal_cb.c toenal.h
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- * Author: Zach Brown <zab@zabbo.net>
- * Author: Peter J. Braam <braam@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- * Author: Eric Barton <eric@bartonsoftware.com>
- * Author: Kedar Sovani <kedar@calsoftinc.com>
- * Author: Amey Inamdar <amey@calsoftinc.com>
- *
- * This file is part of Portals, http://www.sf.net/projects/lustre/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-#include <linux/poll.h>
-#include "toenal.h"
-
-ptl_handle_ni_t ktoenal_ni;
-static nal_t ktoenal_api;
-static ksock_nal_data_t ktoenal_data;
-
-/*
-ksocknal_interface_t ktoenal_interface = {
- ksni_add_sock: ktoenal_add_sock,
- ksni_close_sock: ktoenal_close_sock,
- ksni_set_mynid: ktoenal_set_mynid,
-};
-*/
-
-kpr_nal_interface_t ktoenal_router_interface = {
- kprni_nalid: TOENAL,
- kprni_arg: &ktoenal_data,
- kprni_fwd: ktoenal_fwd_packet,
-};
-
-
-int
-ktoenal_api_forward(nal_t *nal, int id, void *args, size_t args_len,
- void *ret, size_t ret_len)
-{
- ksock_nal_data_t *k;
- nal_cb_t *nal_cb;
-
- k = nal->nal_data;
- nal_cb = k->ksnd_nal_cb;
-
- lib_dispatch(nal_cb, k, id, args, ret); /* ktoenal_send needs k */
- return PTL_OK;
-}
-
-int
-ktoenal_api_shutdown(nal_t *nal, int ni)
-{
- CDEBUG (D_NET, "closing all connections\n");
-
- return ktoenal_close_sock(0); /* close all sockets */
-}
-
-void
-ktoenal_api_yield(nal_t *nal)
-{
- our_cond_resched();
- return;
-}
-
-void
-ktoenal_api_lock(nal_t *nal, unsigned long *flags)
-{
- ksock_nal_data_t *k;
- nal_cb_t *nal_cb;
-
- k = nal->nal_data;
- nal_cb = k->ksnd_nal_cb;
- nal_cb->cb_cli(nal_cb,flags);
-}
-
-void
-ktoenal_api_unlock(nal_t *nal, unsigned long *flags)
-{
- ksock_nal_data_t *k;
- nal_cb_t *nal_cb;
-
- k = nal->nal_data;
- nal_cb = k->ksnd_nal_cb;
- nal_cb->cb_sti(nal_cb,flags);
-}
-
-nal_t *
-ktoenal_init(int interface, ptl_pt_index_t ptl_size,
- ptl_ac_index_t ac_size, ptl_pid_t requested_pid)
-{
- CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n",
- ktoenal_data.ksnd_mynid);
- lib_init(&ktoenal_lib, ktoenal_data.ksnd_mynid, 0, 10, ptl_size,
- ac_size);
- return (&ktoenal_api);
-}
-
-/*
- * EXTRA functions follow
- */
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-#define SOCKET_I(inode) (&(inode)->u.socket_i)
-#endif
-static __inline__ struct socket *
-socki_lookup(struct inode *inode)
-{
- return SOCKET_I(inode);
-}
-
-int
-ktoenal_set_mynid(ptl_nid_t nid)
-{
- lib_ni_t *ni = &ktoenal_lib.ni;
-
- /* FIXME: we have to do this because we call lib_init() at module
- * insertion time, which is before we have 'mynid' available. lib_init
- * sets the NAL's nid, which it uses to tell other nodes where packets
- * are coming from. This is not a very graceful solution to this
- * problem. */
-
- CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n", nid, ni->nid);
-
- ktoenal_data.ksnd_mynid = nid;
- ni->nid = nid;
- return (0);
-}
-
-int
-ktoenal_add_sock (ptl_nid_t nid, int fd)
-{
- unsigned long flags;
- ksock_conn_t *conn;
- struct file *file = NULL;
- struct socket *sock = NULL;
- int ret;
- ENTRY;
-
- file = fget(fd);
- if (file == NULL)
- RETURN(-EINVAL);
-
- ret = -EINVAL;
- sock = socki_lookup(file->f_dentry->d_inode);
- if (sock == NULL)
- GOTO(error, ret);
-
- ret = -ENOMEM;
- PORTAL_ALLOC(conn, sizeof(*conn));
- if (!conn)
- GOTO(error, ret);
-
- memset (conn, 0, sizeof (conn)); /* zero for consistency */
- file->f_flags |= O_NONBLOCK; /* Does this have any conflicts */
- conn->ksnc_file = file;
- conn->ksnc_sock = sock;
- conn->ksnc_peernid = nid;
- atomic_set (&conn->ksnc_refcount, 1); /* 1 ref for socklist */
-
- conn->ksnc_rx_ready = 0;
- conn->ksnc_rx_scheduled = 0;
- ktoenal_new_packet (conn, 0);
-
- INIT_LIST_HEAD (&conn->ksnc_tx_queue);
- conn->ksnc_tx_ready = 0;
- conn->ksnc_tx_scheduled = 0;
-
- LASSERT (!in_interrupt());
- write_lock_irqsave (&ktoenal_data.ksnd_socklist_lock, flags);
-
- list_add(&conn->ksnc_list, &ktoenal_data.ksnd_socklist);
- write_unlock_irqrestore (&ktoenal_data.ksnd_socklist_lock, flags);
-
- ktoenal_data_ready(conn);
- ktoenal_write_space(conn);
-
- ktoenal_data.ksnd_slistchange = 1;
- wake_up_process(ktoenal_data.ksnd_pollthread_tsk);
- /* Schedule pollthread so that it will poll
- * for newly created socket
- */
-
-
- CDEBUG(D_IOCTL, "conn [%p] registered for nid "LPX64"\n",
- conn, conn->ksnc_peernid);
-
- /* Can't unload while connection active */
- PORTAL_MODULE_USE;
- RETURN(0);
-
-error:
- fput(file);
- return (ret);
-}
-
-/* Passing in a zero nid will close all connections */
-int
-ktoenal_close_sock(ptl_nid_t nid)
-{
- unsigned long flags;
- ksock_conn_t *conn;
- LIST_HEAD (death_row);
- struct list_head *tmp;
-
- LASSERT (!in_interrupt());
- write_lock_irqsave (&ktoenal_data.ksnd_socklist_lock, flags);
-
- if (nid == 0) /* close ALL connections */
- {
- /* insert 'death row' into the socket list... */
- list_add (&death_row, &ktoenal_data.ksnd_socklist);
- /* ...extract and reinitialise the socket list itself... */
- list_del_init (&ktoenal_data.ksnd_socklist);
- /* ...and voila, death row is the proud owner of all conns */
- } else list_for_each (tmp, &ktoenal_data.ksnd_socklist) {
-
- conn = list_entry (tmp, ksock_conn_t, ksnc_list);
-
- if (conn->ksnc_peernid == nid)
- {
- list_del (&conn->ksnc_list);
- list_add (&conn->ksnc_list, &death_row);
- break;
- }
- }
-
-
- write_unlock_irqrestore (&ktoenal_data.ksnd_socklist_lock, flags);
-
- if (list_empty (&death_row))
- return (-ENOENT);
-
- do {
- conn = list_entry (death_row.next, ksock_conn_t, ksnc_list);
- list_del (&conn->ksnc_list);
- ktoenal_put_conn (conn); /* drop ref for ksnd_socklist */
- } while (!list_empty (&death_row));
-
- ktoenal_data.ksnd_slistchange = 1;
- wake_up_process(ktoenal_data.ksnd_pollthread_tsk);
-
- return (0);
-}
-
-
-ksock_conn_t *
-ktoenal_get_conn (ptl_nid_t nid)
-{
- struct list_head *tmp;
- ksock_conn_t *conn;
-
- PROF_START(conn_list_walk);
-
- read_lock (&ktoenal_data.ksnd_socklist_lock);
-
- list_for_each(tmp, &ktoenal_data.ksnd_socklist) {
-
- conn = list_entry(tmp, ksock_conn_t, ksnc_list);
-
- if (conn->ksnc_peernid == nid)
- {
- /* caller is referencing */
- atomic_inc (&conn->ksnc_refcount);
-
- read_unlock (&ktoenal_data.ksnd_socklist_lock);
-
- CDEBUG(D_NET, "got conn [%p] -> "LPX64" (%d)\n",
- conn, nid, atomic_read (&conn->ksnc_refcount));
-
- PROF_FINISH(conn_list_walk);
- return (conn);
- }
- }
-
- read_unlock (&ktoenal_data.ksnd_socklist_lock);
-
- CDEBUG(D_NET, "No connection found when looking for nid "LPX64"\n", nid);
- PROF_FINISH(conn_list_walk);
- return (NULL);
-}
-
-void
-ktoenal_close_conn (ksock_conn_t *conn)
-{
- CDEBUG (D_NET, "connection [%p] closed \n", conn);
-
- fput (conn->ksnc_file);
- PORTAL_FREE (conn, sizeof (*conn));
- /* One less connection keeping us hanging on */
- PORTAL_MODULE_UNUSE;
-}
-
-void
-_ktoenal_put_conn (ksock_conn_t *conn)
-{
- unsigned long flags;
-
- CDEBUG (D_NET, "connection [%p] handed the black spot\n", conn);
-
- /* "But what is the black spot, captain?" I asked.
- * "That's a summons, mate..." */
-
- LASSERT (atomic_read (&conn->ksnc_refcount) == 0);
- LASSERT (!conn->ksnc_rx_scheduled);
-
- if (!in_interrupt())
- {
- ktoenal_close_conn (conn);
- return;
- }
-
- spin_lock_irqsave (&ktoenal_data.ksnd_reaper_lock, flags);
-
- list_add (&conn->ksnc_list, &ktoenal_data.ksnd_reaper_list);
- wake_up (&ktoenal_data.ksnd_reaper_waitq);
-
- spin_unlock_irqrestore (&ktoenal_data.ksnd_reaper_lock, flags);
-}
-
-void
-ktoenal_free_buffers (void)
-{
- if (ktoenal_data.ksnd_fmbs != NULL)
- {
- ksock_fmb_t *fmb = (ksock_fmb_t *)ktoenal_data.ksnd_fmbs;
- int i;
- int j;
-
- for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); i++, fmb++)
- for (j = 0; j < fmb->fmb_npages; j++)
- if (fmb->fmb_pages[j] != NULL)
- __free_page (fmb->fmb_pages[j]);
-
- PORTAL_FREE (ktoenal_data.ksnd_fmbs,
- sizeof (ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS));
- }
-
- if (ktoenal_data.ksnd_ltxs != NULL)
- PORTAL_FREE (ktoenal_data.ksnd_ltxs,
- sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS));
-}
-
-int
-ktoenal_cmd(struct portals_cfg *pcfg, void * private)
-{
- int rc = -EINVAL;
-
- LASSERT (pcfg != NULL);
-
- switch(pcfg->pcfg_command) {
- case NAL_CMD_REGISTER_PEER_FD: {
- rc = ktoenal_add_sock(pcfg->pcfg_nid, pcfg->pcfg_fd);
- break;
- }
- case NAL_CMD_CLOSE_CONNECTION: {
- rc = ktoenal_close_sock(pcfg->pcfg_nid);
- break;
- }
- case NAL_CMD_REGISTER_MYNID: {
- rc = ktoenal_set_mynid (pcfg->pcfg_nid);
- break;
- }
- }
-
- return rc;
-}
-
-
-void /*__exit*/
-ktoenal_module_fini (void)
-{
- CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
- atomic_read (&portal_kmemory));
-
- switch (ktoenal_data.ksnd_init)
- {
- default:
- LASSERT (0);
-
- case SOCKNAL_INIT_ALL:
- kportal_nal_unregister(TOENAL);
- PORTAL_SYMBOL_UNREGISTER (ktoenal_ni);
- /* fall through */
-
- case SOCKNAL_INIT_PTL:
- PtlNIFini(ktoenal_ni);
- lib_fini(&ktoenal_lib);
- /* fall through */
-
- case SOCKNAL_INIT_DATA:
- /* Module refcount only gets to zero when all connections
- * have been closed so all lists must be empty */
- LASSERT (list_empty (&ktoenal_data.ksnd_socklist));
- LASSERT (list_empty (&ktoenal_data.ksnd_reaper_list));
- LASSERT (list_empty (&ktoenal_data.ksnd_rx_conns));
- LASSERT (list_empty (&ktoenal_data.ksnd_tx_conns));
- LASSERT (list_empty (&ktoenal_data.ksnd_small_fmp.fmp_blocked_conns));
- LASSERT (list_empty (&ktoenal_data.ksnd_large_fmp.fmp_blocked_conns));
-
- kpr_shutdown (&ktoenal_data.ksnd_router); /* stop router calling me */
-
- /* flag threads to terminate; wake and wait for them to die */
- ktoenal_data.ksnd_shuttingdown = 1;
- wake_up_all (&ktoenal_data.ksnd_reaper_waitq);
- wake_up_all (&ktoenal_data.ksnd_sched_waitq);
- wake_up_process(ktoenal_data.ksnd_pollthread_tsk);
-
- while (atomic_read (&ktoenal_data.ksnd_nthreads) != 0)
- {
- CDEBUG (D_NET, "waitinf for %d threads to terminate\n",
- atomic_read (&ktoenal_data.ksnd_nthreads));
- set_current_state (TASK_UNINTERRUPTIBLE);
- schedule_timeout (HZ);
- }
-
- kpr_deregister (&ktoenal_data.ksnd_router);
-
- ktoenal_free_buffers();
- /* fall through */
-
- case SOCKNAL_INIT_NOTHING:
- break;
- }
-
- CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
- atomic_read (&portal_kmemory));
-
- printk(KERN_INFO "Lustre: Routing socket NAL unloaded (final mem %d)\n",
- atomic_read(&portal_kmemory));
-}
-
-int __init
-ktoenal_module_init (void)
-{
- int pkmem = atomic_read(&portal_kmemory);
- int rc;
- int i;
- int j;
-
- /* packet descriptor must fit in a router descriptor's scratchpad */
- LASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t));
-
- LASSERT (ktoenal_data.ksnd_init == SOCKNAL_INIT_NOTHING);
-
- ktoenal_api.forward = ktoenal_api_forward;
- ktoenal_api.shutdown = ktoenal_api_shutdown;
- ktoenal_api.yield = ktoenal_api_yield;
- ktoenal_api.validate = NULL; /* our api validate is a NOOP */
- ktoenal_api.lock = ktoenal_api_lock;
- ktoenal_api.unlock = ktoenal_api_unlock;
- ktoenal_api.nal_data = &ktoenal_data;
-
- ktoenal_lib.nal_data = &ktoenal_data;
-
- memset (&ktoenal_data, 0, sizeof (ktoenal_data)); /* zero pointers */
-
- INIT_LIST_HEAD(&ktoenal_data.ksnd_socklist);
- rwlock_init(&ktoenal_data.ksnd_socklist_lock);
-
- ktoenal_data.ksnd_nal_cb = &ktoenal_lib;
- spin_lock_init (&ktoenal_data.ksnd_nal_cb_lock);
-
- spin_lock_init (&ktoenal_data.ksnd_sched_lock);
-
- init_waitqueue_head (&ktoenal_data.ksnd_sched_waitq);
-
- INIT_LIST_HEAD (&ktoenal_data.ksnd_rx_conns);
- INIT_LIST_HEAD (&ktoenal_data.ksnd_tx_conns);
-
- INIT_LIST_HEAD(&ktoenal_data.ksnd_small_fmp.fmp_idle_fmbs);
- INIT_LIST_HEAD(&ktoenal_data.ksnd_small_fmp.fmp_blocked_conns);
- INIT_LIST_HEAD(&ktoenal_data.ksnd_large_fmp.fmp_idle_fmbs);
- INIT_LIST_HEAD(&ktoenal_data.ksnd_large_fmp.fmp_blocked_conns);
-
- INIT_LIST_HEAD(&ktoenal_data.ksnd_idle_nblk_ltx_list);
- INIT_LIST_HEAD(&ktoenal_data.ksnd_idle_ltx_list);
- init_waitqueue_head(&ktoenal_data.ksnd_idle_ltx_waitq);
-
- INIT_LIST_HEAD (&ktoenal_data.ksnd_reaper_list);
- init_waitqueue_head(&ktoenal_data.ksnd_reaper_waitq);
- spin_lock_init (&ktoenal_data.ksnd_reaper_lock);
-
- ktoenal_data.ksnd_init = SOCKNAL_INIT_DATA; /* flag lists/ptrs/locks initialised */
-
- PORTAL_ALLOC(ktoenal_data.ksnd_fmbs,
- sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS));
- if (ktoenal_data.ksnd_fmbs == NULL)
- RETURN(-ENOMEM);
-
- /* NULL out buffer pointers etc */
- memset(ktoenal_data.ksnd_fmbs, 0,
- sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS));
-
- for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); i++)
- {
- ksock_fmb_t *fmb = &((ksock_fmb_t *)ktoenal_data.ksnd_fmbs)[i];
-
- if (i < SOCKNAL_SMALL_FWD_NMSGS)
- {
- fmb->fmb_npages = SOCKNAL_SMALL_FWD_PAGES;
- fmb->fmb_pool = &ktoenal_data.ksnd_small_fmp;
- }
- else
- {
- fmb->fmb_npages = SOCKNAL_LARGE_FWD_PAGES;
- fmb->fmb_pool = &ktoenal_data.ksnd_large_fmp;
- }
-
- LASSERT (fmb->fmb_npages > 0);
- for (j = 0; j < fmb->fmb_npages; j++)
- {
- fmb->fmb_pages[j] = alloc_page(GFP_KERNEL);
-
- if (fmb->fmb_pages[j] == NULL)
- {
- ktoenal_module_fini ();
- return (-ENOMEM);
- }
-
- LASSERT (page_address (fmb->fmb_pages[j]) != NULL);
- }
-
- list_add (&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs);
- }
-
- PORTAL_ALLOC(ktoenal_data.ksnd_ltxs,
- sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS));
- if (ktoenal_data.ksnd_ltxs == NULL)
- {
- ktoenal_module_fini ();
- return (-ENOMEM);
- }
-
- /* Deterministic bugs please */
- memset (ktoenal_data.ksnd_ltxs, 0xeb,
- sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS));
-
- for (i = 0; i < SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS; i++)
- {
- ksock_ltx_t *ltx = &((ksock_ltx_t *)ktoenal_data.ksnd_ltxs)[i];
-
- ltx->ltx_idle = i < SOCKNAL_NLTXS ?
- &ktoenal_data.ksnd_idle_ltx_list :
- &ktoenal_data.ksnd_idle_nblk_ltx_list;
-                list_add (&ltx->ltx_tx.tx_list, ltx->ltx_idle);
- }
-
- rc = PtlNIInit(ktoenal_init, 32, 4, 0, &ktoenal_ni);
- if (rc != 0)
- {
- CERROR("ktoenal: PtlNIInit failed: error %d\n", rc);
- ktoenal_module_fini ();
- RETURN (rc);
- }
- PtlNIDebug(ktoenal_ni, ~0);
-
- ktoenal_data.ksnd_init = SOCKNAL_INIT_PTL; /* flag PtlNIInit() called */
-
- ktoenal_data.ksnd_slistchange = 1;
- for (i = 0; i < TOENAL_N_SCHED; i++)
- {
- rc = ktoenal_thread_start (ktoenal_scheduler, NULL);
- if (rc != 0)
- {
- CERROR("Can't spawn socknal scheduler[%d]: %d\n", i, rc);
- ktoenal_module_fini ();
- RETURN (rc);
- }
- }
-
- rc = ktoenal_thread_start (ktoenal_reaper, NULL);
- if (rc != 0)
- {
- CERROR("Can't spawn socknal reaper: %d\n", rc);
- ktoenal_module_fini ();
- RETURN (rc);
- }
-
- rc = ktoenal_thread_start (ktoenal_pollthread, NULL);
- if (rc != 0)
- {
- CERROR("Can't spawn socknal pollthread: %d\n", rc);
- ktoenal_module_fini ();
- RETURN (rc);
- }
-
- rc = kpr_register(&ktoenal_data.ksnd_router,
- &ktoenal_router_interface);
- if (rc != 0)
- CDEBUG (D_NET, "Can't initialise routing interface (rc = %d): not routing\n", rc);
-
- rc = kportal_nal_register(TOENAL, &ktoenal_cmd, NULL);
- if (rc != 0)
- CDEBUG(D_NET, "Can't initialise command interface (rc = %d)\n",
- rc);
-
- PORTAL_SYMBOL_REGISTER(ktoenal_ni);
-
- /* flag everything initialised */
- ktoenal_data.ksnd_init = SOCKNAL_INIT_ALL;
-
- printk(KERN_INFO "Lustre: Routing TOE NAL loaded (Routing %s, initial mem %d)\n",
- kpr_routing(&ktoenal_data.ksnd_router) ? "enabled" : "disabled",
- pkmem);
-
- return (0);
-}
-
-MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Kernel TCP Socket NAL v0.01");
-MODULE_LICENSE("GPL");
-
-module_init(ktoenal_module_init);
-module_exit(ktoenal_module_fini);
-
-EXPORT_SYMBOL (ktoenal_ni);
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- * Author: Zach Brown <zab@zabbo.net>
- * Author: Peter J. Braam <braam@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- * Author: Eric Barton <eric@bartonsoftware.com>
- * Author: Kedar Sovani <kedar@calsoftinc.com>
- * Author: Amey Inamdar <amey@calsoftinc.com>
- *
- * This file is part of Portals, http://www.sf.net/projects/lustre/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#define DEBUG_PORTAL_ALLOC
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-#include <net/tcp.h>
-#include <linux/uio.h>
-#include <linux/sched.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/list.h>
-#include <asm/uaccess.h>
-#include <asm/segment.h>
-
-#define DEBUG_SUBSYSTEM S_SOCKNAL
-
-#include <linux/kp30.h>
-#include <portals/p30.h>
-#include <portals/lib-p30.h>
-
-#define SOCKNAL_NLTXS 128 /* # normal transmit messages */
-#define SOCKNAL_NNBLK_LTXS 128 /* # transmit messages reserved if can't block */
-
-#define SOCKNAL_SMALL_FWD_NMSGS 128 /* # small messages I can be forwarding at any time */
-#define SOCKNAL_LARGE_FWD_NMSGS 32 /* # large messages I can be forwarding at any time */
-
-#define SOCKNAL_SMALL_FWD_PAGES 1 /* # pages in a small message fwd buffer */
-
-#define SOCKNAL_LARGE_FWD_PAGES (PAGE_ALIGN (sizeof (ptl_hdr_t) + PTL_MTU) >> PAGE_SHIFT)
- /* # pages in a large message fwd buffer */
-
-#define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */
-
-#define SOCKNAL_TX_LOW_WATER(sk) (((sk)->sndbuf*8)/10)
-
-#define TOENAL_N_SCHED 1
-
-typedef struct /* pool of forwarding buffers */
-{
- struct list_head fmp_idle_fmbs; /* buffers waiting for a connection */
- struct list_head fmp_blocked_conns; /* connections waiting for a buffer */
-} ksock_fmb_pool_t;
-
-typedef struct {
- int ksnd_init; /* initialisation state */
-
- struct list_head ksnd_socklist; /* all my connections */
- rwlock_t ksnd_socklist_lock; /* stabilise add/find/remove */
-
-
- ptl_nid_t ksnd_mynid;
- nal_cb_t *ksnd_nal_cb;
- spinlock_t ksnd_nal_cb_lock; /* lib cli/sti lock */
-
- atomic_t ksnd_nthreads; /* # live threads */
- int ksnd_shuttingdown; /* tell threads to exit */
-
- kpr_router_t ksnd_router; /* THE router */
-
- spinlock_t ksnd_sched_lock; /* serialise packet scheduling */
- wait_queue_head_t ksnd_sched_waitq; /* where scheduler(s) wait */
-
- struct list_head ksnd_rx_conns; /* conn waiting to be read */
- struct list_head ksnd_tx_conns; /* conn waiting to be written */
-
- void *ksnd_fmbs; /* all the pre-allocated FMBs */
- ksock_fmb_pool_t ksnd_small_fmp; /* small message forwarding buffers */
- ksock_fmb_pool_t ksnd_large_fmp; /* large message forwarding buffers */
-
- void *ksnd_ltxs; /* all the pre-allocated LTXs */
- struct list_head ksnd_idle_ltx_list; /* where to get an idle LTX */
- struct list_head ksnd_idle_nblk_ltx_list; /* where to get an idle LTX if you can't block */
- wait_queue_head_t ksnd_idle_ltx_waitq; /* where to block for an idle LTX */
-
- struct list_head ksnd_reaper_list; /* conn waiting to be reaped */
- wait_queue_head_t ksnd_reaper_waitq; /* reaper sleeps here */
- spinlock_t ksnd_reaper_lock; /* serialise */
-
- struct task_struct *ksnd_pollthread_tsk;/* task_struct for the poll thread */
- poll_table ksnd_pwait; /* poll wait table for the socket */
- int ksnd_slistchange; /* informs the pollthread that
- * the socklist has changed */
-} ksock_nal_data_t;
-
-#define SOCKNAL_INIT_NOTHING 0
-#define SOCKNAL_INIT_DATA 1
-#define SOCKNAL_INIT_PTL 2
-#define SOCKNAL_INIT_ALL 3
-
-typedef struct /* transmit packet */
-{
- struct list_head tx_list; /* queue on conn for transmission etc */
- char tx_isfwd; /* forwarding / sourced here */
- int tx_nob; /* # packet bytes */
- int tx_niov; /* # packet frags */
- struct iovec *tx_iov; /* packet frags */
-} ksock_tx_t;
-
-typedef struct /* locally transmitted packet */
-{
- ksock_tx_t ltx_tx; /* send info */
- struct list_head *ltx_idle; /* where to put when idle */
- void *ltx_private; /* lib_finalize() callback arg */
- void *ltx_cookie; /* lib_finalize() callback arg */
- struct iovec ltx_iov[1 + PTL_MD_MAX_IOV]; /* msg frags */
- ptl_hdr_t ltx_hdr; /* buffer for packet header */
-} ksock_ltx_t;
-
-#define KSOCK_TX_2_KPR_FWD_DESC(ptr) list_entry (ptr, kpr_fwd_desc_t, kprfd_scratch)
-/* forwarded packets (router->socknal) embedded in kpr_fwd_desc_t::kprfd_scratch */
-
-#define KSOCK_TX_2_KSOCK_LTX(ptr) list_entry (ptr, ksock_ltx_t, ltx_tx)
-/* local packets (lib->socknal) embedded in ksock_ltx_t::ltx_tx */
-
-/* NB list_entry() is used here as convenient macro for calculating a
- * pointer to a struct from the addres of a member.
- */
-
-typedef struct /* Kernel portals Socket Forwarding message buffer */
-{ /* (socknal->router) */
- struct list_head fmb_list; /* queue idle */
- kpr_fwd_desc_t fmb_fwd; /* router's descriptor */
- int fmb_npages; /* # pages allocated */
- ksock_fmb_pool_t *fmb_pool; /* owning pool */
- struct page *fmb_pages[SOCKNAL_LARGE_FWD_PAGES];
- struct iovec fmb_iov[SOCKNAL_LARGE_FWD_PAGES];
-} ksock_fmb_t;
-
-#define SOCKNAL_RX_HEADER 1 /* reading header */
-#define SOCKNAL_RX_BODY 2 /* reading body (to deliver here) */
-#define SOCKNAL_RX_BODY_FWD 3 /* reading body (to forward) */
-#define SOCKNAL_RX_SLOP 4 /* skipping body */
-#define SOCKNAL_RX_GET_FMB 5 /* scheduled for forwarding */
-#define SOCKNAL_RX_FMB_SLEEP 6 /* blocked waiting for a fwd desc */
-
-typedef struct
-{
- struct list_head ksnc_list; /* stash on global socket list */
- struct file *ksnc_file; /* socket filp */
- struct socket *ksnc_sock; /* socket */
- ptl_nid_t ksnc_peernid; /* who's on the other end */
- atomic_t ksnc_refcount; /* # users */
-
- /* READER */
- struct list_head ksnc_rx_list; /* where I enq waiting input or a forwarding descriptor */
- unsigned long ksnc_rx_ready; /* data ready to read */
- int ksnc_rx_scheduled; /* being progressed */
- int ksnc_rx_state; /* what is being read */
- int ksnc_rx_nob_left; /* # bytes to next hdr/body */
- int ksnc_rx_nob_wanted; /* bytes actually wanted */
- int ksnc_rx_niov; /* # frags */
- struct iovec ksnc_rx_iov[1 + PTL_MD_MAX_IOV]; /* the frags */
-
- void *ksnc_cookie; /* rx lib_finalize passthru arg */
- ptl_hdr_t ksnc_hdr; /* where I read headers into */
-
- /* WRITER */
- struct list_head ksnc_tx_list; /* where I enq waiting for output space */
- struct list_head ksnc_tx_queue; /* packets waiting to be sent */
- unsigned long ksnc_tx_ready; /* write space */
- int ksnc_tx_scheduled; /* being progressed */
-
-} ksock_conn_t;
-
-extern int ktoenal_add_sock (ptl_nid_t nid, int fd);
-extern int ktoenal_close_sock(ptl_nid_t nid);
-extern int ktoenal_set_mynid(ptl_nid_t nid);
-extern int ktoenal_push_sock(ptl_nid_t nid);
-extern ksock_conn_t *ktoenal_get_conn (ptl_nid_t nid);
-extern void _ktoenal_put_conn (ksock_conn_t *conn);
-extern void ktoenal_close_conn (ksock_conn_t *conn);
-
-static inline void
-ktoenal_put_conn (ksock_conn_t *conn)
-{
- CDEBUG (D_OTHER, "putting conn[%p] -> "LPX64" (%d)\n",
- conn, conn->ksnc_peernid, atomic_read (&conn->ksnc_refcount));
-
- if (atomic_dec_and_test (&conn->ksnc_refcount))
- _ktoenal_put_conn (conn);
-}
-
-extern int ktoenal_thread_start (int (*fn)(void *arg), void *arg);
-extern int ktoenal_new_packet (ksock_conn_t *conn, int skip);
-extern void ktoenal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd);
-extern int ktoenal_scheduler (void *arg);
-extern int ktoenal_reaper (void *arg);
-extern int ktoenal_pollthread (void *arg);
-extern void ktoenal_data_ready(ksock_conn_t *conn);
-extern void ktoenal_write_space(ksock_conn_t *conn);
-
-
-extern nal_cb_t ktoenal_lib;
-extern ksock_nal_data_t ktoenal_data;
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- * Author: Zach Brown <zab@zabbo.net>
- * Author: Peter J. Braam <braam@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
- * Author: Eric Barton <eric@bartonsoftware.com>
- * Author: Kedar Sovani <kedar@calsoftinc.com>
- * Author: Amey Inamdar <amey@calsoftinc.com>
- *
- * This file is part of Portals, http://www.sf.net/projects/lustre/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include <linux/poll.h>
-#include "toenal.h"
-
-atomic_t ktoenal_packets_received;
-long ktoenal_packets_launched;
-long ktoenal_packets_transmitted;
-
-/*
- * LIB functions follow
- *
- */
-ptl_err_t
-ktoenal_read(nal_cb_t *nal, void *private, void *dst_addr,
- user_ptr src_addr, size_t len)
-{
- CDEBUG(D_NET, LPX64": reading %ld bytes from %p -> %p\n",
- nal->ni.nid, (long)len, src_addr, dst_addr);
-
- memcpy( dst_addr, src_addr, len );
- return PTL_OK;
-}
-
-ptl_err_t
-ktoenal_write(nal_cb_t *nal, void *private, user_ptr dst_addr,
- void *src_addr, size_t len)
-{
- CDEBUG(D_NET, LPX64": writing %ld bytes from %p -> %p\n",
- nal->ni.nid, (long)len, src_addr, dst_addr);
-
- memcpy( dst_addr, src_addr, len );
- return PTL_OK;
-}
-
-void *
-ktoenal_malloc(nal_cb_t *nal, size_t len)
-{
- void *buf;
-
- PORTAL_ALLOC(buf, len);
-
- if (buf != NULL)
- memset(buf, 0, len);
-
- return (buf);
-}
-
-void
-ktoenal_free(nal_cb_t *nal, void *buf, size_t len)
-{
- PORTAL_FREE(buf, len);
-}
-
-void
-ktoenal_printf(nal_cb_t *nal, const char *fmt, ...)
-{
- va_list ap;
- char msg[256];
-
- va_start (ap, fmt);
- vsnprintf (msg, sizeof (msg), fmt, ap); /* sprint safely */
- va_end (ap);
-
- msg[sizeof (msg) - 1] = 0; /* ensure terminated */
-
- CDEBUG (D_NET, "%s", msg);
-}
-
-void
-ktoenal_cli(nal_cb_t *nal, unsigned long *flags)
-{
- ksock_nal_data_t *data = nal->nal_data;
-
- spin_lock(&data->ksnd_nal_cb_lock);
-}
-
-void
-ktoenal_sti(nal_cb_t *nal, unsigned long *flags)
-{
- ksock_nal_data_t *data;
- data = nal->nal_data;
-
- spin_unlock(&data->ksnd_nal_cb_lock);
-}
-
-int
-ktoenal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
-{
- /* I would guess that if ktoenal_get_conn(nid) == NULL,
- and we're not routing, then 'nid' is very distant :) */
- if ( nal->ni.nid == nid ) {
- *dist = 0;
- } else {
- *dist = 1;
- }
-
- return 0;
-}
-
-ksock_ltx_t *
-ktoenal_get_ltx (int may_block)
-{
- unsigned long flags;
- ksock_ltx_t *ltx = NULL;
-
- for (;;)
- {
- spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags);
-
- if (!list_empty (&ktoenal_data.ksnd_idle_ltx_list))
- {
- ltx = list_entry (ktoenal_data.ksnd_idle_ltx_list.next, ksock_ltx_t, ltx_tx.tx_list);
-                        list_del (&ltx->ltx_tx.tx_list);
- break;
- }
-
- if (!may_block)
- {
- if (!list_empty (&ktoenal_data.ksnd_idle_nblk_ltx_list))
- {
- ltx = list_entry (ktoenal_data.ksnd_idle_nblk_ltx_list.next,
- ksock_ltx_t, ltx_tx.tx_list);
-                                list_del (&ltx->ltx_tx.tx_list);
- }
- break;
- }
-
- spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
-
- wait_event (ktoenal_data.ksnd_idle_ltx_waitq,
- !list_empty (&ktoenal_data.ksnd_idle_ltx_list));
- }
-
- spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
-
- return (ltx);
-}
-
-int
-ktoenal_sendmsg (struct file *sock, struct iovec *iov, int niov, int nob, int flags)
-{
- /* NB This procedure "consumes" iov (actually we do, tcp_sendmsg doesn't)
- */
- mm_segment_t oldmm;
- int rc;
-
- LASSERT (niov > 0);
- LASSERT (nob > 0);
-
- oldmm = get_fs();
- set_fs (KERNEL_DS);
-
-#ifdef PORTAL_DEBUG
- {
- int total_nob;
- int i;
-
- for (i = total_nob = 0; i < niov; i++)
- total_nob += iov[i].iov_len;
-
- LASSERT (nob == total_nob);
- }
-#endif
- LASSERT (!in_interrupt());
-
- rc = sock->f_op->writev(sock, iov, niov, NULL);
-
- set_fs (oldmm);
-
- if (rc > 0) /* sent something? */
- {
- nob = rc; /* consume iov */
- for (;;)
- {
- LASSERT (niov > 0);
-
- if (iov->iov_len >= nob)
- {
- iov->iov_len -= nob;
- iov->iov_base = (void *)(((unsigned long)iov->iov_base) + nob);
- break;
- }
- nob -= iov->iov_len;
- iov->iov_len = 0;
- iov++;
- niov--;
- }
- }
-
- return (rc);
-}
-
-int
-ktoenal_recvmsg(struct file *sock, struct iovec *iov, int niov, int toread)
-{
- /* NB This procedure "consumes" iov (actually tcp_recvmsg does)
- */
- mm_segment_t oldmm;
- int ret, i, len = 0, origlen = 0;
-
- PROF_START(our_recvmsg);
- for(i = 0; i < niov; i++) {
- len += iov[i].iov_len;
- if(len >= toread)
- break;
- }
-
- if(len >= toread) {
- origlen = iov[i].iov_len;
- iov[i].iov_len -= (len - toread);
- }
- else { /* i == niov */
- i = niov - 1;
- }
-
- oldmm = get_fs();
- set_fs(KERNEL_DS);
-
- ret = sock->f_op->readv(sock, iov, i + 1, NULL);
-
- set_fs(oldmm);
-
- if(origlen)
- iov[i].iov_len = origlen;
-
- PROF_FINISH(our_recvmsg);
- return ret;
-}
-
-void
-ktoenal_process_transmit (ksock_conn_t *conn, unsigned long *irq_flags)
-{
- ksock_tx_t *tx = list_entry (conn->ksnc_tx_queue.next, ksock_tx_t, tx_list);
- int rc;
-
- LASSERT (conn->ksnc_tx_scheduled);
- LASSERT (conn->ksnc_tx_ready);
- LASSERT (!list_empty (&conn->ksnc_tx_queue));
-
- /* assume transmit will complete now, so dequeue while I've got the lock */
- list_del (&tx->tx_list);
-
- spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, *irq_flags);
-
- LASSERT (tx->tx_nob > 0);
-
- conn->ksnc_tx_ready = 0; /* write_space may race with me and set ready */
- mb(); /* => clear BEFORE trying to write */
-
- rc = ktoenal_sendmsg (conn->ksnc_file,
- tx->tx_iov, tx->tx_niov, tx->tx_nob,
- list_empty (&conn->ksnc_tx_queue) ?
- MSG_DONTWAIT : (MSG_DONTWAIT | MSG_MORE));
-
- CDEBUG (D_NET, "send(%d) %d\n", tx->tx_nob, rc);
-
- if (rc < 0) /* error */
- {
- if (rc == -EAGAIN) /* socket full => */
- rc = 0; /* nothing sent */
- else
- {
- //warning FIXME: handle socket errors properly
- CERROR ("Error socknal send(%d) %p: %d\n", tx->tx_nob, conn, rc);
- rc = tx->tx_nob; /* kid on for now whole packet went */
- }
- }
-
- if (rc == tx->tx_nob) /* everything went */
- {
- conn->ksnc_tx_ready = 1; /* assume more can go (ASAP) */
- ktoenal_put_conn (conn); /* release packet's ref */
-
- if (tx->tx_isfwd) /* was a forwarded packet? */
- {
- kpr_fwd_done (&ktoenal_data.ksnd_router,
- KSOCK_TX_2_KPR_FWD_DESC (tx), 0);
-
- spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
- }
- else /* local send */
- {
- ksock_ltx_t *ltx = KSOCK_TX_2_KSOCK_LTX (tx);
-
- lib_finalize (&ktoenal_lib, ltx->ltx_private,
- ltx->ltx_cookie, PTL_OK);
-
- spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
-
- list_add (&ltx->ltx_tx.tx_list, ltx->ltx_idle);
-
- /* normal tx desc => wakeup anyone blocking for one */
- if (ltx->ltx_idle == &ktoenal_data.ksnd_idle_ltx_list &&
- waitqueue_active (&ktoenal_data.ksnd_idle_ltx_waitq))
- wake_up (&ktoenal_data.ksnd_idle_ltx_waitq);
- }
- ktoenal_packets_transmitted++;
- }
- else
- {
- tx->tx_nob -= rc;
-
- spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
-
- /* back onto HEAD of tx_queue */
- list_add (&tx->tx_list, &conn->ksnc_tx_queue);
- }
-
- if (!conn->ksnc_tx_ready || /* no space to write now */
- list_empty (&conn->ksnc_tx_queue)) /* nothing to write */
- {
- conn->ksnc_tx_scheduled = 0; /* not being scheduled */
- ktoenal_put_conn (conn); /* release scheduler's ref */
- }
- else /* let scheduler call me again */
- list_add_tail (&conn->ksnc_tx_list, &ktoenal_data.ksnd_tx_conns);
-}
-
-void
-ktoenal_launch_packet (ksock_conn_t *conn, ksock_tx_t *tx)
-{
- unsigned long flags;
- int nob = tx->tx_nob;
- struct iovec *iov = tx->tx_iov;
- int niov = 1;
-
- LASSERT (nob >= sizeof (ptl_hdr_t));
-
- /* Truncate iov to exactly match total packet length
- * since socket sendmsg pays no attention to requested length.
- */
- for (;;)
- {
- LASSERT (niov <= tx->tx_niov);
-
- if (iov->iov_len >= nob)
- {
- iov->iov_len = nob;
- break;
- }
- nob -= iov->iov_len;
- iov++;
- niov++;
- }
- tx->tx_niov = niov;
-
- spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags);
- list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue);
-
- if (conn->ksnc_tx_ready && /* able to send */
- !conn->ksnc_tx_scheduled) /* not scheduled to send */
- {
- list_add_tail (&conn->ksnc_tx_list, &ktoenal_data.ksnd_tx_conns);
- conn->ksnc_tx_scheduled = 1;
- atomic_inc (&conn->ksnc_refcount); /* extra ref for scheduler */
- if (waitqueue_active (&ktoenal_data.ksnd_sched_waitq))
- wake_up (&ktoenal_data.ksnd_sched_waitq);
- }
-
- ktoenal_packets_launched++;
- spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
-}
-
-ptl_err_t
-ktoenal_send(nal_cb_t *nal, void *private, lib_msg_t *cookie,
- ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
- unsigned int payload_niov, struct iovec *payload_iov,
- size_t payload_off, size_t payload_len)
-{
- ptl_nid_t gatewaynid;
- ksock_conn_t *conn;
- ksock_ltx_t *ltx;
- int rc;
- int i;
-
- /* By this point, as it happens, we have absolutely no idea what
- * 'private' is. It might be ksock_nal_data or it might be ksock_conn.
- * Ha ha, isn't that a funny joke?
- *
- * FIXME: this is not the right way to fix this; the right way is to
- * always pass in the same kind of structure. This is hard right now.
- * To revisit this issue, set a breakpoint in here and watch for when
- * it's called from lib_finalize. I think this occurs when we send a
- * packet as a side-effect of another packet, such as when an ACK has
- * been requested. -phil */
-
- CDEBUG(D_NET, "sending %d bytes from [%d](%p,%d)... to nid: "
- LPX64" pid %d\n", (int)payload_len, payload_niov,
- payload_niov > 0 ? payload_iov[0].iov_base : NULL,
- (int)(payload_niov > 0 ? payload_iov[0].iov_len : 0), nid, pid);
-
- /* XXX not implemented read-only iov with offset */
- LBUG();
-
- if ((conn = ktoenal_get_conn (nid)) == NULL)
- {
- /* It's not a peer; try to find a gateway */
- rc = kpr_lookup (&ktoenal_data.ksnd_router, nid, payload_niov,
- &gatewaynid);
- if (rc != 0)
- {
- CERROR ("Can't route to "LPX64": router error %d\n", nid, rc);
- return (PTL_FAIL);
- }
-
- if ((conn = ktoenal_get_conn (gatewaynid)) == NULL)
- {
- CERROR ("Can't route to "LPX64": gateway "LPX64" is not a peer\n",
- nid, gatewaynid);
- return (PTL_FAIL);
- }
- }
-
- /* This transmit has now got a ref on conn */
-
- /* I may not block for a transmit descriptor if I might block the
- * receiver, or an interrupt handler. */
- ltx = ktoenal_get_ltx (!(type == PTL_MSG_ACK ||
- type == PTL_MSG_REPLY ||
- in_interrupt ()));
- if (ltx == NULL)
- {
- CERROR ("Can't allocate tx desc\n");
- ktoenal_put_conn (conn);
- return (PTL_FAIL);
- }
-
- /* Init common (to sends and forwards) packet part */
- ltx->ltx_tx.tx_isfwd = 0;
- ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len;
- ltx->ltx_tx.tx_niov = 1 + payload_niov;
- ltx->ltx_tx.tx_iov = ltx->ltx_iov;
-
- /* Init local send packet (storage for hdr, finalize() args, iov) */
- ltx->ltx_hdr = *hdr;
- ltx->ltx_private = private;
- ltx->ltx_cookie = cookie;
-
- ltx->ltx_iov[0].iov_base = &ltx->ltx_hdr;
- ltx->ltx_iov[0].iov_len = sizeof (ltx->ltx_hdr);
-
- LASSERT (payload_niov <= PTL_MD_MAX_IOV);
-
- for (i = 0; i < payload_niov; i++)
- {
- ltx->ltx_iov[1 + i].iov_base = payload_iov[i].iov_base;
- ltx->ltx_iov[1 + i].iov_len = payload_iov[i].iov_len;
- }
-
- ktoenal_launch_packet (conn, &ltx->ltx_tx);
- return (PTL_OK);
-}
-
-void
-ktoenal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
-{
- ksock_conn_t *conn;
- ptl_nid_t nid = fwd->kprfd_gateway_nid;
- ksock_tx_t *tx = (ksock_tx_t *)&fwd->kprfd_scratch;
-
- CDEBUG (D_NET, "Forwarding [%p] -> "LPX64" ("LPX64"))\n", fwd,
- fwd->kprfd_gateway_nid, fwd->kprfd_target_nid);
-
- if (nid == ktoenal_lib.ni.nid) /* I'm the gateway; must be the last hop */
- nid = fwd->kprfd_target_nid;
-
- conn = ktoenal_get_conn (nid);
- if (conn == NULL)
- {
- CERROR ("[%p] fwd to "LPX64" isn't a peer\n", fwd, nid);
- kpr_fwd_done (&ktoenal_data.ksnd_router, fwd, -EHOSTUNREACH);
- return;
- }
-
- /* This forward has now got a ref on conn */
-
- tx->tx_isfwd = 1; /* This is a forwarding packet */
- tx->tx_nob = fwd->kprfd_nob;
- tx->tx_niov = fwd->kprfd_niov;
- tx->tx_iov = fwd->kprfd_iov;
-
- ktoenal_launch_packet (conn, tx);
-}
-
-int
-ktoenal_thread_start (int (*fn)(void *arg), void *arg)
-{
- long pid = kernel_thread (fn, arg, 0);
-
- if (pid < 0)
- return ((int)pid);
-
- atomic_inc (&ktoenal_data.ksnd_nthreads);
- return (0);
-}
-
-void
-ktoenal_thread_fini (void)
-{
- atomic_dec (&ktoenal_data.ksnd_nthreads);
-}
-
-void
-ktoenal_fmb_callback (void *arg, int error)
-{
- ksock_fmb_t *fmb = (ksock_fmb_t *)arg;
- ptl_hdr_t *hdr = (ptl_hdr_t *) page_address(fmb->fmb_pages[0]);
- ksock_conn_t *conn;
- unsigned long flags;
-
- CDEBUG (D_NET, "routed packet from "LPX64" to "LPX64": %d\n",
- hdr->src_nid, hdr->dest_nid, error);
-
- if (error != 0)
- CERROR ("Failed to route packet from "LPX64" to "LPX64": %d\n",
- hdr->src_nid, hdr->dest_nid, error);
-
- spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags);
-
- list_add (&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs);
-
- if (!list_empty (&fmb->fmb_pool->fmp_blocked_conns))
- {
- conn = list_entry (fmb->fmb_pool->fmp_blocked_conns.next, ksock_conn_t, ksnc_rx_list);
- list_del (&conn->ksnc_rx_list);
-
- CDEBUG (D_NET, "Scheduling conn %p\n", conn);
- LASSERT (conn->ksnc_rx_scheduled);
- LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_FMB_SLEEP);
-
- conn->ksnc_rx_state = SOCKNAL_RX_GET_FMB;
- list_add_tail (&conn->ksnc_rx_list, &ktoenal_data.ksnd_rx_conns);
-
- if (waitqueue_active (&ktoenal_data.ksnd_sched_waitq))
- wake_up (&ktoenal_data.ksnd_sched_waitq);
- }
-
- spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
-}
-
-ksock_fmb_t *
-ktoenal_get_idle_fmb (ksock_conn_t *conn)
-{
- /* NB called with sched lock held */
- int payload_nob = conn->ksnc_rx_nob_left;
- int packet_nob = sizeof (ptl_hdr_t) + payload_nob;
- ksock_fmb_pool_t *pool;
- ksock_fmb_t *fmb;
-
- LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
-
- if (packet_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE)
- pool = &ktoenal_data.ksnd_small_fmp;
- else
- pool = &ktoenal_data.ksnd_large_fmp;
-
- if (!list_empty (&pool->fmp_idle_fmbs))
- {
- fmb = list_entry (pool->fmp_idle_fmbs.next, ksock_fmb_t, fmb_list);
- list_del (&fmb->fmb_list);
- return (fmb);
- }
-
- /* deschedule until fmb free */
-
- conn->ksnc_rx_state = SOCKNAL_RX_FMB_SLEEP;
-
- list_add_tail (&conn->ksnc_rx_list,
- &pool->fmp_blocked_conns);
- return (NULL);
-}
-
-
-int
-ktoenal_init_fmb (ksock_conn_t *conn, ksock_fmb_t *fmb)
-{
- int payload_nob = conn->ksnc_rx_nob_left;
- int packet_nob = sizeof (ptl_hdr_t) + payload_nob;
- int niov; /* at least the header */
- int nob;
-
- LASSERT (conn->ksnc_rx_scheduled);
- LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
- LASSERT (conn->ksnc_rx_nob_wanted == conn->ksnc_rx_nob_left);
- LASSERT (payload_nob >= 0);
- LASSERT (packet_nob <= fmb->fmb_npages * PAGE_SIZE);
- LASSERT (sizeof (ptl_hdr_t) < PAGE_SIZE);
-
- /* Got a forwarding buffer; copy the header we just read into the
- * forwarding buffer. If there's payload start reading reading it
- * into the buffer, otherwise the forwarding buffer can be kicked
- * off immediately.
- *
- * NB fmb->fmb_iov spans the WHOLE packet.
- * conn->ksnc_rx_iov spans just the payload.
- */
-
- fmb->fmb_iov[0].iov_base = page_address (fmb->fmb_pages[0]);
-
- memcpy (fmb->fmb_iov[0].iov_base, &conn->ksnc_hdr, sizeof (ptl_hdr_t)); /* copy header */
-
- if (payload_nob == 0) /* got complete packet already */
- {
- atomic_inc (&ktoenal_packets_received);
-
- CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d fwd_start (immediate)\n", conn,
- conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, packet_nob);
-
- fmb->fmb_iov[0].iov_len = sizeof (ptl_hdr_t);
-
- kpr_fwd_init (&fmb->fmb_fwd, conn->ksnc_hdr.dest_nid,
- packet_nob, 1, fmb->fmb_iov,
- ktoenal_fmb_callback, fmb);
-
- kpr_fwd_start (&ktoenal_data.ksnd_router, &fmb->fmb_fwd); /* forward it now */
-
- ktoenal_new_packet (conn, 0); /* on to next packet */
- return (1);
- }
-
- niov = 1;
- if (packet_nob <= PAGE_SIZE) /* whole packet fits in first page */
- fmb->fmb_iov[0].iov_len = packet_nob;
- else
- {
- fmb->fmb_iov[0].iov_len = PAGE_SIZE;
- nob = packet_nob - PAGE_SIZE;
-
- do
- {
- LASSERT (niov < fmb->fmb_npages);
- fmb->fmb_iov[niov].iov_base = page_address (fmb->fmb_pages[niov]);
- fmb->fmb_iov[niov].iov_len = MIN (PAGE_SIZE, nob);
- nob -= PAGE_SIZE;
- niov++;
- } while (nob > 0);
- }
-
- kpr_fwd_init (&fmb->fmb_fwd, conn->ksnc_hdr.dest_nid,
- packet_nob, niov, fmb->fmb_iov,
- ktoenal_fmb_callback, fmb);
-
- /* stash router's descriptor ready for call to kpr_fwd_start */
- conn->ksnc_cookie = &fmb->fmb_fwd;
-
- conn->ksnc_rx_state = SOCKNAL_RX_BODY_FWD; /* read in the payload */
-
- /* payload is desc's iov-ed buffer, but skipping the hdr */
- LASSERT (niov <= sizeof (conn->ksnc_rx_iov) / sizeof (conn->ksnc_rx_iov[0]));
-
- conn->ksnc_rx_iov[0].iov_base = (void *)(((unsigned long)fmb->fmb_iov[0].iov_base) + sizeof (ptl_hdr_t));
- conn->ksnc_rx_iov[0].iov_len = fmb->fmb_iov[0].iov_len - sizeof (ptl_hdr_t);
-
- if (niov > 1)
- memcpy (&conn->ksnc_rx_iov[1], &fmb->fmb_iov[1], (niov - 1) * sizeof (struct iovec));
-
- conn->ksnc_rx_niov = niov;
-
- CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d reading body\n", conn,
- conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, payload_nob);
- return (0);
-}
-
-void
-ktoenal_fwd_parse (ksock_conn_t *conn)
-{
- ksock_conn_t *conn2;
- int body_len;
-
- CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d parsing header\n", conn,
- conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, conn->ksnc_rx_nob_left);
-
- LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_HEADER);
- LASSERT (conn->ksnc_rx_scheduled);
-
- body_len = conn->ksnc_hdr.payload_length;
-
- if (body_len < 0) /* length corrupt */
- {
- CERROR ("dropping packet from "LPX64" for "LPX64": packet size %d illegal\n",
- conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, body_len);
- ktoenal_new_packet (conn, 0); /* on to new packet */
- return;
- }
-
- if (body_len > PTL_MTU) /* too big to forward */
- {
- CERROR ("dropping packet from "LPX64" for "LPX64": packet size %d too big\n",
- conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, body_len);
- ktoenal_new_packet (conn, body_len); /* on to new packet (skip this one's body) */
- return;
- }
-
- conn2 = ktoenal_get_conn (conn->ksnc_hdr.dest_nid); /* should have gone direct */
- if (conn2 != NULL)
- {
- CERROR ("dropping packet from "LPX64" for "LPX64": target is a peer\n",
- conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid);
- ktoenal_put_conn (conn2); /* drop ref from get above */
-
- ktoenal_new_packet (conn, body_len); /* on to next packet (skip this one's body) */
- return;
- }
-
- conn->ksnc_rx_state = SOCKNAL_RX_GET_FMB; /* Getting FMB now */
- conn->ksnc_rx_nob_left = body_len; /* stash packet size */
- conn->ksnc_rx_nob_wanted = body_len; /* (no slop) */
-}
-
-int
-ktoenal_new_packet (ksock_conn_t *conn, int nob_to_skip)
-{
- static char ktoenal_slop_buffer[4096];
-
- int nob;
- int niov;
- int skipped;
-
- if (nob_to_skip == 0) /* right at next packet boundary now */
- {
- conn->ksnc_rx_state = SOCKNAL_RX_HEADER;
- conn->ksnc_rx_nob_wanted = sizeof (ptl_hdr_t);
- conn->ksnc_rx_nob_left = sizeof (ptl_hdr_t);
-
- conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_hdr;
- conn->ksnc_rx_iov[0].iov_len = sizeof (ptl_hdr_t);
- conn->ksnc_rx_niov = 1;
- return (1);
- }
-
- /* set up to skip as much a possible now */
- /* if there's more left (ran out of iov entries) we'll get called again */
-
- conn->ksnc_rx_state = SOCKNAL_RX_SLOP;
- conn->ksnc_rx_nob_left = nob_to_skip;
- skipped = 0;
- niov = 0;
-
- do
- {
- nob = MIN (nob_to_skip, sizeof (ktoenal_slop_buffer));
-
- conn->ksnc_rx_iov[niov].iov_base = ktoenal_slop_buffer;
- conn->ksnc_rx_iov[niov].iov_len = nob;
- niov++;
- skipped += nob;
- nob_to_skip -=nob;
-
- } while (nob_to_skip != 0 && /* mustn't overflow conn's rx iov */
- niov < sizeof (conn->ksnc_rx_iov)/sizeof (conn->ksnc_rx_iov[0]));
-
- conn->ksnc_rx_niov = niov;
- conn->ksnc_rx_nob_wanted = skipped;
- return (0);
-}
-
-void
-ktoenal_process_receive (ksock_conn_t *conn, unsigned long *irq_flags)
-{
- ksock_fmb_t *fmb;
- int len;
- LASSERT (atomic_read (&conn->ksnc_refcount) > 0);
- LASSERT (conn->ksnc_rx_scheduled);
- LASSERT (conn->ksnc_rx_ready);
-
- /* NB: sched lock held */
- CDEBUG(D_NET, "conn %p\n", conn);
-
- if (conn->ksnc_rx_state != SOCKNAL_RX_GET_FMB) /* doesn't need a forwarding buffer */
- {
- spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, *irq_flags);
- goto try_read;
- }
-
- get_fmb:
- /* NB: sched lock held */
- fmb = ktoenal_get_idle_fmb (conn);
- if (fmb == NULL) /* conn descheduled waiting for idle fmb */
- return;
-
- spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, *irq_flags);
-
- if (ktoenal_init_fmb (conn, fmb)) /* packet forwarded ? */
- goto out; /* come back later for next packet */
-
- try_read:
- /* NB: sched lock NOT held */
- LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_HEADER ||
- conn->ksnc_rx_state == SOCKNAL_RX_BODY ||
- conn->ksnc_rx_state == SOCKNAL_RX_BODY_FWD ||
- conn->ksnc_rx_state == SOCKNAL_RX_SLOP);
-
- LASSERT (conn->ksnc_rx_niov > 0);
- LASSERT (conn->ksnc_rx_nob_wanted > 0);
-
- conn->ksnc_rx_ready = 0; /* data ready may race with me and set ready */
- mb(); /* => clear BEFORE trying to read */
-
- /* NB ktoenal_recvmsg "consumes" the iov passed to it */
- len = ktoenal_recvmsg(conn->ksnc_file,
- conn->ksnc_rx_iov, conn->ksnc_rx_niov,
- conn->ksnc_rx_nob_wanted);
- CDEBUG (D_NET, "%p read(%d) %d\n", conn, conn->ksnc_rx_nob_wanted, len);
-
- if (len <= 0) /* nothing ready (EAGAIN) or EOF or error */
- {
- if (len != -EAGAIN && /* ! nothing to read now */
- len != 0) /* ! nothing to read ever */
- {
- // warning FIXME: handle socket errors properly
- CERROR ("Error socknal read(%d) %p: %d\n",
- conn->ksnc_rx_nob_wanted, conn, len);
- }
- goto out; /* come back when there's data ready */
- }
-
- LASSERT (len <= conn->ksnc_rx_nob_wanted);
- conn->ksnc_rx_nob_wanted -= len;
- conn->ksnc_rx_nob_left -= len;
-
- if (conn->ksnc_rx_nob_wanted != 0) /* short read */
- goto out; /* try again later */
-
- conn->ksnc_rx_ready = 1; /* assume there's more to be had */
-
- switch (conn->ksnc_rx_state)
- {
- case SOCKNAL_RX_HEADER:
- if (conn->ksnc_hdr.dest_nid != ktoenal_lib.ni.nid) /* It's not for me */
- {
- ktoenal_fwd_parse (conn);
- switch (conn->ksnc_rx_state)
- {
- case SOCKNAL_RX_HEADER: /* skipped this packet (zero payload) */
- goto out; /* => come back later */
- case SOCKNAL_RX_SLOP: /* skipping this packet's body */
- goto try_read; /* => go read it */
- case SOCKNAL_RX_GET_FMB: /* forwarding */
- spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
- goto get_fmb; /* => go get a fwd msg buffer */
- default:
- break;
- }
- /* Not Reached */
- LBUG ();
- }
-
- PROF_START(lib_parse);
- lib_parse(&ktoenal_lib, &conn->ksnc_hdr, conn); /* sets wanted_len, iovs etc */
- PROF_FINISH(lib_parse);
-
- if (conn->ksnc_rx_nob_wanted != 0) /* need to get some payload? */
- {
- conn->ksnc_rx_state = SOCKNAL_RX_BODY;
- goto try_read; /* go read the payload */
- }
- /* Fall through (completed packet for me) */
-
- case SOCKNAL_RX_BODY:
- atomic_inc (&ktoenal_packets_received);
- lib_finalize(&ktoenal_lib, NULL, conn->ksnc_cookie, PTL_OK); /* packet is done now */
- /* Fall through */
-
- case SOCKNAL_RX_SLOP:
- if (ktoenal_new_packet (conn, conn->ksnc_rx_nob_left)) /* starting new packet? */
- goto out; /* come back later */
- goto try_read; /* try to finish reading slop now */
-
- case SOCKNAL_RX_BODY_FWD:
- CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d fwd_start (got body)\n", conn,
- conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, conn->ksnc_rx_nob_left);
-
- atomic_inc (&ktoenal_packets_received);
-
- /* ktoenal_init_fmb() stashed router descriptor in conn->ksnc_cookie */
- kpr_fwd_start (&ktoenal_data.ksnd_router, (kpr_fwd_desc_t *)conn->ksnc_cookie);
-
- LASSERT (conn->ksnc_rx_nob_left == 0); /* no slop in forwarded packets */
-
- ktoenal_new_packet (conn, 0); /* on to next packet */
- goto out; /* (later) */
-
- default:
- break;
- }
-
- /* Not Reached */
- LBUG ();
-
- out:
- spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
-
- if (!conn->ksnc_rx_ready) /* no data there to read? */
- {
- conn->ksnc_rx_scheduled = 0; /* let socket callback schedule again */
- ktoenal_put_conn (conn); /* release scheduler's ref */
- }
- else /* let scheduler call me again */
- list_add_tail (&conn->ksnc_rx_list, &ktoenal_data.ksnd_rx_conns);
-}
-
-ptl_err_t
-ktoenal_recv(nal_cb_t *nal, void *private, lib_msg_t *msg,
- unsigned int niov, struct iovec *iov,
- size_t offset, size_t mlen, size_t rlen)
-{
- ksock_conn_t *conn = (ksock_conn_t *)private;
- int i;
-
- /* XXX not implemented read-only iov with offset */
- LBUG();
-
- conn->ksnc_cookie = msg;
-
- LASSERT (niov <= PTL_MD_MAX_IOV);
- for (i = 0; i < niov; i++)
- {
- conn->ksnc_rx_iov[i].iov_len = iov[i].iov_len;
- conn->ksnc_rx_iov[i].iov_base = iov[i].iov_base;
- }
-
- conn->ksnc_rx_niov = niov;
- conn->ksnc_rx_nob_wanted = mlen;
- conn->ksnc_rx_nob_left = rlen;
-
- return (PTL_OK);
-}
-
-int
-ktoenal_scheduler (void *arg)
-{
- unsigned long flags;
- ksock_conn_t *conn;
- int rc;
- int nloops = 0;
-
- kportal_daemonize ("ktoenal_sched");
- kportal_blockallsigs ();
-
- spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags);
-
- while (!ktoenal_data.ksnd_shuttingdown)
- {
- int did_something = 0;
-
- /* Ensure I progress everything semi-fairly */
-
- if (!list_empty (&ktoenal_data.ksnd_rx_conns))
- {
- did_something = 1;
- conn = list_entry (ktoenal_data.ksnd_rx_conns.next,
- ksock_conn_t, ksnc_rx_list);
- list_del (&conn->ksnc_rx_list);
-
- ktoenal_process_receive (conn, &flags); /* drops & regains ksnd_sched_lock */
- }
-
- if (!list_empty (&ktoenal_data.ksnd_tx_conns))
- {
- did_something = 1;
- conn = list_entry (ktoenal_data.ksnd_tx_conns.next,
- ksock_conn_t, ksnc_tx_list);
-
- list_del (&conn->ksnc_tx_list);
- ktoenal_process_transmit (conn, &flags); /* drops and regains ksnd_sched_lock */
- }
-
- if (!did_something || /* nothing to do */
- ++nloops == SOCKNAL_RESCHED) /* hogging CPU? */
- {
- spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
-
- nloops = 0;
-
- if (!did_something) { /* wait for something to do */
- rc = wait_event_interruptible (ktoenal_data.ksnd_sched_waitq,
- ktoenal_data.ksnd_shuttingdown ||
- !list_empty (&ktoenal_data.ksnd_rx_conns) ||
- !list_empty (&ktoenal_data.ksnd_tx_conns));
- LASSERT (rc == 0);
- } else
- our_cond_resched();
-
- spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags);
- }
- }
-
- spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
- ktoenal_thread_fini ();
- return (0);
-}
-
-
-int
-ktoenal_reaper (void *arg)
-{
- unsigned long flags;
- ksock_conn_t *conn;
- int rc;
-
- kportal_daemonize ("ktoenal_reaper");
- kportal_blockallsigs ();
-
- while (!ktoenal_data.ksnd_shuttingdown)
- {
- spin_lock_irqsave (&ktoenal_data.ksnd_reaper_lock, flags);
-
- if (list_empty (&ktoenal_data.ksnd_reaper_list))
- conn = NULL;
- else
- {
- conn = list_entry (ktoenal_data.ksnd_reaper_list.next,
- ksock_conn_t, ksnc_list);
- list_del (&conn->ksnc_list);
- }
-
- spin_unlock_irqrestore (&ktoenal_data.ksnd_reaper_lock, flags);
-
- if (conn != NULL)
- ktoenal_close_conn (conn);
- else {
- rc = wait_event_interruptible (ktoenal_data.ksnd_reaper_waitq,
- ktoenal_data.ksnd_shuttingdown ||
- !list_empty(&ktoenal_data.ksnd_reaper_list));
- LASSERT (rc == 0);
- }
- }
-
- ktoenal_thread_fini ();
- return (0);
-}
-
-#define POLLREAD (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)
-#define POLLWRITE (POLLOUT | POLLWRNORM | POLLWRBAND)
-
-int
-ktoenal_pollthread(void *arg)
-{
- unsigned int mask;
- struct list_head *tmp;
- ksock_conn_t *conn;
-
- /* Save the task struct for waking it up */
- ktoenal_data.ksnd_pollthread_tsk = current;
-
- kportal_daemonize ("ktoenal_pollthread");
- kportal_blockallsigs ();
-
- poll_initwait(&ktoenal_data.ksnd_pwait);
-
- while(!ktoenal_data.ksnd_shuttingdown) {
-
- set_current_state(TASK_INTERRUPTIBLE);
-
- read_lock (&ktoenal_data.ksnd_socklist_lock);
- list_for_each(tmp, &ktoenal_data.ksnd_socklist) {
-
- conn = list_entry(tmp, ksock_conn_t, ksnc_list);
- atomic_inc(&conn->ksnc_refcount);
- read_unlock (&ktoenal_data.ksnd_socklist_lock);
-
- mask = conn->ksnc_file->f_op->poll(conn->ksnc_file,
- ktoenal_data.ksnd_slistchange ?
- &ktoenal_data.ksnd_pwait : NULL);
-
- if(mask & POLLREAD) {
- ktoenal_data_ready(conn);
-
- }
- if (mask & POLLWRITE) {
- ktoenal_write_space(conn);
-
- }
- if (mask & (POLLERR | POLLHUP)) {
- /* Do error processing */
- }
-
- read_lock (&ktoenal_data.ksnd_socklist_lock);
- if(atomic_dec_and_test(&conn->ksnc_refcount))
- _ktoenal_put_conn(conn);
- }
- ktoenal_data.ksnd_slistchange = 0;
- read_unlock (&ktoenal_data.ksnd_socklist_lock);
-
- schedule_timeout(MAX_SCHEDULE_TIMEOUT);
- if(ktoenal_data.ksnd_slistchange) {
- poll_freewait(&ktoenal_data.ksnd_pwait);
- poll_initwait(&ktoenal_data.ksnd_pwait);
- }
- }
- poll_freewait(&ktoenal_data.ksnd_pwait);
- ktoenal_thread_fini();
- return (0);
-}
-
-void
-ktoenal_data_ready (ksock_conn_t *conn)
-{
- unsigned long flags;
- ENTRY;
-
- if (!test_and_set_bit (0, &conn->ksnc_rx_ready)) {
- spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags);
-
- if (!conn->ksnc_rx_scheduled) { /* not being progressed */
- list_add_tail (&conn->ksnc_rx_list,
- &ktoenal_data.ksnd_rx_conns);
- conn->ksnc_rx_scheduled = 1;
- /* extra ref for scheduler */
- atomic_inc (&conn->ksnc_refcount);
-
- /* This is done to avoid the effects of a sequence
- * of events in which the rx_ready is lost
- */
- conn->ksnc_rx_ready=1;
-
- if (waitqueue_active (&ktoenal_data.ksnd_sched_waitq))
- wake_up (&ktoenal_data.ksnd_sched_waitq);
- }
-
- spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
- }
-
- EXIT;
-}
-
-void
-ktoenal_write_space (ksock_conn_t *conn)
-{
- unsigned long flags;
-
- CDEBUG (D_NET, "conn %p%s%s%s\n",
- conn,
- (conn == NULL) ? "" : (test_bit (0, &conn->ksnc_tx_ready) ? " ready" : " blocked"),
- (conn == NULL) ? "" : (conn->ksnc_tx_scheduled ? " scheduled" : " idle"),
- (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ? " empty" : " queued"));
-
-
- if (!test_and_set_bit (0, &conn->ksnc_tx_ready)) {
- spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags);
-
- if (!list_empty (&conn->ksnc_tx_queue) && /* packets to send */
- !conn->ksnc_tx_scheduled) { /* not being progressed */
-
- list_add_tail (&conn->ksnc_tx_list,
- &ktoenal_data.ksnd_tx_conns);
- conn->ksnc_tx_scheduled = 1;
- /* extra ref for scheduler */
- atomic_inc (&conn->ksnc_refcount);
-
- if (waitqueue_active (&ktoenal_data.ksnd_sched_waitq))
- wake_up (&ktoenal_data.ksnd_sched_waitq);
- }
- spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags);
- }
-}
-
-nal_cb_t ktoenal_lib = {
- nal_data: &ktoenal_data, /* NAL private data */
- cb_send: ktoenal_send,
- cb_recv: ktoenal_recv,
- cb_read: ktoenal_read,
- cb_write: ktoenal_write,
- cb_malloc: ktoenal_malloc,
- cb_free: ktoenal_free,
- cb_printf: ktoenal_printf,
- cb_cli: ktoenal_cli,
- cb_sti: ktoenal_sti,
- cb_dist: ktoenal_dist
-};
copied = 0;
list_for_each(pos, &my_pages) {
unsigned long to_copy;
- page = list_entry(pos, struct page, list);
+ void *addr;
+ page = list_entry(pos, struct page, list);
to_copy = min(total - off, PAGE_SIZE);
if (to_copy == 0) {
off = 0;
to_copy = min(debug_size - off, PAGE_SIZE);
}
finish_partial:
- memcpy(kmap(page), debug_buf + off, to_copy);
- kunmap(page);
+ addr = kmap_atomic(page, KM_USER0);
+ memcpy(addr, debug_buf + off, to_copy);
+ kunmap_atomic(addr, KM_USER0);
copied += to_copy;
if (copied >= total)
break;
case QSWNAL:
case GMNAL:
case IBNAL:
- case TOENAL:
case SCIMACNAL:
sprintf(str, "%u:%u", (__u32)(nid >> 32), (__u32)nid);
break;
char *portals_debug_dumpstack(void)
{
- int size;
- unsigned long addr;
- char *buf = stack_backtrace;
- char *pbuf = buf;
- unsigned long *stack = (unsigned long *)&buf;
-
- size = sprintf(pbuf, " Call Trace: ");
- pbuf += size;
- while (((long) stack & (THREAD_SIZE-1)) != 0) {
- addr = *stack++;
- if (is_kernel_text_address(addr)) {
- size = sprintf(pbuf, "[<%08lx>] ", addr);
- pbuf += size;
- if (buf + LUSTRE_TRACE_SIZE <= pbuf + 12)
- break;
- }
- }
-
- return buf;
+ asm("int $3");
+ return "dump stack";
}
#elif defined(__i386__)
return (PORTAL_SYMBOL_GET(kqswnal_ni));
case SOCKNAL:
return (PORTAL_SYMBOL_GET(ksocknal_ni));
- case TOENAL:
- return (PORTAL_SYMBOL_GET(ktoenal_ni));
case GMNAL:
return (PORTAL_SYMBOL_GET(kgmnal_ni));
case IBNAL:
case SOCKNAL:
PORTAL_SYMBOL_PUT(ksocknal_ni);
break;
- case TOENAL:
- PORTAL_SYMBOL_PUT(ktoenal_ni);
- break;
case GMNAL:
PORTAL_SYMBOL_PUT(kgmnal_ni);
break;
reply.msg.reply.dst_wmd = hdr->msg.get.return_wmd;
+ /* NB call lib_send() _BEFORE_ lib_recv() completes the incoming
+ * message. Some NALs _require_ this to implement optimized GET */
+
rc = lib_send (nal, private, msg, &reply, PTL_MSG_REPLY,
hdr->src_nid, hdr->src_pid, md, offset, mlength);
if (rc != PTL_OK)
MODULE_PARM(nal, "i");
MODULE_PARM_DESC(nal, "Use the specified NAL "
- "(6-kscimacnal, 4-toenal, 2-ksocknal, 1-kqswnal)");
+ "(6-kscimacnal, 2-ksocknal, 1-kqswnal)");
MODULE_AUTHOR("Brian Behlendorf (LLNL)");
MODULE_DESCRIPTION("A kernel space ping server for portals testing");
MODULE_PARM(nal, "i");
MODULE_PARM_DESC(nal, "Use the specified NAL "
- "(6-kscimacnal, 4-toenal, 2-ksocknal, 1-kqswnal)");
+ "(6-kscimacnal, 2-ksocknal, 1-kqswnal)");
MODULE_AUTHOR("Brian Behlendorf (LLNL)");
MODULE_DESCRIPTION("A kernel space ping server for portals testing");
fi
case "$1" in
- toe)
- /sbin/insmod ../oslib/portals.o
- /sbin/insmod ../toenal/ktoenal.o
- /sbin/insmod ./$PING
- echo ktoenal > /tmp/nal
- ;;
-
tcp)
/sbin/insmod ../oslib/portals.o
/sbin/insmod ../socknal/ksocknal.o
;;
*)
- echo "Usage : ${0} < tcp | toe | elan | gm>"
+ echo "Usage : ${0} < tcp | elan | gm>"
exit 1;
esac
exit 0;
fi
case "$1" in
- toe)
- /sbin/insmod ../oslib/portals.o
- /sbin/insmod ../toenal/ktoenal.o
- /sbin/insmod ./$PING nal=4
- echo ktoenal > /tmp/nal
- ;;
-
tcp)
/sbin/insmod ../oslib/portals.o
/sbin/insmod ../socknal/ksocknal.o
;;
*)
- echo "Usage : ${0} < tcp | toe | elan | gm>"
+ echo "Usage : ${0} < tcp | elan | gm>"
exit 1;
esac
../utils/acceptor 9999&
/* Assume sufficient socket buffering for this message */
rc = syscall(SYS_write, sockfd, &hdr, sizeof(hdr));
if (rc <= 0) {
- CERROR ("Error %d sending HELLO to %llx\n", rc, *nid);
+ CERROR ("Error %d sending HELLO to "LPX64"\n", rc, *nid);
return (rc);
}
rc = syscall(SYS_read, sockfd, hmv, sizeof(*hmv));
if (rc <= 0) {
- CERROR ("Error %d reading HELLO from %llx\n", rc, *nid);
+ CERROR ("Error %d reading HELLO from "LPX64"\n", rc, *nid);
return (rc);
}
if (hmv->magic != __le32_to_cpu (PORTALS_PROTO_MAGIC)) {
- CERROR ("Bad magic %#08x (%#08x expected) from %llx\n",
+ CERROR ("Bad magic %#08x (%#08x expected) from "LPX64"\n",
__cpu_to_le32 (hmv->magic), PORTALS_PROTO_MAGIC, *nid);
return (-EPROTO);
}
if (hmv->version_major != __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR) ||
hmv->version_minor != __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR)) {
CERROR ("Incompatible protocol version %d.%d (%d.%d expected)"
- " from %llx\n",
+ " from "LPX64"\n",
__le16_to_cpu (hmv->version_major),
__le16_to_cpu (hmv->version_minor),
PORTALS_PROTO_VERSION_MAJOR,
rc = syscall(SYS_read, sockfd, hmv + 1, sizeof(hdr) - sizeof(*hmv));
if (rc <= 0) {
- CERROR ("Error %d reading rest of HELLO hdr from %llx\n",
+ CERROR ("Error %d reading rest of HELLO hdr from "LPX64"\n",
rc, *nid);
return (rc);
}
if (hdr.type != __cpu_to_le32 (PTL_MSG_HELLO) ||
hdr.payload_length != __cpu_to_le32 (0)) {
CERROR ("Expecting a HELLO hdr with 0 payload,"
- " but got type %d with %d payload from %llx\n",
+ " but got type %d with %d payload from "LPX64"\n",
__le32_to_cpu (hdr.type),
__le32_to_cpu (hdr.payload_length), *nid);
return (-EPROTO);
if (*nid == PTL_NID_ANY) { /* don't know peer's nid yet */
*nid = __le64_to_cpu(hdr.src_nid);
} else if (*nid != __le64_to_cpu (hdr.src_nid)) {
- CERROR ("Connected to nid %llx, but expecting %llx\n",
+ CERROR ("Connected to nid "LPX64", but expecting "LPX64"\n",
__le64_to_cpu (hdr.src_nid), *nid);
return (-EPROTO);
}
static name2num_t nalnames[] = {
{"any", 0},
{"tcp", SOCKNAL},
- {"toe", TOENAL},
{"elan", QSWNAL},
{"gm", GMNAL},
{"ib", IBNAL},
return 0;
}
- if (!g_nal_is_compatible (argv[0], SOCKNAL, TOENAL, 0))
+ if (!g_nal_is_compatible (argv[0], SOCKNAL, 0))
return -1;
rc = ptl_parse_ipaddr (&ipaddr, argv[1]);
return 0;
}
- if (!g_nal_is_compatible (NULL, SOCKNAL, TOENAL, 0))
+ if (!g_nal_is_compatible (NULL, SOCKNAL, 0))
return 0;
if (argc >= 2 &&
return 0;
}
- if (!g_nal_is_compatible (argv[0], SOCKNAL, TOENAL, 0))
+ if (!g_nal_is_compatible (argv[0], SOCKNAL, 0))
return -1;
if (argc > 1 &&
NETTYPE=${NETTYPE:-tcp}
TIMEOUT=${TIMEOUT:-30}
PTLDEBUG=${PTLDEBUG:-0}
+SUBSYSTEM=${SUBSYSTEM:-0}
MOUNT=${MOUNT:-"/mnt/lustre"}
#CLIENT_UPCALL=${CLIENT_UPCALL:-`pwd`/client-upcall-mdev.sh}
UPCALL=${CLIENT_UPCALL:-`pwd`/replay-single-upcall.sh}
MOUNT2=${MOUNT2:-"/mnt/lustre2"}
DIR=${DIR:-$MOUNT}
DIR2=${DIR2:-$MOUNT1}
-PTLDEBUG=${PTLDEBUG:-0}
+PTLDEBUG=${PTLDEBUG:-0x3f0400}
+SUBSYSTEM=${SUBSYSTEM:- 0xffb7e3ff}
PDSH=${PDSH:-no_dsh}
MDSDEV=${MDSDEV:-$ROOT/tmp/mds-`hostname`}
MDSSIZE=${MDSSIZE:-10000}
OSTDEV=${OSTDEV:-$ROOT/tmp/ost-`hostname`}
-OSTSIZE=${OSTSIZE:-10000}
+OSTSIZE=${OSTSIZE:-50000}
FSTYPE=${FSTYPE:-ext3}
TIMEOUT=${TIMEOUT:-10}
UPCALL=${UPCALL:-$PWD/replay-single-upcall.sh}
}
run_test 8 "double mount setup"
+test_9() {
+ # Remember the caller's debug settings; they are put back at the end.
+ OLDPTLDEBUG=$PTLDEBUG
+ OLDSUBSYSTEM=$SUBSYSTEM
+
+ # Phase 1: regenerate the configuration with the debug mask and
+ # subsystem supplied through lmc --ptldebug / --subsystem.
+ PTLDEBUG="trace"
+ SUBSYSTEM="mdc"
+ gen_config
+
+ # Bring the filesystem up and confirm the values from lmc took effect.
+ start_ost
+ start_mds
+ mount_client $MOUNT
+ if [ "$(cat /proc/sys/portals/debug)" = "1" ]; then
+  echo "lmc --debug success" || return 1
+ else
+  return 1
+ fi
+ if [ "$(cat /proc/sys/portals/subsystem_debug)" = "16777216" ]; then
+  echo "lmc --subsystem success" || return 1
+ else
+  return 1
+ fi
+ check_mount || return 41
+ cleanup
+
+ # Phase 2: change the variables WITHOUT regenerating the config, so
+ # the new values reach the nodes through lconf --ptldebug/--subsystem
+ # and must override what lmc stored in the configuration.
+ PTLDEBUG="inode"
+ SUBSYSTEM="mds"
+
+ start_ost
+ start_mds
+ mount_client $MOUNT
+ if [ "$(cat /proc/sys/portals/debug)" = "2" ]; then
+  echo "lconf --debug overriding success" || return 1
+ else
+  return 1
+ fi
+ if [ "$(cat /proc/sys/portals/subsystem_debug)" = "33554432" ]; then
+  echo "lconf --subsystem overriding success" || return 1
+ else
+  return 1
+ fi
+ check_mount || return 41
+ cleanup
+
+ # Restore the original settings and rebuild the configuration from them.
+ PTLDEBUG=$OLDPTLDEBUG
+ SUBSYSTEM=$OLDSUBSYSTEM
+ gen_config
+}
+run_test 9 "test --ptldebug and --subsystem for lmc"
+
+test_10() {
+ # Check that lmc accepts a node and an mds that share the same name.
+ # The configuration is written to a scratch file ("broken.xml") so the
+ # suite's real XMLCONFIG is untouched.
+ #
+ # Returns 0 on success, otherwise the exit status of the failing
+ # do_lmc call.  XMLCONFIG is restored on every path; previously a
+ # failure left it pointing at the scratch file for later tests.
+ OLDXMLCONFIG=$XMLCONFIG
+ XMLCONFIG="broken.xml"
+ local rc=0
+
+ [ -f "$XMLCONFIG" ] && rm -f "$XMLCONFIG"
+ SAMENAME="mds1"
+ do_lmc --add node --node $SAMENAME
+ do_lmc --add net --node $SAMENAME --nid $SAMENAME --nettype tcp
+ do_lmc --add mds --node $SAMENAME --mds $SAMENAME --nid $SAMENAME \
+  --fstype ext3 --dev /dev/mds1 || rc=$?
+ if [ $rc -eq 0 ]; then
+  # Only attempt the lov once the mds it refers to was added.
+  do_lmc --add lov --lov lov1 --mds $SAMENAME --stripe_sz 65536 \
+   --stripe_cnt 1 --stripe_pattern 0 || rc=$?
+ fi
+ [ $rc -eq 0 ] && echo "Success!"
+
+ XMLCONFIG=$OLDXMLCONFIG
+ return $rc
+}
+run_test 10 "use lmc with the same name for node and mds"
+
+test_11() {
+ # Two checks of lmc's "--add mtpt" handling, both written to a scratch
+ # config file ("conf11.xml"):
+ #   1. a client added with --ost but no --lov must be accepted;
+ #   2. a client added with neither --lov nor --ost must be rejected.
+ #
+ # Returns 0 when both checks behave as expected, the add_client exit
+ # status when check 1 fails, and 1 when check 2 unexpectedly succeeds.
+ # XMLCONFIG is restored on every path.
+ OLDXMLCONFIG=$XMLCONFIG
+ XMLCONFIG="conf11.xml"
+ local rc=0
+
+ [ -f "$XMLCONFIG" ] && rm -f "$XMLCONFIG"
+ add_mds mds --dev $MDSDEV --size $MDSSIZE
+ add_ost ost --dev $OSTDEV --size $OSTSIZE
+ add_client client mds --path $MOUNT --ost ost_svc || rc=$?
+ if [ $rc -ne 0 ]; then
+  XMLCONFIG=$OLDXMLCONFIG
+  return $rc
+ fi
+ echo "Default lov config success!"
+
+ [ -f "$XMLCONFIG" ] && rm -f "$XMLCONFIG"
+ add_mds mds --dev $MDSDEV --size $MDSSIZE
+ add_ost ost --dev $OSTDEV --size $OSTSIZE
+ # Success here is the failure case.  The former "&& return $?" returned
+ # 0 in exactly that situation, so the check could never fail the test.
+ if add_client client mds --path $MOUNT; then
+  echo "--add mtpt with neither --lov nor --ost unexpectedly succeeded"
+  XMLCONFIG=$OLDXMLCONFIG
+  return 1
+ fi
+ echo "--add mtpt with neither --lov nor --ost will return error"
+
+ echo ""
+ echo "Success!"
+ XMLCONFIG=$OLDXMLCONFIG
+}
+run_test 11 "use default lov configuration (should return error)"
equals_msg "Done"
#!/bin/bash
-insmod ../obdclass/llog_test.o
-../utils/lctl modules > /r/tmp/ogdb-localhost.localdomain
+PATH=`dirname $0`:`dirname $0`/../utils:$PATH
+insmod ../obdclass/llog_test.o || exit 1
+lctl modules > /r/tmp/ogdb-`hostname`
echo "NOW reload debugging syms.."
-# Using ignore_errors will allow lctl to cleanup even if the test
-# fails.
-../utils/lctl <<EOF
-ignore_errors
+RC=0
+lctl <<EOT || RC=2
newdev
attach llog_test llt_name llt_uuid
setup mds1
+EOT
+
+# Using ignore_errors will allow lctl to cleanup even if the test fails.
+lctl <<EOC
+cfg_device llt_name
+ignore_errors
cleanup
detach
-EOF
-rmmod llog_test
+EOC
+rmmod llog_test || RC2=3
+[ $RC -eq 0 -a "$RC2" ] && RC=$RC2
+
+exit $RC
break;
}
}
+ if (param->obdindex == OBD_NOT_FOUND) {
+ printf("unknown obduuid: %s\n", param->obduuid);
+ return EINVAL;
+ }
} else if (!param->quiet) {
printf("OBDS:\n");
for (i = 0, uuidp = param->uuids; i < obdcount; i++, uuidp++)
void lov_dump_user_lmm_v1(struct lov_user_md_v1 *lum, char *dname, char *fname,
int obdindex, int quiet, int header, int body)
{
- int i;
+ int i, obdstripe = 0;
if (obdindex != OBD_NOT_FOUND) {
for (i = 0; i < lum->lmm_stripe_count; i++) {
if (obdindex == lum->lmm_objects[i].l_ost_idx) {
printf("%s/%s\n", dname, fname);
+ obdstripe = 1;
break;
}
}
} else if (!quiet) {
printf("%s/%s\n", dname, fname);
+ obdstripe = 1;
}
- if (header) {
- printf("lmm_magic: 0x%80X\n", lum->lmm_magic);
+ if (header && (obdstripe == 1)) {
+ printf("lmm_magic: 0x%08X\n", lum->lmm_magic);
printf("lmm_object_gr: "LPX64"\n", lum->lmm_object_gr);
printf("lmm_object_id: "LPX64"\n", lum->lmm_object_id);
printf("lmm_stripe_count: %u\n", (int)lum->lmm_stripe_count);
if (body) {
long long oid;
- if (!quiet)
+ if ((!quiet) && (obdstripe == 1))
printf("\tobdidx\t\t objid\t\tobjid\t\t group\n");
for (i = 0; i < lum->lmm_stripe_count; i++) {
int idx = lum->lmm_objects[i].l_ost_idx;
oid = lum->lmm_objects[i].l_object_id;
- printf("\t%6u\t%14llu\t%#13llx\t%14lld%s\n", idx, oid,
- oid, (long long)lum->lmm_objects[i].l_object_gr,
- obdindex == idx ? " *" : "");
+ if ((obdindex == OBD_NOT_FOUND) || (obdindex == idx))
+ printf("\t%6u\t%14llu\t%#13llx\t%14lld%s\n",
+ idx, oid, oid,
+ (long long)lum->lmm_objects[i].l_object_gr,
+ obdindex == idx ? " *" : "");
}
printf("\n");
}