The complete set of citi nfsv4 patches combined into one patch.

Changes since 2.6.10-rc3-CITI_NFS4_ALL-3
	* minor adjustments to xdr buffer length calculations in fs/nfs4xdr.c
	* client acl revisions: pass acls in page array of xdr bufs, removing
	  arbitrary length restrictions.  Temporarily disable acl caching.

Index: linux-2.6.10/include/linux/nfsd/state.h
===================================================================
--- linux-2.6.10.orig/include/linux/nfsd/state.h	2004-12-25 05:33:50.000000000 +0800
+++ linux-2.6.10/include/linux/nfsd/state.h	2005-04-05 14:49:13.465682224 +0800
@@ -67,6 +67,45 @@
 #define ZERO_STATEID(stateid)       (!memcmp((stateid), &zerostateid, sizeof(stateid_t)))
 #define ONE_STATEID(stateid)        (!memcmp((stateid), &onestateid, sizeof(stateid_t)))
 
+/* Delegation recall states */
+#define NFS4_NO_RECALL			0x000
+#define NFS4_RECALL_IN_PROGRESS		0x001
+#define NFS4_RECALL_COMPLETE		0x002
+
+
+/* Delegation flags */
+#define NFS4_DELAY_CLOSE		0x001
+
+struct nfs4_cb_recall {
+	u32			cbr_ident;
+	int			cbr_trunc;
+	stateid_t		cbr_stateid;
+	u32			cbr_fhlen;
+	u32			cbr_fhval[NFS4_FHSIZE];
+	struct nfs4_delegation	*cbr_dp;
+};
+
+struct nfs4_delegation {
+	struct list_head	dl_del_perfile; /* nfs4_file->fi_del_perfile */
+	struct list_head	dl_del_perclnt; /* nfs4_client->cl_del_perclnt*/
+	struct list_head	dl_recall_lru;  /* delegation recalled */
+	atomic_t		dl_recall_cnt;  /* resend cb_recall only once */
+	atomic_t		dl_count;       /* ref count */
+	atomic_t		dl_state;       /* recall state */
+	struct nfs4_client	*dl_client;
+	struct nfs4_file	*dl_file;
+	struct file_lock	*dl_flock;
+	struct nfs4_stateid	*dl_stp;
+	u32			dl_flags;
+	u32			dl_type;
+	time_t			dl_time;
+	struct nfs4_cb_recall	dl_recall;
+};
+
+#define dl_stateid      dl_recall.cbr_stateid
+#define dl_fhlen        dl_recall.cbr_fhlen
+#define dl_fhval        dl_recall.cbr_fhval
+
 /* client delegation callback info */
 struct nfs4_callback {
 	/* SETCLIENTID info */
@@ -75,9 +114,8 @@
 	unsigned short          cb_port;
 	u32                     cb_prog;
 	u32                     cb_ident;
-	struct xdr_netobj	cb_netid;
 	/* RPC client info */
-	u32			cb_set;     /* successful CB_NULL call */
+	atomic_t		cb_set;     /* successful CB_NULL call */
 	struct rpc_program      cb_program;
 	struct rpc_stat         cb_stat;
 	struct rpc_clnt *       cb_client;
@@ -97,6 +135,7 @@
 	struct list_head	cl_idhash; 	/* hash by cl_clientid.id */
 	struct list_head	cl_strhash; 	/* hash by cl_name */
 	struct list_head	cl_perclient; 	/* list: stateowners */
+	struct list_head	cl_del_perclnt; /* list: delegations */
 	struct list_head        cl_lru;         /* tail queue */
 	struct xdr_netobj	cl_name; 	/* id generated by client */
 	nfs4_verifier		cl_verifier; 	/* generated by client */
@@ -106,7 +145,8 @@
 	clientid_t		cl_clientid;	/* generated by server */
 	nfs4_verifier		cl_confirm;	/* generated by server */
 	struct nfs4_callback	cl_callback;    /* callback info */
-	time_t			cl_first_state; /* first state aquisition*/
+	atomic_t		cl_count;	/* ref count */
+	u32			cl_firststate;  /* recovery file creation */
 };
 
 /* struct nfs4_client_reset
@@ -117,8 +157,6 @@
 struct nfs4_client_reclaim {
 	struct list_head	cr_strhash;	/* hash by cr_name */
 	struct xdr_netobj 	cr_name; 	/* id generated by client */
-	time_t			cr_first_state; /* first state aquisition */
-	u32			cr_expired;     /* boolean: lease expired? */
 };
 
 static inline void
@@ -194,6 +232,7 @@
 struct nfs4_file {
 	struct list_head        fi_hash;    /* hash by "struct inode *" */
 	struct list_head        fi_perfile; /* list: nfs4_stateid */
+	struct list_head	fi_del_perfile; /* list: nfs4_delegation */
 	struct inode		*fi_inode;
 	u32                     fi_id;      /* used with stateowner->so_id 
 					     * for stateid_hashtbl hash */
@@ -231,8 +270,10 @@
 #define CONFIRM                 0x00000002
 #define OPEN_STATE              0x00000004
 #define LOCK_STATE              0x00000008
-#define RDWR_STATE              0x00000010
-#define CLOSE_STATE             0x00000020
+#define RD_STATE	        0x00000010
+#define WR_STATE	        0x00000020
+#define CLOSE_STATE             0x00000040
+#define DELEG_RET               0x00000080
 
 #define seqid_mutating_err(err)                       \
 	(((err) != nfserr_stale_clientid) &&    \
@@ -243,14 +284,24 @@
 extern time_t nfs4_laundromat(void);
 extern int nfsd4_renew(clientid_t *clid);
 extern int nfs4_preprocess_stateid_op(struct svc_fh *current_fh, 
-		stateid_t *stateid, int flags, struct nfs4_stateid **stpp);
+		stateid_t *stateid, int flags, struct file **filp);
 extern int nfs4_share_conflict(struct svc_fh *current_fh, 
 		unsigned int deny_type);
 extern void nfs4_lock_state(void);
 extern void nfs4_unlock_state(void);
 extern int nfs4_in_grace(void);
 extern int nfs4_check_open_reclaim(clientid_t *clid);
+extern void put_nfs4_client(struct nfs4_client *clp);
 extern void nfs4_free_stateowner(struct kref *kref);
+extern void nfsd4_probe_callback(struct nfs4_client *clp);
+extern int nfsd4_cb_recall(struct nfs4_delegation *dp);
+extern int nfsd4_create_clid_file(struct nfs4_client *clp);
+extern void nfsd4_remove_clid_file(struct nfs4_client *clp);
+extern int nfsd4_list_rec_dir(int clear);
+extern void nfsd4_init_rec_dir(char *rec_dirname);
+extern void nfsd4_shutdown_rec_dir(void);
+extern int nfs4_client_to_reclaim(char *name, int namlen);
+
 
 static inline void
 nfs4_put_stateowner(struct nfs4_stateowner *so)
Index: linux-2.6.10/include/linux/nfsd/nfsd.h
===================================================================
--- linux-2.6.10.orig/include/linux/nfsd/nfsd.h	2004-12-25 05:35:39.000000000 +0800
+++ linux-2.6.10/include/linux/nfsd/nfsd.h	2005-04-05 14:49:13.464682376 +0800
@@ -98,8 +98,12 @@
 void		nfsd_close(struct file *);
 int		nfsd_read(struct svc_rqst *, struct svc_fh *,
 				loff_t, struct kvec *,int, unsigned long *);
+int 		nfsd_vfs_read(struct svc_rqst *, struct svc_fh *, struct file *,
+              			loff_t, struct kvec *, int, unsigned long *);
 int		nfsd_write(struct svc_rqst *, struct svc_fh *,
 				loff_t, struct kvec *,int, unsigned long, int *);
+int 		nfsd_vfs_write(struct svc_rqst *, struct svc_fh *,struct file *,
+                                loff_t, struct kvec *,int, unsigned long, int *);
 int		nfsd_readlink(struct svc_rqst *, struct svc_fh *,
 				char *, int *);
 int		nfsd_symlink(struct svc_rqst *, struct svc_fh *,
Index: linux-2.6.10/include/linux/nfsd/xdr4.h
===================================================================
--- linux-2.6.10.orig/include/linux/nfsd/xdr4.h	2004-12-25 05:34:01.000000000 +0800
+++ linux-2.6.10/include/linux/nfsd/xdr4.h	2005-04-05 14:49:13.466682072 +0800
@@ -44,16 +44,6 @@
 #define NFSD4_MAX_TAGLEN	128
 #define XDR_LEN(n)                     (((n) + 3) & ~3)
 
-typedef u32 delegation_zero_t;
-typedef u32 delegation_boot_t;
-typedef u64 delegation_id_t;
-
-typedef struct {
-	delegation_zero_t	ds_zero;
-	delegation_boot_t	ds_boot;
-	delegation_id_t		ds_id;
-} delegation_stateid_t;
-
 struct nfsd4_change_info {
 	u32		atomic;
 	u32		before_ctime_sec;
@@ -104,6 +94,10 @@
 #define cr_specdata1	u.dev.specdata1
 #define cr_specdata2	u.dev.specdata2
 
+struct nfsd4_delegreturn {
+	stateid_t	dr_stateid;
+};
+
 struct nfsd4_getattr {
 	u32		ga_bmval[2];        /* request */
 	struct svc_fh	*ga_fhp;            /* response */
@@ -202,13 +196,13 @@
 	u32		op_claim_type;      /* request */
 	struct xdr_netobj op_fname;	    /* request - everything but CLAIM_PREV */
 	u32		op_delegate_type;   /* request - CLAIM_PREV only */
-	delegation_stateid_t	op_delegate_stateid; /* request - CLAIM_DELEGATE_CUR only */
+	stateid_t       op_delegate_stateid; /* request - response */
 	u32		op_create;     	    /* request */
 	u32		op_createmode;      /* request */
 	u32		op_bmval[2];        /* request */
 	union {                             /* request */
-		struct iattr	iattr;		            /* UNCHECKED4,GUARDED4 */
-		nfs4_verifier	verf;		                     /* EXCLUSIVE4 */
+		struct iattr	iattr;                      /* UNCHECKED4,GUARDED4 */
+		nfs4_verifier	verf;                                /* EXCLUSIVE4 */
 	} u;
 	clientid_t	op_clientid;        /* request */
 	struct xdr_netobj op_owner;           /* request */
@@ -247,6 +241,7 @@
 	u32		rd_length;          /* request */
 	struct kvec	rd_iov[RPCSVC_MAXPAGES];
 	int		rd_vlen;
+	struct file     *rd_filp;
 	
 	struct svc_rqst *rd_rqstp;          /* response */
 	struct svc_fh * rd_fhp;             /* response */
@@ -345,6 +340,7 @@
 		struct nfsd4_close		close;
 		struct nfsd4_commit		commit;
 		struct nfsd4_create		create;
+		struct nfsd4_delegreturn	delegreturn;
 		struct nfsd4_getattr		getattr;
 		struct svc_fh *			getfh;
 		struct nfsd4_link		link;
@@ -456,6 +452,8 @@
 nfsd4_release_lockowner(struct svc_rqst *rqstp,
 		struct nfsd4_release_lockowner *rlockowner);
 extern void nfsd4_release_compoundargs(struct nfsd4_compoundargs *);
+extern int nfsd4_delegreturn(struct svc_rqst *rqstp,
+		struct svc_fh *current_fh, struct nfsd4_delegreturn *dr);
 #endif
 
 /*
Index: linux-2.6.10/include/linux/fs.h
===================================================================
--- linux-2.6.10.orig/include/linux/fs.h	2005-03-31 15:35:26.000000000 +0800
+++ linux-2.6.10/include/linux/fs.h	2005-04-05 14:49:13.461682832 +0800
@@ -1185,11 +1185,6 @@
 
 extern int vfs_statfs(struct super_block *, struct kstatfs *);
 
-/* Return value for VFS lock functions - tells locks.c to lock conventionally
- * REALLY kosha for root NFS and nfs_lock
- */ 
-#define LOCK_USE_CLNT 1
-
 #define FLOCK_VERIFY_READ  1
 #define FLOCK_VERIFY_WRITE 2
 
Index: linux-2.6.10/include/linux/dcache.h
===================================================================
--- linux-2.6.10.orig/include/linux/dcache.h	2005-03-31 15:35:26.000000000 +0800
+++ linux-2.6.10/include/linux/dcache.h	2005-04-05 14:49:13.460682984 +0800
@@ -200,6 +200,7 @@
  * These are the low-level FS interfaces to the dcache..
  */
 extern void d_instantiate(struct dentry *, struct inode *);
+extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *);
 extern void d_delete(struct dentry *);
 
 /* allocate/de-allocate */
@@ -244,6 +245,23 @@
 	d_rehash(entry);
 }
 
+/**
+ * d_add_unique - add dentry to hash queues without aliasing
+ * @entry: dentry to add
+ * @inode: The inode to attach to this dentry
+ *
+ * This adds the entry to the hash queues and initializes @inode.
+ * The entry was actually filled in earlier during d_alloc().
+ */
+static inline struct dentry *d_add_unique(struct dentry *entry, struct inode *inode)
+{
+	struct dentry *res;
+
+	res = d_instantiate_unique(entry, inode);
+	d_rehash(res != NULL ? res : entry);
+	return res;
+}
+
 /* used for rename() and baskets */
 extern void d_move(struct dentry *, struct dentry *);
 
Index: linux-2.6.10/include/linux/nfs_fs.h
===================================================================
--- linux-2.6.10.orig/include/linux/nfs_fs.h	2004-12-25 05:34:31.000000000 +0800
+++ linux-2.6.10/include/linux/nfs_fs.h	2005-04-05 14:49:13.463682528 +0800
@@ -30,6 +30,7 @@
 #include <linux/nfs_xdr.h>
 #include <linux/rwsem.h>
 #include <linux/workqueue.h>
+#include <linux/mempool.h>
 
 /*
  * Enable debugging support for nfs client.
@@ -201,6 +202,7 @@
 #define NFS_INO_INVALID_ATTR	0x0008		/* cached attrs are invalid */
 #define NFS_INO_INVALID_DATA	0x0010		/* cached data is invalid */
 #define NFS_INO_INVALID_ATIME	0x0020		/* cached atime is invalid */
+#define NFS_INO_INVALID_ACCESS	0x0040		/* cached access cred invalid */
 
 static inline struct nfs_inode *NFS_I(struct inode *inode)
 {
@@ -239,7 +241,7 @@
 static inline void NFS_CACHEINV(struct inode *inode)
 {
 	if (!nfs_caches_unstable(inode))
-		NFS_FLAGS(inode) |= NFS_INO_INVALID_ATTR;
+		NFS_FLAGS(inode) |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS;
 }
 
 static inline int nfs_server_capable(struct inode *inode, int cap)
@@ -424,6 +426,44 @@
 	return nfs_wb_page_priority(inode, page, 0);
 }
 
+/*
+ * Allocate and free nfs_write_data structures
+ */
+extern mempool_t *nfs_wdata_mempool;
+extern mempool_t *nfs_commit_mempool;
+
+static inline struct nfs_write_data *nfs_writedata_alloc(void)
+{
+	struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
+	if (p) {
+		memset(p, 0, sizeof(*p));
+		INIT_LIST_HEAD(&p->pages);
+	}
+	return p;
+}
+
+static inline void nfs_writedata_free(struct nfs_write_data *p)
+{
+	mempool_free(p, nfs_wdata_mempool);
+}
+
+extern void  nfs_writedata_release(struct rpc_task *task);
+
+static inline struct nfs_write_data *nfs_commit_alloc(void)
+{
+	struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
+	if (p) {
+		memset(p, 0, sizeof(*p));
+		INIT_LIST_HEAD(&p->pages);
+	}
+	return p;
+}
+
+static inline void nfs_commit_free(struct nfs_write_data *p)
+{
+	mempool_free(p, nfs_commit_mempool);
+}
+
 /* Hack for future NFS swap support */
 #ifndef IS_SWAPFILE
 # define IS_SWAPFILE(inode)	(0)
@@ -439,6 +479,26 @@
 extern void nfs_readpage_result(struct rpc_task *);
 
 /*
+ * Allocate and free nfs_read_data structures
+ */
+extern mempool_t *nfs_rdata_mempool;
+
+static inline struct nfs_read_data *nfs_readdata_alloc(void)
+{
+	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
+	if (p)
+		memset(p, 0, sizeof(*p));
+	return p;
+}
+
+static inline void nfs_readdata_free(struct nfs_read_data *p)
+{
+	mempool_free(p, nfs_rdata_mempool);
+}
+
+extern void  nfs_readdata_release(struct rpc_task *task);
+
+/*
  * linux/fs/mount_clnt.c
  * (Used only by nfsroot module)
  */
@@ -644,6 +704,12 @@
 
 extern struct dentry_operations nfs4_dentry_operations;
 extern struct inode_operations nfs4_dir_inode_operations;
+extern struct inode_operations nfs4_file_inode_operations;
+
+/* inode.c */
+extern ssize_t nfs4_getxattr(struct dentry *, const char *, void *, size_t);
+extern int nfs4_setxattr(struct dentry *, const char *, const void *, size_t, int);
+extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t);
 
 /* nfs4proc.c */
 extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short);
@@ -651,13 +717,14 @@
 extern int nfs4_open_reclaim(struct nfs4_state_owner *, struct nfs4_state *);
 extern int nfs4_proc_async_renew(struct nfs4_client *);
 extern int nfs4_proc_renew(struct nfs4_client *);
-extern int nfs4_do_close(struct inode *, struct nfs4_state *);
-extern int nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode);
+extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode);
 extern int nfs4_wait_clnt_recover(struct rpc_clnt *, struct nfs4_client *);
 extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
 extern int nfs4_open_revalidate(struct inode *, struct dentry *, int);
 extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *);
 extern int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request);
+extern ssize_t nfs4_proc_get_acl(struct inode *, void *buf, ssize_t buflen);
+extern int nfs4_proc_set_acl(struct inode *, const void *buf, ssize_t buflen);
 
 /* nfs4renewd.c */
 extern void nfs4_schedule_state_renewal(struct nfs4_client *);
Index: linux-2.6.10/include/linux/nfs4.h
===================================================================
--- linux-2.6.10.orig/include/linux/nfs4.h	2004-12-25 05:34:45.000000000 +0800
+++ linux-2.6.10/include/linux/nfs4.h	2005-04-05 14:49:13.474680856 +0800
@@ -28,7 +28,7 @@
 #define NFS4_ACCESS_DELETE      0x0010
 #define NFS4_ACCESS_EXECUTE     0x0020
 
-#define NFS4_FH_PERISTENT		0x0000
+#define NFS4_FH_PERSISTENT		0x0000
 #define NFS4_FH_NOEXPIRE_WITH_OPEN	0x0001
 #define NFS4_FH_VOLATILE_ANY		0x0002
 #define NFS4_FH_VOL_MIGRATION		0x0004
@@ -382,6 +382,8 @@
 	NFSPROC4_CLNT_READDIR,
 	NFSPROC4_CLNT_SERVER_CAPS,
 	NFSPROC4_CLNT_DELEGRETURN,
+	NFSPROC4_CLNT_GETACL,
+	NFSPROC4_CLNT_SETACL,
 };
 
 #endif
Index: linux-2.6.10/include/linux/sunrpc/auth.h
===================================================================
--- linux-2.6.10.orig/include/linux/sunrpc/auth.h	2004-12-25 05:34:57.000000000 +0800
+++ linux-2.6.10/include/linux/sunrpc/auth.h	2005-04-05 14:49:13.468681768 +0800
@@ -51,7 +51,6 @@
 };
 #define RPCAUTH_CRED_LOCKED	0x0001
 #define RPCAUTH_CRED_UPTODATE	0x0002
-#define RPCAUTH_CRED_DEAD	0x0004
 
 #define RPCAUTH_CRED_MAGIC	0x0f4aa4f0
 
@@ -133,7 +132,6 @@
 int			rpcauth_refreshcred(struct rpc_task *);
 void			rpcauth_invalcred(struct rpc_task *);
 int			rpcauth_uptodatecred(struct rpc_task *);
-int			rpcauth_deadcred(struct rpc_task *);
 void			rpcauth_init_credcache(struct rpc_auth *);
 void			rpcauth_free_credcache(struct rpc_auth *);
 
Index: linux-2.6.10/include/linux/sunrpc/svc.h
===================================================================
--- linux-2.6.10.orig/include/linux/sunrpc/svc.h	2004-12-25 05:34:58.000000000 +0800
+++ linux-2.6.10/include/linux/sunrpc/svc.h	2005-04-05 14:49:13.467681920 +0800
@@ -251,8 +251,7 @@
 	char *			pg_name;	/* service name */
 	char *			pg_class;	/* class name: services sharing authentication */
 	struct svc_stat *	pg_stats;	/* rpc statistics */
-	/* Override authentication. NULL means use default */
-	int			(*pg_authenticate)(struct svc_rqst *, u32 *);
+	int			(*pg_authenticate)(struct svc_rqst *);
 };
 
 /*
Index: linux-2.6.10/include/linux/sunrpc/cache.h
===================================================================
--- linux-2.6.10.orig/include/linux/sunrpc/cache.h	2004-12-25 05:34:57.000000000 +0800
+++ linux-2.6.10/include/linux/sunrpc/cache.h	2005-04-05 14:49:13.470681464 +0800
@@ -128,20 +128,17 @@
  * just like a template in C++, this macro does cache lookup
  * for us.
  * The function is passed some sort of HANDLE from which a cache_detail
- * structure can be determined (via SETUP, DETAIL), a template
+ * structure can be determined (via DETAIL), a template
  * cache entry (type RTN*), and a "set" flag.  Using the HASHFN and the 
  * TEST, the function will try to find a matching cache entry in the cache.
  * If "set" == 0 :
  *    If an entry is found, it is returned
  *    If no entry is found, a new non-VALID entry is created.
- * If "set" == 1 and INPLACE == 0 :
+ * If "set" == 1:
  *    If no entry is found a new one is inserted with data from "template"
  *    If a non-CACHE_VALID entry is found, it is updated from template using UPDATE
  *    If a CACHE_VALID entry is found, a new entry is swapped in with data
  *       from "template"
- * If set == 1, and INPLACE == 1 :
- *    As above, except that if a CACHE_VALID entry is found, we UPDATE in place
- *       instead of swapping in a new entry.
  *
  * If the passed handle has the CACHE_NEGATIVE flag set, then UPDATE is not
  * run but insteead CACHE_NEGATIVE is set in any new item.
@@ -153,25 +150,22 @@
  * MEMBER is the member of the cache which is cache_head, which must be first
  * FNAME is the name for the function	
  * ARGS are arguments to function and must contain RTN *item, int set.  May
- *   also contain something to be usedby SETUP or DETAIL to find cache_detail.
- * SETUP  locates the cache detail and makes it available as...
- * DETAIL identifies the cache detail, possibly set up by SETUP
+ *   also contain something to be used by DETAIL to find cache_detail.
+ * DETAIL identifies the cache detail
  * HASHFN returns a hash value of the cache entry "item"
  * TEST  tests if "tmp" matches "item"
  * INIT copies key information from "item" to "new"
  * UPDATE copies content information from "item" to "tmp"
- * INPLACE is true if updates can happen inplace rather than allocating a new structure
  *
  * WARNING: any substantial changes to this must be reflected in
  *   net/sunrpc/svcauth.c(auth_domain_lookup)
  *  which is a similar routine that is open-coded.
  */
-#define DefineCacheLookup(RTN,MEMBER,FNAME,ARGS,SETUP,DETAIL,HASHFN,TEST,INIT,UPDATE,INPLACE)	\
+#define DefineCacheLookup(RTN,MEMBER,FNAME,ARGS,DETAIL,HASHFN,TEST,INIT,UPDATE)	\
 RTN *FNAME ARGS										\
 {											\
 	RTN *tmp, *new=NULL;								\
 	struct cache_head **hp, **head;							\
-	SETUP;										\
 	head = &(DETAIL)->hash_table[HASHFN];						\
  retry:											\
 	if (set||new) write_lock(&(DETAIL)->hash_lock);					\
@@ -180,14 +174,14 @@
 		tmp = container_of(*hp, RTN, MEMBER);					\
 		if (TEST) { /* found a match */						\
 											\
-			if (set && !INPLACE && test_bit(CACHE_VALID, &tmp->MEMBER.flags) && !new) \
+			if (set && test_bit(CACHE_VALID, &tmp->MEMBER.flags) && !new) \
 				break;							\
 											\
 			if (new)							\
 				{INIT;}							\
 			cache_get(&tmp->MEMBER);					\
 			if (set) {							\
-				if (!INPLACE && test_bit(CACHE_VALID, &tmp->MEMBER.flags))\
+				if (test_bit(CACHE_VALID, &tmp->MEMBER.flags))\
 				{ /* need to swap in new */				\
 					RTN *t2;					\
 											\
@@ -209,7 +203,7 @@
 			else read_unlock(&(DETAIL)->hash_lock);				\
 			if (set)							\
 				cache_fresh(DETAIL, &tmp->MEMBER, item->MEMBER.expiry_time); \
-			if (set && !INPLACE && new) cache_fresh(DETAIL, &new->MEMBER, 0);	\
+			if (set && new) cache_fresh(DETAIL, &new->MEMBER, 0);	\
 			if (new) (DETAIL)->cache_put(&new->MEMBER, DETAIL);		\
 			return tmp;							\
 		}									\
@@ -242,10 +236,10 @@
 	return NULL;									\
 }
 
-#define DefineSimpleCacheLookup(STRUCT,INPLACE)	\
-	DefineCacheLookup(struct STRUCT, h, STRUCT##_lookup, (struct STRUCT *item, int set), /*no setup */,	\
+#define DefineSimpleCacheLookup(STRUCT)	\
+	DefineCacheLookup(struct STRUCT, h, STRUCT##_lookup, (struct STRUCT *item, int set),	\
 			  & STRUCT##_cache, STRUCT##_hash(item), STRUCT##_match(item, tmp),\
-			  STRUCT##_init(new, item), STRUCT##_update(tmp, item),INPLACE)
+			  STRUCT##_init(new, item), STRUCT##_update(tmp, item))
 
 #define cache_for_each(pos, detail, index, member) 						\
 	for (({read_lock(&(detail)->hash_lock); index = (detail)->hash_size;}) ;		\
Index: linux-2.6.10/include/linux/sunrpc/sched.h
===================================================================
--- linux-2.6.10.orig/include/linux/sunrpc/sched.h	2004-12-25 05:35:01.000000000 +0800
+++ linux-2.6.10/include/linux/sunrpc/sched.h	2005-04-05 14:49:13.472681160 +0800
@@ -11,7 +11,9 @@
 
 #include <linux/timer.h>
 #include <linux/sunrpc/types.h>
+#include <linux/spinlock.h>
 #include <linux/wait.h>
+#include <linux/workqueue.h>
 #include <linux/sunrpc/xdr.h>
 
 /*
@@ -25,11 +27,18 @@
 	struct rpc_cred *	rpc_cred;	/* Credentials */
 };
 
+struct rpc_wait_queue;
+struct rpc_wait {
+	struct list_head	list;		/* wait queue links */
+	struct list_head	links;		/* Links to related tasks */
+	wait_queue_head_t	waitq;		/* sync: sleep on this q */
+	struct rpc_wait_queue *	rpc_waitq;	/* RPC wait queue we're on */
+};
+
 /*
  * This is the RPC task struct
  */
 struct rpc_task {
-	struct list_head	tk_list;	/* wait queue links */
 #ifdef RPC_DEBUG
 	unsigned long		tk_magic;	/* 0xf00baa */
 #endif
@@ -37,7 +46,6 @@
 	struct rpc_clnt *	tk_client;	/* RPC client */
 	struct rpc_rqst *	tk_rqstp;	/* RPC request */
 	int			tk_status;	/* result of last operation */
-	struct rpc_wait_queue *	tk_rpcwait;	/* RPC wait queue we're on */
 
 	/*
 	 * RPC call state
@@ -70,13 +78,18 @@
 	 * you have a pathological interest in kernel oopses.
 	 */
 	struct timer_list	tk_timer;	/* kernel timer */
-	wait_queue_head_t	tk_wait;	/* sync: sleep on this q */
 	unsigned long		tk_timeout;	/* timeout for rpc_sleep() */
 	unsigned short		tk_flags;	/* misc flags */
 	unsigned char		tk_active   : 1;/* Task has been activated */
 	unsigned char		tk_priority : 2;/* Task priority */
 	unsigned long		tk_runstate;	/* Task run status */
-	struct list_head	tk_links;	/* links to related tasks */
+	struct workqueue_struct	*tk_workqueue;	/* Normally rpciod, but could
+						 * be any workqueue
+						 */
+	union {
+		struct work_struct	tk_work;	/* Async task work queue */
+		struct rpc_wait		tk_wait;	/* RPC wait */
+	} u;
 #ifdef RPC_DEBUG
 	unsigned short		tk_pid;		/* debugging aid */
 #endif
@@ -87,11 +100,11 @@
 /* support walking a list of tasks on a wait queue */
 #define	task_for_each(task, pos, head) \
 	list_for_each(pos, head) \
-		if ((task=list_entry(pos, struct rpc_task, tk_list)),1)
+		if ((task=list_entry(pos, struct rpc_task, u.tk_wait.list)),1)
 
 #define	task_for_first(task, head) \
 	if (!list_empty(head) &&  \
-	    ((task=list_entry((head)->next, struct rpc_task, tk_list)),1))
+	    ((task=list_entry((head)->next, struct rpc_task, u.tk_wait.list)),1))
 
 /* .. and walking list of all tasks */
 #define	alltask_for_each(task, pos, head) \
@@ -126,22 +139,39 @@
 #define RPC_IS_SOFT(t)		((t)->tk_flags & RPC_TASK_SOFT)
 #define RPC_TASK_UNINTERRUPTIBLE(t) ((t)->tk_flags & RPC_TASK_NOINTR)
 
-#define RPC_TASK_SLEEPING	0
-#define RPC_TASK_RUNNING	1
-#define RPC_IS_SLEEPING(t)	(test_bit(RPC_TASK_SLEEPING, &(t)->tk_runstate))
-#define RPC_IS_RUNNING(t)	(test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
+#define RPC_TASK_RUNNING	0
+#define RPC_TASK_QUEUED		1
+#define RPC_TASK_WAKEUP		2
+#define RPC_TASK_HAS_TIMER	3
 
+#define RPC_IS_RUNNING(t)	(test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
 #define rpc_set_running(t)	(set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
-#define rpc_clear_running(t)	(clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
+#define rpc_test_and_set_running(t) \
+				(test_and_set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
+#define rpc_clear_running(t)	\
+	do { \
+		smp_mb__before_clear_bit(); \
+		clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate); \
+		smp_mb__after_clear_bit(); \
+	} while (0)
 
-#define rpc_set_sleeping(t)	(set_bit(RPC_TASK_SLEEPING, &(t)->tk_runstate))
+#define RPC_IS_QUEUED(t)	(test_bit(RPC_TASK_QUEUED, &(t)->tk_runstate))
+#define rpc_set_queued(t)	(set_bit(RPC_TASK_QUEUED, &(t)->tk_runstate))
+#define rpc_clear_queued(t)	\
+	do { \
+		smp_mb__before_clear_bit(); \
+		clear_bit(RPC_TASK_QUEUED, &(t)->tk_runstate); \
+		smp_mb__after_clear_bit(); \
+	} while (0)
 
-#define rpc_clear_sleeping(t) \
+#define rpc_start_wakeup(t) \
+	(test_and_set_bit(RPC_TASK_WAKEUP, &(t)->tk_runstate) == 0)
+#define rpc_finish_wakeup(t) \
 	do { \
 		smp_mb__before_clear_bit(); \
-		clear_bit(RPC_TASK_SLEEPING, &(t)->tk_runstate); \
+		clear_bit(RPC_TASK_WAKEUP, &(t)->tk_runstate); \
 		smp_mb__after_clear_bit(); \
-	} while(0)
+	} while (0)
 
 /*
  * Task priorities.
@@ -157,6 +187,7 @@
  * RPC synchronization objects
  */
 struct rpc_wait_queue {
+	spinlock_t		lock;
 	struct list_head	tasks[RPC_NR_PRIORITY];	/* task queue for each priority level */
 	unsigned long		cookie;			/* cookie of last task serviced */
 	unsigned char		maxpriority;		/* maximum priority (0 if queue is not a priority queue) */
@@ -177,6 +208,7 @@
 
 #ifndef RPC_DEBUG
 # define RPC_WAITQ_INIT(var,qname) { \
+		.lock = SPIN_LOCK_UNLOCKED, \
 		.tasks = { \
 			[0] = LIST_HEAD_INIT(var.tasks[0]), \
 			[1] = LIST_HEAD_INIT(var.tasks[1]), \
@@ -185,6 +217,7 @@
 	}
 #else
 # define RPC_WAITQ_INIT(var,qname) { \
+		.lock = SPIN_LOCK_UNLOCKED, \
 		.tasks = { \
 			[0] = LIST_HEAD_INIT(var.tasks[0]), \
 			[1] = LIST_HEAD_INIT(var.tasks[1]), \
@@ -209,13 +242,10 @@
 int		rpc_execute(struct rpc_task *);
 void		rpc_run_child(struct rpc_task *parent, struct rpc_task *child,
 					rpc_action action);
-int		rpc_add_wait_queue(struct rpc_wait_queue *, struct rpc_task *);
-void		rpc_remove_wait_queue(struct rpc_task *);
 void		rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *);
 void		rpc_init_wait_queue(struct rpc_wait_queue *, const char *);
 void		rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *,
 					rpc_action action, rpc_action timer);
-void		rpc_add_timer(struct rpc_task *, rpc_action);
 void		rpc_wake_up_task(struct rpc_task *);
 void		rpc_wake_up(struct rpc_wait_queue *);
 struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *);
Index: linux-2.6.10/include/linux/sunrpc/gss_krb5.h
===================================================================
--- linux-2.6.10.orig/include/linux/sunrpc/gss_krb5.h	2004-12-25 05:34:57.000000000 +0800
+++ linux-2.6.10/include/linux/sunrpc/gss_krb5.h	2005-04-05 14:49:13.473681008 +0800
@@ -53,6 +53,8 @@
 	struct xdr_netobj	mech_used;
 };
 
+extern spinlock_t krb5_seq_lock;
+
 #define KG_TOK_MIC_MSG    0x0101
 #define KG_TOK_WRAP_MSG   0x0201
 
@@ -116,18 +118,25 @@
 
 s32
 make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
-		   struct xdr_netobj *cksum);
+		int body_offset, struct xdr_netobj *cksum);
 
 u32
 krb5_make_token(struct krb5_ctx *context_handle, int qop_req,
 	struct xdr_buf *input_message_buffer,
-	struct xdr_netobj *output_message_buffer, int toktype);
+	struct xdr_netobj *output_message_buffer);
 
 u32
 krb5_read_token(struct krb5_ctx *context_handle,
 	  struct xdr_netobj *input_token_buffer,
-	  struct xdr_buf *message_buffer,
-	  int *qop_state, int toktype);
+	  struct xdr_buf *message_buffer, int *qop_state);
+
+u32
+gss_wrap_kerberos(struct gss_ctx *ctx_id, u32 qop, int offset,
+		struct xdr_buf *outbuf, struct page **pages);
+
+u32
+gss_unwrap_kerberos(struct gss_ctx *ctx_id, u32 *qop, int offset,
+		struct xdr_buf *buf, int *out_offset);
 
 u32
 krb5_encrypt(struct crypto_tfm * key,
@@ -137,6 +146,13 @@
 krb5_decrypt(struct crypto_tfm * key,
 	     void *iv, void *in, void *out, int length); 
 
+int
+gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *outbuf, int offset,
+		struct page **pages);
+
+int
+gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *inbuf, int offset);
+
 s32
 krb5_make_seq_num(struct crypto_tfm * key,
 		int direction,
Index: linux-2.6.10/include/linux/sunrpc/xdr.h
===================================================================
--- linux-2.6.10.orig/include/linux/sunrpc/xdr.h	2004-12-25 05:35:40.000000000 +0800
+++ linux-2.6.10/include/linux/sunrpc/xdr.h	2005-04-05 14:49:13.467681920 +0800
@@ -192,6 +192,7 @@
 extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p);
 extern uint32_t *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
 extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
+extern void truncate_xdr_buf(struct xdr_buf *xdr, int len);
 
 #endif /* __KERNEL__ */
 
Index: linux-2.6.10/include/linux/sunrpc/gss_api.h
===================================================================
--- linux-2.6.10.orig/include/linux/sunrpc/gss_api.h	2004-12-25 05:35:28.000000000 +0800
+++ linux-2.6.10/include/linux/sunrpc/gss_api.h	2005-04-05 14:49:13.471681312 +0800
@@ -47,6 +47,18 @@
 		struct xdr_buf		*message,
 		struct xdr_netobj	*mic_token,
 		u32			*qstate);
+u32 gss_wrap(
+		struct gss_ctx		*ctx_id,
+		u32			qop,
+		int			offset,
+		struct xdr_buf		*outbuf,
+		struct page		**inpages);
+u32 gss_unwrap(
+		struct gss_ctx		*ctx_id,
+		u32			*qop,
+		int			offset,
+		struct xdr_buf		*inbuf,
+		int			*out_offset);
 u32 gss_delete_sec_context(
 		struct gss_ctx		**ctx_id);
 
@@ -93,6 +105,18 @@
 			struct xdr_buf		*message,
 			struct xdr_netobj	*mic_token,
 			u32			*qstate);
+	u32 (*gss_wrap)(
+			struct gss_ctx		*ctx_id,
+			u32			qop,
+			int			offset,
+			struct xdr_buf		*outbuf,
+			struct page		**inpages);
+	u32 (*gss_unwrap)(
+			struct gss_ctx		*ctx_id,
+			u32			*qop,
+			int			offset,
+			struct xdr_buf		*buf,
+			int			*out_offset);
 	void (*gss_delete_sec_context)(
 			void			*internal_ctx_id);
 };
Index: linux-2.6.10/include/linux/sunrpc/svcauth.h
===================================================================
--- linux-2.6.10.orig/include/linux/sunrpc/svcauth.h	2004-12-25 05:34:31.000000000 +0800
+++ linux-2.6.10/include/linux/sunrpc/svcauth.h	2005-04-05 14:49:13.469681616 +0800
@@ -26,21 +26,23 @@
 struct svc_rqst;		/* forward decl */
 
 /* Authentication is done in the context of a domain.
- * For a server, a domain represents a group of clients using
+ *
+ * Currently, the nfs server uses the auth_domain to stand
+ * for the "client" listed in /etc/exports.
+ *
+ * More generally, a domain might represent a group of clients using
  * a common mechanism for authentication and having a common mapping
  * between local identity (uid) and network identity.  All clients
  * in a domain have similar general access rights.  Each domain can
  * contain multiple principals which will have different specific right
  * based on normal Discretionary Access Control.
  *
- * For a client, a domain represents a number of servers which all
- * use a common authentication mechanism and network identity name space.
- *
  * A domain is created by an authentication flavour module based on name
  * only.  Userspace then fills in detail on demand.
  *
- * The creation of a domain typically implies creation of one or
- * more caches for storing domain specific information.
+ * In the case of auth_unix and auth_null, the auth_domain is also
+ * associated with entries in another cache representing the mapping
+ * of ip addresses to the given client.
  */
 struct auth_domain {
 	struct	cache_head	h;
@@ -92,6 +94,7 @@
 	int	(*accept)(struct svc_rqst *rq, u32 *authp);
 	int	(*release)(struct svc_rqst *rq);
 	void	(*domain_release)(struct auth_domain *);
+	int	(*set_client)(struct svc_rqst *rq);
 };
 
 #define	SVC_GARBAGE	1
@@ -107,6 +110,7 @@
 
 extern int	svc_authenticate(struct svc_rqst *rqstp, u32 *authp);
 extern int	svc_authorise(struct svc_rqst *rqstp);
+extern int	svc_set_client(struct svc_rqst *rqstp);
 extern int	svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops);
 extern void	svc_auth_unregister(rpc_authflavor_t flavor);
 
Index: linux-2.6.10/include/linux/sunrpc/xprt.h
===================================================================
--- linux-2.6.10.orig/include/linux/sunrpc/xprt.h	2004-12-25 05:35:23.000000000 +0800
+++ linux-2.6.10/include/linux/sunrpc/xprt.h	2005-04-05 14:49:13.471681312 +0800
@@ -95,7 +95,10 @@
 	int			rq_cong;	/* has incremented xprt->cong */
 	int			rq_received;	/* receive completed */
 	u32			rq_seqno;	/* gss seq no. used on req. */
-
+	int			rq_enc_pages_num;
+	struct page		**rq_enc_pages;	/* scratch pages for use by
+						   gss privacy code */
+	void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */
 	struct list_head	rq_list;
 
 	struct xdr_buf		rq_private_buf;		/* The receive buffer
Index: linux-2.6.10/include/linux/nfs_xdr.h
===================================================================
--- linux-2.6.10.orig/include/linux/nfs_xdr.h	2004-12-25 05:35:24.000000000 +0800
+++ linux-2.6.10/include/linux/nfs_xdr.h	2005-04-05 14:49:13.459683136 +0800
@@ -326,6 +326,20 @@
 	const u32 *			bitmask;
 };
 
+struct nfs_setaclargs {
+	struct nfs_fh *			fh;
+	ssize_t				acl_len;
+	unsigned int			acl_pgbase;
+	struct page **			acl_pages;
+};
+
+struct nfs_getaclargs {
+	struct nfs_fh *			fh;
+	ssize_t				acl_len;
+	unsigned int			acl_pgbase;
+	struct page **			acl_pages;
+};
+
 struct nfs_setattrres {
 	struct nfs_fattr *              fattr;
 	const struct nfs_server *	server;
@@ -666,6 +680,7 @@
 	int	version;		/* Protocol version */
 	struct dentry_operations *dentry_ops;
 	struct inode_operations *dir_inode_ops;
+	struct inode_operations *file_inode_ops;
 
 	int	(*getroot) (struct nfs_server *, struct nfs_fh *,
 			    struct nfs_fsinfo *);
@@ -681,7 +696,7 @@
 	int	(*read)    (struct nfs_read_data *);
 	int	(*write)   (struct nfs_write_data *);
 	int	(*commit)  (struct nfs_write_data *);
-	struct inode *	(*create)  (struct inode *, struct qstr *,
+	struct inode *	(*create)  (struct inode *, struct dentry *,
 			    struct iattr *, int);
 	int	(*remove)  (struct inode *, struct qstr *);
 	int	(*unlink_setup)  (struct rpc_message *,
Index: linux-2.6.10/net/sunrpc/xprt.c
===================================================================
--- linux-2.6.10.orig/net/sunrpc/xprt.c	2004-12-25 05:35:14.000000000 +0800
+++ linux-2.6.10/net/sunrpc/xprt.c	2005-04-05 14:49:13.393693168 +0800
@@ -891,7 +891,8 @@
 	xprt->tcp_flags &= ~XPRT_COPY_XID;
 	xprt->tcp_flags |= XPRT_COPY_DATA;
 	xprt->tcp_copied = 4;
-	dprintk("RPC:      reading reply for XID %08x\n", xprt->tcp_xid);
+	dprintk("RPC:      reading reply for XID %08x\n",
+						ntohl(xprt->tcp_xid));
 	tcp_check_recm(xprt);
 }
 
@@ -911,7 +912,7 @@
 	if (!req) {
 		xprt->tcp_flags &= ~XPRT_COPY_DATA;
 		dprintk("RPC:      XID %08x request not found!\n",
-				xprt->tcp_xid);
+				ntohl(xprt->tcp_xid));
 		spin_unlock(&xprt->sock_lock);
 		return;
 	}
@@ -1101,7 +1102,7 @@
 		goto out;
 
 	spin_lock_bh(&xprt->sock_lock);
-	if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->pending)
+	if (xprt->snd_task)
 		rpc_wake_up_task(xprt->snd_task);
 	spin_unlock_bh(&xprt->sock_lock);
 out:
@@ -1359,8 +1360,9 @@
 	req->rq_task	= task;
 	req->rq_xprt    = xprt;
 	req->rq_xid     = xprt_alloc_xid(xprt);
+	req->rq_release_snd_buf = NULL;
 	dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid,
-			req, req->rq_xid);
+			req, ntohl(req->rq_xid));
 }
 
 /*
@@ -1384,6 +1386,8 @@
 		mod_timer(&xprt->timer, xprt->last_used + XPRT_IDLE_TIMEOUT);
 	spin_unlock_bh(&xprt->sock_lock);
 	task->tk_rqstp = NULL;
+	if (req->rq_release_snd_buf)
+		req->rq_release_snd_buf(req);
 	memset(req, 0, sizeof(*req));	/* mark unused */
 
 	dprintk("RPC: %4d release request %p\n", task->tk_pid, req);
Index: linux-2.6.10/net/sunrpc/auth.c
===================================================================
--- linux-2.6.10.orig/net/sunrpc/auth.c	2004-12-25 05:34:57.000000000 +0800
+++ linux-2.6.10/net/sunrpc/auth.c	2005-04-05 14:49:13.394693016 +0800
@@ -214,8 +214,6 @@
 	list_for_each_safe(pos, next, &auth->au_credcache[nr]) {
 		struct rpc_cred *entry;
 	       	entry = list_entry(pos, struct rpc_cred, cr_hash);
-		if (entry->cr_flags & RPCAUTH_CRED_DEAD)
-			continue;
 		if (rpcauth_prune_expired(entry, &free))
 			continue;
 		if (entry->cr_ops->crmatch(acred, entry, taskflags)) {
@@ -307,9 +305,6 @@
 	if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock))
 		return;
 
-	if ((cred->cr_flags & RPCAUTH_CRED_DEAD) && !list_empty(&cred->cr_hash))
-		list_del_init(&cred->cr_hash);
-
 	if (list_empty(&cred->cr_hash)) {
 		spin_unlock(&rpc_credcache_lock);
 		rpcauth_crdestroy(cred);
@@ -413,10 +408,3 @@
 	return !(task->tk_msg.rpc_cred) ||
 		(task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_UPTODATE);
 }
-
-int
-rpcauth_deadcred(struct rpc_task *task)
-{
-	return !(task->tk_msg.rpc_cred) ||
-		(task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_DEAD);
-}
Index: linux-2.6.10/net/sunrpc/svcauth_unix.c
===================================================================
--- linux-2.6.10.orig/net/sunrpc/svcauth_unix.c	2004-12-25 05:35:00.000000000 +0800
+++ linux-2.6.10/net/sunrpc/svcauth_unix.c	2005-04-05 14:49:13.395692864 +0800
@@ -97,7 +97,7 @@
 };
 static struct cache_head	*ip_table[IP_HASHMAX];
 
-void ip_map_put(struct cache_head *item, struct cache_detail *cd)
+static void ip_map_put(struct cache_head *item, struct cache_detail *cd)
 {
 	struct ip_map *im = container_of(item, struct ip_map,h);
 	if (cache_put(item, cd)) {
@@ -258,7 +258,7 @@
 	.cache_show	= ip_map_show,
 };
 
-static DefineSimpleCacheLookup(ip_map, 0)
+static DefineSimpleCacheLookup(ip_map)
 
 
 int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom)
@@ -329,14 +329,49 @@
 	cache_purge(&auth_domain_cache);
 }
 
+int
+svcauth_unix_set_client(struct svc_rqst *rqstp)
+{
+	struct ip_map key, *ipm;
+
+	rqstp->rq_client = NULL;
+	if (rqstp->rq_proc == 0)
+		return SVC_OK;
+
+	strcpy(key.m_class, rqstp->rq_server->sv_program->pg_class);
+	key.m_addr = rqstp->rq_addr.sin_addr;
+
+	ipm = ip_map_lookup(&key, 0);
+
+	if (ipm == NULL)
+		return SVC_DENIED;
+
+	switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) {
+		case -EAGAIN:
+			return SVC_DROP;
+		case -ENOENT:
+			return SVC_DENIED;
+		case 0:
+			rqstp->rq_client = &ipm->m_client->h;
+			cache_get(&rqstp->rq_client->h);
+			ip_map_put(&ipm->h, &ip_map_cache);
+			return SVC_OK;
+		default:
+			BUG();
+	}
+	/* shut up gcc: */
+	return -1;
+}
 
 static int
 svcauth_null_accept(struct svc_rqst *rqstp, u32 *authp)
 {
 	struct kvec	*argv = &rqstp->rq_arg.head[0];
 	struct kvec	*resv = &rqstp->rq_res.head[0];
-	int		rv=0;
-	struct ip_map key, *ipm;
+	struct svc_cred	*cred = &rqstp->rq_cred;
+
+	cred->cr_group_info = NULL;
+	rqstp->rq_client = NULL;
 
 	if (argv->iov_len < 3*4)
 		return SVC_GARBAGE;
@@ -353,45 +388,17 @@
 	}
 
 	/* Signal that mapping to nobody uid/gid is required */
-	rqstp->rq_cred.cr_uid = (uid_t) -1;
-	rqstp->rq_cred.cr_gid = (gid_t) -1;
-	rqstp->rq_cred.cr_group_info = groups_alloc(0);
-	if (rqstp->rq_cred.cr_group_info == NULL)
+	cred->cr_uid = (uid_t) -1;
+	cred->cr_gid = (gid_t) -1;
+	cred->cr_group_info = groups_alloc(0);
+	if (cred->cr_group_info == NULL)
 		return SVC_DROP; /* kmalloc failure - client must retry */
 
 	/* Put NULL verifier */
 	svc_putu32(resv, RPC_AUTH_NULL);
 	svc_putu32(resv, 0);
 
-	strcpy(key.m_class, rqstp->rq_server->sv_program->pg_class);
-	key.m_addr = rqstp->rq_addr.sin_addr;
-
-	ipm = ip_map_lookup(&key, 0);
-
-	rqstp->rq_client = NULL;
-
-	if (ipm)
-		switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) {
-		case -EAGAIN:
-			rv = SVC_DROP;
-			break;
-		case -ENOENT:
-			rv = SVC_OK; /* rq_client is NULL */
-			break;
-		case 0:
-			rqstp->rq_client = &ipm->m_client->h;
-			cache_get(&rqstp->rq_client->h);
-			ip_map_put(&ipm->h, &ip_map_cache);
-			rv = SVC_OK;
-			break;
-		default: BUG();
-		}
-	else rv = SVC_DROP;
-
-	if (rqstp->rq_client == NULL && rqstp->rq_proc != 0)
-		*authp = rpc_autherr_badcred;
-
-	return rv;
+	return SVC_OK;
 }
 
 static int
@@ -414,6 +421,7 @@
 	.flavour	= RPC_AUTH_NULL,
 	.accept 	= svcauth_null_accept,
 	.release	= svcauth_null_release,
+	.set_client	= svcauth_unix_set_client,
 };
 
 
@@ -425,8 +433,6 @@
 	struct svc_cred	*cred = &rqstp->rq_cred;
 	u32		slen, i;
 	int		len   = argv->iov_len;
-	int		rv=0;
-	struct ip_map key, *ipm;
 
 	cred->cr_group_info = NULL;
 	rqstp->rq_client = NULL;
@@ -458,39 +464,11 @@
 		return SVC_DENIED;
 	}
 
-
-	strcpy(key.m_class, rqstp->rq_server->sv_program->pg_class);
-	key.m_addr = rqstp->rq_addr.sin_addr;
-
-
-	ipm = ip_map_lookup(&key, 0);
-
-	if (ipm)
-		switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) {
-		case -EAGAIN:
-			rv = SVC_DROP;
-			break;
-		case -ENOENT:
-			rv = SVC_OK; /* rq_client is NULL */
-			break;
-		case 0:
-			rqstp->rq_client = &ipm->m_client->h;
-			cache_get(&rqstp->rq_client->h);
-			ip_map_put(&ipm->h, &ip_map_cache);
-			rv = SVC_OK;
-			break;
-		default: BUG();
-		}
-	else rv = SVC_DROP;
-
-	if (rv  == SVC_OK && rqstp->rq_client == NULL && rqstp->rq_proc != 0)
-		goto badcred;
-
 	/* Put NULL verifier */
 	svc_putu32(resv, RPC_AUTH_NULL);
 	svc_putu32(resv, 0);
 
-	return rv;
+	return SVC_OK;
 
 badcred:
 	*authp = rpc_autherr_badcred;
@@ -520,5 +498,6 @@
 	.accept 	= svcauth_unix_accept,
 	.release	= svcauth_unix_release,
 	.domain_release	= svcauth_unix_domain_release,
+	.set_client	= svcauth_unix_set_client,
 };
 
Index: linux-2.6.10/net/sunrpc/clnt.c
===================================================================
--- linux-2.6.10.orig/net/sunrpc/clnt.c	2005-03-31 15:35:26.000000000 +0800
+++ linux-2.6.10/net/sunrpc/clnt.c	2005-04-05 14:49:13.410690584 +0800
@@ -636,8 +636,14 @@
 		rpc_exit(task, -EIO);
 		return;
 	}
-	if (encode && (status = rpcauth_wrap_req(task, encode, req, p,
-						 task->tk_msg.rpc_argp)) < 0) {
+	if (encode == NULL)
+		return;
+
+	status = rpcauth_wrap_req(task, encode, req, p, task->tk_msg.rpc_argp);
+	if (status == -EAGAIN) {
+		printk("XXXJBF: out of memory?  Should retry here!!!\n");
+	}
+	if (status < 0) {
 		printk(KERN_WARNING "%s: can't encode arguments: %d\n",
 				clnt->cl_protname, -status);
 		rpc_exit(task, status);
@@ -935,7 +941,7 @@
 	task->tk_action = call_reserve;
 	if (status >= 0 && rpcauth_uptodatecred(task))
 		return;
-	if (rpcauth_deadcred(task)) {
+	if (status == -EACCES) {
 		rpc_exit(task, -EACCES);
 		return;
 	}
@@ -993,7 +999,7 @@
 			goto garbage;
 		if ((n = ntohl(*p++)) != RPC_AUTH_ERROR) {
 			printk(KERN_WARNING "call_verify: RPC call rejected: %x\n", n);
-		} else if (--len < 0)
+		} else if (--len == 0)
 		switch ((n = ntohl(*p++))) {
 		case RPC_AUTH_REJECTEDCRED:
 		case RPC_AUTH_REJECTEDVERF:
Index: linux-2.6.10/net/sunrpc/svcauth.c
===================================================================
--- linux-2.6.10.orig/net/sunrpc/svcauth.c	2004-12-25 05:35:23.000000000 +0800
+++ linux-2.6.10/net/sunrpc/svcauth.c	2005-04-05 14:49:13.392693320 +0800
@@ -59,6 +59,11 @@
 	return aops->accept(rqstp, authp);
 }
 
+int svc_set_client(struct svc_rqst *rqstp)
+{
+	return rqstp->rq_authop->set_client(rqstp);
+}
+
 /* A request, which was authenticated, has now executed.
  * Time to finalise the the credentials and verifier
  * and release and resources
Index: linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_unseal.c
===================================================================
--- linux-2.6.10.orig/net/sunrpc/auth_gss/gss_krb5_unseal.c	2004-12-25 05:35:24.000000000 +0800
+++ linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_unseal.c	2005-04-05 14:49:13.401691952 +0800
@@ -68,20 +68,13 @@
 #endif
 
 
-/* message_buffer is an input if toktype is MIC and an output if it is WRAP:
- * If toktype is MIC: read_token is a mic token, and message_buffer is the
- *   data that the mic was supposedly taken over.
- * If toktype is WRAP: read_token is a wrap token, and message_buffer is used
- *   to return the decrypted data.
- */
+/* read_token is a mic token, and message_buffer is the data that the mic was
+ * supposedly taken over. */
 
-/* XXX will need to change prototype and/or just split into a separate function
- * when we add privacy (because read_token will be in pages too). */
 u32
 krb5_read_token(struct krb5_ctx *ctx,
 		struct xdr_netobj *read_token,
-		struct xdr_buf *message_buffer,
-		int *qop_state, int toktype)
+		struct xdr_buf *message_buffer, int *qop_state)
 {
 	int			signalg;
 	int			sealalg;
@@ -100,16 +93,12 @@
 					read_token->len))
 		goto out;
 
-	if ((*ptr++ != ((toktype>>8)&0xff)) || (*ptr++ != (toktype&0xff)))
+	if ((*ptr++ != ((KG_TOK_MIC_MSG>>8)&0xff)) ||
+	    (*ptr++ != ( KG_TOK_MIC_MSG    &0xff))   )
 		goto out;
 
 	/* XXX sanity-check bodysize?? */
 
-	if (toktype == KG_TOK_WRAP_MSG) {
-		/* XXX gone */
-		goto out;
-	}
-
 	/* get the sign and seal algorithms */
 
 	signalg = ptr[0] + (ptr[1] << 8);
@@ -120,14 +109,7 @@
 	if ((ptr[4] != 0xff) || (ptr[5] != 0xff))
 		goto out;
 
-	if (((toktype != KG_TOK_WRAP_MSG) && (sealalg != 0xffff)) ||
-	    ((toktype == KG_TOK_WRAP_MSG) && (sealalg == 0xffff)))
-		goto out;
-
-	/* in the current spec, there is only one valid seal algorithm per
-	   key type, so a simple comparison is ok */
-
-	if ((toktype == KG_TOK_WRAP_MSG) && !(sealalg == ctx->sealalg))
+	if (sealalg != 0xffff)
 		goto out;
 
 	/* there are several mappings of seal algorithms to sign algorithms,
@@ -154,7 +136,7 @@
 	switch (signalg) {
 	case SGN_ALG_DES_MAC_MD5:
 		ret = make_checksum(checksum_type, ptr - 2, 8,
-					 message_buffer, &md5cksum);
+					 message_buffer, 0, &md5cksum);
 		if (ret)
 			goto out;
 
Index: linux-2.6.10/net/sunrpc/auth_gss/gss_mech_switch.c
===================================================================
--- linux-2.6.10.orig/net/sunrpc/auth_gss/gss_mech_switch.c	2004-12-25 05:35:01.000000000 +0800
+++ linux-2.6.10/net/sunrpc/auth_gss/gss_mech_switch.c	2005-04-05 14:49:13.408690888 +0800
@@ -279,6 +279,29 @@
 				 qstate);
 }
 
+u32
+gss_wrap(struct gss_ctx	*ctx_id,
+	 u32		qop,
+	 int		offset,
+	 struct xdr_buf	*buf,
+	 struct page	**inpages)
+{
+	return ctx_id->mech_type->gm_ops
+		->gss_wrap(ctx_id, qop, offset, buf, inpages);
+}
+
+u32
+gss_unwrap(struct gss_ctx	*ctx_id,
+	   u32			*qop,
+	   int			offset,
+	   struct xdr_buf	*buf,
+	   int			*out_offset)
+{
+	return ctx_id->mech_type->gm_ops
+		->gss_unwrap(ctx_id, qop, offset, buf, out_offset);
+}
+
+
 /* gss_delete_sec_context: free all resources associated with context_handle.
  * Note this differs from the RFC 2744-specified prototype in that we don't
  * bother returning an output token, since it would never be used anyway. */
Index: linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_wrap.c
===================================================================
--- linux-2.6.10.orig/net/sunrpc/auth_gss/gss_krb5_wrap.c	2005-04-05 19:01:49.158500672 +0800
+++ linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_wrap.c	2005-04-05 14:49:13.397692560 +0800
@@ -0,0 +1,337 @@
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/jiffies.h>
+#include <linux/sunrpc/gss_krb5.h>
+#include <linux/random.h>
+#include <linux/pagemap.h>
+#include <asm/scatterlist.h>
+#include <linux/crypto.h>
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY	RPCDBG_AUTH
+#endif
+
+static inline int
+gss_krb5_padding(int blocksize, int length)
+{
+	/* Most of the code is block-size independent but currently we
+	 * use only 8: */
+	BUG_ON(blocksize != 8);
+	return 8 - (length & 7);
+}
+
+static inline void
+gss_krb5_add_padding(struct xdr_buf *buf, int offset, int blocksize)
+{
+	int padding = gss_krb5_padding(blocksize, buf->len - offset);
+	char *p;
+	struct kvec *iov;
+
+	if (buf->page_len || buf->tail[0].iov_len)
+		iov = &buf->tail[0];
+	else
+		iov = &buf->head[0];
+	p = iov->iov_base + iov->iov_len;
+	iov->iov_len += padding;
+	buf->len += padding;
+	memset(p, padding, padding);
+}
+
+static inline int
+gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize)
+{
+	u8 *ptr;
+	u8 pad;
+	int len = buf->len;
+
+	if (len <= buf->head[0].iov_len) {
+		pad = *(u8 *)(buf->head[0].iov_base + len - 1);
+		goto out;
+	} else
+		len -= buf->head[0].iov_len;
+	if (len <= buf->page_len) {
+		int last = (buf->page_base + len - 1)
+					>>PAGE_CACHE_SHIFT;
+		int offset = (buf->page_base + len - 1)
+					& (PAGE_CACHE_SIZE - 1);
+		ptr = kmap_atomic(buf->pages[last], KM_SKB_SUNRPC_DATA);
+		pad = *(ptr + offset);
+		kunmap_atomic(ptr, KM_SKB_SUNRPC_DATA);
+		goto out;
+	} else
+		len -= buf->page_len;
+	BUG_ON(len > buf->tail[0].iov_len);
+	pad = *(u8 *)(buf->tail[0].iov_base + len - 1);
+out:
+	if (pad > blocksize)
+		return -EINVAL;
+	buf->len -= pad;
+	return 0;
+}
+
+static inline void
+make_confounder(char *p, int blocksize)
+{
+	/* XXX?  Is this OK to do on every packet? */
+	get_random_bytes(p, blocksize);
+}
+
+/* Assumptions: the head and tail of inbuf are ours to play with.
+ * The pages, however, may be real pages in the page cache and we replace
+ * them with scratch pages from **pages before writing to them. */
+/* XXX: obviously the above should be documentation of wrap interface,
+ * and shouldn't be in this kerberos-specific file. */
+
+/* XXX factor out common code with seal/unseal. */
+
+u32
+gss_wrap_kerberos(struct gss_ctx *ctx, u32 qop, int offset,
+		struct xdr_buf *buf, struct page **pages)
+{
+	struct krb5_ctx		*kctx = ctx->internal_ctx_id;
+	s32			checksum_type;
+	struct xdr_netobj	md5cksum = {.len = 0, .data = NULL};
+	int			blocksize = 0, plainlen;
+	unsigned char		*ptr, *krb5_hdr, *msg_start;
+	s32			now;
+	int			headlen;
+	struct page		**tmp_pages;
+	u32			seq_send;
+
+	dprintk("RPC:     gss_wrap_kerberos\n");
+
+	now = get_seconds();
+
+	if (qop != 0)
+		goto out_err;
+
+	switch (kctx->signalg) {
+		case SGN_ALG_DES_MAC_MD5:
+			checksum_type = CKSUMTYPE_RSA_MD5;
+			break;
+		default:
+			dprintk("RPC:      gss_krb5_seal: kctx->signalg %d not"
+				" supported\n", kctx->signalg);
+			goto out_err;
+	}
+	if (kctx->sealalg != SEAL_ALG_NONE && kctx->sealalg != SEAL_ALG_DES) {
+		dprintk("RPC:      gss_krb5_seal: kctx->sealalg %d not supported\n",
+			kctx->sealalg);
+		goto out_err;
+	}
+
+	blocksize = crypto_tfm_alg_blocksize(kctx->enc);
+	gss_krb5_add_padding(buf, offset, blocksize);
+	BUG_ON((buf->len - offset) % blocksize);
+	plainlen = blocksize + buf->len - offset;
+
+	headlen = g_token_size(&kctx->mech_used, 22 + plainlen) -
+						(buf->len - offset);
+
+	ptr = buf->head[0].iov_base + offset;
+	/* shift data to make room for header. */
+	/* XXX Would be cleverer to encrypt while copying. */
+	/* XXX bounds checking, slack, etc. */
+	memmove(ptr + headlen, ptr, buf->head[0].iov_len - offset);
+	buf->head[0].iov_len += headlen;
+	buf->len += headlen;
+	BUG_ON((buf->len - offset - headlen) % blocksize);
+
+	g_make_token_header(&kctx->mech_used, 22 + plainlen, &ptr);
+
+
+	*ptr++ = (unsigned char) ((KG_TOK_WRAP_MSG>>8)&0xff);
+	*ptr++ = (unsigned char) (KG_TOK_WRAP_MSG&0xff);
+
+	/* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */
+	krb5_hdr = ptr - 2;
+	msg_start = krb5_hdr + 24;
+	/* XXXJBF: */ BUG_ON(buf->head[0].iov_base + offset + headlen != msg_start + blocksize);
+
+	*(u16 *)(krb5_hdr + 2) = htons(kctx->signalg);
+	memset(krb5_hdr + 4, 0xff, 4);
+	*(u16 *)(krb5_hdr + 4) = htons(kctx->sealalg);
+
+	make_confounder(msg_start, blocksize);
+
+	/* XXXJBF: UGH!: */
+	tmp_pages = buf->pages;
+	buf->pages = pages;
+	if (make_checksum(checksum_type, krb5_hdr, 8, buf,
+				offset + headlen - blocksize, &md5cksum))
+		goto out_err;
+	buf->pages = tmp_pages;
+
+	switch (kctx->signalg) {
+	case SGN_ALG_DES_MAC_MD5:
+		if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
+				  md5cksum.data, md5cksum.len))
+			goto out_err;
+		memcpy(krb5_hdr + 16,
+		       md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH,
+		       KRB5_CKSUM_LENGTH);
+
+		dprintk("RPC:      make_seal_token: cksum data: \n");
+		print_hexl((u32 *) (krb5_hdr + 16), KRB5_CKSUM_LENGTH, 0);
+		break;
+	default:
+		BUG();
+	}
+
+	kfree(md5cksum.data);
+
+	spin_lock(&krb5_seq_lock);
+	seq_send = kctx->seq_send++;
+	spin_unlock(&krb5_seq_lock);
+
+	/* XXX would probably be more efficient to compute checksum
+	 * and encrypt at the same time: */
+	if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff,
+			       seq_send, krb5_hdr + 16, krb5_hdr + 8)))
+		goto out_err;
+
+	if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize,
+									pages))
+		goto out_err;
+
+	return ((kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE);
+out_err:
+	if (md5cksum.data) kfree(md5cksum.data);
+	return GSS_S_FAILURE;
+}
+
+u32
+gss_unwrap_kerberos(struct gss_ctx *ctx, u32 *qop, int offset,
+			struct xdr_buf *buf, int *out_offset)
+{
+	struct krb5_ctx		*kctx = ctx->internal_ctx_id;
+	int			signalg;
+	int			sealalg;
+	s32			checksum_type;
+	struct xdr_netobj	md5cksum = {.len = 0, .data = NULL};
+	s32			now;
+	int			direction;
+	s32			seqnum;
+	unsigned char		*ptr;
+	int			bodysize;
+	u32			ret = GSS_S_DEFECTIVE_TOKEN;
+	u8			*data_start;
+	int			blocksize;
+
+	dprintk("RPC:      gss_unwrap_kerberos\n");
+
+	ptr = (u8 *)buf->head[0].iov_base + offset;
+	if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr,
+					buf->len - offset))
+		goto out;
+
+	if ((*ptr++ != ((KG_TOK_WRAP_MSG>>8)&0xff)) ||
+	    (*ptr++ !=  (KG_TOK_WRAP_MSG    &0xff))   )
+		goto out;
+
+	/* XXX sanity-check bodysize?? */
+
+	/* get the sign and seal algorithms */
+
+	signalg = ptr[0] + (ptr[1] << 8);
+	sealalg = ptr[2] + (ptr[3] << 8);
+
+	/* Sanity checks */
+
+	if ((ptr[4] != 0xff) || (ptr[5] != 0xff))
+		goto out;
+
+	if (sealalg == 0xffff)
+		goto out;
+
+	/* in the current spec, there is only one valid seal algorithm per
+	   key type, so a simple comparison is ok */
+
+	if (sealalg != kctx->sealalg)
+		goto out;
+
+	/* there are several mappings of seal algorithms to sign algorithms,
+	   but few enough that we can try them all. */
+
+	if ((kctx->sealalg == SEAL_ALG_NONE && signalg > 1) ||
+	    (kctx->sealalg == SEAL_ALG_1 && signalg != SGN_ALG_3) ||
+	    (kctx->sealalg == SEAL_ALG_DES3KD &&
+	     signalg != SGN_ALG_HMAC_SHA1_DES3_KD))
+		goto out;
+
+	if (gss_decrypt_xdr_buf(kctx->enc, buf,
+			ptr + 22 - (unsigned char *)buf->head[0].iov_base))
+		goto out;
+
+	/* compute the checksum of the message */
+
+	/* initialize the the cksum */
+	switch (signalg) {
+	case SGN_ALG_DES_MAC_MD5:
+		checksum_type = CKSUMTYPE_RSA_MD5;
+		break;
+	default:
+		ret = GSS_S_DEFECTIVE_TOKEN;
+		goto out;
+	}
+
+	switch (signalg) {
+	case SGN_ALG_DES_MAC_MD5:
+		ret = make_checksum(checksum_type, ptr - 2, 8, buf,
+			 ptr + 22 - (unsigned char *)buf->head[0].iov_base, &md5cksum);
+		if (ret)
+			goto out;
+
+		ret = krb5_encrypt(kctx->seq, NULL, md5cksum.data,
+				   md5cksum.data, md5cksum.len);
+		if (ret)
+			goto out;
+
+		if (memcmp(md5cksum.data + 8, ptr + 14, 8)) {
+			ret = GSS_S_BAD_SIG;
+			goto out;
+		}
+		break;
+	default:
+		ret = GSS_S_DEFECTIVE_TOKEN;
+		goto out;
+	}
+
+	/* it got through unscathed.  Make sure the context is unexpired */
+
+	if (qop)
+		*qop = GSS_C_QOP_DEFAULT;
+
+	now = get_seconds();
+
+	ret = GSS_S_CONTEXT_EXPIRED;
+	if (now > kctx->endtime)
+		goto out;
+
+	/* do sequencing checks */
+
+	ret = GSS_S_BAD_SIG;
+	if ((ret = krb5_get_seq_num(kctx->seq, ptr + 14, ptr + 6, &direction,
+				    &seqnum)))
+		goto out;
+
+	if ((kctx->initiate && direction != 0xff) ||
+	    (!kctx->initiate && direction != 0))
+		goto out;
+
+	/* Copy the data back to the right position.  XXX: Would probably be
+	 * better to copy and encrypt at the same time. */
+
+	blocksize = crypto_tfm_alg_blocksize(kctx->enc);
+	data_start = ptr + 22 + blocksize;
+	*out_offset = data_start - (u8 *)buf->head[0].iov_base;
+
+	ret = GSS_S_DEFECTIVE_TOKEN;
+	if (gss_krb5_remove_padding(buf, blocksize))
+		goto out;
+
+	ret = GSS_S_COMPLETE;
+out:
+	if (md5cksum.data) kfree(md5cksum.data);
+	return ret;
+}
Index: linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_crypto.c
===================================================================
--- linux-2.6.10.orig/net/sunrpc/auth_gss/gss_krb5_crypto.c	2004-12-25 05:33:50.000000000 +0800
+++ linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_crypto.c	2005-04-05 14:49:13.398692408 +0800
@@ -139,17 +139,91 @@
 	sg->length = len;
 }
 
+static int
+process_xdr_buf(struct xdr_buf *buf, int offset, int len,
+		int (*actor)(struct scatterlist *, void *), void *data)
+{
+	int i, page_len, thislen, page_offset, ret = 0;
+	struct scatterlist	sg[1];
+
+	if (offset >= buf->head[0].iov_len) {
+		offset -= buf->head[0].iov_len;
+	} else {
+		thislen = buf->head[0].iov_len - offset;
+		if (thislen > len)
+			thislen = len;
+		buf_to_sg(sg, buf->head[0].iov_base + offset, thislen);
+		ret = actor(sg, data);
+		if (ret)
+			goto out;
+		offset = 0;
+		len -= thislen;
+	}
+	if (len == 0)
+		goto out;
+
+	if (offset >= buf->page_len) {
+		offset -= buf->page_len;
+	} else {
+		page_len = buf->page_len - offset;
+		if (page_len > len)
+			page_len = len;
+		len -= page_len;
+		page_offset = (offset + buf->page_base) & (PAGE_CACHE_SIZE - 1);
+		i = (offset + buf->page_base) >> PAGE_CACHE_SHIFT;
+		thislen = PAGE_CACHE_SIZE - page_offset;
+		do {
+			if (thislen > page_len)
+				thislen = page_len;
+			sg->page = buf->pages[i];
+			sg->offset = page_offset;
+			sg->length = thislen;
+			ret = actor(sg, data);
+			if (ret)
+				goto out;
+			page_len -= thislen;
+			i++;
+			page_offset = 0;
+			thislen = PAGE_CACHE_SIZE;
+		} while (page_len != 0);
+		offset = 0;
+	}
+	if (len == 0)
+		goto out;
+
+	if (offset < buf->tail[0].iov_len) {
+		thislen = buf->tail[0].iov_len - offset;
+		if (thislen > len)
+			thislen = len;
+		buf_to_sg(sg, buf->tail[0].iov_base + offset, thislen);
+		ret = actor(sg, data);
+		len -= thislen;
+	}
+	if (len != 0)
+		ret = -EINVAL;
+out:
+	return ret;
+}
+
+static int
+checksummer(struct scatterlist *sg, void *data)
+{
+	struct crypto_tfm *tfm = (struct crypto_tfm *)data;
+
+	crypto_digest_update(tfm, sg, 1);
+
+	return 0;
+}
+
 /* checksum the plaintext data and hdrlen bytes of the token header */
 s32
 make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
-		   struct xdr_netobj *cksum)
+		   int body_offset, struct xdr_netobj *cksum)
 {
 	char                            *cksumname;
 	struct crypto_tfm               *tfm = NULL; /* XXX add to ctx? */
 	struct scatterlist              sg[1];
 	u32                             code = GSS_S_FAILURE;
-	int				len, thislen, offset;
-	int				i;
 
 	switch (cksumtype) {
 		case CKSUMTYPE_RSA_MD5:
@@ -169,35 +243,8 @@
 	crypto_digest_init(tfm);
 	buf_to_sg(sg, header, hdrlen);
 	crypto_digest_update(tfm, sg, 1);
-	if (body->head[0].iov_len) {
-		buf_to_sg(sg, body->head[0].iov_base, body->head[0].iov_len);
-		crypto_digest_update(tfm, sg, 1);
-	}
-
-	len = body->page_len;
-	if (len != 0) {
-		offset = body->page_base & (PAGE_CACHE_SIZE - 1);
-		i = body->page_base >> PAGE_CACHE_SHIFT;
-		thislen = PAGE_CACHE_SIZE - offset;
-		do {
-			if (thislen > len)
-				thislen = len;
-			sg->page = body->pages[i];
-			sg->offset = offset;
-			sg->length = thislen;
-			kmap(sg->page); /* XXX kmap_atomic? */
-			crypto_digest_update(tfm, sg, 1);
-			kunmap(sg->page);
-			len -= thislen;
-			i++;
-			offset = 0;
-			thislen = PAGE_CACHE_SIZE;
-		} while(len != 0);
-	}
-	if (body->tail[0].iov_len) {
-		buf_to_sg(sg, body->tail[0].iov_base, body->tail[0].iov_len);
-		crypto_digest_update(tfm, sg, 1);
-	}
+	process_xdr_buf(body, body_offset, body->len - body_offset,
+			checksummer, tfm);
 	crypto_digest_final(tfm, cksum->data);
 	code = 0;
 out:
@@ -207,3 +254,154 @@
 }
 
 EXPORT_SYMBOL(make_checksum);
+
+struct encryptor_desc {
+	u8 iv[8]; /* XXX hard-coded blocksize */
+	struct crypto_tfm *tfm;
+	int pos;
+	struct xdr_buf *outbuf;
+	struct page **pages;
+	struct scatterlist infrags[4];
+	struct scatterlist outfrags[4];
+	int fragno;
+	int fraglen;
+};
+
+static int
+encryptor(struct scatterlist *sg, void *data)
+{
+	struct encryptor_desc *desc = data;
+	struct xdr_buf *outbuf = desc->outbuf;
+	struct page *in_page;
+	int thislen = desc->fraglen + sg->length;
+	int fraglen, ret;
+	int page_pos;
+
+	/* Worst case is 4 fragments: head, end of page 1, start
+	 * of page 2, tail.  Anything more is a bug. */
+	BUG_ON(desc->fragno > 3);
+	desc->infrags[desc->fragno] = *sg;
+	desc->outfrags[desc->fragno] = *sg;
+
+	page_pos = desc->pos - outbuf->head[0].iov_len;
+	if (page_pos >= 0 && page_pos < outbuf->page_len) {
+		/* pages are not in place: */
+		int i = (page_pos + outbuf->page_base) >> PAGE_CACHE_SHIFT;
+		in_page = desc->pages[i];
+	} else {
+		in_page = sg->page;
+	}
+	desc->infrags[desc->fragno].page = in_page;
+	desc->fragno++;
+	desc->fraglen += sg->length;
+	desc->pos += sg->length;
+
+	fraglen = thislen & 7; /* XXX hardcoded blocksize */
+	thislen -= fraglen;
+
+	if (thislen == 0)
+		return 0;
+
+	ret = crypto_cipher_encrypt_iv(desc->tfm, desc->outfrags, desc->infrags,
+					thislen, desc->iv);
+	if (ret)
+		return ret;
+	if (fraglen) {
+		desc->outfrags[0].page = sg->page;
+		desc->outfrags[0].offset = sg->offset + sg->length - fraglen;
+		desc->outfrags[0].length = fraglen;
+		desc->infrags[0] = desc->outfrags[0];
+		desc->infrags[0].page = in_page;
+		desc->fragno = 1;
+		desc->fraglen = fraglen;
+	} else {
+		desc->fragno = 0;
+		desc->fraglen = 0;
+	}
+	return 0;
+}
+
+int
+gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset,
+		struct page **pages)
+{
+	int ret;
+	struct encryptor_desc desc;
+
+	BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0);
+
+	memset(desc.iv, 0, sizeof(desc.iv));
+	desc.tfm = tfm;
+	desc.pos = offset;
+	desc.outbuf = buf;
+	desc.pages = pages;
+	desc.fragno = 0;
+	desc.fraglen = 0;
+
+	ret = process_xdr_buf(buf, offset, buf->len - offset, encryptor, &desc);
+	return ret;
+}
+
+EXPORT_SYMBOL(gss_encrypt_xdr_buf);
+
+struct decryptor_desc {
+	u8 iv[8]; /* XXX hard-coded blocksize */
+	struct crypto_tfm *tfm;
+	struct scatterlist frags[4];
+	int fragno;
+	int fraglen;
+};
+
+static int
+decryptor(struct scatterlist *sg, void *data)
+{
+	struct decryptor_desc *desc = data;
+	int thislen = desc->fraglen + sg->length;
+	int fraglen, ret;
+
+	/* Worst case is 4 fragments: head, end of page 1, start
+	 * of page 2, tail.  Anything more is a bug. */
+	BUG_ON(desc->fragno > 3);
+	desc->frags[desc->fragno] = *sg;
+	desc->fragno++;
+	desc->fraglen += sg->length;
+
+	fraglen = thislen & 7; /* XXX hardcoded blocksize */
+	thislen -= fraglen;
+
+	if (thislen == 0)
+		return 0;
+
+	ret = crypto_cipher_decrypt_iv(desc->tfm, desc->frags, desc->frags,
+					thislen, desc->iv);
+	if (ret)
+		return ret;
+	if (fraglen) {
+		desc->frags[0].page = sg->page;
+		desc->frags[0].offset = sg->offset + sg->length - fraglen;
+		desc->frags[0].length = fraglen;
+		desc->fragno = 1;
+		desc->fraglen = fraglen;
+	} else {
+		desc->fragno = 0;
+		desc->fraglen = 0;
+	}
+	return 0;
+}
+
+int
+gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset)
+{
+	struct decryptor_desc desc;
+
+	/* XXXJBF: */
+	BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0);
+
+	memset(desc.iv, 0, sizeof(desc.iv));
+	desc.tfm = tfm;
+	desc.fragno = 0;
+	desc.fraglen = 0;
+	return process_xdr_buf(buf, offset, buf->len - offset, decryptor, &desc);
+}
+
+EXPORT_SYMBOL(gss_decrypt_xdr_buf);
Index: linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_seal.c
===================================================================
--- linux-2.6.10.orig/net/sunrpc/auth_gss/gss_krb5_seal.c	2004-12-25 05:33:47.000000000 +0800
+++ linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_seal.c	2005-04-05 14:49:13.402691800 +0800
@@ -70,24 +70,17 @@
 # define RPCDBG_FACILITY        RPCDBG_AUTH
 #endif
 
-static inline int
-gss_krb5_padding(int blocksize, int length) {
-	/* Most of the code is block-size independent but in practice we
-	 * use only 8: */
-	BUG_ON(blocksize != 8);
-	return 8 - (length & 7);
-}
+spinlock_t krb5_seq_lock = SPIN_LOCK_UNLOCKED;
 
 u32
 krb5_make_token(struct krb5_ctx *ctx, int qop_req,
-		   struct xdr_buf *text, struct xdr_netobj *token,
-		   int toktype)
+		   struct xdr_buf *text, struct xdr_netobj *token)
 {
 	s32			checksum_type;
 	struct xdr_netobj	md5cksum = {.len = 0, .data = NULL};
-	int			blocksize = 0, tmsglen;
 	unsigned char		*ptr, *krb5_hdr, *msg_start;
 	s32			now;
+	u32			seq_send;
 
 	dprintk("RPC:     gss_krb5_seal\n");
 
@@ -111,21 +104,13 @@
 		goto out_err;
 	}
 
-	if (toktype == KG_TOK_WRAP_MSG) {
-		blocksize = crypto_tfm_alg_blocksize(ctx->enc);
-		tmsglen = blocksize + text->len
-			+ gss_krb5_padding(blocksize, blocksize + text->len);
-	} else {
-		tmsglen = 0;
-	}
-
-	token->len = g_token_size(&ctx->mech_used, 22 + tmsglen);
+	token->len = g_token_size(&ctx->mech_used, 22);
 
 	ptr = token->data;
-	g_make_token_header(&ctx->mech_used, 22 + tmsglen, &ptr);
+	g_make_token_header(&ctx->mech_used, 22, &ptr);
 
-	*ptr++ = (unsigned char) ((toktype>>8)&0xff);
-	*ptr++ = (unsigned char) (toktype&0xff);
+	*ptr++ = (unsigned char) ((KG_TOK_MIC_MSG>>8)&0xff);
+	*ptr++ = (unsigned char) (KG_TOK_MIC_MSG&0xff);
 
 	/* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */
 	krb5_hdr = ptr - 2;
@@ -133,17 +118,9 @@
 
 	*(u16 *)(krb5_hdr + 2) = htons(ctx->signalg);
 	memset(krb5_hdr + 4, 0xff, 4);
-	if (toktype == KG_TOK_WRAP_MSG)
-		*(u16 *)(krb5_hdr + 4) = htons(ctx->sealalg);
 
-	if (toktype == KG_TOK_WRAP_MSG) {
-		/* XXX removing support for now */
-		goto out_err;
-	} else { /* Sign only.  */
-		if (make_checksum(checksum_type, krb5_hdr, 8, text,
-				       &md5cksum))
+	if (make_checksum(checksum_type, krb5_hdr, 8, text, 0, &md5cksum))
 			goto out_err;
-	}
 
 	switch (ctx->signalg) {
 	case SGN_ALG_DES_MAC_MD5:
@@ -163,12 +140,14 @@
 
 	kfree(md5cksum.data);
 
+	spin_lock(&krb5_seq_lock);
+	seq_send = ctx->seq_send++;
+	spin_unlock(&krb5_seq_lock);
+
 	if ((krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff,
-			       ctx->seq_send, krb5_hdr + 16, krb5_hdr + 8)))
+			       seq_send, krb5_hdr + 16, krb5_hdr + 8)))
 		goto out_err;
 
-	ctx->seq_send++;
-
 	return ((ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE);
 out_err:
 	if (md5cksum.data) kfree(md5cksum.data);
Index: linux-2.6.10/net/sunrpc/auth_gss/gss_pseudoflavors.c
===================================================================
--- linux-2.6.10.orig/net/sunrpc/auth_gss/gss_pseudoflavors.c	2004-12-25 05:34:45.000000000 +0800
+++ linux-2.6.10/net/sunrpc/auth_gss/gss_pseudoflavors.c	2005-04-05 19:01:49.158500672 +0800
@@ -1,237 +0,0 @@
-/*
- *  linux/net/sunrpc/gss_union.c
- *
- *  Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/generic code
- *
- *  Copyright (c) 2001 The Regents of the University of Michigan.
- *  All rights reserved.
- *
- *  Andy Adamson   <andros@umich.edu>
- *
- */
-
-/*
- * Copyright 1993 by OpenVision Technologies, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software
- * and its documentation for any purpose is hereby granted without fee,
- * provided that the above copyright notice appears in all copies and
- * that both that copyright notice and this permission notice appear in
- * supporting documentation, and that the name of OpenVision not be used
- * in advertising or publicity pertaining to distribution of the software
- * without specific, written prior permission. OpenVision makes no
- * representations about the suitability of this software for any
- * purpose.  It is provided "as is" without express or implied warranty.
- *
- * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
- * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
- * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
- * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
- * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
- * PERFORMANCE OF THIS SOFTWARE.
- */ 
-
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/socket.h>
-#include <linux/sunrpc/gss_asn1.h>
-#include <linux/sunrpc/auth_gss.h>
-
-#ifdef RPC_DEBUG
-# define RPCDBG_FACILITY        RPCDBG_AUTH
-#endif
-
-static LIST_HEAD(registered_triples);
-static spinlock_t registered_triples_lock = SPIN_LOCK_UNLOCKED;
-
-/* The following must be called with spinlock held: */
-static struct sup_sec_triple *
-do_lookup_triple_by_pseudoflavor(u32 pseudoflavor)
-{
-	struct sup_sec_triple *pos, *triple = NULL;
-
-	list_for_each_entry(pos, &registered_triples, triples) {
-		if (pos->pseudoflavor == pseudoflavor) {
-			triple = pos;
-			break;
-		}
-	}
-	return triple;
-}
-
-/* XXX Need to think about reference counting of triples and of mechs.
- * Currently we do no reference counting of triples, and I think that's
- * probably OK given the reference counting on mechs, but there's probably
- * a better way to do all this. */
-
-int
-gss_register_triple(u32 pseudoflavor, struct gss_api_mech *mech,
-			  u32 qop, u32 service)
-{
-	struct sup_sec_triple *triple;
-
-	if (!(triple = kmalloc(sizeof(*triple), GFP_KERNEL))) {
-		printk("Alloc failed in gss_register_triple");
-		goto err;
-	}
-	triple->pseudoflavor = pseudoflavor;
-	triple->mech = gss_mech_get_by_OID(&mech->gm_oid);
-	triple->qop = qop;
-	triple->service = service;
-
-	spin_lock(&registered_triples_lock);
-	if (do_lookup_triple_by_pseudoflavor(pseudoflavor)) {
-		printk(KERN_WARNING "RPC: Registered pseudoflavor %d again\n",
-				pseudoflavor);
-		goto err_unlock;
-	}
-	list_add(&triple->triples, &registered_triples);
-	spin_unlock(&registered_triples_lock);
-	dprintk("RPC:      registered pseudoflavor %d\n", pseudoflavor);
-
-	return 0;
-
-err_unlock:
-	kfree(triple);
-	spin_unlock(&registered_triples_lock);
-err:
-	return -1;
-}
-
-int
-gss_unregister_triple(u32 pseudoflavor)
-{
-	struct sup_sec_triple *triple;
-
-	spin_lock(&registered_triples_lock);
-	if (!(triple = do_lookup_triple_by_pseudoflavor(pseudoflavor))) {
-		spin_unlock(&registered_triples_lock);
-		printk("Can't unregister unregistered pseudoflavor %d\n",
-		       pseudoflavor);
-		return -1;
-	}
-	list_del(&triple->triples);
-	spin_unlock(&registered_triples_lock);
-	gss_mech_put(triple->mech);
-	kfree(triple);
-	return 0;
-
-}
-
-void
-print_sec_triple(struct xdr_netobj *oid,u32 qop,u32 service)
-{
-	dprintk("RPC: print_sec_triple:\n");
-	dprintk("                     oid_len %d\n  oid :\n",oid->len);
-	print_hexl((u32 *)oid->data,oid->len,0);
-	dprintk("                     qop %d\n",qop);
-	dprintk("                     service %d\n",service);
-}
-
-/* Function: gss_get_cmp_triples
- *
- * Description: search sec_triples for a matching security triple
- * return pseudoflavor if match, else 0
- * (Note that 0 is a valid pseudoflavor, but not for any gss pseudoflavor
- * (0 means auth_null), so this shouldn't cause confusion.)
- */
-u32
-gss_cmp_triples(u32 oid_len, char *oid_data, u32 qop, u32 service)
-{
-	struct sup_sec_triple *triple;
-	u32 pseudoflavor = 0;
-	struct xdr_netobj oid;
-
-	oid.len = oid_len;
-	oid.data = oid_data;
-
-	dprintk("RPC:      gss_cmp_triples\n");
-	print_sec_triple(&oid,qop,service);
-
-	spin_lock(&registered_triples_lock);
-	list_for_each_entry(triple, &registered_triples, triples) {
-		if((g_OID_equal(&oid, &triple->mech->gm_oid))
-		    && (qop == triple->qop)
-		    && (service == triple->service)) {
-			pseudoflavor = triple->pseudoflavor;
-			break;
-		}
-	}
-	spin_unlock(&registered_triples_lock);
-	dprintk("RPC:      gss_cmp_triples return %d\n", pseudoflavor);
-	return pseudoflavor;
-}
-
-u32
-gss_get_pseudoflavor(struct gss_ctx *ctx, u32 qop, u32 service)
-{
-	return gss_cmp_triples(ctx->mech_type->gm_oid.len,
-			       ctx->mech_type->gm_oid.data,
-			       qop, service);
-}
-
-/* Returns nonzero iff the given pseudoflavor is in the supported list.
- * (Note that without incrementing a reference count or anything, this
- * doesn't give any guarantees.) */
-int
-gss_pseudoflavor_supported(u32 pseudoflavor)
-{
-	struct sup_sec_triple *triple;
-
-	spin_lock(&registered_triples_lock);
-	triple = do_lookup_triple_by_pseudoflavor(pseudoflavor);
-	spin_unlock(&registered_triples_lock);
-	return (triple ? 1 : 0);
-}
-
-u32
-gss_pseudoflavor_to_service(u32 pseudoflavor)
-{
-	struct sup_sec_triple *triple;
-
-	spin_lock(&registered_triples_lock);
-	triple = do_lookup_triple_by_pseudoflavor(pseudoflavor);
-	spin_unlock(&registered_triples_lock);
-	if (!triple) {
-		dprintk("RPC:      gss_pseudoflavor_to_service called with unsupported pseudoflavor %d\n",
-				pseudoflavor);
-		return 0;
-	}
-	return triple->service;
-}
-
-struct gss_api_mech *
-gss_pseudoflavor_to_mech(u32 pseudoflavor) {
-	struct sup_sec_triple *triple;
-	struct gss_api_mech *mech = NULL;
-
-	spin_lock(&registered_triples_lock);
-	triple = do_lookup_triple_by_pseudoflavor(pseudoflavor);
-	spin_unlock(&registered_triples_lock);
-	if (triple)
-		mech = gss_mech_get(triple->mech);
-	else
-		dprintk("RPC:      gss_pseudoflavor_to_mech called with unsupported pseudoflavor %d\n",
-				pseudoflavor);
-	return mech;
-}
-
-int
-gss_pseudoflavor_to_mechOID(u32 pseudoflavor, struct xdr_netobj * oid)
-{
-	struct gss_api_mech *mech;
-
-	mech = gss_pseudoflavor_to_mech(pseudoflavor);
-	if (!mech)  {
-		dprintk("RPC:      gss_pseudoflavor_to_mechOID called with unsupported pseudoflavor %d\n",
-				pseudoflavor);
-		        return -1;
-	}
-	oid->len = mech->gm_oid.len;
-	if (!(oid->data = kmalloc(oid->len, GFP_KERNEL)))
-		return -1;
-	memcpy(oid->data, mech->gm_oid.data, oid->len);
-	gss_mech_put(mech);
-	return 0;
-}
Index: linux-2.6.10/net/sunrpc/auth_gss/svcauth_gss.c
===================================================================
--- linux-2.6.10.orig/net/sunrpc/auth_gss/svcauth_gss.c	2004-12-25 05:34:44.000000000 +0800
+++ linux-2.6.10/net/sunrpc/auth_gss/svcauth_gss.c	2005-04-05 14:49:13.407691040 +0800
@@ -37,6 +37,7 @@
  *
  */
 
+#include <asm/bitops.h>
 #include <linux/types.h>
 #include <linux/module.h>
 #include <linux/pagemap.h>
@@ -78,7 +79,6 @@
 
 static struct cache_head *rsi_table[RSI_HASHMAX];
 static struct cache_detail rsi_cache;
-static struct rsi *rsi_lookup(struct rsi *item, int set);
 
 static void rsi_free(struct rsi *rsii)
 {
@@ -125,38 +125,6 @@
 	return dup_to_netobj(dst, src->data, src->len);
 }
 
-static inline void rsi_init(struct rsi *new, struct rsi *item)
-{
-	new->out_handle.data = NULL;
-	new->out_handle.len = 0;
-	new->out_token.data = NULL;
-	new->out_token.len = 0;
-	new->in_handle.len = item->in_handle.len;
-	item->in_handle.len = 0;
-	new->in_token.len = item->in_token.len;
-	item->in_token.len = 0;
-	new->in_handle.data = item->in_handle.data;
-	item->in_handle.data = NULL;
-	new->in_token.data = item->in_token.data;
-	item->in_token.data = NULL;
-}
-
-static inline void rsi_update(struct rsi *new, struct rsi *item)
-{
-	BUG_ON(new->out_handle.data || new->out_token.data);
-	new->out_handle.len = item->out_handle.len;
-	item->out_handle.len = 0;
-	new->out_token.len = item->out_token.len;
-	item->out_token.len = 0;
-	new->out_handle.data = item->out_handle.data;
-	item->out_handle.data = NULL;
-	new->out_token.data = item->out_token.data;
-	item->out_token.data = NULL;
-
-	new->major_status = item->major_status;
-	new->minor_status = item->minor_status;
-}
-
 static void rsi_request(struct cache_detail *cd,
                        struct cache_head *h,
                        char **bpp, int *blen)
@@ -168,6 +136,75 @@
 	(*bpp)[-1] = '\n';
 }
 
+static inline int
+gssd_reply(struct rsi *item)
+{
+	struct rsi *tmp;
+	struct cache_head **hp, **head;
+
+	head = &rsi_cache.hash_table[rsi_hash(item)];
+	write_lock(&rsi_cache.hash_lock);
+	for (hp = head; *hp != NULL; hp = &tmp->h.next) {
+		tmp = container_of(*hp, struct rsi, h);
+		if (rsi_match(tmp, item)) {
+			cache_get(&tmp->h);
+			clear_bit(CACHE_HASHED, &tmp->h.flags);
+			*hp = tmp->h.next;
+			tmp->h.next = NULL;
+			rsi_cache.entries--;
+			if (test_bit(CACHE_VALID, &tmp->h.flags)) {
+				write_unlock(&rsi_cache.hash_lock);
+				rsi_put(&tmp->h, &rsi_cache);
+				return -EINVAL;
+			}
+			set_bit(CACHE_HASHED, &item->h.flags);
+			item->h.next = *hp;
+			*hp = &item->h;
+			rsi_cache.entries++;
+			set_bit(CACHE_VALID, &item->h.flags);
+			item->h.last_refresh = get_seconds();
+			write_unlock(&rsi_cache.hash_lock);
+			cache_fresh(&rsi_cache, &tmp->h, 0);
+			rsi_put(&tmp->h, &rsi_cache);
+			return 0;
+		}
+	}
+	write_unlock(&rsi_cache.hash_lock);
+	return -EINVAL;
+}
+
+static inline struct rsi *
+gssd_upcall(struct rsi *item, struct svc_rqst *rqstp)
+{
+	struct rsi *tmp;
+	struct cache_head **hp, **head;
+
+	head = &rsi_cache.hash_table[rsi_hash(item)];
+	read_lock(&rsi_cache.hash_lock);
+	for (hp = head; *hp != NULL; hp = &tmp->h.next) {
+		tmp = container_of(*hp, struct rsi, h);
+		if (rsi_match(tmp, item)) {
+			if (!test_bit(CACHE_VALID, &tmp->h.flags)) {
+				read_unlock(&rsi_cache.hash_lock);
+				return NULL;
+			}
+			*hp = tmp->h.next;
+			tmp->h.next = NULL;
+			rsi_cache.entries--;
+			read_unlock(&rsi_cache.hash_lock);
+			return tmp;
+		}
+	}
+	cache_get(&item->h);
+	item->h.next = *head;
+	*head = &item->h;
+	rsi_cache.entries++;
+	read_unlock(&rsi_cache.hash_lock);
+	cache_get(&item->h);
+	if (cache_check(&rsi_cache, &item->h, &rqstp->rq_chandle))
+		return NULL;
+	return item;
+}
 
 static int rsi_parse(struct cache_detail *cd,
                     char *mesg, int mlen)
@@ -176,17 +213,22 @@
 	char *buf = mesg;
 	char *ep;
 	int len;
-	struct rsi rsii, *rsip = NULL;
+	struct rsi *rsii;
 	time_t expiry;
 	int status = -EINVAL;
 
-	memset(&rsii, 0, sizeof(rsii));
+	rsii = kmalloc(sizeof(*rsii), GFP_KERNEL);
+	if (!rsii)
+		return -ENOMEM;
+	memset(rsii, 0, sizeof(*rsii));
+	cache_init(&rsii->h);
+
 	/* handle */
 	len = qword_get(&mesg, buf, mlen);
 	if (len < 0)
 		goto out;
 	status = -ENOMEM;
-	if (dup_to_netobj(&rsii.in_handle, buf, len))
+	if (dup_to_netobj(&rsii->in_handle, buf, len))
 		goto out;
 
 	/* token */
@@ -195,10 +237,9 @@
 	if (len < 0)
 		goto out;
 	status = -ENOMEM;
-	if (dup_to_netobj(&rsii.in_token, buf, len))
+	if (dup_to_netobj(&rsii->in_token, buf, len))
 		goto out;
 
-	rsii.h.flags = 0;
 	/* expiry */
 	expiry = get_expiry(&mesg);
 	status = -EINVAL;
@@ -212,13 +253,13 @@
 	if (len == 0) {
 		goto out;
 	} else {
-		rsii.major_status = simple_strtoul(buf, &ep, 10);
+		rsii->major_status = simple_strtoul(buf, &ep, 10);
 		if (*ep)
 			goto out;
 		len = qword_get(&mesg, buf, mlen);
 		if (len <= 0)
 			goto out;
-		rsii.minor_status = simple_strtoul(buf, &ep, 10);
+		rsii->minor_status = simple_strtoul(buf, &ep, 10);
 		if (*ep)
 			goto out;
 
@@ -227,7 +268,7 @@
 		if (len < 0)
 			goto out;
 		status = -ENOMEM;
-		if (dup_to_netobj(&rsii.out_handle, buf, len))
+		if (dup_to_netobj(&rsii->out_handle, buf, len))
 			goto out;
 
 		/* out_token */
@@ -236,16 +277,14 @@
 		if (len < 0)
 			goto out;
 		status = -ENOMEM;
-		if (dup_to_netobj(&rsii.out_token, buf, len))
+		if (dup_to_netobj(&rsii->out_token, buf, len))
 			goto out;
 	}
-	rsii.h.expiry_time = expiry;
-	rsip = rsi_lookup(&rsii, 1);
-	status = 0;
+	rsii->h.expiry_time = expiry;
+	status = gssd_reply(rsii);
 out:
-	rsi_free(&rsii);
-	if (rsip)
-		rsi_put(&rsip->h, &rsi_cache);
+	if (rsii)
+		rsi_put(&rsii->h, &rsi_cache);
 	return status;
 }
 
@@ -258,8 +297,6 @@
 	.cache_parse    = rsi_parse,
 };
 
-static DefineSimpleCacheLookup(rsi, 0)
-
 /*
  * The rpcsec_context cache is used to store a context that is
  * used in data exchange.
@@ -292,7 +329,6 @@
 
 static struct cache_head *rsc_table[RSC_HASHMAX];
 static struct cache_detail rsc_cache;
-static struct rsc *rsc_lookup(struct rsc *item, int set);
 
 static void rsc_free(struct rsc *rsci)
 {
@@ -325,26 +361,46 @@
 	return netobj_equal(&new->handle, &tmp->handle);
 }
 
-static inline void
-rsc_init(struct rsc *new, struct rsc *tmp)
+static struct rsc *rsc_lookup(struct rsc *item, int set)
 {
-	new->handle.len = tmp->handle.len;
-	tmp->handle.len = 0;
-	new->handle.data = tmp->handle.data;
-	tmp->handle.data = NULL;
-	new->mechctx = NULL;
-	new->cred.cr_group_info = NULL;
-}
-
-static inline void
-rsc_update(struct rsc *new, struct rsc *tmp)
-{
-	new->mechctx = tmp->mechctx;
-	tmp->mechctx = NULL;
-	memset(&new->seqdata, 0, sizeof(new->seqdata));
-	spin_lock_init(&new->seqdata.sd_lock);
-	new->cred = tmp->cred;
-	tmp->cred.cr_group_info = NULL;
+	struct rsc *tmp = NULL;
+	struct cache_head **hp, **head;
+	head = &rsc_cache.hash_table[rsc_hash(item)];
+
+	if (set)
+		write_lock(&rsc_cache.hash_lock);
+	else
+		read_lock(&rsc_cache.hash_lock);
+	for (hp = head; *hp != NULL; hp = &tmp->h.next) {
+		tmp = container_of(*hp, struct rsc, h);
+		if (!rsc_match(tmp, item))
+			continue;
+		cache_get(&tmp->h);
+		if (!set)
+			goto out_noset;
+		*hp = tmp->h.next;
+		tmp->h.next = NULL;
+		clear_bit(CACHE_HASHED, &tmp->h.flags);
+		rsc_put(&tmp->h, &rsc_cache);
+		goto out_set;
+	}
+	/* Didn't find anything */
+	if (!set)
+		goto out_nada;
+	rsc_cache.entries++;
+out_set:
+	set_bit(CACHE_HASHED, &item->h.flags);
+	item->h.next = *head;
+	*head = &item->h;
+	write_unlock(&rsc_cache.hash_lock);
+	cache_fresh(&rsc_cache, &item->h, item->h.expiry_time);
+	cache_get(&item->h);
+	return item;
+out_nada:
+	tmp = NULL;
+out_noset:
+	read_unlock(&rsc_cache.hash_lock);
+	return tmp;
 }
 
 static int rsc_parse(struct cache_detail *cd,
@@ -353,19 +409,22 @@
 	/* contexthandle expiry [ uid gid N <n gids> mechname ...mechdata... ] */
 	char *buf = mesg;
 	int len, rv;
-	struct rsc rsci, *rscp = NULL;
+	struct rsc *rsci, *res = NULL;
 	time_t expiry;
 	int status = -EINVAL;
 
-	memset(&rsci, 0, sizeof(rsci));
+	rsci = kmalloc(sizeof(*rsci), GFP_KERNEL);
+	if (!rsci)
+		return -ENOMEM;
+	memset(rsci, 0, sizeof(*rsci));
+	cache_init(&rsci->h);
 	/* context handle */
 	len = qword_get(&mesg, buf, mlen);
 	if (len < 0) goto out;
 	status = -ENOMEM;
-	if (dup_to_netobj(&rsci.handle, buf, len))
+	if (dup_to_netobj(&rsci->handle, buf, len))
 		goto out;
 
-	rsci.h.flags = 0;
 	/* expiry */
 	expiry = get_expiry(&mesg);
 	status = -EINVAL;
@@ -373,26 +432,26 @@
 		goto out;
 
 	/* uid, or NEGATIVE */
-	rv = get_int(&mesg, &rsci.cred.cr_uid);
+	rv = get_int(&mesg, &rsci->cred.cr_uid);
 	if (rv == -EINVAL)
 		goto out;
 	if (rv == -ENOENT)
-		set_bit(CACHE_NEGATIVE, &rsci.h.flags);
+		set_bit(CACHE_NEGATIVE, &rsci->h.flags);
 	else {
 		int N, i;
 		struct gss_api_mech *gm;
 		struct xdr_netobj tmp_buf;
 
 		/* gid */
-		if (get_int(&mesg, &rsci.cred.cr_gid))
+		if (get_int(&mesg, &rsci->cred.cr_gid))
 			goto out;
 
 		/* number of additional gid's */
 		if (get_int(&mesg, &N))
 			goto out;
 		status = -ENOMEM;
-		rsci.cred.cr_group_info = groups_alloc(N);
-		if (rsci.cred.cr_group_info == NULL)
+		rsci->cred.cr_group_info = groups_alloc(N);
+		if (rsci->cred.cr_group_info == NULL)
 			goto out;
 
 		/* gid's */
@@ -401,7 +460,7 @@
 			gid_t gid;
 			if (get_int(&mesg, &gid))
 				goto out;
-			GROUP_AT(rsci.cred.cr_group_info, i) = gid;
+			GROUP_AT(rsci->cred.cr_group_info, i) = gid;
 		}
 
 		/* mech name */
@@ -422,19 +481,21 @@
 		}
 		tmp_buf.len = len;
 		tmp_buf.data = buf;
-		if (gss_import_sec_context(&tmp_buf, gm, &rsci.mechctx)) {
+		if (gss_import_sec_context(&tmp_buf, gm, &rsci->mechctx)) {
 			gss_mech_put(gm);
 			goto out;
 		}
 		gss_mech_put(gm);
 	}
-	rsci.h.expiry_time = expiry;
-	rscp = rsc_lookup(&rsci, 1);
+	rsci->h.expiry_time = expiry;
+	spin_lock_init(&rsci->seqdata.sd_lock);
+	res = rsc_lookup(rsci, 1);
+	rsc_put(&res->h, &rsc_cache);
+	rsci = NULL;
 	status = 0;
 out:
-	rsc_free(&rsci);
-	if (rscp)
-		rsc_put(&rscp->h, &rsc_cache);
+	if (rsci)
+		rsc_put(&rsci->h, &rsc_cache);
 	return status;
 }
 
@@ -446,19 +507,14 @@
 	.cache_parse	= rsc_parse,
 };
 
-static DefineSimpleCacheLookup(rsc, 0);
-
 struct rsc *
 gss_svc_searchbyctx(struct xdr_netobj *handle)
 {
 	struct rsc rsci;
 	struct rsc *found;
 
-	memset(&rsci, 0, sizeof(rsci));
-	if (dup_to_netobj(&rsci.handle, handle->data, handle->len))
-		return NULL;
+	rsci.handle = *handle;
 	found = rsc_lookup(&rsci, 0);
-	rsc_free(&rsci);
 	if (!found)
 		return NULL;
 	if (cache_check(&rsc_cache, &found->h, NULL))
@@ -721,6 +777,45 @@
 	return stat;
 }
 
+static int
+unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx)
+{
+	int stat = -EINVAL;
+	int out_offset;
+	u32 * lenp;
+	u32 priv_len, maj_stat;
+	int saved_len;
+
+	lenp = buf->head[0].iov_base;
+	priv_len = ntohl(svc_getu32(&buf->head[0]));
+	if (priv_len > buf->len) /* XXXJBF: wrong check */
+		goto out;
+	/* XXXJBF: bizarre hack: to handle revisits (and not decrypt
+	 * twice), the first time through we write an offset
+	 * telling us where to skip to find the already-decrypted data */
+	if (rqstp->rq_deferred) {
+		buf->head[0].iov_base += priv_len;
+		buf->head[0].iov_len -= priv_len;
+		return 0;
+	}
+	saved_len = buf->len; /* XXX HACK */
+	buf->len = priv_len;
+	maj_stat = gss_unwrap(ctx, NULL, 0, buf, &out_offset);
+	buf->len = saved_len;
+	buf->head[0].iov_base += out_offset;
+	buf->head[0].iov_len -= out_offset;
+	BUG_ON(buf->head[0].iov_len <= 0);
+	if (maj_stat != GSS_S_COMPLETE)
+		goto out;
+	if (ntohl(svc_getu32(&buf->head[0])) != seq)
+		goto out;
+	/* XXXJBF: see "bizarre hack", above. */
+	*lenp = htonl(out_offset + 4);
+	stat = 0;
+out:
+	return stat;
+}
+
 struct gss_svc_data {
 	/* decoded gss client cred: */
 	struct rpc_gss_wire_cred	clcred;
@@ -730,6 +825,19 @@
 	struct rsc			*rsci;
 };
 
+static int
+svcauth_gss_set_client(struct svc_rqst *rqstp)
+{
+	struct gss_svc_data *svcdata = rqstp->rq_auth_data;
+	struct rsc *rsci = svcdata->rsci;
+	struct rpc_gss_wire_cred *gc = &svcdata->clcred;
+
+	rqstp->rq_client = find_gss_auth_domain(rsci->mechctx, gc->gc_svc);
+	if (rqstp->rq_client == NULL)
+		return SVC_DENIED;
+	return SVC_OK;
+}
+
 /*
  * Accept an rpcsec packet.
  * If context establishment, punt to user space
@@ -748,7 +856,7 @@
 	struct gss_svc_data *svcdata = rqstp->rq_auth_data;
 	struct rpc_gss_wire_cred *gc;
 	struct rsc	*rsci = NULL;
-	struct rsi	*rsip, rsikey;
+	struct rsi	*rsip, *rsikey = NULL;
 	u32		*rpcstart;
 	u32		*reject_stat = resv->iov_base + resv->iov_len;
 	int		ret;
@@ -841,30 +949,23 @@
 		*authp = rpc_autherr_badcred;
 		if (gc->gc_proc == RPC_GSS_PROC_INIT && gc->gc_ctx.len != 0)
 			goto auth_err;
-		memset(&rsikey, 0, sizeof(rsikey));
-		if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx))
+		rsikey = kmalloc(sizeof(*rsikey), GFP_KERNEL);
+		if (!rsikey)
+			goto drop;
+		memset(rsikey, 0, sizeof(*rsikey));
+		cache_init(&rsikey->h);
+		if (dup_netobj(&rsikey->in_handle, &gc->gc_ctx))
 			goto drop;
 		*authp = rpc_autherr_badverf;
-		if (svc_safe_getnetobj(argv, &tmpobj)) {
-			kfree(rsikey.in_handle.data);
+		if (svc_safe_getnetobj(argv, &tmpobj))
 			goto auth_err;
-		}
-		if (dup_netobj(&rsikey.in_token, &tmpobj)) {
-			kfree(rsikey.in_handle.data);
+		if (dup_netobj(&rsikey->in_token, &tmpobj))
 			goto drop;
-		}
 
-		rsip = rsi_lookup(&rsikey, 0);
-		rsi_free(&rsikey);
-		if (!rsip) {
-			goto drop;
-		}
-		switch(cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) {
-		case -EAGAIN:
+		rsip = gssd_upcall(rsikey, rqstp);
+		if (!rsip)
 			goto drop;
-		case -ENOENT:
-			goto drop;
-		case 0:
+		else {
 			rsci = gss_svc_searchbyctx(&rsip->out_handle);
 			if (!rsci) {
 				goto drop;
@@ -893,11 +994,6 @@
 		svc_putu32(resv, rpc_success);
 		goto complete;
 	case RPC_GSS_PROC_DATA:
-		*authp = rpc_autherr_badcred;
-		rqstp->rq_client =
-			find_gss_auth_domain(rsci->mechctx, gc->gc_svc);
-		if (rqstp->rq_client == NULL)
-			goto auth_err;
 		*authp = rpcsec_gsserr_ctxproblem;
 		if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
 			goto auth_err;
@@ -911,6 +1007,15 @@
 			if (unwrap_integ_data(&rqstp->rq_arg,
 					gc->gc_seq, rsci->mechctx))
 				goto auth_err;
+			/* placeholders for length and seq. number: */
+			svcdata->body_start = resv->iov_base + resv->iov_len;
+			svc_putu32(resv, 0);
+			svc_putu32(resv, 0);
+			break;
+		case RPC_GSS_SVC_PRIVACY:
+			if (unwrap_priv_data(rqstp, &rqstp->rq_arg,
+					gc->gc_seq, rsci->mechctx))
+				goto auth_err;
 			svcdata->rsci = rsci;
 			cache_get(&rsci->h);
 			/* placeholders for length and seq. number: */
@@ -918,11 +1023,11 @@
 			svc_putu32(resv, 0);
 			svc_putu32(resv, 0);
 			break;
-		case RPC_GSS_SVC_PRIVACY:
-			/* currently unsupported */
 		default:
 			goto auth_err;
 		}
+		svcdata->rsci = rsci;
+		cache_get(&rsci->h);
 		ret = SVC_OK;
 		goto out;
 	}
@@ -937,13 +1042,15 @@
 drop:
 	ret = SVC_DROP;
 out:
+	if (rsikey)
+		rsi_put(&rsikey->h, &rsi_cache);
 	if (rsci)
 		rsc_put(&rsci->h, &rsc_cache);
 	return ret;
 }
 
-static int
-svcauth_gss_release(struct svc_rqst *rqstp)
+static inline int
+svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp)
 {
 	struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data;
 	struct rpc_gss_wire_cred *gc = &gsd->clcred;
@@ -955,10 +1062,160 @@
 	int integ_offset, integ_len;
 	int stat = -EINVAL;
 
+	p = gsd->body_start;
+	gsd->body_start = NULL;
+	/* move accept_stat to right place: */
+	memcpy(p, p + 2, 4);
+	/* Don't wrap in failure case: */
+	/* Counting on not getting here if call was not even accepted! */
+	if (*p != rpc_success) {
+		resbuf->head[0].iov_len -= 2 * 4;
+		goto out;
+	}
+	p++;
+	integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base;
+	integ_len = resbuf->len - integ_offset;
+	BUG_ON(integ_len % 4);
+	*p++ = htonl(integ_len);
+	*p++ = htonl(gc->gc_seq);
+	if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset,
+				integ_len))
+		BUG();
+	if (resbuf->page_len == 0
+			&& resbuf->tail[0].iov_len + RPC_MAX_AUTH_SIZE
+			< PAGE_SIZE) {
+		BUG_ON(resbuf->tail[0].iov_len);
+		/* Use head for everything */
+		resv = &resbuf->head[0];
+	} else if (resbuf->tail[0].iov_base == NULL) {
+		/* copied from nfsd4_encode_read */
+		svc_take_page(rqstp);
+		resbuf->tail[0].iov_base = page_address(rqstp
+				->rq_respages[rqstp->rq_resused-1]);
+		rqstp->rq_restailpage = rqstp->rq_resused-1;
+		resbuf->tail[0].iov_len = 0;
+		resv = &resbuf->tail[0];
+	} else {
+		resv = &resbuf->tail[0];
+	}
+	mic.data = (u8 *)resv->iov_base + resv->iov_len + 4;
+	if (gss_get_mic(gsd->rsci->mechctx, 0, &integ_buf, &mic))
+		goto out_err;
+	svc_putu32(resv, htonl(mic.len));
+	memset(mic.data + mic.len, 0,
+			round_up_to_quad(mic.len) - mic.len);
+	resv->iov_len += XDR_QUADLEN(mic.len) << 2;
+	/* not strictly required: */
+	resbuf->len += XDR_QUADLEN(mic.len) << 2;
+	BUG_ON(resv->iov_len > PAGE_SIZE);
+out:
+	stat = 0;
+out_err:
+	return stat;
+}
+
+/* XXXJBF: Look for chances to share code with client */
+/* XXXJBF: Do we need to preallocate these pages somehow?  E.g. see
+ * buffer size calculations in svcsock.c */
+/* XXXJBF: how does reference counting on pages work? */
+static struct page **
+svc_alloc_enc_pages(struct xdr_buf *buf)
+{
+	struct page **ret;
+	int last, i;
+
+	if (buf->page_len == 0)
+		return NULL;
+	BUG_ON(buf->page_base >> PAGE_CACHE_SHIFT);
+	last = (buf->page_base + buf->page_len - 1) >> PAGE_CACHE_SHIFT;
+	ret = kmalloc((last + 1) * sizeof(struct page *), GFP_KERNEL);
+	if (!ret)
+		goto out;
+	for (i = 0; i<= last; i++) {
+		ret[i] = alloc_page(GFP_KERNEL);
+		if (ret[i] == NULL)
+			goto out_free;
+	}
+out:
+	return ret;
+out_free:
+	for (i--; i >= 0; i--) {
+		__free_page(ret[i]);
+	}
+	return NULL;
+}
+
+static inline int
+svcauth_gss_wrap_resp_priv(struct svc_rqst *rqstp)
+{
+	struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data;
+	struct rpc_gss_wire_cred *gc = &gsd->clcred;
+	struct xdr_buf *resbuf = &rqstp->rq_res;
+	struct page **inpages;
+	u32 *p;
+	int offset, *len;
+	int pad;
+	int stat = -EINVAL;
+
+	p = gsd->body_start;
+	gsd->body_start = NULL;
+	/* move accept_stat to right place: */
+	memcpy(p, p + 2, 4);
+	/* Don't wrap in failure case: */
+	/* Counting on not getting here if call was not even accepted! */
+	if (*p != rpc_success) {
+		resbuf->head[0].iov_len -= 2 * 4;
+		goto out;
+	}
+	p++;
+	len = p++;
+	offset = (u8 *)p - (u8 *)resbuf->head[0].iov_base;
+	*p++ = htonl(gc->gc_seq);
+	stat = -ENOMEM;
+	inpages = resbuf->pages;
+	/* XXXJBF: huge memory leaks here: allocated pages probably aren't
+	 * freed, and neither is memory used to hold page array. */
+	resbuf->pages = svc_alloc_enc_pages(resbuf);
+	if (resbuf->page_len && !resbuf->pages)
+		goto out_err; /* XXX sleep and retry? Reserve ahead of time
+				and BUG_ON? */
+	if (resbuf->tail[0].iov_len == 0 || resbuf->tail[0].iov_base == NULL) {
+		/* copied from nfsd4_encode_read */
+		{int i = svc_take_page(rqstp); BUG_ON(i); }
+		resbuf->tail[0].iov_base = page_address(rqstp
+				->rq_respages[rqstp->rq_resused-1]);
+		rqstp->rq_restailpage = rqstp->rq_resused-1;
+		resbuf->tail[0].iov_len = 0;
+	}
+	/* XXX: Will svc code attempt to free stuff in xdr_buf->pages?
+	 * Or can we leave it in any old state on error?? */
+	stat = -EINVAL;
+	if (gss_wrap(gsd->rsci->mechctx, GSS_C_QOP_DEFAULT, offset,
+				resbuf, inpages))
+		goto out_err;
+	*len = htonl(resbuf->len - offset);
+	pad = 3 - ((resbuf->len - offset - 1)&3);
+	p = (u32 *)(resbuf->tail[0].iov_base + resbuf->tail[0].iov_len);
+	memset(p, 0, pad);
+	resbuf->tail[0].iov_len += pad;
+out:
+	return 0;
+out_err:
+	return stat;
+}
+
+static int
+svcauth_gss_release(struct svc_rqst *rqstp)
+{
+	struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data;
+	struct rpc_gss_wire_cred *gc = &gsd->clcred;
+	struct xdr_buf *resbuf = &rqstp->rq_res;
+	int stat = -EINVAL;
+
 	if (gc->gc_proc != RPC_GSS_PROC_DATA)
 		goto out;
 	/* Release can be called twice, but we only wrap once. */
-	if (gsd->body_start == 0)
+	if (gsd->body_start == NULL)
 		goto out;
 	/* normally not set till svc_send, but we need it here: */
 	resbuf->len = resbuf->head[0].iov_len
@@ -967,55 +1224,15 @@
 	case RPC_GSS_SVC_NONE:
 		break;
 	case RPC_GSS_SVC_INTEGRITY:
-		p = gsd->body_start;
-		gsd->body_start = NULL;
-		/* move accept_stat to right place: */
-		memcpy(p, p + 2, 4);
-		/* don't wrap in failure case: */
-		/* Note: counting on not getting here if call was not even
-		 * accepted! */
-		if (*p != rpc_success) {
-			resbuf->head[0].iov_len -= 2 * 4;
-			goto out;
-		}
-		p++;
-		integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base;
-		integ_len = resbuf->len - integ_offset;
-		BUG_ON(integ_len % 4);
-		*p++ = htonl(integ_len);
-		*p++ = htonl(gc->gc_seq);
-		if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset,
-					integ_len))
-			BUG();
-		if (resbuf->page_len == 0
-			&& resbuf->tail[0].iov_len + RPC_MAX_AUTH_SIZE
-				< PAGE_SIZE) {
-			BUG_ON(resbuf->tail[0].iov_len);
-			/* Use head for everything */
-			resv = &resbuf->head[0];
-		} else if (resbuf->tail[0].iov_base == NULL) {
-			/* copied from nfsd4_encode_read */
-			svc_take_page(rqstp);
-			resbuf->tail[0].iov_base = page_address(rqstp
-					->rq_respages[rqstp->rq_resused-1]);
-			rqstp->rq_restailpage = rqstp->rq_resused-1;
-			resbuf->tail[0].iov_len = 0;
-			resv = &resbuf->tail[0];
-		} else {
-			resv = &resbuf->tail[0];
-		}
-		mic.data = (u8 *)resv->iov_base + resv->iov_len + 4;
-		if (gss_get_mic(gsd->rsci->mechctx, 0, &integ_buf, &mic))
+		stat = svcauth_gss_wrap_resp_integ(rqstp);
+		if (stat)
 			goto out_err;
-		svc_putu32(resv, htonl(mic.len));
-		memset(mic.data + mic.len, 0,
-				round_up_to_quad(mic.len) - mic.len);
-		resv->iov_len += XDR_QUADLEN(mic.len) << 2;
-		/* not strictly required: */
-		resbuf->len += XDR_QUADLEN(mic.len) << 2;
-		BUG_ON(resv->iov_len > PAGE_SIZE);
 		break;
 	case RPC_GSS_SVC_PRIVACY:
+		stat = svcauth_gss_wrap_resp_priv(rqstp);
+		if (stat)
+			goto out_err;
+		break;
 	default:
 		goto out_err;
 	}
@@ -1052,6 +1269,7 @@
 	.accept		= svcauth_gss_accept,
 	.release	= svcauth_gss_release,
 	.domain_release = svcauth_gss_domain_release,
+	.set_client	= svcauth_gss_set_client,
 };
 
 int
Index: linux-2.6.10/net/sunrpc/auth_gss/sunrpcgss_syms.c
===================================================================
--- linux-2.6.10.orig/net/sunrpc/auth_gss/sunrpcgss_syms.c	2004-12-25 05:35:23.000000000 +0800
+++ linux-2.6.10/net/sunrpc/auth_gss/sunrpcgss_syms.c	2005-04-05 19:01:49.158500672 +0800
@@ -1,37 +0,0 @@
-#include <linux/config.h>
-#include <linux/module.h>
-
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/sched.h>
-#include <linux/uio.h>
-#include <linux/unistd.h>
-
-#include <linux/sunrpc/auth_gss.h>
-#include <linux/sunrpc/svcauth_gss.h>
-#include <linux/sunrpc/gss_asn1.h>
-#include <linux/sunrpc/gss_krb5.h>
-
-/* svcauth_gss.c: */
-EXPORT_SYMBOL(svcauth_gss_register_pseudoflavor);
-
-/* registering gss mechanisms to the mech switching code: */
-EXPORT_SYMBOL(gss_mech_register);
-EXPORT_SYMBOL(gss_mech_unregister);
-EXPORT_SYMBOL(gss_mech_get);
-EXPORT_SYMBOL(gss_mech_get_by_pseudoflavor);
-EXPORT_SYMBOL(gss_mech_get_by_name);
-EXPORT_SYMBOL(gss_mech_put);
-EXPORT_SYMBOL(gss_pseudoflavor_to_service);
-EXPORT_SYMBOL(gss_service_to_auth_domain_name);
-
-/* generic functionality in gss code: */
-EXPORT_SYMBOL(g_make_token_header);
-EXPORT_SYMBOL(g_verify_token_header);
-EXPORT_SYMBOL(g_token_size);
-EXPORT_SYMBOL(make_checksum);
-EXPORT_SYMBOL(krb5_encrypt);
-EXPORT_SYMBOL(krb5_decrypt);
-
-/* debug */
-EXPORT_SYMBOL(print_hexl);
Index: linux-2.6.10/net/sunrpc/auth_gss/Makefile
===================================================================
--- linux-2.6.10.orig/net/sunrpc/auth_gss/Makefile	2004-12-25 05:34:33.000000000 +0800
+++ linux-2.6.10/net/sunrpc/auth_gss/Makefile	2005-04-05 14:49:13.408690888 +0800
@@ -10,7 +10,7 @@
 obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
 
 rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
-	gss_krb5_seqnum.o
+	gss_krb5_seqnum.o gss_krb5_wrap.o
 
 obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o
 
Index: linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_mech.c
===================================================================
--- linux-2.6.10.orig/net/sunrpc/auth_gss/gss_krb5_mech.c	2004-12-25 05:35:23.000000000 +0800
+++ linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_mech.c	2005-04-05 14:49:13.400692104 +0800
@@ -182,6 +182,7 @@
 	kfree(kctx);
 }
 
+/* XXX the following wrappers have become pointless; kill them. */
 static u32
 gss_verify_mic_kerberos(struct gss_ctx		*ctx,
 			struct xdr_buf		*message,
@@ -191,8 +192,7 @@
 	int qop_state;
 	struct krb5_ctx *kctx = ctx->internal_ctx_id;
 
-	maj_stat = krb5_read_token(kctx, mic_token, message, &qop_state,
-				   KG_TOK_MIC_MSG);
+	maj_stat = krb5_read_token(kctx, mic_token, message, &qop_state);
 	if (!maj_stat && qop_state)
 	    *qstate = qop_state;
 
@@ -208,7 +208,7 @@
 	u32 err = 0;
 	struct krb5_ctx *kctx = ctx->internal_ctx_id;
 
-	err = krb5_make_token(kctx, qop, message, mic_token, KG_TOK_MIC_MSG);
+	err = krb5_make_token(kctx, qop, message, mic_token);
 
 	dprintk("RPC:      gss_get_mic_kerberos returning %d\n",err);
 
@@ -219,6 +219,8 @@
 	.gss_import_sec_context	= gss_import_sec_context_kerberos,
 	.gss_get_mic		= gss_get_mic_kerberos,
 	.gss_verify_mic		= gss_verify_mic_kerberos,
+	.gss_wrap		= gss_wrap_kerberos,
+	.gss_unwrap		= gss_unwrap_kerberos,
 	.gss_delete_sec_context	= gss_delete_sec_context_kerberos,
 };
 
@@ -233,6 +235,11 @@
 		.service = RPC_GSS_SVC_INTEGRITY,
 		.name = "krb5i",
 	},
+	[2] = {
+		.pseudoflavor = RPC_AUTH_GSS_KRB5P,
+		.service = RPC_GSS_SVC_PRIVACY,
+		.name = "krb5p",
+	},
 };
 
 static struct gss_api_mech gss_kerberos_mech = {
Index: linux-2.6.10/net/sunrpc/auth_gss/auth_gss.c
===================================================================
--- linux-2.6.10.orig/net/sunrpc/auth_gss/auth_gss.c	2004-12-25 05:34:44.000000000 +0800
+++ linux-2.6.10/net/sunrpc/auth_gss/auth_gss.c	2005-04-05 14:49:13.404691496 +0800
@@ -45,6 +45,7 @@
 #include <linux/socket.h>
 #include <linux/in.h>
 #include <linux/sched.h>
+#include <linux/pagemap.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/auth.h>
 #include <linux/sunrpc/auth_gss.h>
@@ -480,12 +481,14 @@
 	if (!cred)
 		goto err;
 	if (gss_err)
-		cred->cr_flags |= RPCAUTH_CRED_DEAD;
+		cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
 	else
 		gss_cred_set_ctx(cred, ctx);
 	spin_lock(&gss_auth->lock);
 	gss_msg = __gss_find_upcall(gss_auth, acred.uid);
 	if (gss_msg) {
+		if (gss_err)
+			gss_msg->msg.errno = -EACCES;
 		__gss_unhash_msg(gss_msg);
 		spin_unlock(&gss_auth->lock);
 		gss_release_msg(gss_msg);
@@ -740,7 +743,9 @@
 	maj_stat = gss_get_mic(ctx->gc_gss_ctx,
 			       GSS_C_QOP_DEFAULT, 
 			       &verf_buf, &mic);
-	if(maj_stat != 0){
+	if (maj_stat == GSS_S_CONTEXT_EXPIRED) {
+		cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
+	} else if (maj_stat != 0) {
 		printk("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat);
 		goto out_put_ctx;
 	}
@@ -779,6 +784,7 @@
 	struct xdr_netobj mic;
 	u32		flav,len;
 	u32		service;
+	u32		maj_stat;
 
 	dprintk("RPC: %4u gss_validate\n", task->tk_pid);
 
@@ -794,8 +800,11 @@
 	mic.data = (u8 *)p;
 	mic.len = len;
 
-	if (gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state))
-               goto out_bad;
+	maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state);
+	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
+		cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
+	if (maj_stat)
+		goto out_bad;
        service = gss_pseudoflavor_to_service(ctx->gc_gss_ctx->mech_type,
 					gss_cred->gc_flavor);
        switch (service) {
@@ -807,6 +816,11 @@
 	       /* verifier data, flavor, length, length, sequence number: */
 	       task->tk_auth->au_rslack = XDR_QUADLEN(len) + 4;
 	       break;
+       case RPC_GSS_SVC_PRIVACY:
+	       /* XXXJBF: Ugh. Going for a wild overestimate.
+		* Need some info from krb5 layer? */
+	       task->tk_auth->au_rslack = XDR_QUADLEN(len) + 32;
+	       break;
        default:
 	       goto out_bad;
        }
@@ -821,11 +835,10 @@
 }
 
 static inline int
-gss_wrap_req_integ(struct gss_cl_ctx *ctx,
-			kxdrproc_t encode, void *rqstp, u32 *p, void *obj)
+gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
+		kxdrproc_t encode, struct rpc_rqst *rqstp, u32 *p, void *obj)
 {
-	struct rpc_rqst	*req = (struct rpc_rqst *)rqstp;
-	struct xdr_buf	*snd_buf = &req->rq_snd_buf;
+	struct xdr_buf	*snd_buf = &rqstp->rq_snd_buf;
 	struct xdr_buf	integ_buf;
 	u32             *integ_len = NULL;
 	struct xdr_netobj mic;
@@ -836,7 +849,7 @@
 
 	integ_len = p++;
 	offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
-	*p++ = htonl(req->rq_seqno);
+	*p++ = htonl(rqstp->rq_seqno);
 
 	status = encode(rqstp, p, obj);
 	if (status)
@@ -848,7 +861,7 @@
 	*integ_len = htonl(integ_buf.len);
 
 	/* guess whether we're in the head or the tail: */
-	if (snd_buf->page_len || snd_buf->tail[0].iov_len) 
+	if (snd_buf->page_len || snd_buf->tail[0].iov_len)
 		iov = snd_buf->tail;
 	else
 		iov = snd_buf->head;
@@ -858,7 +871,9 @@
 	maj_stat = gss_get_mic(ctx->gc_gss_ctx,
 			GSS_C_QOP_DEFAULT, &integ_buf, &mic);
 	status = -EIO; /* XXX? */
-	if (maj_stat)
+	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
+		cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
+	else if (maj_stat)
 		return status;
 	q = xdr_encode_opaque(p, NULL, mic.len);
 
@@ -868,6 +883,112 @@
 	return 0;
 }
 
+static void
+priv_release_snd_buf(struct rpc_rqst *rqstp)
+{
+	int i;
+
+	for (i=0; i < rqstp->rq_enc_pages_num; i++)
+		__free_page(rqstp->rq_enc_pages[i]);
+	kfree(rqstp->rq_enc_pages);
+}
+
+static int
+alloc_enc_pages(struct rpc_rqst *rqstp)
+{
+	struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
+	int first, last, i;
+
+	if (snd_buf->page_len == 0) {
+		rqstp->rq_enc_pages_num = 0;
+		return 0;
+	}
+
+	first = snd_buf->page_base >> PAGE_CACHE_SHIFT;
+	last = (snd_buf->page_base + snd_buf->page_len - 1) >> PAGE_CACHE_SHIFT;
+	rqstp->rq_enc_pages_num = last - first + 1 + 1;
+	rqstp->rq_enc_pages
+		= kmalloc(rqstp->rq_enc_pages_num * sizeof(struct page *),
+				GFP_NOFS);
+	if (!rqstp->rq_enc_pages)
+		goto out;
+	for (i=0; i < rqstp->rq_enc_pages_num; i++) {
+		rqstp->rq_enc_pages[i] = alloc_page(GFP_NOFS);
+		if (rqstp->rq_enc_pages[i] == NULL)
+			goto out_free;
+	}
+	rqstp->rq_release_snd_buf = priv_release_snd_buf;
+	return 0;
+out_free:
+	for (i--; i >= 0; i--) {
+		__free_page(rqstp->rq_enc_pages[i]);
+	}
+out:
+	return -EAGAIN;
+}
+
+static inline int
+gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
+		kxdrproc_t encode, struct rpc_rqst *rqstp, u32 *p, void *obj)
+{
+	struct xdr_buf	*snd_buf = &rqstp->rq_snd_buf;
+	u32		offset;
+	u32             maj_stat;
+	int		status;
+	u32		*opaque_len;
+	struct page	**inpages;
+	int		first;
+	int		pad;
+	struct kvec	*iov;
+	char		*tmp;
+
+	opaque_len = p++;
+	offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
+	*p++ = htonl(rqstp->rq_seqno);
+
+	status = encode(rqstp, p, obj);
+	if (status)
+		return status;
+
+	status = alloc_enc_pages(rqstp);
+	if (status)
+		return status;
+	/* XXXJBF: Oops!  Do we need rq_enc_pages really any more?? */
+	first = snd_buf->page_base >> PAGE_CACHE_SHIFT;
+	inpages = snd_buf->pages + first;
+	snd_buf->pages = rqstp->rq_enc_pages;
+	snd_buf->page_base -= first << PAGE_CACHE_SHIFT;
+	/* XXX?: tail needs to be separate if we want to be able to expand
+	 * the head (since it's often put right after the head).  But is
+	 * expanding the head safe in any case? */
+	if (snd_buf->page_len || snd_buf->tail[0].iov_len) {
+		tmp = page_address(rqstp->rq_enc_pages[rqstp->rq_enc_pages_num - 1]);
+		memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len);
+		snd_buf->tail[0].iov_base = tmp;
+	}
+	maj_stat = gss_wrap(ctx->gc_gss_ctx, GSS_C_QOP_DEFAULT, offset,
+				snd_buf, inpages);
+        status = -EIO; /* XXX? */
+	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
+		cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
+	else if (maj_stat)
+		return status;
+
+	*opaque_len = htonl(snd_buf->len - offset);
+	/* guess whether we're in the head or the tail: */
+	if (snd_buf->page_len || snd_buf->tail[0].iov_len)
+		iov = snd_buf->tail;
+	else
+		iov = snd_buf->head;
+	p = iov->iov_base + iov->iov_len;
+	pad = 3 - ((snd_buf->len - offset - 1) & 3);
+	memset(p, 0, pad);
+	iov->iov_len += pad;
+	snd_buf->len += pad;
+
+	return 0;
+}
+
 static int
 gss_wrap_req(struct rpc_task *task,
 	     kxdrproc_t encode, void *rqstp, u32 *p, void *obj)
@@ -894,9 +1015,13 @@
 			status = encode(rqstp, p, obj);
 			goto out;
 		case RPC_GSS_SVC_INTEGRITY:
-			status = gss_wrap_req_integ(ctx, encode, rqstp, p, obj);
+			status = gss_wrap_req_integ(cred, ctx, encode,
+								rqstp, p, obj);
 			goto out;
 		case RPC_GSS_SVC_PRIVACY:
+			status = gss_wrap_req_priv(cred, ctx, encode,
+								rqstp, p, obj);
+			goto out;
 		default:
 			goto out;
 	}
@@ -907,11 +1032,10 @@
 }
 
 static inline int
-gss_unwrap_resp_integ(struct gss_cl_ctx *ctx,
-		kxdrproc_t decode, void *rqstp, u32 **p, void *obj)
+gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
+		struct rpc_rqst *rqstp, u32 **p)
 {
-	struct rpc_rqst *req = (struct rpc_rqst *)rqstp;
-	struct xdr_buf	*rcv_buf = &req->rq_rcv_buf;
+	struct xdr_buf	*rcv_buf = &rqstp->rq_rcv_buf;
 	struct xdr_buf integ_buf;
 	struct xdr_netobj mic;
 	u32 data_offset, mic_offset;
@@ -926,7 +1050,7 @@
 	mic_offset = integ_len + data_offset;
 	if (mic_offset > rcv_buf->len)
 		return status;
-	if (ntohl(*(*p)++) != req->rq_seqno)
+	if (ntohl(*(*p)++) != rqstp->rq_seqno)
 		return status;
 
 	if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset,
@@ -938,11 +1062,44 @@
 
 	maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf,
 			&mic, NULL);
+	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
+		cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
+	if (maj_stat != GSS_S_COMPLETE)
+		return status;
+	return 0;
+}
+
+static inline int
+gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
+		struct rpc_rqst *rqstp, u32 **p)
+{
+	struct xdr_buf  *rcv_buf = &rqstp->rq_rcv_buf;
+	u32 offset, out_offset;
+	u32 opaque_len;
+	u32 maj_stat;
+	int status = -EIO;
+
+	opaque_len = ntohl(*(*p)++);
+	offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base;
+	if (offset + opaque_len > rcv_buf->len)
+		return status;
+	/* remove padding: */
+	rcv_buf->len = offset + opaque_len;
+
+	maj_stat = gss_unwrap(ctx->gc_gss_ctx, NULL,
+			offset, rcv_buf, &out_offset);
+	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
+		cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
 	if (maj_stat != GSS_S_COMPLETE)
 		return status;
+	*p = (u32 *)(rcv_buf->head[0].iov_base + out_offset);
+	if (ntohl(*(*p)++) != rqstp->rq_seqno)
+		return status;
+
 	return 0;
 }
 
+
 static int
 gss_unwrap_resp(struct rpc_task *task,
 		kxdrproc_t decode, void *rqstp, u32 *p, void *obj)
@@ -962,12 +1119,16 @@
 		case RPC_GSS_SVC_NONE:
 			goto out_decode;
 		case RPC_GSS_SVC_INTEGRITY:
-			status = gss_unwrap_resp_integ(ctx, decode, 
-							rqstp, &p, obj);
+			status = gss_unwrap_resp_integ(cred, ctx, rqstp, &p);
 			if (status)
 				goto out;
 			break;
 		case RPC_GSS_SVC_PRIVACY:
+			status = gss_unwrap_resp_priv(cred, ctx, rqstp, &p);
+			if (status)
+				goto out;
+			break;
+
 		default:
 			goto out;
 	}
Index: linux-2.6.10/net/sunrpc/svc.c
===================================================================
--- linux-2.6.10.orig/net/sunrpc/svc.c	2004-12-25 05:35:28.000000000 +0800
+++ linux-2.6.10/net/sunrpc/svc.c	2005-04-05 14:49:13.409690736 +0800
@@ -264,6 +264,7 @@
 	u32			dir, prog, vers, proc,
 				auth_stat, rpc_stat;
 	int			auth_res;
+	u32			*accept_statp;
 
 	rpc_stat = rpc_success;
 
@@ -299,6 +300,9 @@
 	if (vers != 2)		/* RPC version number */
 		goto err_bad_rpc;
 
+	/* Save position in case we later decide to reject: */
+	accept_statp = resv->iov_base + resv->iov_len;
+
 	svc_putu32(resv, xdr_zero);		/* ACCEPT */
 
 	rqstp->rq_prog = prog = ntohl(svc_getu32(argv));	/* program number */
@@ -311,10 +315,12 @@
 	 * We do this before anything else in order to get a decent
 	 * auth verifier.
 	 */
-	if (progp->pg_authenticate != NULL)
-		auth_res = progp->pg_authenticate(rqstp, &auth_stat);
-	else
-		auth_res = svc_authenticate(rqstp, &auth_stat);
+	auth_res = svc_authenticate(rqstp, &auth_stat);
+	/* Also give the program a chance to reject this call: */
+	if (auth_res == SVC_OK) {
+		auth_stat = rpc_autherr_badcred;
+		auth_res = progp->pg_authenticate(rqstp);
+	}
 	switch (auth_res) {
 	case SVC_OK:
 		break;
@@ -437,7 +443,8 @@
 err_bad_auth:
 	dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat));
 	serv->sv_stats->rpcbadauth++;
-	resv->iov_len -= 4;
+	/* Restore write pointer to location of accept status: */
+	xdr_ressize_check(rqstp, accept_statp);
 	svc_putu32(resv, xdr_one);	/* REJECT */
 	svc_putu32(resv, xdr_one);	/* AUTH_ERROR */
 	svc_putu32(resv, auth_stat);	/* status */
Index: linux-2.6.10/net/sunrpc/sched.c
===================================================================
--- linux-2.6.10.orig/net/sunrpc/sched.c	2004-12-25 05:34:58.000000000 +0800
+++ linux-2.6.10/net/sunrpc/sched.c	2005-04-05 14:49:13.391693472 +0800
@@ -41,13 +41,7 @@
 
 static void			__rpc_default_timer(struct rpc_task *task);
 static void			rpciod_killall(void);
-
-/*
- * When an asynchronous RPC task is activated within a bottom half
- * handler, or while executing another RPC task, it is put on
- * schedq, and rpciod is woken up.
- */
-static RPC_WAITQ(schedq, "schedq");
+static void			rpc_async_schedule(void *);
 
 /*
  * RPC tasks that create another task (e.g. for contacting the portmapper)
@@ -68,26 +62,18 @@
 /*
  * rpciod-related stuff
  */
-static DECLARE_WAIT_QUEUE_HEAD(rpciod_idle);
-static DECLARE_COMPLETION(rpciod_killer);
 static DECLARE_MUTEX(rpciod_sema);
 static unsigned int		rpciod_users;
-static pid_t			rpciod_pid;
-static int			rpc_inhibit;
+static struct workqueue_struct *rpciod_workqueue;
 
 /*
- * Spinlock for wait queues. Access to the latter also has to be
- * interrupt-safe in order to allow timers to wake up sleeping tasks.
- */
-static spinlock_t rpc_queue_lock = SPIN_LOCK_UNLOCKED;
-/*
  * Spinlock for other critical sections of code.
  */
 static spinlock_t rpc_sched_lock = SPIN_LOCK_UNLOCKED;
 
 /*
  * Disable the timer for a given RPC task. Should be called with
- * rpc_queue_lock and bh_disabled in order to avoid races within
+ * queue->lock and bh_disabled in order to avoid races within
  * rpc_run_timer().
  */
 static inline void
@@ -105,19 +91,19 @@
  * without calling del_timer_sync(). The latter could cause a
  * deadlock if called while we're holding spinlocks...
  */
-static void
-rpc_run_timer(struct rpc_task *task)
+static void rpc_run_timer(struct rpc_task *task)
 {
 	void (*callback)(struct rpc_task *);
 
-	spin_lock_bh(&rpc_queue_lock);
 	callback = task->tk_timeout_fn;
 	task->tk_timeout_fn = NULL;
-	spin_unlock_bh(&rpc_queue_lock);
-	if (callback) {
+	if (callback && RPC_IS_QUEUED(task)) {
 		dprintk("RPC: %4d running timer\n", task->tk_pid);
 		callback(task);
 	}
+	smp_mb__before_clear_bit();
+	clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate);
+	smp_mb__after_clear_bit();
 }
 
 /*
@@ -136,29 +122,21 @@
 		task->tk_timeout_fn = timer;
 	else
 		task->tk_timeout_fn = __rpc_default_timer;
+	set_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate);
 	mod_timer(&task->tk_timer, jiffies + task->tk_timeout);
 }
 
 /*
- * Set up a timer for an already sleeping task.
- */
-void rpc_add_timer(struct rpc_task *task, rpc_action timer)
-{
-	spin_lock_bh(&rpc_queue_lock);
-	if (!RPC_IS_RUNNING(task))
-		__rpc_add_timer(task, timer);
-	spin_unlock_bh(&rpc_queue_lock);
-}
-
-/*
  * Delete any timer for the current task. Because we use del_timer_sync(),
- * this function should never be called while holding rpc_queue_lock.
+ * this function should never be called while holding queue->lock.
  */
 static inline void
 rpc_delete_timer(struct rpc_task *task)
 {
-	if (del_timer_sync(&task->tk_timer))
+	if (test_and_clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate)) {
+		del_singleshot_timer_sync(&task->tk_timer);
 		dprintk("RPC: %4d deleting timer\n", task->tk_pid);
+	}
 }
 
 /*
@@ -169,16 +147,17 @@
 	struct list_head *q;
 	struct rpc_task *t;
 
+	INIT_LIST_HEAD(&task->u.tk_wait.links);
 	q = &queue->tasks[task->tk_priority];
 	if (unlikely(task->tk_priority > queue->maxpriority))
 		q = &queue->tasks[queue->maxpriority];
-	list_for_each_entry(t, q, tk_list) {
+	list_for_each_entry(t, q, u.tk_wait.list) {
 		if (t->tk_cookie == task->tk_cookie) {
-			list_add_tail(&task->tk_list, &t->tk_links);
+			list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links);
 			return;
 		}
 	}
-	list_add_tail(&task->tk_list, q);
+	list_add_tail(&task->u.tk_wait.list, q);
 }
 
 /*
@@ -189,37 +168,21 @@
  * improve overall performance.
  * Everyone else gets appended to the queue to ensure proper FIFO behavior.
  */
-static int __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
+static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
 {
-	if (task->tk_rpcwait == queue)
-		return 0;
+	BUG_ON (RPC_IS_QUEUED(task));
 
-	if (task->tk_rpcwait) {
-		printk(KERN_WARNING "RPC: doubly enqueued task!\n");
-		return -EWOULDBLOCK;
-	}
 	if (RPC_IS_PRIORITY(queue))
 		__rpc_add_wait_queue_priority(queue, task);
 	else if (RPC_IS_SWAPPER(task))
-		list_add(&task->tk_list, &queue->tasks[0]);
+		list_add(&task->u.tk_wait.list, &queue->tasks[0]);
 	else
-		list_add_tail(&task->tk_list, &queue->tasks[0]);
-	task->tk_rpcwait = queue;
+		list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]);
+	task->u.tk_wait.rpc_waitq = queue;
+	rpc_set_queued(task);
 
 	dprintk("RPC: %4d added to queue %p \"%s\"\n",
 				task->tk_pid, queue, rpc_qname(queue));
-
-	return 0;
-}
-
-int rpc_add_wait_queue(struct rpc_wait_queue *q, struct rpc_task *task)
-{
-	int		result;
-
-	spin_lock_bh(&rpc_queue_lock);
-	result = __rpc_add_wait_queue(q, task);
-	spin_unlock_bh(&rpc_queue_lock);
-	return result;
 }
 
 /*
@@ -229,12 +192,12 @@
 {
 	struct rpc_task *t;
 
-	if (!list_empty(&task->tk_links)) {
-		t = list_entry(task->tk_links.next, struct rpc_task, tk_list);
-		list_move(&t->tk_list, &task->tk_list);
-		list_splice_init(&task->tk_links, &t->tk_links);
+	if (!list_empty(&task->u.tk_wait.links)) {
+		t = list_entry(task->u.tk_wait.links.next, struct rpc_task, u.tk_wait.list);
+		list_move(&t->u.tk_wait.list, &task->u.tk_wait.list);
+		list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links);
 	}
-	list_del(&task->tk_list);
+	list_del(&task->u.tk_wait.list);
 }
 
 /*
@@ -243,31 +206,17 @@
  */
 static void __rpc_remove_wait_queue(struct rpc_task *task)
 {
-	struct rpc_wait_queue *queue = task->tk_rpcwait;
-
-	if (!queue)
-		return;
+	struct rpc_wait_queue *queue;
+	queue = task->u.tk_wait.rpc_waitq;
 
 	if (RPC_IS_PRIORITY(queue))
 		__rpc_remove_wait_queue_priority(task);
 	else
-		list_del(&task->tk_list);
-	task->tk_rpcwait = NULL;
-
+		list_del(&task->u.tk_wait.list);
 	dprintk("RPC: %4d removed from queue %p \"%s\"\n",
 				task->tk_pid, queue, rpc_qname(queue));
 }
 
-void
-rpc_remove_wait_queue(struct rpc_task *task)
-{
-	if (!task->tk_rpcwait)
-		return;
-	spin_lock_bh(&rpc_queue_lock);
-	__rpc_remove_wait_queue(task);
-	spin_unlock_bh(&rpc_queue_lock);
-}
-
 static inline void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
 {
 	queue->priority = priority;
@@ -290,6 +239,7 @@
 {
 	int i;
 
+	spin_lock_init(&queue->lock);
 	for (i = 0; i < ARRAY_SIZE(queue->tasks); i++)
 		INIT_LIST_HEAD(&queue->tasks[i]);
 	queue->maxpriority = maxprio;
@@ -316,34 +266,31 @@
  * Note: If the task is ASYNC, this must be called with 
  * the spinlock held to protect the wait queue operation.
  */
-static inline void
-rpc_make_runnable(struct rpc_task *task)
+static void rpc_make_runnable(struct rpc_task *task)
 {
-	if (task->tk_timeout_fn) {
-		printk(KERN_ERR "RPC: task w/ running timer in rpc_make_runnable!!\n");
+	int do_ret;
+
+	BUG_ON(task->tk_timeout_fn);
+	do_ret = rpc_test_and_set_running(task);
+	rpc_clear_queued(task);
+	if (do_ret)
 		return;
-	}
-	rpc_set_running(task);
 	if (RPC_IS_ASYNC(task)) {
-		if (RPC_IS_SLEEPING(task)) {
-			int status;
-			status = __rpc_add_wait_queue(&schedq, task);
-			if (status < 0) {
-				printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
-				task->tk_status = status;
-				return;
-			}
-			rpc_clear_sleeping(task);
-			wake_up(&rpciod_idle);
+		int status;
+
+		INIT_WORK(&task->u.tk_work, rpc_async_schedule, (void *)task);
+		status = queue_work(task->tk_workqueue, &task->u.tk_work);
+		if (status < 0) {
+			printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
+			task->tk_status = status;
+			return;
 		}
-	} else {
-		rpc_clear_sleeping(task);
-		wake_up(&task->tk_wait);
-	}
+	} else
+		wake_up(&task->u.tk_wait.waitq);
 }
 
 /*
- * Place a newly initialized task on the schedq.
+ * Place a newly initialized task on the workqueue.
  */
 static inline void
 rpc_schedule_run(struct rpc_task *task)
@@ -352,33 +299,18 @@
 	if (RPC_IS_ACTIVATED(task))
 		return;
 	task->tk_active = 1;
-	rpc_set_sleeping(task);
 	rpc_make_runnable(task);
 }
 
 /*
- *	For other people who may need to wake the I/O daemon
- *	but should (for now) know nothing about its innards
- */
-void rpciod_wake_up(void)
-{
-	if(rpciod_pid==0)
-		printk(KERN_ERR "rpciod: wot no daemon?\n");
-	wake_up(&rpciod_idle);
-}
-
-/*
  * Prepare for sleeping on a wait queue.
  * By always appending tasks to the list we ensure FIFO behavior.
  * NB: An RPC task will only receive interrupt-driven events as long
  * as it's on a wait queue.
  */
-static void
-__rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
+static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
 			rpc_action action, rpc_action timer)
 {
-	int status;
-
 	dprintk("RPC: %4d sleep_on(queue \"%s\" time %ld)\n", task->tk_pid,
 				rpc_qname(q), jiffies);
 
@@ -388,49 +320,36 @@
 	}
 
 	/* Mark the task as being activated if so needed */
-	if (!RPC_IS_ACTIVATED(task)) {
+	if (!RPC_IS_ACTIVATED(task))
 		task->tk_active = 1;
-		rpc_set_sleeping(task);
-	}
 
-	status = __rpc_add_wait_queue(q, task);
-	if (status) {
-		printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
-		task->tk_status = status;
-	} else {
-		rpc_clear_running(task);
-		if (task->tk_callback) {
-			dprintk(KERN_ERR "RPC: %4d overwrites an active callback\n", task->tk_pid);
-			BUG();
-		}
-		task->tk_callback = action;
-		__rpc_add_timer(task, timer);
-	}
+	__rpc_add_wait_queue(q, task);
+
+	BUG_ON(task->tk_callback != NULL);
+	task->tk_callback = action;
+	__rpc_add_timer(task, timer);
 }
 
-void
-rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
+void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
 				rpc_action action, rpc_action timer)
 {
 	/*
 	 * Protect the queue operations.
 	 */
-	spin_lock_bh(&rpc_queue_lock);
+	spin_lock_bh(&q->lock);
 	__rpc_sleep_on(q, task, action, timer);
-	spin_unlock_bh(&rpc_queue_lock);
+	spin_unlock_bh(&q->lock);
 }
 
 /**
- * __rpc_wake_up_task - wake up a single rpc_task
+ * __rpc_do_wake_up_task - wake up a single rpc_task
  * @task: task to be woken up
  *
- * Caller must hold rpc_queue_lock
+ * Caller must hold queue->lock, and have cleared the task queued flag.
  */
-static void
-__rpc_wake_up_task(struct rpc_task *task)
+static void __rpc_do_wake_up_task(struct rpc_task *task)
 {
-	dprintk("RPC: %4d __rpc_wake_up_task (now %ld inh %d)\n",
-					task->tk_pid, jiffies, rpc_inhibit);
+	dprintk("RPC: %4d __rpc_wake_up_task (now %ld)\n", task->tk_pid, jiffies);
 
 #ifdef RPC_DEBUG
 	if (task->tk_magic != 0xf00baa) {
@@ -445,12 +364,9 @@
 		printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task);
 		return;
 	}
-	if (RPC_IS_RUNNING(task))
-		return;
 
 	__rpc_disable_timer(task);
-	if (task->tk_rpcwait != &schedq)
-		__rpc_remove_wait_queue(task);
+	__rpc_remove_wait_queue(task);
 
 	rpc_make_runnable(task);
 
@@ -458,6 +374,18 @@
 }
 
 /*
+ * Wake up the specified task
+ */
+static void __rpc_wake_up_task(struct rpc_task *task)
+{
+	if (rpc_start_wakeup(task)) {
+		if (RPC_IS_QUEUED(task))
+			__rpc_do_wake_up_task(task);
+		rpc_finish_wakeup(task);
+	}
+}
+
+/*
  * Default timeout handler if none specified by user
  */
 static void
@@ -471,14 +399,18 @@
 /*
  * Wake up the specified task
  */
-void
-rpc_wake_up_task(struct rpc_task *task)
+void rpc_wake_up_task(struct rpc_task *task)
 {
-	if (RPC_IS_RUNNING(task))
-		return;
-	spin_lock_bh(&rpc_queue_lock);
-	__rpc_wake_up_task(task);
-	spin_unlock_bh(&rpc_queue_lock);
+	if (rpc_start_wakeup(task)) {
+		if (RPC_IS_QUEUED(task)) {
+			struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq;
+
+			spin_lock_bh(&queue->lock);
+			__rpc_do_wake_up_task(task);
+			spin_unlock_bh(&queue->lock);
+		}
+		rpc_finish_wakeup(task);
+	}
 }
 
 /*
@@ -494,11 +426,11 @@
 	 */
 	q = &queue->tasks[queue->priority];
 	if (!list_empty(q)) {
-		task = list_entry(q->next, struct rpc_task, tk_list);
+		task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
 		if (queue->cookie == task->tk_cookie) {
 			if (--queue->nr)
 				goto out;
-			list_move_tail(&task->tk_list, q);
+			list_move_tail(&task->u.tk_wait.list, q);
 		}
 		/*
 		 * Check if we need to switch queues.
@@ -516,7 +448,7 @@
 		else
 			q = q - 1;
 		if (!list_empty(q)) {
-			task = list_entry(q->next, struct rpc_task, tk_list);
+			task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
 			goto new_queue;
 		}
 	} while (q != &queue->tasks[queue->priority]);
@@ -541,14 +473,14 @@
 	struct rpc_task	*task = NULL;
 
 	dprintk("RPC:      wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue));
-	spin_lock_bh(&rpc_queue_lock);
+	spin_lock_bh(&queue->lock);
 	if (RPC_IS_PRIORITY(queue))
 		task = __rpc_wake_up_next_priority(queue);
 	else {
 		task_for_first(task, &queue->tasks[0])
 			__rpc_wake_up_task(task);
 	}
-	spin_unlock_bh(&rpc_queue_lock);
+	spin_unlock_bh(&queue->lock);
 
 	return task;
 }
@@ -557,25 +489,25 @@
  * rpc_wake_up - wake up all rpc_tasks
  * @queue: rpc_wait_queue on which the tasks are sleeping
  *
- * Grabs rpc_queue_lock
+ * Grabs queue->lock
  */
 void rpc_wake_up(struct rpc_wait_queue *queue)
 {
 	struct rpc_task *task;
 
 	struct list_head *head;
-	spin_lock_bh(&rpc_queue_lock);
+	spin_lock_bh(&queue->lock);
 	head = &queue->tasks[queue->maxpriority];
 	for (;;) {
 		while (!list_empty(head)) {
-			task = list_entry(head->next, struct rpc_task, tk_list);
+			task = list_entry(head->next, struct rpc_task, u.tk_wait.list);
 			__rpc_wake_up_task(task);
 		}
 		if (head == &queue->tasks[0])
 			break;
 		head--;
 	}
-	spin_unlock_bh(&rpc_queue_lock);
+	spin_unlock_bh(&queue->lock);
 }
 
 /**
@@ -583,18 +515,18 @@
  * @queue: rpc_wait_queue on which the tasks are sleeping
  * @status: status value to set
  *
- * Grabs rpc_queue_lock
+ * Grabs queue->lock
  */
 void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
 {
 	struct list_head *head;
 	struct rpc_task *task;
 
-	spin_lock_bh(&rpc_queue_lock);
+	spin_lock_bh(&queue->lock);
 	head = &queue->tasks[queue->maxpriority];
 	for (;;) {
 		while (!list_empty(head)) {
-			task = list_entry(head->next, struct rpc_task, tk_list);
+			task = list_entry(head->next, struct rpc_task, u.tk_wait.list);
 			task->tk_status = status;
 			__rpc_wake_up_task(task);
 		}
@@ -602,7 +534,7 @@
 			break;
 		head--;
 	}
-	spin_unlock_bh(&rpc_queue_lock);
+	spin_unlock_bh(&queue->lock);
 }
 
 /*
@@ -626,22 +558,23 @@
 /*
  * This is the RPC `scheduler' (or rather, the finite state machine).
  */
-static int
-__rpc_execute(struct rpc_task *task)
+static int __rpc_execute(struct rpc_task *task)
 {
 	int		status = 0;
 
 	dprintk("RPC: %4d rpc_execute flgs %x\n",
 				task->tk_pid, task->tk_flags);
 
-	if (!RPC_IS_RUNNING(task)) {
-		printk(KERN_WARNING "RPC: rpc_execute called for sleeping task!!\n");
-		return 0;
-	}
+	BUG_ON(RPC_IS_QUEUED(task));
 
  restarted:
 	while (1) {
 		/*
+		 * Garbage collection of pending timers...
+		 */
+		rpc_delete_timer(task);
+
+		/*
 		 * Execute any pending callback.
 		 */
 		if (RPC_DO_CALLBACK(task)) {
@@ -657,7 +590,9 @@
 			 */
 			save_callback=task->tk_callback;
 			task->tk_callback=NULL;
+			lock_kernel();
 			save_callback(task);
+			unlock_kernel();
 		}
 
 		/*
@@ -665,43 +600,35 @@
 		 * tk_action may be NULL when the task has been killed
 		 * by someone else.
 		 */
-		if (RPC_IS_RUNNING(task)) {
-			/*
-			 * Garbage collection of pending timers...
-			 */
-			rpc_delete_timer(task);
+		if (!RPC_IS_QUEUED(task)) {
 			if (!task->tk_action)
 				break;
+			lock_kernel();
 			task->tk_action(task);
-			/* micro-optimization to avoid spinlock */
-			if (RPC_IS_RUNNING(task))
-				continue;
+			unlock_kernel();
 		}
 
 		/*
-		 * Check whether task is sleeping.
+		 * Lockless check for whether task is sleeping or not.
 		 */
-		spin_lock_bh(&rpc_queue_lock);
-		if (!RPC_IS_RUNNING(task)) {
-			rpc_set_sleeping(task);
-			if (RPC_IS_ASYNC(task)) {
-				spin_unlock_bh(&rpc_queue_lock);
+		if (!RPC_IS_QUEUED(task))
+			continue;
+		rpc_clear_running(task);
+		if (RPC_IS_ASYNC(task)) {
+			/* Careful! we may have raced... */
+			if (RPC_IS_QUEUED(task))
 				return 0;
-			}
+			if (rpc_test_and_set_running(task))
+				return 0;
+			continue;
 		}
-		spin_unlock_bh(&rpc_queue_lock);
 
-		if (!RPC_IS_SLEEPING(task))
-			continue;
 		/* sync task: sleep here */
 		dprintk("RPC: %4d sync task going to sleep\n", task->tk_pid);
-		if (current->pid == rpciod_pid)
-			printk(KERN_ERR "RPC: rpciod waiting on sync task!\n");
-
 		if (RPC_TASK_UNINTERRUPTIBLE(task)) {
-			__wait_event(task->tk_wait, !RPC_IS_SLEEPING(task));
+			__wait_event(task->u.tk_wait.waitq, !RPC_IS_QUEUED(task));
 		} else {
-			__wait_event_interruptible(task->tk_wait, !RPC_IS_SLEEPING(task), status);
+			__wait_event_interruptible(task->u.tk_wait.waitq, !RPC_IS_QUEUED(task), status);
 			/*
 			 * When a sync task receives a signal, it exits with
 			 * -ERESTARTSYS. In order to catch any callbacks that
@@ -715,11 +642,14 @@
 				rpc_wake_up_task(task);
 			}
 		}
+		rpc_set_running(task);
 		dprintk("RPC: %4d sync task resuming\n", task->tk_pid);
 	}
 
 	if (task->tk_exit) {
+		lock_kernel();
 		task->tk_exit(task);
+		unlock_kernel();
 		/* If tk_action is non-null, the user wants us to restart */
 		if (task->tk_action) {
 			if (!RPC_ASSASSINATED(task)) {
@@ -738,7 +668,6 @@
 
 	/* Release all resources associated with the task */
 	rpc_release_task(task);
-
 	return status;
 }
 
@@ -754,57 +683,16 @@
 int
 rpc_execute(struct rpc_task *task)
 {
-	int status = -EIO;
-	if (rpc_inhibit) {
-		printk(KERN_INFO "RPC: execution inhibited!\n");
-		goto out_release;
-	}
-
-	status = -EWOULDBLOCK;
-	if (task->tk_active) {
-		printk(KERN_ERR "RPC: active task was run twice!\n");
-		goto out_err;
-	}
+	BUG_ON(task->tk_active);
 
 	task->tk_active = 1;
 	rpc_set_running(task);
 	return __rpc_execute(task);
- out_release:
-	rpc_release_task(task);
- out_err:
-	return status;
 }
 
-/*
- * This is our own little scheduler for async RPC tasks.
- */
-static void
-__rpc_schedule(void)
+static void rpc_async_schedule(void *arg)
 {
-	struct rpc_task	*task;
-	int		count = 0;
-
-	dprintk("RPC:      rpc_schedule enter\n");
-	while (1) {
-
-		task_for_first(task, &schedq.tasks[0]) {
-			__rpc_remove_wait_queue(task);
-			spin_unlock_bh(&rpc_queue_lock);
-
-			__rpc_execute(task);
-			spin_lock_bh(&rpc_queue_lock);
-		} else {
-			break;
-		}
-
-		if (++count >= 200 || need_resched()) {
-			count = 0;
-			spin_unlock_bh(&rpc_queue_lock);
-			schedule();
-			spin_lock_bh(&rpc_queue_lock);
-		}
-	}
-	dprintk("RPC:      rpc_schedule leave\n");
+	__rpc_execute((struct rpc_task *)arg);
 }
 
 /*
@@ -862,7 +750,6 @@
 	task->tk_client = clnt;
 	task->tk_flags  = flags;
 	task->tk_exit   = callback;
-	init_waitqueue_head(&task->tk_wait);
 	if (current->uid != current->fsuid || current->gid != current->fsgid)
 		task->tk_flags |= RPC_TASK_SETUID;
 
@@ -873,7 +760,11 @@
 
 	task->tk_priority = RPC_PRIORITY_NORMAL;
 	task->tk_cookie = (unsigned long)current;
-	INIT_LIST_HEAD(&task->tk_links);
+
+	/* Initialize workqueue for async tasks */
+	task->tk_workqueue = rpciod_workqueue;
+	if (!RPC_IS_ASYNC(task))
+		init_waitqueue_head(&task->u.tk_wait.waitq);
 
 	/* Add to global list of all tasks */
 	spin_lock(&rpc_sched_lock);
@@ -944,8 +835,7 @@
 	goto out;
 }
 
-void
-rpc_release_task(struct rpc_task *task)
+void rpc_release_task(struct rpc_task *task)
 {
 	dprintk("RPC: %4d release task\n", task->tk_pid);
 
@@ -963,19 +853,9 @@
 	list_del(&task->tk_task);
 	spin_unlock(&rpc_sched_lock);
 
-	/* Protect the execution below. */
-	spin_lock_bh(&rpc_queue_lock);
-
-	/* Disable timer to prevent zombie wakeup */
-	__rpc_disable_timer(task);
-
-	/* Remove from any wait queue we're still on */
-	__rpc_remove_wait_queue(task);
-
+	BUG_ON (RPC_IS_QUEUED(task));
 	task->tk_active = 0;
 
-	spin_unlock_bh(&rpc_queue_lock);
-
 	/* Synchronously delete any running timer */
 	rpc_delete_timer(task);
 
@@ -1005,10 +885,9 @@
  * queue 'childq'. If so returns a pointer to the parent.
  * Upon failure returns NULL.
  *
- * Caller must hold rpc_queue_lock
+ * Caller must hold childq.lock
  */
-static inline struct rpc_task *
-rpc_find_parent(struct rpc_task *child)
+static inline struct rpc_task *rpc_find_parent(struct rpc_task *child)
 {
 	struct rpc_task	*task, *parent;
 	struct list_head *le;
@@ -1021,17 +900,16 @@
 	return NULL;
 }
 
-static void
-rpc_child_exit(struct rpc_task *child)
+static void rpc_child_exit(struct rpc_task *child)
 {
 	struct rpc_task	*parent;
 
-	spin_lock_bh(&rpc_queue_lock);
+	spin_lock_bh(&childq.lock);
 	if ((parent = rpc_find_parent(child)) != NULL) {
 		parent->tk_status = child->tk_status;
 		__rpc_wake_up_task(parent);
 	}
-	spin_unlock_bh(&rpc_queue_lock);
+	spin_unlock_bh(&childq.lock);
 }
 
 /*
@@ -1054,22 +932,20 @@
 	return NULL;
 }
 
-void
-rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func)
+void rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func)
 {
-	spin_lock_bh(&rpc_queue_lock);
+	spin_lock_bh(&childq.lock);
 	/* N.B. Is it possible for the child to have already finished? */
 	__rpc_sleep_on(&childq, task, func, NULL);
 	rpc_schedule_run(child);
-	spin_unlock_bh(&rpc_queue_lock);
+	spin_unlock_bh(&childq.lock);
 }
 
 /*
  * Kill all tasks for the given client.
  * XXX: kill their descendants as well?
  */
-void
-rpc_killall_tasks(struct rpc_clnt *clnt)
+void rpc_killall_tasks(struct rpc_clnt *clnt)
 {
 	struct rpc_task	*rovr;
 	struct list_head *le;
@@ -1091,93 +967,14 @@
 
 static DECLARE_MUTEX_LOCKED(rpciod_running);
 
-static inline int
-rpciod_task_pending(void)
-{
-	return !list_empty(&schedq.tasks[0]);
-}
-
-
-/*
- * This is the rpciod kernel thread
- */
-static int
-rpciod(void *ptr)
-{
-	int		rounds = 0;
-
-	lock_kernel();
-	/*
-	 * Let our maker know we're running ...
-	 */
-	rpciod_pid = current->pid;
-	up(&rpciod_running);
-
-	daemonize("rpciod");
-	allow_signal(SIGKILL);
-
-	dprintk("RPC: rpciod starting (pid %d)\n", rpciod_pid);
-	spin_lock_bh(&rpc_queue_lock);
-	while (rpciod_users) {
-		DEFINE_WAIT(wait);
-		if (signalled()) {
-			spin_unlock_bh(&rpc_queue_lock);
-			rpciod_killall();
-			flush_signals(current);
-			spin_lock_bh(&rpc_queue_lock);
-		}
-		__rpc_schedule();
-		if (current->flags & PF_FREEZE) {
-			spin_unlock_bh(&rpc_queue_lock);
-			refrigerator(PF_FREEZE);
-			spin_lock_bh(&rpc_queue_lock);
-		}
-
-		if (++rounds >= 64) {	/* safeguard */
-			spin_unlock_bh(&rpc_queue_lock);
-			schedule();
-			rounds = 0;
-			spin_lock_bh(&rpc_queue_lock);
-		}
-
-		dprintk("RPC: rpciod back to sleep\n");
-		prepare_to_wait(&rpciod_idle, &wait, TASK_INTERRUPTIBLE);
-		if (!rpciod_task_pending() && !signalled()) {
-			spin_unlock_bh(&rpc_queue_lock);
-			schedule();
-			rounds = 0;
-			spin_lock_bh(&rpc_queue_lock);
-		}
-		finish_wait(&rpciod_idle, &wait);
-		dprintk("RPC: switch to rpciod\n");
-	}
-	spin_unlock_bh(&rpc_queue_lock);
-
-	dprintk("RPC: rpciod shutdown commences\n");
-	if (!list_empty(&all_tasks)) {
-		printk(KERN_ERR "rpciod: active tasks at shutdown?!\n");
-		rpciod_killall();
-	}
-
-	dprintk("RPC: rpciod exiting\n");
-	unlock_kernel();
-
-	rpciod_pid = 0;
-	complete_and_exit(&rpciod_killer, 0);
-	return 0;
-}
-
-static void
-rpciod_killall(void)
+static void rpciod_killall(void)
 {
 	unsigned long flags;
 
 	while (!list_empty(&all_tasks)) {
 		clear_thread_flag(TIF_SIGPENDING);
 		rpc_killall_tasks(NULL);
-		spin_lock_bh(&rpc_queue_lock);
-		__rpc_schedule();
-		spin_unlock_bh(&rpc_queue_lock);
+		flush_workqueue(rpciod_workqueue);
 		if (!list_empty(&all_tasks)) {
 			dprintk("rpciod_killall: waiting for tasks to exit\n");
 			yield();
@@ -1195,28 +992,30 @@
 int
 rpciod_up(void)
 {
+	struct workqueue_struct *wq;
 	int error = 0;
 
 	down(&rpciod_sema);
-	dprintk("rpciod_up: pid %d, users %d\n", rpciod_pid, rpciod_users);
+	dprintk("rpciod_up: users %d\n", rpciod_users);
 	rpciod_users++;
-	if (rpciod_pid)
+	if (rpciod_workqueue)
 		goto out;
 	/*
 	 * If there's no pid, we should be the first user.
 	 */
 	if (rpciod_users > 1)
-		printk(KERN_WARNING "rpciod_up: no pid, %d users??\n", rpciod_users);
+		printk(KERN_WARNING "rpciod_up: no workqueue, %d users??\n", rpciod_users);
 	/*
 	 * Create the rpciod thread and wait for it to start.
 	 */
-	error = kernel_thread(rpciod, NULL, 0);
-	if (error < 0) {
-		printk(KERN_WARNING "rpciod_up: create thread failed, error=%d\n", error);
+	error = -ENOMEM;
+	wq = create_workqueue("rpciod");
+	if (wq == NULL) {
+		printk(KERN_WARNING "rpciod_up: create workqueue failed, error=%d\n", error);
 		rpciod_users--;
 		goto out;
 	}
-	down(&rpciod_running);
+	rpciod_workqueue = wq;
 	error = 0;
 out:
 	up(&rpciod_sema);
@@ -1227,20 +1026,21 @@
 rpciod_down(void)
 {
 	down(&rpciod_sema);
-	dprintk("rpciod_down pid %d sema %d\n", rpciod_pid, rpciod_users);
+	dprintk("rpciod_down sema %d\n", rpciod_users);
 	if (rpciod_users) {
 		if (--rpciod_users)
 			goto out;
 	} else
-		printk(KERN_WARNING "rpciod_down: pid=%d, no users??\n", rpciod_pid);
+		printk(KERN_WARNING "rpciod_down: no users??\n");
 
-	if (!rpciod_pid) {
+	if (!rpciod_workqueue) {
 		dprintk("rpciod_down: Nothing to do!\n");
 		goto out;
 	}
+	rpciod_killall();
 
-	kill_proc(rpciod_pid, SIGKILL, 1);
-	wait_for_completion(&rpciod_killer);
+	destroy_workqueue(rpciod_workqueue);
+	rpciod_workqueue = NULL;
  out:
 	up(&rpciod_sema);
 }
@@ -1258,7 +1058,12 @@
 	}
 	printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout "
 		"-rpcwait -action- --exit--\n");
-	alltask_for_each(t, le, &all_tasks)
+	alltask_for_each(t, le, &all_tasks) {
+		const char *rpc_waitq = "none";
+
+		if (RPC_IS_QUEUED(t))
+			rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq);
+
 		printk("%05d %04d %04x %06d %8p %6d %8p %08ld %8s %8p %8p\n",
 			t->tk_pid,
 			(t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1),
@@ -1266,8 +1071,9 @@
 			t->tk_client,
 			(t->tk_client ? t->tk_client->cl_prog : 0),
 			t->tk_rqstp, t->tk_timeout,
-			rpc_qname(t->tk_rpcwait),
+			rpc_waitq,
 			t->tk_action, t->tk_exit);
+	}
 	spin_unlock(&rpc_sched_lock);
 }
 #endif
Index: linux-2.6.10/net/sunrpc/sunrpc_syms.c
===================================================================
--- linux-2.6.10.orig/net/sunrpc/sunrpc_syms.c	2004-12-25 05:35:25.000000000 +0800
+++ linux-2.6.10/net/sunrpc/sunrpc_syms.c	2005-04-05 14:49:13.411690432 +0800
@@ -58,6 +58,9 @@
 EXPORT_SYMBOL(rpc_wake_up);
 EXPORT_SYMBOL(rpc_queue_upcall);
 EXPORT_SYMBOL(rpc_mkpipe);
+EXPORT_SYMBOL(rpc_mkdir);
+EXPORT_SYMBOL(rpc_rmdir);
+
 
 /* Client transport */
 EXPORT_SYMBOL(xprt_create_proto);
@@ -90,6 +93,7 @@
 EXPORT_SYMBOL(svc_auth_register);
 EXPORT_SYMBOL(auth_domain_lookup);
 EXPORT_SYMBOL(svc_authenticate);
+EXPORT_SYMBOL(svc_set_client);
 
 /* RPC statistics */
 #ifdef CONFIG_PROC_FS
Index: linux-2.6.10/kernel/exit.c
===================================================================
--- linux-2.6.10.orig/kernel/exit.c	2005-04-05 14:48:52.534864192 +0800
+++ linux-2.6.10/kernel/exit.c	2005-04-05 14:50:57.737830448 +0800
@@ -848,6 +848,8 @@
 	for (;;) ;
 }
 
+EXPORT_SYMBOL(do_exit);
+
 NORET_TYPE void complete_and_exit(struct completion *comp, long code)
 {
 	if (comp)
Index: linux-2.6.10/fs/locks.c
===================================================================
--- linux-2.6.10.orig/fs/locks.c	2004-12-25 05:35:28.000000000 +0800
+++ linux-2.6.10/fs/locks.c	2005-04-05 14:49:13.434686936 +0800
@@ -1096,15 +1096,13 @@
 */
 void remove_lease(struct file_lock *fl)
 {
-	if (!IS_LEASE(fl))
-		return;
-
 	lock_kernel();
-
+	if (!fl || !IS_LEASE(fl))
+		goto out;
 	fl->fl_type = F_UNLCK | F_INPROGRESS;
 	fl->fl_break_time = jiffies - 10;
 	time_out_leases(fl->fl_file->f_dentry->d_inode);
-
+out:
 	unlock_kernel();
 }
 
@@ -1563,9 +1561,6 @@
 		error = filp->f_op->lock(filp, F_GETLK, &file_lock);
 		if (error < 0)
 			goto out;
-		else if (error == LOCK_USE_CLNT)
-		  /* Bypass for NFS with no locking - 2.0.36 compat */
-		  fl = posix_test_lock(filp, &file_lock);
 		else
 		  fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
 	} else {
@@ -1708,9 +1703,6 @@
 		error = filp->f_op->lock(filp, F_GETLK, &file_lock);
 		if (error < 0)
 			goto out;
-		else if (error == LOCK_USE_CLNT)
-		  /* Bypass for NFS with no locking - 2.0.36 compat */
-		  fl = posix_test_lock(filp, &file_lock);
 		else
 		  fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
 	} else {
Index: linux-2.6.10/fs/dcache.c
===================================================================
--- linux-2.6.10.orig/fs/dcache.c	2005-03-31 15:35:26.000000000 +0800
+++ linux-2.6.10/fs/dcache.c	2005-04-05 14:49:13.413690128 +0800
@@ -789,6 +789,54 @@
 }
 
 /**
+ * d_instantiate_unique - instantiate a non-aliased dentry
+ * @entry: dentry to instantiate
+ * @inode: inode to attach to this dentry
+ *
+ * Fill in inode information in the entry. On success, it returns NULL.
+ * If an unhashed alias of "entry" already exists, then we return the
+ * aliased dentry instead.
+ *
+ * Note that in order to avoid conflicts with rename() etc, the caller
+ * had better be holding the parent directory semaphore.
+ */
+struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
+{
+	struct dentry *alias;
+	int len = entry->d_name.len;
+	const char *name = entry->d_name.name;
+	unsigned int hash = entry->d_name.hash;
+
+	BUG_ON(!list_empty(&entry->d_alias));
+	spin_lock(&dcache_lock);
+	if (!inode)
+		goto do_negative;
+	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
+		struct qstr *qstr = &alias->d_name;
+
+		if (qstr->hash != hash)
+			continue;
+		if (alias->d_parent != entry->d_parent)
+			continue;
+		if (qstr->len != len)
+			continue;
+		if (memcmp(qstr->name, name, len))
+			continue;
+		dget_locked(alias);
+		spin_unlock(&dcache_lock);
+		BUG_ON(!d_unhashed(alias));
+		return alias;
+	}
+	list_add(&entry->d_alias, &inode->i_dentry);
+do_negative:
+	entry->d_inode = inode;
+	spin_unlock(&dcache_lock);
+	security_d_instantiate(entry, inode);
+	return NULL;
+}
+EXPORT_SYMBOL(d_instantiate_unique);
+
+/**
  * d_alloc_root - allocate root dentry
  * @root_inode: inode to allocate the root for
  *
Index: linux-2.6.10/fs/lockd/svc.c
===================================================================
--- linux-2.6.10.orig/fs/lockd/svc.c	2005-03-31 15:35:26.000000000 +0800
+++ linux-2.6.10/fs/lockd/svc.c	2005-04-05 14:49:13.458683288 +0800
@@ -418,6 +418,38 @@
 	return 0;							\
 }
 
+static inline int is_callback(u32 proc)
+{
+	return proc == NLMPROC_GRANTED
+		|| proc == NLMPROC_GRANTED_MSG
+		|| proc == NLMPROC_TEST_RES
+		|| proc == NLMPROC_LOCK_RES
+		|| proc == NLMPROC_CANCEL_RES
+		|| proc == NLMPROC_UNLOCK_RES
+		|| proc == NLMPROC_NSM_NOTIFY;
+}
+
+
+static int lockd_authenticate(struct svc_rqst *rqstp)
+{
+	rqstp->rq_client = NULL;
+	switch (rqstp->rq_authop->flavour) {
+		case RPC_AUTH_NULL:
+		case RPC_AUTH_UNIX:
+			if (rqstp->rq_proc == 0)
+				return SVC_OK;
+			if (is_callback(rqstp->rq_proc)) {
+				/* Leave it to individual procedures to
+				 * call nlmsvc_lookup_host(rqstp)
+				 */
+				return SVC_OK;
+			}
+			return svc_set_client(rqstp);
+	}
+	return SVC_DENIED;
+}
+
+
 param_set_min_max(port, int, simple_strtol, 0, 65535)
 param_set_min_max(grace_period, unsigned long, simple_strtoul,
 		  nlm_grace_period_min, nlm_grace_period_max)
@@ -498,4 +530,5 @@
 	.pg_name	= "lockd",		/* service name */
 	.pg_class	= "nfsd",		/* share authentication with nfsd */
 	.pg_stats	= &nlmsvc_stats,	/* stats table */
+	.pg_authenticate = &lockd_authenticate	/* export authentication */
 };
Index: linux-2.6.10/fs/nfsd/nfs4xdr.c
===================================================================
--- linux-2.6.10.orig/fs/nfsd/nfs4xdr.c	2004-12-25 05:35:24.000000000 +0800
+++ linux-2.6.10/fs/nfsd/nfs4xdr.c	2005-04-05 14:49:13.425688304 +0800
@@ -60,121 +60,6 @@
 
 #define NFSDDBG_FACILITY		NFSDDBG_XDR
 
-static const char utf8_byte_len[256] = {
-	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-	0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-	3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4,5,5,5,5,6,6,0,0
-};
-
-static inline int
-is_legal_utf8_sequence(unsigned char *source, int length)
-{
-	unsigned char *ptr;
-	unsigned char c;
-
-	if (length==1) return 1;
-
-	/* Check for overlong sequence, and check second byte */
-	c = *(source + 1);
-	switch (*source) {
-	case 0xE0: /* 3 bytes */
-		if ( c < 0xA0 ) return 0;
-		break;
-	case 0xF0: /* 4 bytes */
-		if ( c < 0x90 ) return 0;
-		break;
-	case 0xF8: /* 5 bytes */
-		if ( c < 0xC8 ) return 0;
-		break;
-	case 0xFC: /* 6 bytes */
-		if ( c < 0x84 ) return 0;
-		break;
-	default:
-		if ( (c & 0xC0) != 0x80) return 0;
-	}
-
-	/* Check that trailing bytes look like 10xxxxxx */
-	for (ptr = source++ + length - 1; ptr>source; ptr--)
-		if ( ((*ptr) & 0xC0) != 0x80 ) return 0;
-	return 1;
-}
-
-/* This does some screening on disallowed unicode characters.  It is NOT
- * comprehensive.
- */
-static int
-is_allowed_utf8_char(unsigned char *source, int length)
-{
-	/* We assume length and source point to a valid utf8 sequence */
-	unsigned char c;
-
-	/* Disallow F0000 and up (in utf8, F3B08080) */
-	if (*source > 0xF3 ) return 0;
-	c = *(source + 1);
-	switch (*source) {
-	case 0xF3:
-		if (c >= 0xB0) return 0;
-		break;
-	/* Disallow D800-F8FF (in utf8, EDA080-EFA3BF */
-	case 0xED:
-		if (c >= 0xA0) return 0;
-		break;
-	case 0xEE:
-		return 0;
-		break;
-	case 0xEF:
-		if (c <= 0xA3) return 0;
-	/* Disallow FFF9-FFFF (EFBFB9-EFBFBF) */
-		if (c==0xBF)
-			/* Don't need to check <=0xBF, since valid utf8 */
-			if ( *(source+2) >= 0xB9) return 0;
-		break;
-	}
-	return 1;
-}
-
-/* This routine should really check to see that the proper stringprep
- * mappings have been applied.  Instead, we do a simple screen of some
- * of the more obvious illegal values by calling is_allowed_utf8_char.
- * This will allow many illegal strings through, but if a client behaves,
- * it will get full functionality.  The other option (apart from full
- * stringprep checking) is to limit everything to an easily handled subset,
- * such as 7-bit ascii.
- *
- * Note - currently calling routines ignore return value except as boolean.
- */
-static int
-check_utf8(char *str, int len)
-{
-	unsigned char *chunk, *sourceend;
-	int chunklen;
-
-	chunk = str;
-	sourceend = str + len;
-
-	while (chunk < sourceend) {
-		chunklen = utf8_byte_len[*chunk];
-		if (!chunklen)
-			return nfserr_inval;
-		if (chunk + chunklen > sourceend)
-			return nfserr_inval;
-		if (!is_legal_utf8_sequence(chunk, chunklen))
-			return nfserr_inval;
-		if (!is_allowed_utf8_char(chunk, chunklen))
-			return nfserr_inval;
-		if ( (chunklen==1) && (!*chunk) )
-			return nfserr_inval; /* Disallow embedded nulls */
-		chunk += chunklen;
-	}
-
-	return 0;
-}
-
 static int
 check_filename(char *str, int len, int err)
 {
@@ -187,7 +72,7 @@
 	for (i = 0; i < len; i++)
 		if (str[i] == '/')
 			return err;
-	return check_utf8(str, len);
+	return 0;
 }
 
 /*
@@ -403,8 +288,6 @@
 			READ_BUF(dummy32);
 			len += XDR_QUADLEN(dummy32) << 2;
 			READMEM(buf, dummy32);
-			if (check_utf8(buf, dummy32))
-				return nfserr_inval;
 			ace.whotype = nfs4_acl_get_whotype(buf, dummy32);
 			status = 0;
 			if (ace.whotype != NFS4_ACL_WHO_NAMED)
@@ -439,8 +322,6 @@
 		READ_BUF(dummy32);
 		len += (XDR_QUADLEN(dummy32) << 2);
 		READMEM(buf, dummy32);
-		if (check_utf8(buf, dummy32))
-			return nfserr_inval;
 		if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid)))
 			goto out_nfserr;
 		iattr->ia_valid |= ATTR_UID;
@@ -452,8 +333,6 @@
 		READ_BUF(dummy32);
 		len += (XDR_QUADLEN(dummy32) << 2);
 		READMEM(buf, dummy32);
-		if (check_utf8(buf, dummy32))
-			return nfserr_inval;
 		if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid)))
 			goto out_nfserr;
 		iattr->ia_valid |= ATTR_GID;
@@ -525,7 +404,7 @@
 		}
 	}
 	if (len != expected_len)
-		goto xdr_error;
+		printk("nfsd: funky nfs4 client sent extra bytes in setattr\n");
 
 	DECODE_TAIL;
 
@@ -585,8 +464,6 @@
 		READ32(create->cr_linklen);
 		READ_BUF(create->cr_linklen);
 		SAVEMEM(create->cr_linkname, create->cr_linklen);
-		if (check_utf8(create->cr_linkname, create->cr_linklen))
-			return nfserr_inval;
 		break;
 	case NF4BLK:
 	case NF4CHR:
@@ -615,6 +492,18 @@
 }
 
 static inline int
+nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr)
+{
+	DECODE_HEAD;
+
+	READ_BUF(sizeof(stateid_t));
+	READ32(dr->dr_stateid.si_generation);
+	COPYMEM(&dr->dr_stateid.si_opaque, sizeof(stateid_opaque_t));
+
+	DECODE_TAIL;
+}
+
+static inline int
 nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *getattr)
 {
 	return nfsd4_decode_bitmap(argp, getattr->ga_bmval);
@@ -790,8 +679,8 @@
 		READ32(open->op_delegate_type);
 		break;
 	case NFS4_OPEN_CLAIM_DELEGATE_CUR:
-		READ_BUF(sizeof(delegation_stateid_t) + 4);
-		COPYMEM(&open->op_delegate_stateid, sizeof(delegation_stateid_t));
+		READ_BUF(sizeof(stateid_t) + 4);
+		COPYMEM(&open->op_delegate_stateid, sizeof(stateid_t));
 		READ32(open->op_fname.len);
 		READ_BUF(open->op_fname.len);
 		SAVEMEM(open->op_fname.data, open->op_fname.len);
@@ -825,7 +714,7 @@
 	DECODE_HEAD;
 		    
 	open_down->od_stateowner = NULL;
-	READ_BUF(4 + sizeof(stateid_t));
+	READ_BUF(12 + sizeof(stateid_t));
 	READ32(open_down->od_stateid.si_generation);
 	COPYMEM(&open_down->od_stateid.si_opaque, sizeof(stateid_opaque_t));
 	READ32(open_down->od_seqid);
@@ -1170,6 +1059,9 @@
 		case OP_CREATE:
 			op->status = nfsd4_decode_create(argp, &op->u.create);
 			break;
+		case OP_DELEGRETURN:
+			op->status = nfsd4_decode_delegreturn(argp, &op->u.delegreturn);
+			break;
 		case OP_GETATTR:
 			op->status = nfsd4_decode_getattr(argp, &op->u.getattr);
 			break;
@@ -1425,7 +1317,7 @@
 		if (status)
 			goto out_nfserr;
 	}
-	if ((bmval0 & FATTR4_WORD0_FILEHANDLE) && !fhp) {
+	if ((bmval0 & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) && !fhp) {
 		fh_init(&tempfh, NFS4_FHSIZE);
 		status = fh_compose(&tempfh, exp, dentry, NULL);
 		if (status)
@@ -1471,7 +1363,10 @@
 	if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) {
 		if ((buflen -= 4) < 0)
 			goto out_resource;
-		WRITE32( NFS4_FH_NOEXPIRE_WITH_OPEN | NFS4_FH_VOL_RENAME );
+		if (exp->ex_flags & NFSEXP_NOSUBTREECHECK)
+			WRITE32(NFS4_FH_PERSISTENT);
+		else
+			WRITE32(NFS4_FH_VOL_RENAME);
 	}
 	if (bmval0 & FATTR4_WORD0_CHANGE) {
 		/*
@@ -1508,10 +1403,15 @@
 	if (bmval0 & FATTR4_WORD0_FSID) {
 		if ((buflen -= 16) < 0)
 			goto out_resource;
-		WRITE32(0);
-		WRITE32(MAJOR(stat.dev));
-		WRITE32(0);
-		WRITE32(MINOR(stat.dev));
+		if (is_fsid(fhp, rqstp->rq_reffh)) {
+			WRITE64((u64)exp->ex_fsid);
+			WRITE64((u64)0);
+		} else {
+			WRITE32(0);
+			WRITE32(MAJOR(stat.dev));
+			WRITE32(0);
+			WRITE32(MINOR(stat.dev));
+		}
 	}
 	if (bmval0 & FATTR4_WORD0_UNIQUE_HANDLES) {
 		if ((buflen -= 4) < 0)
@@ -1765,17 +1665,65 @@
 }
 
 static int
+nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
+		const char *name, int namlen, u32 *p, int *buflen)
+{
+	struct svc_export *exp = cd->rd_fhp->fh_export;
+	struct dentry *dentry;
+	int nfserr;
+
+	dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen);
+	if (IS_ERR(dentry))
+		return nfserrno(PTR_ERR(dentry));
+
+	exp_get(exp);
+	if (d_mountpoint(dentry)) {
+		if (nfsd_cross_mnt(cd->rd_rqstp, &dentry, &exp)) {
+		/*
+		 * -EAGAIN is the only error returned from
+		 * nfsd_cross_mnt() and it indicates that an
+		 * up-call has  been initiated to fill in the export
+		 * options on exp.  When the answer comes back,
+		 * this call will be retried.
+		 */
+			nfserr = nfserr_dropit;
+			goto out_put;
+		}
+
+	}
+	nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval,
+					cd->rd_rqstp);
+out_put:
+	dput(dentry);
+	exp_put(exp);
+	return nfserr;
+}
+
+static u32 *
+nfsd4_encode_rdattr_error(u32 *p, int buflen, int nfserr)
+{
+	u32 *attrlenp;
+
+	if (buflen < 6)
+		return NULL;
+	*p++ = htonl(2);
+	*p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */
+	*p++ = htonl(0);			 /* bmval1 */
+
+	attrlenp = p++;
+	*p++ = nfserr;       /* no htonl */
+	*attrlenp = htonl((char *)p - (char *)attrlenp - 4);
+	return p;
+}
+
+static int
 nfsd4_encode_dirent(struct readdir_cd *ccd, const char *name, int namlen,
 		    loff_t offset, ino_t ino, unsigned int d_type)
 {
 	struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common);
 	int buflen;
 	u32 *p = cd->buffer;
-	u32 *attrlenp;
-	struct dentry *dentry;
-	struct svc_export *exp = cd->rd_fhp->fh_export;
-	u32 bmval0, bmval1;
-	int nfserr = 0;
+	int nfserr = nfserr_toosmall;
 
 	/* In nfsv4, "." and ".." never make it onto the wire.. */
 	if (name && isdotent(name, namlen)) {
@@ -1788,106 +1736,44 @@
 
 	buflen = cd->buflen - 4 - XDR_QUADLEN(namlen);
 	if (buflen < 0)
-		goto nospc;
+		goto fail;
 
 	*p++ = xdr_one;                             /* mark entry present */
 	cd->offset = p;                             /* remember pointer */
 	p = xdr_encode_hyper(p, NFS_OFFSET_MAX);    /* offset of next entry */
 	p = xdr_encode_array(p, name, namlen);      /* name length & name */
 
-	/*
-	 * Now we come to the ugly part: writing the fattr for this entry.
-	 */
-	bmval0 = cd->rd_bmval[0];
-	bmval1 = cd->rd_bmval[1];
-	if ((bmval0 & ~(FATTR4_WORD0_RDATTR_ERROR | FATTR4_WORD0_FILEID)) || bmval1)  {
-		/*
-		 * "Heavyweight" case: we have no choice except to
-		 * call nfsd4_encode_fattr(). 
-		 */
-		dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen);
-		if (IS_ERR(dentry)) {
-			nfserr = nfserrno(PTR_ERR(dentry));
-			goto error;
-		}
-
-		exp_get(exp);
-		if (d_mountpoint(dentry)) {
-			if ((nfserr = nfsd_cross_mnt(cd->rd_rqstp, &dentry, 
-					 &exp))) {	
-			/* 
-			 * -EAGAIN is the only error returned from 
-			 * nfsd_cross_mnt() and it indicates that an 
-			 * up-call has  been initiated to fill in the export 
-			 * options on exp.  When the answer comes back,
-			 * this call will be retried.
-			 */
-				dput(dentry);
-				exp_put(exp);
-				nfserr = nfserr_dropit;
-				goto error;
-			}
-
-		}
-
-		nfserr = nfsd4_encode_fattr(NULL, exp,
-				dentry, p, &buflen, cd->rd_bmval,
-				cd->rd_rqstp);
-		dput(dentry);
-		exp_put(exp);
-		if (!nfserr) {
-			p += buflen;
-			goto out;
-		}
-		if (nfserr == nfserr_resource)
-			goto nospc;
-
-error:
+	nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, p, &buflen);
+	switch (nfserr) {
+	case nfs_ok:
+		p += buflen;
+		break;
+	case nfserr_resource:
+		nfserr = nfserr_toosmall;
+		goto fail;
+	case nfserr_dropit:
+		goto fail;
+	default:
 		/*
-		 * If we get here, we experienced a miscellaneous
-		 * failure while writing the attributes.  If the
-		 * client requested the RDATTR_ERROR attribute,
+		 * If the client requested the RDATTR_ERROR attribute,
 		 * we stuff the error code into this attribute
 		 * and continue.  If this attribute was not requested,
 		 * then in accordance with the spec, we fail the
 		 * entire READDIR operation(!)
 		 */
-		if (!(bmval0 & FATTR4_WORD0_RDATTR_ERROR)) {
-			cd->common.err = nfserr;
-			return -EINVAL;
-		}
-
-		bmval0 = FATTR4_WORD0_RDATTR_ERROR;
-		bmval1 = 0;
-		/* falling through here will do the right thing... */
+		if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR))
+			goto fail;
+		nfserr = nfserr_toosmall;
+		p = nfsd4_encode_rdattr_error(p, buflen, nfserr);
+		if (p == NULL)
+			goto fail;
 	}
-
-	/*
-	 * In the common "lightweight" case, we avoid
-	 * the overhead of nfsd4_encode_fattr() by assembling
-	 * a small fattr by hand.
-	 */
-	if (buflen < 6)
-		goto nospc;
-	*p++ = htonl(2);
-	*p++ = htonl(bmval0);
-	*p++ = htonl(bmval1);
-
-	attrlenp = p++;
-	if (bmval0 & FATTR4_WORD0_RDATTR_ERROR)
-		*p++ = nfserr;       /* no htonl */
-	if (bmval0 & FATTR4_WORD0_FILEID)
-		p = xdr_encode_hyper(p, (u64)ino);
-	*attrlenp = htonl((char *)p - (char *)attrlenp - 4);
-
-out:
 	cd->buflen -= (p - cd->buffer);
 	cd->buffer = p;
 	cd->common.err = nfs_ok;
 	return 0;
-
-nospc:
-	cd->common.err = nfserr_toosmall;
+fail:
+	cd->common.err = nfserr;
 	return -EINVAL;
 }
 
@@ -2081,8 +1967,8 @@
 	case NFS4_OPEN_DELEGATE_NONE:
 		break;
 	case NFS4_OPEN_DELEGATE_READ:
-		RESERVE_SPACE(20 + sizeof(delegation_stateid_t));
-		WRITEMEM(&open->op_delegate_stateid, sizeof(delegation_stateid_t));
+		RESERVE_SPACE(20 + sizeof(stateid_t));
+		WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t));
 		WRITE32(0);
 
 		/*
@@ -2095,8 +1981,8 @@
 		ADJUST_ARGS();
 		break;
 	case NFS4_OPEN_DELEGATE_WRITE:
-		RESERVE_SPACE(32 + sizeof(delegation_stateid_t));
-		WRITEMEM(&open->op_delegate_stateid, sizeof(delegation_stateid_t));
+		RESERVE_SPACE(32 + sizeof(stateid_t));
+		WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t));
 		WRITE32(0);
 
 		/*
@@ -2185,10 +2071,17 @@
 	}
 	read->rd_vlen = v;
 
-	nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp,
-			   read->rd_offset,
-			   read->rd_iov, read->rd_vlen,
-			   &maxcount);
+	if (read->rd_filp)
+		nfserr = nfsd_vfs_read(read->rd_rqstp, read->rd_fhp,
+				read->rd_filp, read->rd_offset,
+				read->rd_iov, read->rd_vlen,
+				&maxcount);
+	else
+		nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp,
+				read->rd_offset,
+				read->rd_iov, read->rd_vlen,
+				&maxcount);
+
 	if (nfserr == nfserr_symlink)
 		nfserr = nfserr_inval;
 	if (nfserr)
@@ -2460,6 +2353,8 @@
 	case OP_CREATE:
 		nfsd4_encode_create(resp, op->status, &op->u.create);
 		break;
+	case OP_DELEGRETURN:
+		break;
 	case OP_GETATTR:
 		op->status = nfsd4_encode_getattr(resp, op->status, &op->u.getattr);
 		break;
Index: linux-2.6.10/fs/nfsd/nfs4state.c
===================================================================
--- linux-2.6.10.orig/fs/nfsd/nfs4state.c	2004-12-25 05:35:23.000000000 +0800
+++ linux-2.6.10/fs/nfsd/nfs4state.c	2005-04-05 14:49:13.421688912 +0800
@@ -44,6 +44,7 @@
 #include <linux/mount.h>
 #include <linux/workqueue.h>
 #include <linux/smp_lock.h>
+#include <linux/kthread.h>
 #include <linux/nfs4.h>
 #include <linux/nfsd/state.h>
 #include <linux/nfsd/xdr4.h>
@@ -56,9 +57,11 @@
 static u32 nfs4_reclaim_init = 0;
 time_t boot_time;
 static time_t grace_end = 0;
+static u32 first_run = 1;       /* laundromat threads first run */
 static u32 current_clientid = 1;
-static u32 current_ownerid;
-static u32 current_fileid;
+static u32 current_ownerid = 1;
+static u32 current_fileid = 1;
+static u32 current_delegid = 1;
 static u32 nfs4_init;
 stateid_t zerostateid;             /* bits all 0 */
 stateid_t onestateid;              /* bits all 1 */
@@ -70,14 +73,17 @@
 u32 del_perclient = 0;
 u32 alloc_file = 0;
 u32 free_file = 0;
-u32 alloc_sowner = 0;
-u32 free_sowner = 0;
 u32 vfsopen = 0;
 u32 vfsclose = 0;
-u32 alloc_lsowner= 0;
+u32 alloc_delegation= 0;
+u32 free_delegation= 0;
 
 /* forward declarations */
 struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
+static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
+static void release_delegation(struct nfs4_delegation *dp);
+static void release_stateid_lockowner(struct nfs4_stateid *open_stp);
+extern char recovery_dirname[];
 
 /* Locking:
  *
@@ -117,6 +123,112 @@
 static void release_stateid(struct nfs4_stateid *stp, int flags);
 static void release_file(struct nfs4_file *fp);
 
+/*
+ * Delegation state
+ */
+
+/* recall_lock protects the del_recall_lru */
+spinlock_t recall_lock;
+static struct list_head del_recall_lru;
+
+static struct nfs4_delegation *
+alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type)
+{
+	struct nfs4_delegation *dp;
+	struct nfs4_file *fp = stp->st_file;
+
+	dprintk("NFSD alloc_init_deleg\n");
+	if ((dp = kmalloc(sizeof(struct nfs4_delegation),
+		GFP_KERNEL)) == NULL)
+		return dp;
+	INIT_LIST_HEAD(&dp->dl_del_perfile);
+	INIT_LIST_HEAD(&dp->dl_del_perclnt);
+	INIT_LIST_HEAD(&dp->dl_recall_lru);
+	dp->dl_client = clp;
+	dp->dl_file = fp;
+	dp->dl_flock = NULL;
+	dp->dl_stp = stp;
+	dp->dl_flags = 0;
+	dp->dl_type = type;
+	dp->dl_recall.cbr_dp = NULL;
+	dp->dl_recall.cbr_ident = 0;
+	dp->dl_recall.cbr_trunc = 0;
+	dp->dl_stateid.si_boot = boot_time;
+	dp->dl_stateid.si_stateownerid = current_delegid++;
+	dp->dl_stateid.si_fileid = 0;
+	dp->dl_stateid.si_generation = 0;
+	dp->dl_fhlen = current_fh->fh_handle.fh_size;
+	memcpy(dp->dl_fhval, &current_fh->fh_handle.fh_base,
+	current_fh->fh_handle.fh_size);
+	dp->dl_time = 0;
+	atomic_set(&dp->dl_state, NFS4_NO_RECALL);
+	atomic_set(&dp->dl_count, 1);
+	atomic_set(&dp->dl_recall_cnt, 0);
+	list_add(&dp->dl_del_perfile, &fp->fi_del_perfile);
+	list_add(&dp->dl_del_perclnt, &clp->cl_del_perclnt);
+	alloc_delegation++;
+	return dp;
+}
+
+/*
+ * Free the delegation structure.
+ */
+static void
+nfs4_free_delegation(struct nfs4_delegation *dp)
+{
+	dprintk("NFSD: nfs4_free_delegation freeing dp %p\n",dp);
+	list_del(&dp->dl_recall_lru);
+	kfree(dp);
+	free_delegation++;
+}
+
+/* release_delegation:
+ *
+ * lease_modify() is called to remove the FS_LEASE file_lock from
+ * the i_flock list, eventually calling nfsd's lock_manager
+ * fl_release_callback.
+ *
+ * call either:
+ *   nfsd_close : if last close, locks_remove_flock calls lease_modify.
+ *                otherwise, recalled state set to NFS4_RECALL_COMPLETE
+ *                so that it will be reaped by the laundromat service.
+ * or
+ *   remove_lease (calls time_out_lease which calls lease_modify).
+ *   and nfs4_free_delegation.
+ *
+ * lock_kernel() protects dp->dl_flock which is set under the kernel lock
+ * by nfsd_copy_lock_deleg_callback and nfsd_release_deleg_callback.
+ *
+ */
+
+static void
+release_delegation(struct nfs4_delegation *dp)
+{
+	/* delayed nfsd_close */
+	if (dp->dl_flags && NFS4_DELAY_CLOSE) {
+		struct file *filp = dp->dl_stp->st_vfs_file;
+
+		dprintk("NFSD: release_delegation CLOSE\n");
+		release_stateid_lockowner(dp->dl_stp);
+		kfree(dp->dl_stp);
+		dp->dl_flags &= ~NFS4_DELAY_CLOSE;
+		dp->dl_stp = NULL;
+		atomic_set(&dp->dl_state, NFS4_RECALL_COMPLETE);
+		nfsd_close(filp);
+		vfsclose++;
+	} else {
+		dprintk("NFSD: release_delegation remove lease dl_flock %p\n",
+			dp->dl_flock);
+		remove_lease(dp->dl_flock);
+		list_del_init(&dp->dl_del_perfile);
+		list_del_init(&dp->dl_del_perclnt);
+		/* dl_count > 0 => outstanding recall rpc */
+		dprintk("NFSD: release_delegation free deleg dl_count %d\n",
+		atomic_read(&dp->dl_count));
+		if (atomic_dec_and_test(&dp->dl_count))
+			nfs4_free_delegation(dp);
+	}
+}
 
 /* 
  * SETCLIENTID state 
@@ -148,7 +260,7 @@
  * for last close replay.
  */
 static struct list_head	reclaim_str_hashtbl[CLIENT_HASH_SIZE];
-static int reclaim_str_hashtbl_size;
+static int reclaim_str_hashtbl_size = 0;
 static struct list_head	conf_id_hashtbl[CLIENT_HASH_SIZE];
 static struct list_head	conf_str_hashtbl[CLIENT_HASH_SIZE];
 static struct list_head	unconf_str_hashtbl[CLIENT_HASH_SIZE];
@@ -213,12 +325,38 @@
 	kfree(clp);
 }
 
+void
+put_nfs4_client(struct nfs4_client *clp)
+{
+	if (atomic_dec_and_test(&clp->cl_count))
+		free_client(clp);
+}
+
 static void
 expire_client(struct nfs4_client *clp)
 {
 	struct nfs4_stateowner *sop;
+	struct nfs4_delegation *dp;
+	struct nfs4_callback *cb = &clp->cl_callback;
+	struct rpc_clnt *clnt = clp->cl_callback.cb_client;
+
+	dprintk("NFSD: expire_client cl_count %d\n",
+	                    atomic_read(&clp->cl_count));
 
-	dprintk("NFSD: expire_client\n");
+	/* shutdown rpc client, ending any outstanding recall rpcs */
+	if (atomic_read(&cb->cb_set) == 1 && clnt) {
+		rpc_shutdown_client(clnt);
+		clnt = clp->cl_callback.cb_client = NULL;
+	}
+	while (!list_empty(&clp->cl_del_perclnt)) {
+		dp = list_entry(clp->cl_del_perclnt.next, struct nfs4_delegation, dl_del_perclnt);
+		dprintk("NFSD: expire client. dp %p, dl_state %d, fp %p\n",
+				dp, atomic_read(&dp->dl_state), dp->dl_flock);
+
+		/* force release of delegation. */
+		atomic_set(&dp->dl_state, NFS4_RECALL_COMPLETE);
+		release_delegation(dp);
+	}
 	list_del(&clp->cl_idhash);
 	list_del(&clp->cl_strhash);
 	list_del(&clp->cl_lru);
@@ -226,7 +364,7 @@
 		sop = list_entry(clp->cl_perclient.next, struct nfs4_stateowner, so_perclient);
 		release_stateowner(sop);
 	}
-	free_client(clp);
+	put_nfs4_client(clp);
 }
 
 static struct nfs4_client *
@@ -235,9 +373,13 @@
 
 	if (!(clp = alloc_client(name)))
 		goto out;
+	atomic_set(&clp->cl_count, 1);
+	atomic_set(&clp->cl_callback.cb_set, 0);
+	clp->cl_callback.cb_parsed = 0;
 	INIT_LIST_HEAD(&clp->cl_idhash);
 	INIT_LIST_HEAD(&clp->cl_strhash);
 	INIT_LIST_HEAD(&clp->cl_perclient);
+	INIT_LIST_HEAD(&clp->cl_del_perclnt);
 	INIT_LIST_HEAD(&clp->cl_lru);
 out:
 	return clp;
@@ -420,17 +562,24 @@
 {
 	struct nfs4_callback *cb = &clp->cl_callback;
 
+	/* Currently, we only support tcp for the callback channel */
+	if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3))
+		goto out_err;
+
 	if ( !(parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val,
-		         &cb->cb_addr, &cb->cb_port))) {
-		printk(KERN_INFO "NFSD: BAD callback address. client will not receive delegations\n");
-		cb->cb_parsed = 0;
-		return;
-	}
-	cb->cb_netid.len = se->se_callback_netid_len;
-	cb->cb_netid.data = se->se_callback_netid_val;
+	                          &cb->cb_addr, &cb->cb_port)))
+		goto out_err;
 	cb->cb_prog = se->se_callback_prog;
 	cb->cb_ident = se->se_callback_ident;
 	cb->cb_parsed = 1;
+	return;
+out_err:
+	printk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
+		"will not receive delegations\n",
+		clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
+
+	cb->cb_parsed = 0;
+	return;
 }
 
 /*
@@ -707,6 +856,7 @@
 			status = nfserr_clid_inuse;
 		else {
 			expire_client(conf);
+			clp = unconf;
 			move_to_confirmed(unconf, idhashval);
 			status = nfs_ok;
 		}
@@ -724,6 +874,7 @@
 		if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred)) {
 			status = nfserr_clid_inuse;
 		} else {
+			clp = conf;
 			status = nfs_ok;
 		}
 		goto out;
@@ -738,6 +889,7 @@
 			status = nfserr_clid_inuse;
 		} else {
 			status = nfs_ok;
+			clp = unconf;
 			move_to_confirmed(unconf, idhashval);
 		}
 		goto out;
@@ -757,7 +909,8 @@
 	status = nfserr_inval;
 	goto out;
 out:
-	/* XXX if status == nfs_ok, probe callback path */
+	if (!status)
+		nfsd4_probe_callback(clp);
 	nfs4_unlock_state();
 	return status;
 }
@@ -803,6 +956,7 @@
 	if ((fp = kmalloc(sizeof(struct nfs4_file),GFP_KERNEL))) {
 		INIT_LIST_HEAD(&fp->fi_hash);
 		INIT_LIST_HEAD(&fp->fi_perfile);
+		INIT_LIST_HEAD(&fp->fi_del_perfile);
 		list_add(&fp->fi_hash, &file_hashtbl[hashval]);
 		fp->fi_inode = igrab(ino);
 		fp->fi_id = current_fileid++;
@@ -822,7 +976,7 @@
 		while (!list_empty(&file_hashtbl[i])) {
 			fp = list_entry(file_hashtbl[i].next, struct nfs4_file, fi_hash);
 			/* this should never be more than once... */
-			if (!list_empty(&fp->fi_perfile)) {
+			if (!list_empty(&fp->fi_perfile) || !list_empty(&fp->fi_del_perfile)) {
 				printk("ERROR: release_all_files: file %p is open, creating dangling state !!!\n",fp);
 			}
 			release_file(fp);
@@ -830,15 +984,36 @@
 	}
 }
 
-/* should use a slab cache */
+kmem_cache_t *stateowner_slab = NULL;
+
+int
+nfsd4_init_slabs(void)
+{
+	stateowner_slab = kmem_cache_create("nfsd4_stateowners",
+			sizeof(struct nfs4_stateowner), 0, 0, NULL, NULL);
+	if (stateowner_slab == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+int
+nfsd4_free_slabs(void)
+{
+	int status = 0;
+
+	if (stateowner_slab)
+		status = kmem_cache_destroy(stateowner_slab);
+	stateowner_slab = NULL;
+	return status;
+}
+
 void
 nfs4_free_stateowner(struct kref *kref)
 {
 	struct nfs4_stateowner *sop =
 		container_of(kref, struct nfs4_stateowner, so_ref);
 	kfree(sop->so_owner.data);
-	kfree(sop);
-	free_sowner++;
+	kmem_cache_free(stateowner_slab, sop);
 }
 
 static inline struct nfs4_stateowner *
@@ -846,14 +1021,14 @@
 {
 	struct nfs4_stateowner *sop;
 
-	if ((sop = kmalloc(sizeof(struct nfs4_stateowner),GFP_KERNEL))) {
+	if ((sop = kmem_cache_alloc(stateowner_slab, GFP_KERNEL))) {
 		if ((sop->so_owner.data = kmalloc(owner->len, GFP_KERNEL))) {
 			memcpy(sop->so_owner.data, owner->data, owner->len);
 			sop->so_owner.len = owner->len;
 			kref_init(&sop->so_ref);
 			return sop;
 		} 
-		kfree(sop);
+		kmem_cache_free(stateowner_slab, sop);
 	}
 	return NULL;
 }
@@ -887,7 +1062,6 @@
 	rp->rp_status = NFSERR_SERVERFAULT;
 	rp->rp_buflen = 0;
 	rp->rp_buf = rp->rp_ibuf;
-	alloc_sowner++;
 	return sop;
 }
 
@@ -957,14 +1131,29 @@
 	__set_bit(open->op_share_deny, &stp->st_deny_bmap);
 }
 
+/*
+* Because nfsd_close() can call locks_remove_flock() which removes leases,
+* delay nfsd_close() for delegations from the nfsd_open() clientid
+* until the delegation is reaped.
+*/
 static void
-release_stateid(struct nfs4_stateid *stp, int flags) {
+release_stateid(struct nfs4_stateid *stp, int flags)
+{
+	struct nfs4_delegation *dp;
+	struct nfs4_file *fp = stp->st_file;
 
 	list_del(&stp->st_hash);
 	list_del_perfile++;
 	list_del(&stp->st_perfile);
 	list_del(&stp->st_perfilestate);
 	if ((stp->st_vfs_set) && (flags & OPEN_STATE)) {
+		list_for_each_entry(dp, &fp->fi_del_perfile, dl_del_perfile) {
+			if(cmp_clid(&dp->dl_client->cl_clientid,
+			    &stp->st_stateowner->so_client->cl_clientid)) {
+				dp->dl_flags |= NFS4_DELAY_CLOSE;
+				return;
+			}
+		}
 		release_stateid_lockowner(stp);
 		nfsd_close(stp->st_vfs_file);
 		vfsclose++;
@@ -1013,7 +1202,7 @@
 	if (sop->so_confirmed && list_empty(&sop->so_perfilestate))
 		move_to_close_lru(sop);
 	/* unused nfs4_file's are releseed. XXX slab cache? */
-	if (list_empty(&fp->fi_perfile)) {
+	if (list_empty(&fp->fi_perfile) && list_empty(&fp->fi_del_perfile)) {
 		release_file(fp);
 	}
 }
@@ -1141,6 +1330,100 @@
 	}
 }
 
+/*
+ * Recall a delegation
+ */
+static int
+do_recall(void *__dp)
+{
+	struct nfs4_delegation *dp = __dp;
+
+	atomic_inc(&dp->dl_count);
+	nfsd4_cb_recall(dp);
+	do_exit(0);
+	return 0;
+}
+
+/*
+ * Spawn a thread to perform a recall on the delegation represented
+ * by the lease (file_lock)
+ *
+ * Called from break_lease() with lock_kernel() held,
+ *
+ */
+static
+void nfsd_break_deleg_cb(struct file_lock *fl)
+{
+	struct nfs4_delegation *dp=  (struct nfs4_delegation *)fl->fl_owner;
+	struct task_struct *t;
+
+	dprintk("NFSD nfsd_break_deleg_cb: dp %p fl %p\n",dp,fl);
+	if (!dp)
+		return;
+
+	/* schedule delegation for recall */
+	spin_lock(&recall_lock);
+	atomic_set(&dp->dl_state, NFS4_RECALL_IN_PROGRESS);
+	list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
+	spin_unlock(&recall_lock);
+
+	/* only place dl_time is set. protected by lock_kernel*/
+	dp->dl_time = get_seconds();
+
+	/* XXX need to merge NFSD_LEASE_TIME with fs/locks.c:lease_break_time */
+	fl->fl_break_time = jiffies + NFSD_LEASE_TIME * HZ;
+
+	t = kthread_run(do_recall, dp, "%s", "nfs4_cb_recall");
+	if (IS_ERR(t)) {
+		struct nfs4_client *clp = dp->dl_client;
+
+		printk(KERN_INFO "NFSD: Callback thread failed for "
+			"for client (clientid %08x/%08x)\n",
+			clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
+	}
+}
+
+/*
+ * The file_lock is being reapd.
+ *
+ * Called by locks_free_lock() with lock_kernel() held.
+ */
+static
+void nfsd_release_deleg_cb(struct file_lock *fl)
+{
+	struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
+
+	dprintk("NFSD nfsd_release_deleg_cb: fl %p dp %p dl_count %d, dl_state %d\n", fl,dp, atomic_read(&dp->dl_count), atomic_read(&dp->dl_state));
+
+	if (!(fl->fl_flags & FL_LEASE) || !dp)
+		return;
+	atomic_set(&dp->dl_state,NFS4_RECALL_COMPLETE);
+	dp->dl_flock = NULL;
+}
+
+/*
+ * Set the delegation file_lock back pointer.
+ *
+ * Called from __setlease() with lock_kernel() held.
+ */
+static
+void nfsd_copy_lock_deleg_cb(struct file_lock *new, struct file_lock *fl)
+{
+	struct nfs4_delegation *dp = (struct nfs4_delegation *)new->fl_owner;
+
+	dprintk("NFSD: nfsd_copy_lock_deleg_cb: new fl %p dp %p\n", new, dp);
+	if (!dp)
+		return;
+	dp->dl_flock = new;
+}
+
+struct lock_manager_operations nfsd_lease_mng_ops = {
+        .fl_break = nfsd_break_deleg_cb,
+        .fl_release_private = nfsd_release_deleg_cb,
+        .fl_copy_lock = nfsd_copy_lock_deleg_cb,
+};
+
+
 
 /*
  * nfsd4_process_open1()
@@ -1238,6 +1521,43 @@
 }
 
 static int
+nfs4_deleg_conflict(u32 share, u32 dtype)
+{
+	return (((share & NFS4_SHARE_ACCESS_WRITE) &&
+		dtype == NFS4_OPEN_DELEGATE_READ) ||
+		((share & NFS4_SHARE_ACCESS_READ) &&
+		dtype == NFS4_OPEN_DELEGATE_WRITE));
+}
+
+#define DONT_DELEGATE  8
+
+/*
+ * nfs4_check_deleg_recall()
+ *
+ * Test any delegation that is currently within an incompleted recalled
+ * state, and return NFSERR_DELAY for conflicting open share.
+ * flag is set to DONT_DELEGATE for shares that match the deleg type.
+ */
+static int
+nfs4_check_deleg_recall(struct nfs4_file *fp, struct nfsd4_open *op, int *flag)
+{
+	struct nfs4_delegation *dp;
+	int status = 0;
+
+	list_for_each_entry(dp, &fp->fi_del_perfile, dl_del_perfile) {
+		dprintk("NFSD: found delegation %p with dl_state %d\n",
+		 	                 dp, atomic_read(&dp->dl_state));
+		if (atomic_read(&dp->dl_state) == NFS4_RECALL_IN_PROGRESS) {
+			if(nfs4_deleg_conflict(op->op_share_access, dp->dl_type))
+				status = nfserr_jukebox;
+			else
+				*flag = DONT_DELEGATE;
+		}
+	}
+	return status;
+}
+
+static int
 nfs4_check_open(struct nfs4_file *fp, struct nfs4_stateowner *sop, struct nfsd4_open *open, struct nfs4_stateid **stpp)
 {
 	struct nfs4_stateid *local;
@@ -1339,6 +1659,65 @@
 }
 
 /*
+ * Attempt to hand out a delegation.
+ */
+static void
+nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_stateid *stp, int *flag)
+{
+	struct nfs4_delegation *dp;
+	struct nfs4_stateowner *sop = stp->st_stateowner;
+	struct nfs4_callback *cb = &sop->so_client->cl_callback;
+	struct file_lock fl, *flp = &fl;
+	int status;
+
+	if (*flag == DONT_DELEGATE) {
+		*flag = NFS4_OPEN_DELEGATE_NONE;
+		return;
+	}
+
+	/* set flag */
+	*flag = NFS4_OPEN_DELEGATE_NONE;
+	if (open->op_claim_type != NFS4_OPEN_CLAIM_NULL
+	     || !atomic_read(&cb->cb_set) || !sop->so_confirmed)
+		return;
+
+	if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
+		*flag = NFS4_OPEN_DELEGATE_READ;
+
+	else if (!(open->op_share_access & NFS4_SHARE_ACCESS_READ))
+		*flag = NFS4_OPEN_DELEGATE_WRITE;
+
+	if (!(dp = alloc_init_deleg(sop->so_client, stp, fh, *flag)))
+		return;
+	locks_init_lock(&fl);
+	fl.fl_lmops = &nfsd_lease_mng_ops;
+	fl.fl_flags = FL_LEASE;
+	fl.fl_end = OFFSET_MAX;
+	fl.fl_owner =  (fl_owner_t)dp;
+	fl.fl_file = stp->st_vfs_file;
+	fl.fl_pid = current->tgid;
+
+	if ((status = setlease(stp->st_vfs_file,
+		*flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK, &flp))) {
+		dprintk("NFSD: setlease failed [%d], no delegation\n", status);
+		list_del(&dp->dl_del_perfile);
+		list_del(&dp->dl_del_perclnt);
+		kfree(dp);
+		free_delegation++;
+		*flag = NFS4_OPEN_DELEGATE_NONE;
+		return;
+	}
+
+	memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid));
+
+	dprintk("NFSD: delegation stateid=(%08x/%08x/%08x/%08x)\n\n",
+	             dp->dl_stateid.si_boot,
+	             dp->dl_stateid.si_stateownerid,
+	             dp->dl_stateid.si_fileid,
+	             dp->dl_stateid.si_generation);
+}
+
+/*
  * called with nfs4_lock_state() held.
  */
 int
@@ -1346,28 +1725,24 @@
 {
 	struct nfs4_stateowner *sop = open->op_stateowner;
 	struct nfs4_file *fp = NULL;
-	struct inode *ino;
+	struct inode *ino = current_fh->fh_dentry->d_inode;
 	unsigned int fi_hashval;
 	struct nfs4_stateid *stp = NULL;
-	int status;
-
-	status = nfserr_resource;
-	if (!sop)
-		return status;
-
-	ino = current_fh->fh_dentry->d_inode;
+	int status, delegflag = 0;
 
 	status = nfserr_inval;
 	if (!TEST_ACCESS(open->op_share_access) || !TEST_DENY(open->op_share_deny))
 		goto out;
 	/*
-	 * Lookup file; if found, lookup stateid and check open request;
-	 * not found, create
+	 * Lookup file; if found, lookup stateid and check open request,
+	 * and check for delegations in the process of being recalled.
+	 * If not found, create the nfs4_file struct
 	 */
 	fi_hashval = file_hashval(ino);
 	if (find_file(fi_hashval, ino, &fp)) {
-		status = nfs4_check_open(fp, sop, open, &stp);
-		if (status)
+		if ((status = nfs4_check_open(fp, sop, open, &stp)))
+			goto out;
+		if ((status = nfs4_check_deleg_recall(fp, open, &delegflag)))
 			goto out;
 	} else {
 		status = nfserr_resource;
@@ -1407,14 +1782,20 @@
 			}
 		}
 	}
-	dprintk("nfs4_process_open2: stateid=(%08x/%08x/%08x/%08x)\n",
-	            stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid,
-	            stp->st_stateid.si_fileid, stp->st_stateid.si_generation);
-
 	memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t));
 
-	open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE;
+	/*
+	* Attempt to hand out a delegation. No error return, because the
+	* OPEN succeeds even if we fail.
+	*/
+	nfs4_open_delegation(current_fh, open, stp, &delegflag);
+	open->op_delegate_type = delegflag;
+
 	status = nfs_ok;
+
+	dprintk("nfs4_process_open2: stateid=(%08x/%08x/%08x/%08x)\n",
+	            stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid,
+	            stp->st_stateid.si_fileid, stp->st_stateid.si_generation);
 out:
 	/* take the opportunity to clean up unused state */
 	if (fp && list_empty(&fp->fi_perfile))
@@ -1480,14 +1861,26 @@
 {
 	struct nfs4_client *clp;
 	struct nfs4_stateowner *sop;
+	struct nfs4_delegation *dp;
 	struct list_head *pos, *next;
 	time_t cutoff = get_seconds() - NFSD_LEASE_TIME;
 	time_t t, clientid_val = NFSD_LEASE_TIME;
-	time_t u, close_val = NFSD_LEASE_TIME;
+	time_t u, test_val = NFSD_LEASE_TIME;
 
 	nfs4_lock_state();
 
-	dprintk("NFSD: laundromat service - starting, examining clients\n");
+	dprintk("NFSD: laundromat service - starting\n");
+	/* Remove clientid's from recovery directory */
+	if (first_run) {
+		int status;
+
+		dprintk("NFSD: laundromat service - FIRST_RUN\n");
+		status  = nfsd4_list_rec_dir(1);
+		if (status < 0)
+			printk("NFSD: error clearing recovery directory %s\n",
+				recovery_dirname);
+		first_run = 0;
+	}
 	list_for_each_safe(pos, next, &client_lru) {
 		clp = list_entry(pos, struct nfs4_client, cl_lru);
 		if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
@@ -1498,14 +1891,34 @@
 		}
 		dprintk("NFSD: purging unused client (clientid %08x)\n",
 			clp->cl_clientid.cl_id);
+		if (clp->cl_firststate)
+			nfsd4_remove_clid_file(clp);
 		expire_client(clp);
 	}
+	spin_lock(&recall_lock);
+	list_for_each_safe(pos, next, &del_recall_lru) {
+		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
+		if (atomic_read(&dp->dl_state) == NFS4_RECALL_COMPLETE)
+			goto reap;
+		if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) {
+			u = dp->dl_time - cutoff;
+			if (test_val > u)
+				test_val = u;
+			break;
+		}
+reap:
+		dprintk("NFSD: purging unused delegation dp %p, fp %p\n",
+			            dp, dp->dl_flock);
+		release_delegation(dp);
+	}
+	spin_unlock(&recall_lock);
+	test_val = NFSD_LEASE_TIME;
 	list_for_each_safe(pos, next, &close_lru) {
 		sop = list_entry(pos, struct nfs4_stateowner, so_close_lru);
 		if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) {
 			u = sop->so_time - cutoff;
-			if (close_val > u)
-				close_val = u;
+			if (test_val > u)
+				test_val = u;
 			break;
 		}
 		dprintk("NFSD: purging unused open stateowner (so_id %d)\n",
@@ -1564,21 +1977,81 @@
 	return 1;
 }
 
+static inline int
+access_permit_read(unsigned long access_bmap)
+{
+	return test_bit(NFS4_SHARE_ACCESS_READ, &access_bmap) ||
+		test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap);
+}
+
+static inline int
+access_permit_write(unsigned long access_bmap)
+{
+	return test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap) ||
+		test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap);
+}
+
+static
+int nfs4_check_openmode(struct nfs4_stateid *stp, int flags)
+{
+        int status = nfserr_openmode;
+
+	if ((flags & WR_STATE) && (!access_permit_write(stp->st_access_bmap)))
+                goto out;
+	if ((flags & RD_STATE) && (!access_permit_read(stp->st_access_bmap)))
+                goto out;
+	status = nfs_ok;
+out:
+	return status;
+}
+
+static int
+nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
+{
+	int status = nfserr_openmode;
+
+	if ((flags & WR_STATE) & (dp->dl_type == NFS4_OPEN_DELEGATE_READ))
+		goto out;
+	if ((flags & RD_STATE) & (dp->dl_type == NFS4_OPEN_DELEGATE_WRITE))
+		goto out;
+	status = nfs_ok;
+out:
+	return status;
+}
+
+static int
+nfs4_rw_grace(int flags)
+{
+	return (nfs4_in_grace() && ((flags & RD_STATE) || (flags & WR_STATE)));
+}
+
+/*
+ * Allow READ/WRITE during grace period on recovered state only for files
+ * that are not able to provide mandatory locking.
+ */
+static int
+nfs4_check_rw_grace(umode_t mode, int flags)
+{
+	return (nfs4_rw_grace(flags) && ((mode & S_IXGRP) && (mode & S_ISGID)));
+}
 
 /*
 * Checks for stateid operations
 */
 int
-nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int flags, struct nfs4_stateid **stpp)
+nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int flags, struct file **filpp)
 {
-	struct nfs4_stateid *stp;
+	struct nfs4_stateid *stp = NULL;
+	struct nfs4_delegation *dp = NULL;
+	stateid_t *stidp;
+	struct inode *ino = current_fh->fh_dentry->d_inode;
 	int status;
 
 	dprintk("NFSD: preprocess_stateid_op: stateid = (%08x/%08x/%08x/%08x)\n",
 		stateid->si_boot, stateid->si_stateownerid, 
 		stateid->si_fileid, stateid->si_generation); 
-
-	*stpp = NULL;
+	if (filpp)
+		*filpp = NULL;
 
 	/* STALE STATEID */
 	status = nfserr_stale_stateid;
@@ -1587,33 +2060,58 @@
 
 	/* BAD STATEID */
 	status = nfserr_bad_stateid;
-	if (!(stp = find_stateid(stateid, flags))) {
-		dprintk("NFSD: preprocess_stateid_op: no open stateid!\n");
-		goto out;
-	}
-	if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) {
-		dprintk("NFSD: preprocess_stateid_op: fh-stateid mismatch!\n");
-		stp->st_vfs_set = 0;
-		goto out;
-	}
-	if (!stp->st_stateowner->so_confirmed) {
-		dprintk("preprocess_stateid_op: lockowner not confirmed yet!\n");
-		goto out;
+	if (!stateid->si_fileid) { /* delegation stateid */
+
+		if(!(dp = find_delegation_stateid(ino, stateid))) {
+			dprintk("NFSD: delegation stateid not found\n");
+			if (nfs4_rw_grace(flags))
+				status = nfserr_grace;
+			goto out;
+		}
+		stidp = &dp->dl_stateid;
+	} else { /* open or lock stateid */
+		if (!(stp = find_stateid(stateid, flags))) {
+			dprintk("NFSD: open or lock stateid not found\n");
+			if (nfs4_rw_grace(flags))
+				status = nfserr_grace;
+			goto out;
+		}
+		if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp))
+			goto out;
+		if (!stp->st_stateowner->so_confirmed)
+			goto out;
+		stidp = &stp->st_stateid;
 	}
-	if (stateid->si_generation > stp->st_stateid.si_generation) {
-		dprintk("preprocess_stateid_op: future stateid?!\n");
+	if (stateid->si_generation > stidp->si_generation)
 		goto out;
-	}
 
 	/* OLD STATEID */
 	status = nfserr_old_stateid;
-	if (stateid->si_generation < stp->st_stateid.si_generation) {
-		dprintk("preprocess_stateid_op: old stateid!\n");
+	if (stateid->si_generation < stidp->si_generation)
 		goto out;
+
+	status = nfserr_grace;
+	if (nfs4_check_rw_grace(ino->i_mode, flags))
+		goto out;
+
+	if (stp) {
+		renew_client(stp->st_stateowner->so_client);
+		if ((status = nfs4_check_openmode(stp,flags)))
+			goto out;
+		if (filpp)
+			*filpp = stp->st_vfs_file;
+	} else if (dp) {
+		renew_client(dp->dl_client);
+		if ((status = nfs4_check_delegmode(dp, flags)))
+			goto out;
+		if (flags & DELEG_RET) {
+			atomic_set(&dp->dl_state,NFS4_RECALL_COMPLETE);
+			release_delegation(dp);
+		}
+		if (filpp && dp && dp->dl_stp)
+			*filpp = dp->dl_stp->st_vfs_file;
 	}
-	*stpp = stp;
 	status = nfs_ok;
-	renew_client(stp->st_stateowner->so_client);
 out:
 	return status;
 }
@@ -1750,17 +2248,6 @@
 	goto out;
 }
 
-/*
- * eventually, this will perform an upcall to the 'state daemon' as well as
- * set the cl_first_state field.
- */
-void
-first_state(struct nfs4_client *clp)
-{
-	if (!clp->cl_first_state)
-		clp->cl_first_state = get_seconds();
-}
-
 int
 nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_confirm *oc)
 {
@@ -1793,8 +2280,16 @@
 		         stp->st_stateid.si_stateownerid,
 		         stp->st_stateid.si_fileid,
 		         stp->st_stateid.si_generation);
-	status = nfs_ok;
-	first_state(sop->so_client);
+
+	if (!sop->so_client->cl_firststate) {
+		int err = nfsd4_create_clid_file(sop->so_client);
+		if (!err) {
+			sop->so_client->cl_firststate = 1;
+			dprintk("NFSD: OPEN_CONFIRM firststate set [%.*s]\n",
+				sop->so_client->cl_name.len,
+				sop->so_client->cl_name.data);
+		}
+	}
 out:
 	if (oc->oc_stateowner)
 		nfs4_get_stateowner(oc->oc_stateowner);
@@ -1912,6 +2407,22 @@
 	return status;
 }
 
+int
+nfsd4_delegreturn(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_delegreturn *dr)
+{
+	int status;
+
+	if ((status = fh_verify(rqstp, current_fh, S_IFREG, 0)))
+		goto out;
+
+	nfs4_lock_state();
+	status = nfs4_preprocess_stateid_op(current_fh, &dr->dr_stateid, DELEG_RET, NULL);
+	nfs4_unlock_state();
+out:
+	return status;
+}
+
+
 /* 
  * Lock owner state (byte-range locks)
  */
@@ -1938,7 +2449,7 @@
 	unsigned int hashval;
 
 	dprintk("NFSD: find_stateid flags 0x%x\n",flags);
-	if ((flags & LOCK_STATE) || (flags & RDWR_STATE)) {
+	if ((flags & LOCK_STATE) || (flags & RD_STATE) || (flags & WR_STATE)) {
 		hashval = stateid_hashval(st_id, f_id);
 		list_for_each_entry(local, &lockstateid_hashtbl[hashval], st_hash) {
 			if ((local->st_stateid.si_stateownerid == st_id) &&
@@ -1946,7 +2457,7 @@
 				return local;
 		}
 	} 
-	if ((flags & OPEN_STATE) || (flags & RDWR_STATE)) {
+	if ((flags & OPEN_STATE) || (flags & RD_STATE) || (flags & WR_STATE)) {
 		hashval = stateid_hashval(st_id, f_id);
 		list_for_each_entry(local, &stateid_hashtbl[hashval], st_hash) {
 			if ((local->st_stateid.si_stateownerid == st_id) &&
@@ -1958,6 +2469,30 @@
 	return NULL;
 }
 
+static struct nfs4_delegation *
+find_delegation_stateid(struct inode *ino, stateid_t *stid)
+{
+	struct nfs4_delegation *dp = NULL;
+	struct nfs4_file *fp = NULL;
+	u32 st_id;
+	unsigned int fi_hashval;
+
+	dprintk("NFSD:find_delegation_stateid ino %p, stid %p\n",ino,stid);
+
+	if(!ino || !stid)
+		return NULL;
+	st_id = stid->si_stateownerid;
+	fi_hashval = file_hashval(ino);
+	if (find_file(fi_hashval, ino, &fp)) {
+		list_for_each_entry(dp, &fp->fi_del_perfile, dl_del_perfile) {
+			if(dp->dl_stateid.si_stateownerid == st_id) {
+				dprintk("NFSD: find_delegation dp %p\n",dp);
+				return dp;
+			}
+		}
+	}
+	return NULL;
+}
 
 /*
  * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that
@@ -2085,7 +2620,6 @@
 	rp->rp_status = NFSERR_SERVERFAULT;
 	rp->rp_buflen = 0;
 	rp->rp_buf = rp->rp_ibuf;
-	alloc_lsowner++;
 	return sop;
 }
 
@@ -2558,22 +3092,22 @@
 /*
  * failure => all reset bets are off, nfserr_no_grace...
  */
-static int
-nfs4_client_to_reclaim(struct nfs4_client *clp)
+int
+nfs4_client_to_reclaim(char *name, int namlen)
 {
 	unsigned int strhashval;
 	struct nfs4_client_reclaim *crp = NULL;
 
-	crp = alloc_reclaim(clp->cl_name.len);
+	dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", namlen, name);
+	crp = alloc_reclaim(namlen);
 	if (!crp)
 		return 0;
-	strhashval = clientstr_hashval(clp->cl_name.data, clp->cl_name.len);
+	strhashval = clientstr_hashval(name, namlen);
 	INIT_LIST_HEAD(&crp->cr_strhash);
 	list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]);
-	memcpy(crp->cr_name.data, clp->cl_name.data, clp->cl_name.len);
-	crp->cr_name.len = clp->cl_name.len;
-	crp->cr_first_state = clp->cl_first_state;
-	crp->cr_expired = 0;
+	memcpy(crp->cr_name.data, name, namlen);
+	crp->cr_name.len = namlen;
+	reclaim_str_hashtbl_size++;
 	return 1;
 }
 
@@ -2618,6 +3152,9 @@
 	if (!client)
 		return NULL;
 
+	dprintk("NFSD: nfs4_find_reclaim_client for %.*s\n",
+		            clp->cl_name.len, clp->cl_name.data);
+
 	/* find clp->cl_name in reclaim_str_hashtbl */
 	strhashval = clientstr_hashval(client->cl_name.data,
 	                              client->cl_name.len);
@@ -2639,8 +3176,6 @@
 
 	if ((crp = nfs4_find_reclaim_client(clid)) == NULL)
 		return nfserr_reclaim_bad;
-	if (crp->cr_expired)
-		return nfserr_no_grace;
 	return nfs_ok;
 }
 
@@ -2657,10 +3192,18 @@
 
 	if (nfs4_init)
 		return;
+	if (nfsd4_init_slabs())
+		BUG(); /* XXXXXX!!! */
 	if (!nfs4_reclaim_init) {
+		int status;
+
 		for (i = 0; i < CLIENT_HASH_SIZE; i++)
 			INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
 		reclaim_str_hashtbl_size = 0;
+		nfsd4_init_rec_dir(recovery_dirname);
+		status = nfsd4_list_rec_dir(0);
+		if (status)
+			printk("NFSD: Failure in reading recovery data\n");
 		nfs4_reclaim_init = 1;
 	}
 	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
@@ -2689,6 +3232,8 @@
 
 	INIT_LIST_HEAD(&close_lru);
 	INIT_LIST_HEAD(&client_lru);
+	INIT_LIST_HEAD(&del_recall_lru);
+	spin_lock_init(&recall_lock);
 	boot_time = get_seconds();
 	grace_time = max(old_lease_time, lease_time);
 	if (reclaim_str_hashtbl_size == 0)
@@ -2725,6 +3270,15 @@
 {
 	int i;
 	struct nfs4_client *clp = NULL;
+	struct nfs4_delegation *dp = NULL;
+	struct nfs4_stateowner *sop = NULL;
+	struct list_head *pos, *next;
+
+	list_for_each_safe(pos, next, &close_lru) {
+		sop = list_entry(pos, struct nfs4_stateowner, so_close_lru);
+		list_del(&sop->so_close_lru);
+		nfs4_put_stateowner(sop);
+	}
 
 	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
 		while (!list_empty(&conf_id_hashtbl[i])) {
@@ -2736,20 +3290,31 @@
 			expire_client(clp);
 		}
 	}
+	spin_lock(&recall_lock);
+	list_for_each_safe(pos, next, &del_recall_lru) {
+		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
+		atomic_set(&dp->dl_state, NFS4_RECALL_COMPLETE);
+		release_delegation(dp);
+	}
+	spin_unlock(&recall_lock);
+
 	release_all_files();
 	cancel_delayed_work(&laundromat_work);
 	flush_scheduled_work();
 	nfs4_init = 0;
+	nfs4_reclaim_init = 0;
 	dprintk("NFSD: list_add_perfile %d list_del_perfile %d\n",
 			list_add_perfile, list_del_perfile);
 	dprintk("NFSD: add_perclient %d del_perclient %d\n",
 			add_perclient, del_perclient);
 	dprintk("NFSD: alloc_file %d free_file %d\n",
 			alloc_file, free_file);
-	dprintk("NFSD: alloc_sowner %d alloc_lsowner %d free_sowner %d\n",
-			alloc_sowner, alloc_lsowner, free_sowner);
 	dprintk("NFSD: vfsopen %d vfsclose %d\n",
 			vfsopen, vfsclose);
+	dprintk("NFSD: alloc_delegation %d free_delegation %d\n",
+			alloc_delegation, free_delegation);
+	if (nfsd4_free_slabs())
+		BUG(); /* XXX? */
 }
 
 void
@@ -2801,11 +3366,10 @@
 	/* populate reclaim_str_hashtbl with current confirmed nfs4_clientid */
 	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
 		list_for_each_entry(clp, &conf_id_hashtbl[i], cl_idhash) {
-			if (!nfs4_client_to_reclaim(clp)) {
+			if (!nfs4_client_to_reclaim(clp->cl_name.data, clp->cl_name.len)) {
 				nfs4_release_reclaim();
 				goto init_state;
 			}
-			reclaim_str_hashtbl_size++;
 		}
 	}
 init_state:
Index: linux-2.6.10/fs/nfsd/nfsproc.c
===================================================================
--- linux-2.6.10.orig/fs/nfsd/nfsproc.c	2004-12-25 05:34:30.000000000 +0800
+++ linux-2.6.10/fs/nfsd/nfsproc.c	2005-04-05 14:49:13.426688152 +0800
@@ -586,7 +586,6 @@
 		{ nfserr_dquot, -EDQUOT },
 #endif
 		{ nfserr_stale, -ESTALE },
-		{ nfserr_jukebox, -EWOULDBLOCK },
 		{ nfserr_jukebox, -ETIMEDOUT },
 		{ nfserr_dropit, -EAGAIN },
 		{ nfserr_dropit, -ENOMEM },
Index: linux-2.6.10/fs/nfsd/nfs4acl.c
===================================================================
--- linux-2.6.10.orig/fs/nfsd/nfs4acl.c	2004-12-25 05:34:29.000000000 +0800
+++ linux-2.6.10/fs/nfsd/nfs4acl.c	2005-04-05 14:49:13.429687696 +0800
@@ -89,6 +89,8 @@
 	return ret;
 }
 
+/* modify functions to take NFS errors */
+
 static int
 mode_from_nfs4(u32 perm, unsigned short *mode, unsigned int flags)
 {
Index: linux-2.6.10/fs/nfsd/nfs4idmap.c
===================================================================
--- linux-2.6.10.orig/fs/nfsd/nfs4idmap.c	2004-12-25 05:35:23.000000000 +0800
+++ linux-2.6.10/fs/nfsd/nfs4idmap.c	2005-04-05 14:49:13.414689976 +0800
@@ -78,9 +78,9 @@
 
 #define DefineSimpleCacheLookupMap(STRUCT, FUNC)			\
         DefineCacheLookup(struct STRUCT, h, FUNC##_lookup,		\
-        (struct STRUCT *item, int set), /*no setup */,			\
+        (struct STRUCT *item, int set),			\
 	& FUNC##_cache, FUNC##_hash(item), FUNC##_match(item, tmp),	\
-	STRUCT##_init(new, item), STRUCT##_update(tmp, item), 0)
+	STRUCT##_init(new, item), STRUCT##_update(tmp, item))
 
 /* Common entry handling */
 
Index: linux-2.6.10/fs/nfsd/vfs.c
===================================================================
--- linux-2.6.10.orig/fs/nfsd/vfs.c	2005-03-31 15:35:26.000000000 +0800
+++ linux-2.6.10/fs/nfsd/vfs.c	2005-04-05 14:49:13.417689520 +0800
@@ -304,6 +304,8 @@
 		 * we need to break all leases.
 		 */
 		err = break_lease(inode, FMODE_WRITE | O_NONBLOCK);
+		if (err == -EWOULDBLOCK)
+			err = -ETIMEDOUT;
 		if (err) /* ENOMEM or EWOULDBLOCK */
 			goto out_nfserr;
 
@@ -678,6 +680,8 @@
 	 * This may block while leases are broken.
 	 */
 	err = break_lease(inode, O_NONBLOCK | ((access & MAY_WRITE) ? FMODE_WRITE : 0));
+	if (err == -EWOULDBLOCK)
+		err = -ETIMEDOUT;
 	if (err) /* NOMEM or WOULDBLOCK */
 		goto out_nfserr;
 
@@ -822,21 +826,34 @@
 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
           struct kvec *vec, int vlen, unsigned long *count)
 {
-	struct raparms	*ra;
-	mm_segment_t	oldfs;
 	int		err;
 	struct file	*file;
-	struct inode	*inode;
 
 	err = nfsd_open(rqstp, fhp, S_IFREG, MAY_READ, &file);
 	if (err)
 		goto out;
+	err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
+
+	nfsd_close(file);
+out:
+	return err;
+}
+
+int
+nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
+              loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
+{
+	struct inode *inode;
+	struct raparms	*ra;
+	mm_segment_t	oldfs;
+	int		err;
+
 	err = nfserr_perm;
 	inode = file->f_dentry->d_inode;
 #ifdef MSNFS
 	if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
 		(!lock_may_read(inode, offset, *count)))
-		goto out_close;
+		goto out;
 #endif
 
 	/* Get readahead parameters */
@@ -872,8 +889,6 @@
 		dnotify_parent(file->f_dentry, DN_ACCESS);
 	} else 
 		err = nfserrno(err);
-out_close:
-	nfsd_close(file);
 out:
 	return err;
 }
@@ -888,25 +903,40 @@
 				struct kvec *vec, int vlen,
 	   			unsigned long cnt, int *stablep)
 {
-	struct svc_export	*exp;
 	struct file		*file;
-	struct dentry		*dentry;
-	struct inode		*inode;
-	mm_segment_t		oldfs;
 	int			err = 0;
-	int			stable = *stablep;
 
 	err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file);
 	if (err)
 		goto out;
 	if (!cnt)
 		goto out_close;
+
+	err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stablep);
+out_close:
+	nfsd_close(file);
+out:
+	return err;
+}
+
+int
+nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
+				loff_t offset, struct kvec *vec, int vlen,
+	   			unsigned long cnt, int *stablep)
+{
+	struct svc_export	*exp;
+	struct dentry		*dentry;
+	struct inode		*inode;
+	mm_segment_t		oldfs;
+	int			err = 0;
+	int			stable = *stablep;
+
 	err = nfserr_perm;
 
 #ifdef MSNFS
 	if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
 		(!lock_may_write(file->f_dentry->d_inode, offset, cnt)))
-		goto out_close;
+		goto out;
 #endif
 
 	dentry = file->f_dentry;
@@ -993,13 +1023,10 @@
 		err = 0;
 	else 
 		err = nfserrno(err);
-out_close:
-	nfsd_close(file);
 out:
 	return err;
 }
 
-
 #ifdef CONFIG_NFSD_V3
 /*
  * Commit all pending writes to stable storage.
Index: linux-2.6.10/fs/nfsd/nfs4callback.c
===================================================================
--- linux-2.6.10.orig/fs/nfsd/nfs4callback.c	2005-04-05 19:01:49.158500672 +0800
+++ linux-2.6.10/fs/nfsd/nfs4callback.c	2005-04-05 14:49:13.428687848 +0800
@@ -0,0 +1,589 @@
+/*
+ *  linux/fs/nfsd/nfs4callback.c
+ *
+ *  Copyright (c) 2001 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Kendrick Smith <kmsmith@umich.edu>
+ *  Andy Adamson <andros@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/inet.h>
+#include <linux/errno.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/state.h>
+#include <linux/sunrpc/sched.h>
+#include <linux/nfs4.h>
+
+#define NFSDDBG_FACILITY                NFSDDBG_PROC
+
+#define NFSPROC4_CB_NULL 0
+#define NFSPROC4_CB_COMPOUND 1
+
+/* declarations */
+static void nfs4_cb_null(struct rpc_task *task);
+extern spinlock_t recall_lock;
+
+/* Index of predefined Linux callback client operations */
+
+enum {
+        NFSPROC4_CLNT_CB_NULL = 0,
+	NFSPROC4_CLNT_CB_RECALL,
+};
+
+enum nfs_cb_opnum4 {
+	OP_CB_RECALL            = 4,
+};
+
+#define NFS4_MAXTAGLEN		20
+
+#define NFS4_enc_cb_null_sz		0
+#define NFS4_dec_cb_null_sz		0
+#define cb_compound_enc_hdr_sz		4
+#define cb_compound_dec_hdr_sz		(3 + (NFS4_MAXTAGLEN >> 2))
+#define op_enc_sz			1
+#define op_dec_sz			2
+#define enc_nfs4_fh_sz			(1 + (NFS4_FHSIZE >> 2))
+#define enc_stateid_sz			16
+#define NFS4_enc_cb_recall_sz		(cb_compound_enc_hdr_sz +       \
+					1 + enc_stateid_sz +            \
+					enc_nfs4_fh_sz)
+
+#define NFS4_dec_cb_recall_sz		(cb_compound_dec_hdr_sz  +      \
+					op_dec_sz)
+
+/*
+* Generic encode routines from fs/nfs/nfs4xdr.c
+*/
+static inline u32 *
+xdr_writemem(u32 *p, const void *ptr, int nbytes)
+{
+	int tmp = XDR_QUADLEN(nbytes);
+	if (!tmp)
+		return p;
+	p[tmp-1] = 0;
+	memcpy(p, ptr, nbytes);
+	return p + tmp;
+}
+
+#define WRITE32(n)               *p++ = htonl(n)
+#define WRITEMEM(ptr,nbytes)     do {                           \
+	p = xdr_writemem(p, ptr, nbytes);                       \
+} while (0)
+#define RESERVE_SPACE(nbytes)   do {                            \
+	p = xdr_reserve_space(xdr, nbytes);                     \
+	if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __FUNCTION__); \
+	BUG_ON(!p);                                             \
+} while (0)
+
+/*
+ * Generic decode routines from fs/nfs/nfs4xdr.c
+ */
+#define DECODE_TAIL                             \
+	status = 0;                             \
+out:                                            \
+	return status;                          \
+xdr_error:                                      \
+	dprintk("NFSD: xdr error! (%s:%d)\n", __FILE__, __LINE__); \
+	status = -EIO;                          \
+	goto out
+
+#define READ32(x)         (x) = ntohl(*p++)
+#define READ64(x)         do {                  \
+	(x) = (u64)ntohl(*p++) << 32;           \
+	(x) |= ntohl(*p++);                     \
+} while (0)
+#define READTIME(x)       do {                  \
+	p++;                                    \
+	(x.tv_sec) = ntohl(*p++);               \
+	(x.tv_nsec) = ntohl(*p++);              \
+} while (0)
+#define READ_BUF(nbytes)  do { \
+	p = xdr_inline_decode(xdr, nbytes); \
+	if (!p) { \
+		dprintk("NFSD: %s: reply buffer overflowed in line %d.", \
+			__FUNCTION__, __LINE__); \
+		return -EIO; \
+	} \
+} while (0)
+
+struct nfs4_cb_compound_hdr {
+	int		status;
+	u32		ident;
+	u32		nops;
+	u32		taglen;
+	char *		tag;
+};
+
+static struct {
+int stat;
+int errno;
+} nfs_cb_errtbl[] = {
+	{ NFS4_OK,		0               },
+	{ NFS4ERR_PERM,		EPERM           },
+	{ NFS4ERR_NOENT,	ENOENT          },
+	{ NFS4ERR_IO,		EIO             },
+	{ NFS4ERR_NXIO,		ENXIO           },
+	{ NFS4ERR_ACCESS,	EACCES          },
+	{ NFS4ERR_EXIST,	EEXIST          },
+	{ NFS4ERR_XDEV,		EXDEV           },
+	{ NFS4ERR_NOTDIR,	ENOTDIR         },
+	{ NFS4ERR_ISDIR,	EISDIR          },
+	{ NFS4ERR_INVAL,	EINVAL          },
+	{ NFS4ERR_FBIG,		EFBIG           },
+	{ NFS4ERR_NOSPC,	ENOSPC          },
+	{ NFS4ERR_ROFS,		EROFS           },
+	{ NFS4ERR_MLINK,	EMLINK          },
+	{ NFS4ERR_NAMETOOLONG,	ENAMETOOLONG    },
+	{ NFS4ERR_NOTEMPTY,	ENOTEMPTY       },
+	{ NFS4ERR_DQUOT,	EDQUOT          },
+	{ NFS4ERR_STALE,	ESTALE          },
+	{ NFS4ERR_BADHANDLE,	EBADHANDLE      },
+	{ NFS4ERR_BAD_COOKIE,	EBADCOOKIE      },
+	{ NFS4ERR_NOTSUPP,	ENOTSUPP        },
+	{ NFS4ERR_TOOSMALL,	ETOOSMALL       },
+	{ NFS4ERR_SERVERFAULT,	ESERVERFAULT    },
+	{ NFS4ERR_BADTYPE,	EBADTYPE        },
+	{ NFS4ERR_LOCKED,	EAGAIN          },
+	{ NFS4ERR_RESOURCE,	EREMOTEIO       },
+	{ NFS4ERR_SYMLINK,	ELOOP           },
+	{ NFS4ERR_OP_ILLEGAL,	EOPNOTSUPP      },
+	{ NFS4ERR_DEADLOCK,	EDEADLK         },
+	{ -1,                   EIO             }
+};
+
+static int
+nfs_cb_stat_to_errno(int stat)
+{
+	int i;
+	for (i = 0; nfs_cb_errtbl[i].stat != -1; i++) {
+		if (nfs_cb_errtbl[i].stat == stat)
+			return nfs_cb_errtbl[i].errno;
+	}
+	/* If we cannot translate the error, the recovery routines should
+	* handle it.
+	* Note: remaining NFSv4 error codes have values > 10000, so should
+	* not conflict with native Linux error codes.
+	*/
+	return stat;
+}
+
+/*
+ * XDR encode
+ */
+
+static int
+encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr)
+{
+	u32 * p;
+
+	RESERVE_SPACE(16);
+	WRITE32(0);            /* tag length is always 0 */
+	WRITE32(NFS4_MINOR_VERSION);
+	WRITE32(hdr->ident);
+	WRITE32(hdr->nops);
+	return 0;
+}
+
+static int
+encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec)
+{
+	u32 *p;
+	int len = cb_rec->cbr_fhlen;
+
+	RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len);
+	WRITE32(OP_CB_RECALL);
+	WRITEMEM(&cb_rec->cbr_stateid, sizeof(stateid_t));
+	WRITE32(cb_rec->cbr_trunc);
+	WRITE32(len);
+	WRITEMEM(cb_rec->cbr_fhval, len);
+	return 0;
+}
+
+static int
+nfs4_xdr_enc_cb_null(struct rpc_rqst *req, u32 *p)
+{
+	struct xdr_stream xdrs, *xdr = &xdrs;
+
+	xdr_init_encode(&xdrs, &req->rq_snd_buf, p);
+        RESERVE_SPACE(0);
+	return 0;
+}
+
+static int
+nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, u32 *p, struct nfs4_cb_recall *args)
+{
+	struct xdr_stream xdr;
+	struct nfs4_cb_compound_hdr hdr = {
+		.nops   = 1,
+	};
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_cb_compound_hdr(&xdr, &hdr);
+	return (encode_cb_recall(&xdr, args));
+}
+
+
+static int
+decode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr){
+        u32 *p;
+
+        READ_BUF(8);
+        READ32(hdr->status);
+        READ32(hdr->taglen);
+        READ_BUF(hdr->taglen + 4);
+        hdr->tag = (char *)p;
+        p += XDR_QUADLEN(hdr->taglen);
+        READ32(hdr->nops);
+        return 0;
+}
+
+static int
+decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
+{
+	u32 *p;
+	u32 op;
+	int32_t nfserr;
+
+	READ_BUF(8);
+	READ32(op);
+	if (op != expected) {
+		dprintk("NFSD: decode_cb_op_hdr: Callback server returned "
+		         " operation %d but we issued a request for %d\n",
+		         op, expected);
+		return -EIO;
+	}
+	READ32(nfserr);
+	if (nfserr != NFS_OK)
+		return -nfs_cb_stat_to_errno(nfserr);
+	return 0;
+}
+
+static int
+nfs4_xdr_dec_cb_null(struct rpc_rqst *req, u32 *p)
+{
+	return 0;
+}
+
+static int
+nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, u32 *p)
+{
+	struct xdr_stream xdr;
+	struct nfs4_cb_compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_cb_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_cb_op_hdr(&xdr, OP_CB_RECALL);
+out	:
+	return status;
+}
+
+/*
+ * RPC procedure tables
+ */
+#ifndef MAX
+# define MAX(a, b)      (((a) > (b))? (a) : (b))
+#endif
+
+#define PROC(proc, call, argtype, restype)                              \
+[NFSPROC4_CLNT_##proc] = {                                      	\
+        .p_proc   = NFSPROC4_CB_##call,					\
+        .p_encode = (kxdrproc_t) nfs4_xdr_##argtype,                    \
+        .p_decode = (kxdrproc_t) nfs4_xdr_##restype,                    \
+        .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2,  \
+}
+
+struct rpc_procinfo     nfs4_cb_procedures[] = {
+    PROC(CB_NULL,      NULL,     enc_cb_null,     dec_cb_null),
+    PROC(CB_RECALL,    COMPOUND,   enc_cb_recall,      dec_cb_recall),
+};
+
+struct rpc_version              nfs_cb_version4 = {
+        .number                 = 1,
+        .nrprocs                = sizeof(nfs4_cb_procedures)/sizeof(nfs4_cb_procedures[0]),
+        .procs                  = nfs4_cb_procedures
+};
+
+static struct rpc_version *	nfs_cb_version[] = {
+	NULL,
+	&nfs_cb_version4,
+};
+
+/*
+ * Use the SETCLIENTID credential
+ */
+struct rpc_cred *
+nfsd4_lookupcred(struct nfs4_client *clp, int taskflags)
+{
+        struct auth_cred acred;
+	struct rpc_clnt *clnt = clp->cl_callback.cb_client;
+        struct rpc_cred *ret = NULL;
+
+	if (!clnt)
+		goto out;
+        get_group_info(clp->cl_cred.cr_group_info);
+        acred.uid = clp->cl_cred.cr_uid;
+        acred.gid = clp->cl_cred.cr_gid;
+        acred.group_info = clp->cl_cred.cr_group_info;
+
+        dprintk("NFSD:     looking up %s cred\n",
+                clnt->cl_auth->au_ops->au_name);
+        ret = rpcauth_lookup_credcache(clnt->cl_auth, &acred, taskflags);
+        put_group_info(clp->cl_cred.cr_group_info);
+out:
+        return ret;
+}
+
+/*
+ * Set up the callback client and put a NFSPROC4_CB_NULL on the wire...
+ */
+void
+nfsd4_probe_callback(struct nfs4_client *clp)
+{
+	struct sockaddr_in	addr;
+	struct nfs4_callback    *cb = &clp->cl_callback;
+	struct rpc_timeout	timeparms;
+	struct rpc_xprt *	xprt;
+	struct rpc_program *	program = &cb->cb_program;
+	struct rpc_stat *	stat = &cb->cb_stat;
+	struct rpc_clnt *	clnt;
+	struct rpc_message msg = {
+		.rpc_proc       = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
+		.rpc_argp       = clp,
+	};
+	char                    hostname[32];
+	int status;
+
+	dprintk("NFSD: probe_callback. cb_parsed %d cb_set %d\n",
+			cb->cb_parsed, atomic_read(&cb->cb_set));
+	if (!cb->cb_parsed || atomic_read(&cb->cb_set))
+		return;
+
+	/* Initialize address */
+	memset(&addr, 0, sizeof(addr));
+	addr.sin_family = AF_INET;
+	addr.sin_port = htons(cb->cb_port);
+	addr.sin_addr.s_addr = htonl(cb->cb_addr);
+
+	/* Initialize timeout */
+	timeparms.to_initval = (NFSD_LEASE_TIME/4) * HZ;
+	timeparms.to_retries = 5;
+	timeparms.to_maxval = (NFSD_LEASE_TIME/2) * HZ;
+	timeparms.to_exponential = 1;
+
+	/* Create RPC transport */
+	if (!(xprt = xprt_create_proto(IPPROTO_TCP, &addr, &timeparms))) {
+		dprintk("NFSD: couldn't create callback transport!\n");
+		goto out_err;
+	}
+
+	/* Initialize rpc_program */
+	program->name = "nfs4_cb";
+	program->number = cb->cb_prog;
+	program->nrvers = sizeof(nfs_cb_version)/sizeof(nfs_cb_version[0]);
+	program->version = nfs_cb_version;
+	program->stats = stat;
+
+	/* Initialize rpc_stat */
+	memset(stat, 0, sizeof(struct rpc_stat));
+	stat->program = program;
+
+	/* Create RPC client
+ 	 *
+	 * XXX AUTH_UNIX only - need AUTH_GSS....
+	 */
+	sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr.sin_addr.s_addr));
+	if (!(clnt = rpc_create_client(xprt, hostname, program, 1, RPC_AUTH_UNIX))) {
+		dprintk("NFSD: couldn't create callback client\n");
+		goto out_xprt;
+	}
+	clnt->cl_intr = 1;
+	clnt->cl_softrtry = 1;
+	clnt->cl_chatty = 1;
+
+	/* Kick rpciod, put the call on the wire. */
+
+	if (rpciod_up() != 0) {
+		dprintk("nfsd: couldn't start rpciod for callbacks!\n");
+		goto out_clnt;
+	}
+
+	/* the task holds a reference to the nfs4_client struct */
+	cb->cb_client = clnt;
+	atomic_inc(&clp->cl_count);
+
+	msg.rpc_cred = nfsd4_lookupcred(clp,0);
+	status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, nfs4_cb_null, NULL);
+
+	if (status != 0) {
+		dprintk("NFSD: asynchronous NFSPROC4_CB_NULL failed!\n");
+		goto out_rpciod;
+	}
+	return;
+
+out_rpciod:
+	atomic_dec(&clp->cl_count);
+	rpciod_down();
+out_clnt:
+	rpc_shutdown_client(clnt);
+	goto out_err;
+out_xprt:
+	xprt_destroy(xprt);
+out_err:
+	dprintk("NFSD: warning: no callback path to client %.*s\n",
+		clp->cl_name.len, clp->cl_name.data);
+	cb->cb_client = NULL;
+}
+
+static void
+nfs4_cb_null(struct rpc_task *task)
+{
+	struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp;
+	struct nfs4_callback *cb = &clp->cl_callback;
+	u32 addr = htonl(cb->cb_addr);
+
+	dprintk("NFSD: nfs4_cb_null task->tk_status %d\n", task->tk_status);
+
+	if (task->tk_status < 0) {
+		dprintk("NFSD: callback establishment to client %.*s failed\n",
+			clp->cl_name.len, clp->cl_name.data);
+		goto out;
+	}
+	atomic_set(&cb->cb_set, 1);
+	dprintk("NFSD: callback set to client %u.%u.%u.%u\n", NIPQUAD(addr));
+out:
+	put_nfs4_client(clp);
+}
+
+/*
+ *  Called with dp->dl_count incremented
+ */
+static void
+nfs4_cb_recall_done(struct rpc_task *task)
+{
+	struct nfs4_cb_recall *cbr = (struct nfs4_cb_recall *)task->tk_calldata;
+	struct nfs4_delegation *dp = cbr->cbr_dp;
+	int status;
+
+	spin_lock(&recall_lock);
+
+	/* all is well... */
+	if (task->tk_status == 0)
+		goto out;
+
+	/* network partition, retry nfsd4_cb_recall once.  */
+	if (task->tk_status == -EIO) {
+		if (atomic_read(&dp->dl_recall_cnt) == 0)
+			goto retry;
+		else
+			/* callback channel no longer available */
+			atomic_set(&dp->dl_client->cl_callback.cb_set, 0);
+	}
+
+	/* Race: a recall occurred miliseconds after a delegation was granted.
+	* Client may have received recall prior to delegation. retry recall
+	* once.
+	* XXX what about nfserr_bad_stateid?
+	*/
+	if (task->tk_status == -EBADHANDLE) {
+		if (atomic_read(&dp->dl_recall_cnt) == 0)
+			goto retry;
+	}
+
+	/* nfs4_laundromat will reap delegation */
+	atomic_set(&dp->dl_state, NFS4_RECALL_COMPLETE);
+
+out:
+	atomic_dec(&dp->dl_count);
+	BUG_ON(atomic_read(&dp->dl_count) < 0);
+	spin_unlock(&recall_lock);
+	return;
+
+retry:
+	atomic_inc(&dp->dl_recall_cnt);
+	spin_unlock(&recall_lock);
+	/* sleep 2 seconds before retrying recall */
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	schedule_timeout(2*HZ);
+	status = nfsd4_cb_recall(dp);
+	dprintk("NFSD: nfs4_cb_recall_done: retry status: %d  dp %p dl_flock %p\n",status,dp, dp->dl_flock);
+}
+
+/*
+ * called with dp->dl_count inc'ed.
+ * nfs4_lock_state() may or may not have been called.
+ */
+int
+nfsd4_cb_recall(struct nfs4_delegation *dp)
+{
+	struct nfs4_client *clp;
+	struct rpc_clnt *clnt;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
+	};
+	struct nfs4_cb_recall *cbr = &dp->dl_recall;
+	int status;
+
+	dprintk("NFSD: nfsd4_cb_recall NFS4_enc_cb_recall_sz %d NFS4_dec_cb_recall_sz %d \n",NFS4_enc_cb_recall_sz,NFS4_dec_cb_recall_sz);
+
+	clp = dp->dl_client;
+	clnt = clp->cl_callback.cb_client;
+	status = EIO;
+	if ((!atomic_read(&clp->cl_callback.cb_set)) || !clnt)
+		goto out_free;
+
+	msg.rpc_argp = cbr;
+	msg.rpc_resp = cbr;
+	msg.rpc_cred = nfsd4_lookupcred(clp,0);
+
+	cbr->cbr_trunc = 0; /* XXX need to implement truncate optimization */
+	cbr->cbr_dp = dp;
+
+	if ((status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
+		nfs4_cb_recall_done, cbr ))) {
+		dprintk("NFSD: recall_delegation: rpc_call_async failed %d\n",
+			status);
+		goto out_fail;
+	}
+out:
+	return status;
+out_fail:
+	status = nfserrno(status);
+	out_free:
+	kfree(cbr);
+	goto out;
+}
Index: linux-2.6.10/fs/nfsd/nfs4proc.c
===================================================================
--- linux-2.6.10.orig/fs/nfsd/nfs4proc.c	2004-12-25 05:35:40.000000000 +0800
+++ linux-2.6.10/fs/nfsd/nfs4proc.c	2005-04-05 14:49:13.432687240 +0800
@@ -461,28 +461,12 @@
 }
 
 static inline int
-access_bits_permit_read(unsigned long access_bmap)
-{
-	return test_bit(NFS4_SHARE_ACCESS_READ, &access_bmap) ||
-		test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap);
-}
-
-static inline int
-access_bits_permit_write(unsigned long access_bmap)
-{
-	return test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap) ||
-		test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap);
-}
-
-static inline int
 nfsd4_read(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_read *read)
 {
-	struct nfs4_stateid *stp;
 	int status;
+	struct file *filp;
 
 	/* no need to check permission - this will be done in nfsd_read() */
-	if (nfs4_in_grace())
-		return nfserr_grace;
 
 	if (read->rd_offset >= OFFSET_MAX)
 		return nfserr_inval;
@@ -508,21 +492,17 @@
 		goto out;
 	}
 	/* check stateid */
-	if ((status = nfs4_preprocess_stateid_op(current_fh, &read->rd_stateid, 
-					CHECK_FH | RDWR_STATE, &stp))) {
+	if ((status = nfs4_preprocess_stateid_op(current_fh, &read->rd_stateid,
+					CHECK_FH | RD_STATE, &filp))) {
 		dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
 		goto out;
 	}
-	status = nfserr_openmode;
-	if (!access_bits_permit_read(stp->st_access_bmap)) {
-		dprintk("NFSD: nfsd4_read: file not opened for read!\n");
-		goto out;
-	}
 	status = nfs_ok;
 out:
 	nfs4_unlock_state();
 	read->rd_rqstp = rqstp;
 	read->rd_fhp = current_fh;
+	read->rd_filp = filp;
 	return status;
 }
 
@@ -562,6 +542,8 @@
 {
 	int status;
 
+	if (nfs4_in_grace())
+		return nfserr_grace;
 	status = nfsd_unlink(rqstp, current_fh, 0, remove->rm_name, remove->rm_namelen);
 	if (status == nfserr_symlink)
 		return nfserr_notdir;
@@ -580,6 +562,9 @@
 
 	if (!save_fh->fh_dentry)
 		return status;
+	if (nfs4_in_grace() && !(save_fh->fh_export->ex_flags
+					& NFSEXP_NOSUBTREECHECK))
+		return nfserr_grace;
 	status = nfsd_rename(rqstp, save_fh, rename->rn_sname,
 			     rename->rn_snamelen, current_fh,
 			     rename->rn_tname, rename->rn_tnamelen);
@@ -605,12 +590,8 @@
 static inline int
 nfsd4_setattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_setattr *setattr)
 {
-	struct nfs4_stateid *stp;
 	int status = nfs_ok;
 
-	if (nfs4_in_grace())
-		return nfserr_grace;
-
 	if (!current_fh->fh_dentry)
 		return nfserr_nofilehandle;
 
@@ -626,15 +607,10 @@
 		nfs4_lock_state();
 		if ((status = nfs4_preprocess_stateid_op(current_fh, 
 						&setattr->sa_stateid, 
-						CHECK_FH | RDWR_STATE, &stp))) {
+						CHECK_FH | WR_STATE, NULL))) {
 			dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
 			goto out_unlock;
 		}
-		status = nfserr_openmode;
-		if (!access_bits_permit_write(stp->st_access_bmap)) {
-			dprintk("NFSD: nfsd4_setattr: not opened for write!\n");
-			goto out_unlock;
-		}
 		nfs4_unlock_state();
 	}
 	status = nfs_ok;
@@ -654,14 +630,11 @@
 static inline int
 nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_write *write)
 {
-	struct nfs4_stateid *stp;
 	stateid_t *stateid = &write->wr_stateid;
+	struct file *filp;
 	u32 *p;
 	int status = nfs_ok;
 
-	if (nfs4_in_grace())
-		return nfserr_grace;
-
 	/* no need to check permission - this will be done in nfsd_write() */
 
 	if (write->wr_offset >= OFFSET_MAX)
@@ -677,18 +650,13 @@
 		goto zero_stateid;
 	}
 	if ((status = nfs4_preprocess_stateid_op(current_fh, stateid, 
-					CHECK_FH | RDWR_STATE, &stp))) {
+					CHECK_FH | WR_STATE, &filp))) {
 		dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
 		goto out;
 	}
 
-	status = nfserr_openmode;
-	if (!access_bits_permit_write(stp->st_access_bmap)) {
-		dprintk("NFSD: nfsd4_write: file not open for write!\n");
-		goto out;
-	}
-
 zero_stateid:
+
 	nfs4_unlock_state();
 	write->wr_bytes_written = write->wr_buflen;
 	write->wr_how_written = write->wr_stable_how;
@@ -696,9 +664,16 @@
 	*p++ = nfssvc_boot.tv_sec;
 	*p++ = nfssvc_boot.tv_usec;
 
-	status =  nfsd_write(rqstp, current_fh, write->wr_offset,
-			  write->wr_vec, write->wr_vlen, write->wr_buflen,
-			  &write->wr_how_written);
+	if (filp)
+		status =  nfsd_vfs_write(rqstp, current_fh, filp,
+				write->wr_offset, write->wr_vec,
+				write->wr_vlen, write->wr_buflen,
+			  	&write->wr_how_written);
+	else
+		status =  nfsd_write(rqstp, current_fh, write->wr_offset,
+			  	write->wr_vec, write->wr_vlen, write->wr_buflen,
+			  	&write->wr_how_written);
+
 	if (status == nfserr_symlink)
 		status = nfserr_inval;
 	return status;
@@ -872,6 +847,9 @@
 		case OP_CREATE:
 			op->status = nfsd4_create(rqstp, current_fh, &op->u.create);
 			break;
+		case OP_DELEGRETURN:
+			op->status = nfsd4_delegreturn(rqstp, current_fh, &op->u.delegreturn);
+			break;
 		case OP_GETATTR:
 			op->status = nfsd4_getattr(rqstp, current_fh, &op->u.getattr);
 			break;
Index: linux-2.6.10/fs/nfsd/export.c
===================================================================
--- linux-2.6.10.orig/fs/nfsd/export.c	2004-12-25 05:34:58.000000000 +0800
+++ linux-2.6.10/fs/nfsd/export.c	2005-04-05 14:49:13.415689824 +0800
@@ -255,7 +255,7 @@
 	new->ek_export = item->ek_export;
 }
 
-static DefineSimpleCacheLookup(svc_expkey,0) /* no inplace updates */
+static DefineSimpleCacheLookup(svc_expkey)
 
 #define	EXPORT_HASHBITS		8
 #define	EXPORT_HASHMAX		(1<< EXPORT_HASHBITS)
@@ -492,8 +492,72 @@
 	new->ex_fsid = item->ex_fsid;
 }
 
-static DefineSimpleCacheLookup(svc_export,1) /* allow inplace updates */
+struct svc_export *
+svc_export_lookup(struct svc_export *item, int set)
+{
+	struct svc_export *tmp, *new = NULL;
+	struct cache_head **hp, **head;
 
+	head = &svc_export_cache.hash_table[svc_export_hash(item)];
+retry:
+	if (set||new)
+		write_lock(&svc_export_cache.hash_lock);
+	else
+		read_lock(&svc_export_cache.hash_lock);
+	for(hp=head; *hp != NULL; hp = &tmp->h.next) {
+		tmp = container_of(*hp, struct svc_export, h);
+		if (svc_export_match(item, tmp)) { /* found a match */
+			cache_get(&tmp->h);
+			if (set) {
+				if (test_bit(CACHE_NEGATIVE,  &item->h.flags))
+					 set_bit(CACHE_NEGATIVE, &tmp->h.flags);
+				else {
+					clear_bit(CACHE_NEGATIVE, &tmp->h.flags);
+					svc_export_update(tmp, item);
+				}
+			}
+			if (set||new)
+				write_unlock(&svc_export_cache.hash_lock);
+			else
+				read_unlock(&svc_export_cache.hash_lock);
+			if (set)
+				cache_fresh(&svc_export_cache, &tmp->h,
+						item->h.expiry_time);
+			if (new)
+				svc_export_put(&new->h, &svc_export_cache);
+			return tmp;
+		}
+	}
+	/* Didn't find anything */
+	if (new) {
+		svc_export_init(new, item);
+		new->h.next = *head;
+		*head = &new->h;
+		set_bit(CACHE_HASHED, &new->h.flags);
+		svc_export_cache.entries++;
+		if (set) {
+			tmp = new;
+			if (test_bit(CACHE_NEGATIVE, &item->h.flags))
+				set_bit(CACHE_NEGATIVE, &tmp->h.flags);
+			else
+				svc_export_update(tmp, item);
+		}
+	}
+	if (set||new)
+		write_unlock(&svc_export_cache.hash_lock);
+	else
+		read_unlock(&svc_export_cache.hash_lock);
+	if (new && set)
+		cache_fresh(&svc_export_cache, &new->h, item->h.expiry_time);
+	if (new)
+		return new;
+	new = kmalloc(sizeof(*new), GFP_KERNEL);
+	if (new) {
+		cache_init(&new->h);
+		goto retry;
+	}
+	return NULL;
+}
 
 struct svc_expkey *
 exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp)
Index: linux-2.6.10/fs/nfsd/nfssvc.c
===================================================================
--- linux-2.6.10.orig/fs/nfsd/nfssvc.c	2004-12-25 05:34:58.000000000 +0800
+++ linux-2.6.10/fs/nfsd/nfssvc.c	2005-04-05 14:49:13.422688760 +0800
@@ -378,4 +378,6 @@
 	.pg_name		= "nfsd",		/* program name */
 	.pg_class		= "nfsd",		/* authentication class */
 	.pg_stats		= &nfsd_svcstats,	/* version table */
+	.pg_authenticate	= &svc_set_client,	/* export authentication */
+
 };
Index: linux-2.6.10/fs/nfsd/nfs4recover.c
===================================================================
--- linux-2.6.10.orig/fs/nfsd/nfs4recover.c	2005-04-05 19:01:49.158500672 +0800
+++ linux-2.6.10/fs/nfsd/nfs4recover.c	2005-04-05 14:49:13.430687544 +0800
@@ -0,0 +1,411 @@
+/*
+*  linux/fs/nfsd/nfs4recover.c
+*
+*  Copyright (c) 2004 The Regents of the University of Michigan.
+*  All rights reserved.
+*
+*  Andy Adamson <andros@umich.edu>
+*
+*  Redistribution and use in source and binary forms, with or without
+*  modification, are permitted provided that the following conditions
+*  are met:
+*
+*  1. Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*  2. Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in the
+*     documentation and/or other materials provided with the distribution.
+*  3. Neither the name of the University nor the names of its
+*     contributors may be used to endorse or promote products derived
+*     from this software without specific prior written permission.
+*
+*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+*  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+*  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+*  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+*  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+*  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+*  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+*  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+*  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+*  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+*  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+
+#include <linux/param.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfs4.h>
+#include <linux/nfsd/state.h>
+#include <linux/nfsd/xdr4.h>
+#include <linux/file.h>
+#include <linux/namei.h>
+#include <asm/uaccess.h>
+
+#define NFSDDBG_FACILITY                NFSDDBG_PROC
+
+/* MAX_FILE_LEN/2 = max client id name length due to changing name
+ * into hex
+ */
+#define MAX_FILE_LEN 256
+
+/* Globals */
+char recovery_dirname[] = "/var/lib/nfs/v4recovery";
+static uid_t	saveuid;
+static gid_t   savegid;
+static struct nameidata nd_rec_init;
+static int rec_dir_init = 0;
+
+void
+nfs4_save_set_user(void)
+{
+	saveuid = current->fsuid;
+	savegid = current->fsgid;
+	current->fsuid = 0;
+	current->fsgid = 0;
+}
+
+void
+nfs4_reset_user(void)
+{
+	current->fsuid = saveuid;
+	current->fsgid = savegid;
+}
+
+void
+nfs4_make_rec_filename(char **filename, struct nfs4_client *clp)
+{
+	char   	*fname = *filename;
+	int	flen = MAX_FILE_LEN;
+
+	memset(fname, 0, flen);
+	qword_addhex(&fname, &flen, clp->cl_name.data, clp->cl_name.len);
+}
+
+/* XXX need to check dput() mntput ?? */
+int
+nfsd4_create_clid_file(struct nfs4_client *clp)
+{
+	struct file 		*filp = NULL;
+	struct dentry		*dentry;
+	mm_segment_t		oldfs;
+	loff_t			offset = 0;
+	char			fbuf[MAX_FILE_LEN], *fname = fbuf;
+	int 			status;
+
+
+	if (!rec_dir_init)
+		return -EINVAL;
+	nfs4_save_set_user();
+
+	dprintk("NFSD: nfsd4_create_clid_file IN recdir [d:mnt] count %d:%d\n",
+		atomic_read(&nd_rec_init.dentry->d_count),
+		atomic_read(&nd_rec_init.mnt->mnt_count));
+
+	/* lock the parent */
+	down(&nd_rec_init.dentry->d_inode->i_sem);
+
+	nfs4_make_rec_filename(&fname, clp);
+	/* dentry->d_count will be 1 */
+	dentry = lookup_one_len(fname, nd_rec_init.dentry, strlen(fname));
+	status = PTR_ERR(dentry);
+	if (IS_ERR(dentry))
+		goto out_unlock;
+
+	status = -EEXIST;
+	if (dentry->d_inode){
+		dprintk("NFSD: nfsd4_create_clid_file: FILE EXISTS\n");
+		goto out_unlock;
+	}
+
+	/* nd_rec_init.dentry->d_count is bumped */
+	status = vfs_create(nd_rec_init.dentry->d_inode, dentry, S_IRWXU, NULL);
+	if (status < 0)
+		goto out_unlock;
+
+	up(&nd_rec_init.dentry->d_inode->i_sem);
+
+	filp = dentry_open(dget(dentry), mntget(nd_rec_init.mnt), O_RDWR);
+	status = PTR_ERR(filp);
+	if (IS_ERR(filp))
+		goto out_mnt;
+
+	oldfs = get_fs(); set_fs(KERNEL_DS);
+	status = vfs_write(filp, clp->cl_name.data, clp->cl_name.len, &offset);
+	set_fs(oldfs);
+
+	dprintk("NFSD: nfsd4_create_clid_file vfs_write returns %d\n",status);
+	if (status >= 0)
+		status = nfs_ok;
+
+	if (filp->f_op && filp->f_op->flush) {
+		int err = filp->f_op->flush(filp);
+		dprintk("NFSD: nfsd4_create_clid_file called flush\n");
+		if (!status)
+			status = err;
+	}
+	/* dget and mntget in dentry_open call */
+	fput(filp);
+
+	/* dentry->d_count will be 0 */
+	dput(dentry);
+out_mnt:
+	/* dget in vfs_create call */
+	dput(nd_rec_init.dentry);
+
+out:
+	nfs4_reset_user();
+
+	dprintk("NFSD: nfsd4_create_clid_file OUT recdir [d:mnt] count %d:%d\n",
+		atomic_read(&nd_rec_init.dentry->d_count),
+		atomic_read(&nd_rec_init.mnt->mnt_count));
+	dprintk("NFSD: nfsd4_create_clid_file returns %d\n",status);
+
+	return status;
+
+out_unlock:
+	up(&nd_rec_init.dentry->d_inode->i_sem);
+	goto out;
+}
+
+/*
+ * called with pdentry->d_inode->i_sem held ?
+ */
+int
+nfsd4_unlink_rec_file(char *name, int namlen)
+{
+	struct dentry *dentry;
+	int type, status;
+
+	dprintk("NFSD: nfsd4_unlink_rec_file. name %.*s\n", namlen, name);
+
+	dentry = lookup_one_len(name, nd_rec_init.dentry, namlen);
+	dprintk("NFSD: nfsd4_unlink_rec_file POST LOOKUP nd_rec d_count %d\n",
+		atomic_read(&nd_rec_init.dentry->d_count));
+	status = PTR_ERR(dentry);
+	if (IS_ERR(dentry))
+		goto out;
+
+	status = -ENOENT;
+	if (!dentry->d_inode) {
+		dput(dentry);
+		goto out;
+	}
+
+	/* should only be files here! */
+	type = dentry->d_inode->i_mode & S_IFMT;
+	status = -EISDIR;
+	if (!(type & S_IFREG)) {
+		dput(dentry);
+		goto out;
+	}
+
+	dprintk("NFSD: nfsd4_unlink_rec_file PRE VFS UNLINK [%d:%d]\n",
+		atomic_read(&nd_rec_init.dentry->d_count),
+		atomic_read(&nd_rec_init.mnt->mnt_count));
+
+	status = vfs_unlink(nd_rec_init.dentry->d_inode, dentry);
+
+	dprintk("NFSD: nfsd4_unlink_rec_file POST VFS UNLINK [%d:%d]\n",
+		atomic_read(&nd_rec_init.dentry->d_count),
+		atomic_read(&nd_rec_init.mnt->mnt_count));
+
+	dprintk("NFSD: nfsd4_unlink_rec_file FILE dentry->d_count %d\n",
+		atomic_read(&dentry->d_count));
+out:
+	dprintk("NFSD: nfsd4_unlink_rec_file returns %d\n",status);
+	return status;
+}
+
+void
+nfsd4_remove_clid_file(struct nfs4_client *clp)
+{
+	char			fbuf[MAX_FILE_LEN], *fname = fbuf;
+	int 			status;
+
+	if (!rec_dir_init)
+		return;
+
+	dprintk("NFSD: nfsd4_remove_clid_file client %.*s\n",
+		clp->cl_name.len,clp->cl_name.data);
+
+	nfs4_save_set_user();
+
+	dprintk("NFSD: nfsd4_remove_clid_file IN recdir [d:mnt] count %d:%d\n",
+		atomic_read(&nd_rec_init.dentry->d_count),
+		atomic_read(&nd_rec_init.mnt->mnt_count));
+
+	nfs4_make_rec_filename(&fname, clp);
+	status = nfsd4_unlink_rec_file(fname, strlen(fname));
+	nfs4_reset_user();
+	if (status != nfs_ok)
+		printk("NFSD: Failed to remove expired client state file %.*s from %s\n", strlen(fname), fname, recovery_dirname);
+
+	dprintk("NFSD: nfsd4_remove_clid_file OUT recdir [d:mnt] count %d:%d\n",
+		atomic_read(&nd_rec_init.dentry->d_count),
+		atomic_read(&nd_rec_init.mnt->mnt_count));
+	return;
+}
+
+struct rec_dirent {
+	int clear;
+};
+
+/*
+ * on reboot, stuff the reclaim hash with known client id's.
+ *
+ * the filename may not equal the clid. the clid might be the first
+ * (and so far only) line of data in the file.
+ *
+ * i will probably end up writing data such as the setclientid principal
+ * to each clid file. if i do, i will always put the clid as the
+ * first line of data.
+ */
+
+int
+nfsd4_get_recdir_dirent(struct rec_dirent *rdirent, const char *name,
+		int namlen, loff_t offset, ino_t ino, unsigned int d_type)
+{
+	struct dentry 		*dclid;
+	struct file		*filp;
+	mm_segment_t		oldfs;
+	int			status = nfs_ok;
+
+	dprintk("NFSD: nfsd4_get_recdir_dirent IN recdir [d:mnt] count %d:%d\n",
+		atomic_read(&nd_rec_init.dentry->d_count),
+		atomic_read(&nd_rec_init.mnt->mnt_count));
+
+	dprintk("NFSD: nfsd4_get_recdir_dirent name %.*s, clear %d\n",
+		namlen, name, rdirent->clear);
+
+	if (name && isdotent(name, namlen))
+		goto out;
+
+	dclid = lookup_one_len(name, nd_rec_init.dentry, namlen);
+	status = PTR_ERR(dclid);
+	if(IS_ERR(dclid))
+		goto out;
+
+	if (rdirent->clear){
+		dprintk("NFSD: nfsd4_get_recdir_dirent REMOVE\n");
+
+	dprintk("NFSD: nfsd4_get_recdir_dirent PRE VFS_UNLINK [%d:%d]\n",
+		atomic_read(&nd_rec_init.dentry->d_count),
+		atomic_read(&nd_rec_init.mnt->mnt_count));
+
+		status = vfs_unlink(nd_rec_init.dentry->d_inode, dclid);
+
+	dprintk("NFSD: nfsd4_get_recdir_dirent POST VFS_UNLINK [%d:%d]\n",
+		atomic_read(&nd_rec_init.dentry->d_count),
+		atomic_read(&nd_rec_init.mnt->mnt_count));
+
+	} else {
+		char 	buf[MAX_FILE_LEN];
+
+		dprintk("NFSD: nfsd4_get_recdir_dirent READ\n");
+
+		filp = dentry_open(dclid, mntget(nd_rec_init.mnt), O_RDWR);
+		if (IS_ERR(filp)) {
+			status = PTR_ERR(filp);
+			goto out;
+		}
+
+		memset(buf, 0, MAX_FILE_LEN);
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		status = vfs_read(filp, buf, MAX_FILE_LEN, &filp->f_pos);
+		set_fs(oldfs);
+
+		dprintk("NFSD: nfsd4_get_recdir_dirent vfs_read returns %d\n",
+			status);
+		if (status > 0)
+			status = nfs4_client_to_reclaim(buf, status);
+		fput(filp);
+	}
+out:
+	dprintk("NFSD:nfsd4_get_recdir_dirent OUT recdir [d:mnt] count %d:%d\n",
+		atomic_read(&nd_rec_init.dentry->d_count),
+		atomic_read(&nd_rec_init.mnt->mnt_count));
+
+	dprintk("NFSD: nfsd4_get_recdir_dirent returns %d\n",status);
+	return 0;
+}
+
+int
+nfsd4_list_rec_dir(int clear)
+{
+	struct file		*filp;
+	struct rec_dirent	rdirent;
+	int 			status;
+
+	if (!rec_dir_init)
+		return -EINVAL;
+
+	nfs4_save_set_user();
+
+	dprintk("NFSD: nfsd4_list_rec_dir IN recdir [d:mnt] count %d:%d\n",
+		atomic_read(&nd_rec_init.dentry->d_count),
+		atomic_read(&nd_rec_init.mnt->mnt_count));
+
+	/* open directory */
+	filp = dentry_open(dget(nd_rec_init.dentry), mntget(nd_rec_init.mnt),
+		            O_RDWR);
+	status = PTR_ERR(filp);
+	if (IS_ERR(filp))
+		goto out;
+	rdirent.clear = clear;
+
+	/* read the directory entries into memory */
+	status = vfs_readdir(filp, (filldir_t) nfsd4_get_recdir_dirent,
+			(void*)&rdirent);
+
+	fput(filp);
+out:
+	dprintk("NFSD: nfsd4_list_rec_dir OUT recdir [d:mnt] count %d:%d\n",
+		atomic_read(&nd_rec_init.dentry->d_count),
+		atomic_read(&nd_rec_init.mnt->mnt_count));
+
+	dprintk("NFSD: nfsd4_list_rec_dir DONE status: %d\n", status);
+
+	nfs4_reset_user();
+	return status;
+}
+
+
+/*
+ * Hold reference to the recovery directory.
+ */
+
+void
+nfsd4_init_rec_dir(char *rec_dirname)
+{
+	int 			status;
+
+	printk("NFSD: Using %s as the NFSv4 state recovery directory\n",
+		rec_dirname);
+
+	nfs4_save_set_user();
+
+        status = path_lookup(rec_dirname, LOOKUP_FOLLOW, &nd_rec_init);
+
+	printk("NFSD: nfsd4_init_rec_dir INITIAL recdir [d:mnt] count %d:%d\n",
+		atomic_read(&nd_rec_init.dentry->d_count),
+		atomic_read(&nd_rec_init.mnt->mnt_count));
+
+	if (!status)
+		rec_dir_init = 1;
+	nfs4_reset_user();
+	printk("NFSD: nfsd4_init_rec_dir rec_dir_init %d\n", rec_dir_init);
+}
+
+void
+nfsd4_shutdown_rec_dir(void)
+{
+	rec_dir_init = 0;
+	path_release(&nd_rec_init);
+
+	printk("NFSD: nfsd4_shutdown_rec_dir FINAL recdir [d:mnt] count %d:%d\n",
+		atomic_read(&nd_rec_init.dentry->d_count),
+		atomic_read(&nd_rec_init.mnt->mnt_count));
+}
Index: linux-2.6.10/fs/nfsd/Makefile
===================================================================
--- linux-2.6.10.orig/fs/nfsd/Makefile	2004-12-25 05:35:50.000000000 +0800
+++ linux-2.6.10/fs/nfsd/Makefile	2005-04-05 14:49:13.431687392 +0800
@@ -8,5 +8,5 @@
 			   export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o
 nfsd-$(CONFIG_NFSD_V3)	+= nfs3proc.o nfs3xdr.o
 nfsd-$(CONFIG_NFSD_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
-			   nfs4acl.o
+			   nfs4acl.o nfs4callback.o nfs4recover.o
 nfsd-objs		:= $(nfsd-y)
Index: linux-2.6.10/fs/nfs/nfs4xdr.c
===================================================================
--- linux-2.6.10.orig/fs/nfs/nfs4xdr.c	2004-12-25 05:35:40.000000000 +0800
+++ linux-2.6.10/fs/nfs/nfs4xdr.c	2005-04-05 14:49:13.452684200 +0800
@@ -82,12 +82,16 @@
 #define encode_getfh_maxsz      (op_encode_hdr_maxsz)
 #define decode_getfh_maxsz      (op_decode_hdr_maxsz + 1 + \
 				((3+NFS4_FHSIZE) >> 2))
-#define encode_getattr_maxsz    (op_encode_hdr_maxsz + 3)
+#define nfs4_fattr_bitmap_maxsz 3
+#define encode_getattr_maxsz    (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
 #define nfs4_name_maxsz		(1 + ((3 + NFS4_MAXNAMLEN) >> 2))
 #define nfs4_path_maxsz		(1 + ((3 + NFS4_MAXPATHLEN) >> 2))
-#define nfs4_fattr_bitmap_maxsz (36 + 2 * nfs4_name_maxsz)
-#define decode_getattr_maxsz    (op_decode_hdr_maxsz + 3 + \
-                                nfs4_fattr_bitmap_maxsz)
+/* This is based on getfattr, which uses the most attributes: */
+#define nfs4_fattr_value_maxsz	(1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \
+				3 + 3 + 3 + 2 * nfs4_name_maxsz))
+#define nfs4_fattr_maxsz	(nfs4_fattr_bitmap_maxsz + \
+				nfs4_fattr_value_maxsz)
+#define decode_getattr_maxsz    (op_decode_hdr_maxsz + nfs4_fattr_maxsz)
 #define encode_savefh_maxsz     (op_encode_hdr_maxsz)
 #define decode_savefh_maxsz     (op_decode_hdr_maxsz)
 #define encode_fsinfo_maxsz	(op_encode_hdr_maxsz + 2)
@@ -122,11 +126,11 @@
 #define encode_symlink_maxsz	(op_encode_hdr_maxsz + \
 				1 + nfs4_name_maxsz + \
 				nfs4_path_maxsz + \
-				nfs4_fattr_bitmap_maxsz)
+				nfs4_fattr_maxsz)
 #define decode_symlink_maxsz	(op_decode_hdr_maxsz + 8)
 #define encode_create_maxsz	(op_encode_hdr_maxsz + \
 				2 + nfs4_name_maxsz + \
-				nfs4_fattr_bitmap_maxsz)
+				nfs4_fattr_maxsz)
 #define decode_create_maxsz	(op_decode_hdr_maxsz + 8)
 #define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4)
 #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
@@ -205,7 +209,7 @@
 #define NFS4_enc_setattr_sz     (compound_encode_hdr_maxsz + \
                                 encode_putfh_maxsz + \
                                 op_encode_hdr_maxsz + 4 + \
-                                nfs4_fattr_bitmap_maxsz + \
+                                nfs4_fattr_maxsz + \
                                 encode_getattr_maxsz)
 #define NFS4_dec_setattr_sz     (compound_decode_hdr_maxsz + \
                                 decode_putfh_maxsz + \
@@ -360,6 +364,20 @@
 				encode_delegreturn_maxsz)
 #define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \
 				decode_delegreturn_maxsz)
+#define NFS4_enc_getacl_sz	(compound_encode_hdr_maxsz + \
+				encode_putfh_maxsz + \
+				encode_getattr_maxsz)
+#define NFS4_dec_getacl_sz	(compound_decode_hdr_maxsz + \
+				decode_putfh_maxsz + \
+				op_decode_hdr_maxsz + \
+				nfs4_fattr_bitmap_maxsz + 1)
+#define NFS4_enc_setacl_sz	(compound_encode_hdr_maxsz + \
+				encode_putfh_maxsz + \
+				op_encode_hdr_maxsz + 4 + \
+				nfs4_fattr_bitmap_maxsz + 1)
+#define NFS4_dec_setacl_sz	(compound_decode_hdr_maxsz + \
+				decode_putfh_maxsz + \
+				op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
 
 static struct {
 	unsigned int	mode;
@@ -459,7 +477,7 @@
 	 * In the worst-case, this would be
 	 *   12(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime)
 	 *          = 36 bytes, plus any contribution from variable-length fields
-	 *            such as owner/group/acl's.
+	 *            such as owner/group.
 	 */
 	len = 16;
 
@@ -1083,6 +1101,27 @@
 	return 0;
 }
 
+extern nfs4_stateid zero_stateid;
+
+static int
+encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(4+sizeof(zero_stateid.data));
+	WRITE32(OP_SETATTR);
+	WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data));
+	RESERVE_SPACE(2*4);
+	WRITE32(1);
+	WRITE32(FATTR4_WORD0_ACL);
+	if (arg->acl_len % 4)
+		return -EINVAL;
+	RESERVE_SPACE(4);
+	WRITE32(arg->acl_len);
+	xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len);
+	return 0;
+}
+
 static int
 encode_savefh(struct xdr_stream *xdr)
 {
@@ -1627,6 +1666,34 @@
 }
 
 /*
+ * Encode a GETACL request
+ */
+static int
+nfs4_xdr_enc_getacl(struct rpc_rqst *req, uint32_t *p,
+		struct nfs_getaclargs *args)
+{
+	struct xdr_stream xdr;
+	struct rpc_auth *auth = req->rq_task->tk_auth;
+	struct compound_hdr hdr = {
+		.nops   = 2,
+	};
+	int replen, status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	status = encode_putfh(&xdr, args->fh);
+	if (status)
+		goto out;
+	status = encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0);
+	/* set up reply buffer: */
+	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_getacl_sz) << 2;
+	xdr_inline_pages(&req->rq_rcv_buf, replen,
+		args->acl_pages, args->acl_pgbase, args->acl_len);
+out:
+	return status;
+}
+
+/*
  * Encode a WRITE request
  */
 static int nfs4_xdr_enc_write(struct rpc_rqst *req, uint32_t *p, struct nfs_writeargs *args)
@@ -3122,6 +3189,46 @@
 	return decode_op_hdr(xdr, OP_RENEW);
 }
 
+static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
+		ssize_t *acl_len)
+{
+	uint32_t *savep;
+	uint32_t attrlen,
+		 bitmap[2] = {0};
+	struct kvec *iov = req->rq_rcv_buf.head;
+	int status;
+
+	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+		goto out;
+	if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
+		goto out;
+	if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
+		goto out;
+
+	if (unlikely(bitmap[0] & (FATTR4_WORD0_ACL - 1U)))
+		return -EIO;
+	if (likely(bitmap[0] & FATTR4_WORD0_ACL)) {
+		int hdrlen, recvd;
+
+		/* We ignore &savep and don't do consistency checks on
+		 * the attr length.  Let userspace figure it out.... */
+		hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base;
+		recvd = req->rq_rcv_buf.len - hdrlen;
+		if (attrlen > recvd) {
+			printk(KERN_WARNING "NFS: server cheating in getattr"
+					" acl reply: attrlen %u > recvd %u\n",
+					attrlen, recvd);
+			return -EINVAL;
+		}
+		if (attrlen <= *acl_len)
+			xdr_read_pages(xdr, attrlen);
+		*acl_len = attrlen;
+	}
+
+out:
+	return status;
+}
+
 static int
 decode_savefh(struct xdr_stream *xdr)
 {
@@ -3413,6 +3520,71 @@
 
 }
 
+/*
+ * Encode an SETACL request
+ */
+static int
+nfs4_xdr_enc_setacl(struct rpc_rqst *req, uint32_t *p, struct nfs_setaclargs *args)
+{
+        struct xdr_stream xdr;
+        struct compound_hdr hdr = {
+                .nops   = 2,
+        };
+        int status;
+
+        xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+        encode_compound_hdr(&xdr, &hdr);
+        status = encode_putfh(&xdr, args->fh);
+        if (status)
+                goto out;
+        status = encode_setacl(&xdr, args);
+out:
+        return status;
+}
+/*
+ * Decode SETACL response
+ */
+static int
+nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, uint32_t *p, void *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_putfh(&xdr);
+	if (status)
+		goto out;
+	status = decode_setattr(&xdr, res);
+out:
+	return status;
+}
+
+/*
+ * Decode GETACL response
+ */
+static int
+nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, uint32_t *p, ssize_t *acl_len)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_putfh(&xdr);
+	if (status)
+		goto out;
+	status = decode_getacl(&xdr, rqstp, acl_len);
+
+out:
+	return status;
+}
 
 /*
  * Decode CLOSE response
@@ -4009,6 +4181,8 @@
   PROC(READDIR,		enc_readdir,	dec_readdir),
   PROC(SERVER_CAPS,	enc_server_caps, dec_server_caps),
   PROC(DELEGRETURN,	enc_delegreturn, dec_delegreturn),
+  PROC(GETACL,		enc_getacl,	dec_getacl),
+  PROC(SETACL,		enc_setacl,	dec_setacl),
 };
 
 struct rpc_version		nfs_version4 = {
Index: linux-2.6.10/fs/nfs/inode.c
===================================================================
--- linux-2.6.10.orig/fs/nfs/inode.c	2004-12-25 05:35:24.000000000 +0800
+++ linux-2.6.10/fs/nfs/inode.c	2005-04-05 14:49:13.445685264 +0800
@@ -486,13 +486,27 @@
 	if (error < 0)
 		goto out_err;
 
-	buf->f_frsize = server->wtmult;
+	/*
+	 * Current versions of glibc do not correctly handle the
+	 * case where f_frsize != f_bsize.  Eventually we want to
+	 * report the value of wtmult in this field.
+	 */
+	buf->f_frsize = sb->s_blocksize;
+
+	/*
+	 * On most *nix systems, f_blocks, f_bfree, and f_bavail
+	 * are reported in units of f_frsize.  Linux hasn't had
+	 * an f_frsize field in its statfs struct until recently,
+	 * thus historically Linux's sys_statfs reports these
+	 * fields in units of f_bsize.
+	 */
 	buf->f_bsize = sb->s_blocksize;
 	blockbits = sb->s_blocksize_bits;
 	blockres = (1 << blockbits) - 1;
 	buf->f_blocks = (res.tbytes + blockres) >> blockbits;
 	buf->f_bfree = (res.fbytes + blockres) >> blockbits;
 	buf->f_bavail = (res.abytes + blockres) >> blockbits;
+
 	buf->f_files = res.tfiles;
 	buf->f_ffree = res.afiles;
 
@@ -565,9 +579,9 @@
 
 	memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
 	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
-		nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
+		nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS;
 	else
-		nfsi->flags |= NFS_INO_INVALID_ATTR;
+		nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS;
 }
 
 /*
@@ -605,7 +619,7 @@
 		return 0;
 	if (nfs_compare_fh(NFS_FH(inode), fh))
 		return 0;
-	if (is_bad_inode(inode))
+	if (is_bad_inode(inode) || NFS_STALE(inode))
 		return 0;
 	return 1;
 }
@@ -664,7 +678,7 @@
 		/* Why so? Because we want revalidate for devices/FIFOs, and
 		 * that's precisely what we have in nfs_file_inode_operations.
 		 */
-		inode->i_op = &nfs_file_inode_operations;
+		inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops;
 		if (S_ISREG(inode->i_mode)) {
 			inode->i_fop = &nfs_file_operations;
 			inode->i_data.a_ops = &nfs_file_aops;
@@ -766,13 +780,8 @@
 			vmtruncate(inode, attr->ia_size);
 		}
 	}
-	if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) {
-		struct rpc_cred **cred = &NFS_I(inode)->cache_access.cred;
-		if (*cred) {
-			put_rpccred(*cred);
-			*cred = NULL;
-		}
-	}
+	if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0)
+		NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS;
 	nfs_end_data_update(inode);
 	unlock_kernel();
 	return error;
@@ -949,14 +958,14 @@
 	lock_kernel();
 	if (!inode || is_bad_inode(inode))
  		goto out_nowait;
-	if (NFS_STALE(inode) && inode != inode->i_sb->s_root->d_inode)
+	if (NFS_STALE(inode))
  		goto out_nowait;
 
 	while (NFS_REVALIDATING(inode)) {
 		status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING);
 		if (status < 0)
 			goto out_nowait;
-		if (NFS_SERVER(inode)->flags & NFS_MOUNT_NOAC)
+		if (NFS_ATTRTIMEO(inode) == 0)
 			continue;
 		if (NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME))
 			continue;
@@ -968,14 +977,14 @@
 	/* Protect against RPC races by saving the change attribute */
 	verifier = nfs_save_change_attribute(inode);
 	status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr);
-	if (status) {
+	if (status != 0) {
 		dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n",
 			 inode->i_sb->s_id,
 			 (long long)NFS_FILEID(inode), status);
 		if (status == -ESTALE) {
-			NFS_FLAGS(inode) |= NFS_INO_STALE;
-			if (inode != inode->i_sb->s_root->d_inode)
-				remove_inode_hash(inode);
+			nfs_zap_caches(inode);
+			if (!S_ISDIR(inode->i_mode))
+				NFS_FLAGS(inode) |= NFS_INO_STALE;
 		}
 		goto out;
 	}
@@ -1014,7 +1023,6 @@
 		inode->i_sb->s_id,
 		(long long)NFS_FILEID(inode));
 
-	NFS_FLAGS(inode) &= ~NFS_INO_STALE;
 out:
 	NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING;
 	wake_up(&nfsi->nfs_i_wait);
@@ -1161,7 +1169,7 @@
 	if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
 			|| inode->i_uid != fattr->uid
 			|| inode->i_gid != fattr->gid)
-		nfsi->flags |= NFS_INO_INVALID_ATTR;
+		nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS;
 
 	/* Has the link count changed? */
 	if (inode->i_nlink != fattr->nlink)
@@ -1270,7 +1278,7 @@
 #endif
 		nfsi->change_attr = fattr->change_attr;
 		if (!data_unstable)
-			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
+			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS;
 	}
 
 	memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
@@ -1278,14 +1286,8 @@
 
 	if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) ||
 	    inode->i_uid != fattr->uid ||
-	    inode->i_gid != fattr->gid) {
-		struct rpc_cred **cred = &NFS_I(inode)->cache_access.cred;
-		if (*cred) {
-			put_rpccred(*cred);
-			*cred = NULL;
-		}
-		invalid |= NFS_INO_INVALID_ATTR;
-	}
+	    inode->i_gid != fattr->gid)
+		invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS;
 
 	inode->i_mode = fattr->mode;
 	inode->i_nlink = fattr->nlink;
@@ -1335,7 +1337,8 @@
 	 */
 	nfs_invalidate_inode(inode);
  out_err:
-	return -EIO;
+	NFS_FLAGS(inode) |= NFS_INO_STALE;
+	return -ESTALE;
 }
 
 /*
@@ -1449,8 +1452,6 @@
 
 	kill_anon_super(s);
 
-	nfs4_renewd_prepare_shutdown(server);
-
 	if (server->client != NULL && !IS_ERR(server->client))
 		rpc_shutdown_client(server->client);
 	if (server->client_sys != NULL && !IS_ERR(server->client_sys))
@@ -1461,8 +1462,6 @@
 
 	rpciod_down();		/* release rpciod */
 
-	destroy_nfsv4_state(server);
-
 	if (server->hostname != NULL)
 		kfree(server->hostname);
 	kfree(server);
@@ -1478,8 +1477,53 @@
 
 #ifdef CONFIG_NFS_V4
 
-static void nfs4_clear_inode(struct inode *);
+#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
+
+int
+nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf,
+		size_t buflen, int flags)
+{
+	struct inode *inode = dentry->d_inode;
+
+	if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0)
+		return -EINVAL;
+
+	if (!S_ISREG(inode->i_mode) &&
+	    (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
+		return -EPERM;
+
+	return nfs4_proc_set_acl(inode, buf, buflen);
+}
+
+/* The getxattr man page suggests returning -ENODATA for unknown attributes,
+ * and that's what we'll do for e.g. user attributes that haven't been set.
+ * But we'll follow ext2/ext3's lead by returning -EOPNOTSUPP for unsupported
+ * attributes in kernel-managed attribute namespaces. */
+ssize_t
+nfs4_getxattr(struct dentry *dentry, const char *key, void *buf,
+		size_t buflen)
+{
+	struct inode *inode = dentry->d_inode;
 
+	if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0)
+		return -EOPNOTSUPP;
+
+	return nfs4_proc_get_acl(inode, buf, buflen);
+}
+
+ssize_t
+nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen)
+{
+	ssize_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1;
+
+	if (buf && buflen < len)
+		return -ERANGE;
+	if (buf)
+		memcpy(buf, XATTR_NAME_NFSV4_ACL, len);
+	return len;
+}
+
+static void nfs4_clear_inode(struct inode *);
 
 static struct super_operations nfs4_sops = { 
 	.alloc_inode	= nfs_alloc_inode,
@@ -1543,9 +1587,6 @@
 		server->wsize = nfs_block_size(data->wsize, NULL);
 	server->flags = data->flags & NFS_MOUNT_FLAGMASK;
 
-	/* NFSv4 doesn't use NLM locking */
-	server->flags |= NFS_MOUNT_NONLM;
-
 	server->acregmin = data->acregmin*HZ;
 	server->acregmax = data->acregmax*HZ;
 	server->acdirmin = data->acdirmin*HZ;
@@ -1790,8 +1831,22 @@
 
 static void nfs4_kill_super(struct super_block *sb)
 {
+	struct nfs_server *server = NFS_SB(sb);
+
 	nfs_return_all_delegations(sb);
-	nfs_kill_super(sb);
+	kill_anon_super(sb);
+
+	nfs4_renewd_prepare_shutdown(server);
+
+	if (server->client != NULL && !IS_ERR(server->client))
+		rpc_shutdown_client(server->client);
+	rpciod_down();		/* release rpciod */
+
+	destroy_nfsv4_state(server);
+
+	if (server->hostname != NULL)
+		kfree(server->hostname);
+	kfree(server);
 }
 
 static struct file_system_type nfs4_fs_type = {
@@ -1821,9 +1876,13 @@
 extern int nfs_init_nfspagecache(void);
 extern void nfs_destroy_nfspagecache(void);
 extern int nfs_init_readpagecache(void);
-extern int nfs_destroy_readpagecache(void);
+extern void nfs_destroy_readpagecache(void);
 extern int nfs_init_writepagecache(void);
-extern int nfs_destroy_writepagecache(void);
+extern void nfs_destroy_writepagecache(void);
+#ifdef CONFIG_NFS_DIRECTIO
+extern int nfs_init_directcache(void);
+extern void nfs_destroy_directcache(void);
+#endif
 
 static kmem_cache_t * nfs_inode_cachep;
 
@@ -1904,6 +1963,12 @@
 	if (err)
 		goto out1;
 
+#ifdef CONFIG_NFS_DIRECTIO
+	err = nfs_init_directcache();
+	if (err)
+		goto out0;
+#endif
+
 #ifdef CONFIG_PROC_FS
 	rpc_proc_register(&nfs_rpcstat);
 #endif
@@ -1914,8 +1979,14 @@
 		goto out;
 	return 0;
 out:
+#ifdef CONFIG_PROC_FS
 	rpc_proc_unregister("nfs");
+#endif
 	nfs_destroy_writepagecache();
+#ifdef CONFIG_NFS_DIRECTIO
+out0:
+	nfs_destroy_directcache();
+#endif
 out1:
 	nfs_destroy_readpagecache();
 out2:
@@ -1928,6 +1999,9 @@
 
 static void __exit exit_nfs_fs(void)
 {
+#ifdef CONFIG_NFS_DIRECTIO
+	nfs_destroy_directcache();
+#endif
 	nfs_destroy_writepagecache();
 	nfs_destroy_readpagecache();
 	nfs_destroy_inodecache();
Index: linux-2.6.10/fs/nfs/nfs4state.c
===================================================================
--- linux-2.6.10.orig/fs/nfs/nfs4state.c	2004-12-25 05:33:49.000000000 +0800
+++ linux-2.6.10/fs/nfs/nfs4state.c	2005-04-05 14:49:13.446685112 +0800
@@ -445,7 +445,7 @@
 		state->owner = owner;
 		atomic_inc(&owner->so_count);
 		list_add(&state->inode_states, &nfsi->open_states);
-		state->inode = inode;
+		state->inode = igrab(inode);
 		spin_unlock(&inode->i_lock);
 	} else {
 		spin_unlock(&inode->i_lock);
@@ -471,6 +471,7 @@
 		list_del(&state->inode_states);
 	spin_unlock(&inode->i_lock);
 	list_del(&state->open_states);
+	iput(inode);
 	BUG_ON (state->state != 0);
 	nfs4_free_open_state(state);
 	nfs4_put_state_owner(owner);
@@ -486,7 +487,6 @@
 	struct nfs4_state_owner *owner = state->owner;
 	struct nfs4_client *clp = owner->so_client;
 	int newstate;
-	int status = 0;
 
 	atomic_inc(&owner->so_count);
 	down_read(&clp->cl_sem);
@@ -508,10 +508,8 @@
 			newstate |= FMODE_WRITE;
 		if (state->state == newstate)
 			goto out;
-		if (newstate != 0)
-			status = nfs4_do_downgrade(inode, state, newstate);
-		else
-			status = nfs4_do_close(inode, state);
+		if (nfs4_do_close(inode, state, newstate) == -EINPROGRESS)
+			return;
 	}
 out:
 	nfs4_put_open_state(state);
Index: linux-2.6.10/fs/nfs/idmap.c
===================================================================
--- linux-2.6.10.orig/fs/nfs/idmap.c	2004-12-25 05:34:26.000000000 +0800
+++ linux-2.6.10/fs/nfs/idmap.c	2005-04-05 14:49:13.454683896 +0800
@@ -80,6 +80,7 @@
 static ssize_t   idmap_pipe_downcall(struct file *, const char __user *,
 		     size_t);
 void             idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
+static void	idmap_pipe_release(struct inode *inode);
 
 static unsigned int fnvhash32(const void *, size_t);
 
@@ -87,6 +88,7 @@
         .upcall         = idmap_pipe_upcall,
         .downcall       = idmap_pipe_downcall,
         .destroy_msg    = idmap_pipe_destroy_msg,
+	.release_pipe	= idmap_pipe_release,
 };
 
 void
@@ -448,6 +450,19 @@
 	up(&idmap->idmap_im_lock);
 }
 
+static void
+idmap_pipe_release(struct inode *inode)
+{
+	struct rpc_inode *rpci = RPC_I(inode);
+	struct idmap *idmap = (struct idmap *)rpci->private;
+	struct idmap_msg *im = &idmap->idmap_im;
+
+	down(&idmap->idmap_im_lock);
+	im->im_status = IDMAP_STATUS_LOOKUPFAIL;
+	wake_up(&idmap->idmap_wq);
+	up(&idmap->idmap_im_lock);
+}
+
 /* 
  * Fowler/Noll/Vo hash
  *    http://www.isthe.com/chongo/tech/comp/fnv/
Index: linux-2.6.10/fs/nfs/dir.c
===================================================================
--- linux-2.6.10.orig/fs/nfs/dir.c	2005-03-31 15:35:26.000000000 +0800
+++ linux-2.6.10/fs/nfs/dir.c	2005-04-05 14:49:13.439686176 +0800
@@ -40,8 +40,6 @@
 static int nfs_opendir(struct inode *, struct file *);
 static int nfs_readdir(struct file *, void *, filldir_t);
 static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *);
-static int nfs_cached_lookup(struct inode *, struct dentry *,
-				struct nfs_fh *, struct nfs_fattr *);
 static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *);
 static int nfs_mkdir(struct inode *, struct dentry *, int);
 static int nfs_rmdir(struct inode *, struct dentry *);
@@ -92,6 +90,9 @@
 	.permission	= nfs_permission,
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
+	.getxattr       = nfs4_getxattr,
+	.setxattr       = nfs4_setxattr,
+	.listxattr      = nfs4_listxattr,
 };
 
 #endif /* CONFIG_NFS_V4 */
@@ -294,24 +295,13 @@
 	return res;
 }
 
-static unsigned int nfs_type2dtype[] = {
-	DT_UNKNOWN,
-	DT_REG,
-	DT_DIR,
-	DT_BLK,
-	DT_CHR,
-	DT_LNK,
-	DT_SOCK,
-	DT_UNKNOWN,
-	DT_FIFO
-};
-
-static inline
-unsigned int nfs_type_to_d_type(enum nfs_ftype type)
+static inline unsigned int dt_type(struct inode *inode)
 {
-	return nfs_type2dtype[type];
+	return (inode->i_mode >> 12) & 15;
 }
 
+static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc);
+
 /*
  * Once we've found the start of the dirent within a page: fill 'er up...
  */
@@ -321,6 +311,7 @@
 {
 	struct file	*file = desc->file;
 	struct nfs_entry *entry = desc->entry;
+	struct dentry	*dentry = NULL;
 	unsigned long	fileid;
 	int		loop_count = 0,
 			res;
@@ -333,9 +324,16 @@
 		 *	 retrieving the current dirent on the server */
 		fileid = nfs_fileid_to_ino_t(entry->ino);
 
+		/* Get a dentry if we have one */
+		if (dentry != NULL)
+			dput(dentry);
+		dentry = nfs_readdir_lookup(desc);
+
 		/* Use readdirplus info */
-		if (desc->plus && (entry->fattr->valid & NFS_ATTR_FATTR))
-			d_type = nfs_type_to_d_type(entry->fattr->type);
+		if (dentry != NULL && dentry->d_inode != NULL) {
+			d_type = dt_type(dentry->d_inode);
+			fileid = dentry->d_inode->i_ino;
+		}
 
 		res = filldir(dirent, entry->name, entry->len, 
 			      entry->prev_cookie, fileid, d_type);
@@ -352,7 +350,8 @@
 		}
 	}
 	dir_page_release(desc);
-
+	if (dentry != NULL)
+		dput(dentry);
 	dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (long long)desc->target, res);
 	return res;
 }
@@ -615,24 +614,10 @@
 		goto out_valid;
 	}
 
-	/*
-	 * Note: we're not holding inode->i_sem and so may be racing with
-	 * operations that change the directory. We therefore save the
-	 * change attribute *before* we do the RPC call.
-	 */
-	verifier = nfs_save_change_attribute(dir);
-	error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr);
-	if (!error) {
-		if (nfs_compare_fh(NFS_FH(inode), &fhandle))
-			goto out_bad;
-		if (nfs_lookup_verify_inode(inode, isopen))
-			goto out_zap_parent;
-		goto out_valid_renew;
-	}
-
 	if (NFS_STALE(inode))
 		goto out_bad;
 
+	verifier = nfs_save_change_attribute(dir);
 	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
 	if (error)
 		goto out_bad;
@@ -641,7 +626,6 @@
 	if ((error = nfs_refresh_inode(inode, &fattr)) != 0)
 		goto out_bad;
 
- out_valid_renew:
 	nfs_renew_times(dentry);
 	nfs_set_verifier(dentry, verifier);
  out_valid:
@@ -723,6 +707,7 @@
 
 static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
 {
+	struct dentry *res;
 	struct inode *inode = NULL;
 	int error;
 	struct nfs_fh fhandle;
@@ -731,11 +716,11 @@
 	dfprintk(VFS, "NFS: lookup(%s/%s)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name);
 
-	error = -ENAMETOOLONG;
+	res = ERR_PTR(-ENAMETOOLONG);
 	if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
 		goto out;
 
-	error = -ENOMEM;
+	res = ERR_PTR(-ENOMEM);
 	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
 
 	lock_kernel();
@@ -746,29 +731,27 @@
 	if (nfs_is_exclusive_create(dir, nd))
 		goto no_entry;
 
-	error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr);
-	if (error != 0) {
-		error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name,
-				&fhandle, &fattr);
-		if (error == -ENOENT)
-			goto no_entry;
-		if (error != 0)
-			goto out_unlock;
+	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
+	if (error == -ENOENT)
+		goto no_entry;
+	if (error < 0) {
+		res = ERR_PTR(error);
+		goto out_unlock;
 	}
-	error = -EACCES;
+	res = ERR_PTR(-EACCES);
 	inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
 	if (!inode)
 		goto out_unlock;
 no_entry:
-	error = 0;
-	d_add(dentry, inode);
+	res = d_add_unique(dentry, inode);
+	if (res != NULL)
+		dentry = res;
 	nfs_renew_times(dentry);
 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 out_unlock:
 	unlock_kernel();
 out:
-	BUG_ON(error > 0);
-	return ERR_PTR(error);
+	return res;
 }
 
 #ifdef CONFIG_NFS_V4
@@ -798,15 +781,15 @@
 
 static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
+	struct dentry *res = NULL;
 	struct inode *inode = NULL;
-	int error = 0;
 
 	/* Check that we are indeed trying to open this file */
 	if (!is_atomic_open(dir, nd))
 		goto no_open;
 
 	if (dentry->d_name.len > NFS_SERVER(dir)->namelen) {
-		error = -ENAMETOOLONG;
+		res = ERR_PTR(-ENAMETOOLONG);
 		goto out;
 	}
 	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
@@ -828,7 +811,7 @@
 		inode = nfs4_atomic_open(dir, dentry, nd);
 	unlock_kernel();
 	if (IS_ERR(inode)) {
-		error = PTR_ERR(inode);
+		int error = PTR_ERR(inode);
 		switch (error) {
 			/* Make a negative dentry */
 			case -ENOENT:
@@ -841,16 +824,18 @@
 			/* case -EISDIR: */
 			/* case -EINVAL: */
 			default:
+				res = ERR_PTR(error);
 				goto out;
 		}
 	}
 no_entry:
-	d_add(dentry, inode);
+	res = d_add_unique(dentry, inode);
+	if (res != NULL)
+		dentry = res;
 	nfs_renew_times(dentry);
 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 out:
-	BUG_ON(error > 0);
-	return ERR_PTR(error);
+	return res;
 no_open:
 	return nfs_lookup(dir, dentry, nd);
 }
@@ -906,83 +891,51 @@
 }
 #endif /* CONFIG_NFSV4 */
 
-static inline
-int find_dirent_name(nfs_readdir_descriptor_t *desc, struct page *page, struct dentry *dentry)
+static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
 {
+	struct dentry *parent = desc->file->f_dentry;
+	struct inode *dir = parent->d_inode;
 	struct nfs_entry *entry = desc->entry;
-	int		 status;
-
-	while((status = dir_decode(desc)) == 0) {
-		if (entry->len != dentry->d_name.len)
-			continue;
-		if (memcmp(entry->name, dentry->d_name.name, entry->len))
-			continue;
-		if (!(entry->fattr->valid & NFS_ATTR_FATTR))
-			continue;
-		break;
-	}
-	return status;
-}
-
-/*
- * Use the cached Readdirplus results in order to avoid a LOOKUP call
- * whenever we believe that the parent directory has not changed.
- *
- * We assume that any file creation/rename changes the directory mtime.
- * As this results in a page cache invalidation whenever it occurs,
- * we don't require any other tests for cache coherency.
- */
-static
-int nfs_cached_lookup(struct inode *dir, struct dentry *dentry,
-			struct nfs_fh *fh, struct nfs_fattr *fattr)
-{
-	nfs_readdir_descriptor_t desc;
-	struct nfs_server *server;
-	struct nfs_entry entry;
-	struct page *page;
-	unsigned long timestamp;
-	int res;
-
-	if (!NFS_USE_READDIRPLUS(dir))
-		return -ENOENT;
-	server = NFS_SERVER(dir);
-	/* Don't use readdirplus unless the cache is stable */
-	if ((server->flags & NFS_MOUNT_NOAC) != 0
-			|| nfs_caches_unstable(dir)
-			|| nfs_attribute_timeout(dir))
-		return -ENOENT;
-	if ((NFS_FLAGS(dir) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) != 0)
-		return -ENOENT;
-	timestamp = NFS_I(dir)->readdir_timestamp;
-
-	entry.fh = fh;
-	entry.fattr = fattr;
-
-	desc.decode = NFS_PROTO(dir)->decode_dirent;
-	desc.entry = &entry;
-	desc.page_index = 0;
-	desc.plus = 1;
-
-	for(;(page = find_get_page(dir->i_mapping, desc.page_index)); desc.page_index++) {
-
-		res = -EIO;
-		if (PageUptodate(page)) {
-			void * kaddr = kmap_atomic(page, KM_USER0);
-			desc.ptr = kaddr;
-			res = find_dirent_name(&desc, page, dentry);
-			kunmap_atomic(kaddr, KM_USER0);
-		}
-		page_cache_release(page);
+	struct dentry *dentry, *alias;
+	struct qstr name = {
+		.name = entry->name,
+		.len = entry->len,
+	};
+	struct inode *inode;
 
-		if (res == 0)
-			goto out_found;
-		if (res != -EAGAIN)
+	switch (name.len) {
+		case 2:
+			if (name.name[0] == '.' && name.name[1] == '.')
+				return dget_parent(parent);
 			break;
+		case 1:
+			if (name.name[0] == '.')
+				return dget(parent);
+	}
+	name.hash = full_name_hash(name.name, name.len);
+	dentry = d_lookup(parent, &name);
+	if (dentry != NULL)
+		return dentry;
+	if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR))
+		return NULL;
+	/* Note: caller is already holding the dir->i_sem! */
+	dentry = d_alloc(parent, &name);
+	if (dentry == NULL)
+		return NULL;
+	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+	inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
+	if (!inode) {
+		dput(dentry);
+		return NULL;
 	}
-	return -ENOENT;
- out_found:
-	fattr->timestamp = timestamp;
-	return 0;
+	alias = d_add_unique(dentry, inode);
+	if (alias != NULL) {
+		dput(dentry);
+		dentry = alias;
+	}
+	nfs_renew_times(dentry);
+	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+	return dentry;
 }
 
 /*
@@ -1045,15 +998,9 @@
 	if (nd && (nd->flags & LOOKUP_CREATE))
 		open_flags = nd->intent.open.flags;
 
-	/*
-	 * The 0 argument passed into the create function should one day
-	 * contain the O_EXCL flag if requested. This allows NFSv3 to
-	 * select the appropriate create strategy. Currently open_namei
-	 * does not pass the create flags.
-	 */
 	lock_kernel();
 	nfs_begin_data_update(dir);
-	inode = NFS_PROTO(dir)->create(dir, &dentry->d_name, &attr, open_flags);
+	inode = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
 	nfs_end_data_update(dir);
 	if (!IS_ERR(inode)) {
 		d_instantiate(dentry, inode);
@@ -1508,7 +1455,7 @@
 
 	if (cache->cred != cred
 			|| time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))
-			|| (NFS_FLAGS(inode) & NFS_INO_INVALID_ATTR))
+			|| (NFS_FLAGS(inode) & NFS_INO_INVALID_ACCESS))
 		return -ENOENT;
 	memcpy(res, cache, sizeof(*res));
 	return 0;
@@ -1522,6 +1469,7 @@
 		if (cache->cred)
 			put_rpccred(cache->cred);
 		cache->cred = get_rpccred(set->cred);
+		NFS_FLAGS(inode) &= ~NFS_INO_INVALID_ACCESS;
 	}
 	cache->jiffies = set->jiffies;
 	cache->mask = set->mask;
Index: linux-2.6.10/fs/nfs/unlink.c
===================================================================
--- linux-2.6.10.orig/fs/nfs/unlink.c	2004-12-25 05:35:29.000000000 +0800
+++ linux-2.6.10/fs/nfs/unlink.c	2005-04-05 14:49:13.435686784 +0800
@@ -215,7 +215,6 @@
 	spin_lock(&dentry->d_lock);
 	dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
 	spin_unlock(&dentry->d_lock);
-	if (data->task.tk_rpcwait == &nfs_delete_queue)
-		rpc_wake_up_task(&data->task);
+	rpc_wake_up_task(&data->task);
 	nfs_put_unlinkdata(data);
 }
Index: linux-2.6.10/fs/nfs/write.c
===================================================================
--- linux-2.6.10.orig/fs/nfs/write.c	2004-12-25 05:35:23.000000000 +0800
+++ linux-2.6.10/fs/nfs/write.c	2005-04-05 14:49:13.443685568 +0800
@@ -61,7 +61,6 @@
 #include <linux/nfs_page.h>
 #include <asm/uaccess.h>
 #include <linux/smp_lock.h>
-#include <linux/mempool.h>
 
 #include "delegation.h"
 
@@ -83,49 +82,17 @@
 static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
 
 static kmem_cache_t *nfs_wdata_cachep;
-static mempool_t *nfs_wdata_mempool;
-static mempool_t *nfs_commit_mempool;
+mempool_t *nfs_wdata_mempool;
+mempool_t *nfs_commit_mempool;
 
 static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
 
-static __inline__ struct nfs_write_data *nfs_writedata_alloc(void)
-{
-	struct nfs_write_data	*p;
-	p = (struct nfs_write_data *)mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
-	if (p) {
-		memset(p, 0, sizeof(*p));
-		INIT_LIST_HEAD(&p->pages);
-	}
-	return p;
-}
-
-static __inline__ void nfs_writedata_free(struct nfs_write_data *p)
-{
-	mempool_free(p, nfs_wdata_mempool);
-}
-
-static void nfs_writedata_release(struct rpc_task *task)
+void nfs_writedata_release(struct rpc_task *task)
 {
 	struct nfs_write_data	*wdata = (struct nfs_write_data *)task->tk_calldata;
 	nfs_writedata_free(wdata);
 }
 
-static __inline__ struct nfs_write_data *nfs_commit_alloc(void)
-{
-	struct nfs_write_data	*p;
-	p = (struct nfs_write_data *)mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
-	if (p) {
-		memset(p, 0, sizeof(*p));
-		INIT_LIST_HEAD(&p->pages);
-	}
-	return p;
-}
-
-static __inline__ void nfs_commit_free(struct nfs_write_data *p)
-{
-	mempool_free(p, nfs_commit_mempool);
-}
-
 /* Adjust the file length if we're writing beyond the end */
 static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
 {
@@ -184,11 +151,10 @@
 	int		result, written = 0;
 	struct nfs_write_data *wdata;
 
-	wdata = kmalloc(sizeof(*wdata), GFP_NOFS);
+	wdata = nfs_writedata_alloc();
 	if (!wdata)
 		return -ENOMEM;
 
-	memset(wdata, 0, sizeof(*wdata));
 	wdata->flags = how;
 	wdata->cred = ctx->cred;
 	wdata->inode = inode;
@@ -238,8 +204,7 @@
 
 io_error:
 	nfs_end_data_update_defer(inode);
-
-	kfree(wdata);
+	nfs_writedata_free(wdata);
 	return written ? written : result;
 }
 
@@ -1199,7 +1164,8 @@
 		}
 		if (time_before(complain, jiffies)) {
 			printk(KERN_WARNING
-			       "NFS: Server wrote less than requested.\n");
+			       "NFS: Server wrote zero bytes, expected %u.\n",
+					argp->count);
 			complain = jiffies + 300 * HZ;
 		}
 		/* Can't do anything about it except throw an error. */
Index: linux-2.6.10/fs/nfs/proc.c
===================================================================
--- linux-2.6.10.orig/fs/nfs/proc.c	2004-12-25 05:35:28.000000000 +0800
+++ linux-2.6.10/fs/nfs/proc.c	2005-04-05 14:49:13.440686024 +0800
@@ -63,12 +63,12 @@
 	dprintk("%s: call getattr\n", __FUNCTION__);
 	fattr->valid = 0;
 	status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0);
-	dprintk("%s: reply getattr %d\n", __FUNCTION__, status);
+	dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
 	if (status)
 		return status;
 	dprintk("%s: call statfs\n", __FUNCTION__);
 	status = rpc_call(server->client_sys, NFSPROC_STATFS, fhandle, &fsinfo, 0);
-	dprintk("%s: reply statfs %d\n", __FUNCTION__, status);
+	dprintk("%s: reply statfs: %d\n", __FUNCTION__, status);
 	if (status)
 		return status;
 	info->rtmax  = NFS_MAXDATA;
@@ -96,7 +96,7 @@
 	fattr->valid = 0;
 	status = rpc_call(server->client, NFSPROC_GETATTR,
 				fhandle, fattr, 0);
-	dprintk("NFS reply getattr\n");
+	dprintk("NFS reply getattr: %d\n", status);
 	return status;
 }
 
@@ -114,7 +114,7 @@
 	dprintk("NFS call  setattr\n");
 	fattr->valid = 0;
 	status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0);
-	dprintk("NFS reply setattr\n");
+	dprintk("NFS reply setattr: %d\n", status);
 	return status;
 }
 
@@ -213,15 +213,15 @@
 }
 
 static struct inode *
-nfs_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
+nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		int flags)
 {
 	struct nfs_fh		fhandle;
 	struct nfs_fattr	fattr;
 	struct nfs_createargs	arg = {
 		.fh		= NFS_FH(dir),
-		.name		= name->name,
-		.len		= name->len,
+		.name		= dentry->d_name.name,
+		.len		= dentry->d_name.len,
 		.sattr		= sattr
 	};
 	struct nfs_diropok	res = {
@@ -231,7 +231,7 @@
 	int			status;
 
 	fattr.valid = 0;
-	dprintk("NFS call  create %s\n", name->name);
+	dprintk("NFS call  create %s\n", dentry->d_name.name);
 	status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
 	dprintk("NFS reply create: %d\n", status);
 	if (status == 0) {
@@ -620,6 +620,7 @@
 	.version	= 2,		       /* protocol version */
 	.dentry_ops	= &nfs_dentry_operations,
 	.dir_inode_ops	= &nfs_dir_inode_operations,
+	.file_inode_ops	= &nfs_file_inode_operations,
 	.getroot	= nfs_proc_get_root,
 	.getattr	= nfs_proc_getattr,
 	.setattr	= nfs_proc_setattr,
Index: linux-2.6.10/fs/nfs/callback.c
===================================================================
--- linux-2.6.10.orig/fs/nfs/callback.c	2004-12-25 05:34:57.000000000 +0800
+++ linux-2.6.10/fs/nfs/callback.c	2005-04-05 14:49:13.436686632 +0800
@@ -139,133 +139,10 @@
 	return ret;
 }
 
-/*
- * AUTH_NULL authentication
- */
-static int nfs_callback_null_accept(struct svc_rqst *rqstp, u32 *authp)
-{
-	struct kvec    *argv = &rqstp->rq_arg.head[0];
-	struct kvec    *resv = &rqstp->rq_res.head[0];
-
-	if (argv->iov_len < 3*4)
-		return SVC_GARBAGE;
-
-	if (svc_getu32(argv) != 0) {
-		dprintk("svc: bad null cred\n");
-		*authp = rpc_autherr_badcred;
-		return SVC_DENIED;
-	}
-	if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) {
-		dprintk("svc: bad null verf\n");
-		 *authp = rpc_autherr_badverf;
-		 return SVC_DENIED;
-	}
-
-	/* Signal that mapping to nobody uid/gid is required */
-	rqstp->rq_cred.cr_uid = (uid_t) -1;
-	rqstp->rq_cred.cr_gid = (gid_t) -1;
-	rqstp->rq_cred.cr_group_info = groups_alloc(0);
-	if (rqstp->rq_cred.cr_group_info == NULL)
-		return SVC_DROP; /* kmalloc failure - client must retry */
-
-	/* Put NULL verifier */
-	svc_putu32(resv, RPC_AUTH_NULL);
-	svc_putu32(resv, 0);
-	dprintk("%s: success, returning %d!\n", __FUNCTION__, SVC_OK);
-	return SVC_OK;
-}
-
-static int nfs_callback_null_release(struct svc_rqst *rqstp)
-{
-	if (rqstp->rq_cred.cr_group_info)
-		put_group_info(rqstp->rq_cred.cr_group_info);
-	rqstp->rq_cred.cr_group_info = NULL;
-	return 0; /* don't drop */
-}
-
-static struct auth_ops nfs_callback_auth_null = {
-	.name = "null",
-	.flavour = RPC_AUTH_NULL,
-	.accept = nfs_callback_null_accept,
-	.release = nfs_callback_null_release,
-};
-
-/*
- * AUTH_SYS authentication
- */
-static int nfs_callback_unix_accept(struct svc_rqst *rqstp, u32 *authp)
-{
-	struct kvec    *argv = &rqstp->rq_arg.head[0];
-	struct kvec    *resv = &rqstp->rq_res.head[0];
-	struct svc_cred *cred = &rqstp->rq_cred;
-	u32 slen, i;
-	int len = argv->iov_len;
-
-	dprintk("%s: start\n", __FUNCTION__);
-	cred->cr_group_info = NULL;
-	rqstp->rq_client = NULL;
-	if ((len -= 3*4) < 0)
-		return SVC_GARBAGE;
-
-	/* Get length, time stamp and machine name */
-	svc_getu32(argv);
-	svc_getu32(argv);
-	slen = XDR_QUADLEN(ntohl(svc_getu32(argv)));
-	if (slen > 64 || (len -= (slen + 3)*4) < 0)
-		goto badcred;
-	argv->iov_base = (void*)((u32*)argv->iov_base + slen);
-	argv->iov_len -= slen*4;
-
-	cred->cr_uid = ntohl(svc_getu32(argv));
-	cred->cr_gid = ntohl(svc_getu32(argv));
-	slen = ntohl(svc_getu32(argv));
-	if (slen > 16 || (len -= (slen + 2)*4) < 0)
-		goto badcred;
-	cred->cr_group_info = groups_alloc(slen);
-	if (cred->cr_group_info == NULL)
-		return SVC_DROP;
-	for (i = 0; i < slen; i++)
-		GROUP_AT(cred->cr_group_info, i) = ntohl(svc_getu32(argv));
-
-	if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) {
-		*authp = rpc_autherr_badverf;
-		return SVC_DENIED;
-	}
-	/* Put NULL verifier */
-	svc_putu32(resv, RPC_AUTH_NULL);
-	svc_putu32(resv, 0);
-	dprintk("%s: success, returning %d!\n", __FUNCTION__, SVC_OK);
-	return SVC_OK;
-badcred:
-	*authp = rpc_autherr_badcred;
-	return SVC_DENIED;
-}
-
-static int nfs_callback_unix_release(struct svc_rqst *rqstp)
-{
-	if (rqstp->rq_cred.cr_group_info)
-		put_group_info(rqstp->rq_cred.cr_group_info);
-	rqstp->rq_cred.cr_group_info = NULL;
-	return 0;
-}
-
-static struct auth_ops nfs_callback_auth_unix = {
-	.name = "unix",
-	.flavour = RPC_AUTH_UNIX,
-	.accept = nfs_callback_unix_accept,
-	.release = nfs_callback_unix_release,
-};
-
-/*
- * Hook the authentication protocol
- */
-static int nfs_callback_auth(struct svc_rqst *rqstp, u32 *authp)
+static int nfs_callback_authenticate(struct svc_rqst *rqstp)
 {
 	struct in_addr *addr = &rqstp->rq_addr.sin_addr;
 	struct nfs4_client *clp;
-	struct kvec *argv = &rqstp->rq_arg.head[0];
-	int flavour;
-	int retval;
 
 	/* Don't talk to strangers */
 	clp = nfs4_find_client(addr);
@@ -273,34 +150,19 @@
 		return SVC_DROP;
 	dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr));
 	nfs4_put_client(clp);
-	flavour = ntohl(svc_getu32(argv));
-	switch(flavour) {
+	switch (rqstp->rq_authop->flavour) {
 		case RPC_AUTH_NULL:
-			if (rqstp->rq_proc != CB_NULL) {
-				*authp = rpc_autherr_tooweak;
-				retval = SVC_DENIED;
-				break;
-			}
-			rqstp->rq_authop = &nfs_callback_auth_null;
-			retval = nfs_callback_null_accept(rqstp, authp);
+			if (rqstp->rq_proc != CB_NULL)
+				return SVC_DENIED;
 			break;
 		case RPC_AUTH_UNIX:
-			/* Eat the authentication flavour */
-			rqstp->rq_authop = &nfs_callback_auth_unix;
-			retval = nfs_callback_unix_accept(rqstp, authp);
 			break;
+		case RPC_AUTH_GSS:
+			/* FIXME: RPCSEC_GSS handling? */
 		default:
-			/* FIXME: need to add RPCSEC_GSS upcalls */
-#if 0
-			svc_ungetu32(argv);
-			retval = svc_authenticate(rqstp, authp);
-#else
-			*authp = rpc_autherr_rejectedcred;
-			retval = SVC_DENIED;
-#endif
+			return SVC_DENIED;
 	}
-	dprintk("%s: flavour %d returning error %d\n", __FUNCTION__, flavour, retval);
-	return retval;
+	return SVC_OK;
 }
 
 /*
@@ -321,5 +183,5 @@
 	.pg_name = "NFSv4 callback",			/* service name */
 	.pg_class = "nfs",				/* authentication class */
 	.pg_stats = &nfs4_callback_stats,
-	.pg_authenticate = nfs_callback_auth,
+	.pg_authenticate = nfs_callback_authenticate,
 };
Index: linux-2.6.10/fs/nfs/file.c
===================================================================
--- linux-2.6.10.orig/fs/nfs/file.c	2004-12-25 05:35:01.000000000 +0800
+++ linux-2.6.10/fs/nfs/file.c	2005-04-05 14:49:13.453684048 +0800
@@ -67,6 +67,19 @@
 	.setattr	= nfs_setattr,
 };
 
+#ifdef CONFIG_NFS_V4
+
+struct inode_operations nfs4_file_inode_operations = {
+	.permission	= nfs_permission,
+	.getattr	= nfs_getattr,
+	.setattr	= nfs_setattr,
+	.getxattr	= nfs4_getxattr,
+	.setxattr	= nfs4_setxattr,
+	.listxattr	= nfs4_listxattr,
+};
+
+#endif /* CONFIG_NFS_V4 */
+
 /* Hack for future NFS swap support */
 #ifndef IS_SWAPFILE
 # define IS_SWAPFILE(inode)	(0)
@@ -295,10 +308,19 @@
 static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
 {
 	struct inode *inode = filp->f_mapping->host;
-	int status;
+	int status = 0;
 
 	lock_kernel();
-	status = NFS_PROTO(inode)->lock(filp, cmd, fl);
+	/* Use local locking if mounted with "-onolock" */
+	if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
+		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
+	else {
+		struct file_lock *cfl = posix_test_lock(filp, fl);
+		if (cfl != NULL) {
+			memcpy(fl, cfl, sizeof(*fl));
+			fl->fl_type = F_UNLCK;
+		}
+	}
 	unlock_kernel();
 	return status;
 }
@@ -325,7 +347,11 @@
 	 * 	still need to complete the unlock.
 	 */
 	lock_kernel();
-	status = NFS_PROTO(inode)->lock(filp, cmd, fl);
+	/* Use local locking if mounted with "-onolock" */
+	if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
+		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
+	else
+		status = posix_lock_file_wait(filp, fl);
 	rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
 	return status;
 }
@@ -351,15 +377,19 @@
 		return status;
 
 	lock_kernel();
-	status = NFS_PROTO(inode)->lock(filp, cmd, fl);
-	/* If we were signalled we still need to ensure that
-	 * we clean up any state on the server. We therefore
-	 * record the lock call as having succeeded in order to
-	 * ensure that locks_remove_posix() cleans it out when
-	 * the process exits.
-	 */
-	if (status == -EINTR || status == -ERESTARTSYS)
-		posix_lock_file(filp, fl);
+	/* Use local locking if mounted with "-onolock" */
+	if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) {
+		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
+		/* If we were signalled we still need to ensure that
+		 * we clean up any state on the server. We therefore
+		 * record the lock call as having succeeded in order to
+		 * ensure that locks_remove_posix() cleans it out when
+		 * the process exits.
+		 */
+		if (status == -EINTR || status == -ERESTARTSYS)
+			posix_lock_file(filp, fl);
+	} else
+		status = posix_lock_file_wait(filp, fl);
 	unlock_kernel();
 	if (status < 0)
 		return status;
@@ -396,15 +426,6 @@
 	if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
 		return -ENOLCK;
 
-	if (NFS_PROTO(inode)->version != 4) {
-		/* Fake OK code if mounted without NLM support */
-		if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) {
-			if (IS_GETLK(cmd))
-				return LOCK_USE_CLNT;
-			return 0;
-		}
-	}
-
 	/*
 	 * No BSD flocks over NFS allowed.
 	 * Note: we could try to fake a POSIX lock request here by
Index: linux-2.6.10/fs/nfs/nfs3proc.c
===================================================================
--- linux-2.6.10.orig/fs/nfs/nfs3proc.c	2004-12-25 05:34:45.000000000 +0800
+++ linux-2.6.10/fs/nfs/nfs3proc.c	2005-04-05 14:49:13.441685872 +0800
@@ -80,10 +80,10 @@
 	dprintk("%s: call  fsinfo\n", __FUNCTION__);
 	info->fattr->valid = 0;
 	status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
-	dprintk("%s: reply fsinfo %d\n", __FUNCTION__, status);
+	dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status);
 	if (!(info->fattr->valid & NFS_ATTR_FATTR)) {
 		status = rpc_call(server->client_sys, NFS3PROC_GETATTR, fhandle, info->fattr, 0);
-		dprintk("%s: reply getattr %d\n", __FUNCTION__, status);
+		dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
 	}
 	return status;
 }
@@ -101,7 +101,7 @@
 	fattr->valid = 0;
 	status = rpc_call(server->client, NFS3PROC_GETATTR,
 			  fhandle, fattr, 0);
-	dprintk("NFS reply getattr\n");
+	dprintk("NFS reply getattr: %d\n", status);
 	return status;
 }
 
@@ -119,7 +119,7 @@
 	dprintk("NFS call  setattr\n");
 	fattr->valid = 0;
 	status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0);
-	dprintk("NFS reply setattr\n");
+	dprintk("NFS reply setattr: %d\n", status);
 	return status;
 }
 
@@ -198,7 +198,7 @@
 		if (res.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE))
 			entry->mask |= MAY_EXEC;
 	}
-	dprintk("NFS reply access, status = %d\n", status);
+	dprintk("NFS reply access: %d\n", status);
 	return status;
 }
 
@@ -296,7 +296,7 @@
  * For now, we don't implement O_EXCL.
  */
 static struct inode *
-nfs3_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
+nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		 int flags)
 {
 	struct nfs_fh		fhandle;
@@ -304,8 +304,8 @@
 	struct nfs_fattr	dir_attr;
 	struct nfs3_createargs	arg = {
 		.fh		= NFS_FH(dir),
-		.name		= name->name,
-		.len		= name->len,
+		.name		= dentry->d_name.name,
+		.len		= dentry->d_name.len,
 		.sattr		= sattr,
 	};
 	struct nfs3_diropres	res = {
@@ -315,7 +315,7 @@
 	};
 	int			status;
 
-	dprintk("NFS call  create %s\n", name->name);
+	dprintk("NFS call  create %s\n", dentry->d_name.name);
 	arg.createmode = NFS3_CREATE_UNCHECKED;
 	if (flags & O_EXCL) {
 		arg.createmode  = NFS3_CREATE_EXCLUSIVE;
@@ -353,7 +353,7 @@
 	if (status != 0)
 		goto out;
 	if (fhandle.size == 0 || !(fattr.valid & NFS_ATTR_FATTR)) {
-		status = nfs3_proc_lookup(dir, name, &fhandle, &fattr);
+		status = nfs3_proc_lookup(dir, &dentry->d_name, &fhandle, &fattr);
 		if (status != 0)
 			goto out;
 	}
@@ -838,6 +838,7 @@
 	.version	= 3,			/* protocol version */
 	.dentry_ops	= &nfs_dentry_operations,
 	.dir_inode_ops	= &nfs_dir_inode_operations,
+	.file_inode_ops	= &nfs_file_inode_operations,
 	.getroot	= nfs3_proc_get_root,
 	.getattr	= nfs3_proc_getattr,
 	.setattr	= nfs3_proc_setattr,
Index: linux-2.6.10/fs/nfs/nfs4proc.c
===================================================================
--- linux-2.6.10.orig/fs/nfs/nfs4proc.c	2004-12-25 05:35:23.000000000 +0800
+++ linux-2.6.10/fs/nfs/nfs4proc.c	2005-04-05 14:49:13.456683592 +0800
@@ -477,7 +477,7 @@
 /*
  * Returns an nfs4_state + an referenced inode
  */
-static int _nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
+static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
 {
 	struct nfs4_state_owner  *sp;
 	struct nfs4_state     *state = NULL;
@@ -491,7 +491,7 @@
 	struct nfs_openargs o_arg = {
 		.fh             = NFS_FH(dir),
 		.open_flags	= flags,
-		.name           = name,
+		.name           = &dentry->d_name,
 		.server         = server,
 		.bitmask = server->attr_bitmask,
 		.claim = NFS4_OPEN_CLAIM_NULL,
@@ -581,14 +581,14 @@
 }
 
 
-struct nfs4_state *nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr, struct rpc_cred *cred)
+struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred)
 {
 	struct nfs4_exception exception = { };
 	struct nfs4_state *res;
 	int status;
 
 	do {
-		status = _nfs4_do_open(dir, name, flags, sattr, cred, &res);
+		status = _nfs4_do_open(dir, dentry, flags, sattr, cred, &res);
 		if (status == 0)
 			break;
 		/* NOTE: BAD_SEQID means the server and client disagree about the
@@ -635,6 +635,8 @@
 
         fattr->valid = 0;
 
+	if (state != NULL)
+		msg.rpc_cred = state->owner->so_cred;
 	if (sattr->ia_valid & ATTR_SIZE)
 		nfs4_copy_stateid(&arg.stateid, state, NULL);
 	else
@@ -658,6 +660,61 @@
 	return err;
 }
 
+struct nfs4_closedata {
+	struct inode *inode;
+	struct nfs4_state *state;
+	struct nfs_closeargs arg;
+	struct nfs_closeres res;
+};
+
+static void nfs4_close_done(struct rpc_task *task)
+{
+	struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata;
+	struct nfs4_state *state = calldata->state;
+	struct nfs4_state_owner *sp = state->owner;
+	struct nfs_server *server = NFS_SERVER(calldata->inode);
+
+        /* hmm. we are done with the inode, and in the process of freeing
+	 * the state_owner. we keep this around to process errors
+	 */
+	nfs4_increment_seqid(task->tk_status, sp);
+	switch (task->tk_status) {
+		case 0:
+			state->state = calldata->arg.open_flags;
+			memcpy(&state->stateid, &calldata->res.stateid,
+					sizeof(state->stateid));
+			break;
+		case -NFS4ERR_STALE_STATEID:
+		case -NFS4ERR_EXPIRED:
+			state->state = calldata->arg.open_flags;
+			nfs4_schedule_state_recovery(server->nfs4_state);
+			break;
+		default:
+			if (nfs4_async_handle_error(task, server) == -EAGAIN) {
+				rpc_restart_call(task);
+				return;
+			}
+	}
+	nfs4_put_open_state(state);
+	up(&sp->so_sema);
+	nfs4_put_state_owner(sp);
+	up_read(&server->nfs4_state->cl_sem);
+	kfree(calldata);
+}
+
+static inline int nfs4_close_call(struct rpc_clnt *clnt, struct nfs4_closedata *calldata)
+{
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE],
+		.rpc_argp = &calldata->arg,
+		.rpc_resp = &calldata->res,
+		.rpc_cred = calldata->state->owner->so_cred,
+	};
+	if (calldata->arg.open_flags != 0)
+		msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
+	return rpc_call_async(clnt, &msg, 0, nfs4_close_done, calldata);
+}
+
 /* 
  * It is possible for data to be read/written from a mem-mapped file 
  * after the sys_close call (which hits the vfs layer as a flush).
@@ -669,102 +726,34 @@
  *
  * NOTE: Caller must be holding the sp->so_owner semaphore!
  */
-static int _nfs4_do_close(struct inode *inode, struct nfs4_state *state) 
+int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode) 
 {
-	struct nfs4_state_owner *sp = state->owner;
-	int status = 0;
-	struct nfs_closeargs arg = {
-		.fh		= NFS_FH(inode),
-	};
-	struct nfs_closeres res;
-	struct rpc_message msg = {
-		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_CLOSE],
-		.rpc_argp	= &arg,
-		.rpc_resp	= &res,
-	};
+	struct nfs4_closedata *calldata;
+	int status;
 
-	if (test_bit(NFS_DELEGATED_STATE, &state->flags))
+	/* Tell caller we're done */
+	if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
+		state->state = mode;
 		return 0;
-	memcpy(&arg.stateid, &state->stateid, sizeof(arg.stateid));
+	}
+	calldata = (struct nfs4_closedata *)kmalloc(sizeof(*calldata), GFP_KERNEL);
+	if (calldata == NULL)
+		return -ENOMEM;
+	calldata->inode = inode;
+	calldata->state = state;
+	calldata->arg.fh = NFS_FH(inode);
 	/* Serialization for the sequence id */
-	arg.seqid = sp->so_seqid,
-	status = rpc_call_sync(NFS_SERVER(inode)->client, &msg, RPC_TASK_NOINTR);
-
-        /* hmm. we are done with the inode, and in the process of freeing
-	 * the state_owner. we keep this around to process errors
+	calldata->arg.seqid = state->owner->so_seqid;
+	calldata->arg.open_flags = mode;
+	memcpy(&calldata->arg.stateid, &state->stateid,
+			sizeof(calldata->arg.stateid));
+	status = nfs4_close_call(NFS_SERVER(inode)->client, calldata);
+	/*
+	 * Return -EINPROGRESS on success in order to indicate to the
+	 * caller that an asynchronous RPC call has been launched, and
+	 * that it will release the semaphores on completion.
 	 */
-	nfs4_increment_seqid(status, sp);
-	if (!status)
-		memcpy(&state->stateid, &res.stateid, sizeof(state->stateid));
-
-	return status;
-}
-
-int nfs4_do_close(struct inode *inode, struct nfs4_state *state) 
-{
-	struct nfs_server *server = NFS_SERVER(state->inode);
-	struct nfs4_exception exception = { };
-	int err;
-	do {
-		err = _nfs4_do_close(inode, state);
-		switch (err) {
-			case -NFS4ERR_STALE_STATEID:
-			case -NFS4ERR_EXPIRED:
-				nfs4_schedule_state_recovery(server->nfs4_state);
-				err = 0;
-			default:
-				state->state = 0;
-		}
-		err = nfs4_handle_exception(server, err, &exception);
-	} while (exception.retry);
-	return err;
-}
-
-static int _nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode) 
-{
-	struct nfs4_state_owner *sp = state->owner;
-	int status = 0;
-	struct nfs_closeargs arg = {
-		.fh		= NFS_FH(inode),
-		.seqid		= sp->so_seqid,
-		.open_flags	= mode,
-	};
-	struct nfs_closeres res;
-	struct rpc_message msg = {
-		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE],
-		.rpc_argp	= &arg,
-		.rpc_resp	= &res,
-	};
-
-	if (test_bit(NFS_DELEGATED_STATE, &state->flags))
-		return 0;
-	memcpy(&arg.stateid, &state->stateid, sizeof(arg.stateid));
-	status = rpc_call_sync(NFS_SERVER(inode)->client, &msg, RPC_TASK_NOINTR);
-	nfs4_increment_seqid(status, sp);
-	if (!status)
-		memcpy(&state->stateid, &res.stateid, sizeof(state->stateid));
-
-	return status;
-}
-
-int nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode) 
-{
-	struct nfs_server *server = NFS_SERVER(state->inode);
-	struct nfs4_exception exception = { };
-	int err;
-	do {
-		err = _nfs4_do_downgrade(inode, state, mode);
-		switch (err) {
-			case -NFS4ERR_STALE_STATEID:
-			case -NFS4ERR_EXPIRED:
-				nfs4_schedule_state_recovery(server->nfs4_state);
-				err = 0;
-			default:
-				state->state = mode;
-		}
-		err = nfs4_handle_exception(server, err, &exception);
-	} while (exception.retry);
-	return err;
+	return (status == 0) ? -EINPROGRESS : status;
 }
 
 struct inode *
@@ -785,7 +774,7 @@
 	}
 
 	cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
-	state = nfs4_do_open(dir, &dentry->d_name, nd->intent.open.flags, &attr, cred);
+	state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
 	put_rpccred(cred);
 	if (IS_ERR(state))
 		return (struct inode *)state;
@@ -802,7 +791,7 @@
 	cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
 	state = nfs4_open_delegated(dentry->d_inode, openflags, cred);
 	if (IS_ERR(state))
-		state = nfs4_do_open(dir, &dentry->d_name, openflags, NULL, cred);
+		state = nfs4_do_open(dir, dentry, openflags, NULL, cred);
 	put_rpccred(cred);
 	if (state == ERR_PTR(-ENOENT) && dentry->d_inode == 0)
 		return 1;
@@ -1026,7 +1015,7 @@
 					FMODE_WRITE, cred);
 			if (IS_ERR(state))
 				state = nfs4_do_open(dentry->d_parent->d_inode,
-						&dentry->d_name, FMODE_WRITE,
+						dentry, FMODE_WRITE,
 						NULL, cred);
 			need_iput = 1;
 		}
@@ -1327,7 +1316,7 @@
  */
 
 static struct inode *
-nfs4_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
+nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
                  int flags)
 {
 	struct inode *inode;
@@ -1335,7 +1324,7 @@
 	struct rpc_cred *cred;
 
 	cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
-	state = nfs4_do_open(dir, name, flags, sattr, cred);
+	state = nfs4_do_open(dir, dentry, flags, sattr, cred);
 	put_rpccred(cred);
 	if (!IS_ERR(state)) {
 		inode = state->inode;
@@ -2049,6 +2038,86 @@
 }
 
 static int
+nfs4_server_supports_acls(struct nfs_server *server)
+{
+	return (server->caps & NFS_CAP_ACLS)
+		&& (server->acl_bitmask & ACL4_SUPPORT_ALLOW_ACL)
+		&& (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL);
+}
+
+/* XXX: assuming XATTR_SIZE_MAX is a multiple of PAGE_CACHE_SIZE,
+ * and that it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_CACHE_SIZE)
+ * bytes on the stack.  (Currently probably both true.)
+ */
+#define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT)
+
+static void buf_to_pages(const void *buf, ssize_t buflen,
+		struct page **pages, unsigned int *pgbase)
+{
+	const void *p = buf;
+
+	*pgbase = offset_in_page(buf);
+	p -= *pgbase;
+	while (p < buf + buflen) {
+		*(pages++) = virt_to_page(p);
+		p += PAGE_CACHE_SIZE;
+	}
+}
+
+ssize_t
+nfs4_proc_get_acl(struct inode *inode, void *buf, ssize_t buflen)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct page *pages[NFS4ACL_MAXPAGES];
+	struct nfs_getaclargs args = {
+		.fh = NFS_FH(inode),
+		.acl_pages = pages,
+		.acl_len = buflen,
+	};
+	ssize_t acl_len = buflen;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL],
+		.rpc_argp = &args,
+		.rpc_resp = &acl_len,
+	};
+	int ret;
+
+	if (!nfs4_server_supports_acls(server))
+		return -EOPNOTSUPP;
+	buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase);
+	ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+	if (buflen && acl_len > buflen)
+		return -ERANGE;
+	if (ret == 0)
+		ret = acl_len;
+	return ret;
+}
+
+int
+nfs4_proc_set_acl(struct inode *inode, const void *buf, ssize_t buflen)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct page *pages[NFS4ACL_MAXPAGES];
+	struct nfs_setaclargs arg = {
+		.fh		= NFS_FH(inode),
+		.acl_pages	= pages,
+		.acl_len	= buflen,
+	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_SETACL],
+		.rpc_argp	= &arg,
+		.rpc_resp	= NULL,
+	};
+	int ret;
+
+	if (!nfs4_server_supports_acls(server))
+		return -EOPNOTSUPP;
+	buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
+	ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0);
+	return ret;
+}
+
+static int
 nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server)
 {
 	struct nfs4_client *clp = server->nfs4_state;
@@ -2589,6 +2658,7 @@
 	.version	= 4,			/* protocol version */
 	.dentry_ops	= &nfs4_dentry_operations,
 	.dir_inode_ops	= &nfs4_dir_inode_operations,
+	.file_inode_ops	= &nfs4_file_inode_operations,
 	.getroot	= nfs4_proc_get_root,
 	.getattr	= nfs4_proc_getattr,
 	.setattr	= nfs4_proc_setattr,
Index: linux-2.6.10/fs/nfs/direct.c
===================================================================
--- linux-2.6.10.orig/fs/nfs/direct.c	2005-03-31 15:35:23.000000000 +0800
+++ linux-2.6.10/fs/nfs/direct.c	2005-04-05 14:49:13.448684808 +0800
@@ -33,6 +33,7 @@
  * 08 Jul 2002	Version for 2.4.19, with bug fixes --trondmy
  * 08 Jun 2003	Port to 2.5 APIs  --cel
  * 31 Mar 2004	Handle direct I/O without VFS support  --cel
+ * 15 Sep 2004	Parallel async reads  --cel
  *
  */
 
@@ -43,6 +44,7 @@
 #include <linux/smp_lock.h>
 #include <linux/file.h>
 #include <linux/pagemap.h>
+#include <linux/kref.h>
 
 #include <linux/nfs_fs.h>
 #include <linux/nfs_page.h>
@@ -50,11 +52,27 @@
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
+#include <asm/atomic.h>
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
-#define VERF_SIZE		(2 * sizeof(__u32))
 #define MAX_DIRECTIO_SIZE	(4096UL << PAGE_SHIFT)
 
+static kmem_cache_t *nfs_direct_cachep;
+
+/*
+ * This represents a set of asynchronous requests that we're waiting on
+ */
+struct nfs_direct_req {
+	struct kref		kref;		/* release manager */
+	struct list_head	list;		/* nfs_read_data structs */
+	wait_queue_head_t	wait;		/* wait for i/o completion */
+	struct page **		pages;		/* pages in our buffer */
+	unsigned int		npages;		/* count of pages */
+	atomic_t		complete,	/* i/os we're waiting for */
+				count,		/* bytes actually processed */
+				error;		/* any reported error */
+};
+
 
 /**
  * nfs_get_user_pages - find and set up pages underlying user's buffer
@@ -71,7 +89,8 @@
 	unsigned long page_count;
 	size_t array_size;
 
-	/* set an arbitrary limit to prevent arithmetic overflow */
+ 	/* set an arbitrary limit to prevent type overflow */
+ 	/* XXX: this can probably be as large as INT_MAX */
 	if (size > MAX_DIRECTIO_SIZE) {
 		*pages = NULL;
 		return -EFBIG;
@@ -95,6 +114,8 @@
 /**
  * nfs_free_user_pages - tear down page struct array
  * @pages: array of page struct pointers underlying target buffer
+ * @npages: number of pages in the array
+ * @do_dirty: dirty the pages as we release them
  */
 static void
 nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
@@ -109,77 +130,231 @@
 }
 
 /**
- * nfs_direct_read_seg - Read in one iov segment.  Generate separate
- *                        read RPCs for each "rsize" bytes.
+ * nfs_direct_req_release - release  nfs_direct_req structure for direct read
+ * @kref: kref object embedded in an nfs_direct_req structure
+ *
+ */
+static void nfs_direct_req_release(struct kref *kref)
+{
+	struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
+	kmem_cache_free(nfs_direct_cachep, dreq);
+}
+
+/**
+ * nfs_direct_read_alloc - allocate nfs_read_data structures for direct read
+ * @count: count of bytes for the read request
+ * @rsize: local rsize setting
+ *
+ * Note we also set the number of requests we have in the dreq when we are
+ * done.  This prevents races with I/O completion so we will always wait
+ * until all requests have been dispatched and completed.
+ */
+static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int rsize)
+{
+	struct list_head *list;
+	struct nfs_direct_req *dreq;
+	unsigned int reads = 0;
+
+	dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
+	if (!dreq)
+		return NULL;
+
+	kref_init(&dreq->kref);
+	init_waitqueue_head(&dreq->wait);
+	INIT_LIST_HEAD(&dreq->list);
+	atomic_set(&dreq->count, 0);
+	atomic_set(&dreq->error, 0);
+
+	list = &dreq->list;
+	for(;;) {
+		struct nfs_read_data *data = nfs_readdata_alloc();
+
+		if (unlikely(!data)) {
+			while (!list_empty(list)) {
+				data = list_entry(list->next,
+						  struct nfs_read_data, pages);
+				list_del(&data->pages);
+				nfs_readdata_free(data);
+			}
+			kref_put(&dreq->kref, nfs_direct_req_release);
+			return NULL;
+		}
+
+		INIT_LIST_HEAD(&data->pages);
+		list_add(&data->pages, list);
+
+		data->req = (struct nfs_page *) dreq;
+		reads++;
+		if (nbytes <= rsize)
+			break;
+		nbytes -= rsize;
+	}
+	kref_get(&dreq->kref);
+	atomic_set(&dreq->complete, reads);
+	return dreq;
+}
+
+/**
+ * nfs_direct_read_result - handle a read reply for a direct read request
+ * @data: address of NFS READ operation control block
+ * @status: status of this NFS READ operation
+ *
+ * We must hold a reference to all the pages in this direct read request
+ * until the RPCs complete.  This could be long *after* we are woken up in
+ * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server).
+ */
+static void nfs_direct_read_result(struct nfs_read_data *data, int status)
+{
+	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+
+	if (likely(status >= 0))
+		atomic_add(data->res.count, &dreq->count);
+	else
+		atomic_set(&dreq->error, status);
+
+	if (unlikely(atomic_dec_and_test(&dreq->complete))) {
+		nfs_free_user_pages(dreq->pages, dreq->npages, 1);
+		wake_up(&dreq->wait);
+		kref_put(&dreq->kref, nfs_direct_req_release);
+	}
+}
+
+/**
+ * nfs_direct_read_schedule - dispatch NFS READ operations for a direct read
+ * @dreq: address of nfs_direct_req struct for this request
  * @inode: target inode
  * @ctx: target file open context
- * user_addr: starting address of this segment of user's buffer
- * count: size of this segment
- * file_offset: offset in file to begin the operation
- * @pages: array of addresses of page structs defining user's buffer
- * nr_pages: size of pages array
+ * @user_addr: starting address of this segment of user's buffer
+ * @count: size of this segment
+ * @file_offset: offset in file to begin the operation
+ *
+ * For each nfs_read_data struct that was allocated on the list, dispatch
+ * an NFS READ operation
  */
-static int
-nfs_direct_read_seg(struct inode *inode, struct nfs_open_context *ctx,
-		unsigned long user_addr, size_t count, loff_t file_offset,
-		struct page **pages, int nr_pages)
-{
-	const unsigned int rsize = NFS_SERVER(inode)->rsize;
-	int tot_bytes = 0;
-	int curpage = 0;
-	struct nfs_read_data	rdata = {
-		.inode		= inode,
-		.cred		= ctx->cred,
-		.args		= {
-			.fh		= NFS_FH(inode),
-			.context	= ctx,
-		},
-		.res		= {
-			.fattr		= &rdata.fattr,
-		},
-	};
+static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
+		struct inode *inode, struct nfs_open_context *ctx,
+		unsigned long user_addr, size_t count, loff_t file_offset)
+{
+	struct list_head *list = &dreq->list;
+	struct page **pages = dreq->pages;
+	unsigned int curpage, pgbase;
+	unsigned int rsize = NFS_SERVER(inode)->rsize;
 
-	rdata.args.pgbase = user_addr & ~PAGE_MASK;
-	rdata.args.offset = file_offset;
-        do {
-		int result;
-
-		rdata.args.count = count;
-                if (rdata.args.count > rsize)
-                        rdata.args.count = rsize;
-		rdata.args.pages = &pages[curpage];
-
-		dprintk("NFS: direct read: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n",
-			rdata.args.count, (long long) rdata.args.offset,
-			user_addr + tot_bytes, rdata.args.pgbase, curpage);
+	curpage = 0;
+	pgbase = user_addr & ~PAGE_MASK;
+	do {
+		struct nfs_read_data *data;
+		unsigned int bytes;
+
+		bytes = rsize;
+		if (count < rsize)
+			bytes = count;
+
+		data = list_entry(list->next, struct nfs_read_data, pages);
+		list_del_init(&data->pages);
+
+		data->inode = inode;
+		data->cred = ctx->cred;
+		data->args.fh = NFS_FH(inode);
+		data->args.context = ctx;
+		data->args.offset = file_offset;
+		data->args.pgbase = pgbase;
+		data->args.pages = &pages[curpage];
+		data->args.count = bytes;
+		data->res.fattr = &data->fattr;
+		data->res.eof = 0;
+		data->res.count = bytes;
+
+		NFS_PROTO(inode)->read_setup(data);
+
+		data->task.tk_cookie = (unsigned long) inode;
+		data->task.tk_calldata = data;
+		data->task.tk_release = nfs_readdata_release;
+		data->complete = nfs_direct_read_result;
 
 		lock_kernel();
-		result = NFS_PROTO(inode)->read(&rdata);
+		rpc_execute(&data->task);
 		unlock_kernel();
 
-		if (result <= 0) {
-			if (tot_bytes > 0)
-				break;
-			if (result == -EISDIR)
-				result = -EINVAL;
-			return result;
-		}
+		dfprintk(VFS, "NFS: %4d initiated direct read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+				data->task.tk_pid,
+				inode->i_sb->s_id,
+				(long long)NFS_FILEID(inode),
+				bytes,
+				(unsigned long long)data->args.offset);
+
+		file_offset += bytes;
+		pgbase += bytes;
+		curpage += pgbase >> PAGE_SHIFT;
+		pgbase &= ~PAGE_MASK;
 
-                tot_bytes += result;
-		if (rdata.res.eof)
-			break;
-
-                rdata.args.offset += result;
-		rdata.args.pgbase += result;
-		curpage += rdata.args.pgbase >> PAGE_SHIFT;
-		rdata.args.pgbase &= ~PAGE_MASK;
-		count -= result;
+		count -= bytes;
 	} while (count != 0);
+}
 
-	/* XXX: should we zero the rest of the user's buffer if we
-	 *      hit eof? */
+/**
+ * nfs_direct_read_wait - wait for I/O completion for direct reads
+ * @dreq: request on which we are to wait
+ * @intr: whether or not this wait can be interrupted
+ *
+ * Collects and returns the final error value/byte-count.
+ */
+static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr)
+{
+	int result = 0;
 
-	return tot_bytes;
+	if (intr) {
+		result = wait_event_interruptible(dreq->wait,
+					(atomic_read(&dreq->complete) == 0));
+	} else {
+		wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0));
+	}
+
+	if (!result)
+		result = atomic_read(&dreq->error);
+	if (!result)
+		result = atomic_read(&dreq->count);
+
+	kref_put(&dreq->kref, nfs_direct_req_release);
+	return (ssize_t) result;
+}
+
+/**
+ * nfs_direct_read_seg - Read in one iov segment.  Generate separate
+ *                        read RPCs for each "rsize" bytes.
+ * @inode: target inode
+ * @ctx: target file open context
+ * @user_addr: starting address of this segment of user's buffer
+ * @count: size of this segment
+ * @file_offset: offset in file to begin the operation
+ * @pages: array of addresses of page structs defining user's buffer
+ * @nr_pages: number of pages in the array
+ *
+ */
+static ssize_t nfs_direct_read_seg(struct inode *inode,
+		struct nfs_open_context *ctx, unsigned long user_addr,
+		size_t count, loff_t file_offset, struct page **pages,
+		unsigned int nr_pages)
+{
+	ssize_t result;
+	sigset_t oldset;
+	struct rpc_clnt *clnt = NFS_CLIENT(inode);
+	struct nfs_direct_req *dreq;
+
+	dreq = nfs_direct_read_alloc(count, NFS_SERVER(inode)->rsize);
+	if (!dreq)
+		return -ENOMEM;
+
+	dreq->pages = pages;
+	dreq->npages = nr_pages;
+
+	rpc_clnt_sigmask(clnt, &oldset);
+	nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count,
+				 file_offset);
+	result = nfs_direct_read_wait(dreq, clnt->cl_intr);
+	rpc_clnt_sigunmask(clnt, &oldset);
+
+	return result;
 }
 
 /**
@@ -191,9 +366,8 @@
  * file_offset: offset in file to begin the operation
  * nr_segs: size of iovec array
  *
- * generic_file_direct_IO has already pushed out any non-direct
- * writes so that this read will see them when we read from the
- * server.
+ * We've already pushed out any non-direct writes so that this read
+ * will see them when we read from the server.
  */
 static ssize_t
 nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx,
@@ -222,8 +396,6 @@
 		result = nfs_direct_read_seg(inode, ctx, user_addr, size,
 				file_offset, pages, page_count);
 
-		nfs_free_user_pages(pages, page_count, 1);
-
 		if (result <= 0) {
 			if (tot_bytes > 0)
 				break;
@@ -249,31 +421,31 @@
  * @pages: array of addresses of page structs defining user's buffer
  * nr_pages: size of pages array
  */
-static int
-nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx,
-		unsigned long user_addr, size_t count, loff_t file_offset,
-		struct page **pages, int nr_pages)
+static ssize_t nfs_direct_write_seg(struct inode *inode,
+		struct nfs_open_context *ctx, unsigned long user_addr,
+		size_t count, loff_t file_offset, struct page **pages,
+		int nr_pages)
 {
 	const unsigned int wsize = NFS_SERVER(inode)->wsize;
 	size_t request;
-	int curpage, need_commit, result, tot_bytes;
+	int curpage, need_commit;
+	ssize_t result, tot_bytes;
 	struct nfs_writeverf first_verf;
-	struct nfs_write_data	wdata = {
-		.inode		= inode,
-		.cred		= ctx->cred,
-		.args		= {
-			.fh		= NFS_FH(inode),
-			.context	= ctx,
-		},
-		.res		= {
-			.fattr		= &wdata.fattr,
-			.verf		= &wdata.verf,
-		},
-	};
+	struct nfs_write_data *wdata;
 
-	wdata.args.stable = NFS_UNSTABLE;
+	wdata = nfs_writedata_alloc();
+	if (!wdata)
+		return -ENOMEM;
+
+	wdata->inode = inode;
+	wdata->cred = ctx->cred;
+	wdata->args.fh = NFS_FH(inode);
+	wdata->args.context = ctx;
+	wdata->args.stable = NFS_UNSTABLE;
 	if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize)
-		wdata.args.stable = NFS_FILE_SYNC;
+		wdata->args.stable = NFS_FILE_SYNC;
+	wdata->res.fattr = &wdata->fattr;
+	wdata->res.verf = &wdata->verf;
 
 	nfs_begin_data_update(inode);
 retry:
@@ -281,20 +453,20 @@
 	tot_bytes = 0;
 	curpage = 0;
 	request = count;
-	wdata.args.pgbase = user_addr & ~PAGE_MASK;
-	wdata.args.offset = file_offset;
-        do {
-		wdata.args.count = request;
-                if (wdata.args.count > wsize)
-                        wdata.args.count = wsize;
-		wdata.args.pages = &pages[curpage];
+	wdata->args.pgbase = user_addr & ~PAGE_MASK;
+	wdata->args.offset = file_offset;
+	do {
+		wdata->args.count = request;
+		if (wdata->args.count > wsize)
+			wdata->args.count = wsize;
+		wdata->args.pages = &pages[curpage];
 
 		dprintk("NFS: direct write: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n",
-			wdata.args.count, (long long) wdata.args.offset,
-			user_addr + tot_bytes, wdata.args.pgbase, curpage);
+			wdata->args.count, (long long) wdata->args.offset,
+			user_addr + tot_bytes, wdata->args.pgbase, curpage);
 
 		lock_kernel();
-		result = NFS_PROTO(inode)->write(&wdata);
+		result = NFS_PROTO(inode)->write(wdata);
 		unlock_kernel();
 
 		if (result <= 0) {
@@ -304,20 +476,25 @@
 		}
 
 		if (tot_bytes == 0)
-			memcpy(&first_verf.verifier, &wdata.verf.verifier,
-								VERF_SIZE);
-		if (wdata.verf.committed != NFS_FILE_SYNC) {
+			memcpy(&first_verf.verifier, &wdata->verf.verifier,
+						sizeof(first_verf.verifier));
+		if (wdata->verf.committed != NFS_FILE_SYNC) {
 			need_commit = 1;
-			if (memcmp(&first_verf.verifier,
-					&wdata.verf.verifier, VERF_SIZE))
+			if (memcmp(&first_verf.verifier, &wdata->verf.verifier,
+					sizeof(first_verf.verifier)));
 				goto sync_retry;
 		}
 
-                tot_bytes += result;
-                wdata.args.offset += result;
-		wdata.args.pgbase += result;
-		curpage += wdata.args.pgbase >> PAGE_SHIFT;
-		wdata.args.pgbase &= ~PAGE_MASK;
+		tot_bytes += result;
+
+		/* in case of a short write: stop now, let the app recover */
+		if (result < wdata->args.count)
+			break;
+
+		wdata->args.offset += result;
+		wdata->args.pgbase += result;
+		curpage += wdata->args.pgbase >> PAGE_SHIFT;
+		wdata->args.pgbase &= ~PAGE_MASK;
 		request -= result;
 	} while (request != 0);
 
@@ -325,27 +502,27 @@
 	 * Commit data written so far, even in the event of an error
 	 */
 	if (need_commit) {
-		wdata.args.count = tot_bytes;
-		wdata.args.offset = file_offset;
+		wdata->args.count = tot_bytes;
+		wdata->args.offset = file_offset;
 
 		lock_kernel();
-		result = NFS_PROTO(inode)->commit(&wdata);
+		result = NFS_PROTO(inode)->commit(wdata);
 		unlock_kernel();
 
 		if (result < 0 || memcmp(&first_verf.verifier,
-						&wdata.verf.verifier,
-						VERF_SIZE) != 0)
+					 &wdata->verf.verifier,
+					 sizeof(first_verf.verifier)) != 0)
 			goto sync_retry;
 	}
 	result = tot_bytes;
 
 out:
 	nfs_end_data_update_defer(inode);
-
+	nfs_writedata_free(wdata);
 	return result;
 
 sync_retry:
-	wdata.args.stable = NFS_FILE_SYNC;
+	wdata->args.stable = NFS_FILE_SYNC;
 	goto retry;
 }
 
@@ -362,9 +539,9 @@
  * that non-direct readers might access, so they will pick up these
  * writes immediately.
  */
-static int nfs_direct_write(struct inode *inode, struct nfs_open_context *ctx,
-		const struct iovec *iov, loff_t file_offset,
-		unsigned long nr_segs)
+static ssize_t nfs_direct_write(struct inode *inode,
+		struct nfs_open_context *ctx, const struct iovec *iov,
+		loff_t file_offset, unsigned long nr_segs)
 {
 	ssize_t tot_bytes = 0;
 	unsigned long seg = 0;
@@ -504,6 +681,8 @@
 	if (mapping->nrpages) {
 		retval = filemap_fdatawrite(mapping);
 		if (retval == 0)
+			retval = nfs_wb_all(inode);
+		if (retval == 0)
 			retval = filemap_fdatawait(mapping);
 		if (retval)
 			goto out;
@@ -593,6 +772,8 @@
 	if (mapping->nrpages) {
 		retval = filemap_fdatawrite(mapping);
 		if (retval == 0)
+			retval = nfs_wb_all(inode);
+		if (retval == 0)
 			retval = filemap_fdatawait(mapping);
 		if (retval)
 			goto out;
@@ -607,3 +788,21 @@
 out:
 	return retval;
 }
+
+int nfs_init_directcache(void)
+{
+	nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
+						sizeof(struct nfs_direct_req),
+						0, SLAB_RECLAIM_ACCOUNT,
+						NULL, NULL);
+	if (nfs_direct_cachep == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void nfs_destroy_directcache(void)
+{
+	if (kmem_cache_destroy(nfs_direct_cachep))
+		printk(KERN_INFO "nfs_direct_cache: not all structures were freed\n");
+}
Index: linux-2.6.10/fs/nfs/read.c
===================================================================
--- linux-2.6.10.orig/fs/nfs/read.c	2004-12-25 05:33:47.000000000 +0800
+++ linux-2.6.10/fs/nfs/read.c	2005-04-05 14:49:13.437686480 +0800
@@ -24,7 +24,6 @@
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/pagemap.h>
-#include <linux/mempool.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_page.h>
@@ -39,25 +38,11 @@
 static void nfs_readpage_result_full(struct nfs_read_data *, int);
 
 static kmem_cache_t *nfs_rdata_cachep;
-static mempool_t *nfs_rdata_mempool;
+mempool_t *nfs_rdata_mempool;
 
 #define MIN_POOL_READ	(32)
 
-static struct nfs_read_data *nfs_readdata_alloc(void)
-{
-	struct nfs_read_data   *p;
-	p = (struct nfs_read_data *)mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
-	if (p)
-		memset(p, 0, sizeof(*p));
-	return p;
-}
-
-static __inline__ void nfs_readdata_free(struct nfs_read_data *p)
-{
-	mempool_free(p, nfs_rdata_mempool);
-}
-
-static void nfs_readdata_release(struct rpc_task *task)
+void nfs_readdata_release(struct rpc_task *task)
 {
         struct nfs_read_data   *data = (struct nfs_read_data *)task->tk_calldata;
         nfs_readdata_free(data);