Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / kernel_patches / patches / linux-2.6.10-CITI_NFS4_ALL-1.patch
1
2
3 The complete set of CITI NFSv4 patches combined into one patch.
4
5 Changes since 2.6.10-rc3-CITI_NFS4_ALL-3
6         * minor adjustments to xdr buffer length calculations in fs/nfs4xdr.c
7         * client ACL revisions: pass ACLs in page array of XDR bufs, removing
8           arbitrary length restrictions.  Temporarily disable ACL caching.
9
10 Index: linux-2.6.10/include/linux/nfsd/state.h
11 ===================================================================
12 --- linux-2.6.10.orig/include/linux/nfsd/state.h        2004-12-25 05:33:50.000000000 +0800
13 +++ linux-2.6.10/include/linux/nfsd/state.h     2005-04-05 14:49:13.465682224 +0800
14 @@ -67,6 +67,45 @@
15  #define ZERO_STATEID(stateid)       (!memcmp((stateid), &zerostateid, sizeof(stateid_t)))
16  #define ONE_STATEID(stateid)        (!memcmp((stateid), &onestateid, sizeof(stateid_t)))
17  
18 +/* Delegation recall states */
19 +#define NFS4_NO_RECALL                 0x000
20 +#define NFS4_RECALL_IN_PROGRESS                0x001
21 +#define NFS4_RECALL_COMPLETE           0x002
22 +
23 +
24 +/* Delegation flags */
25 +#define NFS4_DELAY_CLOSE               0x001
26 +
27 +struct nfs4_cb_recall {
28 +       u32                     cbr_ident;
29 +       int                     cbr_trunc;
30 +       stateid_t               cbr_stateid;
31 +       u32                     cbr_fhlen;
32 +       u32                     cbr_fhval[NFS4_FHSIZE];
33 +       struct nfs4_delegation  *cbr_dp;
34 +};
35 +
36 +struct nfs4_delegation {
37 +       struct list_head        dl_del_perfile; /* nfs4_file->fi_del_perfile */
38 +       struct list_head        dl_del_perclnt; /* nfs4_client->cl_del_perclnt*/
39 +       struct list_head        dl_recall_lru;  /* delegation recalled */
40 +       atomic_t                dl_recall_cnt;  /* resend cb_recall only once */
41 +       atomic_t                dl_count;       /* ref count */
42 +       atomic_t                dl_state;       /* recall state */
43 +       struct nfs4_client      *dl_client;
44 +       struct nfs4_file        *dl_file;
45 +       struct file_lock        *dl_flock;
46 +       struct nfs4_stateid     *dl_stp;
47 +       u32                     dl_flags;
48 +       u32                     dl_type;
49 +       time_t                  dl_time;
50 +       struct nfs4_cb_recall   dl_recall;
51 +};
52 +
53 +#define dl_stateid      dl_recall.cbr_stateid
54 +#define dl_fhlen        dl_recall.cbr_fhlen
55 +#define dl_fhval        dl_recall.cbr_fhval
56 +
57  /* client delegation callback info */
58  struct nfs4_callback {
59         /* SETCLIENTID info */
60 @@ -75,9 +114,8 @@
61         unsigned short          cb_port;
62         u32                     cb_prog;
63         u32                     cb_ident;
64 -       struct xdr_netobj       cb_netid;
65         /* RPC client info */
66 -       u32                     cb_set;     /* successful CB_NULL call */
67 +       atomic_t                cb_set;     /* successful CB_NULL call */
68         struct rpc_program      cb_program;
69         struct rpc_stat         cb_stat;
70         struct rpc_clnt *       cb_client;
71 @@ -97,6 +135,7 @@
72         struct list_head        cl_idhash;      /* hash by cl_clientid.id */
73         struct list_head        cl_strhash;     /* hash by cl_name */
74         struct list_head        cl_perclient;   /* list: stateowners */
75 +       struct list_head        cl_del_perclnt; /* list: delegations */
76         struct list_head        cl_lru;         /* tail queue */
77         struct xdr_netobj       cl_name;        /* id generated by client */
78         nfs4_verifier           cl_verifier;    /* generated by client */
79 @@ -106,7 +145,8 @@
80         clientid_t              cl_clientid;    /* generated by server */
81         nfs4_verifier           cl_confirm;     /* generated by server */
82         struct nfs4_callback    cl_callback;    /* callback info */
83 -       time_t                  cl_first_state; /* first state aquisition*/
84 +       atomic_t                cl_count;       /* ref count */
85 +       u32                     cl_firststate;  /* recovery file creation */
86  };
87  
88  /* struct nfs4_client_reset
89 @@ -117,8 +157,6 @@
90  struct nfs4_client_reclaim {
91         struct list_head        cr_strhash;     /* hash by cr_name */
92         struct xdr_netobj       cr_name;        /* id generated by client */
93 -       time_t                  cr_first_state; /* first state aquisition */
94 -       u32                     cr_expired;     /* boolean: lease expired? */
95  };
96  
97  static inline void
98 @@ -194,6 +232,7 @@
99  struct nfs4_file {
100         struct list_head        fi_hash;    /* hash by "struct inode *" */
101         struct list_head        fi_perfile; /* list: nfs4_stateid */
102 +       struct list_head        fi_del_perfile; /* list: nfs4_delegation */
103         struct inode            *fi_inode;
104         u32                     fi_id;      /* used with stateowner->so_id 
105                                              * for stateid_hashtbl hash */
106 @@ -231,8 +270,10 @@
107  #define CONFIRM                 0x00000002
108  #define OPEN_STATE              0x00000004
109  #define LOCK_STATE              0x00000008
110 -#define RDWR_STATE              0x00000010
111 -#define CLOSE_STATE             0x00000020
112 +#define RD_STATE               0x00000010
113 +#define WR_STATE               0x00000020
114 +#define CLOSE_STATE             0x00000040
115 +#define DELEG_RET               0x00000080
116  
117  #define seqid_mutating_err(err)                       \
118         (((err) != nfserr_stale_clientid) &&    \
119 @@ -243,14 +284,24 @@
120  extern time_t nfs4_laundromat(void);
121  extern int nfsd4_renew(clientid_t *clid);
122  extern int nfs4_preprocess_stateid_op(struct svc_fh *current_fh, 
123 -               stateid_t *stateid, int flags, struct nfs4_stateid **stpp);
124 +               stateid_t *stateid, int flags, struct file **filp);
125  extern int nfs4_share_conflict(struct svc_fh *current_fh, 
126                 unsigned int deny_type);
127  extern void nfs4_lock_state(void);
128  extern void nfs4_unlock_state(void);
129  extern int nfs4_in_grace(void);
130  extern int nfs4_check_open_reclaim(clientid_t *clid);
131 +extern void put_nfs4_client(struct nfs4_client *clp);
132  extern void nfs4_free_stateowner(struct kref *kref);
133 +extern void nfsd4_probe_callback(struct nfs4_client *clp);
134 +extern int nfsd4_cb_recall(struct nfs4_delegation *dp);
135 +extern int nfsd4_create_clid_file(struct nfs4_client *clp);
136 +extern void nfsd4_remove_clid_file(struct nfs4_client *clp);
137 +extern int nfsd4_list_rec_dir(int clear);
138 +extern void nfsd4_init_rec_dir(char *rec_dirname);
139 +extern void nfsd4_shutdown_rec_dir(void);
140 +extern int nfs4_client_to_reclaim(char *name, int namlen);
141 +
142  
143  static inline void
144  nfs4_put_stateowner(struct nfs4_stateowner *so)
145 Index: linux-2.6.10/include/linux/nfsd/nfsd.h
146 ===================================================================
147 --- linux-2.6.10.orig/include/linux/nfsd/nfsd.h 2004-12-25 05:35:39.000000000 +0800
148 +++ linux-2.6.10/include/linux/nfsd/nfsd.h      2005-04-05 14:49:13.464682376 +0800
149 @@ -98,8 +98,12 @@
150  void           nfsd_close(struct file *);
151  int            nfsd_read(struct svc_rqst *, struct svc_fh *,
152                                 loff_t, struct kvec *,int, unsigned long *);
153 +int            nfsd_vfs_read(struct svc_rqst *, struct svc_fh *, struct file *,
154 +                               loff_t, struct kvec *, int, unsigned long *);
155  int            nfsd_write(struct svc_rqst *, struct svc_fh *,
156                                 loff_t, struct kvec *,int, unsigned long, int *);
157 +int            nfsd_vfs_write(struct svc_rqst *, struct svc_fh *,struct file *,
158 +                                loff_t, struct kvec *,int, unsigned long, int *);
159  int            nfsd_readlink(struct svc_rqst *, struct svc_fh *,
160                                 char *, int *);
161  int            nfsd_symlink(struct svc_rqst *, struct svc_fh *,
162 Index: linux-2.6.10/include/linux/nfsd/xdr4.h
163 ===================================================================
164 --- linux-2.6.10.orig/include/linux/nfsd/xdr4.h 2004-12-25 05:34:01.000000000 +0800
165 +++ linux-2.6.10/include/linux/nfsd/xdr4.h      2005-04-05 14:49:13.466682072 +0800
166 @@ -44,16 +44,6 @@
167  #define NFSD4_MAX_TAGLEN       128
168  #define XDR_LEN(n)                     (((n) + 3) & ~3)
169  
170 -typedef u32 delegation_zero_t;
171 -typedef u32 delegation_boot_t;
172 -typedef u64 delegation_id_t;
173 -
174 -typedef struct {
175 -       delegation_zero_t       ds_zero;
176 -       delegation_boot_t       ds_boot;
177 -       delegation_id_t         ds_id;
178 -} delegation_stateid_t;
179 -
180  struct nfsd4_change_info {
181         u32             atomic;
182         u32             before_ctime_sec;
183 @@ -104,6 +94,10 @@
184  #define cr_specdata1   u.dev.specdata1
185  #define cr_specdata2   u.dev.specdata2
186  
187 +struct nfsd4_delegreturn {
188 +       stateid_t       dr_stateid;
189 +};
190 +
191  struct nfsd4_getattr {
192         u32             ga_bmval[2];        /* request */
193         struct svc_fh   *ga_fhp;            /* response */
194 @@ -202,13 +196,13 @@
195         u32             op_claim_type;      /* request */
196         struct xdr_netobj op_fname;         /* request - everything but CLAIM_PREV */
197         u32             op_delegate_type;   /* request - CLAIM_PREV only */
198 -       delegation_stateid_t    op_delegate_stateid; /* request - CLAIM_DELEGATE_CUR only */
199 +       stateid_t       op_delegate_stateid; /* request - response */
200         u32             op_create;          /* request */
201         u32             op_createmode;      /* request */
202         u32             op_bmval[2];        /* request */
203         union {                             /* request */
204 -               struct iattr    iattr;                      /* UNCHECKED4,GUARDED4 */
205 -               nfs4_verifier   verf;                                /* EXCLUSIVE4 */
206 +               struct iattr    iattr;                      /* UNCHECKED4,GUARDED4 */
207 +               nfs4_verifier   verf;                                /* EXCLUSIVE4 */
208         } u;
209         clientid_t      op_clientid;        /* request */
210         struct xdr_netobj op_owner;           /* request */
211 @@ -247,6 +241,7 @@
212         u32             rd_length;          /* request */
213         struct kvec     rd_iov[RPCSVC_MAXPAGES];
214         int             rd_vlen;
215 +       struct file     *rd_filp;
216         
217         struct svc_rqst *rd_rqstp;          /* response */
218         struct svc_fh * rd_fhp;             /* response */
219 @@ -345,6 +340,7 @@
220                 struct nfsd4_close              close;
221                 struct nfsd4_commit             commit;
222                 struct nfsd4_create             create;
223 +               struct nfsd4_delegreturn        delegreturn;
224                 struct nfsd4_getattr            getattr;
225                 struct svc_fh *                 getfh;
226                 struct nfsd4_link               link;
227 @@ -456,6 +452,8 @@
228  nfsd4_release_lockowner(struct svc_rqst *rqstp,
229                 struct nfsd4_release_lockowner *rlockowner);
230  extern void nfsd4_release_compoundargs(struct nfsd4_compoundargs *);
231 +extern int nfsd4_delegreturn(struct svc_rqst *rqstp,
232 +               struct svc_fh *current_fh, struct nfsd4_delegreturn *dr);
233  #endif
234  
235  /*
236 Index: linux-2.6.10/include/linux/fs.h
237 ===================================================================
238 --- linux-2.6.10.orig/include/linux/fs.h        2005-03-31 15:35:26.000000000 +0800
239 +++ linux-2.6.10/include/linux/fs.h     2005-04-05 14:49:13.461682832 +0800
240 @@ -1185,11 +1185,6 @@
241  
242  extern int vfs_statfs(struct super_block *, struct kstatfs *);
243  
244 -/* Return value for VFS lock functions - tells locks.c to lock conventionally
245 - * REALLY kosha for root NFS and nfs_lock
246 - */ 
247 -#define LOCK_USE_CLNT 1
248 -
249  #define FLOCK_VERIFY_READ  1
250  #define FLOCK_VERIFY_WRITE 2
251  
252 Index: linux-2.6.10/include/linux/dcache.h
253 ===================================================================
254 --- linux-2.6.10.orig/include/linux/dcache.h    2005-03-31 15:35:26.000000000 +0800
255 +++ linux-2.6.10/include/linux/dcache.h 2005-04-05 14:49:13.460682984 +0800
256 @@ -200,6 +200,7 @@
257   * These are the low-level FS interfaces to the dcache..
258   */
259  extern void d_instantiate(struct dentry *, struct inode *);
260 +extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *);
261  extern void d_delete(struct dentry *);
262  
263  /* allocate/de-allocate */
264 @@ -244,6 +245,23 @@
265         d_rehash(entry);
266  }
267  
268 +/**
269 + * d_add_unique - add dentry to hash queues without aliasing
270 + * @entry: dentry to add
271 + * @inode: The inode to attach to this dentry
272 + *
273 + * This adds the entry to the hash queues and initializes @inode.
274 + * The entry was actually filled in earlier during d_alloc().
275 + */
276 +static inline struct dentry *d_add_unique(struct dentry *entry, struct inode *inode)
277 +{
278 +       struct dentry *res;
279 +
280 +       res = d_instantiate_unique(entry, inode);
281 +       d_rehash(res != NULL ? res : entry);
282 +       return res;
283 +}
284 +
285  /* used for rename() and baskets */
286  extern void d_move(struct dentry *, struct dentry *);
287  
288 Index: linux-2.6.10/include/linux/nfs_fs.h
289 ===================================================================
290 --- linux-2.6.10.orig/include/linux/nfs_fs.h    2004-12-25 05:34:31.000000000 +0800
291 +++ linux-2.6.10/include/linux/nfs_fs.h 2005-04-05 14:49:13.463682528 +0800
292 @@ -30,6 +30,7 @@
293  #include <linux/nfs_xdr.h>
294  #include <linux/rwsem.h>
295  #include <linux/workqueue.h>
296 +#include <linux/mempool.h>
297  
298  /*
299   * Enable debugging support for nfs client.
300 @@ -201,6 +202,7 @@
301  #define NFS_INO_INVALID_ATTR   0x0008          /* cached attrs are invalid */
302  #define NFS_INO_INVALID_DATA   0x0010          /* cached data is invalid */
303  #define NFS_INO_INVALID_ATIME  0x0020          /* cached atime is invalid */
304 +#define NFS_INO_INVALID_ACCESS 0x0040          /* cached access cred invalid */
305  
306  static inline struct nfs_inode *NFS_I(struct inode *inode)
307  {
308 @@ -239,7 +241,7 @@
309  static inline void NFS_CACHEINV(struct inode *inode)
310  {
311         if (!nfs_caches_unstable(inode))
312 -               NFS_FLAGS(inode) |= NFS_INO_INVALID_ATTR;
313 +               NFS_FLAGS(inode) |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS;
314  }
315  
316  static inline int nfs_server_capable(struct inode *inode, int cap)
317 @@ -424,6 +426,44 @@
318         return nfs_wb_page_priority(inode, page, 0);
319  }
320  
321 +/*
322 + * Allocate and free nfs_write_data structures
323 + */
324 +extern mempool_t *nfs_wdata_mempool;
325 +extern mempool_t *nfs_commit_mempool;
326 +
327 +static inline struct nfs_write_data *nfs_writedata_alloc(void)
328 +{
329 +       struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
330 +       if (p) {
331 +               memset(p, 0, sizeof(*p));
332 +               INIT_LIST_HEAD(&p->pages);
333 +       }
334 +       return p;
335 +}
336 +
337 +static inline void nfs_writedata_free(struct nfs_write_data *p)
338 +{
339 +       mempool_free(p, nfs_wdata_mempool);
340 +}
341 +
342 +extern void  nfs_writedata_release(struct rpc_task *task);
343 +
344 +static inline struct nfs_write_data *nfs_commit_alloc(void)
345 +{
346 +       struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
347 +       if (p) {
348 +               memset(p, 0, sizeof(*p));
349 +               INIT_LIST_HEAD(&p->pages);
350 +       }
351 +       return p;
352 +}
353 +
354 +static inline void nfs_commit_free(struct nfs_write_data *p)
355 +{
356 +       mempool_free(p, nfs_commit_mempool);
357 +}
358 +
359  /* Hack for future NFS swap support */
360  #ifndef IS_SWAPFILE
361  # define IS_SWAPFILE(inode)    (0)
362 @@ -439,6 +479,26 @@
363  extern void nfs_readpage_result(struct rpc_task *);
364  
365  /*
366 + * Allocate and free nfs_read_data structures
367 + */
368 +extern mempool_t *nfs_rdata_mempool;
369 +
370 +static inline struct nfs_read_data *nfs_readdata_alloc(void)
371 +{
372 +       struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
373 +       if (p)
374 +               memset(p, 0, sizeof(*p));
375 +       return p;
376 +}
377 +
378 +static inline void nfs_readdata_free(struct nfs_read_data *p)
379 +{
380 +       mempool_free(p, nfs_rdata_mempool);
381 +}
382 +
383 +extern void  nfs_readdata_release(struct rpc_task *task);
384 +
385 +/*
386   * linux/fs/mount_clnt.c
387   * (Used only by nfsroot module)
388   */
389 @@ -644,6 +704,12 @@
390  
391  extern struct dentry_operations nfs4_dentry_operations;
392  extern struct inode_operations nfs4_dir_inode_operations;
393 +extern struct inode_operations nfs4_file_inode_operations;
394 +
395 +/* inode.c */
396 +extern ssize_t nfs4_getxattr(struct dentry *, const char *, void *, size_t);
397 +extern int nfs4_setxattr(struct dentry *, const char *, const void *, size_t, int);
398 +extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t);
399  
400  /* nfs4proc.c */
401  extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short);
402 @@ -651,13 +717,14 @@
403  extern int nfs4_open_reclaim(struct nfs4_state_owner *, struct nfs4_state *);
404  extern int nfs4_proc_async_renew(struct nfs4_client *);
405  extern int nfs4_proc_renew(struct nfs4_client *);
406 -extern int nfs4_do_close(struct inode *, struct nfs4_state *);
407 -extern int nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode);
408 +extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode);
409  extern int nfs4_wait_clnt_recover(struct rpc_clnt *, struct nfs4_client *);
410  extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
411  extern int nfs4_open_revalidate(struct inode *, struct dentry *, int);
412  extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *);
413  extern int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request);
414 +extern ssize_t nfs4_proc_get_acl(struct inode *, void *buf, ssize_t buflen);
415 +extern int nfs4_proc_set_acl(struct inode *, const void *buf, ssize_t buflen);
416  
417  /* nfs4renewd.c */
418  extern void nfs4_schedule_state_renewal(struct nfs4_client *);
419 Index: linux-2.6.10/include/linux/nfs4.h
420 ===================================================================
421 --- linux-2.6.10.orig/include/linux/nfs4.h      2004-12-25 05:34:45.000000000 +0800
422 +++ linux-2.6.10/include/linux/nfs4.h   2005-04-05 14:49:13.474680856 +0800
423 @@ -28,7 +28,7 @@
424  #define NFS4_ACCESS_DELETE      0x0010
425  #define NFS4_ACCESS_EXECUTE     0x0020
426  
427 -#define NFS4_FH_PERISTENT              0x0000
428 +#define NFS4_FH_PERSISTENT             0x0000
429  #define NFS4_FH_NOEXPIRE_WITH_OPEN     0x0001
430  #define NFS4_FH_VOLATILE_ANY           0x0002
431  #define NFS4_FH_VOL_MIGRATION          0x0004
432 @@ -382,6 +382,8 @@
433         NFSPROC4_CLNT_READDIR,
434         NFSPROC4_CLNT_SERVER_CAPS,
435         NFSPROC4_CLNT_DELEGRETURN,
436 +       NFSPROC4_CLNT_GETACL,
437 +       NFSPROC4_CLNT_SETACL,
438  };
439  
440  #endif
441 Index: linux-2.6.10/include/linux/sunrpc/auth.h
442 ===================================================================
443 --- linux-2.6.10.orig/include/linux/sunrpc/auth.h       2004-12-25 05:34:57.000000000 +0800
444 +++ linux-2.6.10/include/linux/sunrpc/auth.h    2005-04-05 14:49:13.468681768 +0800
445 @@ -51,7 +51,6 @@
446  };
447  #define RPCAUTH_CRED_LOCKED    0x0001
448  #define RPCAUTH_CRED_UPTODATE  0x0002
449 -#define RPCAUTH_CRED_DEAD      0x0004
450  
451  #define RPCAUTH_CRED_MAGIC     0x0f4aa4f0
452  
453 @@ -133,7 +132,6 @@
454  int                    rpcauth_refreshcred(struct rpc_task *);
455  void                   rpcauth_invalcred(struct rpc_task *);
456  int                    rpcauth_uptodatecred(struct rpc_task *);
457 -int                    rpcauth_deadcred(struct rpc_task *);
458  void                   rpcauth_init_credcache(struct rpc_auth *);
459  void                   rpcauth_free_credcache(struct rpc_auth *);
460  
461 Index: linux-2.6.10/include/linux/sunrpc/svc.h
462 ===================================================================
463 --- linux-2.6.10.orig/include/linux/sunrpc/svc.h        2004-12-25 05:34:58.000000000 +0800
464 +++ linux-2.6.10/include/linux/sunrpc/svc.h     2005-04-05 14:49:13.467681920 +0800
465 @@ -251,8 +251,7 @@
466         char *                  pg_name;        /* service name */
467         char *                  pg_class;       /* class name: services sharing authentication */
468         struct svc_stat *       pg_stats;       /* rpc statistics */
469 -       /* Override authentication. NULL means use default */
470 -       int                     (*pg_authenticate)(struct svc_rqst *, u32 *);
471 +       int                     (*pg_authenticate)(struct svc_rqst *);
472  };
473  
474  /*
475 Index: linux-2.6.10/include/linux/sunrpc/cache.h
476 ===================================================================
477 --- linux-2.6.10.orig/include/linux/sunrpc/cache.h      2004-12-25 05:34:57.000000000 +0800
478 +++ linux-2.6.10/include/linux/sunrpc/cache.h   2005-04-05 14:49:13.470681464 +0800
479 @@ -128,20 +128,17 @@
480   * just like a template in C++, this macro does cache lookup
481   * for us.
482   * The function is passed some sort of HANDLE from which a cache_detail
483 - * structure can be determined (via SETUP, DETAIL), a template
484 + * structure can be determined (via DETAIL), a template
485   * cache entry (type RTN*), and a "set" flag.  Using the HASHFN and the 
486   * TEST, the function will try to find a matching cache entry in the cache.
487   * If "set" == 0 :
488   *    If an entry is found, it is returned
489   *    If no entry is found, a new non-VALID entry is created.
490 - * If "set" == 1 and INPLACE == 0 :
491 + * If "set" == 1:
492   *    If no entry is found a new one is inserted with data from "template"
493   *    If a non-CACHE_VALID entry is found, it is updated from template using UPDATE
494   *    If a CACHE_VALID entry is found, a new entry is swapped in with data
495   *       from "template"
496 - * If set == 1, and INPLACE == 1 :
497 - *    As above, except that if a CACHE_VALID entry is found, we UPDATE in place
498 - *       instead of swapping in a new entry.
499   *
500   * If the passed handle has the CACHE_NEGATIVE flag set, then UPDATE is not
501   * run but insteead CACHE_NEGATIVE is set in any new item.
502 @@ -153,25 +150,22 @@
503   * MEMBER is the member of the cache which is cache_head, which must be first
504   * FNAME is the name for the function  
505   * ARGS are arguments to function and must contain RTN *item, int set.  May
506 - *   also contain something to be usedby SETUP or DETAIL to find cache_detail.
507 - * SETUP  locates the cache detail and makes it available as...
508 - * DETAIL identifies the cache detail, possibly set up by SETUP
509 + *   also contain something to be used by DETAIL to find cache_detail.
510 + * DETAIL identifies the cache detail
511   * HASHFN returns a hash value of the cache entry "item"
512   * TEST  tests if "tmp" matches "item"
513   * INIT copies key information from "item" to "new"
514   * UPDATE copies content information from "item" to "tmp"
515 - * INPLACE is true if updates can happen inplace rather than allocating a new structure
516   *
517   * WARNING: any substantial changes to this must be reflected in
518   *   net/sunrpc/svcauth.c(auth_domain_lookup)
519   *  which is a similar routine that is open-coded.
520   */
521 -#define DefineCacheLookup(RTN,MEMBER,FNAME,ARGS,SETUP,DETAIL,HASHFN,TEST,INIT,UPDATE,INPLACE)  \
522 +#define DefineCacheLookup(RTN,MEMBER,FNAME,ARGS,DETAIL,HASHFN,TEST,INIT,UPDATE)        \
523  RTN *FNAME ARGS                                                                                \
524  {                                                                                      \
525         RTN *tmp, *new=NULL;                                                            \
526         struct cache_head **hp, **head;                                                 \
527 -       SETUP;                                                                          \
528         head = &(DETAIL)->hash_table[HASHFN];                                           \
529   retry:                                                                                        \
530         if (set||new) write_lock(&(DETAIL)->hash_lock);                                 \
531 @@ -180,14 +174,14 @@
532                 tmp = container_of(*hp, RTN, MEMBER);                                   \
533                 if (TEST) { /* found a match */                                         \
534                                                                                         \
535 -                       if (set && !INPLACE && test_bit(CACHE_VALID, &tmp->MEMBER.flags) && !new) \
536 +                       if (set && test_bit(CACHE_VALID, &tmp->MEMBER.flags) && !new) \
537                                 break;                                                  \
538                                                                                         \
539                         if (new)                                                        \
540                                 {INIT;}                                                 \
541                         cache_get(&tmp->MEMBER);                                        \
542                         if (set) {                                                      \
543 -                               if (!INPLACE && test_bit(CACHE_VALID, &tmp->MEMBER.flags))\
544 +                               if (test_bit(CACHE_VALID, &tmp->MEMBER.flags))\
545                                 { /* need to swap in new */                             \
546                                         RTN *t2;                                        \
547                                                                                         \
548 @@ -209,7 +203,7 @@
549                         else read_unlock(&(DETAIL)->hash_lock);                         \
550                         if (set)                                                        \
551                                 cache_fresh(DETAIL, &tmp->MEMBER, item->MEMBER.expiry_time); \
552 -                       if (set && !INPLACE && new) cache_fresh(DETAIL, &new->MEMBER, 0);       \
553 +                       if (set && new) cache_fresh(DETAIL, &new->MEMBER, 0);   \
554                         if (new) (DETAIL)->cache_put(&new->MEMBER, DETAIL);             \
555                         return tmp;                                                     \
556                 }                                                                       \
557 @@ -242,10 +236,10 @@
558         return NULL;                                                                    \
559  }
560  
561 -#define DefineSimpleCacheLookup(STRUCT,INPLACE)        \
562 -       DefineCacheLookup(struct STRUCT, h, STRUCT##_lookup, (struct STRUCT *item, int set), /*no setup */,     \
563 +#define DefineSimpleCacheLookup(STRUCT)        \
564 +       DefineCacheLookup(struct STRUCT, h, STRUCT##_lookup, (struct STRUCT *item, int set),    \
565                           & STRUCT##_cache, STRUCT##_hash(item), STRUCT##_match(item, tmp),\
566 -                         STRUCT##_init(new, item), STRUCT##_update(tmp, item),INPLACE)
567 +                         STRUCT##_init(new, item), STRUCT##_update(tmp, item))
568  
569  #define cache_for_each(pos, detail, index, member)                                             \
570         for (({read_lock(&(detail)->hash_lock); index = (detail)->hash_size;}) ;                \
571 Index: linux-2.6.10/include/linux/sunrpc/sched.h
572 ===================================================================
573 --- linux-2.6.10.orig/include/linux/sunrpc/sched.h      2004-12-25 05:35:01.000000000 +0800
574 +++ linux-2.6.10/include/linux/sunrpc/sched.h   2005-04-05 14:49:13.472681160 +0800
575 @@ -11,7 +11,9 @@
576  
577  #include <linux/timer.h>
578  #include <linux/sunrpc/types.h>
579 +#include <linux/spinlock.h>
580  #include <linux/wait.h>
581 +#include <linux/workqueue.h>
582  #include <linux/sunrpc/xdr.h>
583  
584  /*
585 @@ -25,11 +27,18 @@
586         struct rpc_cred *       rpc_cred;       /* Credentials */
587  };
588  
589 +struct rpc_wait_queue;
590 +struct rpc_wait {
591 +       struct list_head        list;           /* wait queue links */
592 +       struct list_head        links;          /* Links to related tasks */
593 +       wait_queue_head_t       waitq;          /* sync: sleep on this q */
594 +       struct rpc_wait_queue * rpc_waitq;      /* RPC wait queue we're on */
595 +};
596 +
597  /*
598   * This is the RPC task struct
599   */
600  struct rpc_task {
601 -       struct list_head        tk_list;        /* wait queue links */
602  #ifdef RPC_DEBUG
603         unsigned long           tk_magic;       /* 0xf00baa */
604  #endif
605 @@ -37,7 +46,6 @@
606         struct rpc_clnt *       tk_client;      /* RPC client */
607         struct rpc_rqst *       tk_rqstp;       /* RPC request */
608         int                     tk_status;      /* result of last operation */
609 -       struct rpc_wait_queue * tk_rpcwait;     /* RPC wait queue we're on */
610  
611         /*
612          * RPC call state
613 @@ -70,13 +78,18 @@
614          * you have a pathological interest in kernel oopses.
615          */
616         struct timer_list       tk_timer;       /* kernel timer */
617 -       wait_queue_head_t       tk_wait;        /* sync: sleep on this q */
618         unsigned long           tk_timeout;     /* timeout for rpc_sleep() */
619         unsigned short          tk_flags;       /* misc flags */
620         unsigned char           tk_active   : 1;/* Task has been activated */
621         unsigned char           tk_priority : 2;/* Task priority */
622         unsigned long           tk_runstate;    /* Task run status */
623 -       struct list_head        tk_links;       /* links to related tasks */
624 +       struct workqueue_struct *tk_workqueue;  /* Normally rpciod, but could
625 +                                                * be any workqueue
626 +                                                */
627 +       union {
628 +               struct work_struct      tk_work;        /* Async task work queue */
629 +               struct rpc_wait         tk_wait;        /* RPC wait */
630 +       } u;
631  #ifdef RPC_DEBUG
632         unsigned short          tk_pid;         /* debugging aid */
633  #endif
634 @@ -87,11 +100,11 @@
635  /* support walking a list of tasks on a wait queue */
636  #define        task_for_each(task, pos, head) \
637         list_for_each(pos, head) \
638 -               if ((task=list_entry(pos, struct rpc_task, tk_list)),1)
639 +               if ((task=list_entry(pos, struct rpc_task, u.tk_wait.list)),1)
640  
641  #define        task_for_first(task, head) \
642         if (!list_empty(head) &&  \
643 -           ((task=list_entry((head)->next, struct rpc_task, tk_list)),1))
644 +           ((task=list_entry((head)->next, struct rpc_task, u.tk_wait.list)),1))
645  
646  /* .. and walking list of all tasks */
647  #define        alltask_for_each(task, pos, head) \
648 @@ -126,22 +139,39 @@
649  #define RPC_IS_SOFT(t)         ((t)->tk_flags & RPC_TASK_SOFT)
650  #define RPC_TASK_UNINTERRUPTIBLE(t) ((t)->tk_flags & RPC_TASK_NOINTR)
651  
652 -#define RPC_TASK_SLEEPING      0
653 -#define RPC_TASK_RUNNING       1
654 -#define RPC_IS_SLEEPING(t)     (test_bit(RPC_TASK_SLEEPING, &(t)->tk_runstate))
655 -#define RPC_IS_RUNNING(t)      (test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
656 +#define RPC_TASK_RUNNING       0       /* bit numbers within tk_runstate; this one is read by RPC_IS_RUNNING() */
657 +#define RPC_TASK_QUEUED                1       /* read by RPC_IS_QUEUED(), set/cleared by rpc_set_queued()/rpc_clear_queued() */
658 +#define RPC_TASK_WAKEUP                2       /* taken by rpc_start_wakeup(), dropped by rpc_finish_wakeup() */
659 +#define RPC_TASK_HAS_TIMER     3       /* NOTE(review): no accessor is visible in this header hunk; presumably another tk_runstate bit - confirm */
660  
661 +#define RPC_IS_RUNNING(t)      (test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
662  #define rpc_set_running(t)     (set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
663 -#define rpc_clear_running(t)   (clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
664 +#define rpc_test_and_set_running(t) \
665 +                               (test_and_set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
666 +#define rpc_clear_running(t)   \
667 +       do { \
668 +               smp_mb__before_clear_bit(); \
669 +               clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate); \
670 +               smp_mb__after_clear_bit(); \
671 +       } while (0)
672  
673 -#define rpc_set_sleeping(t)    (set_bit(RPC_TASK_SLEEPING, &(t)->tk_runstate))
674 +#define RPC_IS_QUEUED(t)       (test_bit(RPC_TASK_QUEUED, &(t)->tk_runstate))
675 +#define rpc_set_queued(t)      (set_bit(RPC_TASK_QUEUED, &(t)->tk_runstate))
676 +#define rpc_clear_queued(t)    \
677 +       do { \
678 +               smp_mb__before_clear_bit(); \
679 +               clear_bit(RPC_TASK_QUEUED, &(t)->tk_runstate); \
680 +               smp_mb__after_clear_bit(); \
681 +       } while (0)
682  
683 -#define rpc_clear_sleeping(t) \
684 +#define rpc_start_wakeup(t) \
685 +       (test_and_set_bit(RPC_TASK_WAKEUP, &(t)->tk_runstate) == 0)
686 +#define rpc_finish_wakeup(t) \
687         do { \
688                 smp_mb__before_clear_bit(); \
689 -               clear_bit(RPC_TASK_SLEEPING, &(t)->tk_runstate); \
690 +               clear_bit(RPC_TASK_WAKEUP, &(t)->tk_runstate); \
691                 smp_mb__after_clear_bit(); \
692 -       } while(0)
693 +       } while (0)
694  
695  /*
696   * Task priorities.
697 @@ -157,6 +187,7 @@
698   * RPC synchronization objects
699   */
700  struct rpc_wait_queue {
701 +       spinlock_t              lock;
702         struct list_head        tasks[RPC_NR_PRIORITY]; /* task queue for each priority level */
703         unsigned long           cookie;                 /* cookie of last task serviced */
704         unsigned char           maxpriority;            /* maximum priority (0 if queue is not a priority queue) */
705 @@ -177,6 +208,7 @@
706  
707  #ifndef RPC_DEBUG
708  # define RPC_WAITQ_INIT(var,qname) { \
709 +               .lock = SPIN_LOCK_UNLOCKED, \
710                 .tasks = { \
711                         [0] = LIST_HEAD_INIT(var.tasks[0]), \
712                         [1] = LIST_HEAD_INIT(var.tasks[1]), \
713 @@ -185,6 +217,7 @@
714         }
715  #else
716  # define RPC_WAITQ_INIT(var,qname) { \
717 +               .lock = SPIN_LOCK_UNLOCKED, \
718                 .tasks = { \
719                         [0] = LIST_HEAD_INIT(var.tasks[0]), \
720                         [1] = LIST_HEAD_INIT(var.tasks[1]), \
721 @@ -209,13 +242,10 @@
722  int            rpc_execute(struct rpc_task *);
723  void           rpc_run_child(struct rpc_task *parent, struct rpc_task *child,
724                                         rpc_action action);
725 -int            rpc_add_wait_queue(struct rpc_wait_queue *, struct rpc_task *);
726 -void           rpc_remove_wait_queue(struct rpc_task *);
727  void           rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *);
728  void           rpc_init_wait_queue(struct rpc_wait_queue *, const char *);
729  void           rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *,
730                                         rpc_action action, rpc_action timer);
731 -void           rpc_add_timer(struct rpc_task *, rpc_action);
732  void           rpc_wake_up_task(struct rpc_task *);
733  void           rpc_wake_up(struct rpc_wait_queue *);
734  struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *);
735 Index: linux-2.6.10/include/linux/sunrpc/gss_krb5.h
736 ===================================================================
737 --- linux-2.6.10.orig/include/linux/sunrpc/gss_krb5.h   2004-12-25 05:34:57.000000000 +0800
738 +++ linux-2.6.10/include/linux/sunrpc/gss_krb5.h        2005-04-05 14:49:13.473681008 +0800
739 @@ -53,6 +53,8 @@
740         struct xdr_netobj       mech_used;
741  };
742  
743 +extern spinlock_t krb5_seq_lock;
744 +
745  #define KG_TOK_MIC_MSG    0x0101
746  #define KG_TOK_WRAP_MSG   0x0201
747  
748 @@ -116,18 +118,25 @@
749  
750  s32
751  make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
752 -                  struct xdr_netobj *cksum);
753 +               int body_offset, struct xdr_netobj *cksum);
754  
755  u32
756  krb5_make_token(struct krb5_ctx *context_handle, int qop_req,
757         struct xdr_buf *input_message_buffer,
758 -       struct xdr_netobj *output_message_buffer, int toktype);
759 +       struct xdr_netobj *output_message_buffer);
760  
761  u32
762  krb5_read_token(struct krb5_ctx *context_handle,
763           struct xdr_netobj *input_token_buffer,
764 -         struct xdr_buf *message_buffer,
765 -         int *qop_state, int toktype);
766 +         struct xdr_buf *message_buffer, int *qop_state);
767 +
768 +u32
769 +gss_wrap_kerberos(struct gss_ctx *ctx_id, u32 qop, int offset,
770 +               struct xdr_buf *outbuf, struct page **pages);
771 +
772 +u32
773 +gss_unwrap_kerberos(struct gss_ctx *ctx_id, u32 *qop, int offset,
774 +               struct xdr_buf *buf, int *out_offset);
775  
776  u32
777  krb5_encrypt(struct crypto_tfm * key,
778 @@ -137,6 +146,13 @@
779  krb5_decrypt(struct crypto_tfm * key,
780              void *iv, void *in, void *out, int length); 
781  
782 +int
783 +gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *outbuf, int offset,
784 +               struct page **pages);
785 +
786 +int
787 +gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *inbuf, int offset);
788 +
789  s32
790  krb5_make_seq_num(struct crypto_tfm * key,
791                 int direction,
792 Index: linux-2.6.10/include/linux/sunrpc/xdr.h
793 ===================================================================
794 --- linux-2.6.10.orig/include/linux/sunrpc/xdr.h        2004-12-25 05:35:40.000000000 +0800
795 +++ linux-2.6.10/include/linux/sunrpc/xdr.h     2005-04-05 14:49:13.467681920 +0800
796 @@ -192,6 +192,7 @@
797  extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p);
798  extern uint32_t *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
799  extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
800 +extern void truncate_xdr_buf(struct xdr_buf *xdr, int len);
801  
802  #endif /* __KERNEL__ */
803  
804 Index: linux-2.6.10/include/linux/sunrpc/gss_api.h
805 ===================================================================
806 --- linux-2.6.10.orig/include/linux/sunrpc/gss_api.h    2004-12-25 05:35:28.000000000 +0800
807 +++ linux-2.6.10/include/linux/sunrpc/gss_api.h 2005-04-05 14:49:13.471681312 +0800
808 @@ -47,6 +47,18 @@
809                 struct xdr_buf          *message,
810                 struct xdr_netobj       *mic_token,
811                 u32                     *qstate);
812 +u32 gss_wrap(
813 +               struct gss_ctx          *ctx_id,
814 +               u32                     qop,
815 +               int                     offset,
816 +               struct xdr_buf          *outbuf,
817 +               struct page             **inpages);
818 +u32 gss_unwrap(
819 +               struct gss_ctx          *ctx_id,
820 +               u32                     *qop,
821 +               int                     offset,
822 +               struct xdr_buf          *inbuf,
823 +               int                     *out_offset);
824  u32 gss_delete_sec_context(
825                 struct gss_ctx          **ctx_id);
826  
827 @@ -93,6 +105,18 @@
828                         struct xdr_buf          *message,
829                         struct xdr_netobj       *mic_token,
830                         u32                     *qstate);
831 +       u32 (*gss_wrap)(
832 +                       struct gss_ctx          *ctx_id,
833 +                       u32                     qop,
834 +                       int                     offset,
835 +                       struct xdr_buf          *outbuf,
836 +                       struct page             **inpages);
837 +       u32 (*gss_unwrap)(
838 +                       struct gss_ctx          *ctx_id,
839 +                       u32                     *qop,
840 +                       int                     offset,
841 +                       struct xdr_buf          *buf,
842 +                       int                     *out_offset);
843         void (*gss_delete_sec_context)(
844                         void                    *internal_ctx_id);
845  };
846 Index: linux-2.6.10/include/linux/sunrpc/svcauth.h
847 ===================================================================
848 --- linux-2.6.10.orig/include/linux/sunrpc/svcauth.h    2004-12-25 05:34:31.000000000 +0800
849 +++ linux-2.6.10/include/linux/sunrpc/svcauth.h 2005-04-05 14:49:13.469681616 +0800
850 @@ -26,21 +26,23 @@
851  struct svc_rqst;               /* forward decl */
852  
853  /* Authentication is done in the context of a domain.
854 - * For a server, a domain represents a group of clients using
855 + *
856 + * Currently, the nfs server uses the auth_domain to stand
857 + * for the "client" listed in /etc/exports.
858 + *
859 + * More generally, a domain might represent a group of clients using
860   * a common mechanism for authentication and having a common mapping
861   * between local identity (uid) and network identity.  All clients
862   * in a domain have similar general access rights.  Each domain can
863   * contain multiple principals which will have different specific right
864   * based on normal Discretionary Access Control.
865   *
866 - * For a client, a domain represents a number of servers which all
867 - * use a common authentication mechanism and network identity name space.
868 - *
869   * A domain is created by an authentication flavour module based on name
870   * only.  Userspace then fills in detail on demand.
871   *
872 - * The creation of a domain typically implies creation of one or
873 - * more caches for storing domain specific information.
874 + * In the case of auth_unix and auth_null, the auth_domain is also
875 + * associated with entries in another cache representing the mapping
876 + * of ip addresses to the given client.
877   */
878  struct auth_domain {
879         struct  cache_head      h;
880 @@ -92,6 +94,7 @@
881         int     (*accept)(struct svc_rqst *rq, u32 *authp);
882         int     (*release)(struct svc_rqst *rq);
883         void    (*domain_release)(struct auth_domain *);
884 +       int     (*set_client)(struct svc_rqst *rq);
885  };
886  
887  #define        SVC_GARBAGE     1
888 @@ -107,6 +110,7 @@
889  
890  extern int     svc_authenticate(struct svc_rqst *rqstp, u32 *authp);
891  extern int     svc_authorise(struct svc_rqst *rqstp);
892 +extern int     svc_set_client(struct svc_rqst *rqstp);
893  extern int     svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops);
894  extern void    svc_auth_unregister(rpc_authflavor_t flavor);
895  
896 Index: linux-2.6.10/include/linux/sunrpc/xprt.h
897 ===================================================================
898 --- linux-2.6.10.orig/include/linux/sunrpc/xprt.h       2004-12-25 05:35:23.000000000 +0800
899 +++ linux-2.6.10/include/linux/sunrpc/xprt.h    2005-04-05 14:49:13.471681312 +0800
900 @@ -95,7 +95,10 @@
901         int                     rq_cong;        /* has incremented xprt->cong */
902         int                     rq_received;    /* receive completed */
903         u32                     rq_seqno;       /* gss seq no. used on req. */
904 -
905 +       int                     rq_enc_pages_num;
906 +       struct page             **rq_enc_pages; /* scratch pages for use by
907 +                                                  gss privacy code */
908 +       void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */
909         struct list_head        rq_list;
910  
911         struct xdr_buf          rq_private_buf;         /* The receive buffer
912 Index: linux-2.6.10/include/linux/nfs_xdr.h
913 ===================================================================
914 --- linux-2.6.10.orig/include/linux/nfs_xdr.h   2004-12-25 05:35:24.000000000 +0800
915 +++ linux-2.6.10/include/linux/nfs_xdr.h        2005-04-05 14:49:13.459683136 +0800
916 @@ -326,6 +326,20 @@
917         const u32 *                     bitmask;
918  };
919  
920 +struct nfs_setaclargs {        /* NOTE(review): by its name, the argument bundle for an ACL set request - confirm at the call site */
921 +       struct nfs_fh *                 fh;             /* NFS file handle (struct nfs_fh) */
922 +       ssize_t                         acl_len;        /* presumably the length in bytes of the ACL data - confirm */
923 +       unsigned int                    acl_pgbase;     /* presumably the byte offset of the data within the first page - confirm */
924 +       struct page **                  acl_pages;      /* page array; the changelog at the top of this patch says ACLs are passed in the page array of the xdr buf */
925 +};
926 +
927 +struct nfs_getaclargs {        /* NOTE(review): by its name, the argument bundle for an ACL get request - confirm at the call site */
928 +       struct nfs_fh *                 fh;             /* NFS file handle (struct nfs_fh) */
929 +       ssize_t                         acl_len;        /* presumably the size in bytes of the space offered for the reply - confirm */
930 +       unsigned int                    acl_pgbase;     /* presumably the byte offset of that space within the first page - confirm */
931 +       struct page **                  acl_pages;      /* page array; the changelog at the top of this patch says ACLs are passed in the page array of the xdr buf */
932 +};
933 +
934  struct nfs_setattrres {
935         struct nfs_fattr *              fattr;
936         const struct nfs_server *       server;
937 @@ -666,6 +680,7 @@
938         int     version;                /* Protocol version */
939         struct dentry_operations *dentry_ops;
940         struct inode_operations *dir_inode_ops;
941 +       struct inode_operations *file_inode_ops;
942  
943         int     (*getroot) (struct nfs_server *, struct nfs_fh *,
944                             struct nfs_fsinfo *);
945 @@ -681,7 +696,7 @@
946         int     (*read)    (struct nfs_read_data *);
947         int     (*write)   (struct nfs_write_data *);
948         int     (*commit)  (struct nfs_write_data *);
949 -       struct inode *  (*create)  (struct inode *, struct qstr *,
950 +       struct inode *  (*create)  (struct inode *, struct dentry *,
951                             struct iattr *, int);
952         int     (*remove)  (struct inode *, struct qstr *);
953         int     (*unlink_setup)  (struct rpc_message *,
954 Index: linux-2.6.10/net/sunrpc/xprt.c
955 ===================================================================
956 --- linux-2.6.10.orig/net/sunrpc/xprt.c 2004-12-25 05:35:14.000000000 +0800
957 +++ linux-2.6.10/net/sunrpc/xprt.c      2005-04-05 14:49:13.393693168 +0800
958 @@ -891,7 +891,8 @@
959         xprt->tcp_flags &= ~XPRT_COPY_XID;
960         xprt->tcp_flags |= XPRT_COPY_DATA;
961         xprt->tcp_copied = 4;
962 -       dprintk("RPC:      reading reply for XID %08x\n", xprt->tcp_xid);
963 +       dprintk("RPC:      reading reply for XID %08x\n",
964 +                                               ntohl(xprt->tcp_xid));
965         tcp_check_recm(xprt);
966  }
967  
968 @@ -911,7 +912,7 @@
969         if (!req) {
970                 xprt->tcp_flags &= ~XPRT_COPY_DATA;
971                 dprintk("RPC:      XID %08x request not found!\n",
972 -                               xprt->tcp_xid);
973 +                               ntohl(xprt->tcp_xid));
974                 spin_unlock(&xprt->sock_lock);
975                 return;
976         }
977 @@ -1101,7 +1102,7 @@
978                 goto out;
979  
980         spin_lock_bh(&xprt->sock_lock);
981 -       if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->pending)
982 +       if (xprt->snd_task)
983                 rpc_wake_up_task(xprt->snd_task);
984         spin_unlock_bh(&xprt->sock_lock);
985  out:
986 @@ -1359,8 +1360,9 @@
987         req->rq_task    = task;
988         req->rq_xprt    = xprt;
989         req->rq_xid     = xprt_alloc_xid(xprt);
990 +       req->rq_release_snd_buf = NULL;
991         dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid,
992 -                       req, req->rq_xid);
993 +                       req, ntohl(req->rq_xid));
994  }
995  
996  /*
997 @@ -1384,6 +1386,8 @@
998                 mod_timer(&xprt->timer, xprt->last_used + XPRT_IDLE_TIMEOUT);
999         spin_unlock_bh(&xprt->sock_lock);
1000         task->tk_rqstp = NULL;
1001 +       if (req->rq_release_snd_buf)
1002 +               req->rq_release_snd_buf(req);
1003         memset(req, 0, sizeof(*req));   /* mark unused */
1004  
1005         dprintk("RPC: %4d release request %p\n", task->tk_pid, req);
1006 Index: linux-2.6.10/net/sunrpc/auth.c
1007 ===================================================================
1008 --- linux-2.6.10.orig/net/sunrpc/auth.c 2004-12-25 05:34:57.000000000 +0800
1009 +++ linux-2.6.10/net/sunrpc/auth.c      2005-04-05 14:49:13.394693016 +0800
1010 @@ -214,8 +214,6 @@
1011         list_for_each_safe(pos, next, &auth->au_credcache[nr]) {
1012                 struct rpc_cred *entry;
1013                 entry = list_entry(pos, struct rpc_cred, cr_hash);
1014 -               if (entry->cr_flags & RPCAUTH_CRED_DEAD)
1015 -                       continue;
1016                 if (rpcauth_prune_expired(entry, &free))
1017                         continue;
1018                 if (entry->cr_ops->crmatch(acred, entry, taskflags)) {
1019 @@ -307,9 +305,6 @@
1020         if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock))
1021                 return;
1022  
1023 -       if ((cred->cr_flags & RPCAUTH_CRED_DEAD) && !list_empty(&cred->cr_hash))
1024 -               list_del_init(&cred->cr_hash);
1025 -
1026         if (list_empty(&cred->cr_hash)) {
1027                 spin_unlock(&rpc_credcache_lock);
1028                 rpcauth_crdestroy(cred);
1029 @@ -413,10 +408,3 @@
1030         return !(task->tk_msg.rpc_cred) ||
1031                 (task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_UPTODATE);
1032  }
1033 -
1034 -int
1035 -rpcauth_deadcred(struct rpc_task *task)
1036 -{
1037 -       return !(task->tk_msg.rpc_cred) ||
1038 -               (task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_DEAD);
1039 -}
1040 Index: linux-2.6.10/net/sunrpc/svcauth_unix.c
1041 ===================================================================
1042 --- linux-2.6.10.orig/net/sunrpc/svcauth_unix.c 2004-12-25 05:35:00.000000000 +0800
1043 +++ linux-2.6.10/net/sunrpc/svcauth_unix.c      2005-04-05 14:49:13.395692864 +0800
1044 @@ -97,7 +97,7 @@
1045  };
1046  static struct cache_head       *ip_table[IP_HASHMAX];
1047  
1048 -void ip_map_put(struct cache_head *item, struct cache_detail *cd)
1049 +static void ip_map_put(struct cache_head *item, struct cache_detail *cd)
1050  {
1051         struct ip_map *im = container_of(item, struct ip_map,h);
1052         if (cache_put(item, cd)) {
1053 @@ -258,7 +258,7 @@
1054         .cache_show     = ip_map_show,
1055  };
1056  
1057 -static DefineSimpleCacheLookup(ip_map, 0)
1058 +static DefineSimpleCacheLookup(ip_map)
1059  
1060  
1061  int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom)
1062 @@ -329,14 +329,49 @@
1063         cache_purge(&auth_domain_cache);
1064  }
1065  
1066 +int
1067 +svcauth_unix_set_client(struct svc_rqst *rqstp)
1068 +{
1069 +       struct ip_map key, *ipm;
1070 +       /* Sets rqstp->rq_client from the ip_map entry matching the request; returns an SVC_* code. */
1071 +       rqstp->rq_client = NULL;
1072 +       if (rqstp->rq_proc == 0)        /* procedure 0 is accepted with rq_client left NULL, no lookup */
1073 +               return SVC_OK;
1074 +       /* Lookup key: the serving program's pg_class plus the address in rq_addr. */
1075 +       strcpy(key.m_class, rqstp->rq_server->sv_program->pg_class);
1076 +       key.m_addr = rqstp->rq_addr.sin_addr;
1077 +
1078 +       ipm = ip_map_lookup(&key, 0);
1079 +
1080 +       if (ipm == NULL)
1081 +               return SVC_DENIED;
1082 +       /* Translate the cache_check() result into an SVC_* code. */
1083 +       switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) {
1084 +               case -EAGAIN:
1085 +                       return SVC_DROP;
1086 +               case -ENOENT:
1087 +                       return SVC_DENIED;
1088 +               case 0:
1089 +                       rqstp->rq_client = &ipm->m_client->h;
1090 +                       cache_get(&rqstp->rq_client->h);        /* presumably takes the request's own reference on the domain - confirm */
1091 +                       ip_map_put(&ipm->h, &ip_map_cache);     /* done with the ip_map entry itself */
1092 +                       return SVC_OK;
1093 +               default:
1094 +                       BUG();
1095 +       }
1096 +       /* not reached: every case above returns or hits BUG(); this only shuts up gcc: */
1097 +       return -1;
1098 +}
1099  
1100  static int
1101  svcauth_null_accept(struct svc_rqst *rqstp, u32 *authp)
1102  {
1103         struct kvec     *argv = &rqstp->rq_arg.head[0];
1104         struct kvec     *resv = &rqstp->rq_res.head[0];
1105 -       int             rv=0;
1106 -       struct ip_map key, *ipm;
1107 +       struct svc_cred *cred = &rqstp->rq_cred;
1108 +
1109 +       cred->cr_group_info = NULL;
1110 +       rqstp->rq_client = NULL;
1111  
1112         if (argv->iov_len < 3*4)
1113                 return SVC_GARBAGE;
1114 @@ -353,45 +388,17 @@
1115         }
1116  
1117         /* Signal that mapping to nobody uid/gid is required */
1118 -       rqstp->rq_cred.cr_uid = (uid_t) -1;
1119 -       rqstp->rq_cred.cr_gid = (gid_t) -1;
1120 -       rqstp->rq_cred.cr_group_info = groups_alloc(0);
1121 -       if (rqstp->rq_cred.cr_group_info == NULL)
1122 +       cred->cr_uid = (uid_t) -1;
1123 +       cred->cr_gid = (gid_t) -1;
1124 +       cred->cr_group_info = groups_alloc(0);
1125 +       if (cred->cr_group_info == NULL)
1126                 return SVC_DROP; /* kmalloc failure - client must retry */
1127  
1128         /* Put NULL verifier */
1129         svc_putu32(resv, RPC_AUTH_NULL);
1130         svc_putu32(resv, 0);
1131  
1132 -       strcpy(key.m_class, rqstp->rq_server->sv_program->pg_class);
1133 -       key.m_addr = rqstp->rq_addr.sin_addr;
1134 -
1135 -       ipm = ip_map_lookup(&key, 0);
1136 -
1137 -       rqstp->rq_client = NULL;
1138 -
1139 -       if (ipm)
1140 -               switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) {
1141 -               case -EAGAIN:
1142 -                       rv = SVC_DROP;
1143 -                       break;
1144 -               case -ENOENT:
1145 -                       rv = SVC_OK; /* rq_client is NULL */
1146 -                       break;
1147 -               case 0:
1148 -                       rqstp->rq_client = &ipm->m_client->h;
1149 -                       cache_get(&rqstp->rq_client->h);
1150 -                       ip_map_put(&ipm->h, &ip_map_cache);
1151 -                       rv = SVC_OK;
1152 -                       break;
1153 -               default: BUG();
1154 -               }
1155 -       else rv = SVC_DROP;
1156 -
1157 -       if (rqstp->rq_client == NULL && rqstp->rq_proc != 0)
1158 -               *authp = rpc_autherr_badcred;
1159 -
1160 -       return rv;
1161 +       return SVC_OK;
1162  }
1163  
1164  static int
1165 @@ -414,6 +421,7 @@
1166         .flavour        = RPC_AUTH_NULL,
1167         .accept         = svcauth_null_accept,
1168         .release        = svcauth_null_release,
1169 +       .set_client     = svcauth_unix_set_client,
1170  };
1171  
1172  
1173 @@ -425,8 +433,6 @@
1174         struct svc_cred *cred = &rqstp->rq_cred;
1175         u32             slen, i;
1176         int             len   = argv->iov_len;
1177 -       int             rv=0;
1178 -       struct ip_map key, *ipm;
1179  
1180         cred->cr_group_info = NULL;
1181         rqstp->rq_client = NULL;
1182 @@ -458,39 +464,11 @@
1183                 return SVC_DENIED;
1184         }
1185  
1186 -
1187 -       strcpy(key.m_class, rqstp->rq_server->sv_program->pg_class);
1188 -       key.m_addr = rqstp->rq_addr.sin_addr;
1189 -
1190 -
1191 -       ipm = ip_map_lookup(&key, 0);
1192 -
1193 -       if (ipm)
1194 -               switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) {
1195 -               case -EAGAIN:
1196 -                       rv = SVC_DROP;
1197 -                       break;
1198 -               case -ENOENT:
1199 -                       rv = SVC_OK; /* rq_client is NULL */
1200 -                       break;
1201 -               case 0:
1202 -                       rqstp->rq_client = &ipm->m_client->h;
1203 -                       cache_get(&rqstp->rq_client->h);
1204 -                       ip_map_put(&ipm->h, &ip_map_cache);
1205 -                       rv = SVC_OK;
1206 -                       break;
1207 -               default: BUG();
1208 -               }
1209 -       else rv = SVC_DROP;
1210 -
1211 -       if (rv  == SVC_OK && rqstp->rq_client == NULL && rqstp->rq_proc != 0)
1212 -               goto badcred;
1213 -
1214         /* Put NULL verifier */
1215         svc_putu32(resv, RPC_AUTH_NULL);
1216         svc_putu32(resv, 0);
1217  
1218 -       return rv;
1219 +       return SVC_OK;
1220  
1221  badcred:
1222         *authp = rpc_autherr_badcred;
1223 @@ -520,5 +498,6 @@
1224         .accept         = svcauth_unix_accept,
1225         .release        = svcauth_unix_release,
1226         .domain_release = svcauth_unix_domain_release,
1227 +       .set_client     = svcauth_unix_set_client,
1228  };
1229  
1230 Index: linux-2.6.10/net/sunrpc/clnt.c
1231 ===================================================================
1232 --- linux-2.6.10.orig/net/sunrpc/clnt.c 2005-03-31 15:35:26.000000000 +0800
1233 +++ linux-2.6.10/net/sunrpc/clnt.c      2005-04-05 14:49:13.410690584 +0800
1234 @@ -636,8 +636,14 @@
1235                 rpc_exit(task, -EIO);
1236                 return;
1237         }
1238 -       if (encode && (status = rpcauth_wrap_req(task, encode, req, p,
1239 -                                                task->tk_msg.rpc_argp)) < 0) {
1240 +       if (encode == NULL)
1241 +               return;
1242 +
1243 +       status = rpcauth_wrap_req(task, encode, req, p, task->tk_msg.rpc_argp);
1244 +       if (status == -EAGAIN) {        /* diagnostic only: -EAGAIN still takes the status < 0 exit below, nothing is retried */
1245 +               printk(KERN_WARNING "XXXJBF: out of memory?  Should retry here!!!\n");
1246 +       }
1247 +       if (status < 0) {
1248                 printk(KERN_WARNING "%s: can't encode arguments: %d\n",
1249                                 clnt->cl_protname, -status);
1250                 rpc_exit(task, status);
1251 @@ -935,7 +941,7 @@
1252         task->tk_action = call_reserve;
1253         if (status >= 0 && rpcauth_uptodatecred(task))
1254                 return;
1255 -       if (rpcauth_deadcred(task)) {
1256 +       if (status == -EACCES) {
1257                 rpc_exit(task, -EACCES);
1258                 return;
1259         }
1260 @@ -993,7 +999,7 @@
1261                         goto garbage;
1262                 if ((n = ntohl(*p++)) != RPC_AUTH_ERROR) {
1263                         printk(KERN_WARNING "call_verify: RPC call rejected: %x\n", n);
1264 -               } else if (--len < 0)
1265 +               } else if (--len == 0)
1266                 switch ((n = ntohl(*p++))) {
1267                 case RPC_AUTH_REJECTEDCRED:
1268                 case RPC_AUTH_REJECTEDVERF:
1269 Index: linux-2.6.10/net/sunrpc/svcauth.c
1270 ===================================================================
1271 --- linux-2.6.10.orig/net/sunrpc/svcauth.c      2004-12-25 05:35:23.000000000 +0800
1272 +++ linux-2.6.10/net/sunrpc/svcauth.c   2005-04-05 14:49:13.392693320 +0800
1273 @@ -59,6 +59,11 @@
1274         return aops->accept(rqstp, authp);
1275  }
1276  
1277 +int svc_set_client(struct svc_rqst *rqstp)
1278 +{
1279 +       return rqstp->rq_authop->set_client(rqstp);     /* hand off to the set_client hook of the request's auth ops; the hook is not NULL-checked */
1280 +}
1281 +
1282  /* A request, which was authenticated, has now executed.
1283   * Time to finalise the the credentials and verifier
1284   * and release and resources
1285 Index: linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_unseal.c
1286 ===================================================================
1287 --- linux-2.6.10.orig/net/sunrpc/auth_gss/gss_krb5_unseal.c     2004-12-25 05:35:24.000000000 +0800
1288 +++ linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_unseal.c  2005-04-05 14:49:13.401691952 +0800
1289 @@ -68,20 +68,13 @@
1290  #endif
1291  
1292  
1293 -/* message_buffer is an input if toktype is MIC and an output if it is WRAP:
1294 - * If toktype is MIC: read_token is a mic token, and message_buffer is the
1295 - *   data that the mic was supposedly taken over.
1296 - * If toktype is WRAP: read_token is a wrap token, and message_buffer is used
1297 - *   to return the decrypted data.
1298 - */
1299 +/* read_token is a mic token, and message_buffer is the data that the mic was
1300 + * supposedly taken over. */
1301  
1302 -/* XXX will need to change prototype and/or just split into a separate function
1303 - * when we add privacy (because read_token will be in pages too). */
1304  u32
1305  krb5_read_token(struct krb5_ctx *ctx,
1306                 struct xdr_netobj *read_token,
1307 -               struct xdr_buf *message_buffer,
1308 -               int *qop_state, int toktype)
1309 +               struct xdr_buf *message_buffer, int *qop_state)
1310  {
1311         int                     signalg;
1312         int                     sealalg;
1313 @@ -100,16 +93,12 @@
1314                                         read_token->len))
1315                 goto out;
1316  
1317 -       if ((*ptr++ != ((toktype>>8)&0xff)) || (*ptr++ != (toktype&0xff)))
1318 +       if ((*ptr++ != ((KG_TOK_MIC_MSG>>8)&0xff)) ||
1319 +           (*ptr++ != ( KG_TOK_MIC_MSG    &0xff))   )
1320                 goto out;
1321  
1322         /* XXX sanity-check bodysize?? */
1323  
1324 -       if (toktype == KG_TOK_WRAP_MSG) {
1325 -               /* XXX gone */
1326 -               goto out;
1327 -       }
1328 -
1329         /* get the sign and seal algorithms */
1330  
1331         signalg = ptr[0] + (ptr[1] << 8);
1332 @@ -120,14 +109,7 @@
1333         if ((ptr[4] != 0xff) || (ptr[5] != 0xff))
1334                 goto out;
1335  
1336 -       if (((toktype != KG_TOK_WRAP_MSG) && (sealalg != 0xffff)) ||
1337 -           ((toktype == KG_TOK_WRAP_MSG) && (sealalg == 0xffff)))
1338 -               goto out;
1339 -
1340 -       /* in the current spec, there is only one valid seal algorithm per
1341 -          key type, so a simple comparison is ok */
1342 -
1343 -       if ((toktype == KG_TOK_WRAP_MSG) && !(sealalg == ctx->sealalg))
1344 +       if (sealalg != 0xffff)
1345                 goto out;
1346  
1347         /* there are several mappings of seal algorithms to sign algorithms,
1348 @@ -154,7 +136,7 @@
1349         switch (signalg) {
1350         case SGN_ALG_DES_MAC_MD5:
1351                 ret = make_checksum(checksum_type, ptr - 2, 8,
1352 -                                        message_buffer, &md5cksum);
1353 +                                        message_buffer, 0, &md5cksum);
1354                 if (ret)
1355                         goto out;
1356  
1357 Index: linux-2.6.10/net/sunrpc/auth_gss/gss_mech_switch.c
1358 ===================================================================
1359 --- linux-2.6.10.orig/net/sunrpc/auth_gss/gss_mech_switch.c     2004-12-25 05:35:01.000000000 +0800
1360 +++ linux-2.6.10/net/sunrpc/auth_gss/gss_mech_switch.c  2005-04-05 14:49:13.408690888 +0800
1361 @@ -279,6 +279,29 @@
1362                                  qstate);
1363  }
1364  
1365 +u32     /* result is whatever the mechanism's gss_wrap op returns */
1366 +gss_wrap(struct gss_ctx        *ctx_id,
1367 +        u32            qop,
1368 +        int            offset,
1369 +        struct xdr_buf *buf,
1370 +        struct page    **inpages)
1371 +{      /* thin dispatcher: every argument is forwarded unchanged */
1372 +       return ctx_id->mech_type->gm_ops
1373 +               ->gss_wrap(ctx_id, qop, offset, buf, inpages);  /* op taken from the context's mechanism; not NULL-checked */
1374 +}
1375 +
1376 +u32     /* result is whatever the mechanism's gss_unwrap op returns */
1377 +gss_unwrap(struct gss_ctx      *ctx_id,
1378 +          u32                  *qop,
1379 +          int                  offset,
1380 +          struct xdr_buf       *buf,
1381 +          int                  *out_offset)
1382 +{      /* thin dispatcher: every argument is forwarded unchanged */
1383 +       return ctx_id->mech_type->gm_ops
1384 +               ->gss_unwrap(ctx_id, qop, offset, buf, out_offset);     /* op taken from the context's mechanism; not NULL-checked */
1385 +}
1386 +
1387 +
1388  /* gss_delete_sec_context: free all resources associated with context_handle.
1389   * Note this differs from the RFC 2744-specified prototype in that we don't
1390   * bother returning an output token, since it would never be used anyway. */
1391 Index: linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_wrap.c
1392 ===================================================================
1393 --- linux-2.6.10.orig/net/sunrpc/auth_gss/gss_krb5_wrap.c       2005-04-05 19:01:49.158500672 +0800
1394 +++ linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_wrap.c    2005-04-05 14:49:13.397692560 +0800
1395 @@ -0,0 +1,337 @@
1396 +#include <linux/types.h>
1397 +#include <linux/slab.h>
1398 +#include <linux/jiffies.h>
1399 +#include <linux/sunrpc/gss_krb5.h>
1400 +#include <linux/random.h>
1401 +#include <linux/pagemap.h>
1402 +#include <asm/scatterlist.h>
1403 +#include <linux/crypto.h>
1404 +
1405 +#ifdef RPC_DEBUG
1406 +# define RPCDBG_FACILITY       RPCDBG_AUTH
1407 +#endif
1408 +
1409 +static inline int
1410 +gss_krb5_padding(int blocksize, int length)
1411 +{
1412 +       /* Only blocksize 8 is supported; the pad is always 1..8 bytes. */
1413 +       int partial = length & 7;       /* bytes in the last, partial block */
1414 +       BUG_ON(blocksize != 8);
1415 +       return 8 - partial;
1416 +}
1417 +
1418 +static inline void
1419 +gss_krb5_add_padding(struct xdr_buf *buf, int offset, int blocksize)
1420 +{
1421 +       /* The pad follows the last data byte: that is the end of the tail
1422 +        * when there is page or tail data, else the end of the head. */
1423 +       struct kvec *last = &buf->head[0];
1424 +       int npad = gss_krb5_padding(blocksize, buf->len - offset);
1425 +
1426 +       if (buf->page_len || buf->tail[0].iov_len)
1427 +               last = &buf->tail[0];
1428 +
1429 +       /* Every pad byte carries the pad length itself. */
1430 +       memset(last->iov_base + last->iov_len, npad, npad);
1431 +       last->iov_len += npad;
1432 +       buf->len += npad;
1433 +}
1434 +
1435 +static inline int      /* 0 on success, -EINVAL if the pad byte exceeds blocksize */
1436 +gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize)
1437 +{
1438 +       u8 *ptr;
1439 +       u8 pad;
1440 +       int len = buf->len;     /* 1-based position of the last byte of the buffer */
1441 +
1442 +       if (len <= buf->head[0].iov_len) {      /* last byte lives in the head */
1443 +               pad = *(u8 *)(buf->head[0].iov_base + len - 1);
1444 +               goto out;
1445 +       } else
1446 +               len -= buf->head[0].iov_len;    /* now relative to the page data */
1447 +       if (len <= buf->page_len) {             /* last byte lives in a page */
1448 +               int last = (buf->page_base + len - 1)
1449 +                                       >>PAGE_CACHE_SHIFT;
1450 +               int offset = (buf->page_base + len - 1)
1451 +                                       & (PAGE_CACHE_SIZE - 1);
1452 +               ptr = kmap_atomic(buf->pages[last], KM_SKB_SUNRPC_DATA);
1453 +               pad = *(ptr + offset);
1454 +               kunmap_atomic(ptr, KM_SKB_SUNRPC_DATA);
1455 +               goto out;
1456 +       } else
1457 +               len -= buf->page_len;           /* now relative to the tail */
1458 +       BUG_ON(len > buf->tail[0].iov_len);
1459 +       pad = *(u8 *)(buf->tail[0].iov_base + len - 1);
1460 +out:
1461 +       if (pad > blocksize)
1462 +               return -EINVAL;
1463 +       buf->len -= pad;        /* only buf->len shrinks; the kvec lengths are left alone */
1464 +       return 0;
1465 +}
1466 +
1467 +static inline void
1468 +make_confounder(char *p, int blocksize)
1469 +{      /* writes a blocksize-byte confounder at p using get_random_bytes() */
1470 +       /* XXX?  Is this OK to do on every packet? */
1471 +       get_random_bytes(p, blocksize);
1472 +}
1473 +
1474 +/* Assumptions: the head and tail of inbuf are ours to play with.
1475 + * The pages, however, may be real pages in the page cache and we replace
1476 + * them with scratch pages from **pages before writing to them. */
1477 +/* XXX: obviously the above should be documentation of wrap interface,
1478 + * and shouldn't be in this kerberos-specific file. */
1479 +
1480 +/* XXX factor out common code with seal/unseal. */
1481 +
1482 +u32 /* GSS_S_COMPLETE, GSS_S_CONTEXT_EXPIRED or GSS_S_FAILURE */
1483 +gss_wrap_kerberos(struct gss_ctx *ctx, u32 qop, int offset,
1484 +               struct xdr_buf *buf, struct page **pages)
1485 +{      /* turns buf[offset..] into a KG_TOK_WRAP_MSG token in place */
1486 +       struct krb5_ctx         *kctx = ctx->internal_ctx_id;
1487 +       s32                     checksum_type;
1488 +       struct xdr_netobj       md5cksum = {.len = 0, .data = NULL};
1489 +       int                     blocksize = 0, plainlen;
1490 +       unsigned char           *ptr, *krb5_hdr, *msg_start;
1491 +       s32                     now;
1492 +       int                     headlen, err;
1493 +       struct page             **tmp_pages;
1494 +       u32                     seq_send;
1495 +
1496 +       dprintk("RPC:     gss_wrap_kerberos\n");
1497 +
1498 +       now = get_seconds();
1499 +
1500 +       if (qop != 0)   /* only the default quality of protection is accepted */
1501 +               goto out_err;
1502 +
1503 +       switch (kctx->signalg) {
1504 +               case SGN_ALG_DES_MAC_MD5:
1505 +                       checksum_type = CKSUMTYPE_RSA_MD5;
1506 +                       break;
1507 +               default:
1508 +                       dprintk("RPC:      gss_krb5_seal: kctx->signalg %d not"
1509 +                               " supported\n", kctx->signalg);
1510 +                       goto out_err;
1511 +       }
1512 +       if (kctx->sealalg != SEAL_ALG_NONE && kctx->sealalg != SEAL_ALG_DES) {
1513 +               dprintk("RPC:      gss_krb5_seal: kctx->sealalg %d not supported\n",
1514 +                       kctx->sealalg);
1515 +               goto out_err;
1516 +       }
1517 +
1518 +       blocksize = crypto_tfm_alg_blocksize(kctx->enc);
1519 +       gss_krb5_add_padding(buf, offset, blocksize);
1520 +       BUG_ON((buf->len - offset) % blocksize);
1521 +       plainlen = blocksize + buf->len - offset;       /* confounder + padded data */
1522 +
1523 +       headlen = g_token_size(&kctx->mech_used, 22 + plainlen) -
1524 +                                               (buf->len - offset);
1525 +
1526 +       ptr = buf->head[0].iov_base + offset;
1527 +       /* shift data to make room for header. */
1528 +       /* XXX Would be cleverer to encrypt while copying. */
1529 +       /* XXX bounds checking, slack, etc. */
1530 +       memmove(ptr + headlen, ptr, buf->head[0].iov_len - offset);
1531 +       buf->head[0].iov_len += headlen;
1532 +       buf->len += headlen;
1533 +       BUG_ON((buf->len - offset - headlen) % blocksize);
1534 +
1535 +       g_make_token_header(&kctx->mech_used, 22 + plainlen, &ptr);
1536 +
1537 +
1538 +       *ptr++ = (unsigned char) ((KG_TOK_WRAP_MSG>>8)&0xff);
1539 +       *ptr++ = (unsigned char) (KG_TOK_WRAP_MSG&0xff);
1540 +
1541 +       /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */
1542 +       krb5_hdr = ptr - 2;
1543 +       msg_start = krb5_hdr + 24;
1544 +       /* XXXJBF: */ BUG_ON(buf->head[0].iov_base + offset + headlen != msg_start + blocksize);
1545 +
1546 +       *(u16 *)(krb5_hdr + 2) = htons(kctx->signalg);
1547 +       memset(krb5_hdr + 4, 0xff, 4);
1548 +       *(u16 *)(krb5_hdr + 4) = htons(kctx->sealalg);
1549 +
1550 +       make_confounder(msg_start, blocksize);
1551 +
1552 +       /* XXXJBF: UGH!: */
1553 +       tmp_pages = buf->pages;
1554 +       buf->pages = pages;     /* checksum is computed over the pages passed in */
1555 +       err = make_checksum(checksum_type, krb5_hdr, 8, buf,
1556 +                               offset + headlen - blocksize, &md5cksum);
1557 +       buf->pages = tmp_pages; /* put buf's own pages back even if the checksum failed */
1558 +       if (err)
1559 +               goto out_err;
1560 +
1561 +       switch (kctx->signalg) {
1562 +       case SGN_ALG_DES_MAC_MD5:
1563 +               if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
1564 +                                 md5cksum.data, md5cksum.len))
1565 +                       goto out_err;
1566 +               memcpy(krb5_hdr + 16,
1567 +                      md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH,
1568 +                      KRB5_CKSUM_LENGTH);
1569 +
1570 +               dprintk("RPC:      make_seal_token: cksum data: \n");
1571 +               print_hexl((u32 *) (krb5_hdr + 16), KRB5_CKSUM_LENGTH, 0);
1572 +               break;
1573 +       default:
1574 +               BUG();
1575 +       }
1576 +
1577 +       spin_lock(&krb5_seq_lock);
1578 +       seq_send = kctx->seq_send++;
1579 +       spin_unlock(&krb5_seq_lock);
1580 +
1581 +       /* XXX would probably be more efficient to compute checksum
1582 +        * and encrypt at the same time: */
1583 +       if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff,
1584 +                              seq_send, krb5_hdr + 16, krb5_hdr + 8)))
1585 +               goto out_err;
1586 +
1587 +       if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize,
1588 +                                                                       pages))
1589 +               goto out_err;
1590 +
1591 +       kfree(md5cksum.data);   /* freed once, here, so out_err can never free it twice */
1592 +       return ((kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE);
1593 +out_err:
1594 +       if (md5cksum.data) kfree(md5cksum.data);
1595 +       return GSS_S_FAILURE;
1596 +}
1597 +
1598 +u32 /* GSS_S_COMPLETE on success; any other value means the token was rejected */
1599 +gss_unwrap_kerberos(struct gss_ctx *ctx, u32 *qop, int offset,
1600 +                       struct xdr_buf *buf, int *out_offset)
1601 +{      /* verifies and decrypts the wrap token at buf[offset..] in place */
1602 +       struct krb5_ctx         *kctx = ctx->internal_ctx_id;
1603 +       int                     signalg;
1604 +       int                     sealalg;
1605 +       s32                     checksum_type;
1606 +       struct xdr_netobj       md5cksum = {.len = 0, .data = NULL};
1607 +       s32                     now;
1608 +       int                     direction;
1609 +       s32                     seqnum;
1610 +       unsigned char           *ptr;
1611 +       int                     bodysize;
1612 +       u32                     ret = GSS_S_DEFECTIVE_TOKEN;
1613 +       u8                      *data_start;
1614 +       int                     blocksize;
1615 +
1616 +       dprintk("RPC:      gss_unwrap_kerberos\n");
1617 +
1618 +       ptr = (u8 *)buf->head[0].iov_base + offset;
1619 +       if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr,
1620 +                                       buf->len - offset))
1621 +               goto out;
1622 +
1623 +       if ((*ptr++ != ((KG_TOK_WRAP_MSG>>8)&0xff)) ||
1624 +           (*ptr++ !=  (KG_TOK_WRAP_MSG    &0xff))   )
1625 +               goto out;
1626 +
1627 +       /* XXX sanity-check bodysize?? */
1628 +
1629 +       /* get the sign and seal algorithms */
1630 +
1631 +       signalg = ptr[0] + (ptr[1] << 8);
1632 +       sealalg = ptr[2] + (ptr[3] << 8);
1633 +
1634 +       /* Sanity checks */
1635 +
1636 +       if ((ptr[4] != 0xff) || (ptr[5] != 0xff))
1637 +               goto out;
1638 +
1639 +       if (sealalg == 0xffff)  /* 0xffff is rejected: a wrap token must name a seal algorithm */
1640 +               goto out;
1641 +
1642 +       /* in the current spec, there is only one valid seal algorithm per
1643 +          key type, so a simple comparison is ok */
1644 +
1645 +       if (sealalg != kctx->sealalg)
1646 +               goto out;
1647 +
1648 +       /* there are several mappings of seal algorithms to sign algorithms,
1649 +          but few enough that we can try them all. */
1650 +
1651 +       if ((kctx->sealalg == SEAL_ALG_NONE && signalg > 1) ||
1652 +           (kctx->sealalg == SEAL_ALG_1 && signalg != SGN_ALG_3) ||
1653 +           (kctx->sealalg == SEAL_ALG_DES3KD &&
1654 +            signalg != SGN_ALG_HMAC_SHA1_DES3_KD))
1655 +               goto out;
1656 +
1657 +       if (gss_decrypt_xdr_buf(kctx->enc, buf,
1658 +                       ptr + 22 - (unsigned char *)buf->head[0].iov_base))
1659 +               goto out;
1660 +
1661 +       /* compute the checksum of the message */
1662 +
1663 +       /* initialize the cksum */
1664 +       switch (signalg) {
1665 +       case SGN_ALG_DES_MAC_MD5:
1666 +               checksum_type = CKSUMTYPE_RSA_MD5;
1667 +               break;
1668 +       default:
1669 +               ret = GSS_S_DEFECTIVE_TOKEN;
1670 +               goto out;
1671 +       }
1672 +
1673 +       switch (signalg) {
1674 +       case SGN_ALG_DES_MAC_MD5:
1675 +               ret = make_checksum(checksum_type, ptr - 2, 8, buf,
1676 +                        ptr + 22 - (unsigned char *)buf->head[0].iov_base, &md5cksum);
1677 +               if (ret)
1678 +                       goto out;
1679 +
1680 +               ret = krb5_encrypt(kctx->seq, NULL, md5cksum.data,
1681 +                                  md5cksum.data, md5cksum.len);
1682 +               if (ret)
1683 +                       goto out;
1684 +
1685 +               if (memcmp(md5cksum.data + 8, ptr + 14, 8)) {
1686 +                       ret = GSS_S_BAD_SIG;
1687 +                       goto out;
1688 +               }
1689 +               break;
1690 +       default:
1691 +               ret = GSS_S_DEFECTIVE_TOKEN;
1692 +               goto out;
1693 +       }
1694 +
1695 +       /* it got through unscathed.  Make sure the context is unexpired */
1696 +
1697 +       if (qop)
1698 +               *qop = GSS_C_QOP_DEFAULT;
1699 +
1700 +       now = get_seconds();
1701 +
1702 +       ret = GSS_S_CONTEXT_EXPIRED;
1703 +       if (now > kctx->endtime)
1704 +               goto out;
1705 +
1706 +       /* do sequencing checks */
1707 +
1708 +       if ((ret = krb5_get_seq_num(kctx->seq, ptr + 14, ptr + 6, &direction,
1709 +                                   &seqnum)))
1710 +               goto out;
1711 +
1712 +       ret = GSS_S_BAD_SIG;    /* ret is 0 here; a wrong direction must not return success */
1713 +       if ((kctx->initiate && direction != 0xff) ||
1714 +           (!kctx->initiate && direction != 0))
1715 +               goto out;
1716 +
1717 +       /* Copy the data back to the right position.  XXX: Would probably be
1718 +        * better to copy and encrypt at the same time. */
1719 +
1720 +       blocksize = crypto_tfm_alg_blocksize(kctx->enc);
1721 +       data_start = ptr + 22 + blocksize;      /* skip the rest of the header and the confounder */
1722 +       *out_offset = data_start - (u8 *)buf->head[0].iov_base;
1723 +
1724 +       ret = GSS_S_DEFECTIVE_TOKEN;
1725 +       if (gss_krb5_remove_padding(buf, blocksize))
1726 +               goto out;
1727 +
1728 +       ret = GSS_S_COMPLETE;
1729 +out:
1730 +       if (md5cksum.data) kfree(md5cksum.data);
1731 +       return ret;
1732 +}
1733 Index: linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_crypto.c
1734 ===================================================================
1735 --- linux-2.6.10.orig/net/sunrpc/auth_gss/gss_krb5_crypto.c     2004-12-25 05:33:50.000000000 +0800
1736 +++ linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_crypto.c  2005-04-05 14:49:13.398692408 +0800
1737 @@ -139,17 +139,91 @@
1738         sg->length = len;
1739  }
1740  
1741 +static int    /* 0, an actor's non-zero result, or -EINVAL if buf ends before len bytes */
1742 +process_xdr_buf(struct xdr_buf *buf, int offset, int len,
1743 +               int (*actor)(struct scatterlist *, void *), void *data)
1744 +{      /* calls actor(sg, data) on each piece of buf[offset, offset+len): head, pages, tail */
1745 +       int i, page_len, thislen, page_offset, ret = 0;
1746 +       struct scatterlist      sg[1];
1747 +
1748 +       if (offset >= buf->head[0].iov_len) {
1749 +               offset -= buf->head[0].iov_len; /* range starts after the head */
1750 +       } else {
1751 +               thislen = buf->head[0].iov_len - offset;
1752 +               if (thislen > len)
1753 +                       thislen = len;
1754 +               buf_to_sg(sg, buf->head[0].iov_base + offset, thislen);
1755 +               ret = actor(sg, data);
1756 +               if (ret)
1757 +                       goto out;
1758 +               offset = 0;     /* the next region is consumed from its start */
1759 +               len -= thislen;
1760 +       }
1761 +       if (len == 0)
1762 +               goto out;
1763 +
1764 +       if (offset >= buf->page_len) {
1765 +               offset -= buf->page_len;        /* range starts after the page data */
1766 +       } else {
1767 +               page_len = buf->page_len - offset;
1768 +               if (page_len > len)
1769 +                       page_len = len;
1770 +               len -= page_len;
1771 +               page_offset = (offset + buf->page_base) & (PAGE_CACHE_SIZE - 1);
1772 +               i = (offset + buf->page_base) >> PAGE_CACHE_SHIFT;
1773 +               thislen = PAGE_CACHE_SIZE - page_offset;        /* first piece may start mid-page */
1774 +               do {
1775 +                       if (thislen > page_len)
1776 +                               thislen = page_len;
1777 +                       sg->page = buf->pages[i];
1778 +                       sg->offset = page_offset;
1779 +                       sg->length = thislen;
1780 +                       ret = actor(sg, data);
1781 +                       if (ret)
1782 +                               goto out;
1783 +                       page_len -= thislen;
1784 +                       i++;
1785 +                       page_offset = 0;        /* later pages are used from their start */
1786 +                       thislen = PAGE_CACHE_SIZE;
1787 +               } while (page_len != 0);
1788 +               offset = 0;
1789 +       }
1790 +       if (len == 0)
1791 +               goto out;
1792 +
1793 +       if (offset < buf->tail[0].iov_len) {
1794 +               thislen = buf->tail[0].iov_len - offset;
1795 +               if (thislen > len)
1796 +                       thislen = len;
1797 +               buf_to_sg(sg, buf->tail[0].iov_base + offset, thislen);
1798 +               ret = actor(sg, data);
1799 +               len -= thislen;
1800 +       }
1801 +       if (len != 0)   /* head + pages + tail held fewer bytes than requested */
1802 +               ret = -EINVAL;
1803 +out:
1804 +       return ret;
1805 +}
1806 +
1807 +static int
1808 +checksummer(struct scatterlist *sg, void *data)
1809 +{
1810 +       /* process_xdr_buf() actor: data is the digest transform, and each
1811 +        * fragment handed in is folded into its running digest. */
1812 +       crypto_digest_update(data, sg, 1);
1813 +
1814 +       return 0;       /* always reports success, so the walk continues */
1815 +}
1816 +
1817  /* checksum the plaintext data and hdrlen bytes of the token header */
1818  s32
1819  make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
1820 -                  struct xdr_netobj *cksum)
1821 +                  int body_offset, struct xdr_netobj *cksum)
1822  {
1823         char                            *cksumname;
1824         struct crypto_tfm               *tfm = NULL; /* XXX add to ctx? */
1825         struct scatterlist              sg[1];
1826         u32                             code = GSS_S_FAILURE;
1827 -       int                             len, thislen, offset;
1828 -       int                             i;
1829  
1830         switch (cksumtype) {
1831                 case CKSUMTYPE_RSA_MD5:
1832 @@ -169,35 +243,8 @@
1833         crypto_digest_init(tfm);
1834         buf_to_sg(sg, header, hdrlen);
1835         crypto_digest_update(tfm, sg, 1);
1836 -       if (body->head[0].iov_len) {
1837 -               buf_to_sg(sg, body->head[0].iov_base, body->head[0].iov_len);
1838 -               crypto_digest_update(tfm, sg, 1);
1839 -       }
1840 -
1841 -       len = body->page_len;
1842 -       if (len != 0) {
1843 -               offset = body->page_base & (PAGE_CACHE_SIZE - 1);
1844 -               i = body->page_base >> PAGE_CACHE_SHIFT;
1845 -               thislen = PAGE_CACHE_SIZE - offset;
1846 -               do {
1847 -                       if (thislen > len)
1848 -                               thislen = len;
1849 -                       sg->page = body->pages[i];
1850 -                       sg->offset = offset;
1851 -                       sg->length = thislen;
1852 -                       kmap(sg->page); /* XXX kmap_atomic? */
1853 -                       crypto_digest_update(tfm, sg, 1);
1854 -                       kunmap(sg->page);
1855 -                       len -= thislen;
1856 -                       i++;
1857 -                       offset = 0;
1858 -                       thislen = PAGE_CACHE_SIZE;
1859 -               } while(len != 0);
1860 -       }
1861 -       if (body->tail[0].iov_len) {
1862 -               buf_to_sg(sg, body->tail[0].iov_base, body->tail[0].iov_len);
1863 -               crypto_digest_update(tfm, sg, 1);
1864 -       }
1865 +       process_xdr_buf(body, body_offset, body->len - body_offset,
1866 +                       checksummer, tfm);
1867         crypto_digest_final(tfm, cksum->data);
1868         code = 0;
1869  out:
1870 @@ -207,3 +254,154 @@
1871  }
1872  
1873  EXPORT_SYMBOL(make_checksum);
1874 +
1875 +struct encryptor_desc {         /* state carried between encryptor() calls */
1876 +       u8 iv[8]; /* XXX hard-coded blocksize */
1877 +       struct crypto_tfm *tfm;         /* cipher handed to crypto_cipher_encrypt_iv() */
1878 +       int pos;                        /* byte position in outbuf of the next fragment */
1879 +       struct xdr_buf *outbuf;         /* buffer being encrypted */
1880 +       struct page **pages;            /* pages read as input for the page-data region */
1881 +       struct scatterlist infrags[4];  /* pending source fragments */
1882 +       struct scatterlist outfrags[4]; /* pending destination fragments */
1883 +       int fragno;                     /* number of pending fragments */
1884 +       int fraglen;                    /* total bytes in the pending fragments */
1885 +};
1886 +
1887 +static int    /* 0, or the error from crypto_cipher_encrypt_iv() */
1888 +encryptor(struct scatterlist *sg, void *data)
1889 +{      /* process_xdr_buf() actor: queues sg and encrypts every whole 8-byte block queued */
1890 +       struct encryptor_desc *desc = data;
1891 +       struct xdr_buf *outbuf = desc->outbuf;
1892 +       struct page *in_page;
1893 +       int thislen = desc->fraglen + sg->length;       /* carried-over bytes plus this fragment */
1894 +       int fraglen, ret;
1895 +       int page_pos;
1896 +
1897 +       /* Worst case is 4 fragments: head, end of page 1, start
1898 +        * of page 2, tail.  Anything more is a bug. */
1899 +       BUG_ON(desc->fragno > 3);
1900 +       desc->infrags[desc->fragno] = *sg;
1901 +       desc->outfrags[desc->fragno] = *sg;
1902 +
1903 +       page_pos = desc->pos - outbuf->head[0].iov_len; /* position relative to the page data */
1904 +       if (page_pos >= 0 && page_pos < outbuf->page_len) {
1905 +               /* pages are not in place: */
1906 +               int i = (page_pos + outbuf->page_base) >> PAGE_CACHE_SHIFT;
1907 +               in_page = desc->pages[i];       /* source comes from desc->pages, not outbuf */
1908 +       } else {
1909 +               in_page = sg->page;             /* head/tail: source and destination coincide */
1910 +       }
1911 +       desc->infrags[desc->fragno].page = in_page;
1912 +       desc->fragno++;
1913 +       desc->fraglen += sg->length;
1914 +       desc->pos += sg->length;
1915 +
1916 +       fraglen = thislen & 7; /* XXX hardcoded blocksize */
1917 +       thislen -= fraglen;     /* thislen is now a whole number of blocks */
1918 +
1919 +       if (thislen == 0)       /* less than one block queued so far */
1920 +               return 0;
1921 +
1922 +       ret = crypto_cipher_encrypt_iv(desc->tfm, desc->outfrags, desc->infrags,
1923 +                                       thislen, desc->iv);
1924 +       if (ret)
1925 +               return ret;
1926 +       if (fraglen) {          /* keep the trailing partial block for the next call */
1927 +               desc->outfrags[0].page = sg->page;
1928 +               desc->outfrags[0].offset = sg->offset + sg->length - fraglen;
1929 +               desc->outfrags[0].length = fraglen;
1930 +               desc->infrags[0] = desc->outfrags[0];
1931 +               desc->infrags[0].page = in_page;
1932 +               desc->fragno = 1;
1933 +               desc->fraglen = fraglen;
1934 +       } else {
1935 +               desc->fragno = 0;
1936 +               desc->fraglen = 0;
1937 +       }
1938 +       return 0;
1939 +}
1940 +
1941 +int
1942 +gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset,
1943 +               struct page **pages)
1944 +{
1945 +       /* Encrypt buf from offset to its end, starting from an all-zero
1946 +        * IV; the walk state lives in desc and is driven by encryptor(). */
1947 +       struct encryptor_desc desc = {
1948 +               .iv      = { 0 },
1949 +               .tfm     = tfm,
1950 +               .pos     = offset,
1951 +               .outbuf  = buf,
1952 +               .pages   = pages,
1953 +               .fragno  = 0,
1954 +               .fraglen = 0,
1955 +       };
1956 +
1957 +       BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0);
1958 +
1959 +       return process_xdr_buf(buf, offset, buf->len - offset, encryptor, &desc);
1960 +}
1961 +
1962 +EXPORT_SYMBOL(gss_encrypt_xdr_buf);
1963 +
1964 +struct decryptor_desc {         /* state carried between decryptor() calls */
1965 +       u8 iv[8]; /* XXX hard-coded blocksize */
1966 +       struct crypto_tfm *tfm;         /* cipher handed to crypto_cipher_decrypt_iv() */
1967 +       struct scatterlist frags[4];    /* pending fragments, decrypted in place */
1968 +       int fragno;                     /* number of pending fragments */
1969 +       int fraglen;                    /* total bytes in the pending fragments */
1970 +};
1971 +
1972 +static int    /* 0, or the error from crypto_cipher_decrypt_iv() */
1973 +decryptor(struct scatterlist *sg, void *data)
1974 +{      /* process_xdr_buf() actor: queues sg and decrypts every whole 8-byte block queued */
1975 +       struct decryptor_desc *desc = data;
1976 +       int thislen = desc->fraglen + sg->length;       /* carried-over bytes plus this fragment */
1977 +       int fraglen, ret;
1978 +
1979 +       /* Worst case is 4 fragments: head, end of page 1, start
1980 +        * of page 2, tail.  Anything more is a bug. */
1981 +       BUG_ON(desc->fragno > 3);
1982 +       desc->frags[desc->fragno] = *sg;
1983 +       desc->fragno++;
1984 +       desc->fraglen += sg->length;
1985 +
1986 +       fraglen = thislen & 7; /* XXX hardcoded blocksize */
1987 +       thislen -= fraglen;     /* thislen is now a whole number of blocks */
1988 +
1989 +       if (thislen == 0)       /* less than one block queued so far */
1990 +               return 0;
1991 +
1992 +       ret = crypto_cipher_decrypt_iv(desc->tfm, desc->frags, desc->frags,
1993 +                                       thislen, desc->iv);
1994 +       if (ret)
1995 +               return ret;
1996 +       if (fraglen) {          /* keep the trailing partial block for the next call */
1997 +               desc->frags[0].page = sg->page;
1998 +               desc->frags[0].offset = sg->offset + sg->length - fraglen;
1999 +               desc->frags[0].length = fraglen;
2000 +               desc->fragno = 1;
2001 +               desc->fraglen = fraglen;
2002 +       } else {
2003 +               desc->fragno = 0;
2004 +               desc->fraglen = 0;
2005 +       }
2006 +       return 0;
2007 +}
2008 +
2009 +int
2010 +gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset)
2011 +{
2012 +       /* Decrypt buf in place from offset to its end, all-zero IV. */
2013 +       struct decryptor_desc desc = {
2014 +               .iv     = { 0 },
2015 +               .tfm    = tfm,
2016 +               .fragno = 0, .fraglen = 0,
2017 +       };
2018 +
2019 +       /* XXXJBF: */
2020 +       BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0);
2021 +       return process_xdr_buf(buf, offset, buf->len - offset, decryptor, &desc);
2022 +}
2023 +
2024 +EXPORT_SYMBOL(gss_decrypt_xdr_buf);
2025 Index: linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_seal.c
2026 ===================================================================
2027 --- linux-2.6.10.orig/net/sunrpc/auth_gss/gss_krb5_seal.c       2004-12-25 05:33:47.000000000 +0800
2028 +++ linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_seal.c    2005-04-05 14:49:13.402691800 +0800
2029 @@ -70,24 +70,17 @@
2030  # define RPCDBG_FACILITY        RPCDBG_AUTH
2031  #endif
2032  
2033 -static inline int
2034 -gss_krb5_padding(int blocksize, int length) {
2035 -       /* Most of the code is block-size independent but in practice we
2036 -        * use only 8: */
2037 -       BUG_ON(blocksize != 8);
2038 -       return 8 - (length & 7);
2039 -}
2040 +spinlock_t krb5_seq_lock = SPIN_LOCK_UNLOCKED;
2041  
2042  u32
2043  krb5_make_token(struct krb5_ctx *ctx, int qop_req,
2044 -                  struct xdr_buf *text, struct xdr_netobj *token,
2045 -                  int toktype)
2046 +                  struct xdr_buf *text, struct xdr_netobj *token)
2047  {
2048         s32                     checksum_type;
2049         struct xdr_netobj       md5cksum = {.len = 0, .data = NULL};
2050 -       int                     blocksize = 0, tmsglen;
2051         unsigned char           *ptr, *krb5_hdr, *msg_start;
2052         s32                     now;
2053 +       u32                     seq_send;
2054  
2055         dprintk("RPC:     gss_krb5_seal\n");
2056  
2057 @@ -111,21 +104,13 @@
2058                 goto out_err;
2059         }
2060  
2061 -       if (toktype == KG_TOK_WRAP_MSG) {
2062 -               blocksize = crypto_tfm_alg_blocksize(ctx->enc);
2063 -               tmsglen = blocksize + text->len
2064 -                       + gss_krb5_padding(blocksize, blocksize + text->len);
2065 -       } else {
2066 -               tmsglen = 0;
2067 -       }
2068 -
2069 -       token->len = g_token_size(&ctx->mech_used, 22 + tmsglen);
2070 +       token->len = g_token_size(&ctx->mech_used, 22);
2071  
2072         ptr = token->data;
2073 -       g_make_token_header(&ctx->mech_used, 22 + tmsglen, &ptr);
2074 +       g_make_token_header(&ctx->mech_used, 22, &ptr);
2075  
2076 -       *ptr++ = (unsigned char) ((toktype>>8)&0xff);
2077 -       *ptr++ = (unsigned char) (toktype&0xff);
2078 +       *ptr++ = (unsigned char) ((KG_TOK_MIC_MSG>>8)&0xff);
2079 +       *ptr++ = (unsigned char) (KG_TOK_MIC_MSG&0xff);
2080  
2081         /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */
2082         krb5_hdr = ptr - 2;
2083 @@ -133,17 +118,9 @@
2084  
2085         *(u16 *)(krb5_hdr + 2) = htons(ctx->signalg);
2086         memset(krb5_hdr + 4, 0xff, 4);
2087 -       if (toktype == KG_TOK_WRAP_MSG)
2088 -               *(u16 *)(krb5_hdr + 4) = htons(ctx->sealalg);
2089  
2090 -       if (toktype == KG_TOK_WRAP_MSG) {
2091 -               /* XXX removing support for now */
2092 -               goto out_err;
2093 -       } else { /* Sign only.  */
2094 -               if (make_checksum(checksum_type, krb5_hdr, 8, text,
2095 -                                      &md5cksum))
2096 +       if (make_checksum(checksum_type, krb5_hdr, 8, text, 0, &md5cksum))
2097                         goto out_err;
2098 -       }
2099  
2100         switch (ctx->signalg) {
2101         case SGN_ALG_DES_MAC_MD5:
2102 @@ -163,12 +140,14 @@
2103  
2104         kfree(md5cksum.data);
2105  
2106 +       spin_lock(&krb5_seq_lock);
2107 +       seq_send = ctx->seq_send++;
2108 +       spin_unlock(&krb5_seq_lock);
2109 +
2110         if ((krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff,
2111 -                              ctx->seq_send, krb5_hdr + 16, krb5_hdr + 8)))
2112 +                              seq_send, krb5_hdr + 16, krb5_hdr + 8)))
2113                 goto out_err;
2114  
2115 -       ctx->seq_send++;
2116 -
2117         return ((ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE);
2118  out_err:
2119         if (md5cksum.data) kfree(md5cksum.data);
2120 Index: linux-2.6.10/net/sunrpc/auth_gss/gss_pseudoflavors.c
2121 ===================================================================
2122 --- linux-2.6.10.orig/net/sunrpc/auth_gss/gss_pseudoflavors.c   2004-12-25 05:34:45.000000000 +0800
2123 +++ linux-2.6.10/net/sunrpc/auth_gss/gss_pseudoflavors.c        2005-04-05 19:01:49.158500672 +0800
2124 @@ -1,237 +0,0 @@
2125 -/*
2126 - *  linux/net/sunrpc/gss_union.c
2127 - *
2128 - *  Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/generic code
2129 - *
2130 - *  Copyright (c) 2001 The Regents of the University of Michigan.
2131 - *  All rights reserved.
2132 - *
2133 - *  Andy Adamson   <andros@umich.edu>
2134 - *
2135 - */
2136 -
2137 -/*
2138 - * Copyright 1993 by OpenVision Technologies, Inc.
2139 - *
2140 - * Permission to use, copy, modify, distribute, and sell this software
2141 - * and its documentation for any purpose is hereby granted without fee,
2142 - * provided that the above copyright notice appears in all copies and
2143 - * that both that copyright notice and this permission notice appear in
2144 - * supporting documentation, and that the name of OpenVision not be used
2145 - * in advertising or publicity pertaining to distribution of the software
2146 - * without specific, written prior permission. OpenVision makes no
2147 - * representations about the suitability of this software for any
2148 - * purpose.  It is provided "as is" without express or implied warranty.
2149 - *
2150 - * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
2151 - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
2152 - * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
2153 - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
2154 - * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
2155 - * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
2156 - * PERFORMANCE OF THIS SOFTWARE.
2157 - */ 
2158 -
2159 -#include <linux/types.h>
2160 -#include <linux/slab.h>
2161 -#include <linux/socket.h>
2162 -#include <linux/sunrpc/gss_asn1.h>
2163 -#include <linux/sunrpc/auth_gss.h>
2164 -
2165 -#ifdef RPC_DEBUG
2166 -# define RPCDBG_FACILITY        RPCDBG_AUTH
2167 -#endif
2168 -
2169 -static LIST_HEAD(registered_triples);
2170 -static spinlock_t registered_triples_lock = SPIN_LOCK_UNLOCKED;
2171 -
2172 -/* The following must be called with spinlock held: */
2173 -static struct sup_sec_triple *
2174 -do_lookup_triple_by_pseudoflavor(u32 pseudoflavor)
2175 -{
2176 -       struct sup_sec_triple *pos, *triple = NULL;
2177 -
2178 -       list_for_each_entry(pos, &registered_triples, triples) {
2179 -               if (pos->pseudoflavor == pseudoflavor) {
2180 -                       triple = pos;
2181 -                       break;
2182 -               }
2183 -       }
2184 -       return triple;
2185 -}
2186 -
2187 -/* XXX Need to think about reference counting of triples and of mechs.
2188 - * Currently we do no reference counting of triples, and I think that's
2189 - * probably OK given the reference counting on mechs, but there's probably
2190 - * a better way to do all this. */
2191 -
2192 -int
2193 -gss_register_triple(u32 pseudoflavor, struct gss_api_mech *mech,
2194 -                         u32 qop, u32 service)
2195 -{
2196 -       struct sup_sec_triple *triple;
2197 -
2198 -       if (!(triple = kmalloc(sizeof(*triple), GFP_KERNEL))) {
2199 -               printk("Alloc failed in gss_register_triple");
2200 -               goto err;
2201 -       }
2202 -       triple->pseudoflavor = pseudoflavor;
2203 -       triple->mech = gss_mech_get_by_OID(&mech->gm_oid);
2204 -       triple->qop = qop;
2205 -       triple->service = service;
2206 -
2207 -       spin_lock(&registered_triples_lock);
2208 -       if (do_lookup_triple_by_pseudoflavor(pseudoflavor)) {
2209 -               printk(KERN_WARNING "RPC: Registered pseudoflavor %d again\n",
2210 -                               pseudoflavor);
2211 -               goto err_unlock;
2212 -       }
2213 -       list_add(&triple->triples, &registered_triples);
2214 -       spin_unlock(&registered_triples_lock);
2215 -       dprintk("RPC:      registered pseudoflavor %d\n", pseudoflavor);
2216 -
2217 -       return 0;
2218 -
2219 -err_unlock:
2220 -       kfree(triple);
2221 -       spin_unlock(&registered_triples_lock);
2222 -err:
2223 -       return -1;
2224 -}
2225 -
2226 -int
2227 -gss_unregister_triple(u32 pseudoflavor)
2228 -{
2229 -       struct sup_sec_triple *triple;
2230 -
2231 -       spin_lock(&registered_triples_lock);
2232 -       if (!(triple = do_lookup_triple_by_pseudoflavor(pseudoflavor))) {
2233 -               spin_unlock(&registered_triples_lock);
2234 -               printk("Can't unregister unregistered pseudoflavor %d\n",
2235 -                      pseudoflavor);
2236 -               return -1;
2237 -       }
2238 -       list_del(&triple->triples);
2239 -       spin_unlock(&registered_triples_lock);
2240 -       gss_mech_put(triple->mech);
2241 -       kfree(triple);
2242 -       return 0;
2243 -
2244 -}
2245 -
2246 -void
2247 -print_sec_triple(struct xdr_netobj *oid,u32 qop,u32 service)
2248 -{
2249 -       dprintk("RPC: print_sec_triple:\n");
2250 -       dprintk("                     oid_len %d\n  oid :\n",oid->len);
2251 -       print_hexl((u32 *)oid->data,oid->len,0);
2252 -       dprintk("                     qop %d\n",qop);
2253 -       dprintk("                     service %d\n",service);
2254 -}
2255 -
2256 -/* Function: gss_get_cmp_triples
2257 - *
2258 - * Description: search sec_triples for a matching security triple
2259 - * return pseudoflavor if match, else 0
2260 - * (Note that 0 is a valid pseudoflavor, but not for any gss pseudoflavor
2261 - * (0 means auth_null), so this shouldn't cause confusion.)
2262 - */
2263 -u32
2264 -gss_cmp_triples(u32 oid_len, char *oid_data, u32 qop, u32 service)
2265 -{
2266 -       struct sup_sec_triple *triple;
2267 -       u32 pseudoflavor = 0;
2268 -       struct xdr_netobj oid;
2269 -
2270 -       oid.len = oid_len;
2271 -       oid.data = oid_data;
2272 -
2273 -       dprintk("RPC:      gss_cmp_triples\n");
2274 -       print_sec_triple(&oid,qop,service);
2275 -
2276 -       spin_lock(&registered_triples_lock);
2277 -       list_for_each_entry(triple, &registered_triples, triples) {
2278 -               if((g_OID_equal(&oid, &triple->mech->gm_oid))
2279 -                   && (qop == triple->qop)
2280 -                   && (service == triple->service)) {
2281 -                       pseudoflavor = triple->pseudoflavor;
2282 -                       break;
2283 -               }
2284 -       }
2285 -       spin_unlock(&registered_triples_lock);
2286 -       dprintk("RPC:      gss_cmp_triples return %d\n", pseudoflavor);
2287 -       return pseudoflavor;
2288 -}
2289 -
2290 -u32
2291 -gss_get_pseudoflavor(struct gss_ctx *ctx, u32 qop, u32 service)
2292 -{
2293 -       return gss_cmp_triples(ctx->mech_type->gm_oid.len,
2294 -                              ctx->mech_type->gm_oid.data,
2295 -                              qop, service);
2296 -}
2297 -
2298 -/* Returns nonzero iff the given pseudoflavor is in the supported list.
2299 - * (Note that without incrementing a reference count or anything, this
2300 - * doesn't give any guarantees.) */
2301 -int
2302 -gss_pseudoflavor_supported(u32 pseudoflavor)
2303 -{
2304 -       struct sup_sec_triple *triple;
2305 -
2306 -       spin_lock(&registered_triples_lock);
2307 -       triple = do_lookup_triple_by_pseudoflavor(pseudoflavor);
2308 -       spin_unlock(&registered_triples_lock);
2309 -       return (triple ? 1 : 0);
2310 -}
2311 -
2312 -u32
2313 -gss_pseudoflavor_to_service(u32 pseudoflavor)
2314 -{
2315 -       struct sup_sec_triple *triple;
2316 -
2317 -       spin_lock(&registered_triples_lock);
2318 -       triple = do_lookup_triple_by_pseudoflavor(pseudoflavor);
2319 -       spin_unlock(&registered_triples_lock);
2320 -       if (!triple) {
2321 -               dprintk("RPC:      gss_pseudoflavor_to_service called with unsupported pseudoflavor %d\n",
2322 -                               pseudoflavor);
2323 -               return 0;
2324 -       }
2325 -       return triple->service;
2326 -}
2327 -
2328 -struct gss_api_mech *
2329 -gss_pseudoflavor_to_mech(u32 pseudoflavor) {
2330 -       struct sup_sec_triple *triple;
2331 -       struct gss_api_mech *mech = NULL;
2332 -
2333 -       spin_lock(&registered_triples_lock);
2334 -       triple = do_lookup_triple_by_pseudoflavor(pseudoflavor);
2335 -       spin_unlock(&registered_triples_lock);
2336 -       if (triple)
2337 -               mech = gss_mech_get(triple->mech);
2338 -       else
2339 -               dprintk("RPC:      gss_pseudoflavor_to_mech called with unsupported pseudoflavor %d\n",
2340 -                               pseudoflavor);
2341 -       return mech;
2342 -}
2343 -
2344 -int
2345 -gss_pseudoflavor_to_mechOID(u32 pseudoflavor, struct xdr_netobj * oid)
2346 -{
2347 -       struct gss_api_mech *mech;
2348 -
2349 -       mech = gss_pseudoflavor_to_mech(pseudoflavor);
2350 -       if (!mech)  {
2351 -               dprintk("RPC:      gss_pseudoflavor_to_mechOID called with unsupported pseudoflavor %d\n",
2352 -                               pseudoflavor);
2353 -                       return -1;
2354 -       }
2355 -       oid->len = mech->gm_oid.len;
2356 -       if (!(oid->data = kmalloc(oid->len, GFP_KERNEL)))
2357 -               return -1;
2358 -       memcpy(oid->data, mech->gm_oid.data, oid->len);
2359 -       gss_mech_put(mech);
2360 -       return 0;
2361 -}
2362 Index: linux-2.6.10/net/sunrpc/auth_gss/svcauth_gss.c
2363 ===================================================================
2364 --- linux-2.6.10.orig/net/sunrpc/auth_gss/svcauth_gss.c 2004-12-25 05:34:44.000000000 +0800
2365 +++ linux-2.6.10/net/sunrpc/auth_gss/svcauth_gss.c      2005-04-05 14:49:13.407691040 +0800
2366 @@ -37,6 +37,7 @@
2367   *
2368   */
2369  
2370 +#include <asm/bitops.h>
2371  #include <linux/types.h>
2372  #include <linux/module.h>
2373  #include <linux/pagemap.h>
2374 @@ -78,7 +79,6 @@
2375  
2376  static struct cache_head *rsi_table[RSI_HASHMAX];
2377  static struct cache_detail rsi_cache;
2378 -static struct rsi *rsi_lookup(struct rsi *item, int set);
2379  
2380  static void rsi_free(struct rsi *rsii)
2381  {
2382 @@ -125,38 +125,6 @@
2383         return dup_to_netobj(dst, src->data, src->len);
2384  }
2385  
2386 -static inline void rsi_init(struct rsi *new, struct rsi *item)
2387 -{
2388 -       new->out_handle.data = NULL;
2389 -       new->out_handle.len = 0;
2390 -       new->out_token.data = NULL;
2391 -       new->out_token.len = 0;
2392 -       new->in_handle.len = item->in_handle.len;
2393 -       item->in_handle.len = 0;
2394 -       new->in_token.len = item->in_token.len;
2395 -       item->in_token.len = 0;
2396 -       new->in_handle.data = item->in_handle.data;
2397 -       item->in_handle.data = NULL;
2398 -       new->in_token.data = item->in_token.data;
2399 -       item->in_token.data = NULL;
2400 -}
2401 -
2402 -static inline void rsi_update(struct rsi *new, struct rsi *item)
2403 -{
2404 -       BUG_ON(new->out_handle.data || new->out_token.data);
2405 -       new->out_handle.len = item->out_handle.len;
2406 -       item->out_handle.len = 0;
2407 -       new->out_token.len = item->out_token.len;
2408 -       item->out_token.len = 0;
2409 -       new->out_handle.data = item->out_handle.data;
2410 -       item->out_handle.data = NULL;
2411 -       new->out_token.data = item->out_token.data;
2412 -       item->out_token.data = NULL;
2413 -
2414 -       new->major_status = item->major_status;
2415 -       new->minor_status = item->minor_status;
2416 -}
2417 -
2418  static void rsi_request(struct cache_detail *cd,
2419                         struct cache_head *h,
2420                         char **bpp, int *blen)
2421 @@ -168,6 +136,75 @@
2422         (*bpp)[-1] = '\n';
2423  }
2424  
2425 +static inline int
2426 +gssd_reply(struct rsi *item)
2427 +{
2428 +       struct rsi *tmp;
2429 +       struct cache_head **hp, **head;
2430 +
2431 +       head = &rsi_cache.hash_table[rsi_hash(item)];
2432 +       write_lock(&rsi_cache.hash_lock); /* chain is edited below */
2433 +       for (hp = head; *hp != NULL; hp = &tmp->h.next) {
2434 +               tmp = container_of(*hp, struct rsi, h);
2435 +               if (rsi_match(tmp, item)) {
2436 +                       cache_get(&tmp->h); /* held until rsi_put() below */
2437 +                       clear_bit(CACHE_HASHED, &tmp->h.flags);
2438 +                       *hp = tmp->h.next; /* unlink tmp from the chain */
2439 +                       tmp->h.next = NULL;
2440 +                       rsi_cache.entries--;
2441 +                       if (test_bit(CACHE_VALID, &tmp->h.flags)) {
2442 +                               write_unlock(&rsi_cache.hash_lock);
2443 +                               rsi_put(&tmp->h, &rsi_cache);
2444 +                               return -EINVAL; /* tmp was already valid */
2445 +                       }
2446 +                       set_bit(CACHE_HASHED, &item->h.flags);
2447 +                       item->h.next = *hp; /* link item where tmp was */
2448 +                       *hp = &item->h;
2449 +                       rsi_cache.entries++;
2450 +                       set_bit(CACHE_VALID, &item->h.flags);
2451 +                       item->h.last_refresh = get_seconds();
2452 +                       write_unlock(&rsi_cache.hash_lock);
2453 +                       cache_fresh(&rsi_cache, &tmp->h, 0);
2454 +                       rsi_put(&tmp->h, &rsi_cache);
2455 +                       return 0; /* item replaced tmp in the table */
2456 +               }
2457 +       }
2458 +       write_unlock(&rsi_cache.hash_lock);
2459 +       return -EINVAL; /* no entry matching item was found */
2460 +}
2461 +
2462 +static inline struct rsi *
2463 +gssd_upcall(struct rsi *item, struct svc_rqst *rqstp)
2464 +{
2465 +       struct rsi *tmp;
2466 +       struct cache_head **hp, **head;
2467 +
2468 +       head = &rsi_cache.hash_table[rsi_hash(item)];
2469 +       write_lock(&rsi_cache.hash_lock); /* chain and entries are modified */
2470 +       for (hp = head; *hp != NULL; hp = &tmp->h.next) {
2471 +               tmp = container_of(*hp, struct rsi, h);
2472 +               if (rsi_match(tmp, item)) {
2473 +                       if (!test_bit(CACHE_VALID, &tmp->h.flags)) {
2474 +                               write_unlock(&rsi_cache.hash_lock);
2475 +                               return NULL; /* match is not valid yet */
2476 +                       }
2477 +                       *hp = tmp->h.next; /* valid match: unlink it */
2478 +                       tmp->h.next = NULL;
2479 +                       rsi_cache.entries--;
2480 +                       write_unlock(&rsi_cache.hash_lock);
2481 +                       return tmp;
2482 +               }
2483 +       }
2484 +       cache_get(&item->h); /* no match: insert item at the chain head */
2485 +       item->h.next = *head;
2486 +       *head = &item->h;
2487 +       rsi_cache.entries++;
2488 +       write_unlock(&rsi_cache.hash_lock);
2489 +       cache_get(&item->h);
2490 +       if (cache_check(&rsi_cache, &item->h, &rqstp->rq_chandle))
2491 +               return NULL;
2492 +       return item;
2493 +}
2494  
2495  static int rsi_parse(struct cache_detail *cd,
2496                      char *mesg, int mlen)
2497 @@ -176,17 +213,22 @@
2498         char *buf = mesg;
2499         char *ep;
2500         int len;
2501 -       struct rsi rsii, *rsip = NULL;
2502 +       struct rsi *rsii;
2503         time_t expiry;
2504         int status = -EINVAL;
2505  
2506 -       memset(&rsii, 0, sizeof(rsii));
2507 +       rsii = kmalloc(sizeof(*rsii), GFP_KERNEL);
2508 +       if (!rsii)
2509 +               return -ENOMEM;
2510 +       memset(rsii, 0, sizeof(*rsii));
2511 +       cache_init(&rsii->h);
2512 +
2513         /* handle */
2514         len = qword_get(&mesg, buf, mlen);
2515         if (len < 0)
2516                 goto out;
2517         status = -ENOMEM;
2518 -       if (dup_to_netobj(&rsii.in_handle, buf, len))
2519 +       if (dup_to_netobj(&rsii->in_handle, buf, len))
2520                 goto out;
2521  
2522         /* token */
2523 @@ -195,10 +237,9 @@
2524         if (len < 0)
2525                 goto out;
2526         status = -ENOMEM;
2527 -       if (dup_to_netobj(&rsii.in_token, buf, len))
2528 +       if (dup_to_netobj(&rsii->in_token, buf, len))
2529                 goto out;
2530  
2531 -       rsii.h.flags = 0;
2532         /* expiry */
2533         expiry = get_expiry(&mesg);
2534         status = -EINVAL;
2535 @@ -212,13 +253,13 @@
2536         if (len == 0) {
2537                 goto out;
2538         } else {
2539 -               rsii.major_status = simple_strtoul(buf, &ep, 10);
2540 +               rsii->major_status = simple_strtoul(buf, &ep, 10);
2541                 if (*ep)
2542                         goto out;
2543                 len = qword_get(&mesg, buf, mlen);
2544                 if (len <= 0)
2545                         goto out;
2546 -               rsii.minor_status = simple_strtoul(buf, &ep, 10);
2547 +               rsii->minor_status = simple_strtoul(buf, &ep, 10);
2548                 if (*ep)
2549                         goto out;
2550  
2551 @@ -227,7 +268,7 @@
2552                 if (len < 0)
2553                         goto out;
2554                 status = -ENOMEM;
2555 -               if (dup_to_netobj(&rsii.out_handle, buf, len))
2556 +               if (dup_to_netobj(&rsii->out_handle, buf, len))
2557                         goto out;
2558  
2559                 /* out_token */
2560 @@ -236,16 +277,14 @@
2561                 if (len < 0)
2562                         goto out;
2563                 status = -ENOMEM;
2564 -               if (dup_to_netobj(&rsii.out_token, buf, len))
2565 +               if (dup_to_netobj(&rsii->out_token, buf, len))
2566                         goto out;
2567         }
2568 -       rsii.h.expiry_time = expiry;
2569 -       rsip = rsi_lookup(&rsii, 1);
2570 -       status = 0;
2571 +       rsii->h.expiry_time = expiry;
2572 +       status = gssd_reply(rsii);
2573  out:
2574 -       rsi_free(&rsii);
2575 -       if (rsip)
2576 -               rsi_put(&rsip->h, &rsi_cache);
2577 +       if (rsii)
2578 +               rsi_put(&rsii->h, &rsi_cache);
2579         return status;
2580  }
2581  
2582 @@ -258,8 +297,6 @@
2583         .cache_parse    = rsi_parse,
2584  };
2585  
2586 -static DefineSimpleCacheLookup(rsi, 0)
2587 -
2588  /*
2589   * The rpcsec_context cache is used to store a context that is
2590   * used in data exchange.
2591 @@ -292,7 +329,6 @@
2592  
2593  static struct cache_head *rsc_table[RSC_HASHMAX];
2594  static struct cache_detail rsc_cache;
2595 -static struct rsc *rsc_lookup(struct rsc *item, int set);
2596  
2597  static void rsc_free(struct rsc *rsci)
2598  {
2599 @@ -325,26 +361,46 @@
2600         return netobj_equal(&new->handle, &tmp->handle);
2601  }
2602  
2603 -static inline void
2604 -rsc_init(struct rsc *new, struct rsc *tmp)
2605 +static struct rsc *rsc_lookup(struct rsc *item, int set)
2606  {
2607 -       new->handle.len = tmp->handle.len;
2608 -       tmp->handle.len = 0;
2609 -       new->handle.data = tmp->handle.data;
2610 -       tmp->handle.data = NULL;
2611 -       new->mechctx = NULL;
2612 -       new->cred.cr_group_info = NULL;
2613 -}
2614 -
2615 -static inline void
2616 -rsc_update(struct rsc *new, struct rsc *tmp)
2617 -{
2618 -       new->mechctx = tmp->mechctx;
2619 -       tmp->mechctx = NULL;
2620 -       memset(&new->seqdata, 0, sizeof(new->seqdata));
2621 -       spin_lock_init(&new->seqdata.sd_lock);
2622 -       new->cred = tmp->cred;
2623 -       tmp->cred.cr_group_info = NULL;
2624 +       struct rsc *tmp = NULL;
2625 +       struct cache_head **hp, **head;
2626 +       head = &rsc_cache.hash_table[rsc_hash(item)];
2627 +
2628 +       if (set)
2629 +               write_lock(&rsc_cache.hash_lock);
2630 +       else
2631 +               read_lock(&rsc_cache.hash_lock);
2632 +       for (hp = head; *hp != NULL; hp = &tmp->h.next) {
2633 +               tmp = container_of(*hp, struct rsc, h);
2634 +               if (!rsc_match(tmp, item))
2635 +                       continue;
2636 +               cache_get(&tmp->h);
2637 +               if (!set)
2638 +                       goto out_noset;
2639 +               *hp = tmp->h.next;
2640 +               tmp->h.next = NULL;
2641 +               clear_bit(CACHE_HASHED, &tmp->h.flags);
2642 +               rsc_put(&tmp->h, &rsc_cache);
2643 +               goto out_set;
2644 +       }
2645 +       /* Didn't find anything */
2646 +       if (!set)
2647 +               goto out_nada;
2648 +       rsc_cache.entries++;
2649 +out_set:
2650 +       set_bit(CACHE_HASHED, &item->h.flags);
2651 +       item->h.next = *head;
2652 +       *head = &item->h;
2653 +       write_unlock(&rsc_cache.hash_lock);
2654 +       cache_fresh(&rsc_cache, &item->h, item->h.expiry_time);
2655 +       cache_get(&item->h);
2656 +       return item;
2657 +out_nada:
2658 +       tmp = NULL;
2659 +out_noset:
2660 +       read_unlock(&rsc_cache.hash_lock);
2661 +       return tmp;
2662  }
2663  
2664  static int rsc_parse(struct cache_detail *cd,
2665 @@ -353,19 +409,22 @@
2666         /* contexthandle expiry [ uid gid N <n gids> mechname ...mechdata... ] */
2667         char *buf = mesg;
2668         int len, rv;
2669 -       struct rsc rsci, *rscp = NULL;
2670 +       struct rsc *rsci, *res = NULL;
2671         time_t expiry;
2672         int status = -EINVAL;
2673  
2674 -       memset(&rsci, 0, sizeof(rsci));
2675 +       rsci = kmalloc(sizeof(*rsci), GFP_KERNEL);
2676 +       if (!rsci)
2677 +               return -ENOMEM;
2678 +       memset(rsci, 0, sizeof(*rsci));
2679 +       cache_init(&rsci->h);
2680         /* context handle */
2681         len = qword_get(&mesg, buf, mlen);
2682         if (len < 0) goto out;
2683         status = -ENOMEM;
2684 -       if (dup_to_netobj(&rsci.handle, buf, len))
2685 +       if (dup_to_netobj(&rsci->handle, buf, len))
2686                 goto out;
2687  
2688 -       rsci.h.flags = 0;
2689         /* expiry */
2690         expiry = get_expiry(&mesg);
2691         status = -EINVAL;
2692 @@ -373,26 +432,26 @@
2693                 goto out;
2694  
2695         /* uid, or NEGATIVE */
2696 -       rv = get_int(&mesg, &rsci.cred.cr_uid);
2697 +       rv = get_int(&mesg, &rsci->cred.cr_uid);
2698         if (rv == -EINVAL)
2699                 goto out;
2700         if (rv == -ENOENT)
2701 -               set_bit(CACHE_NEGATIVE, &rsci.h.flags);
2702 +               set_bit(CACHE_NEGATIVE, &rsci->h.flags);
2703         else {
2704                 int N, i;
2705                 struct gss_api_mech *gm;
2706                 struct xdr_netobj tmp_buf;
2707  
2708                 /* gid */
2709 -               if (get_int(&mesg, &rsci.cred.cr_gid))
2710 +               if (get_int(&mesg, &rsci->cred.cr_gid))
2711                         goto out;
2712  
2713                 /* number of additional gid's */
2714                 if (get_int(&mesg, &N))
2715                         goto out;
2716                 status = -ENOMEM;
2717 -               rsci.cred.cr_group_info = groups_alloc(N);
2718 -               if (rsci.cred.cr_group_info == NULL)
2719 +               rsci->cred.cr_group_info = groups_alloc(N);
2720 +               if (rsci->cred.cr_group_info == NULL)
2721                         goto out;
2722  
2723                 /* gid's */
2724 @@ -401,7 +460,7 @@
2725                         gid_t gid;
2726                         if (get_int(&mesg, &gid))
2727                                 goto out;
2728 -                       GROUP_AT(rsci.cred.cr_group_info, i) = gid;
2729 +                       GROUP_AT(rsci->cred.cr_group_info, i) = gid;
2730                 }
2731  
2732                 /* mech name */
2733 @@ -422,19 +481,21 @@
2734                 }
2735                 tmp_buf.len = len;
2736                 tmp_buf.data = buf;
2737 -               if (gss_import_sec_context(&tmp_buf, gm, &rsci.mechctx)) {
2738 +               if (gss_import_sec_context(&tmp_buf, gm, &rsci->mechctx)) {
2739                         gss_mech_put(gm);
2740                         goto out;
2741                 }
2742                 gss_mech_put(gm);
2743         }
2744 -       rsci.h.expiry_time = expiry;
2745 -       rscp = rsc_lookup(&rsci, 1);
2746 +       rsci->h.expiry_time = expiry;
2747 +       spin_lock_init(&rsci->seqdata.sd_lock);
2748 +       res = rsc_lookup(rsci, 1);
2749 +       rsc_put(&res->h, &rsc_cache);
2750 +       rsci = NULL;
2751         status = 0;
2752  out:
2753 -       rsc_free(&rsci);
2754 -       if (rscp)
2755 -               rsc_put(&rscp->h, &rsc_cache);
2756 +       if (rsci)
2757 +               rsc_put(&rsci->h, &rsc_cache);
2758         return status;
2759  }
2760  
2761 @@ -446,19 +507,14 @@
2762         .cache_parse    = rsc_parse,
2763  };
2764  
2765 -static DefineSimpleCacheLookup(rsc, 0);
2766 -
2767  struct rsc *
2768  gss_svc_searchbyctx(struct xdr_netobj *handle)
2769  {
2770         struct rsc rsci;
2771         struct rsc *found;
2772  
2773 -       memset(&rsci, 0, sizeof(rsci));
2774 -       if (dup_to_netobj(&rsci.handle, handle->data, handle->len))
2775 -               return NULL;
2776 +       rsci.handle = *handle;
2777         found = rsc_lookup(&rsci, 0);
2778 -       rsc_free(&rsci);
2779         if (!found)
2780                 return NULL;
2781         if (cache_check(&rsc_cache, &found->h, NULL))
2782 @@ -721,6 +777,45 @@
2783         return stat;
2784  }
2785  
2786 +static int
2787 +unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx)
2788 +{
2789 +       int stat = -EINVAL;
2790 +       int out_offset;
2791 +       u32 * lenp;
2792 +       u32 priv_len, maj_stat;
2793 +       int saved_len;
2794 +
2795 +       lenp = buf->head[0].iov_base;
2796 +       priv_len = ntohl(svc_getu32(&buf->head[0]));
2797 +       if (priv_len > buf->len) /* XXXJBF: wrong check */
2798 +               goto out;
2799 +       /* XXXJBF: bizarre hack: to handle revisits (and not decrypt
2800 +        * twice), the first time through we write an offset
2801 +        * telling us where to skip to find the already-decrypted data */
2802 +       if (rqstp->rq_deferred) {
2803 +               buf->head[0].iov_base += priv_len;
2804 +               buf->head[0].iov_len -= priv_len;
2805 +               return 0;
2806 +       }
2807 +       saved_len = buf->len; /* XXX HACK */
2808 +       buf->len = priv_len;
2809 +       maj_stat = gss_unwrap(ctx, NULL, 0, buf, &out_offset);
2810 +       buf->len = saved_len;
2811 +       if (maj_stat != GSS_S_COMPLETE) /* out_offset may be unset here */
2812 +               goto out;
2813 +       buf->head[0].iov_base += out_offset; /* skip to unwrapped data */
2814 +       buf->head[0].iov_len -= out_offset;
2815 +       BUG_ON(buf->head[0].iov_len <= 0);
2816 +       if (ntohl(svc_getu32(&buf->head[0])) != seq)
2817 +               goto out;
2818 +       /* XXXJBF: see "bizarre hack", above. */
2819 +       *lenp = htonl(out_offset + 4);
2820 +       stat = 0;
2821 +out:
2822 +       return stat;
2823 +}
2824 +
2825  struct gss_svc_data {
2826         /* decoded gss client cred: */
2827         struct rpc_gss_wire_cred        clcred;
2828 @@ -730,6 +825,19 @@
2829         struct rsc                      *rsci;
2830  };
2831  
2832 +static int
2833 +svcauth_gss_set_client(struct svc_rqst *rqstp)
2834 +{
2835 +       /* Set rq_client via find_gss_auth_domain(); SVC_DENIED if none. */
2836 +       struct gss_svc_data *gsd = rqstp->rq_auth_data;
2837 +
2838 +       rqstp->rq_client = find_gss_auth_domain(gsd->rsci->mechctx,
2839 +                                               gsd->clcred.gc_svc);
2840 +       if (!rqstp->rq_client)
2841 +               return SVC_DENIED;
2842 +       return SVC_OK;
2843 +}
2844 +
2845  /*
2846   * Accept an rpcsec packet.
2847   * If context establishment, punt to user space
2848 @@ -748,7 +856,7 @@
2849         struct gss_svc_data *svcdata = rqstp->rq_auth_data;
2850         struct rpc_gss_wire_cred *gc;
2851         struct rsc      *rsci = NULL;
2852 -       struct rsi      *rsip, rsikey;
2853 +       struct rsi      *rsip, *rsikey = NULL;
2854         u32             *rpcstart;
2855         u32             *reject_stat = resv->iov_base + resv->iov_len;
2856         int             ret;
2857 @@ -841,30 +949,23 @@
2858                 *authp = rpc_autherr_badcred;
2859                 if (gc->gc_proc == RPC_GSS_PROC_INIT && gc->gc_ctx.len != 0)
2860                         goto auth_err;
2861 -               memset(&rsikey, 0, sizeof(rsikey));
2862 -               if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx))
2863 +               rsikey = kmalloc(sizeof(*rsikey), GFP_KERNEL);
2864 +               if (!rsikey)
2865 +                       goto drop;
2866 +               memset(rsikey, 0, sizeof(*rsikey));
2867 +               cache_init(&rsikey->h);
2868 +               if (dup_netobj(&rsikey->in_handle, &gc->gc_ctx))
2869                         goto drop;
2870                 *authp = rpc_autherr_badverf;
2871 -               if (svc_safe_getnetobj(argv, &tmpobj)) {
2872 -                       kfree(rsikey.in_handle.data);
2873 +               if (svc_safe_getnetobj(argv, &tmpobj))
2874                         goto auth_err;
2875 -               }
2876 -               if (dup_netobj(&rsikey.in_token, &tmpobj)) {
2877 -                       kfree(rsikey.in_handle.data);
2878 +               if (dup_netobj(&rsikey->in_token, &tmpobj))
2879                         goto drop;
2880 -               }
2881  
2882 -               rsip = rsi_lookup(&rsikey, 0);
2883 -               rsi_free(&rsikey);
2884 -               if (!rsip) {
2885 -                       goto drop;
2886 -               }
2887 -               switch(cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) {
2888 -               case -EAGAIN:
2889 +               rsip = gssd_upcall(rsikey, rqstp);
2890 +               if (!rsip)
2891                         goto drop;
2892 -               case -ENOENT:
2893 -                       goto drop;
2894 -               case 0:
2895 +               else {
2896                         rsci = gss_svc_searchbyctx(&rsip->out_handle);
2897                         if (!rsci) {
2898                                 goto drop;
2899 @@ -893,11 +994,6 @@
2900                 svc_putu32(resv, rpc_success);
2901                 goto complete;
2902         case RPC_GSS_PROC_DATA:
2903 -               *authp = rpc_autherr_badcred;
2904 -               rqstp->rq_client =
2905 -                       find_gss_auth_domain(rsci->mechctx, gc->gc_svc);
2906 -               if (rqstp->rq_client == NULL)
2907 -                       goto auth_err;
2908                 *authp = rpcsec_gsserr_ctxproblem;
2909                 if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
2910                         goto auth_err;
2911 @@ -911,6 +1007,15 @@
2912                         if (unwrap_integ_data(&rqstp->rq_arg,
2913                                         gc->gc_seq, rsci->mechctx))
2914                                 goto auth_err;
2915 +                       /* placeholders for length and seq. number: */
2916 +                       svcdata->body_start = resv->iov_base + resv->iov_len;
2917 +                       svc_putu32(resv, 0);
2918 +                       svc_putu32(resv, 0);
2919 +                       break;
2920 +               case RPC_GSS_SVC_PRIVACY:
2921 +                       if (unwrap_priv_data(rqstp, &rqstp->rq_arg,
2922 +                                       gc->gc_seq, rsci->mechctx))
2923 +                               goto auth_err;
2924                         svcdata->rsci = rsci;
2925                         cache_get(&rsci->h);
2926                         /* placeholders for length and seq. number: */
2927 @@ -918,11 +1023,11 @@
2928                         svc_putu32(resv, 0);
2929                         svc_putu32(resv, 0);
2930                         break;
2931 -               case RPC_GSS_SVC_PRIVACY:
2932 -                       /* currently unsupported */
2933                 default:
2934                         goto auth_err;
2935                 }
2936 +               svcdata->rsci = rsci;
2937 +               cache_get(&rsci->h);
2938                 ret = SVC_OK;
2939                 goto out;
2940         }
2941 @@ -937,13 +1042,15 @@
2942  drop:
2943         ret = SVC_DROP;
2944  out:
2945 +       if (rsikey)
2946 +               rsi_put(&rsikey->h, &rsi_cache);
2947         if (rsci)
2948                 rsc_put(&rsci->h, &rsc_cache);
2949         return ret;
2950  }
2951  
2952 -static int
2953 -svcauth_gss_release(struct svc_rqst *rqstp)
2954 +static inline int
2955 +svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp)
2956  {
2957         struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data;
2958         struct rpc_gss_wire_cred *gc = &gsd->clcred;
2959 @@ -955,10 +1062,160 @@
2960         int integ_offset, integ_len;
2961         int stat = -EINVAL;
2962  
2963 +       p = gsd->body_start;
2964 +       gsd->body_start = NULL;
2965 +       /* move accept_stat to right place: */
2966 +       memcpy(p, p + 2, 4);
2967 +       /* Don't wrap in failure case: */
2968 +       /* Counting on not getting here if call was not even accepted! */
2969 +       if (*p != rpc_success) {
2970 +               resbuf->head[0].iov_len -= 2 * 4;
2971 +               goto out;
2972 +       }
2973 +       p++;
2974 +       integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base;
2975 +       integ_len = resbuf->len - integ_offset;
2976 +       BUG_ON(integ_len % 4);
2977 +       *p++ = htonl(integ_len);
2978 +       *p++ = htonl(gc->gc_seq);
2979 +       if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset,
2980 +                               integ_len))
2981 +               BUG();
2982 +       if (resbuf->page_len == 0
2983 +                       && resbuf->tail[0].iov_len + RPC_MAX_AUTH_SIZE
2984 +                       < PAGE_SIZE) {
2985 +               BUG_ON(resbuf->tail[0].iov_len);
2986 +               /* Use head for everything */
2987 +               resv = &resbuf->head[0];
2988 +       } else if (resbuf->tail[0].iov_base == NULL) {
2989 +               /* copied from nfsd4_encode_read */
2990 +               svc_take_page(rqstp);
2991 +               resbuf->tail[0].iov_base = page_address(rqstp
2992 +                               ->rq_respages[rqstp->rq_resused-1]);
2993 +               rqstp->rq_restailpage = rqstp->rq_resused-1;
2994 +               resbuf->tail[0].iov_len = 0;
2995 +               resv = &resbuf->tail[0];
2996 +       } else {
2997 +               resv = &resbuf->tail[0];
2998 +       }
2999 +       mic.data = (u8 *)resv->iov_base + resv->iov_len + 4;
3000 +       if (gss_get_mic(gsd->rsci->mechctx, 0, &integ_buf, &mic))
3001 +               goto out_err;
3002 +       svc_putu32(resv, htonl(mic.len));
3003 +       memset(mic.data + mic.len, 0,
3004 +                       round_up_to_quad(mic.len) - mic.len);
3005 +       resv->iov_len += XDR_QUADLEN(mic.len) << 2;
3006 +       /* not strictly required: */
3007 +       resbuf->len += XDR_QUADLEN(mic.len) << 2;
3008 +       BUG_ON(resv->iov_len > PAGE_SIZE);
3009 +out:
3010 +       stat = 0;
3011 +out_err:
3012 +       return stat;
3013 +}
3014 +
3015 +/* XXXJBF: Look for chances to share code with client */
3016 +/* XXXJBF: Do we need to preallocate these pages somehow?  E.g. see
3017 + * buffer size calculations in svcsock.c */
3018 +/* XXXJBF: how does reference counting on pages work? */
3019 +/* Allocate a page array covering buf's page data; NULL if empty or on OOM. */
3020 +static struct page **
3021 +svc_alloc_enc_pages(struct xdr_buf *buf)
3022 +{
3023 +       struct page **ret;
3024 +       int last, i;
3025 +
3026 +       if (buf->page_len == 0)
3027 +               return NULL;
3028 +       BUG_ON(buf->page_base >> PAGE_CACHE_SHIFT);
3029 +       last = (buf->page_base + buf->page_len - 1) >> PAGE_CACHE_SHIFT;
3030 +       ret = kmalloc((last + 1) * sizeof(struct page *), GFP_KERNEL);
3031 +       if (!ret)
3032 +               return NULL;
3033 +       for (i = 0; i <= last; i++) {
3034 +               ret[i] = alloc_page(GFP_KERNEL);
3035 +               if (ret[i] == NULL)
3036 +                       goto out_free;
3037 +       }
3038 +       return ret;
3039 +out_free:
3040 +       for (i--; i >= 0; i--)          /* undo the pages we did get */
3041 +               __free_page(ret[i]);
3042 +       kfree(ret);                     /* the array itself used to leak */
3043 +       return NULL;
3044 +}
3045 +
3046 +static inline int
3047 +svcauth_gss_wrap_resp_priv(struct svc_rqst *rqstp)
3048 +{
3049 +       struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data;
3050 +       struct rpc_gss_wire_cred *gc = &gsd->clcred;
3051 +       struct xdr_buf *resbuf = &rqstp->rq_res;
3052 +       struct page **inpages;
3053 +       u32 *p;
3054 +       int offset, *len;
3055 +       int pad;
3056 +       int stat = -EINVAL;
3057 +
3058 +       p = gsd->body_start;
3059 +       gsd->body_start = NULL;
3060 +       /* move accept_stat to right place: */
3061 +       memcpy(p, p + 2, 4);
3062 +       /* Don't wrap in failure case: */
3063 +       /* Counting on not getting here if call was not even accepted! */
3064 +       if (*p != rpc_success) {
3065 +               resbuf->head[0].iov_len -= 2 * 4;
3066 +               goto out;
3067 +       }
3068 +       p++;
3069 +       len = p++;
3070 +       offset = (u8 *)p - (u8 *)resbuf->head[0].iov_base;
3071 +       *p++ = htonl(gc->gc_seq);
3072 +       stat = -ENOMEM;
3073 +       inpages = resbuf->pages;
3074 +       /* XXXJBF: huge memory leaks here: allocated pages probably aren't
3075 +        * freed, and neither is memory used to hold page array. */
3076 +       resbuf->pages = svc_alloc_enc_pages(resbuf);
3077 +       if (resbuf->page_len && !resbuf->pages)
3078 +               goto out_err; /* XXX sleep and retry? Reserve ahead of time
3079 +                               and BUG_ON? */
3080 +       if (resbuf->tail[0].iov_len == 0 || resbuf->tail[0].iov_base == NULL) {
3081 +               /* copied from nfsd4_encode_read */
3082 +               {int i = svc_take_page(rqstp); BUG_ON(i); }
3083 +               resbuf->tail[0].iov_base = page_address(rqstp
3084 +                               ->rq_respages[rqstp->rq_resused-1]);
3085 +               rqstp->rq_restailpage = rqstp->rq_resused-1;
3086 +               resbuf->tail[0].iov_len = 0;
3087 +       }
3088 +       /* XXX: Will svc code attempt to free stuff in xdr_buf->pages?
3089 +        * Or can we leave it in any old state on error?? */
3090 +       stat = -EINVAL;
3091 +       if (gss_wrap(gsd->rsci->mechctx, GSS_C_QOP_DEFAULT, offset,
3092 +                               resbuf, inpages))
3093 +               goto out_err;
3094 +       *len = htonl(resbuf->len - offset);
3095 +       pad = 3 - ((resbuf->len - offset - 1)&3);
3096 +       p = (u32 *)(resbuf->tail[0].iov_base + resbuf->tail[0].iov_len);
3097 +       memset(p, 0, pad);
3098 +       resbuf->tail[0].iov_len += pad;
3099 +out:
3100 +       return 0;
3101 +out_err:
3102 +       return stat;
3103 +}
3104 +
3105 +static int
3106 +svcauth_gss_release(struct svc_rqst *rqstp)
3107 +{
3108 +       struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data;
3109 +       struct rpc_gss_wire_cred *gc = &gsd->clcred;
3110 +       struct xdr_buf *resbuf = &rqstp->rq_res;
3111 +       int stat = -EINVAL;
3112 +
3113         if (gc->gc_proc != RPC_GSS_PROC_DATA)
3114                 goto out;
3115         /* Release can be called twice, but we only wrap once. */
3116 -       if (gsd->body_start == 0)
3117 +       if (gsd->body_start == NULL)
3118                 goto out;
3119         /* normally not set till svc_send, but we need it here: */
3120         resbuf->len = resbuf->head[0].iov_len
3121 @@ -967,55 +1224,15 @@
3122         case RPC_GSS_SVC_NONE:
3123                 break;
3124         case RPC_GSS_SVC_INTEGRITY:
3125 -               p = gsd->body_start;
3126 -               gsd->body_start = NULL;
3127 -               /* move accept_stat to right place: */
3128 -               memcpy(p, p + 2, 4);
3129 -               /* don't wrap in failure case: */
3130 -               /* Note: counting on not getting here if call was not even
3131 -                * accepted! */
3132 -               if (*p != rpc_success) {
3133 -                       resbuf->head[0].iov_len -= 2 * 4;
3134 -                       goto out;
3135 -               }
3136 -               p++;
3137 -               integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base;
3138 -               integ_len = resbuf->len - integ_offset;
3139 -               BUG_ON(integ_len % 4);
3140 -               *p++ = htonl(integ_len);
3141 -               *p++ = htonl(gc->gc_seq);
3142 -               if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset,
3143 -                                       integ_len))
3144 -                       BUG();
3145 -               if (resbuf->page_len == 0
3146 -                       && resbuf->tail[0].iov_len + RPC_MAX_AUTH_SIZE
3147 -                               < PAGE_SIZE) {
3148 -                       BUG_ON(resbuf->tail[0].iov_len);
3149 -                       /* Use head for everything */
3150 -                       resv = &resbuf->head[0];
3151 -               } else if (resbuf->tail[0].iov_base == NULL) {
3152 -                       /* copied from nfsd4_encode_read */
3153 -                       svc_take_page(rqstp);
3154 -                       resbuf->tail[0].iov_base = page_address(rqstp
3155 -                                       ->rq_respages[rqstp->rq_resused-1]);
3156 -                       rqstp->rq_restailpage = rqstp->rq_resused-1;
3157 -                       resbuf->tail[0].iov_len = 0;
3158 -                       resv = &resbuf->tail[0];
3159 -               } else {
3160 -                       resv = &resbuf->tail[0];
3161 -               }
3162 -               mic.data = (u8 *)resv->iov_base + resv->iov_len + 4;
3163 -               if (gss_get_mic(gsd->rsci->mechctx, 0, &integ_buf, &mic))
3164 +               stat = svcauth_gss_wrap_resp_integ(rqstp);
3165 +               if (stat)
3166                         goto out_err;
3167 -               svc_putu32(resv, htonl(mic.len));
3168 -               memset(mic.data + mic.len, 0,
3169 -                               round_up_to_quad(mic.len) - mic.len);
3170 -               resv->iov_len += XDR_QUADLEN(mic.len) << 2;
3171 -               /* not strictly required: */
3172 -               resbuf->len += XDR_QUADLEN(mic.len) << 2;
3173 -               BUG_ON(resv->iov_len > PAGE_SIZE);
3174                 break;
3175         case RPC_GSS_SVC_PRIVACY:
3176 +               stat = svcauth_gss_wrap_resp_priv(rqstp);
3177 +               if (stat)
3178 +                       goto out_err;
3179 +               break;
3180         default:
3181                 goto out_err;
3182         }
3183 @@ -1052,6 +1269,7 @@
3184         .accept         = svcauth_gss_accept,
3185         .release        = svcauth_gss_release,
3186         .domain_release = svcauth_gss_domain_release,
3187 +       .set_client     = svcauth_gss_set_client,
3188  };
3189  
3190  int
3191 Index: linux-2.6.10/net/sunrpc/auth_gss/sunrpcgss_syms.c
3192 ===================================================================
3193 --- linux-2.6.10.orig/net/sunrpc/auth_gss/sunrpcgss_syms.c      2004-12-25 05:35:23.000000000 +0800
3194 +++ linux-2.6.10/net/sunrpc/auth_gss/sunrpcgss_syms.c   2005-04-05 19:01:49.158500672 +0800
3195 @@ -1,37 +0,0 @@
3196 -#include <linux/config.h>
3197 -#include <linux/module.h>
3198 -
3199 -#include <linux/types.h>
3200 -#include <linux/socket.h>
3201 -#include <linux/sched.h>
3202 -#include <linux/uio.h>
3203 -#include <linux/unistd.h>
3204 -
3205 -#include <linux/sunrpc/auth_gss.h>
3206 -#include <linux/sunrpc/svcauth_gss.h>
3207 -#include <linux/sunrpc/gss_asn1.h>
3208 -#include <linux/sunrpc/gss_krb5.h>
3209 -
3210 -/* svcauth_gss.c: */
3211 -EXPORT_SYMBOL(svcauth_gss_register_pseudoflavor);
3212 -
3213 -/* registering gss mechanisms to the mech switching code: */
3214 -EXPORT_SYMBOL(gss_mech_register);
3215 -EXPORT_SYMBOL(gss_mech_unregister);
3216 -EXPORT_SYMBOL(gss_mech_get);
3217 -EXPORT_SYMBOL(gss_mech_get_by_pseudoflavor);
3218 -EXPORT_SYMBOL(gss_mech_get_by_name);
3219 -EXPORT_SYMBOL(gss_mech_put);
3220 -EXPORT_SYMBOL(gss_pseudoflavor_to_service);
3221 -EXPORT_SYMBOL(gss_service_to_auth_domain_name);
3222 -
3223 -/* generic functionality in gss code: */
3224 -EXPORT_SYMBOL(g_make_token_header);
3225 -EXPORT_SYMBOL(g_verify_token_header);
3226 -EXPORT_SYMBOL(g_token_size);
3227 -EXPORT_SYMBOL(make_checksum);
3228 -EXPORT_SYMBOL(krb5_encrypt);
3229 -EXPORT_SYMBOL(krb5_decrypt);
3230 -
3231 -/* debug */
3232 -EXPORT_SYMBOL(print_hexl);
3233 Index: linux-2.6.10/net/sunrpc/auth_gss/Makefile
3234 ===================================================================
3235 --- linux-2.6.10.orig/net/sunrpc/auth_gss/Makefile      2004-12-25 05:34:33.000000000 +0800
3236 +++ linux-2.6.10/net/sunrpc/auth_gss/Makefile   2005-04-05 14:49:13.408690888 +0800
3237 @@ -10,7 +10,7 @@
3238  obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
3239  
3240  rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
3241 -       gss_krb5_seqnum.o
3242 +       gss_krb5_seqnum.o gss_krb5_wrap.o
3243  
3244  obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o
3245  
3246 Index: linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_mech.c
3247 ===================================================================
3248 --- linux-2.6.10.orig/net/sunrpc/auth_gss/gss_krb5_mech.c       2004-12-25 05:35:23.000000000 +0800
3249 +++ linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_mech.c    2005-04-05 14:49:13.400692104 +0800
3250 @@ -182,6 +182,7 @@
3251         kfree(kctx);
3252  }
3253  
3254 +/* XXX the following wrappers have become pointless; kill them. */
3255  static u32
3256  gss_verify_mic_kerberos(struct gss_ctx         *ctx,
3257                         struct xdr_buf          *message,
3258 @@ -191,8 +192,7 @@
3259         int qop_state;
3260         struct krb5_ctx *kctx = ctx->internal_ctx_id;
3261  
3262 -       maj_stat = krb5_read_token(kctx, mic_token, message, &qop_state,
3263 -                                  KG_TOK_MIC_MSG);
3264 +       maj_stat = krb5_read_token(kctx, mic_token, message, &qop_state);
3265         if (!maj_stat && qop_state)
3266             *qstate = qop_state;
3267  
3268 @@ -208,7 +208,7 @@
3269         u32 err = 0;
3270         struct krb5_ctx *kctx = ctx->internal_ctx_id;
3271  
3272 -       err = krb5_make_token(kctx, qop, message, mic_token, KG_TOK_MIC_MSG);
3273 +       err = krb5_make_token(kctx, qop, message, mic_token);
3274  
3275         dprintk("RPC:      gss_get_mic_kerberos returning %d\n",err);
3276  
3277 @@ -219,6 +219,8 @@
3278         .gss_import_sec_context = gss_import_sec_context_kerberos,
3279         .gss_get_mic            = gss_get_mic_kerberos,
3280         .gss_verify_mic         = gss_verify_mic_kerberos,
3281 +       .gss_wrap               = gss_wrap_kerberos,
3282 +       .gss_unwrap             = gss_unwrap_kerberos,
3283         .gss_delete_sec_context = gss_delete_sec_context_kerberos,
3284  };
3285  
3286 @@ -233,6 +235,11 @@
3287                 .service = RPC_GSS_SVC_INTEGRITY,
3288                 .name = "krb5i",
3289         },
3290 +       [2] = {
3291 +               .pseudoflavor = RPC_AUTH_GSS_KRB5P,
3292 +               .service = RPC_GSS_SVC_PRIVACY,
3293 +               .name = "krb5p",
3294 +       },
3295  };
3296  
3297  static struct gss_api_mech gss_kerberos_mech = {
3298 Index: linux-2.6.10/net/sunrpc/auth_gss/auth_gss.c
3299 ===================================================================
3300 --- linux-2.6.10.orig/net/sunrpc/auth_gss/auth_gss.c    2004-12-25 05:34:44.000000000 +0800
3301 +++ linux-2.6.10/net/sunrpc/auth_gss/auth_gss.c 2005-04-05 14:49:13.404691496 +0800
3302 @@ -45,6 +45,7 @@
3303  #include <linux/socket.h>
3304  #include <linux/in.h>
3305  #include <linux/sched.h>
3306 +#include <linux/pagemap.h>
3307  #include <linux/sunrpc/clnt.h>
3308  #include <linux/sunrpc/auth.h>
3309  #include <linux/sunrpc/auth_gss.h>
3310 @@ -480,12 +481,14 @@
3311         if (!cred)
3312                 goto err;
3313         if (gss_err)
3314 -               cred->cr_flags |= RPCAUTH_CRED_DEAD;
3315 +               cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
3316         else
3317                 gss_cred_set_ctx(cred, ctx);
3318         spin_lock(&gss_auth->lock);
3319         gss_msg = __gss_find_upcall(gss_auth, acred.uid);
3320         if (gss_msg) {
3321 +               if (gss_err)
3322 +                       gss_msg->msg.errno = -EACCES;
3323                 __gss_unhash_msg(gss_msg);
3324                 spin_unlock(&gss_auth->lock);
3325                 gss_release_msg(gss_msg);
3326 @@ -740,7 +743,9 @@
3327         maj_stat = gss_get_mic(ctx->gc_gss_ctx,
3328                                GSS_C_QOP_DEFAULT, 
3329                                &verf_buf, &mic);
3330 -       if(maj_stat != 0){
3331 +       if (maj_stat == GSS_S_CONTEXT_EXPIRED) {
3332 +               cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
3333 +       } else if (maj_stat != 0) {
3334                 printk("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat);
3335                 goto out_put_ctx;
3336         }
3337 @@ -779,6 +784,7 @@
3338         struct xdr_netobj mic;
3339         u32             flav,len;
3340         u32             service;
3341 +       u32             maj_stat;
3342  
3343         dprintk("RPC: %4u gss_validate\n", task->tk_pid);
3344  
3345 @@ -794,8 +800,11 @@
3346         mic.data = (u8 *)p;
3347         mic.len = len;
3348  
3349 -       if (gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state))
3350 -               goto out_bad;
3351 +       maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state);
3352 +       if (maj_stat == GSS_S_CONTEXT_EXPIRED)
3353 +               cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
3354 +       if (maj_stat)
3355 +               goto out_bad;
3356         service = gss_pseudoflavor_to_service(ctx->gc_gss_ctx->mech_type,
3357                                         gss_cred->gc_flavor);
3358         switch (service) {
3359 @@ -807,6 +816,11 @@
3360                /* verifier data, flavor, length, length, sequence number: */
3361                task->tk_auth->au_rslack = XDR_QUADLEN(len) + 4;
3362                break;
3363 +       case RPC_GSS_SVC_PRIVACY:
3364 +              /* XXXJBF: Ugh. Going for a wild overestimate.
3365 +               * Need some info from krb5 layer? */
3366 +              task->tk_auth->au_rslack = XDR_QUADLEN(len) + 32;
3367 +              break;
3368         default:
3369                goto out_bad;
3370         }
3371 @@ -821,11 +835,10 @@
3372  }
3373  
3374  static inline int
3375 -gss_wrap_req_integ(struct gss_cl_ctx *ctx,
3376 -                       kxdrproc_t encode, void *rqstp, u32 *p, void *obj)
3377 +gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
3378 +               kxdrproc_t encode, struct rpc_rqst *rqstp, u32 *p, void *obj)
3379  {
3380 -       struct rpc_rqst *req = (struct rpc_rqst *)rqstp;
3381 -       struct xdr_buf  *snd_buf = &req->rq_snd_buf;
3382 +       struct xdr_buf  *snd_buf = &rqstp->rq_snd_buf;
3383         struct xdr_buf  integ_buf;
3384         u32             *integ_len = NULL;
3385         struct xdr_netobj mic;
3386 @@ -836,7 +849,7 @@
3387  
3388         integ_len = p++;
3389         offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
3390 -       *p++ = htonl(req->rq_seqno);
3391 +       *p++ = htonl(rqstp->rq_seqno);
3392  
3393         status = encode(rqstp, p, obj);
3394         if (status)
3395 @@ -848,7 +861,7 @@
3396         *integ_len = htonl(integ_buf.len);
3397  
3398         /* guess whether we're in the head or the tail: */
3399 -       if (snd_buf->page_len || snd_buf->tail[0].iov_len) 
3400 +       if (snd_buf->page_len || snd_buf->tail[0].iov_len)
3401                 iov = snd_buf->tail;
3402         else
3403                 iov = snd_buf->head;
3404 @@ -858,7 +871,9 @@
3405         maj_stat = gss_get_mic(ctx->gc_gss_ctx,
3406                         GSS_C_QOP_DEFAULT, &integ_buf, &mic);
3407         status = -EIO; /* XXX? */
3408 -       if (maj_stat)
3409 +       if (maj_stat == GSS_S_CONTEXT_EXPIRED)
3410 +               cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
3411 +       else if (maj_stat)
3412                 return status;
3413         q = xdr_encode_opaque(p, NULL, mic.len);
3414  
3415 @@ -868,6 +883,112 @@
3416         return 0;
3417  }
3418  
3419 +static void
3420 +priv_release_snd_buf(struct rpc_rqst *rqstp)
3421 +{
3422 +       int i;
3423 +
3424 +       for (i=0; i < rqstp->rq_enc_pages_num; i++)
3425 +               __free_page(rqstp->rq_enc_pages[i]);
3426 +       kfree(rqstp->rq_enc_pages);
3427 +}
3428 +
3429 +/* Allocate rq_enc_pages for the send buffer; returns 0, or -EAGAIN on OOM. */
3430 +static int
3431 +alloc_enc_pages(struct rpc_rqst *rqstp)
3432 +{
3433 +       struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
3434 +       int first, last, i;
3435 +
3436 +       if (snd_buf->page_len == 0) {
3437 +               rqstp->rq_enc_pages_num = 0;
3438 +               return 0;
3439 +       }
3440 +       first = snd_buf->page_base >> PAGE_CACHE_SHIFT;
3441 +       last = (snd_buf->page_base + snd_buf->page_len - 1) >> PAGE_CACHE_SHIFT;
3442 +       rqstp->rq_enc_pages_num = last - first + 1 + 1;
3443 +       rqstp->rq_enc_pages = kmalloc(rqstp->rq_enc_pages_num *
3444 +                               sizeof(struct page *), GFP_NOFS);
3445 +       if (!rqstp->rq_enc_pages)
3446 +               goto out;
3447 +       for (i = 0; i < rqstp->rq_enc_pages_num; i++) {
3448 +               rqstp->rq_enc_pages[i] = alloc_page(GFP_NOFS);
3449 +               if (rqstp->rq_enc_pages[i] == NULL)
3450 +                       goto out_free;
3451 +       }
3452 +       rqstp->rq_release_snd_buf = priv_release_snd_buf;
3453 +       return 0;
3454 +out_free:
3455 +       rqstp->rq_enc_pages_num = i;    /* only pages [0, i) were allocated */
3456 +       priv_release_snd_buf(rqstp);    /* frees those pages and the array */
3457 +       rqstp->rq_enc_pages = NULL;
3458 +out:
3459 +       rqstp->rq_enc_pages_num = 0;    /* nothing left to release */
3460 +       return -EAGAIN;
3461 +}
3462 +
3463 +/* Encode the call, then protect it with gss_wrap(); 0 or an error status. */
3464 +static inline int
3465 +gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
3466 +               kxdrproc_t encode, struct rpc_rqst *rqstp, u32 *p, void *obj)
3467 +{
3468 +       struct xdr_buf  *snd_buf = &rqstp->rq_snd_buf;
3469 +       u32             offset;
3470 +       u32             maj_stat;
3471 +       int             status;
3472 +       u32             *opaque_len;
3473 +       struct page     **inpages;
3474 +       int             first, pad;
3475 +       struct kvec     *iov;
3476 +       char            *tmp;
3477 +
3478 +       opaque_len = p++;
3479 +       offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
3480 +       *p++ = htonl(rqstp->rq_seqno);
3481 +
3482 +       status = encode(rqstp, p, obj);
3483 +       if (status)
3484 +               return status;
3485 +
3486 +       status = alloc_enc_pages(rqstp);
3487 +       if (status)
3488 +               return status;
3489 +       /* XXXJBF: Oops!  Do we need rq_enc_pages really any more?? */
3490 +       first = snd_buf->page_base >> PAGE_CACHE_SHIFT;
3491 +       inpages = snd_buf->pages + first;
3492 +       snd_buf->pages = rqstp->rq_enc_pages;
3493 +       snd_buf->page_base -= first << PAGE_CACHE_SHIFT;
3494 +       /* XXX?: tail needs to be separate if we want to be able to expand
3495 +        * the head (since it's often put right after the head).  But is
3496 +        * expanding the head safe in any case? */
3497 +       if (snd_buf->page_len || snd_buf->tail[0].iov_len) {
3498 +               tmp = page_address(rqstp->rq_enc_pages[rqstp->rq_enc_pages_num - 1]);
3499 +               memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len);
3500 +               snd_buf->tail[0].iov_base = tmp;
3501 +       }
3502 +       maj_stat = gss_wrap(ctx->gc_gss_ctx, GSS_C_QOP_DEFAULT, offset,
3503 +                               snd_buf, inpages);
3504 +       status = -EIO; /* XXX? */
3505 +       if (maj_stat == GSS_S_CONTEXT_EXPIRED)
3506 +               cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
3507 +       else if (maj_stat)
3508 +               return status;
3509 +
3510 +       *opaque_len = htonl(snd_buf->len - offset);
3511 +       /* guess whether we're in the head or the tail: */
3512 +       if (snd_buf->page_len || snd_buf->tail[0].iov_len)
3513 +               iov = snd_buf->tail;
3514 +       else
3515 +               iov = snd_buf->head;
3516 +       p = iov->iov_base + iov->iov_len;
3517 +       pad = 3 - ((snd_buf->len - offset - 1) & 3);
3518 +       memset(p, 0, pad);
3519 +       iov->iov_len += pad;
3520 +       snd_buf->len += pad;
3521 +
3522 +       return 0;
3523 +}
3524 +
3525  static int
3526  gss_wrap_req(struct rpc_task *task,
3527              kxdrproc_t encode, void *rqstp, u32 *p, void *obj)
3528 @@ -894,9 +1015,13 @@
3529                         status = encode(rqstp, p, obj);
3530                         goto out;
3531                 case RPC_GSS_SVC_INTEGRITY:
3532 -                       status = gss_wrap_req_integ(ctx, encode, rqstp, p, obj);
3533 +                       status = gss_wrap_req_integ(cred, ctx, encode,
3534 +                                                               rqstp, p, obj);
3535                         goto out;
3536                 case RPC_GSS_SVC_PRIVACY:
3537 +                       status = gss_wrap_req_priv(cred, ctx, encode,
3538 +                                                               rqstp, p, obj);
3539 +                       goto out;
3540                 default:
3541                         goto out;
3542         }
3543 @@ -907,11 +1032,10 @@
3544  }
3545  
3546  static inline int
3547 -gss_unwrap_resp_integ(struct gss_cl_ctx *ctx,
3548 -               kxdrproc_t decode, void *rqstp, u32 **p, void *obj)
3549 +gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
3550 +               struct rpc_rqst *rqstp, u32 **p)
3551  {
3552 -       struct rpc_rqst *req = (struct rpc_rqst *)rqstp;
3553 -       struct xdr_buf  *rcv_buf = &req->rq_rcv_buf;
3554 +       struct xdr_buf  *rcv_buf = &rqstp->rq_rcv_buf;
3555         struct xdr_buf integ_buf;
3556         struct xdr_netobj mic;
3557         u32 data_offset, mic_offset;
3558 @@ -926,7 +1050,7 @@
3559         mic_offset = integ_len + data_offset;
3560         if (mic_offset > rcv_buf->len)
3561                 return status;
3562 -       if (ntohl(*(*p)++) != req->rq_seqno)
3563 +       if (ntohl(*(*p)++) != rqstp->rq_seqno)
3564                 return status;
3565  
3566         if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset,
3567 @@ -938,11 +1062,44 @@
3568  
3569         maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf,
3570                         &mic, NULL);
3571 +       if (maj_stat == GSS_S_CONTEXT_EXPIRED)
3572 +               cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
3573 +       if (maj_stat != GSS_S_COMPLETE)
3574 +               return status;
3575 +       return 0;
3576 +}
3577 +
3578 +/* Unwrap a privacy-protected reply and verify its seqno; returns 0 or -EIO. */
3579 +static inline int
3580 +gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
3581 +               struct rpc_rqst *rqstp, u32 **p)
3582 +{
3583 +       struct xdr_buf  *rcv_buf = &rqstp->rq_rcv_buf;
3584 +       u32 offset, out_offset;
3585 +       u32 opaque_len, maj_stat;
3586 +       int status = -EIO;
3587 +
3588 +       opaque_len = ntohl(*(*p)++);    /* length as read off the wire */
3589 +       offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base;
3590 +       if (offset > rcv_buf->len || opaque_len > rcv_buf->len - offset)
3591 +               return status;  /* checked without u32 wrap-around */
3592 +       /* remove padding: */
3593 +       rcv_buf->len = offset + opaque_len;
3594 +
3595 +       maj_stat = gss_unwrap(ctx->gc_gss_ctx, NULL,
3596 +                       offset, rcv_buf, &out_offset);
3597 +       if (maj_stat == GSS_S_CONTEXT_EXPIRED)
3598 +               cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
3599         if (maj_stat != GSS_S_COMPLETE)
3600                 return status;
3601 +       *p = (u32 *)(rcv_buf->head[0].iov_base + out_offset);
3602 +       if (ntohl(*(*p)++) != rqstp->rq_seqno)
3603 +               return status;
3604 +
3605         return 0;
3606  }
3607  
3608 +
3609  static int
3610  gss_unwrap_resp(struct rpc_task *task,
3611                 kxdrproc_t decode, void *rqstp, u32 *p, void *obj)
3612 @@ -962,12 +1119,16 @@
3613                 case RPC_GSS_SVC_NONE:
3614                         goto out_decode;
3615                 case RPC_GSS_SVC_INTEGRITY:
3616 -                       status = gss_unwrap_resp_integ(ctx, decode, 
3617 -                                                       rqstp, &p, obj);
3618 +                       status = gss_unwrap_resp_integ(cred, ctx, rqstp, &p);
3619                         if (status)
3620                                 goto out;
3621                         break;
3622                 case RPC_GSS_SVC_PRIVACY:
3623 +                       status = gss_unwrap_resp_priv(cred, ctx, rqstp, &p);
3624 +                       if (status)
3625 +                               goto out;
3626 +                       break;
3627 +
3628                 default:
3629                         goto out;
3630         }
3631 Index: linux-2.6.10/net/sunrpc/svc.c
3632 ===================================================================
3633 --- linux-2.6.10.orig/net/sunrpc/svc.c  2004-12-25 05:35:28.000000000 +0800
3634 +++ linux-2.6.10/net/sunrpc/svc.c       2005-04-05 14:49:13.409690736 +0800
3635 @@ -264,6 +264,7 @@
3636         u32                     dir, prog, vers, proc,
3637                                 auth_stat, rpc_stat;
3638         int                     auth_res;
3639 +       u32                     *accept_statp;
3640  
3641         rpc_stat = rpc_success;
3642  
3643 @@ -299,6 +300,9 @@
3644         if (vers != 2)          /* RPC version number */
3645                 goto err_bad_rpc;
3646  
3647 +       /* Save position in case we later decide to reject: */
3648 +       accept_statp = resv->iov_base + resv->iov_len;
3649 +
3650         svc_putu32(resv, xdr_zero);             /* ACCEPT */
3651  
3652         rqstp->rq_prog = prog = ntohl(svc_getu32(argv));        /* program number */
3653 @@ -311,10 +315,12 @@
3654          * We do this before anything else in order to get a decent
3655          * auth verifier.
3656          */
3657 -       if (progp->pg_authenticate != NULL)
3658 -               auth_res = progp->pg_authenticate(rqstp, &auth_stat);
3659 -       else
3660 -               auth_res = svc_authenticate(rqstp, &auth_stat);
3661 +       auth_res = svc_authenticate(rqstp, &auth_stat);
3662 +       /* Also give the program a chance to reject this call: */
3663 +       if (auth_res == SVC_OK) {
3664 +               auth_stat = rpc_autherr_badcred;
3665 +               auth_res = progp->pg_authenticate(rqstp);
3666 +       }
3667         switch (auth_res) {
3668         case SVC_OK:
3669                 break;
3670 @@ -437,7 +443,8 @@
3671  err_bad_auth:
3672         dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat));
3673         serv->sv_stats->rpcbadauth++;
3674 -       resv->iov_len -= 4;
3675 +       /* Restore write pointer to location of accept status: */
3676 +       xdr_ressize_check(rqstp, accept_statp);
3677         svc_putu32(resv, xdr_one);      /* REJECT */
3678         svc_putu32(resv, xdr_one);      /* AUTH_ERROR */
3679         svc_putu32(resv, auth_stat);    /* status */
3680 Index: linux-2.6.10/net/sunrpc/sched.c
3681 ===================================================================
3682 --- linux-2.6.10.orig/net/sunrpc/sched.c        2004-12-25 05:34:58.000000000 +0800
3683 +++ linux-2.6.10/net/sunrpc/sched.c     2005-04-05 14:49:13.391693472 +0800
3684 @@ -41,13 +41,7 @@
3685  
3686  static void                    __rpc_default_timer(struct rpc_task *task);
3687  static void                    rpciod_killall(void);
3688 -
3689 -/*
3690 - * When an asynchronous RPC task is activated within a bottom half
3691 - * handler, or while executing another RPC task, it is put on
3692 - * schedq, and rpciod is woken up.
3693 - */
3694 -static RPC_WAITQ(schedq, "schedq");
3695 +static void                    rpc_async_schedule(void *);
3696  
3697  /*
3698   * RPC tasks that create another task (e.g. for contacting the portmapper)
3699 @@ -68,26 +62,18 @@
3700  /*
3701   * rpciod-related stuff
3702   */
3703 -static DECLARE_WAIT_QUEUE_HEAD(rpciod_idle);
3704 -static DECLARE_COMPLETION(rpciod_killer);
3705  static DECLARE_MUTEX(rpciod_sema);
3706  static unsigned int            rpciod_users;
3707 -static pid_t                   rpciod_pid;
3708 -static int                     rpc_inhibit;
3709 +static struct workqueue_struct *rpciod_workqueue;
3710  
3711  /*
3712 - * Spinlock for wait queues. Access to the latter also has to be
3713 - * interrupt-safe in order to allow timers to wake up sleeping tasks.
3714 - */
3715 -static spinlock_t rpc_queue_lock = SPIN_LOCK_UNLOCKED;
3716 -/*
3717   * Spinlock for other critical sections of code.
3718   */
3719  static spinlock_t rpc_sched_lock = SPIN_LOCK_UNLOCKED;
3720  
3721  /*
3722   * Disable the timer for a given RPC task. Should be called with
3723 - * rpc_queue_lock and bh_disabled in order to avoid races within
3724 + * queue->lock and bh_disabled in order to avoid races within
3725   * rpc_run_timer().
3726   */
3727  static inline void
3728 @@ -105,19 +91,19 @@
3729   * without calling del_timer_sync(). The latter could cause a
3730   * deadlock if called while we're holding spinlocks...
3731   */
3732 -static void
3733 -rpc_run_timer(struct rpc_task *task)
3734 +static void rpc_run_timer(struct rpc_task *task)
3735  {
3736         void (*callback)(struct rpc_task *);
3737  
3738 -       spin_lock_bh(&rpc_queue_lock);
3739         callback = task->tk_timeout_fn;
3740         task->tk_timeout_fn = NULL;
3741 -       spin_unlock_bh(&rpc_queue_lock);
3742 -       if (callback) {
3743 +       if (callback && RPC_IS_QUEUED(task)) {
3744                 dprintk("RPC: %4d running timer\n", task->tk_pid);
3745                 callback(task);
3746         }
3747 +       smp_mb__before_clear_bit();
3748 +       clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate);
3749 +       smp_mb__after_clear_bit();
3750  }
3751  
3752  /*
3753 @@ -136,29 +122,21 @@
3754                 task->tk_timeout_fn = timer;
3755         else
3756                 task->tk_timeout_fn = __rpc_default_timer;
3757 +       set_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate);
3758         mod_timer(&task->tk_timer, jiffies + task->tk_timeout);
3759  }
3760  
3761  /*
3762 - * Set up a timer for an already sleeping task.
3763 - */
3764 -void rpc_add_timer(struct rpc_task *task, rpc_action timer)
3765 -{
3766 -       spin_lock_bh(&rpc_queue_lock);
3767 -       if (!RPC_IS_RUNNING(task))
3768 -               __rpc_add_timer(task, timer);
3769 -       spin_unlock_bh(&rpc_queue_lock);
3770 -}
3771 -
3772 -/*
3773   * Delete any timer for the current task. Because we use del_timer_sync(),
3774 - * this function should never be called while holding rpc_queue_lock.
3775 + * this function should never be called while holding queue->lock.
3776   */
3777  static inline void
3778  rpc_delete_timer(struct rpc_task *task)
3779  {
3780 -       if (del_timer_sync(&task->tk_timer))
3781 +       if (test_and_clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate)) {
3782 +               del_singleshot_timer_sync(&task->tk_timer);
3783                 dprintk("RPC: %4d deleting timer\n", task->tk_pid);
3784 +       }
3785  }
3786  
3787  /*
3788 @@ -169,16 +147,17 @@
3789         struct list_head *q;
3790         struct rpc_task *t;
3791  
3792 +       INIT_LIST_HEAD(&task->u.tk_wait.links);
3793         q = &queue->tasks[task->tk_priority];
3794         if (unlikely(task->tk_priority > queue->maxpriority))
3795                 q = &queue->tasks[queue->maxpriority];
3796 -       list_for_each_entry(t, q, tk_list) {
3797 +       list_for_each_entry(t, q, u.tk_wait.list) {
3798                 if (t->tk_cookie == task->tk_cookie) {
3799 -                       list_add_tail(&task->tk_list, &t->tk_links);
3800 +                       list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links);
3801                         return;
3802                 }
3803         }
3804 -       list_add_tail(&task->tk_list, q);
3805 +       list_add_tail(&task->u.tk_wait.list, q);
3806  }
3807  
3808  /*
3809 @@ -189,37 +168,21 @@
3810   * improve overall performance.
3811   * Everyone else gets appended to the queue to ensure proper FIFO behavior.
3812   */
3813 -static int __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
3814 +static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
3815  {
3816 -       if (task->tk_rpcwait == queue)
3817 -               return 0;
3818 +       BUG_ON (RPC_IS_QUEUED(task));
3819  
3820 -       if (task->tk_rpcwait) {
3821 -               printk(KERN_WARNING "RPC: doubly enqueued task!\n");
3822 -               return -EWOULDBLOCK;
3823 -       }
3824         if (RPC_IS_PRIORITY(queue))
3825                 __rpc_add_wait_queue_priority(queue, task);
3826         else if (RPC_IS_SWAPPER(task))
3827 -               list_add(&task->tk_list, &queue->tasks[0]);
3828 +               list_add(&task->u.tk_wait.list, &queue->tasks[0]);
3829         else
3830 -               list_add_tail(&task->tk_list, &queue->tasks[0]);
3831 -       task->tk_rpcwait = queue;
3832 +               list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]);
3833 +       task->u.tk_wait.rpc_waitq = queue;
3834 +       rpc_set_queued(task);
3835  
3836         dprintk("RPC: %4d added to queue %p \"%s\"\n",
3837                                 task->tk_pid, queue, rpc_qname(queue));
3838 -
3839 -       return 0;
3840 -}
3841 -
3842 -int rpc_add_wait_queue(struct rpc_wait_queue *q, struct rpc_task *task)
3843 -{
3844 -       int             result;
3845 -
3846 -       spin_lock_bh(&rpc_queue_lock);
3847 -       result = __rpc_add_wait_queue(q, task);
3848 -       spin_unlock_bh(&rpc_queue_lock);
3849 -       return result;
3850  }
3851  
3852  /*
3853 @@ -229,12 +192,12 @@
3854  {
3855         struct rpc_task *t;
3856  
3857 -       if (!list_empty(&task->tk_links)) {
3858 -               t = list_entry(task->tk_links.next, struct rpc_task, tk_list);
3859 -               list_move(&t->tk_list, &task->tk_list);
3860 -               list_splice_init(&task->tk_links, &t->tk_links);
3861 +       if (!list_empty(&task->u.tk_wait.links)) {
3862 +               t = list_entry(task->u.tk_wait.links.next, struct rpc_task, u.tk_wait.list);
3863 +               list_move(&t->u.tk_wait.list, &task->u.tk_wait.list);
3864 +               list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links);
3865         }
3866 -       list_del(&task->tk_list);
3867 +       list_del(&task->u.tk_wait.list);
3868  }
3869  
3870  /*
3871 @@ -243,31 +206,17 @@
3872   */
3873  static void __rpc_remove_wait_queue(struct rpc_task *task)
3874  {
3875 -       struct rpc_wait_queue *queue = task->tk_rpcwait;
3876 -
3877 -       if (!queue)
3878 -               return;
3879 +       struct rpc_wait_queue *queue;
3880 +       queue = task->u.tk_wait.rpc_waitq;
3881  
3882         if (RPC_IS_PRIORITY(queue))
3883                 __rpc_remove_wait_queue_priority(task);
3884         else
3885 -               list_del(&task->tk_list);
3886 -       task->tk_rpcwait = NULL;
3887 -
3888 +               list_del(&task->u.tk_wait.list);
3889         dprintk("RPC: %4d removed from queue %p \"%s\"\n",
3890                                 task->tk_pid, queue, rpc_qname(queue));
3891  }
3892  
3893 -void
3894 -rpc_remove_wait_queue(struct rpc_task *task)
3895 -{
3896 -       if (!task->tk_rpcwait)
3897 -               return;
3898 -       spin_lock_bh(&rpc_queue_lock);
3899 -       __rpc_remove_wait_queue(task);
3900 -       spin_unlock_bh(&rpc_queue_lock);
3901 -}
3902 -
3903  static inline void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
3904  {
3905         queue->priority = priority;
3906 @@ -290,6 +239,7 @@
3907  {
3908         int i;
3909  
3910 +       spin_lock_init(&queue->lock);
3911         for (i = 0; i < ARRAY_SIZE(queue->tasks); i++)
3912                 INIT_LIST_HEAD(&queue->tasks[i]);
3913         queue->maxpriority = maxprio;
3914 @@ -316,34 +266,31 @@
3915   * Note: If the task is ASYNC, this must be called with 
3916   * the spinlock held to protect the wait queue operation.
3917   */
3918 -static inline void
3919 -rpc_make_runnable(struct rpc_task *task)
3920 +static void rpc_make_runnable(struct rpc_task *task)
3921  {
3922 -       if (task->tk_timeout_fn) {
3923 -               printk(KERN_ERR "RPC: task w/ running timer in rpc_make_runnable!!\n");
3924 +       int do_ret;
3925 +
3926 +       BUG_ON(task->tk_timeout_fn);
3927 +       do_ret = rpc_test_and_set_running(task);
3928 +       rpc_clear_queued(task);
3929 +       if (do_ret)
3930                 return;
3931 -       }
3932 -       rpc_set_running(task);
3933         if (RPC_IS_ASYNC(task)) {
3934 -               if (RPC_IS_SLEEPING(task)) {
3935 -                       int status;
3936 -                       status = __rpc_add_wait_queue(&schedq, task);
3937 -                       if (status < 0) {
3938 -                               printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
3939 -                               task->tk_status = status;
3940 -                               return;
3941 -                       }
3942 -                       rpc_clear_sleeping(task);
3943 -                       wake_up(&rpciod_idle);
3944 +               int status;
3945 +
3946 +               INIT_WORK(&task->u.tk_work, rpc_async_schedule, (void *)task);
3947 +               status = queue_work(task->tk_workqueue, &task->u.tk_work);
3948 +               if (status < 0) {
3949 +                       printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
3950 +                       task->tk_status = status;
3951 +                       return;
3952                 }
3953 -       } else {
3954 -               rpc_clear_sleeping(task);
3955 -               wake_up(&task->tk_wait);
3956 -       }
3957 +       } else
3958 +               wake_up(&task->u.tk_wait.waitq);
3959  }
3960  
3961  /*
3962 - * Place a newly initialized task on the schedq.
3963 + * Place a newly initialized task on the workqueue.
3964   */
3965  static inline void
3966  rpc_schedule_run(struct rpc_task *task)
3967 @@ -352,33 +299,18 @@
3968         if (RPC_IS_ACTIVATED(task))
3969                 return;
3970         task->tk_active = 1;
3971 -       rpc_set_sleeping(task);
3972         rpc_make_runnable(task);
3973  }
3974  
3975  /*
3976 - *     For other people who may need to wake the I/O daemon
3977 - *     but should (for now) know nothing about its innards
3978 - */
3979 -void rpciod_wake_up(void)
3980 -{
3981 -       if(rpciod_pid==0)
3982 -               printk(KERN_ERR "rpciod: wot no daemon?\n");
3983 -       wake_up(&rpciod_idle);
3984 -}
3985 -
3986 -/*
3987   * Prepare for sleeping on a wait queue.
3988   * By always appending tasks to the list we ensure FIFO behavior.
3989   * NB: An RPC task will only receive interrupt-driven events as long
3990   * as it's on a wait queue.
3991   */
3992 -static void
3993 -__rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
3994 +static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
3995                         rpc_action action, rpc_action timer)
3996  {
3997 -       int status;
3998 -
3999         dprintk("RPC: %4d sleep_on(queue \"%s\" time %ld)\n", task->tk_pid,
4000                                 rpc_qname(q), jiffies);
4001  
4002 @@ -388,49 +320,36 @@
4003         }
4004  
4005         /* Mark the task as being activated if so needed */
4006 -       if (!RPC_IS_ACTIVATED(task)) {
4007 +       if (!RPC_IS_ACTIVATED(task))
4008                 task->tk_active = 1;
4009 -               rpc_set_sleeping(task);
4010 -       }
4011  
4012 -       status = __rpc_add_wait_queue(q, task);
4013 -       if (status) {
4014 -               printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
4015 -               task->tk_status = status;
4016 -       } else {
4017 -               rpc_clear_running(task);
4018 -               if (task->tk_callback) {
4019 -                       dprintk(KERN_ERR "RPC: %4d overwrites an active callback\n", task->tk_pid);
4020 -                       BUG();
4021 -               }
4022 -               task->tk_callback = action;
4023 -               __rpc_add_timer(task, timer);
4024 -       }
4025 +       __rpc_add_wait_queue(q, task);
4026 +
4027 +       BUG_ON(task->tk_callback != NULL);
4028 +       task->tk_callback = action;
4029 +       __rpc_add_timer(task, timer);
4030  }
4031  
4032 -void
4033 -rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
4034 +void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
4035                                 rpc_action action, rpc_action timer)
4036  {
4037         /*
4038          * Protect the queue operations.
4039          */
4040 -       spin_lock_bh(&rpc_queue_lock);
4041 +       spin_lock_bh(&q->lock);
4042         __rpc_sleep_on(q, task, action, timer);
4043 -       spin_unlock_bh(&rpc_queue_lock);
4044 +       spin_unlock_bh(&q->lock);
4045  }
4046  
4047  /**
4048 - * __rpc_wake_up_task - wake up a single rpc_task
4049 + * __rpc_do_wake_up_task - wake up a single rpc_task
4050   * @task: task to be woken up
4051   *
4052 - * Caller must hold rpc_queue_lock
4053 + * Caller must hold queue->lock and call this only after rpc_start_wakeup() has succeeded, while the task is still queued.
4054   */
4055 -static void
4056 -__rpc_wake_up_task(struct rpc_task *task)
4057 +static void __rpc_do_wake_up_task(struct rpc_task *task)
4058  {
4059 -       dprintk("RPC: %4d __rpc_wake_up_task (now %ld inh %d)\n",
4060 -                                       task->tk_pid, jiffies, rpc_inhibit);
4061 +       dprintk("RPC: %4d __rpc_wake_up_task (now %ld)\n", task->tk_pid, jiffies);
4062  
4063  #ifdef RPC_DEBUG
4064         if (task->tk_magic != 0xf00baa) {
4065 @@ -445,12 +364,9 @@
4066                 printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task);
4067                 return;
4068         }
4069 -       if (RPC_IS_RUNNING(task))
4070 -               return;
4071  
4072         __rpc_disable_timer(task);
4073 -       if (task->tk_rpcwait != &schedq)
4074 -               __rpc_remove_wait_queue(task);
4075 +       __rpc_remove_wait_queue(task);
4076  
4077         rpc_make_runnable(task);
4078  
4079 @@ -458,6 +374,18 @@
4080  }
4081  
4082  /*
4083 + * Wake up the specified task. Caller must already hold the lock of the queue the task is sleeping on.
4084 + */
4085 +static void __rpc_wake_up_task(struct rpc_task *task)
4086 +{
4087 +       if (rpc_start_wakeup(task)) {
4088 +               if (RPC_IS_QUEUED(task))
4089 +                       __rpc_do_wake_up_task(task);
4090 +               rpc_finish_wakeup(task);
4091 +       }
4092 +}
4093 +
4094 +/*
4095   * Default timeout handler if none specified by user
4096   */
4097  static void
4098 @@ -471,14 +399,18 @@
4099  /*
4100   * Wake up the specified task
4101   */
4102 -void
4103 -rpc_wake_up_task(struct rpc_task *task)
4104 +void rpc_wake_up_task(struct rpc_task *task)
4105  {
4106 -       if (RPC_IS_RUNNING(task))
4107 -               return;
4108 -       spin_lock_bh(&rpc_queue_lock);
4109 -       __rpc_wake_up_task(task);
4110 -       spin_unlock_bh(&rpc_queue_lock);
4111 +       if (rpc_start_wakeup(task)) {
4112 +               if (RPC_IS_QUEUED(task)) {
4113 +                       struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq;
4114 +
4115 +                       spin_lock_bh(&queue->lock);
4116 +                       __rpc_do_wake_up_task(task);
4117 +                       spin_unlock_bh(&queue->lock);
4118 +               }
4119 +               rpc_finish_wakeup(task);
4120 +       }
4121  }
4122  
4123  /*
4124 @@ -494,11 +426,11 @@
4125          */
4126         q = &queue->tasks[queue->priority];
4127         if (!list_empty(q)) {
4128 -               task = list_entry(q->next, struct rpc_task, tk_list);
4129 +               task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
4130                 if (queue->cookie == task->tk_cookie) {
4131                         if (--queue->nr)
4132                                 goto out;
4133 -                       list_move_tail(&task->tk_list, q);
4134 +                       list_move_tail(&task->u.tk_wait.list, q);
4135                 }
4136                 /*
4137                  * Check if we need to switch queues.
4138 @@ -516,7 +448,7 @@
4139                 else
4140                         q = q - 1;
4141                 if (!list_empty(q)) {
4142 -                       task = list_entry(q->next, struct rpc_task, tk_list);
4143 +                       task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
4144                         goto new_queue;
4145                 }
4146         } while (q != &queue->tasks[queue->priority]);
4147 @@ -541,14 +473,14 @@
4148         struct rpc_task *task = NULL;
4149  
4150         dprintk("RPC:      wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue));
4151 -       spin_lock_bh(&rpc_queue_lock);
4152 +       spin_lock_bh(&queue->lock);
4153         if (RPC_IS_PRIORITY(queue))
4154                 task = __rpc_wake_up_next_priority(queue);
4155         else {
4156                 task_for_first(task, &queue->tasks[0])
4157                         __rpc_wake_up_task(task);
4158         }
4159 -       spin_unlock_bh(&rpc_queue_lock);
4160 +       spin_unlock_bh(&queue->lock);
4161  
4162         return task;
4163  }
4164 @@ -557,25 +489,25 @@
4165   * rpc_wake_up - wake up all rpc_tasks
4166   * @queue: rpc_wait_queue on which the tasks are sleeping
4167   *
4168 - * Grabs rpc_queue_lock
4169 + * Grabs queue->lock
4170   */
4171  void rpc_wake_up(struct rpc_wait_queue *queue)
4172  {
4173         struct rpc_task *task;
4174  
4175         struct list_head *head;
4176 -       spin_lock_bh(&rpc_queue_lock);
4177 +       spin_lock_bh(&queue->lock);
4178         head = &queue->tasks[queue->maxpriority];
4179         for (;;) {
4180                 while (!list_empty(head)) {
4181 -                       task = list_entry(head->next, struct rpc_task, tk_list);
4182 +                       task = list_entry(head->next, struct rpc_task, u.tk_wait.list);
4183                         __rpc_wake_up_task(task);
4184                 }
4185                 if (head == &queue->tasks[0])
4186                         break;
4187                 head--;
4188         }
4189 -       spin_unlock_bh(&rpc_queue_lock);
4190 +       spin_unlock_bh(&queue->lock);
4191  }
4192  
4193  /**
4194 @@ -583,18 +515,18 @@
4195   * @queue: rpc_wait_queue on which the tasks are sleeping
4196   * @status: status value to set
4197   *
4198 - * Grabs rpc_queue_lock
4199 + * Grabs queue->lock
4200   */
4201  void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
4202  {
4203         struct list_head *head;
4204         struct rpc_task *task;
4205  
4206 -       spin_lock_bh(&rpc_queue_lock);
4207 +       spin_lock_bh(&queue->lock);
4208         head = &queue->tasks[queue->maxpriority];
4209         for (;;) {
4210                 while (!list_empty(head)) {
4211 -                       task = list_entry(head->next, struct rpc_task, tk_list);
4212 +                       task = list_entry(head->next, struct rpc_task, u.tk_wait.list);
4213                         task->tk_status = status;
4214                         __rpc_wake_up_task(task);
4215                 }
4216 @@ -602,7 +534,7 @@
4217                         break;
4218                 head--;
4219         }
4220 -       spin_unlock_bh(&rpc_queue_lock);
4221 +       spin_unlock_bh(&queue->lock);
4222  }
4223  
4224  /*
4225 @@ -626,22 +558,23 @@
4226  /*
4227   * This is the RPC `scheduler' (or rather, the finite state machine).
4228   */
4229 -static int
4230 -__rpc_execute(struct rpc_task *task)
4231 +static int __rpc_execute(struct rpc_task *task)
4232  {
4233         int             status = 0;
4234  
4235         dprintk("RPC: %4d rpc_execute flgs %x\n",
4236                                 task->tk_pid, task->tk_flags);
4237  
4238 -       if (!RPC_IS_RUNNING(task)) {
4239 -               printk(KERN_WARNING "RPC: rpc_execute called for sleeping task!!\n");
4240 -               return 0;
4241 -       }
4242 +       BUG_ON(RPC_IS_QUEUED(task));
4243  
4244   restarted:
4245         while (1) {
4246                 /*
4247 +                * Garbage collection of pending timers...
4248 +                */
4249 +               rpc_delete_timer(task);
4250 +
4251 +               /*
4252                  * Execute any pending callback.
4253                  */
4254                 if (RPC_DO_CALLBACK(task)) {
4255 @@ -657,7 +590,9 @@
4256                          */
4257                         save_callback=task->tk_callback;
4258                         task->tk_callback=NULL;
4259 +                       lock_kernel();
4260                         save_callback(task);
4261 +                       unlock_kernel();
4262                 }
4263  
4264                 /*
4265 @@ -665,43 +600,35 @@
4266                  * tk_action may be NULL when the task has been killed
4267                  * by someone else.
4268                  */
4269 -               if (RPC_IS_RUNNING(task)) {
4270 -                       /*
4271 -                        * Garbage collection of pending timers...
4272 -                        */
4273 -                       rpc_delete_timer(task);
4274 +               if (!RPC_IS_QUEUED(task)) {
4275                         if (!task->tk_action)
4276                                 break;
4277 +                       lock_kernel();
4278                         task->tk_action(task);
4279 -                       /* micro-optimization to avoid spinlock */
4280 -                       if (RPC_IS_RUNNING(task))
4281 -                               continue;
4282 +                       unlock_kernel();
4283                 }
4284  
4285                 /*
4286 -                * Check whether task is sleeping.
4287 +                * Lockless check for whether task is sleeping or not.
4288                  */
4289 -               spin_lock_bh(&rpc_queue_lock);
4290 -               if (!RPC_IS_RUNNING(task)) {
4291 -                       rpc_set_sleeping(task);
4292 -                       if (RPC_IS_ASYNC(task)) {
4293 -                               spin_unlock_bh(&rpc_queue_lock);
4294 +               if (!RPC_IS_QUEUED(task))
4295 +                       continue;
4296 +               rpc_clear_running(task);
4297 +               if (RPC_IS_ASYNC(task)) {
4298 +                       /* Careful! we may have raced... */
4299 +                       if (RPC_IS_QUEUED(task))
4300                                 return 0;
4301 -                       }
4302 +                       if (rpc_test_and_set_running(task))
4303 +                               return 0;
4304 +                       continue;
4305                 }
4306 -               spin_unlock_bh(&rpc_queue_lock);
4307  
4308 -               if (!RPC_IS_SLEEPING(task))
4309 -                       continue;
4310                 /* sync task: sleep here */
4311                 dprintk("RPC: %4d sync task going to sleep\n", task->tk_pid);
4312 -               if (current->pid == rpciod_pid)
4313 -                       printk(KERN_ERR "RPC: rpciod waiting on sync task!\n");
4314 -
4315                 if (RPC_TASK_UNINTERRUPTIBLE(task)) {
4316 -                       __wait_event(task->tk_wait, !RPC_IS_SLEEPING(task));
4317 +                       __wait_event(task->u.tk_wait.waitq, !RPC_IS_QUEUED(task));
4318                 } else {
4319 -                       __wait_event_interruptible(task->tk_wait, !RPC_IS_SLEEPING(task), status);
4320 +                       __wait_event_interruptible(task->u.tk_wait.waitq, !RPC_IS_QUEUED(task), status);
4321                         /*
4322                          * When a sync task receives a signal, it exits with
4323                          * -ERESTARTSYS. In order to catch any callbacks that
4324 @@ -715,11 +642,14 @@
4325                                 rpc_wake_up_task(task);
4326                         }
4327                 }
4328 +               rpc_set_running(task);
4329                 dprintk("RPC: %4d sync task resuming\n", task->tk_pid);
4330         }
4331  
4332         if (task->tk_exit) {
4333 +               lock_kernel();
4334                 task->tk_exit(task);
4335 +               unlock_kernel();
4336                 /* If tk_action is non-null, the user wants us to restart */
4337                 if (task->tk_action) {
4338                         if (!RPC_ASSASSINATED(task)) {
4339 @@ -738,7 +668,6 @@
4340  
4341         /* Release all resources associated with the task */
4342         rpc_release_task(task);
4343 -
4344         return status;
4345  }
4346  
4347 @@ -754,57 +683,16 @@
4348  int
4349  rpc_execute(struct rpc_task *task)
4350  {
4351 -       int status = -EIO;
4352 -       if (rpc_inhibit) {
4353 -               printk(KERN_INFO "RPC: execution inhibited!\n");
4354 -               goto out_release;
4355 -       }
4356 -
4357 -       status = -EWOULDBLOCK;
4358 -       if (task->tk_active) {
4359 -               printk(KERN_ERR "RPC: active task was run twice!\n");
4360 -               goto out_err;
4361 -       }
4362 +       BUG_ON(task->tk_active);
4363  
4364         task->tk_active = 1;
4365         rpc_set_running(task);
4366         return __rpc_execute(task);
4367 - out_release:
4368 -       rpc_release_task(task);
4369 - out_err:
4370 -       return status;
4371  }
4372  
4373 -/*
4374 - * This is our own little scheduler for async RPC tasks.
4375 - */
4376 -static void
4377 -__rpc_schedule(void)
4378 +static void rpc_async_schedule(void *arg)
4379  {
4380 -       struct rpc_task *task;
4381 -       int             count = 0;
4382 -
4383 -       dprintk("RPC:      rpc_schedule enter\n");
4384 -       while (1) {
4385 -
4386 -               task_for_first(task, &schedq.tasks[0]) {
4387 -                       __rpc_remove_wait_queue(task);
4388 -                       spin_unlock_bh(&rpc_queue_lock);
4389 -
4390 -                       __rpc_execute(task);
4391 -                       spin_lock_bh(&rpc_queue_lock);
4392 -               } else {
4393 -                       break;
4394 -               }
4395 -
4396 -               if (++count >= 200 || need_resched()) {
4397 -                       count = 0;
4398 -                       spin_unlock_bh(&rpc_queue_lock);
4399 -                       schedule();
4400 -                       spin_lock_bh(&rpc_queue_lock);
4401 -               }
4402 -       }
4403 -       dprintk("RPC:      rpc_schedule leave\n");
4404 +       __rpc_execute((struct rpc_task *)arg);
4405  }
4406  
4407  /*
4408 @@ -862,7 +750,6 @@
4409         task->tk_client = clnt;
4410         task->tk_flags  = flags;
4411         task->tk_exit   = callback;
4412 -       init_waitqueue_head(&task->tk_wait);
4413         if (current->uid != current->fsuid || current->gid != current->fsgid)
4414                 task->tk_flags |= RPC_TASK_SETUID;
4415  
4416 @@ -873,7 +760,11 @@
4417  
4418         task->tk_priority = RPC_PRIORITY_NORMAL;
4419         task->tk_cookie = (unsigned long)current;
4420 -       INIT_LIST_HEAD(&task->tk_links);
4421 +
4422 +       /* Initialize workqueue for async tasks */
4423 +       task->tk_workqueue = rpciod_workqueue;
4424 +       if (!RPC_IS_ASYNC(task))
4425 +               init_waitqueue_head(&task->u.tk_wait.waitq);
4426  
4427         /* Add to global list of all tasks */
4428         spin_lock(&rpc_sched_lock);
4429 @@ -944,8 +835,7 @@
4430         goto out;
4431  }
4432  
4433 -void
4434 -rpc_release_task(struct rpc_task *task)
4435 +void rpc_release_task(struct rpc_task *task)
4436  {
4437         dprintk("RPC: %4d release task\n", task->tk_pid);
4438  
4439 @@ -963,19 +853,9 @@
4440         list_del(&task->tk_task);
4441         spin_unlock(&rpc_sched_lock);
4442  
4443 -       /* Protect the execution below. */
4444 -       spin_lock_bh(&rpc_queue_lock);
4445 -
4446 -       /* Disable timer to prevent zombie wakeup */
4447 -       __rpc_disable_timer(task);
4448 -
4449 -       /* Remove from any wait queue we're still on */
4450 -       __rpc_remove_wait_queue(task);
4451 -
4452 +       BUG_ON (RPC_IS_QUEUED(task));
4453         task->tk_active = 0;
4454  
4455 -       spin_unlock_bh(&rpc_queue_lock);
4456 -
4457         /* Synchronously delete any running timer */
4458         rpc_delete_timer(task);
4459  
4460 @@ -1005,10 +885,9 @@
4461   * queue 'childq'. If so returns a pointer to the parent.
4462   * Upon failure returns NULL.
4463   *
4464 - * Caller must hold rpc_queue_lock
4465 + * Caller must hold childq.lock
4466   */
4467 -static inline struct rpc_task *
4468 -rpc_find_parent(struct rpc_task *child)
4469 +static inline struct rpc_task *rpc_find_parent(struct rpc_task *child)
4470  {
4471         struct rpc_task *task, *parent;
4472         struct list_head *le;
4473 @@ -1021,17 +900,16 @@
4474         return NULL;
4475  }
4476  
4477 -static void
4478 -rpc_child_exit(struct rpc_task *child)
4479 +static void rpc_child_exit(struct rpc_task *child)
4480  {
4481         struct rpc_task *parent;
4482  
4483 -       spin_lock_bh(&rpc_queue_lock);
4484 +       spin_lock_bh(&childq.lock);
4485         if ((parent = rpc_find_parent(child)) != NULL) {
4486                 parent->tk_status = child->tk_status;
4487                 __rpc_wake_up_task(parent);
4488         }
4489 -       spin_unlock_bh(&rpc_queue_lock);
4490 +       spin_unlock_bh(&childq.lock);
4491  }
4492  
4493  /*
4494 @@ -1054,22 +932,20 @@
4495         return NULL;
4496  }
4497  
4498 -void
4499 -rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func)
4500 +void rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func)
4501  {
4502 -       spin_lock_bh(&rpc_queue_lock);
4503 +       spin_lock_bh(&childq.lock);
4504         /* N.B. Is it possible for the child to have already finished? */
4505         __rpc_sleep_on(&childq, task, func, NULL);
4506         rpc_schedule_run(child);
4507 -       spin_unlock_bh(&rpc_queue_lock);
4508 +       spin_unlock_bh(&childq.lock);
4509  }
4510  
4511  /*
4512   * Kill all tasks for the given client.
4513   * XXX: kill their descendants as well?
4514   */
4515 -void
4516 -rpc_killall_tasks(struct rpc_clnt *clnt)
4517 +void rpc_killall_tasks(struct rpc_clnt *clnt)
4518  {
4519         struct rpc_task *rovr;
4520         struct list_head *le;
4521 @@ -1091,93 +967,14 @@
4522  
4523  static DECLARE_MUTEX_LOCKED(rpciod_running);
4524  
4525 -static inline int
4526 -rpciod_task_pending(void)
4527 -{
4528 -       return !list_empty(&schedq.tasks[0]);
4529 -}
4530 -
4531 -
4532 -/*
4533 - * This is the rpciod kernel thread
4534 - */
4535 -static int
4536 -rpciod(void *ptr)
4537 -{
4538 -       int             rounds = 0;
4539 -
4540 -       lock_kernel();
4541 -       /*
4542 -        * Let our maker know we're running ...
4543 -        */
4544 -       rpciod_pid = current->pid;
4545 -       up(&rpciod_running);
4546 -
4547 -       daemonize("rpciod");
4548 -       allow_signal(SIGKILL);
4549 -
4550 -       dprintk("RPC: rpciod starting (pid %d)\n", rpciod_pid);
4551 -       spin_lock_bh(&rpc_queue_lock);
4552 -       while (rpciod_users) {
4553 -               DEFINE_WAIT(wait);
4554 -               if (signalled()) {
4555 -                       spin_unlock_bh(&rpc_queue_lock);
4556 -                       rpciod_killall();
4557 -                       flush_signals(current);
4558 -                       spin_lock_bh(&rpc_queue_lock);
4559 -               }
4560 -               __rpc_schedule();
4561 -               if (current->flags & PF_FREEZE) {
4562 -                       spin_unlock_bh(&rpc_queue_lock);
4563 -                       refrigerator(PF_FREEZE);
4564 -                       spin_lock_bh(&rpc_queue_lock);
4565 -               }
4566 -
4567 -               if (++rounds >= 64) {   /* safeguard */
4568 -                       spin_unlock_bh(&rpc_queue_lock);
4569 -                       schedule();
4570 -                       rounds = 0;
4571 -                       spin_lock_bh(&rpc_queue_lock);
4572 -               }
4573 -
4574 -               dprintk("RPC: rpciod back to sleep\n");
4575 -               prepare_to_wait(&rpciod_idle, &wait, TASK_INTERRUPTIBLE);
4576 -               if (!rpciod_task_pending() && !signalled()) {
4577 -                       spin_unlock_bh(&rpc_queue_lock);
4578 -                       schedule();
4579 -                       rounds = 0;
4580 -                       spin_lock_bh(&rpc_queue_lock);
4581 -               }
4582 -               finish_wait(&rpciod_idle, &wait);
4583 -               dprintk("RPC: switch to rpciod\n");
4584 -       }
4585 -       spin_unlock_bh(&rpc_queue_lock);
4586 -
4587 -       dprintk("RPC: rpciod shutdown commences\n");
4588 -       if (!list_empty(&all_tasks)) {
4589 -               printk(KERN_ERR "rpciod: active tasks at shutdown?!\n");
4590 -               rpciod_killall();
4591 -       }
4592 -
4593 -       dprintk("RPC: rpciod exiting\n");
4594 -       unlock_kernel();
4595 -
4596 -       rpciod_pid = 0;
4597 -       complete_and_exit(&rpciod_killer, 0);
4598 -       return 0;
4599 -}
4600 -
4601 -static void
4602 -rpciod_killall(void)
4603 +static void rpciod_killall(void)
4604  {
4605         unsigned long flags;
4606  
4607         while (!list_empty(&all_tasks)) {
4608                 clear_thread_flag(TIF_SIGPENDING);
4609                 rpc_killall_tasks(NULL);
4610 -               spin_lock_bh(&rpc_queue_lock);
4611 -               __rpc_schedule();
4612 -               spin_unlock_bh(&rpc_queue_lock);
4613 +               flush_workqueue(rpciod_workqueue);
4614                 if (!list_empty(&all_tasks)) {
4615                         dprintk("rpciod_killall: waiting for tasks to exit\n");
4616                         yield();
4617 @@ -1195,28 +992,30 @@
4618  int
4619  rpciod_up(void)
4620  {
4621 +       struct workqueue_struct *wq;
4622         int error = 0;
4623  
4624         down(&rpciod_sema);
4625 -       dprintk("rpciod_up: pid %d, users %d\n", rpciod_pid, rpciod_users);
4626 +       dprintk("rpciod_up: users %d\n", rpciod_users);
4627         rpciod_users++;
4628 -       if (rpciod_pid)
4629 +       if (rpciod_workqueue)
4630                 goto out;
4631         /*
4632          * If there's no pid, we should be the first user.
4633          */
4634         if (rpciod_users > 1)
4635 -               printk(KERN_WARNING "rpciod_up: no pid, %d users??\n", rpciod_users);
4636 +               printk(KERN_WARNING "rpciod_up: no workqueue, %d users??\n", rpciod_users);
4637         /*
4638          * Create the rpciod thread and wait for it to start.
4639          */
4640 -       error = kernel_thread(rpciod, NULL, 0);
4641 -       if (error < 0) {
4642 -               printk(KERN_WARNING "rpciod_up: create thread failed, error=%d\n", error);
4643 +       error = -ENOMEM;
4644 +       wq = create_workqueue("rpciod");
4645 +       if (wq == NULL) {
4646 +               printk(KERN_WARNING "rpciod_up: create workqueue failed, error=%d\n", error);
4647                 rpciod_users--;
4648                 goto out;
4649         }
4650 -       down(&rpciod_running);
4651 +       rpciod_workqueue = wq;
4652         error = 0;
4653  out:
4654         up(&rpciod_sema);
4655 @@ -1227,20 +1026,21 @@
4656  rpciod_down(void)
4657  {
4658         down(&rpciod_sema);
4659 -       dprintk("rpciod_down pid %d sema %d\n", rpciod_pid, rpciod_users);
4660 +       dprintk("rpciod_down sema %d\n", rpciod_users);
4661         if (rpciod_users) {
4662                 if (--rpciod_users)
4663                         goto out;
4664         } else
4665 -               printk(KERN_WARNING "rpciod_down: pid=%d, no users??\n", rpciod_pid);
4666 +               printk(KERN_WARNING "rpciod_down: no users??\n");
4667  
4668 -       if (!rpciod_pid) {
4669 +       if (!rpciod_workqueue) {
4670                 dprintk("rpciod_down: Nothing to do!\n");
4671                 goto out;
4672         }
4673 +       rpciod_killall();
4674  
4675 -       kill_proc(rpciod_pid, SIGKILL, 1);
4676 -       wait_for_completion(&rpciod_killer);
4677 +       destroy_workqueue(rpciod_workqueue);
4678 +       rpciod_workqueue = NULL;
4679   out:
4680         up(&rpciod_sema);
4681  }
4682 @@ -1258,7 +1058,12 @@
4683         }
4684         printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout "
4685                 "-rpcwait -action- --exit--\n");
4686 -       alltask_for_each(t, le, &all_tasks)
4687 +       alltask_for_each(t, le, &all_tasks) {
4688 +               const char *rpc_waitq = "none";
4689 +
4690 +               if (RPC_IS_QUEUED(t))
4691 +                       rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq);
4692 +
4693                 printk("%05d %04d %04x %06d %8p %6d %8p %08ld %8s %8p %8p\n",
4694                         t->tk_pid,
4695                         (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1),
4696 @@ -1266,8 +1071,9 @@
4697                         t->tk_client,
4698                         (t->tk_client ? t->tk_client->cl_prog : 0),
4699                         t->tk_rqstp, t->tk_timeout,
4700 -                       rpc_qname(t->tk_rpcwait),
4701 +                       rpc_waitq,
4702                         t->tk_action, t->tk_exit);
4703 +       }
4704         spin_unlock(&rpc_sched_lock);
4705  }
4706  #endif
4707 Index: linux-2.6.10/net/sunrpc/sunrpc_syms.c
4708 ===================================================================
4709 --- linux-2.6.10.orig/net/sunrpc/sunrpc_syms.c  2004-12-25 05:35:25.000000000 +0800
4710 +++ linux-2.6.10/net/sunrpc/sunrpc_syms.c       2005-04-05 14:49:13.411690432 +0800
4711 @@ -58,6 +58,9 @@
4712  EXPORT_SYMBOL(rpc_wake_up);
4713  EXPORT_SYMBOL(rpc_queue_upcall);
4714  EXPORT_SYMBOL(rpc_mkpipe);
4715 +EXPORT_SYMBOL(rpc_mkdir);
4716 +EXPORT_SYMBOL(rpc_rmdir);
4717 +
4718  
4719  /* Client transport */
4720  EXPORT_SYMBOL(xprt_create_proto);
4721 @@ -90,6 +93,7 @@
4722  EXPORT_SYMBOL(svc_auth_register);
4723  EXPORT_SYMBOL(auth_domain_lookup);
4724  EXPORT_SYMBOL(svc_authenticate);
4725 +EXPORT_SYMBOL(svc_set_client);
4726  
4727  /* RPC statistics */
4728  #ifdef CONFIG_PROC_FS
4729 Index: linux-2.6.10/kernel/exit.c
4730 ===================================================================
4731 --- linux-2.6.10.orig/kernel/exit.c     2005-04-05 14:48:52.534864192 +0800
4732 +++ linux-2.6.10/kernel/exit.c  2005-04-05 14:50:57.737830448 +0800
4733 @@ -848,6 +848,8 @@
4734         for (;;) ;
4735  }
4736  
4737 +EXPORT_SYMBOL(do_exit);
4738 +
4739  NORET_TYPE void complete_and_exit(struct completion *comp, long code)
4740  {
4741         if (comp)
4742 Index: linux-2.6.10/fs/locks.c
4743 ===================================================================
4744 --- linux-2.6.10.orig/fs/locks.c        2004-12-25 05:35:28.000000000 +0800
4745 +++ linux-2.6.10/fs/locks.c     2005-04-05 14:49:13.434686936 +0800
4746 @@ -1096,15 +1096,13 @@
4747  */
4748  void remove_lease(struct file_lock *fl)
4749  {
4750 -       if (!IS_LEASE(fl))
4751 -               return;
4752 -
4753         lock_kernel();
4754 -
4755 +       if (!fl || !IS_LEASE(fl))
4756 +               goto out;
4757         fl->fl_type = F_UNLCK | F_INPROGRESS;
4758         fl->fl_break_time = jiffies - 10;
4759         time_out_leases(fl->fl_file->f_dentry->d_inode);
4760 -
4761 +out:
4762         unlock_kernel();
4763  }
4764  
4765 @@ -1563,9 +1561,6 @@
4766                 error = filp->f_op->lock(filp, F_GETLK, &file_lock);
4767                 if (error < 0)
4768                         goto out;
4769 -               else if (error == LOCK_USE_CLNT)
4770 -                 /* Bypass for NFS with no locking - 2.0.36 compat */
4771 -                 fl = posix_test_lock(filp, &file_lock);
4772                 else
4773                   fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
4774         } else {
4775 @@ -1708,9 +1703,6 @@
4776                 error = filp->f_op->lock(filp, F_GETLK, &file_lock);
4777                 if (error < 0)
4778                         goto out;
4779 -               else if (error == LOCK_USE_CLNT)
4780 -                 /* Bypass for NFS with no locking - 2.0.36 compat */
4781 -                 fl = posix_test_lock(filp, &file_lock);
4782                 else
4783                   fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
4784         } else {
4785 Index: linux-2.6.10/fs/dcache.c
4786 ===================================================================
4787 --- linux-2.6.10.orig/fs/dcache.c       2005-03-31 15:35:26.000000000 +0800
4788 +++ linux-2.6.10/fs/dcache.c    2005-04-05 14:49:13.413690128 +0800
4789 @@ -789,6 +789,54 @@
4790  }
4791  
4792  /**
4793 + * d_instantiate_unique - instantiate a non-aliased dentry
4794 + * @entry: dentry to instantiate
4795 + * @inode: inode to attach to this dentry
4796 + *
4797 + * Fill in inode information in the entry. On success, it returns NULL.
4798 + * If an unhashed alias of "entry" already exists, then we return the
4799 + * aliased dentry instead.
4800 + *
4801 + * Note that in order to avoid conflicts with rename() etc, the caller
4802 + * had better be holding the parent directory semaphore.
4803 + */
4804 +struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
4805 +{
4806 +       struct dentry *alias;
4807 +       int len = entry->d_name.len;
4808 +       const char *name = entry->d_name.name;
4809 +       unsigned int hash = entry->d_name.hash;
4810 +
4811 +       BUG_ON(!list_empty(&entry->d_alias));
4812 +       spin_lock(&dcache_lock);
4813 +       if (!inode)
4814 +               goto do_negative;
4815 +       list_for_each_entry(alias, &inode->i_dentry, d_alias) {
4816 +               struct qstr *qstr = &alias->d_name;
4817 +
4818 +               if (qstr->hash != hash)
4819 +                       continue;
4820 +               if (alias->d_parent != entry->d_parent)
4821 +                       continue;
4822 +               if (qstr->len != len)
4823 +                       continue;
4824 +               if (memcmp(qstr->name, name, len))
4825 +                       continue;
4826 +               dget_locked(alias);
4827 +               spin_unlock(&dcache_lock);
4828 +               BUG_ON(!d_unhashed(alias));
4829 +               return alias;
4830 +       }
4831 +       list_add(&entry->d_alias, &inode->i_dentry);
4832 +do_negative:
4833 +       entry->d_inode = inode;
4834 +       spin_unlock(&dcache_lock);
4835 +       security_d_instantiate(entry, inode);
4836 +       return NULL;
4837 +}
4838 +EXPORT_SYMBOL(d_instantiate_unique);
4839 +
4840 +/**
4841   * d_alloc_root - allocate root dentry
4842   * @root_inode: inode to allocate the root for
4843   *
4844 Index: linux-2.6.10/fs/lockd/svc.c
4845 ===================================================================
4846 --- linux-2.6.10.orig/fs/lockd/svc.c    2005-03-31 15:35:26.000000000 +0800
4847 +++ linux-2.6.10/fs/lockd/svc.c 2005-04-05 14:49:13.458683288 +0800
4848 @@ -418,6 +418,38 @@
4849         return 0;                                                       \
4850  }
4851  
4852 +static inline int is_callback(u32 proc)
4853 +{
4854 +       return proc == NLMPROC_GRANTED
4855 +               || proc == NLMPROC_GRANTED_MSG
4856 +               || proc == NLMPROC_TEST_RES
4857 +               || proc == NLMPROC_LOCK_RES
4858 +               || proc == NLMPROC_CANCEL_RES
4859 +               || proc == NLMPROC_UNLOCK_RES
4860 +               || proc == NLMPROC_NSM_NOTIFY;
4861 +}
4862 +
4863 +/* lockd's pg_authenticate hook: NULL proc and callbacks bypass svc_set_client(). */
4864 +static int lockd_authenticate(struct svc_rqst *rqstp)
4865 +{
4866 +       rqstp->rq_client = NULL;
4867 +       switch (rqstp->rq_authop->flavour) {
4868 +               case RPC_AUTH_NULL:
4869 +               case RPC_AUTH_UNIX:
4870 +                       if (rqstp->rq_proc == 0)
4871 +                               return SVC_OK;
4872 +                       if (is_callback(rqstp->rq_proc)) {
4873 +                               /* Leave it to individual procedures to
4874 +                                * call nlmsvc_lookup_host(rqstp)
4875 +                                */
4876 +                               return SVC_OK;
4877 +                       }
4878 +                       return svc_set_client(rqstp);
4879 +       }
4880 +       return SVC_DENIED;
4881 +}
4882 +
4883 +
4884  param_set_min_max(port, int, simple_strtol, 0, 65535)
4885  param_set_min_max(grace_period, unsigned long, simple_strtoul,
4886                   nlm_grace_period_min, nlm_grace_period_max)
4887 @@ -498,4 +530,5 @@
4888         .pg_name        = "lockd",              /* service name */
4889         .pg_class       = "nfsd",               /* share authentication with nfsd */
4890         .pg_stats       = &nlmsvc_stats,        /* stats table */
4891 +       .pg_authenticate = &lockd_authenticate  /* export authentication */
4892  };
4893 Index: linux-2.6.10/fs/nfsd/nfs4xdr.c
4894 ===================================================================
4895 --- linux-2.6.10.orig/fs/nfsd/nfs4xdr.c 2004-12-25 05:35:24.000000000 +0800
4896 +++ linux-2.6.10/fs/nfsd/nfs4xdr.c      2005-04-05 14:49:13.425688304 +0800
4897 @@ -60,121 +60,6 @@
4898  
4899  #define NFSDDBG_FACILITY               NFSDDBG_XDR
4900  
4901 -static const char utf8_byte_len[256] = {
4902 -       1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
4903 -       1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
4904 -       1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
4905 -       1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
4906 -       0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
4907 -       0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
4908 -       0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
4909 -       3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4,5,5,5,5,6,6,0,0
4910 -};
4911 -
4912 -static inline int
4913 -is_legal_utf8_sequence(unsigned char *source, int length)
4914 -{
4915 -       unsigned char *ptr;
4916 -       unsigned char c;
4917 -
4918 -       if (length==1) return 1;
4919 -
4920 -       /* Check for overlong sequence, and check second byte */
4921 -       c = *(source + 1);
4922 -       switch (*source) {
4923 -       case 0xE0: /* 3 bytes */
4924 -               if ( c < 0xA0 ) return 0;
4925 -               break;
4926 -       case 0xF0: /* 4 bytes */
4927 -               if ( c < 0x90 ) return 0;
4928 -               break;
4929 -       case 0xF8: /* 5 bytes */
4930 -               if ( c < 0xC8 ) return 0;
4931 -               break;
4932 -       case 0xFC: /* 6 bytes */
4933 -               if ( c < 0x84 ) return 0;
4934 -               break;
4935 -       default:
4936 -               if ( (c & 0xC0) != 0x80) return 0;
4937 -       }
4938 -
4939 -       /* Check that trailing bytes look like 10xxxxxx */
4940 -       for (ptr = source++ + length - 1; ptr>source; ptr--)
4941 -               if ( ((*ptr) & 0xC0) != 0x80 ) return 0;
4942 -       return 1;
4943 -}
4944 -
4945 -/* This does some screening on disallowed unicode characters.  It is NOT
4946 - * comprehensive.
4947 - */
4948 -static int
4949 -is_allowed_utf8_char(unsigned char *source, int length)
4950 -{
4951 -       /* We assume length and source point to a valid utf8 sequence */
4952 -       unsigned char c;
4953 -
4954 -       /* Disallow F0000 and up (in utf8, F3B08080) */
4955 -       if (*source > 0xF3 ) return 0;
4956 -       c = *(source + 1);
4957 -       switch (*source) {
4958 -       case 0xF3:
4959 -               if (c >= 0xB0) return 0;
4960 -               break;
4961 -       /* Disallow D800-F8FF (in utf8, EDA080-EFA3BF */
4962 -       case 0xED:
4963 -               if (c >= 0xA0) return 0;
4964 -               break;
4965 -       case 0xEE:
4966 -               return 0;
4967 -               break;
4968 -       case 0xEF:
4969 -               if (c <= 0xA3) return 0;
4970 -       /* Disallow FFF9-FFFF (EFBFB9-EFBFBF) */
4971 -               if (c==0xBF)
4972 -                       /* Don't need to check <=0xBF, since valid utf8 */
4973 -                       if ( *(source+2) >= 0xB9) return 0;
4974 -               break;
4975 -       }
4976 -       return 1;
4977 -}
4978 -
4979 -/* This routine should really check to see that the proper stringprep
4980 - * mappings have been applied.  Instead, we do a simple screen of some
4981 - * of the more obvious illegal values by calling is_allowed_utf8_char.
4982 - * This will allow many illegal strings through, but if a client behaves,
4983 - * it will get full functionality.  The other option (apart from full
4984 - * stringprep checking) is to limit everything to an easily handled subset,
4985 - * such as 7-bit ascii.
4986 - *
4987 - * Note - currently calling routines ignore return value except as boolean.
4988 - */
4989 -static int
4990 -check_utf8(char *str, int len)
4991 -{
4992 -       unsigned char *chunk, *sourceend;
4993 -       int chunklen;
4994 -
4995 -       chunk = str;
4996 -       sourceend = str + len;
4997 -
4998 -       while (chunk < sourceend) {
4999 -               chunklen = utf8_byte_len[*chunk];
5000 -               if (!chunklen)
5001 -                       return nfserr_inval;
5002 -               if (chunk + chunklen > sourceend)
5003 -                       return nfserr_inval;
5004 -               if (!is_legal_utf8_sequence(chunk, chunklen))
5005 -                       return nfserr_inval;
5006 -               if (!is_allowed_utf8_char(chunk, chunklen))
5007 -                       return nfserr_inval;
5008 -               if ( (chunklen==1) && (!*chunk) )
5009 -                       return nfserr_inval; /* Disallow embedded nulls */
5010 -               chunk += chunklen;
5011 -       }
5012 -
5013 -       return 0;
5014 -}
5015 -
5016  static int
5017  check_filename(char *str, int len, int err)
5018  {
5019 @@ -187,7 +72,7 @@
5020         for (i = 0; i < len; i++)
5021                 if (str[i] == '/')
5022                         return err;
5023 -       return check_utf8(str, len);
5024 +       return 0;
5025  }
5026  
5027  /*
5028 @@ -403,8 +288,6 @@
5029                         READ_BUF(dummy32);
5030                         len += XDR_QUADLEN(dummy32) << 2;
5031                         READMEM(buf, dummy32);
5032 -                       if (check_utf8(buf, dummy32))
5033 -                               return nfserr_inval;
5034                         ace.whotype = nfs4_acl_get_whotype(buf, dummy32);
5035                         status = 0;
5036                         if (ace.whotype != NFS4_ACL_WHO_NAMED)
5037 @@ -439,8 +322,6 @@
5038                 READ_BUF(dummy32);
5039                 len += (XDR_QUADLEN(dummy32) << 2);
5040                 READMEM(buf, dummy32);
5041 -               if (check_utf8(buf, dummy32))
5042 -                       return nfserr_inval;
5043                 if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid)))
5044                         goto out_nfserr;
5045                 iattr->ia_valid |= ATTR_UID;
5046 @@ -452,8 +333,6 @@
5047                 READ_BUF(dummy32);
5048                 len += (XDR_QUADLEN(dummy32) << 2);
5049                 READMEM(buf, dummy32);
5050 -               if (check_utf8(buf, dummy32))
5051 -                       return nfserr_inval;
5052                 if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid)))
5053                         goto out_nfserr;
5054                 iattr->ia_valid |= ATTR_GID;
5055 @@ -525,7 +404,7 @@
5056                 }
5057         }
5058         if (len != expected_len)
5059 -               goto xdr_error;
5060 +               printk("nfsd: funky nfs4 client sent extra bytes in setattr\n");
5061  
5062         DECODE_TAIL;
5063  
5064 @@ -585,8 +464,6 @@
5065                 READ32(create->cr_linklen);
5066                 READ_BUF(create->cr_linklen);
5067                 SAVEMEM(create->cr_linkname, create->cr_linklen);
5068 -               if (check_utf8(create->cr_linkname, create->cr_linklen))
5069 -                       return nfserr_inval;
5070                 break;
5071         case NF4BLK:
5072         case NF4CHR:
5073 @@ -615,6 +492,18 @@
5074  }
5075  
5076  static inline int
5077 +nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr)
5078 +{
5079 +       DECODE_HEAD;
5080 +
5081 +       READ_BUF(sizeof(stateid_t));
5082 +       READ32(dr->dr_stateid.si_generation);
5083 +       COPYMEM(&dr->dr_stateid.si_opaque, sizeof(stateid_opaque_t));
5084 +
5085 +       DECODE_TAIL;
5086 +}
5087 +
5088 +static inline int
5089  nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *getattr)
5090  {
5091         return nfsd4_decode_bitmap(argp, getattr->ga_bmval);
5092 @@ -790,8 +679,8 @@
5093                 READ32(open->op_delegate_type);
5094                 break;
5095         case NFS4_OPEN_CLAIM_DELEGATE_CUR:
5096 -               READ_BUF(sizeof(delegation_stateid_t) + 4);
5097 -               COPYMEM(&open->op_delegate_stateid, sizeof(delegation_stateid_t));
5098 +               READ_BUF(sizeof(stateid_t) + 4);
5099 +               COPYMEM(&open->op_delegate_stateid, sizeof(stateid_t));
5100                 READ32(open->op_fname.len);
5101                 READ_BUF(open->op_fname.len);
5102                 SAVEMEM(open->op_fname.data, open->op_fname.len);
5103 @@ -825,7 +714,7 @@
5104         DECODE_HEAD;
5105                     
5106         open_down->od_stateowner = NULL;
5107 -       READ_BUF(4 + sizeof(stateid_t));
5108 +       READ_BUF(12 + sizeof(stateid_t));
5109         READ32(open_down->od_stateid.si_generation);
5110         COPYMEM(&open_down->od_stateid.si_opaque, sizeof(stateid_opaque_t));
5111         READ32(open_down->od_seqid);
5112 @@ -1170,6 +1059,9 @@
5113                 case OP_CREATE:
5114                         op->status = nfsd4_decode_create(argp, &op->u.create);
5115                         break;
5116 +               case OP_DELEGRETURN:
5117 +                       op->status = nfsd4_decode_delegreturn(argp, &op->u.delegreturn);
5118 +                       break;
5119                 case OP_GETATTR:
5120                         op->status = nfsd4_decode_getattr(argp, &op->u.getattr);
5121                         break;
5122 @@ -1425,7 +1317,7 @@
5123                 if (status)
5124                         goto out_nfserr;
5125         }
5126 -       if ((bmval0 & FATTR4_WORD0_FILEHANDLE) && !fhp) {
5127 +       if ((bmval0 & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) && !fhp) {
5128                 fh_init(&tempfh, NFS4_FHSIZE);
5129                 status = fh_compose(&tempfh, exp, dentry, NULL);
5130                 if (status)
5131 @@ -1471,7 +1363,10 @@
5132         if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) {
5133                 if ((buflen -= 4) < 0)
5134                         goto out_resource;
5135 -               WRITE32( NFS4_FH_NOEXPIRE_WITH_OPEN | NFS4_FH_VOL_RENAME );
5136 +               if (exp->ex_flags & NFSEXP_NOSUBTREECHECK)
5137 +                       WRITE32(NFS4_FH_PERSISTENT);
5138 +               else
5139 +                       WRITE32(NFS4_FH_VOL_RENAME);
5140         }
5141         if (bmval0 & FATTR4_WORD0_CHANGE) {
5142                 /*
5143 @@ -1508,10 +1403,15 @@
5144         if (bmval0 & FATTR4_WORD0_FSID) {
5145                 if ((buflen -= 16) < 0)
5146                         goto out_resource;
5147 -               WRITE32(0);
5148 -               WRITE32(MAJOR(stat.dev));
5149 -               WRITE32(0);
5150 -               WRITE32(MINOR(stat.dev));
5151 +               if (is_fsid(fhp, rqstp->rq_reffh)) {
5152 +                       WRITE64((u64)exp->ex_fsid);
5153 +                       WRITE64((u64)0);
5154 +               } else {
5155 +                       WRITE32(0);
5156 +                       WRITE32(MAJOR(stat.dev));
5157 +                       WRITE32(0);
5158 +                       WRITE32(MINOR(stat.dev));
5159 +               }
5160         }
5161         if (bmval0 & FATTR4_WORD0_UNIQUE_HANDLES) {
5162                 if ((buflen -= 4) < 0)
5163 @@ -1765,17 +1665,65 @@
5164  }
5165  
5166  static int
5167 +nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
5168 +               const char *name, int namlen, u32 *p, int *buflen)
5169 +{
5170 +       struct svc_export *exp = cd->rd_fhp->fh_export;
5171 +       struct dentry *dentry;
5172 +       int nfserr;
5173 +
5174 +       dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen);
5175 +       if (IS_ERR(dentry))
5176 +               return nfserrno(PTR_ERR(dentry));
5177 +
5178 +       exp_get(exp);
5179 +       if (d_mountpoint(dentry)) {
5180 +               if (nfsd_cross_mnt(cd->rd_rqstp, &dentry, &exp)) {
5181 +               /*
5182 +                * -EAGAIN is the only error returned from
5183 +                * nfsd_cross_mnt() and it indicates that an
5184 +                * up-call has  been initiated to fill in the export
5185 +                * options on exp.  When the answer comes back,
5186 +                * this call will be retried.
5187 +                */
5188 +                       nfserr = nfserr_dropit;
5189 +                       goto out_put;
5190 +               }
5191 +
5192 +       }
5193 +       nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval,
5194 +                                       cd->rd_rqstp);
5195 +out_put:
5196 +       dput(dentry);
5197 +       exp_put(exp);
5198 +       return nfserr;
5199 +}
5200 +
5201 +static u32 *
5202 +nfsd4_encode_rdattr_error(u32 *p, int buflen, int nfserr)
5203 +{
5204 +       u32 *attrlenp;
5205 +
5206 +       if (buflen < 6)
5207 +               return NULL;
5208 +       *p++ = htonl(2);
5209 +       *p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */
5210 +       *p++ = htonl(0);                         /* bmval1 */
5211 +
5212 +       attrlenp = p++;
5213 +       *p++ = nfserr;       /* no htonl */
5214 +       *attrlenp = htonl((char *)p - (char *)attrlenp - 4);
5215 +       return p;
5216 +}
5217 +
5218 +static int
5219  nfsd4_encode_dirent(struct readdir_cd *ccd, const char *name, int namlen,
5220                     loff_t offset, ino_t ino, unsigned int d_type)
5221  {
5222         struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common);
5223         int buflen;
5224         u32 *p = cd->buffer;
5225 -       u32 *attrlenp;
5226 -       struct dentry *dentry;
5227 -       struct svc_export *exp = cd->rd_fhp->fh_export;
5228 -       u32 bmval0, bmval1;
5229 -       int nfserr = 0;
5230 +       int nfserr = nfserr_toosmall;
5231  
5232         /* In nfsv4, "." and ".." never make it onto the wire.. */
5233         if (name && isdotent(name, namlen)) {
5234 @@ -1788,106 +1736,44 @@
5235  
5236         buflen = cd->buflen - 4 - XDR_QUADLEN(namlen);
5237         if (buflen < 0)
5238 -               goto nospc;
5239 +               goto fail;
5240  
5241         *p++ = xdr_one;                             /* mark entry present */
5242         cd->offset = p;                             /* remember pointer */
5243         p = xdr_encode_hyper(p, NFS_OFFSET_MAX);    /* offset of next entry */
5244         p = xdr_encode_array(p, name, namlen);      /* name length & name */
5245  
5246 -       /*
5247 -        * Now we come to the ugly part: writing the fattr for this entry.
5248 -        */
5249 -       bmval0 = cd->rd_bmval[0];
5250 -       bmval1 = cd->rd_bmval[1];
5251 -       if ((bmval0 & ~(FATTR4_WORD0_RDATTR_ERROR | FATTR4_WORD0_FILEID)) || bmval1)  {
5252 -               /*
5253 -                * "Heavyweight" case: we have no choice except to
5254 -                * call nfsd4_encode_fattr(). 
5255 -                */
5256 -               dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen);
5257 -               if (IS_ERR(dentry)) {
5258 -                       nfserr = nfserrno(PTR_ERR(dentry));
5259 -                       goto error;
5260 -               }
5261 -
5262 -               exp_get(exp);
5263 -               if (d_mountpoint(dentry)) {
5264 -                       if ((nfserr = nfsd_cross_mnt(cd->rd_rqstp, &dentry, 
5265 -                                        &exp))) {      
5266 -                       /* 
5267 -                        * -EAGAIN is the only error returned from 
5268 -                        * nfsd_cross_mnt() and it indicates that an 
5269 -                        * up-call has  been initiated to fill in the export 
5270 -                        * options on exp.  When the answer comes back,
5271 -                        * this call will be retried.
5272 -                        */
5273 -                               dput(dentry);
5274 -                               exp_put(exp);
5275 -                               nfserr = nfserr_dropit;
5276 -                               goto error;
5277 -                       }
5278 -
5279 -               }
5280 -
5281 -               nfserr = nfsd4_encode_fattr(NULL, exp,
5282 -                               dentry, p, &buflen, cd->rd_bmval,
5283 -                               cd->rd_rqstp);
5284 -               dput(dentry);
5285 -               exp_put(exp);
5286 -               if (!nfserr) {
5287 -                       p += buflen;
5288 -                       goto out;
5289 -               }
5290 -               if (nfserr == nfserr_resource)
5291 -                       goto nospc;
5292 -
5293 -error:
5294 +       nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, p, &buflen);
5295 +       switch (nfserr) {
5296 +       case nfs_ok:
5297 +               p += buflen;
5298 +               break;
5299 +       case nfserr_resource:
5300 +               nfserr = nfserr_toosmall;
5301 +               goto fail;
5302 +       case nfserr_dropit:
5303 +               goto fail;
5304 +       default:
5305                 /*
5306 -                * If we get here, we experienced a miscellaneous
5307 -                * failure while writing the attributes.  If the
5308 -                * client requested the RDATTR_ERROR attribute,
5309 +                * If the client requested the RDATTR_ERROR attribute,
5310                  * we stuff the error code into this attribute
5311                  * and continue.  If this attribute was not requested,
5312                  * then in accordance with the spec, we fail the
5313                  * entire READDIR operation(!)
5314                  */
5315 -               if (!(bmval0 & FATTR4_WORD0_RDATTR_ERROR)) {
5316 -                       cd->common.err = nfserr;
5317 -                       return -EINVAL;
5318 -               }
5319 -
5320 -               bmval0 = FATTR4_WORD0_RDATTR_ERROR;
5321 -               bmval1 = 0;
5322 -               /* falling through here will do the right thing... */
5323 +               if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR))
5324 +                       goto fail;
5325 +               p = nfsd4_encode_rdattr_error(p, buflen, nfserr);
5326 +               nfserr = nfserr_toosmall; /* reported only if the encode above ran out of room */
5327 +               if (p == NULL)
5328 +                       goto fail;
5329         }
5330 -
5331 -       /*
5332 -        * In the common "lightweight" case, we avoid
5333 -        * the overhead of nfsd4_encode_fattr() by assembling
5334 -        * a small fattr by hand.
5335 -        */
5336 -       if (buflen < 6)
5337 -               goto nospc;
5338 -       *p++ = htonl(2);
5339 -       *p++ = htonl(bmval0);
5340 -       *p++ = htonl(bmval1);
5341 -
5342 -       attrlenp = p++;
5343 -       if (bmval0 & FATTR4_WORD0_RDATTR_ERROR)
5344 -               *p++ = nfserr;       /* no htonl */
5345 -       if (bmval0 & FATTR4_WORD0_FILEID)
5346 -               p = xdr_encode_hyper(p, (u64)ino);
5347 -       *attrlenp = htonl((char *)p - (char *)attrlenp - 4);
5348 -
5349 -out:
5350         cd->buflen -= (p - cd->buffer);
5351         cd->buffer = p;
5352         cd->common.err = nfs_ok;
5353         return 0;
5354 -
5355 -nospc:
5356 -       cd->common.err = nfserr_toosmall;
5357 +fail:
5358 +       cd->common.err = nfserr;
5359         return -EINVAL;
5360  }
5361  
5362 @@ -2081,8 +1967,8 @@
5363         case NFS4_OPEN_DELEGATE_NONE:
5364                 break;
5365         case NFS4_OPEN_DELEGATE_READ:
5366 -               RESERVE_SPACE(20 + sizeof(delegation_stateid_t));
5367 -               WRITEMEM(&open->op_delegate_stateid, sizeof(delegation_stateid_t));
5368 +               RESERVE_SPACE(20 + sizeof(stateid_t));
5369 +               WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t));
5370                 WRITE32(0);
5371  
5372                 /*
5373 @@ -2095,8 +1981,8 @@
5374                 ADJUST_ARGS();
5375                 break;
5376         case NFS4_OPEN_DELEGATE_WRITE:
5377 -               RESERVE_SPACE(32 + sizeof(delegation_stateid_t));
5378 -               WRITEMEM(&open->op_delegate_stateid, sizeof(delegation_stateid_t));
5379 +               RESERVE_SPACE(32 + sizeof(stateid_t));
5380 +               WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t));
5381                 WRITE32(0);
5382  
5383                 /*
5384 @@ -2185,10 +2071,17 @@
5385         }
5386         read->rd_vlen = v;
5387  
5388 -       nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp,
5389 -                          read->rd_offset,
5390 -                          read->rd_iov, read->rd_vlen,
5391 -                          &maxcount);
5392 +       if (read->rd_filp)
5393 +               nfserr = nfsd_vfs_read(read->rd_rqstp, read->rd_fhp,
5394 +                               read->rd_filp, read->rd_offset,
5395 +                               read->rd_iov, read->rd_vlen,
5396 +                               &maxcount);
5397 +       else
5398 +               nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp,
5399 +                               read->rd_offset,
5400 +                               read->rd_iov, read->rd_vlen,
5401 +                               &maxcount);
5402 +
5403         if (nfserr == nfserr_symlink)
5404                 nfserr = nfserr_inval;
5405         if (nfserr)
5406 @@ -2460,6 +2353,8 @@
5407         case OP_CREATE:
5408                 nfsd4_encode_create(resp, op->status, &op->u.create);
5409                 break;
5410 +       case OP_DELEGRETURN:
5411 +               break;
5412         case OP_GETATTR:
5413                 op->status = nfsd4_encode_getattr(resp, op->status, &op->u.getattr);
5414                 break;
5415 Index: linux-2.6.10/fs/nfsd/nfs4state.c
5416 ===================================================================
5417 --- linux-2.6.10.orig/fs/nfsd/nfs4state.c       2004-12-25 05:35:23.000000000 +0800
5418 +++ linux-2.6.10/fs/nfsd/nfs4state.c    2005-04-05 14:49:13.421688912 +0800
5419 @@ -44,6 +44,7 @@
5420  #include <linux/mount.h>
5421  #include <linux/workqueue.h>
5422  #include <linux/smp_lock.h>
5423 +#include <linux/kthread.h>
5424  #include <linux/nfs4.h>
5425  #include <linux/nfsd/state.h>
5426  #include <linux/nfsd/xdr4.h>
5427 @@ -56,9 +57,11 @@
5428  static u32 nfs4_reclaim_init = 0;
5429  time_t boot_time;
5430  static time_t grace_end = 0;
5431 +static u32 first_run = 1;       /* laundromat threads first run */
5432  static u32 current_clientid = 1;
5433 -static u32 current_ownerid;
5434 -static u32 current_fileid;
5435 +static u32 current_ownerid = 1;
5436 +static u32 current_fileid = 1;
5437 +static u32 current_delegid = 1;
5438  static u32 nfs4_init;
5439  stateid_t zerostateid;             /* bits all 0 */
5440  stateid_t onestateid;              /* bits all 1 */
5441 @@ -70,14 +73,17 @@
5442  u32 del_perclient = 0;
5443  u32 alloc_file = 0;
5444  u32 free_file = 0;
5445 -u32 alloc_sowner = 0;
5446 -u32 free_sowner = 0;
5447  u32 vfsopen = 0;
5448  u32 vfsclose = 0;
5449 -u32 alloc_lsowner= 0;
5450 +u32 alloc_delegation= 0;
5451 +u32 free_delegation= 0;
5452  
5453  /* forward declarations */
5454  struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
5455 +static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
5456 +static void release_delegation(struct nfs4_delegation *dp);
5457 +static void release_stateid_lockowner(struct nfs4_stateid *open_stp);
5458 +extern char recovery_dirname[];
5459  
5460  /* Locking:
5461   *
5462 @@ -117,6 +123,112 @@
5463  static void release_stateid(struct nfs4_stateid *stp, int flags);
5464  static void release_file(struct nfs4_file *fp);
5465  
5466 +/*
5467 + * Delegation state
5468 + */
5469 +
5470 +/* recall_lock protects the del_recall_lru */
5471 +spinlock_t recall_lock;
5472 +static struct list_head del_recall_lru;
5473 +
5474 +static struct nfs4_delegation *
5475 +alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type)
5476 +{
5477 +       struct nfs4_delegation *dp;
5478 +       struct nfs4_file *fp = stp->st_file;
5479 +
5480 +       dprintk("NFSD alloc_init_deleg\n");
5481 +       if ((dp = kmalloc(sizeof(struct nfs4_delegation),
5482 +               GFP_KERNEL)) == NULL)
5483 +               return dp;
5484 +       INIT_LIST_HEAD(&dp->dl_del_perfile);
5485 +       INIT_LIST_HEAD(&dp->dl_del_perclnt);
5486 +       INIT_LIST_HEAD(&dp->dl_recall_lru);
5487 +       dp->dl_client = clp;
5488 +       dp->dl_file = fp;
5489 +       dp->dl_flock = NULL;
5490 +       dp->dl_stp = stp;
5491 +       dp->dl_flags = 0;
5492 +       dp->dl_type = type;
5493 +       dp->dl_recall.cbr_dp = NULL;
5494 +       dp->dl_recall.cbr_ident = 0;
5495 +       dp->dl_recall.cbr_trunc = 0;
5496 +       dp->dl_stateid.si_boot = boot_time;
5497 +       dp->dl_stateid.si_stateownerid = current_delegid++;
5498 +       dp->dl_stateid.si_fileid = 0;
5499 +       dp->dl_stateid.si_generation = 0;
5500 +       dp->dl_fhlen = current_fh->fh_handle.fh_size;
5501 +       memcpy(dp->dl_fhval, &current_fh->fh_handle.fh_base,
5502 +       current_fh->fh_handle.fh_size);
5503 +       dp->dl_time = 0;
5504 +       atomic_set(&dp->dl_state, NFS4_NO_RECALL);
5505 +       atomic_set(&dp->dl_count, 1);
5506 +       atomic_set(&dp->dl_recall_cnt, 0);
5507 +       list_add(&dp->dl_del_perfile, &fp->fi_del_perfile);
5508 +       list_add(&dp->dl_del_perclnt, &clp->cl_del_perclnt);
5509 +       alloc_delegation++;
5510 +       return dp;
5511 +}
5512 +
5513 +/*
5514 + * Free the delegation structure.
5515 + */
5516 +static void
5517 +nfs4_free_delegation(struct nfs4_delegation *dp)
5518 +{
5519 +       dprintk("NFSD: nfs4_free_delegation freeing dp %p\n",dp);
5520 +       list_del(&dp->dl_recall_lru);
5521 +       kfree(dp);
5522 +       free_delegation++;
5523 +}
5524 +
5525 +/* release_delegation:
5526 + *
5527 + * lease_modify() is called to remove the FS_LEASE file_lock from
5528 + * the i_flock list, eventually calling nfsd's lock_manager
5529 + * fl_release_callback.
5530 + *
5531 + * call either:
5532 + *   nfsd_close : if last close, locks_remove_flock calls lease_modify.
5533 + *                otherwise, recalled state set to NFS4_RECALL_COMPLETE
5534 + *                so that it will be reaped by the laundromat service.
5535 + * or
5536 + *   remove_lease (calls time_out_lease which calls lease_modify).
5537 + *   and nfs4_free_delegation.
5538 + *
5539 + * lock_kernel() protects dp->dl_flock which is set under the kernel lock
5540 + * by nfsd_copy_lock_deleg_callback and nfsd_release_deleg_callback.
5541 + *
5542 + */
5543 +
5544 +static void
5545 +release_delegation(struct nfs4_delegation *dp)
5546 +{
5547 +       /* delayed nfsd_close: test the DELAY_CLOSE bit, not any flag */
5548 +       if (dp->dl_flags & NFS4_DELAY_CLOSE) {
5549 +               struct file *filp = dp->dl_stp->st_vfs_file;
5550 +
5551 +               dprintk("NFSD: release_delegation CLOSE\n");
5552 +               release_stateid_lockowner(dp->dl_stp);
5553 +               kfree(dp->dl_stp);
5554 +               dp->dl_flags &= ~NFS4_DELAY_CLOSE;
5555 +               dp->dl_stp = NULL;
5556 +               atomic_set(&dp->dl_state, NFS4_RECALL_COMPLETE);
5557 +               nfsd_close(filp);
5558 +               vfsclose++;
5559 +       } else {
5560 +               dprintk("NFSD: release_delegation remove lease dl_flock %p\n",
5561 +                       dp->dl_flock);
5562 +               remove_lease(dp->dl_flock);
5563 +               list_del_init(&dp->dl_del_perfile);
5564 +               list_del_init(&dp->dl_del_perclnt);
5565 +               /* dl_count > 0 => outstanding recall rpc */
5566 +               dprintk("NFSD: release_delegation free deleg dl_count %d\n",
5567 +               atomic_read(&dp->dl_count));
5568 +               if (atomic_dec_and_test(&dp->dl_count))
5569 +                       nfs4_free_delegation(dp);
5570 +       }
5571 +}
5572  
5573  /* 
5574   * SETCLIENTID state 
5575 @@ -148,7 +260,7 @@
5576   * for last close replay.
5577   */
5578  static struct list_head        reclaim_str_hashtbl[CLIENT_HASH_SIZE];
5579 -static int reclaim_str_hashtbl_size;
5580 +static int reclaim_str_hashtbl_size = 0;
5581  static struct list_head        conf_id_hashtbl[CLIENT_HASH_SIZE];
5582  static struct list_head        conf_str_hashtbl[CLIENT_HASH_SIZE];
5583  static struct list_head        unconf_str_hashtbl[CLIENT_HASH_SIZE];
5584 @@ -213,12 +325,38 @@
5585         kfree(clp);
5586  }
5587  
5588 +void
5589 +put_nfs4_client(struct nfs4_client *clp)
5590 +{
5591 +       if (atomic_dec_and_test(&clp->cl_count))
5592 +               free_client(clp);
5593 +}
5594 +
5595  static void
5596  expire_client(struct nfs4_client *clp)
5597  {
5598         struct nfs4_stateowner *sop;
5599 +       struct nfs4_delegation *dp;
5600 +       struct nfs4_callback *cb = &clp->cl_callback;
5601 +       struct rpc_clnt *clnt = clp->cl_callback.cb_client;
5602 +
5603 +       dprintk("NFSD: expire_client cl_count %d\n",
5604 +                           atomic_read(&clp->cl_count));
5605  
5606 -       dprintk("NFSD: expire_client\n");
5607 +       /* shutdown rpc client, ending any outstanding recall rpcs */
5608 +       if (atomic_read(&cb->cb_set) == 1 && clnt) {
5609 +               rpc_shutdown_client(clnt);
5610 +               clnt = clp->cl_callback.cb_client = NULL;
5611 +       }
5612 +       while (!list_empty(&clp->cl_del_perclnt)) {
5613 +               dp = list_entry(clp->cl_del_perclnt.next, struct nfs4_delegation, dl_del_perclnt);
5614 +               dprintk("NFSD: expire client. dp %p, dl_state %d, fp %p\n",
5615 +                               dp, atomic_read(&dp->dl_state), dp->dl_flock);
5616 +
5617 +               /* force release of delegation. */
5618 +               atomic_set(&dp->dl_state, NFS4_RECALL_COMPLETE);
5619 +               release_delegation(dp);
5620 +       }
5621         list_del(&clp->cl_idhash);
5622         list_del(&clp->cl_strhash);
5623         list_del(&clp->cl_lru);
5624 @@ -226,7 +364,7 @@
5625                 sop = list_entry(clp->cl_perclient.next, struct nfs4_stateowner, so_perclient);
5626                 release_stateowner(sop);
5627         }
5628 -       free_client(clp);
5629 +       put_nfs4_client(clp);
5630  }
5631  
5632  static struct nfs4_client *
5633 @@ -235,9 +373,13 @@
5634  
5635         if (!(clp = alloc_client(name)))
5636                 goto out;
5637 +       atomic_set(&clp->cl_count, 1);
5638 +       atomic_set(&clp->cl_callback.cb_set, 0);
5639 +       clp->cl_callback.cb_parsed = 0;
5640         INIT_LIST_HEAD(&clp->cl_idhash);
5641         INIT_LIST_HEAD(&clp->cl_strhash);
5642         INIT_LIST_HEAD(&clp->cl_perclient);
5643 +       INIT_LIST_HEAD(&clp->cl_del_perclnt);
5644         INIT_LIST_HEAD(&clp->cl_lru);
5645  out:
5646         return clp;
5647 @@ -420,17 +562,24 @@
5648  {
5649         struct nfs4_callback *cb = &clp->cl_callback;
5650  
5651 +       /* Currently, we only support tcp for the callback channel */
5652 +       if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3))
5653 +               goto out_err;
5654 +
5655         if ( !(parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val,
5656 -                        &cb->cb_addr, &cb->cb_port))) {
5657 -               printk(KERN_INFO "NFSD: BAD callback address. client will not receive delegations\n");
5658 -               cb->cb_parsed = 0;
5659 -               return;
5660 -       }
5661 -       cb->cb_netid.len = se->se_callback_netid_len;
5662 -       cb->cb_netid.data = se->se_callback_netid_val;
5663 +                                 &cb->cb_addr, &cb->cb_port)))
5664 +               goto out_err;
5665         cb->cb_prog = se->se_callback_prog;
5666         cb->cb_ident = se->se_callback_ident;
5667         cb->cb_parsed = 1;
5668 +       return;
5669 +out_err:
5670 +       printk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
5671 +               "will not receive delegations\n",
5672 +               clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
5673 +
5674 +       cb->cb_parsed = 0;
5675 +       return;
5676  }
5677  
5678  /*
5679 @@ -707,6 +856,7 @@
5680                         status = nfserr_clid_inuse;
5681                 else {
5682                         expire_client(conf);
5683 +                       clp = unconf;
5684                         move_to_confirmed(unconf, idhashval);
5685                         status = nfs_ok;
5686                 }
5687 @@ -724,6 +874,7 @@
5688                 if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred)) {
5689                         status = nfserr_clid_inuse;
5690                 } else {
5691 +                       clp = conf;
5692                         status = nfs_ok;
5693                 }
5694                 goto out;
5695 @@ -738,6 +889,7 @@
5696                         status = nfserr_clid_inuse;
5697                 } else {
5698                         status = nfs_ok;
5699 +                       clp = unconf;
5700                         move_to_confirmed(unconf, idhashval);
5701                 }
5702                 goto out;
5703 @@ -757,7 +909,8 @@
5704         status = nfserr_inval;
5705         goto out;
5706  out:
5707 -       /* XXX if status == nfs_ok, probe callback path */
5708 +       if (!status)
5709 +               nfsd4_probe_callback(clp);
5710         nfs4_unlock_state();
5711         return status;
5712  }
5713 @@ -803,6 +956,7 @@
5714         if ((fp = kmalloc(sizeof(struct nfs4_file),GFP_KERNEL))) {
5715                 INIT_LIST_HEAD(&fp->fi_hash);
5716                 INIT_LIST_HEAD(&fp->fi_perfile);
5717 +               INIT_LIST_HEAD(&fp->fi_del_perfile);
5718                 list_add(&fp->fi_hash, &file_hashtbl[hashval]);
5719                 fp->fi_inode = igrab(ino);
5720                 fp->fi_id = current_fileid++;
5721 @@ -822,7 +976,7 @@
5722                 while (!list_empty(&file_hashtbl[i])) {
5723                         fp = list_entry(file_hashtbl[i].next, struct nfs4_file, fi_hash);
5724                         /* this should never be more than once... */
5725 -                       if (!list_empty(&fp->fi_perfile)) {
5726 +                       if (!list_empty(&fp->fi_perfile) || !list_empty(&fp->fi_del_perfile)) {
5727                                 printk("ERROR: release_all_files: file %p is open, creating dangling state !!!\n",fp);
5728                         }
5729                         release_file(fp);
5730 @@ -830,15 +984,36 @@
5731         }
5732  }
5733  
5734 -/* should use a slab cache */
5735 +kmem_cache_t *stateowner_slab = NULL;
5736 +
5737 +int
5738 +nfsd4_init_slabs(void)
5739 +{
5740 +       stateowner_slab = kmem_cache_create("nfsd4_stateowners",
5741 +                       sizeof(struct nfs4_stateowner), 0, 0, NULL, NULL);
5742 +       if (stateowner_slab == NULL)
5743 +               return -ENOMEM;
5744 +       return 0;
5745 +}
5746 +
5747 +int
5748 +nfsd4_free_slabs(void)
5749 +{
5750 +       int status = 0;
5751 +
5752 +       if (stateowner_slab)
5753 +               status = kmem_cache_destroy(stateowner_slab);
5754 +       stateowner_slab = NULL;
5755 +       return status;
5756 +}
5757 +
5758  void
5759  nfs4_free_stateowner(struct kref *kref)
5760  {
5761         struct nfs4_stateowner *sop =
5762                 container_of(kref, struct nfs4_stateowner, so_ref);
5763         kfree(sop->so_owner.data);
5764 -       kfree(sop);
5765 -       free_sowner++;
5766 +       kmem_cache_free(stateowner_slab, sop);
5767  }
5768  
5769  static inline struct nfs4_stateowner *
5770 @@ -846,14 +1021,14 @@
5771  {
5772         struct nfs4_stateowner *sop;
5773  
5774 -       if ((sop = kmalloc(sizeof(struct nfs4_stateowner),GFP_KERNEL))) {
5775 +       if ((sop = kmem_cache_alloc(stateowner_slab, GFP_KERNEL))) {
5776                 if ((sop->so_owner.data = kmalloc(owner->len, GFP_KERNEL))) {
5777                         memcpy(sop->so_owner.data, owner->data, owner->len);
5778                         sop->so_owner.len = owner->len;
5779                         kref_init(&sop->so_ref);
5780                         return sop;
5781                 } 
5782 -               kfree(sop);
5783 +               kmem_cache_free(stateowner_slab, sop);
5784         }
5785         return NULL;
5786  }
5787 @@ -887,7 +1062,6 @@
5788         rp->rp_status = NFSERR_SERVERFAULT;
5789         rp->rp_buflen = 0;
5790         rp->rp_buf = rp->rp_ibuf;
5791 -       alloc_sowner++;
5792         return sop;
5793  }
5794  
5795 @@ -957,14 +1131,29 @@
5796         __set_bit(open->op_share_deny, &stp->st_deny_bmap);
5797  }
5798  
5799 +/*
5800 +* Because nfsd_close() can call locks_remove_flock() which removes leases,
5801 +* delay nfsd_close() for delegations from the nfsd_open() clientid
5802 +* until the delegation is reaped.
5803 +*/
5804  static void
5805 -release_stateid(struct nfs4_stateid *stp, int flags) {
5806 +release_stateid(struct nfs4_stateid *stp, int flags)
5807 +{
5808 +       struct nfs4_delegation *dp;
5809 +       struct nfs4_file *fp = stp->st_file;
5810  
5811         list_del(&stp->st_hash);
5812         list_del_perfile++;
5813         list_del(&stp->st_perfile);
5814         list_del(&stp->st_perfilestate);
5815         if ((stp->st_vfs_set) && (flags & OPEN_STATE)) {
5816 +               list_for_each_entry(dp, &fp->fi_del_perfile, dl_del_perfile) {
5817 +                       if(cmp_clid(&dp->dl_client->cl_clientid,
5818 +                           &stp->st_stateowner->so_client->cl_clientid)) {
5819 +                               dp->dl_flags |= NFS4_DELAY_CLOSE;
5820 +                               return;
5821 +                       }
5822 +               }
5823                 release_stateid_lockowner(stp);
5824                 nfsd_close(stp->st_vfs_file);
5825                 vfsclose++;
5826 @@ -1013,7 +1202,7 @@
5827         if (sop->so_confirmed && list_empty(&sop->so_perfilestate))
5828                 move_to_close_lru(sop);
5829         /* unused nfs4_file's are releseed. XXX slab cache? */
5830 -       if (list_empty(&fp->fi_perfile)) {
5831 +       if (list_empty(&fp->fi_perfile) && list_empty(&fp->fi_del_perfile)) {
5832                 release_file(fp);
5833         }
5834  }
5835 @@ -1141,6 +1330,100 @@
5836         }
5837  }
5838  
5839 +/*
5840 + * Recall a delegation
5841 + */
5842 +static int
5843 +do_recall(void *__dp)
5844 +{
5845 +       struct nfs4_delegation *dp = __dp;
5846 +
5847 +       atomic_inc(&dp->dl_count);
5848 +       nfsd4_cb_recall(dp);
5849 +       do_exit(0);
5850 +       return 0;
5851 +}
5852 +
5853 +/*
5854 + * Spawn a thread to perform a recall on the delegation represented
5855 + * by the lease (file_lock)
5856 + *
5857 + * Called from break_lease() with lock_kernel() held,
5858 + *
5859 + */
5860 +static
5861 +void nfsd_break_deleg_cb(struct file_lock *fl)
5862 +{
5863 +       struct nfs4_delegation *dp=  (struct nfs4_delegation *)fl->fl_owner;
5864 +       struct task_struct *t;
5865 +
5866 +       dprintk("NFSD nfsd_break_deleg_cb: dp %p fl %p\n",dp,fl);
5867 +       if (!dp)
5868 +               return;
5869 +
5870 +       /* schedule delegation for recall */
5871 +       spin_lock(&recall_lock);
5872 +       atomic_set(&dp->dl_state, NFS4_RECALL_IN_PROGRESS);
5873 +       list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
5874 +       spin_unlock(&recall_lock);
5875 +
5876 +       /* only place dl_time is set. protected by lock_kernel*/
5877 +       dp->dl_time = get_seconds();
5878 +
5879 +       /* XXX need to merge NFSD_LEASE_TIME with fs/locks.c:lease_break_time */
5880 +       fl->fl_break_time = jiffies + NFSD_LEASE_TIME * HZ;
5881 +
5882 +       t = kthread_run(do_recall, dp, "%s", "nfs4_cb_recall");
5883 +       if (IS_ERR(t)) {
5884 +               struct nfs4_client *clp = dp->dl_client;
5885 +
5886 +               printk(KERN_INFO "NFSD: Callback thread failed "
5887 +                       "for client (clientid %08x/%08x)\n",
5888 +                       clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
5889 +       }
5890 +}
5891 +
5892 +/*
5893 + * The file_lock is being reaped.
5894 + *
5895 + * Called by locks_free_lock() with lock_kernel() held.
5896 + */
5897 +static
5898 +void nfsd_release_deleg_cb(struct file_lock *fl)
5899 +{
5900 +       struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
5901 +
5902 +       /* bail out first: the debug print below dereferences dp */
5903 +       if (!(fl->fl_flags & FL_LEASE) || !dp)
5904 +               return;
5905 +       dprintk("NFSD nfsd_release_deleg_cb: fl %p dp %p dl_count %d, dl_state %d\n", fl,dp, atomic_read(&dp->dl_count), atomic_read(&dp->dl_state));
5906 +       atomic_set(&dp->dl_state,NFS4_RECALL_COMPLETE);
5907 +       dp->dl_flock = NULL;
5908 +}
5909 +
5910 +/*
5911 + * Set the delegation file_lock back pointer.
5912 + *
5913 + * Called from __setlease() with lock_kernel() held.
5914 + */
5915 +static
5916 +void nfsd_copy_lock_deleg_cb(struct file_lock *new, struct file_lock *fl)
5917 +{
5918 +       struct nfs4_delegation *dp = (struct nfs4_delegation *)new->fl_owner;
5919 +
5920 +       dprintk("NFSD: nfsd_copy_lock_deleg_cb: new fl %p dp %p\n", new, dp);
5921 +       if (!dp)
5922 +               return;
5923 +       dp->dl_flock = new;
5924 +}
5925 +
5926 +struct lock_manager_operations nfsd_lease_mng_ops = {
5927 +        .fl_break = nfsd_break_deleg_cb,
5928 +        .fl_release_private = nfsd_release_deleg_cb,
5929 +        .fl_copy_lock = nfsd_copy_lock_deleg_cb,
5930 +};
5931 +
5932 +
5933  
5934  /*
5935   * nfsd4_process_open1()
5936 @@ -1238,6 +1521,43 @@
5937  }
5938  
5939  static int
5940 +nfs4_deleg_conflict(u32 share, u32 dtype)
5941 +{
5942 +       return (((share & NFS4_SHARE_ACCESS_WRITE) &&
5943 +               dtype == NFS4_OPEN_DELEGATE_READ) ||
5944 +               ((share & NFS4_SHARE_ACCESS_READ) &&
5945 +               dtype == NFS4_OPEN_DELEGATE_WRITE));
5946 +}
5947 +
5948 +#define DONT_DELEGATE  8
5949 +
5950 +/*
5951 + * nfs4_check_deleg_recall()
5952 + *
5953 + * Check each delegation whose recall has not yet completed: return
5954 + * nfserr_jukebox (i.e. NFSERR_DELAY) if the open share conflicts with it;
5955 + * otherwise set *flag to DONT_DELEGATE so no new delegation is handed out.
5956 + */
5957 +static int
5958 +nfs4_check_deleg_recall(struct nfs4_file *fp, struct nfsd4_open *op, int *flag)
5959 +{
5960 +       struct nfs4_delegation *dp;
5961 +       int status = 0;
5962 +
5963 +       list_for_each_entry(dp, &fp->fi_del_perfile, dl_del_perfile) {
5964 +               dprintk("NFSD: found delegation %p with dl_state %d\n",
5965 +                                        dp, atomic_read(&dp->dl_state));
5966 +               if (atomic_read(&dp->dl_state) == NFS4_RECALL_IN_PROGRESS) {
5967 +                       if(nfs4_deleg_conflict(op->op_share_access, dp->dl_type))
5968 +                               status = nfserr_jukebox;
5969 +                       else
5970 +                               *flag = DONT_DELEGATE;
5971 +               }
5972 +       }
5973 +       return status;
5974 +}
5975 +
5976 +static int
5977  nfs4_check_open(struct nfs4_file *fp, struct nfs4_stateowner *sop, struct nfsd4_open *open, struct nfs4_stateid **stpp)
5978  {
5979         struct nfs4_stateid *local;
5980 @@ -1339,6 +1659,65 @@
5981  }
5982  
5983  /*
5984 + * Attempt to hand out a delegation.
5985 + */
5986 +static void
5987 +nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_stateid *stp, int *flag)
5988 +{
5989 +       struct nfs4_delegation *dp;
5990 +       struct nfs4_stateowner *sop = stp->st_stateowner;
5991 +       struct nfs4_callback *cb = &sop->so_client->cl_callback;
5992 +       struct file_lock fl, *flp = &fl;
5993 +       int status, type;
5994 +
5995 +       if (*flag == DONT_DELEGATE) {
5996 +               *flag = NFS4_OPEN_DELEGATE_NONE;
5997 +               return;
5998 +       }
5999 +
6000 +       /* *flag stays NONE unless a lease is actually obtained below */
6001 +       *flag = NFS4_OPEN_DELEGATE_NONE;
6002 +       if (open->op_claim_type != NFS4_OPEN_CLAIM_NULL
6003 +            || !atomic_read(&cb->cb_set) || !sop->so_confirmed)
6004 +               return;
6005 +
6006 +       if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
6007 +               type = NFS4_OPEN_DELEGATE_READ;
6008 +       else if (!(open->op_share_access & NFS4_SHARE_ACCESS_READ))
6009 +               type = NFS4_OPEN_DELEGATE_WRITE;
6010 +       else    /* read+write share: neither delegation type is chosen */
6011 +               return;
6012 +       if (!(dp = alloc_init_deleg(sop->so_client, stp, fh, type)))
6013 +               return;
6014 +       locks_init_lock(&fl);
6015 +       fl.fl_lmops = &nfsd_lease_mng_ops;
6016 +       fl.fl_flags = FL_LEASE;
6017 +       fl.fl_end = OFFSET_MAX;
6018 +       fl.fl_owner =  (fl_owner_t)dp;
6019 +       fl.fl_file = stp->st_vfs_file;
6020 +       fl.fl_pid = current->tgid;
6021 +
6022 +       if ((status = setlease(stp->st_vfs_file,
6023 +               type == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK, &flp))) {
6024 +               dprintk("NFSD: setlease failed [%d], no delegation\n", status);
6025 +               list_del(&dp->dl_del_perfile);
6026 +               list_del(&dp->dl_del_perclnt);
6027 +               kfree(dp);
6028 +               free_delegation++;
6029 +               /* *flag is still NFS4_OPEN_DELEGATE_NONE here */
6030 +               return;
6031 +       }
6032 +       *flag = type;   /* report the delegation only once it exists */
6033 +       memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid));
6034 +
6035 +       dprintk("NFSD: delegation stateid=(%08x/%08x/%08x/%08x)\n\n",
6036 +                    dp->dl_stateid.si_boot,
6037 +                    dp->dl_stateid.si_stateownerid,
6038 +                    dp->dl_stateid.si_fileid,
6039 +                    dp->dl_stateid.si_generation);
6040 +}
6041 +
6042 +/*
6043   * called with nfs4_lock_state() held.
6044   */
6045  int
6046 @@ -1346,28 +1725,24 @@
6047  {
6048         struct nfs4_stateowner *sop = open->op_stateowner;
6049         struct nfs4_file *fp = NULL;
6050 -       struct inode *ino;
6051 +       struct inode *ino = current_fh->fh_dentry->d_inode;
6052         unsigned int fi_hashval;
6053         struct nfs4_stateid *stp = NULL;
6054 -       int status;
6055 -
6056 -       status = nfserr_resource;
6057 -       if (!sop)
6058 -               return status;
6059 -
6060 -       ino = current_fh->fh_dentry->d_inode;
6061 +       int status, delegflag = 0;
6062  
6063         status = nfserr_inval;
6064         if (!TEST_ACCESS(open->op_share_access) || !TEST_DENY(open->op_share_deny))
6065                 goto out;
6066         /*
6067 -        * Lookup file; if found, lookup stateid and check open request;
6068 -        * not found, create
6069 +        * Lookup file; if found, lookup stateid and check open request,
6070 +        * and check for delegations in the process of being recalled.
6071 +        * If not found, create the nfs4_file struct
6072          */
6073         fi_hashval = file_hashval(ino);
6074         if (find_file(fi_hashval, ino, &fp)) {
6075 -               status = nfs4_check_open(fp, sop, open, &stp);
6076 -               if (status)
6077 +               if ((status = nfs4_check_open(fp, sop, open, &stp)))
6078 +                       goto out;
6079 +               if ((status = nfs4_check_deleg_recall(fp, open, &delegflag)))
6080                         goto out;
6081         } else {
6082                 status = nfserr_resource;
6083 @@ -1407,14 +1782,20 @@
6084                         }
6085                 }
6086         }
6087 -       dprintk("nfs4_process_open2: stateid=(%08x/%08x/%08x/%08x)\n",
6088 -                   stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid,
6089 -                   stp->st_stateid.si_fileid, stp->st_stateid.si_generation);
6090 -
6091         memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t));
6092  
6093 -       open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE;
6094 +       /*
6095 +       * Attempt to hand out a delegation. No error return, because the
6096 +       * OPEN succeeds even if we fail.
6097 +       */
6098 +       nfs4_open_delegation(current_fh, open, stp, &delegflag);
6099 +       open->op_delegate_type = delegflag;
6100 +
6101         status = nfs_ok;
6102 +
6103 +       dprintk("nfs4_process_open2: stateid=(%08x/%08x/%08x/%08x)\n",
6104 +                   stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid,
6105 +                   stp->st_stateid.si_fileid, stp->st_stateid.si_generation);
6106  out:
6107         /* take the opportunity to clean up unused state */
6108         if (fp && list_empty(&fp->fi_perfile))
6109 @@ -1480,14 +1861,26 @@
6110  {
6111         struct nfs4_client *clp;
6112         struct nfs4_stateowner *sop;
6113 +       struct nfs4_delegation *dp;
6114         struct list_head *pos, *next;
6115         time_t cutoff = get_seconds() - NFSD_LEASE_TIME;
6116         time_t t, clientid_val = NFSD_LEASE_TIME;
6117 -       time_t u, close_val = NFSD_LEASE_TIME;
6118 +       time_t u, test_val = NFSD_LEASE_TIME;
6119  
6120         nfs4_lock_state();
6121  
6122 -       dprintk("NFSD: laundromat service - starting, examining clients\n");
6123 +       dprintk("NFSD: laundromat service - starting\n");
6124 +       /* Remove clientid's from recovery directory */
6125 +       if (first_run) {
6126 +               int status;
6127 +
6128 +               dprintk("NFSD: laundromat service - FIRST_RUN\n");
6129 +               status  = nfsd4_list_rec_dir(1);
6130 +               if (status < 0)
6131 +                       printk("NFSD: error clearing recovery directory %s\n",
6132 +                               recovery_dirname);
6133 +               first_run = 0;
6134 +       }
6135         list_for_each_safe(pos, next, &client_lru) {
6136                 clp = list_entry(pos, struct nfs4_client, cl_lru);
6137                 if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
6138 @@ -1498,14 +1891,34 @@
6139                 }
6140                 dprintk("NFSD: purging unused client (clientid %08x)\n",
6141                         clp->cl_clientid.cl_id);
6142 +               if (clp->cl_firststate)
6143 +                       nfsd4_remove_clid_file(clp);
6144                 expire_client(clp);
6145         }
6146 +       spin_lock(&recall_lock);
6147 +       list_for_each_safe(pos, next, &del_recall_lru) {
6148 +               dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
6149 +               if (atomic_read(&dp->dl_state) == NFS4_RECALL_COMPLETE)
6150 +                       goto reap;
6151 +               if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) {
6152 +                       u = dp->dl_time - cutoff;
6153 +                       if (test_val > u)
6154 +                               test_val = u;
6155 +                       break;
6156 +               }
6157 +reap:
6158 +               dprintk("NFSD: purging unused delegation dp %p, fp %p\n",
6159 +                                   dp, dp->dl_flock);
6160 +               release_delegation(dp);
6161 +       }
6162 +       spin_unlock(&recall_lock);
6163 +       test_val = NFSD_LEASE_TIME;
6164         list_for_each_safe(pos, next, &close_lru) {
6165                 sop = list_entry(pos, struct nfs4_stateowner, so_close_lru);
6166                 if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) {
6167                         u = sop->so_time - cutoff;
6168 -                       if (close_val > u)
6169 -                               close_val = u;
6170 +                       if (test_val > u)
6171 +                               test_val = u;
6172                         break;
6173                 }
6174                 dprintk("NFSD: purging unused open stateowner (so_id %d)\n",
6175 @@ -1564,21 +1977,81 @@
6176         return 1;
6177  }
6178  
6179 +static inline int
6180 +access_permit_read(unsigned long access_bmap)
6181 +{
6182 +       return test_bit(NFS4_SHARE_ACCESS_READ, &access_bmap) ||
6183 +               test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap);
6184 +}
6185 +
6186 +static inline int
6187 +access_permit_write(unsigned long access_bmap)
6188 +{
6189 +       return test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap) ||
6190 +               test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap);
6191 +}
6192 +
6193 +static
6194 +int nfs4_check_openmode(struct nfs4_stateid *stp, int flags)
6195 +{
6196 +        int status = nfserr_openmode;
6197 +
6198 +       if ((flags & WR_STATE) && (!access_permit_write(stp->st_access_bmap)))
6199 +                goto out;
6200 +       if ((flags & RD_STATE) && (!access_permit_read(stp->st_access_bmap)))
6201 +                goto out;
6202 +       status = nfs_ok;
6203 +out:
6204 +       return status;
6205 +}
6206 +
6207 +static int
6208 +nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
6209 +{
6210 +       int status = nfserr_openmode;
6211 +
6212 +       if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ))
6213 +               goto out;
6214 +       /* no RD_STATE test: a write delegation also permits reads
6215 +        * (RFC 3530), so only a write via a read delegation fails */
6216 +       status = nfs_ok;
6217 +out:
6218 +       return status;
6219 +}
6220 +
6221 +static int
6222 +nfs4_rw_grace(int flags)
6223 +{
6224 +       return (nfs4_in_grace() && (flags & (RD_STATE | WR_STATE)) != 0); /* in grace and READ/WRITE asked */
6225 +}
6226 +
6227 +/*
6228 + * Non-zero (caller answers nfserr_grace) for READ/WRITE during grace on a
6229 + * file whose mode permits mandatory locking: setgid set, group-exec clear.
6230 + */
6231 +static int
6232 +nfs4_check_rw_grace(umode_t mode, int flags)
6233 +{
6234 +       return (nfs4_rw_grace(flags) && ((mode & (S_ISGID | S_IXGRP)) == S_ISGID));
6235 +}
6236  
6237  /*
6238  * Checks for stateid operations
6239  */
6240  int
6241 -nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int flags, struct nfs4_stateid **stpp)
6242 +nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int flags, struct file **filpp)
6243  {
6244 -       struct nfs4_stateid *stp;
6245 +       struct nfs4_stateid *stp = NULL;
6246 +       struct nfs4_delegation *dp = NULL;
6247 +       stateid_t *stidp;
6248 +       struct inode *ino = current_fh->fh_dentry->d_inode;
6249         int status;
6250  
6251         dprintk("NFSD: preprocess_stateid_op: stateid = (%08x/%08x/%08x/%08x)\n",
6252                 stateid->si_boot, stateid->si_stateownerid, 
6253                 stateid->si_fileid, stateid->si_generation); 
6254 -
6255 -       *stpp = NULL;
6256 +       if (filpp)
6257 +               *filpp = NULL;
6258  
6259         /* STALE STATEID */
6260         status = nfserr_stale_stateid;
6261 @@ -1587,33 +2060,58 @@
6262  
6263         /* BAD STATEID */
6264         status = nfserr_bad_stateid;
6265 -       if (!(stp = find_stateid(stateid, flags))) {
6266 -               dprintk("NFSD: preprocess_stateid_op: no open stateid!\n");
6267 -               goto out;
6268 -       }
6269 -       if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) {
6270 -               dprintk("NFSD: preprocess_stateid_op: fh-stateid mismatch!\n");
6271 -               stp->st_vfs_set = 0;
6272 -               goto out;
6273 -       }
6274 -       if (!stp->st_stateowner->so_confirmed) {
6275 -               dprintk("preprocess_stateid_op: lockowner not confirmed yet!\n");
6276 -               goto out;
6277 +       if (!stateid->si_fileid) { /* delegation stateid */
6278 +
6279 +               if(!(dp = find_delegation_stateid(ino, stateid))) {
6280 +                       dprintk("NFSD: delegation stateid not found\n");
6281 +                       if (nfs4_rw_grace(flags))
6282 +                               status = nfserr_grace;
6283 +                       goto out;
6284 +               }
6285 +               stidp = &dp->dl_stateid;
6286 +       } else { /* open or lock stateid */
6287 +               if (!(stp = find_stateid(stateid, flags))) {
6288 +                       dprintk("NFSD: open or lock stateid not found\n");
6289 +                       if (nfs4_rw_grace(flags))
6290 +                               status = nfserr_grace;
6291 +                       goto out;
6292 +               }
6293 +               if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp))
6294 +                       goto out;
6295 +               if (!stp->st_stateowner->so_confirmed)
6296 +                       goto out;
6297 +               stidp = &stp->st_stateid;
6298         }
6299 -       if (stateid->si_generation > stp->st_stateid.si_generation) {
6300 -               dprintk("preprocess_stateid_op: future stateid?!\n");
6301 +       if (stateid->si_generation > stidp->si_generation)
6302                 goto out;
6303 -       }
6304  
6305         /* OLD STATEID */
6306         status = nfserr_old_stateid;
6307 -       if (stateid->si_generation < stp->st_stateid.si_generation) {
6308 -               dprintk("preprocess_stateid_op: old stateid!\n");
6309 +       if (stateid->si_generation < stidp->si_generation)
6310                 goto out;
6311 +
6312 +       status = nfserr_grace;
6313 +       if (nfs4_check_rw_grace(ino->i_mode, flags))
6314 +               goto out;
6315 +
6316 +       if (stp) {
6317 +               renew_client(stp->st_stateowner->so_client);
6318 +               if ((status = nfs4_check_openmode(stp,flags)))
6319 +                       goto out;
6320 +               if (filpp)
6321 +                       *filpp = stp->st_vfs_file;
6322 +       } else if (dp) {
6323 +               renew_client(dp->dl_client);
6324 +               if ((status = nfs4_check_delegmode(dp, flags)))
6325 +                       goto out;
6326 +               if (flags & DELEG_RET) {
6327 +                       atomic_set(&dp->dl_state,NFS4_RECALL_COMPLETE);
6328 +                       release_delegation(dp);
6329 +               }
6330 +               if (filpp && dp && dp->dl_stp)
6331 +                       *filpp = dp->dl_stp->st_vfs_file;
6332         }
6333 -       *stpp = stp;
6334         status = nfs_ok;
6335 -       renew_client(stp->st_stateowner->so_client);
6336  out:
6337         return status;
6338  }
6339 @@ -1750,17 +2248,6 @@
6340         goto out;
6341  }
6342  
6343 -/*
6344 - * eventually, this will perform an upcall to the 'state daemon' as well as
6345 - * set the cl_first_state field.
6346 - */
6347 -void
6348 -first_state(struct nfs4_client *clp)
6349 -{
6350 -       if (!clp->cl_first_state)
6351 -               clp->cl_first_state = get_seconds();
6352 -}
6353 -
6354  int
6355  nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_confirm *oc)
6356  {
6357 @@ -1793,8 +2280,16 @@
6358                          stp->st_stateid.si_stateownerid,
6359                          stp->st_stateid.si_fileid,
6360                          stp->st_stateid.si_generation);
6361 -       status = nfs_ok;
6362 -       first_state(sop->so_client);
6363 +
6364 +       if (!sop->so_client->cl_firststate) {
6365 +               int err = nfsd4_create_clid_file(sop->so_client);
6366 +               if (!err) {
6367 +                       sop->so_client->cl_firststate = 1;
6368 +                       dprintk("NFSD: OPEN_CONFIRM firststate set [%.*s]\n",
6369 +                               sop->so_client->cl_name.len,
6370 +                               sop->so_client->cl_name.data);
6371 +               }
6372 +       }
6373  out:
6374         if (oc->oc_stateowner)
6375                 nfs4_get_stateowner(oc->oc_stateowner);
6376 @@ -1912,6 +2407,22 @@
6377         return status;
6378  }
6379  
6380 +int
6381 +nfsd4_delegreturn(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_delegreturn *dr)
6382 +{
6383 +       int status;
6384 +
6385 +       if ((status = fh_verify(rqstp, current_fh, S_IFREG, 0))) /* presumably: current fh must be a regular file */
6386 +               goto out;
6387 +       /* DELEG_RET makes the stateid check release the delegation; no struct file is wanted (NULL). */
6388 +       nfs4_lock_state();
6389 +       status = nfs4_preprocess_stateid_op(current_fh, &dr->dr_stateid, DELEG_RET, NULL);
6390 +       nfs4_unlock_state();
6391 +out:
6392 +       return status;
6393 +}
6394 +
6395 +
6396  /* 
6397   * Lock owner state (byte-range locks)
6398   */
6399 @@ -1938,7 +2449,7 @@
6400         unsigned int hashval;
6401  
6402         dprintk("NFSD: find_stateid flags 0x%x\n",flags);
6403 -       if ((flags & LOCK_STATE) || (flags & RDWR_STATE)) {
6404 +       if ((flags & LOCK_STATE) || (flags & RD_STATE) || (flags & WR_STATE)) {
6405                 hashval = stateid_hashval(st_id, f_id);
6406                 list_for_each_entry(local, &lockstateid_hashtbl[hashval], st_hash) {
6407                         if ((local->st_stateid.si_stateownerid == st_id) &&
6408 @@ -1946,7 +2457,7 @@
6409                                 return local;
6410                 }
6411         } 
6412 -       if ((flags & OPEN_STATE) || (flags & RDWR_STATE)) {
6413 +       if ((flags & OPEN_STATE) || (flags & RD_STATE) || (flags & WR_STATE)) {
6414                 hashval = stateid_hashval(st_id, f_id);
6415                 list_for_each_entry(local, &stateid_hashtbl[hashval], st_hash) {
6416                         if ((local->st_stateid.si_stateownerid == st_id) &&
6417 @@ -1958,6 +2469,30 @@
6418         return NULL;
6419  }
6420  
6421 +static struct nfs4_delegation *
6422 +find_delegation_stateid(struct inode *ino, stateid_t *stid)
6423 +{
6424 +       struct nfs4_delegation *dp = NULL;
6425 +       struct nfs4_file *fp = NULL;
6426 +       u32 st_id;
6427 +       unsigned int fi_hashval;
6428 +
6429 +       dprintk("NFSD:find_delegation_stateid ino %p, stid %p\n",ino,stid);
6430 +       /* Returns the delegation on ino's file entry whose stateid has stid's si_stateownerid, or NULL. */
6431 +       if(!ino || !stid)
6432 +               return NULL;
6433 +       st_id = stid->si_stateownerid;
6434 +       fi_hashval = file_hashval(ino);
6435 +       if (find_file(fi_hashval, ino, &fp)) {
6436 +               list_for_each_entry(dp, &fp->fi_del_perfile, dl_del_perfile) {
6437 +                       if(dp->dl_stateid.si_stateownerid == st_id) { /* only the stateowner id is compared */
6438 +                               dprintk("NFSD: find_delegation dp %p\n",dp);
6439 +                               return dp;
6440 +                       }
6441 +               }
6442 +       }
6443 +       return NULL;
6444 +}
6445  
6446  /*
6447   * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that
6448 @@ -2085,7 +2620,6 @@
6449         rp->rp_status = NFSERR_SERVERFAULT;
6450         rp->rp_buflen = 0;
6451         rp->rp_buf = rp->rp_ibuf;
6452 -       alloc_lsowner++;
6453         return sop;
6454  }
6455  
6456 @@ -2558,22 +3092,22 @@
6457  /*
6458   * failure => all reset bets are off, nfserr_no_grace...
6459   */
6460 -static int
6461 -nfs4_client_to_reclaim(struct nfs4_client *clp)
6462 +int
6463 +nfs4_client_to_reclaim(char *name, int namlen)
6464  {
6465         unsigned int strhashval;
6466         struct nfs4_client_reclaim *crp = NULL;
6467  
6468 -       crp = alloc_reclaim(clp->cl_name.len);
6469 +       dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", namlen, name);
6470 +       crp = alloc_reclaim(namlen);
6471         if (!crp)
6472                 return 0;
6473 -       strhashval = clientstr_hashval(clp->cl_name.data, clp->cl_name.len);
6474 +       strhashval = clientstr_hashval(name, namlen);
6475         INIT_LIST_HEAD(&crp->cr_strhash);
6476         list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]);
6477 -       memcpy(crp->cr_name.data, clp->cl_name.data, clp->cl_name.len);
6478 -       crp->cr_name.len = clp->cl_name.len;
6479 -       crp->cr_first_state = clp->cl_first_state;
6480 -       crp->cr_expired = 0;
6481 +       memcpy(crp->cr_name.data, name, namlen);
6482 +       crp->cr_name.len = namlen;
6483 +       reclaim_str_hashtbl_size++;
6484         return 1;
6485  }
6486  
6487 @@ -2618,6 +3152,9 @@
6488         if (!client)
6489                 return NULL;
6490  
6491 +       dprintk("NFSD: nfs4_find_reclaim_client for %.*s\n",
6492 +                           clp->cl_name.len, clp->cl_name.data);
6493 +
6494         /* find clp->cl_name in reclaim_str_hashtbl */
6495         strhashval = clientstr_hashval(client->cl_name.data,
6496                                       client->cl_name.len);
6497 @@ -2639,8 +3176,6 @@
6498  
6499         if ((crp = nfs4_find_reclaim_client(clid)) == NULL)
6500                 return nfserr_reclaim_bad;
6501 -       if (crp->cr_expired)
6502 -               return nfserr_no_grace;
6503         return nfs_ok;
6504  }
6505  
6506 @@ -2657,10 +3192,18 @@
6507  
6508         if (nfs4_init)
6509                 return;
6510 +       if (nfsd4_init_slabs())
6511 +               BUG(); /* XXXXXX!!! */
6512         if (!nfs4_reclaim_init) {
6513 +               int status;
6514 +
6515                 for (i = 0; i < CLIENT_HASH_SIZE; i++)
6516                         INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
6517                 reclaim_str_hashtbl_size = 0;
6518 +               nfsd4_init_rec_dir(recovery_dirname);
6519 +               status = nfsd4_list_rec_dir(0);
6520 +               if (status)
6521 +                       printk("NFSD: Failure in reading recovery data\n");
6522                 nfs4_reclaim_init = 1;
6523         }
6524         for (i = 0; i < CLIENT_HASH_SIZE; i++) {
6525 @@ -2689,6 +3232,8 @@
6526  
6527         INIT_LIST_HEAD(&close_lru);
6528         INIT_LIST_HEAD(&client_lru);
6529 +       INIT_LIST_HEAD(&del_recall_lru);
6530 +       spin_lock_init(&recall_lock);
6531         boot_time = get_seconds();
6532         grace_time = max(old_lease_time, lease_time);
6533         if (reclaim_str_hashtbl_size == 0)
6534 @@ -2725,6 +3270,15 @@
6535  {
6536         int i;
6537         struct nfs4_client *clp = NULL;
6538 +       struct nfs4_delegation *dp = NULL;
6539 +       struct nfs4_stateowner *sop = NULL;
6540 +       struct list_head *pos, *next;
6541 +
6542 +       list_for_each_safe(pos, next, &close_lru) {
6543 +               sop = list_entry(pos, struct nfs4_stateowner, so_close_lru);
6544 +               list_del(&sop->so_close_lru);
6545 +               nfs4_put_stateowner(sop);
6546 +       }
6547  
6548         for (i = 0; i < CLIENT_HASH_SIZE; i++) {
6549                 while (!list_empty(&conf_id_hashtbl[i])) {
6550 @@ -2736,20 +3290,31 @@
6551                         expire_client(clp);
6552                 }
6553         }
6554 +       spin_lock(&recall_lock);
6555 +       list_for_each_safe(pos, next, &del_recall_lru) {
6556 +               dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
6557 +               atomic_set(&dp->dl_state, NFS4_RECALL_COMPLETE);
6558 +               release_delegation(dp);
6559 +       }
6560 +       spin_unlock(&recall_lock);
6561 +
6562         release_all_files();
6563         cancel_delayed_work(&laundromat_work);
6564         flush_scheduled_work();
6565         nfs4_init = 0;
6566 +       nfs4_reclaim_init = 0;
6567         dprintk("NFSD: list_add_perfile %d list_del_perfile %d\n",
6568                         list_add_perfile, list_del_perfile);
6569         dprintk("NFSD: add_perclient %d del_perclient %d\n",
6570                         add_perclient, del_perclient);
6571         dprintk("NFSD: alloc_file %d free_file %d\n",
6572                         alloc_file, free_file);
6573 -       dprintk("NFSD: alloc_sowner %d alloc_lsowner %d free_sowner %d\n",
6574 -                       alloc_sowner, alloc_lsowner, free_sowner);
6575         dprintk("NFSD: vfsopen %d vfsclose %d\n",
6576                         vfsopen, vfsclose);
6577 +       dprintk("NFSD: alloc_delegation %d free_delegation %d\n",
6578 +                       alloc_delegation, free_delegation);
6579 +       if (nfsd4_free_slabs())
6580 +               BUG(); /* XXX? */
6581  }
6582  
6583  void
6584 @@ -2801,11 +3366,10 @@
6585         /* populate reclaim_str_hashtbl with current confirmed nfs4_clientid */
6586         for (i = 0; i < CLIENT_HASH_SIZE; i++) {
6587                 list_for_each_entry(clp, &conf_id_hashtbl[i], cl_idhash) {
6588 -                       if (!nfs4_client_to_reclaim(clp)) {
6589 +                       if (!nfs4_client_to_reclaim(clp->cl_name.data, clp->cl_name.len)) {
6590                                 nfs4_release_reclaim();
6591                                 goto init_state;
6592                         }
6593 -                       reclaim_str_hashtbl_size++;
6594                 }
6595         }
6596  init_state:
6597 Index: linux-2.6.10/fs/nfsd/nfsproc.c
6598 ===================================================================
6599 --- linux-2.6.10.orig/fs/nfsd/nfsproc.c 2004-12-25 05:34:30.000000000 +0800
6600 +++ linux-2.6.10/fs/nfsd/nfsproc.c      2005-04-05 14:49:13.426688152 +0800
6601 @@ -586,7 +586,6 @@
6602                 { nfserr_dquot, -EDQUOT },
6603  #endif
6604                 { nfserr_stale, -ESTALE },
6605 -               { nfserr_jukebox, -EWOULDBLOCK },
6606                 { nfserr_jukebox, -ETIMEDOUT },
6607                 { nfserr_dropit, -EAGAIN },
6608                 { nfserr_dropit, -ENOMEM },
6609 Index: linux-2.6.10/fs/nfsd/nfs4acl.c
6610 ===================================================================
6611 --- linux-2.6.10.orig/fs/nfsd/nfs4acl.c 2004-12-25 05:34:29.000000000 +0800
6612 +++ linux-2.6.10/fs/nfsd/nfs4acl.c      2005-04-05 14:49:13.429687696 +0800
6613 @@ -89,6 +89,8 @@
6614         return ret;
6615  }
6616  
6617 +/* modify functions to take NFS errors */
6618 +
6619  static int
6620  mode_from_nfs4(u32 perm, unsigned short *mode, unsigned int flags)
6621  {
6622 Index: linux-2.6.10/fs/nfsd/nfs4idmap.c
6623 ===================================================================
6624 --- linux-2.6.10.orig/fs/nfsd/nfs4idmap.c       2004-12-25 05:35:23.000000000 +0800
6625 +++ linux-2.6.10/fs/nfsd/nfs4idmap.c    2005-04-05 14:49:13.414689976 +0800
6626 @@ -78,9 +78,9 @@
6627  
6628  #define DefineSimpleCacheLookupMap(STRUCT, FUNC)                       \
6629          DefineCacheLookup(struct STRUCT, h, FUNC##_lookup,             \
6630 -        (struct STRUCT *item, int set), /*no setup */,                 \
6631 +        (struct STRUCT *item, int set),                        \
6632         & FUNC##_cache, FUNC##_hash(item), FUNC##_match(item, tmp),     \
6633 -       STRUCT##_init(new, item), STRUCT##_update(tmp, item), 0)
6634 +       STRUCT##_init(new, item), STRUCT##_update(tmp, item))
6635  
6636  /* Common entry handling */
6637  
6638 Index: linux-2.6.10/fs/nfsd/vfs.c
6639 ===================================================================
6640 --- linux-2.6.10.orig/fs/nfsd/vfs.c     2005-03-31 15:35:26.000000000 +0800
6641 +++ linux-2.6.10/fs/nfsd/vfs.c  2005-04-05 14:49:13.417689520 +0800
6642 @@ -304,6 +304,8 @@
6643                  * we need to break all leases.
6644                  */
6645                 err = break_lease(inode, FMODE_WRITE | O_NONBLOCK);
6646 +               if (err == -EWOULDBLOCK)
6647 +                       err = -ETIMEDOUT;
6648                 if (err) /* ENOMEM or EWOULDBLOCK */
6649                         goto out_nfserr;
6650  
6651 @@ -678,6 +680,8 @@
6652          * This may block while leases are broken.
6653          */
6654         err = break_lease(inode, O_NONBLOCK | ((access & MAY_WRITE) ? FMODE_WRITE : 0));
6655 +       if (err == -EWOULDBLOCK)
6656 +               err = -ETIMEDOUT;
6657         if (err) /* NOMEM or WOULDBLOCK */
6658                 goto out_nfserr;
6659  
6660 @@ -822,21 +826,34 @@
6661  nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
6662            struct kvec *vec, int vlen, unsigned long *count)
6663  {
6664 -       struct raparms  *ra;
6665 -       mm_segment_t    oldfs;
6666         int             err;
6667         struct file     *file;
6668 -       struct inode    *inode;
6669  
6670         err = nfsd_open(rqstp, fhp, S_IFREG, MAY_READ, &file);
6671         if (err)
6672                 goto out;
6673 +       err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
6674 +
6675 +       nfsd_close(file);
6676 +out:
6677 +       return err;
6678 +}
6679 +
6680 +int
6681 +nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
6682 +              loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
6683 +{
6684 +       struct inode *inode;
6685 +       struct raparms  *ra;
6686 +       mm_segment_t    oldfs;
6687 +       int             err;
6688 +
6689         err = nfserr_perm;
6690         inode = file->f_dentry->d_inode;
6691  #ifdef MSNFS
6692         if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
6693                 (!lock_may_read(inode, offset, *count)))
6694 -               goto out_close;
6695 +               goto out;
6696  #endif
6697  
6698         /* Get readahead parameters */
6699 @@ -872,8 +889,6 @@
6700                 dnotify_parent(file->f_dentry, DN_ACCESS);
6701         } else 
6702                 err = nfserrno(err);
6703 -out_close:
6704 -       nfsd_close(file);
6705  out:
6706         return err;
6707  }
6708 @@ -888,25 +903,40 @@
6709                                 struct kvec *vec, int vlen,
6710                                 unsigned long cnt, int *stablep)
6711  {
6712 -       struct svc_export       *exp;
6713         struct file             *file;
6714 -       struct dentry           *dentry;
6715 -       struct inode            *inode;
6716 -       mm_segment_t            oldfs;
6717         int                     err = 0;
6718 -       int                     stable = *stablep;
6719  
6720         err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file);
6721         if (err)
6722                 goto out;
6723         if (!cnt)
6724                 goto out_close;
6725 +
6726 +       err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stablep);
6727 +out_close:
6728 +       nfsd_close(file);
6729 +out:
6730 +       return err;
6731 +}
6732 +
6733 +int
6734 +nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
6735 +                               loff_t offset, struct kvec *vec, int vlen,
6736 +                               unsigned long cnt, int *stablep)
6737 +{
6738 +       struct svc_export       *exp;
6739 +       struct dentry           *dentry;
6740 +       struct inode            *inode;
6741 +       mm_segment_t            oldfs;
6742 +       int                     err = 0;
6743 +       int                     stable = *stablep;
6744 +
6745         err = nfserr_perm;
6746  
6747  #ifdef MSNFS
6748         if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
6749                 (!lock_may_write(file->f_dentry->d_inode, offset, cnt)))
6750 -               goto out_close;
6751 +               goto out;
6752  #endif
6753  
6754         dentry = file->f_dentry;
6755 @@ -993,13 +1023,10 @@
6756                 err = 0;
6757         else 
6758                 err = nfserrno(err);
6759 -out_close:
6760 -       nfsd_close(file);
6761  out:
6762         return err;
6763  }
6764  
6765 -
6766  #ifdef CONFIG_NFSD_V3
6767  /*
6768   * Commit all pending writes to stable storage.
6769 Index: linux-2.6.10/fs/nfsd/nfs4callback.c
6770 ===================================================================
6771 --- linux-2.6.10.orig/fs/nfsd/nfs4callback.c    2005-04-05 19:01:49.158500672 +0800
6772 +++ linux-2.6.10/fs/nfsd/nfs4callback.c 2005-04-05 14:49:13.428687848 +0800
6773 @@ -0,0 +1,589 @@
6774 +/*
6775 + *  linux/fs/nfsd/nfs4callback.c
6776 + *
6777 + *  Copyright (c) 2001 The Regents of the University of Michigan.
6778 + *  All rights reserved.
6779 + *
6780 + *  Kendrick Smith <kmsmith@umich.edu>
6781 + *  Andy Adamson <andros@umich.edu>
6782 + *
6783 + *  Redistribution and use in source and binary forms, with or without
6784 + *  modification, are permitted provided that the following conditions
6785 + *  are met:
6786 + *
6787 + *  1. Redistributions of source code must retain the above copyright
6788 + *     notice, this list of conditions and the following disclaimer.
6789 + *  2. Redistributions in binary form must reproduce the above copyright
6790 + *     notice, this list of conditions and the following disclaimer in the
6791 + *     documentation and/or other materials provided with the distribution.
6792 + *  3. Neither the name of the University nor the names of its
6793 + *     contributors may be used to endorse or promote products derived
6794 + *     from this software without specific prior written permission.
6795 + *
6796 + *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
6797 + *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
6798 + *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
6799 + *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
6800 + *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
6801 + *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
6802 + *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
6803 + *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
6804 + *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
6805 + *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
6806 + *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
6807 + */
6808 +
6809 +#include <linux/config.h>
6810 +#include <linux/module.h>
6811 +#include <linux/list.h>
6812 +#include <linux/inet.h>
6813 +#include <linux/errno.h>
6814 +#include <linux/sunrpc/xdr.h>
6815 +#include <linux/sunrpc/svc.h>
6816 +#include <linux/sunrpc/clnt.h>
6817 +#include <linux/nfsd/nfsd.h>
6818 +#include <linux/nfsd/state.h>
6819 +#include <linux/sunrpc/sched.h>
6820 +#include <linux/nfs4.h>
6821 +
6822 +#define NFSDDBG_FACILITY                NFSDDBG_PROC
6823 +
6824 +#define NFSPROC4_CB_NULL 0
6825 +#define NFSPROC4_CB_COMPOUND 1
6826 +
6827 +/* declarations */
6828 +static void nfs4_cb_null(struct rpc_task *task);
6829 +extern spinlock_t recall_lock;
6830 +
6831 +/* Index of predefined Linux callback client operations */
6832 +
6833 +enum {
6834 +        NFSPROC4_CLNT_CB_NULL = 0,
6835 +       NFSPROC4_CLNT_CB_RECALL,
6836 +};
6837 +
6838 +enum nfs_cb_opnum4 {
6839 +       OP_CB_RECALL            = 4,
6840 +};
6841 +
6842 +#define NFS4_MAXTAGLEN         20
6843 +
6844 +#define NFS4_enc_cb_null_sz            0
6845 +#define NFS4_dec_cb_null_sz            0
6846 +#define cb_compound_enc_hdr_sz         4
6847 +#define cb_compound_dec_hdr_sz         (3 + (NFS4_MAXTAGLEN >> 2))
6848 +#define op_enc_sz                      1
6849 +#define op_dec_sz                      2
6850 +#define enc_nfs4_fh_sz                 (1 + (NFS4_FHSIZE >> 2))
6851 +#define enc_stateid_sz                 16
6852 +#define NFS4_enc_cb_recall_sz          (cb_compound_enc_hdr_sz +       \
6853 +                                       1 + enc_stateid_sz +            \
6854 +                                       enc_nfs4_fh_sz)
6855 +
6856 +#define NFS4_dec_cb_recall_sz          (cb_compound_dec_hdr_sz  +      \
6857 +                                       op_dec_sz)
6858 +
6859 +/*
6860 +* Generic encode routines from fs/nfs/nfs4xdr.c
6861 +*/
6862 +static inline u32 *
6863 +xdr_writemem(u32 *p, const void *ptr, int nbytes)
6864 +{
6865 +       int tmp = XDR_QUADLEN(nbytes);  /* length in u32 words (presumably rounded up - see XDR_QUADLEN) */
6866 +       if (!tmp)
6867 +               return p;               /* nothing to copy; cursor unchanged */
6868 +       p[tmp-1] = 0;                   /* zero the last word first so bytes memcpy leaves untouched read as 0 */
6869 +       memcpy(p, ptr, nbytes);
6870 +       return p + tmp;                 /* cursor advanced past the words just written */
6871 +}
6872 +
6873 +#define WRITE32(n)               *p++ = htonl(n)
6874 +#define WRITEMEM(ptr,nbytes)     do {                           \
6875 +       p = xdr_writemem(p, ptr, nbytes);                       \
6876 +} while (0)
6877 +#define RESERVE_SPACE(nbytes)   do {                            \
6878 +       p = xdr_reserve_space(xdr, nbytes);                     \
6879 +       if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __FUNCTION__); \
6880 +       BUG_ON(!p);                                             \
6881 +} while (0)
6882 +
6883 +/*
6884 + * Generic decode routines from fs/nfs/nfs4xdr.c
6885 + */
6886 +#define DECODE_TAIL                             \
6887 +       status = 0;                             \
6888 +out:                                            \
6889 +       return status;                          \
6890 +xdr_error:                                      \
6891 +       dprintk("NFSD: xdr error! (%s:%d)\n", __FILE__, __LINE__); \
6892 +       status = -EIO;                          \
6893 +       goto out
6894 +
6895 +#define READ32(x)         (x) = ntohl(*p++)
6896 +#define READ64(x)         do {                  \
6897 +       (x) = (u64)ntohl(*p++) << 32;           \
6898 +       (x) |= ntohl(*p++);                     \
6899 +} while (0)
6900 +#define READTIME(x)       do {                  \
6901 +       p++;                                    \
6902 +       (x.tv_sec) = ntohl(*p++);               \
6903 +       (x.tv_nsec) = ntohl(*p++);              \
6904 +} while (0)
6905 +#define READ_BUF(nbytes)  do { \
6906 +       p = xdr_inline_decode(xdr, nbytes); \
6907 +       if (!p) { \
6908 +               dprintk("NFSD: %s: reply buffer overflowed in line %d.", \
6909 +                       __FUNCTION__, __LINE__); \
6910 +               return -EIO; \
6911 +       } \
6912 +} while (0)
6913 +
6914 +struct nfs4_cb_compound_hdr {
6915 +       int             status;
6916 +       u32             ident;
6917 +       u32             nops;
6918 +       u32             taglen;
6919 +       char *          tag;
6920 +};
6921 +
6922 +static struct {
6923 +int stat;
6924 +int errno;
6925 +} nfs_cb_errtbl[] = {
6926 +       { NFS4_OK,              0               },
6927 +       { NFS4ERR_PERM,         EPERM           },
6928 +       { NFS4ERR_NOENT,        ENOENT          },
6929 +       { NFS4ERR_IO,           EIO             },
6930 +       { NFS4ERR_NXIO,         ENXIO           },
6931 +       { NFS4ERR_ACCESS,       EACCES          },
6932 +       { NFS4ERR_EXIST,        EEXIST          },
6933 +       { NFS4ERR_XDEV,         EXDEV           },
6934 +       { NFS4ERR_NOTDIR,       ENOTDIR         },
6935 +       { NFS4ERR_ISDIR,        EISDIR          },
6936 +       { NFS4ERR_INVAL,        EINVAL          },
6937 +       { NFS4ERR_FBIG,         EFBIG           },
6938 +       { NFS4ERR_NOSPC,        ENOSPC          },
6939 +       { NFS4ERR_ROFS,         EROFS           },
6940 +       { NFS4ERR_MLINK,        EMLINK          },
6941 +       { NFS4ERR_NAMETOOLONG,  ENAMETOOLONG    },
6942 +       { NFS4ERR_NOTEMPTY,     ENOTEMPTY       },
6943 +       { NFS4ERR_DQUOT,        EDQUOT          },
6944 +       { NFS4ERR_STALE,        ESTALE          },
6945 +       { NFS4ERR_BADHANDLE,    EBADHANDLE      },
6946 +       { NFS4ERR_BAD_COOKIE,   EBADCOOKIE      },
6947 +       { NFS4ERR_NOTSUPP,      ENOTSUPP        },
6948 +       { NFS4ERR_TOOSMALL,     ETOOSMALL       },
6949 +       { NFS4ERR_SERVERFAULT,  ESERVERFAULT    },
6950 +       { NFS4ERR_BADTYPE,      EBADTYPE        },
6951 +       { NFS4ERR_LOCKED,       EAGAIN          },
6952 +       { NFS4ERR_RESOURCE,     EREMOTEIO       },
6953 +       { NFS4ERR_SYMLINK,      ELOOP           },
6954 +       { NFS4ERR_OP_ILLEGAL,   EOPNOTSUPP      },
6955 +       { NFS4ERR_DEADLOCK,     EDEADLK         },
6956 +       { -1,                   EIO             }
6957 +};
6958 +
6959 +static int
6960 +nfs_cb_stat_to_errno(int stat)
6961 +{
6962 +       int i;
6963 +       for (i = 0; nfs_cb_errtbl[i].stat != -1; i++) { /* scan stops at the stat == -1 sentinel */
6964 +               if (nfs_cb_errtbl[i].stat == stat)
6965 +                       return nfs_cb_errtbl[i].errno;  /* positive errno value from the table */
6966 +       }
6967 +       /* If we cannot translate the error, the recovery routines should
6968 +       * handle it.
6969 +       * Note: remaining NFSv4 error codes have values > 10000, so should
6970 +       * not conflict with native Linux error codes.
6971 +       */
6972 +       return stat;    /* untranslated: the sentinel entry's own errno is never returned here */
6973 +}
6974 +
6975 +/*
6976 + * XDR encode
6977 + */
6978 +
6979 +static int
6980 +encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr)
6981 +{
6982 +       u32 * p;
6983 +
6984 +       RESERVE_SPACE(16);     /* room for the four 32-bit words written below */
6985 +       WRITE32(0);            /* tag length is always 0 */
6986 +       WRITE32(NFS4_MINOR_VERSION);
6987 +       WRITE32(hdr->ident);
6988 +       WRITE32(hdr->nops);
6989 +       return 0;              /* RESERVE_SPACE BUGs on failure, so there is no error return */
6990 +}
6991 +
6992 +static int
6993 +encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec)
6994 +{
6995 +       u32 *p;
6996 +       int len = cb_rec->cbr_fhlen;
6997 +
6998 +       RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len); /* 12 = opcode, truncate and fh-length words */
6999 +       WRITE32(OP_CB_RECALL);
7000 +       WRITEMEM(&cb_rec->cbr_stateid, sizeof(stateid_t));
7001 +       WRITE32(cb_rec->cbr_trunc);
7002 +       WRITE32(len);
7003 +       WRITEMEM(cb_rec->cbr_fhval, len);      /* len bytes of file handle */
7004 +       return 0;
7005 +}
7006 +
7007 +static int
7008 +nfs4_xdr_enc_cb_null(struct rpc_rqst *req, u32 *p)
7009 +{
7010 +       struct xdr_stream xdrs, *xdr = &xdrs;  /* RESERVE_SPACE expects a local named xdr */
7011 +
7012 +       xdr_init_encode(&xdrs, &req->rq_snd_buf, p);
7013 +        RESERVE_SPACE(0);      /* the NULL call carries no argument bytes */
7014 +       return 0;
7015 +}
7016 +
7017 +static int
7018 +nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, u32 *p, struct nfs4_cb_recall *args)
7019 +{
7020 +       struct xdr_stream xdr;
7021 +       /* One-op compound; the header ident comes from the recall arguments */
7022 +       /* (assumes the caller filled in cbr_ident - TODO confirm). */
7023 +       struct nfs4_cb_compound_hdr hdr = { .ident = args->cbr_ident, .nops = 1 };
7024 +
7025 +       xdr_init_encode(&xdr, &req->rq_snd_buf, p);
7026 +       encode_cb_compound_hdr(&xdr, &hdr);
7027 +       return (encode_cb_recall(&xdr, args));
7028 +}
7029 +
7030 +
7031 +static int
7032 +decode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr){
7033 +        u32 *p;
7034 +        /* reply layout: status, tag length, tag bytes, then the op count */
7035 +        READ_BUF(8);
7036 +        READ32(hdr->status);
7037 +        READ32(hdr->taglen);
7038 +        READ_BUF(hdr->taglen + 4);
7039 +        hdr->tag = (char *)p;
7040 +        p += XDR_QUADLEN(hdr->taglen);
7041 +        READ32(hdr->nops);
7042 +        return 0;
7043 +}
7044 +
7045 +static int
7046 +decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
7047 +{
7048 +       u32 *p;
7049 +       u32 op;
7050 +       int32_t nfserr;
7051 +       /* an op reply starts with two words: the opcode, then its status */
7052 +       READ_BUF(8);
7053 +       READ32(op);
7054 +       if (op != expected) {
7055 +               dprintk("NFSD: decode_cb_op_hdr: Callback server returned "
7056 +                        " operation %d but we issued a request for %d\n",
7057 +                        op, expected);
7058 +               return -EIO;
7059 +       }
7060 +       READ32(nfserr);
7061 +       if (nfserr != NFS_OK)
7062 +               return -nfs_cb_stat_to_errno(nfserr);
7063 +       return 0;
7064 +}
7065 +/* Decode hook for the CB_NULL reply: reads nothing and reports success. */
7066 +static int
7067 +nfs4_xdr_dec_cb_null(struct rpc_rqst *req, u32 *p)
7068 +{
7069 +       return 0;
7070 +}
7071 +
7072 +static int
7073 +nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, u32 *p)
7074 +{
7075 +       struct nfs4_cb_compound_hdr reply_hdr;
7076 +       struct xdr_stream stream;
7077 +       int err;
7078 +
7079 +       xdr_init_decode(&stream, &rqstp->rq_rcv_buf, p);
7080 +       /* a bad compound header ends decoding; otherwise the result is
7081 +        * whatever the single CB_RECALL op header decodes to */
7082 +       err = decode_cb_compound_hdr(&stream, &reply_hdr);
7083 +       if (err == 0)
7084 +               err = decode_cb_op_hdr(&stream, OP_CB_RECALL);
7085 +       return err;
7086 +}
7087 +
7088 +/*
7089 + * RPC procedure tables: PROC() fills one nfs4_cb_procedures[] slot per call.
7090 + */
7091 +#ifndef MAX
7092 +# define MAX(a, b)      (((a) > (b))? (a) : (b))
7093 +#endif
7094 +/* p_bufsiz: the larger of the argument/result sizes, scaled by 4 (<< 2) */
7095 +#define PROC(proc, call, argtype, restype)                              \
7096 +[NFSPROC4_CLNT_##proc] = {                                             \
7097 +        .p_proc   = NFSPROC4_CB_##call,                                        \
7098 +        .p_encode = (kxdrproc_t) nfs4_xdr_##argtype,                    \
7099 +        .p_decode = (kxdrproc_t) nfs4_xdr_##restype,                    \
7100 +        .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2,  \
7101 +}
7102 +
7103 +struct rpc_procinfo     nfs4_cb_procedures[] = {
7104 +    PROC(CB_NULL,      NULL,     enc_cb_null,     dec_cb_null),
7105 +    PROC(CB_RECALL,    COMPOUND,   enc_cb_recall,      dec_cb_recall),
7106 +};
7107 +
7108 +struct rpc_version              nfs_cb_version4 = {
7109 +        .number                 = 1,
7110 +        .nrprocs                = sizeof(nfs4_cb_procedures)/sizeof(nfs4_cb_procedures[0]),
7111 +        .procs                  = nfs4_cb_procedures
7112 +};
7113 +/* slot 0 is left empty; slot 1 holds nfs_cb_version4 (.number = 1) */
7114 +static struct rpc_version *    nfs_cb_version[] = {
7115 +       NULL,
7116 +       &nfs_cb_version4,
7117 +};
7118 +
7119 +/*
7120 + * Use the SETCLIENTID credential
7121 + */
7122 +struct rpc_cred *
7123 +nfsd4_lookupcred(struct nfs4_client *clp, int taskflags)
7124 +{
7125 +       struct rpc_clnt *clnt = clp->cl_callback.cb_client;
7126 +       struct rpc_cred *cred;
7127 +       struct auth_cred acred;
7128 +
7129 +       /* without a callback client there is no auth cache to search */
7130 +       if (clnt == NULL)
7131 +               return NULL;
7132 +       /* hold the group list for the duration of the lookup */
7133 +       get_group_info(clp->cl_cred.cr_group_info);
7134 +       acred.uid = clp->cl_cred.cr_uid;
7135 +       acred.gid = clp->cl_cred.cr_gid;
7136 +       acred.group_info = clp->cl_cred.cr_group_info;
7137 +       dprintk("NFSD:     looking up %s cred\n",
7138 +               clnt->cl_auth->au_ops->au_name);
7139 +       cred = rpcauth_lookup_credcache(clnt->cl_auth, &acred, taskflags);
7140 +       put_group_info(clp->cl_cred.cr_group_info);
7141 +       return cred;
7142 +}
7143 +
7144 +/*
7145 + * Set up the callback client and put a NFSPROC4_CB_NULL on the wire...
7146 + */
7147 +void
7148 +nfsd4_probe_callback(struct nfs4_client *clp)
7149 +{
7150 +       struct sockaddr_in      addr;
7151 +       struct nfs4_callback    *cb = &clp->cl_callback;
7152 +       struct rpc_timeout      timeparms;
7153 +       struct rpc_xprt *       xprt;
7154 +       struct rpc_program *    program = &cb->cb_program;
7155 +       struct rpc_stat *       stat = &cb->cb_stat;
7156 +       struct rpc_clnt *       clnt;
7157 +       struct rpc_message msg = {
7158 +               .rpc_proc       = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
7159 +               .rpc_argp       = clp,
7160 +       };
7161 +       char                    hostname[32];
7162 +       int status;
7163 +       /* only probe once the callback info is parsed and no channel is set yet */
7164 +       dprintk("NFSD: probe_callback. cb_parsed %d cb_set %d\n",
7165 +                       cb->cb_parsed, atomic_read(&cb->cb_set));
7166 +       if (!cb->cb_parsed || atomic_read(&cb->cb_set))
7167 +               return;
7168 +
7169 +       /* Initialize address */
7170 +       memset(&addr, 0, sizeof(addr));
7171 +       addr.sin_family = AF_INET;
7172 +       addr.sin_port = htons(cb->cb_port);
7173 +       addr.sin_addr.s_addr = htonl(cb->cb_addr);
7174 +
7175 +       /* Initialize timeout */
7176 +       timeparms.to_initval = (NFSD_LEASE_TIME/4) * HZ;
7177 +       timeparms.to_retries = 5;
7178 +       timeparms.to_maxval = (NFSD_LEASE_TIME/2) * HZ;
7179 +       timeparms.to_exponential = 1;
7180 +
7181 +       /* Create RPC transport */
7182 +       if (!(xprt = xprt_create_proto(IPPROTO_TCP, &addr, &timeparms))) {
7183 +               dprintk("NFSD: couldn't create callback transport!\n");
7184 +               goto out_err;
7185 +       }
7186 +
7187 +       /* Initialize rpc_program */
7188 +       program->name = "nfs4_cb";
7189 +       program->number = cb->cb_prog;
7190 +       program->nrvers = sizeof(nfs_cb_version)/sizeof(nfs_cb_version[0]);
7191 +       program->version = nfs_cb_version;
7192 +       program->stats = stat;
7193 +
7194 +       /* Initialize rpc_stat */
7195 +       memset(stat, 0, sizeof(struct rpc_stat));
7196 +       stat->program = program;
7197 +
7198 +       /* Create RPC client
7199 +        *
7200 +        * XXX AUTH_UNIX only - need AUTH_GSS....
7201 +        */
7202 +       sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr.sin_addr.s_addr));
7203 +       if (!(clnt = rpc_create_client(xprt, hostname, program, 1, RPC_AUTH_UNIX))) {
7204 +               dprintk("NFSD: couldn't create callback client\n");
7205 +               goto out_xprt;
7206 +       }
7207 +       clnt->cl_intr = 1;
7208 +       clnt->cl_softrtry = 1;
7209 +       clnt->cl_chatty = 1;
7210 +
7211 +       /* Kick rpciod, put the call on the wire. */
7212 +
7213 +       if (rpciod_up() != 0) {
7214 +               dprintk("nfsd: couldn't start rpciod for callbacks!\n");
7215 +               goto out_clnt;
7216 +       }
7217 +
7218 +       /* the task holds a reference to the nfs4_client struct */
7219 +       cb->cb_client = clnt;
7220 +       atomic_inc(&clp->cl_count);
7221 +       /* queue the NULL call with nfs4_cb_null as the task callback */
7222 +       msg.rpc_cred = nfsd4_lookupcred(clp,0);
7223 +       status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, nfs4_cb_null, NULL);
7224 +
7225 +       if (status != 0) {
7226 +               dprintk("NFSD: asynchronous NFSPROC4_CB_NULL failed!\n");
7227 +               goto out_rpciod;
7228 +       }
7229 +       return;
7230 +       /* error unwinding, most recently acquired resource first */
7231 +out_rpciod:
7232 +       atomic_dec(&clp->cl_count);
7233 +       rpciod_down();
7234 +out_clnt:
7235 +       rpc_shutdown_client(clnt);
7236 +       goto out_err;
7237 +out_xprt:
7238 +       xprt_destroy(xprt);
7239 +out_err:
7240 +       dprintk("NFSD: warning: no callback path to client %.*s\n",
7241 +               clp->cl_name.len, clp->cl_name.data);
7242 +       cb->cb_client = NULL;
7243 +}
7244 +
7245 +static void
7246 +nfs4_cb_null(struct rpc_task *task)
7247 +{
7248 +       struct nfs4_client *client = (struct nfs4_client *)task->tk_msg.rpc_argp;
7249 +       struct nfs4_callback *cbinfo = &client->cl_callback;
7250 +       u32 peer = htonl(cbinfo->cb_addr);
7251 +
7252 +       dprintk("NFSD: nfs4_cb_null task->tk_status %d\n", task->tk_status);
7253 +
7254 +       if (task->tk_status >= 0) {
7255 +               atomic_set(&cbinfo->cb_set, 1);
7256 +               dprintk("NFSD: callback set to client %u.%u.%u.%u\n", NIPQUAD(peer));
7257 +       } else {
7258 +               dprintk("NFSD: callback establishment to client %.*s failed\n",
7259 +                       client->cl_name.len, client->cl_name.data);
7260 +       }
7261 +       /* success or failure, give back the client reference */
7262 +       put_nfs4_client(client);
7263 +}
7264 +
7265 +/*
7266 + *  Task callback for the CB_RECALL call.  Called with dp->dl_count incremented
7267 + */
7268 +static void
7269 +nfs4_cb_recall_done(struct rpc_task *task)
7270 +{
7271 +       struct nfs4_cb_recall *cbr = (struct nfs4_cb_recall *)task->tk_calldata;
7272 +       struct nfs4_delegation *dp = cbr->cbr_dp;
7273 +       int status;
7274 +
7275 +       spin_lock(&recall_lock);
7276 +
7277 +       /* all is well... */
7278 +       if (task->tk_status == 0)
7279 +               goto out;
7280 +
7281 +       /* network partition, retry nfsd4_cb_recall once.  */
7282 +       if (task->tk_status == -EIO) {
7283 +               if (atomic_read(&dp->dl_recall_cnt) == 0)
7284 +                       goto retry;
7285 +               else
7286 +                       /* callback channel no longer available */
7287 +                       atomic_set(&dp->dl_client->cl_callback.cb_set, 0);
7288 +       }
7289 +
7290 +       /* Race: a recall occurred milliseconds after a delegation was granted.
7291 +       * Client may have received recall prior to delegation. retry recall
7292 +       * once.
7293 +       * XXX what about nfserr_bad_stateid?
7294 +       */
7295 +       if (task->tk_status == -EBADHANDLE) {
7296 +               if (atomic_read(&dp->dl_recall_cnt) == 0)
7297 +                       goto retry;
7298 +       }
7299 +
7300 +       /* nfs4_laundromat will reap delegation */
7301 +       atomic_set(&dp->dl_state, NFS4_RECALL_COMPLETE);
7302 +
7303 +out:
7304 +       atomic_dec(&dp->dl_count);
7305 +       BUG_ON(atomic_read(&dp->dl_count) < 0);
7306 +       spin_unlock(&recall_lock);
7307 +       return;
7308 +       /* NOTE(review): a failed retry returns without dropping dl_count - confirm */
7309 +retry:
7310 +       atomic_inc(&dp->dl_recall_cnt);
7311 +       spin_unlock(&recall_lock);
7312 +       /* sleep 2 seconds before retrying recall */
7313 +       set_current_state(TASK_UNINTERRUPTIBLE);
7314 +       schedule_timeout(2*HZ);
7315 +       status = nfsd4_cb_recall(dp);
7316 +       dprintk("NFSD: nfs4_cb_recall_done: retry status: %d  dp %p dl_flock %p\n",status,dp, dp->dl_flock);
7317 +}
7318 +
7319 +/*
7320 + * Queue an asynchronous CB_RECALL for dp.  Called with dp->dl_count inc'ed;
7321 + * nfs4_lock_state() may or may not be held.  Returns 0 or an nfserr status.
7322 + */
7323 +int
7324 +nfsd4_cb_recall(struct nfs4_delegation *dp)
7325 +{
7326 +       struct nfs4_client *clp;
7327 +       struct rpc_clnt *clnt;
7328 +       struct rpc_message msg = {
7329 +               .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
7330 +       };
7331 +       struct nfs4_cb_recall *cbr = &dp->dl_recall;
7332 +       int status;
7333 +
7334 +       dprintk("NFSD: nfsd4_cb_recall NFS4_enc_cb_recall_sz %d NFS4_dec_cb_recall_sz %d \n",NFS4_enc_cb_recall_sz,NFS4_dec_cb_recall_sz);
7335 +
7336 +       clp = dp->dl_client;
7337 +       clnt = clp->cl_callback.cb_client;
7338 +       status = -EIO; /* negative errno, translated at out_fail */
7339 +       if ((!atomic_read(&clp->cl_callback.cb_set)) || !clnt)
7340 +               goto out_fail;
7341 +
7342 +       msg.rpc_argp = cbr;
7343 +       msg.rpc_resp = cbr;
7344 +       msg.rpc_cred = nfsd4_lookupcred(clp,0);
7345 +
7346 +       cbr->cbr_trunc = 0; /* XXX need to implement truncate optimization */
7347 +       cbr->cbr_dp = dp;
7348 +
7349 +       if ((status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
7350 +               nfs4_cb_recall_done, cbr ))) {
7351 +               dprintk("NFSD: recall_delegation: rpc_call_async failed %d\n",
7352 +                       status);
7353 +               goto out_fail;
7354 +       }
7355 +out:
7356 +       return status;
7357 +out_fail:
7358 +       /* cbr points into dp (&dp->dl_recall), so it must not be passed
7359 +        * to kfree(); only translate the errno for the caller. */
7360 +       status = nfserrno(status);
7361 +       goto out;
7362 +}
7363 Index: linux-2.6.10/fs/nfsd/nfs4proc.c
7364 ===================================================================
7365 --- linux-2.6.10.orig/fs/nfsd/nfs4proc.c        2004-12-25 05:35:40.000000000 +0800
7366 +++ linux-2.6.10/fs/nfsd/nfs4proc.c     2005-04-05 14:49:13.432687240 +0800
7367 @@ -461,28 +461,12 @@
7368  }
7369  
7370  static inline int
7371 -access_bits_permit_read(unsigned long access_bmap)
7372 -{
7373 -       return test_bit(NFS4_SHARE_ACCESS_READ, &access_bmap) ||
7374 -               test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap);
7375 -}
7376 -
7377 -static inline int
7378 -access_bits_permit_write(unsigned long access_bmap)
7379 -{
7380 -       return test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap) ||
7381 -               test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap);
7382 -}
7383 -
7384 -static inline int
7385  nfsd4_read(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_read *read)
7386  {
7387 -       struct nfs4_stateid *stp;
7388         int status;
7389 +       struct file *filp = NULL; /* 'goto out' can run before the stateid lookup */
7390  
7391         /* no need to check permission - this will be done in nfsd_read() */
7392 -       if (nfs4_in_grace())
7393 -               return nfserr_grace;
7394  
7395         if (read->rd_offset >= OFFSET_MAX)
7396                 return nfserr_inval;
7397 @@ -508,21 +492,17 @@
7398                 goto out;
7399         }
7400         /* check stateid */
7401 -       if ((status = nfs4_preprocess_stateid_op(current_fh, &read->rd_stateid, 
7402 -                                       CHECK_FH | RDWR_STATE, &stp))) {
7403 +       if ((status = nfs4_preprocess_stateid_op(current_fh, &read->rd_stateid,
7404 +                                       CHECK_FH | RD_STATE, &filp))) {
7405                 dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
7406                 goto out;
7407         }
7408 -       status = nfserr_openmode;
7409 -       if (!access_bits_permit_read(stp->st_access_bmap)) {
7410 -               dprintk("NFSD: nfsd4_read: file not opened for read!\n");
7411 -               goto out;
7412 -       }
7413         status = nfs_ok;
7414  out:
7415         nfs4_unlock_state();
7416         read->rd_rqstp = rqstp;
7417         read->rd_fhp = current_fh;
7418 +       read->rd_filp = filp;
7419         return status;
7420  }
7421  
7422 @@ -562,6 +542,8 @@
7423  {
7424         int status;
7425  
7426 +       if (nfs4_in_grace())
7427 +               return nfserr_grace;
7428         status = nfsd_unlink(rqstp, current_fh, 0, remove->rm_name, remove->rm_namelen);
7429         if (status == nfserr_symlink)
7430                 return nfserr_notdir;
7431 @@ -580,6 +562,9 @@
7432  
7433         if (!save_fh->fh_dentry)
7434                 return status;
7435 +       if (nfs4_in_grace() && !(save_fh->fh_export->ex_flags
7436 +                                       & NFSEXP_NOSUBTREECHECK))
7437 +               return nfserr_grace;
7438         status = nfsd_rename(rqstp, save_fh, rename->rn_sname,
7439                              rename->rn_snamelen, current_fh,
7440                              rename->rn_tname, rename->rn_tnamelen);
7441 @@ -605,12 +590,8 @@
7442  static inline int
7443  nfsd4_setattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_setattr *setattr)
7444  {
7445 -       struct nfs4_stateid *stp;
7446         int status = nfs_ok;
7447  
7448 -       if (nfs4_in_grace())
7449 -               return nfserr_grace;
7450 -
7451         if (!current_fh->fh_dentry)
7452                 return nfserr_nofilehandle;
7453  
7454 @@ -626,15 +607,10 @@
7455                 nfs4_lock_state();
7456                 if ((status = nfs4_preprocess_stateid_op(current_fh, 
7457                                                 &setattr->sa_stateid, 
7458 -                                               CHECK_FH | RDWR_STATE, &stp))) {
7459 +                                               CHECK_FH | WR_STATE, NULL))) {
7460                         dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
7461                         goto out_unlock;
7462                 }
7463 -               status = nfserr_openmode;
7464 -               if (!access_bits_permit_write(stp->st_access_bmap)) {
7465 -                       dprintk("NFSD: nfsd4_setattr: not opened for write!\n");
7466 -                       goto out_unlock;
7467 -               }
7468                 nfs4_unlock_state();
7469         }
7470         status = nfs_ok;
7471 @@ -654,14 +630,11 @@
7472  static inline int
7473  nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_write *write)
7474  {
7475 -       struct nfs4_stateid *stp;
7476         stateid_t *stateid = &write->wr_stateid;
7477 +       struct file *filp = NULL; /* zero_stateid path skips the stateid lookup */
7478         u32 *p;
7479         int status = nfs_ok;
7480  
7481 -       if (nfs4_in_grace())
7482 -               return nfserr_grace;
7483 -
7484         /* no need to check permission - this will be done in nfsd_write() */
7485  
7486         if (write->wr_offset >= OFFSET_MAX)
7487 @@ -677,18 +650,13 @@
7488                 goto zero_stateid;
7489         }
7490         if ((status = nfs4_preprocess_stateid_op(current_fh, stateid, 
7491 -                                       CHECK_FH | RDWR_STATE, &stp))) {
7492 +                                       CHECK_FH | WR_STATE, &filp))) {
7493                 dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
7494                 goto out;
7495         }
7496  
7497 -       status = nfserr_openmode;
7498 -       if (!access_bits_permit_write(stp->st_access_bmap)) {
7499 -               dprintk("NFSD: nfsd4_write: file not open for write!\n");
7500 -               goto out;
7501 -       }
7502 -
7503  zero_stateid:
7504 +
7505         nfs4_unlock_state();
7506         write->wr_bytes_written = write->wr_buflen;
7507         write->wr_how_written = write->wr_stable_how;
7508 @@ -696,9 +664,16 @@
7509         *p++ = nfssvc_boot.tv_sec;
7510         *p++ = nfssvc_boot.tv_usec;
7511  
7512 -       status =  nfsd_write(rqstp, current_fh, write->wr_offset,
7513 -                         write->wr_vec, write->wr_vlen, write->wr_buflen,
7514 -                         &write->wr_how_written);
7515 +       if (filp)
7516 +               status =  nfsd_vfs_write(rqstp, current_fh, filp,
7517 +                               write->wr_offset, write->wr_vec,
7518 +                               write->wr_vlen, write->wr_buflen,
7519 +                               &write->wr_how_written);
7520 +       else
7521 +               status =  nfsd_write(rqstp, current_fh, write->wr_offset,
7522 +                               write->wr_vec, write->wr_vlen, write->wr_buflen,
7523 +                               &write->wr_how_written);
7524 +
7525         if (status == nfserr_symlink)
7526                 status = nfserr_inval;
7527         return status;
7528 @@ -872,6 +847,9 @@
7529                 case OP_CREATE:
7530                         op->status = nfsd4_create(rqstp, current_fh, &op->u.create);
7531                         break;
7532 +               case OP_DELEGRETURN:
7533 +                       op->status = nfsd4_delegreturn(rqstp, current_fh, &op->u.delegreturn);
7534 +                       break;
7535                 case OP_GETATTR:
7536                         op->status = nfsd4_getattr(rqstp, current_fh, &op->u.getattr);
7537                         break;
7538 Index: linux-2.6.10/fs/nfsd/export.c
7539 ===================================================================
7540 --- linux-2.6.10.orig/fs/nfsd/export.c  2004-12-25 05:34:58.000000000 +0800
7541 +++ linux-2.6.10/fs/nfsd/export.c       2005-04-05 14:49:13.415689824 +0800
7542 @@ -255,7 +255,7 @@
7543         new->ek_export = item->ek_export;
7544  }
7545  
7546 -static DefineSimpleCacheLookup(svc_expkey,0) /* no inplace updates */
7547 +static DefineSimpleCacheLookup(svc_expkey)
7548  
7549  #define        EXPORT_HASHBITS         8
7550  #define        EXPORT_HASHMAX          (1<< EXPORT_HASHBITS)
7551 @@ -492,8 +492,72 @@
7552         new->ex_fsid = item->ex_fsid;
7553  }
7554  
7555 -static DefineSimpleCacheLookup(svc_export,1) /* allow inplace updates */
7556 +struct svc_export *
7557 +svc_export_lookup(struct svc_export *item, int set)
7558 +{
7559 +       struct svc_export *tmp, *new = NULL;
7560 +       struct cache_head **hp, **head;
7561  
7562 +       head = &svc_export_cache.hash_table[svc_export_hash(item)];
7563 +retry:
7564 +       if (set||new)
7565 +               write_lock(&svc_export_cache.hash_lock);
7566 +       else
7567 +               read_lock(&svc_export_cache.hash_lock);
7568 +       for(hp=head; *hp != NULL; hp = &tmp->h.next) {
7569 +               tmp = container_of(*hp, struct svc_export, h);
7570 +               if (svc_export_match(item, tmp)) { /* found a match: take a reference */
7571 +                       cache_get(&tmp->h);
7572 +                       if (set) {
7573 +                               if (test_bit(CACHE_NEGATIVE,  &item->h.flags))
7574 +                                        set_bit(CACHE_NEGATIVE, &tmp->h.flags);
7575 +                               else {
7576 +                                       clear_bit(CACHE_NEGATIVE, &tmp->h.flags);
7577 +                                       svc_export_update(tmp, item);
7578 +                               }
7579 +                       }
7580 +                       if (set||new)
7581 +                               write_unlock(&svc_export_cache.hash_lock);
7582 +                       else
7583 +                               read_unlock(&svc_export_cache.hash_lock);
7584 +                       if (set)
7585 +                               cache_fresh(&svc_export_cache, &tmp->h,
7586 +                                               item->h.expiry_time);
7587 +                       if (new)
7588 +                               svc_export_put(&new->h, &svc_export_cache);
7589 +                       return tmp;
7590 +               }
7591 +       }
7592 +       /* Didn't find anything: hash the entry allocated on the previous pass */
7593 +       if (new) {
7594 +               svc_export_init(new, item);
7595 +               new->h.next = *head;
7596 +               *head = &new->h;
7597 +               set_bit(CACHE_HASHED, &new->h.flags);
7598 +               svc_export_cache.entries++;
7599 +               if (set) {
7600 +                       tmp = new;
7601 +                       if (test_bit(CACHE_NEGATIVE, &item->h.flags))
7602 +                               set_bit(CACHE_NEGATIVE, &tmp->h.flags);
7603 +                       else
7604 +                               svc_export_update(tmp, item);
7605 +               }
7606 +       }
7607 +       if (set||new)
7608 +               write_unlock(&svc_export_cache.hash_lock);
7609 +       else
7610 +               read_unlock(&svc_export_cache.hash_lock);
7611 +       if (new && set)
7612 +               cache_fresh(&svc_export_cache, &new->h, item->h.expiry_time);
7613 +       if (new)
7614 +               return new;
7615 +       new = kmalloc(sizeof(*new), GFP_KERNEL);
7616 +       if (new) {
7617 +               cache_init(&new->h);
7618 +               goto retry;
7619 +       }
7620 +       return NULL;
7621 +}
7622  
7623  struct svc_expkey *
7624  exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp)
7625 Index: linux-2.6.10/fs/nfsd/nfssvc.c
7626 ===================================================================
7627 --- linux-2.6.10.orig/fs/nfsd/nfssvc.c  2004-12-25 05:34:58.000000000 +0800
7628 +++ linux-2.6.10/fs/nfsd/nfssvc.c       2005-04-05 14:49:13.422688760 +0800
7629 @@ -378,4 +378,6 @@
7630         .pg_name                = "nfsd",               /* program name */
7631         .pg_class               = "nfsd",               /* authentication class */
7632         .pg_stats               = &nfsd_svcstats,       /* version table */
7633 +       .pg_authenticate        = &svc_set_client,      /* export authentication */
7634 +
7635  };
7636 Index: linux-2.6.10/fs/nfsd/nfs4recover.c
7637 ===================================================================
7638 --- linux-2.6.10.orig/fs/nfsd/nfs4recover.c     2005-04-05 19:01:49.158500672 +0800
7639 +++ linux-2.6.10/fs/nfsd/nfs4recover.c  2005-04-05 14:49:13.430687544 +0800
7640 @@ -0,0 +1,411 @@
7641 +/*
7642 +*  linux/fs/nfsd/nfs4recover.c
7643 +*
7644 +*  Copyright (c) 2004 The Regents of the University of Michigan.
7645 +*  All rights reserved.
7646 +*
7647 +*  Andy Adamson <andros@umich.edu>
7648 +*
7649 +*  Redistribution and use in source and binary forms, with or without
7650 +*  modification, are permitted provided that the following conditions
7651 +*  are met:
7652 +*
7653 +*  1. Redistributions of source code must retain the above copyright
7654 +*     notice, this list of conditions and the following disclaimer.
7655 +*  2. Redistributions in binary form must reproduce the above copyright
7656 +*     notice, this list of conditions and the following disclaimer in the
7657 +*     documentation and/or other materials provided with the distribution.
7658 +*  3. Neither the name of the University nor the names of its
7659 +*     contributors may be used to endorse or promote products derived
7660 +*     from this software without specific prior written permission.
7661 +*
7662 +*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
7663 +*  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
7664 +*  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
7665 +*  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
7666 +*  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
7667 +*  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
7668 +*  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
7669 +*  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
7670 +*  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
7671 +*  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
7672 +*  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
7673 +*
7674 +*/
7675 +
7676 +
7677 +#include <linux/param.h>
7678 +#include <linux/sunrpc/svc.h>
7679 +#include <linux/nfsd/nfsd.h>
7680 +#include <linux/nfs4.h>
7681 +#include <linux/nfsd/state.h>
7682 +#include <linux/nfsd/xdr4.h>
7683 +#include <linux/file.h>
7684 +#include <linux/namei.h>
7685 +#include <asm/uaccess.h>
7686 +
7687 +#define NFSDDBG_FACILITY                NFSDDBG_PROC
7688 +
7689 +/* Recovery file names are the client name converted to hex, so the
7690 + * longest client id name that fits is about MAX_FILE_LEN/2 bytes.
7691 + */
7692 +#define MAX_FILE_LEN 256
7693 +
7694 +/* Globals */
7695 +char recovery_dirname[] = "/var/lib/nfs/v4recovery";
7696 +static uid_t   saveuid;
7697 +static gid_t   savegid;
7698 +static struct nameidata nd_rec_init;
7699 +static int rec_dir_init = 0;
7700 +/* Stash current fsuid/fsgid in saveuid/savegid, then switch both to 0. */
7701 +void
7702 +nfs4_save_set_user(void)
7703 +{
7704 +       saveuid = current->fsuid;
7705 +       savegid = current->fsgid;
7706 +       current->fsuid = 0;
7707 +       current->fsgid = 0;
7708 +}
7709 +/* Put back the fsuid/fsgid values held in saveuid/savegid. */
7710 +void
7711 +nfs4_reset_user(void)
7712 +{
7713 +       current->fsuid = saveuid;
7714 +       current->fsgid = savegid;
7715 +}
7716 +
7717 +void
7718 +nfs4_make_rec_filename(char **filename, struct nfs4_client *clp)
7719 +{
7720 +       char    *fname = *filename;
7721 +       int     flen = MAX_FILE_LEN - 1; /* keep the last byte as NUL */
7722 +       /* callers strlen() the result, so zero the whole buffer first */
7723 +       memset(fname, 0, MAX_FILE_LEN);
7724 +       qword_addhex(&fname, &flen, clp->cl_name.data, clp->cl_name.len);
7725 +}
7726 +
7727 +/* Create clp's recovery file and write the client name into it.
7728 + * XXX need to check dput() mntput ?? */
7729 +int
7730 +nfsd4_create_clid_file(struct nfs4_client *clp)
7731 +{
7732 +       struct file             *filp = NULL;
7733 +       struct dentry           *dentry;
7734 +       mm_segment_t            oldfs;
7735 +       loff_t                  offset = 0;
7736 +       char                    fbuf[MAX_FILE_LEN], *fname = fbuf;
7737 +       int                     status;
7738 +
7739 +       if (!rec_dir_init)
7740 +               return -EINVAL;
7741 +       nfs4_save_set_user();
7742 +
7743 +       dprintk("NFSD: nfsd4_create_clid_file IN recdir [d:mnt] count %d:%d\n",
7744 +               atomic_read(&nd_rec_init.dentry->d_count),
7745 +               atomic_read(&nd_rec_init.mnt->mnt_count));
7746 +
7747 +       /* lock the parent */
7748 +       down(&nd_rec_init.dentry->d_inode->i_sem);
7749 +
7750 +       nfs4_make_rec_filename(&fname, clp);
7751 +       /* dentry->d_count will be 1 */
7752 +       dentry = lookup_one_len(fname, nd_rec_init.dentry, strlen(fname));
7753 +       status = PTR_ERR(dentry);
7754 +       if (IS_ERR(dentry))
7755 +               goto out_unlock;
7756 +
7757 +       status = -EEXIST;
7758 +       if (dentry->d_inode){
7759 +               dprintk("NFSD: nfsd4_create_clid_file: FILE EXISTS\n");
7760 +               goto out_put;
7761 +       }
7762 +
7763 +       /* nd_rec_init.dentry->d_count is bumped */
7764 +       status = vfs_create(nd_rec_init.dentry->d_inode, dentry, S_IRWXU, NULL);
7765 +       if (status < 0)
7766 +               goto out_put;
7767 +
7768 +       up(&nd_rec_init.dentry->d_inode->i_sem);
7769 +
7770 +       filp = dentry_open(dget(dentry), mntget(nd_rec_init.mnt), O_RDWR);
7771 +       status = PTR_ERR(filp);
7772 +       if (IS_ERR(filp))
7773 +               goto out_mnt;
7774 +
7775 +       oldfs = get_fs(); set_fs(KERNEL_DS);
7776 +       status = vfs_write(filp, clp->cl_name.data, clp->cl_name.len, &offset);
7777 +       set_fs(oldfs);
7778 +
7779 +       dprintk("NFSD: nfsd4_create_clid_file vfs_write returns %d\n",status);
7780 +       if (status >= 0)
7781 +               status = nfs_ok;
7782 +
7783 +       if (filp->f_op && filp->f_op->flush) {
7784 +               int err = filp->f_op->flush(filp);
7785 +               dprintk("NFSD: nfsd4_create_clid_file called flush\n");
7786 +               if (!status)
7787 +                       status = err;
7788 +       }
7789 +       /* dget and mntget in dentry_open call */
7790 +       fput(filp);
7791 +out_mnt:
7792 +       /* drop the lookup_one_len() reference, also when the open failed */
7793 +       dput(dentry);
7794 +       /* dget in vfs_create call */
7795 +       dput(nd_rec_init.dentry);
7796 +
7797 +out:
7798 +       nfs4_reset_user();
7799 +       dprintk("NFSD: nfsd4_create_clid_file OUT recdir [d:mnt] count %d:%d\n",
7800 +               atomic_read(&nd_rec_init.dentry->d_count),
7801 +               atomic_read(&nd_rec_init.mnt->mnt_count));
7802 +       dprintk("NFSD: nfsd4_create_clid_file returns %d\n",status);
7803 +       return status;
7804 +
7805 +out_put:
7806 +       /* failed after the lookup: give back the dentry reference */
7807 +       dput(dentry);
7808 +out_unlock:
7809 +       up(&nd_rec_init.dentry->d_inode->i_sem);
7810 +       goto out;
7811 +}
7812 +
7813 +/*
7814 + * Unlink the named regular file from the recovery directory.
7815 + * called with pdentry->d_inode->i_sem held ?
7816 + */
7817 +int
7818 +nfsd4_unlink_rec_file(char *name, int namlen)
7819 +{
7820 +       struct dentry *dentry;
7821 +       int type, status;
7822 +
7823 +       dprintk("NFSD: nfsd4_unlink_rec_file. name %.*s\n", namlen, name);
7824 +
7825 +       dentry = lookup_one_len(name, nd_rec_init.dentry, namlen);
7826 +       dprintk("NFSD: nfsd4_unlink_rec_file POST LOOKUP nd_rec d_count %d\n",
7827 +               atomic_read(&nd_rec_init.dentry->d_count));
7828 +       status = PTR_ERR(dentry);
7829 +       if (IS_ERR(dentry))
7830 +               goto out;
7831 +
7832 +       status = -ENOENT;
7833 +       if (!dentry->d_inode)
7834 +               goto out_put;
7835 +
7836 +       /* should only be files here! */
7837 +       type = dentry->d_inode->i_mode & S_IFMT;
7838 +       status = -EISDIR;
7839 +       if (type != S_IFREG)
7840 +               goto out_put;
7841 +
7842 +       dprintk("NFSD: nfsd4_unlink_rec_file PRE VFS UNLINK [%d:%d]\n",
7843 +               atomic_read(&nd_rec_init.dentry->d_count),
7844 +               atomic_read(&nd_rec_init.mnt->mnt_count));
7845 +
7846 +       status = vfs_unlink(nd_rec_init.dentry->d_inode, dentry);
7847 +
7848 +       dprintk("NFSD: nfsd4_unlink_rec_file POST VFS UNLINK [%d:%d]\n",
7849 +               atomic_read(&nd_rec_init.dentry->d_count),
7850 +               atomic_read(&nd_rec_init.mnt->mnt_count));
7851 +
7852 +       dprintk("NFSD: nfsd4_unlink_rec_file FILE dentry->d_count %d\n",
7853 +               atomic_read(&dentry->d_count));
7854 +out_put:
7855 +       /* release the lookup reference on every path, success included */
7856 +       dput(dentry);
7857 +out:
7858 +       dprintk("NFSD: nfsd4_unlink_rec_file returns %d\n",status);
7859 +       return status;
7860 +}
7861 +
7862 +/* Unlink clp's recovery record; a failure is logged, not returned. */
7863 +void
7864 +nfsd4_remove_clid_file(struct nfs4_client *clp)
7865 +{
7866 +       char                    fbuf[MAX_FILE_LEN], *fname = fbuf;
7867 +       int                     status;
7868 +
7869 +       if (!rec_dir_init)
7870 +               return;
7871 +
7872 +       dprintk("NFSD: nfsd4_remove_clid_file client %.*s\n",
7873 +               clp->cl_name.len,clp->cl_name.data);
7874 +
7875 +       nfs4_save_set_user();
7876 +
7877 +       dprintk("NFSD: nfsd4_remove_clid_file IN recdir [d:mnt] count %d:%d\n",
7878 +               atomic_read(&nd_rec_init.dentry->d_count),
7879 +               atomic_read(&nd_rec_init.mnt->mnt_count));
7880 +
7881 +       nfs4_make_rec_filename(&fname, clp);
7882 +       status = nfsd4_unlink_rec_file(fname, strlen(fname));
7883 +       nfs4_reset_user();
7884 +       if (status != nfs_ok)   /* "%.*s" consumes an int, so narrow strlen()'s size_t */
7885 +               printk("NFSD: Failed to remove expired client state file %.*s from %s\n", (int)strlen(fname), fname, recovery_dirname);
7886 +
7887 +       dprintk("NFSD: nfsd4_remove_clid_file OUT recdir [d:mnt] count %d:%d\n",
7888 +               atomic_read(&nd_rec_init.dentry->d_count),
7889 +               atomic_read(&nd_rec_init.mnt->mnt_count));
7890 +}
7891 +
7892 +struct rec_dirent {
7893 +       int clear;      /* nonzero: unlink each entry; zero: read it for reclaim */
7894 +};
7895 +
7896 +/*
7897 + * vfs_readdir() callback used by nfsd4_list_rec_dir().
7898 + *
7899 + * With rdirent->clear set, the named entry is unlinked from the recovery
7900 + * directory.  Otherwise (on reboot) the file is read and its contents are
7901 + * handed to nfs4_client_to_reclaim() to stuff the reclaim hash with known
7902 + * client ids.  The filename may not equal the clid: the clid is the first
7903 + * (and so far only) line of data in the file, and will stay the first line
7904 + * if more data, such as the setclientid principal, is written later.
7905 + */
7906 +
7907 +int
7908 +nfsd4_get_recdir_dirent(struct rec_dirent *rdirent, const char *name,
7909 +               int namlen, loff_t offset, ino_t ino, unsigned int d_type)
7910 +{
7911 +       struct dentry           *dclid;
7912 +       struct file             *filp;
7913 +       mm_segment_t            oldfs;
7914 +       int                     status = nfs_ok;
7915 +
7916 +       dprintk("NFSD: nfsd4_get_recdir_dirent IN recdir [d:mnt] count %d:%d\n",
7917 +               atomic_read(&nd_rec_init.dentry->d_count),
7918 +               atomic_read(&nd_rec_init.mnt->mnt_count));
7919 +
7920 +       dprintk("NFSD: nfsd4_get_recdir_dirent name %.*s, clear %d\n",
7921 +               namlen, name, rdirent->clear);
7922 +
7923 +       if (name && isdotent(name, namlen))     /* presumably "." and ".." -- see isdotent() */
7924 +               goto out;
7925 +
7926 +       dclid = lookup_one_len(name, nd_rec_init.dentry, namlen);
7927 +       status = PTR_ERR(dclid);
7928 +       if(IS_ERR(dclid))
7929 +               goto out;
7930 +
7931 +       if (rdirent->clear){
7932 +               dprintk("NFSD: nfsd4_get_recdir_dirent REMOVE\n");
7933 +
7934 +               dprintk("NFSD: nfsd4_get_recdir_dirent PRE VFS_UNLINK [%d:%d]\n",
7935 +                       atomic_read(&nd_rec_init.dentry->d_count),
7936 +                       atomic_read(&nd_rec_init.mnt->mnt_count));
7937 +
7938 +               status = vfs_unlink(nd_rec_init.dentry->d_inode, dclid);
7939 +
7940 +               dprintk("NFSD: nfsd4_get_recdir_dirent POST VFS_UNLINK [%d:%d]\n",
7941 +                       atomic_read(&nd_rec_init.dentry->d_count),
7942 +                       atomic_read(&nd_rec_init.mnt->mnt_count));
7943 +               dput(dclid);    /* vfs_unlink() does not drop the lookup reference */
7944 +       } else {
7945 +               char    buf[MAX_FILE_LEN];
7946 +
7947 +               dprintk("NFSD: nfsd4_get_recdir_dirent READ\n");
7948 +
7949 +               filp = dentry_open(dclid, mntget(nd_rec_init.mnt), O_RDWR);
7950 +               if (IS_ERR(filp)) {
7951 +                       status = PTR_ERR(filp);
7952 +                       goto out;
7953 +               }
7954 +
7955 +               memset(buf, 0, MAX_FILE_LEN);
7956 +               oldfs = get_fs(); set_fs(KERNEL_DS);
7957 +               status = vfs_read(filp, buf, MAX_FILE_LEN, &filp->f_pos);
7958 +               set_fs(oldfs);
7959 +
7960 +               dprintk("NFSD: nfsd4_get_recdir_dirent vfs_read returns %d\n",
7961 +                       status);
7962 +               if (status > 0)
7963 +                       status = nfs4_client_to_reclaim(buf, status);
7964 +               fput(filp);
7965 +       }
7966 +out:
7967 +       dprintk("NFSD:nfsd4_get_recdir_dirent OUT recdir [d:mnt] count %d:%d\n",
7968 +               atomic_read(&nd_rec_init.dentry->d_count),
7969 +               atomic_read(&nd_rec_init.mnt->mnt_count));
7970 +
7971 +       dprintk("NFSD: nfsd4_get_recdir_dirent returns %d\n",status);
7972 +       return 0;       /* status is only logged; the callback always reports 0 */
7973 +}
7974 +
7975 +/*
7976 + * Feed every recovery-directory entry to nfsd4_get_recdir_dirent(); -EINVAL
7977 + * if no directory is set up, else the dentry_open()/vfs_readdir() status.
7978 + */
7979 +int
7980 +nfsd4_list_rec_dir(int clear)
7981 +{
7982 +       struct rec_dirent       ctx;
7983 +       struct file             *dir;
7984 +       int                     ret;
7985 +
7986 +       if (rec_dir_init == 0)
7987 +               return -EINVAL;
7988 +
7989 +       nfs4_save_set_user();
7990 +       dprintk("NFSD: nfsd4_list_rec_dir IN recdir [d:mnt] count %d:%d\n",
7991 +               atomic_read(&nd_rec_init.dentry->d_count),
7992 +               atomic_read(&nd_rec_init.mnt->mnt_count));
7993 +
7994 +       ctx.clear = clear;
7995 +       dir = dentry_open(dget(nd_rec_init.dentry), mntget(nd_rec_init.mnt),
7996 +                           O_RDWR);
7997 +       if (IS_ERR(dir)) {
7998 +               ret = PTR_ERR(dir);
7999 +               goto out;
8000 +       }
8001 +
8002 +       /* hand each directory entry to the callback, then close again */
8003 +       ret = vfs_readdir(dir, (filldir_t) nfsd4_get_recdir_dirent,
8004 +                       (void*)&ctx);
8005 +       fput(dir);
8006 +out:
8007 +       dprintk("NFSD: nfsd4_list_rec_dir OUT recdir [d:mnt] count %d:%d\n",
8008 +               atomic_read(&nd_rec_init.dentry->d_count),
8009 +               atomic_read(&nd_rec_init.mnt->mnt_count));
8010 +       dprintk("NFSD: nfsd4_list_rec_dir DONE status: %d\n", ret);
8011 +       nfs4_reset_user();
8012 +       return ret;
8013 +}
8014 +
8015 +
8016 +/*
8017 + * Look up rec_dirname and keep the resulting reference in nd_rec_init.
8018 + * rec_dir_init is set only when the lookup succeeds.
8019 + */
8020 +void
8021 +nfsd4_init_rec_dir(char *rec_dirname)
8022 +{
8023 +       int                     status;
8024 +
8025 +       printk("NFSD: Using %s as the NFSv4 state recovery directory\n",
8026 +               rec_dirname);
8027 +
8028 +       nfs4_save_set_user();
8029 +
8030 +       status = path_lookup(rec_dirname, LOOKUP_FOLLOW, &nd_rec_init);
8031 +       if (!status) {
8032 +               /* nd_rec_init may only be dereferenced after a successful lookup */
8033 +               printk("NFSD: nfsd4_init_rec_dir INITIAL recdir [d:mnt] count %d:%d\n",
8034 +                       atomic_read(&nd_rec_init.dentry->d_count),
8035 +                       atomic_read(&nd_rec_init.mnt->mnt_count));
8036 +               rec_dir_init = 1;
8037 +       }
8038 +       nfs4_reset_user();
8039 +       printk("NFSD: nfsd4_init_rec_dir rec_dir_init %d\n", rec_dir_init);
8040 +}
8041 +
8042 +void
8043 +nfsd4_shutdown_rec_dir(void)
8044 +{
8045 +       if (!rec_dir_init)      /* lookup never succeeded: no reference to drop */
8046 +               return;
8047 +       rec_dir_init = 0;
8048 +       printk("NFSD: nfsd4_shutdown_rec_dir FINAL recdir [d:mnt] count %d:%d\n",       /* read the counts while the path is still held */
8049 +               atomic_read(&nd_rec_init.dentry->d_count), atomic_read(&nd_rec_init.mnt->mnt_count));
8050 +       path_release(&nd_rec_init);
8051 +}
8052 Index: linux-2.6.10/fs/nfsd/Makefile
8053 ===================================================================
8054 --- linux-2.6.10.orig/fs/nfsd/Makefile  2004-12-25 05:35:50.000000000 +0800
8055 +++ linux-2.6.10/fs/nfsd/Makefile       2005-04-05 14:49:13.431687392 +0800
8056 @@ -8,5 +8,5 @@
8057                            export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o
8058  nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
8059  nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
8060 -                          nfs4acl.o
8061 +                          nfs4acl.o nfs4callback.o nfs4recover.o
8062  nfsd-objs              := $(nfsd-y)
8063 Index: linux-2.6.10/fs/nfs/nfs4xdr.c
8064 ===================================================================
8065 --- linux-2.6.10.orig/fs/nfs/nfs4xdr.c  2004-12-25 05:35:40.000000000 +0800
8066 +++ linux-2.6.10/fs/nfs/nfs4xdr.c       2005-04-05 14:49:13.452684200 +0800
8067 @@ -82,12 +82,16 @@
8068  #define encode_getfh_maxsz      (op_encode_hdr_maxsz)
8069  #define decode_getfh_maxsz      (op_decode_hdr_maxsz + 1 + \
8070                                 ((3+NFS4_FHSIZE) >> 2))
8071 -#define encode_getattr_maxsz    (op_encode_hdr_maxsz + 3)
8072 +#define nfs4_fattr_bitmap_maxsz 3
8073 +#define encode_getattr_maxsz    (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
8074  #define nfs4_name_maxsz                (1 + ((3 + NFS4_MAXNAMLEN) >> 2))
8075  #define nfs4_path_maxsz                (1 + ((3 + NFS4_MAXPATHLEN) >> 2))
8076 -#define nfs4_fattr_bitmap_maxsz (36 + 2 * nfs4_name_maxsz)
8077 -#define decode_getattr_maxsz    (op_decode_hdr_maxsz + 3 + \
8078 -                                nfs4_fattr_bitmap_maxsz)
8079 +/* This is based on getfattr, which uses the most attributes: */
8080 +#define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \
8081 +                               3 + 3 + 3 + 2 * nfs4_name_maxsz))
8082 +#define nfs4_fattr_maxsz       (nfs4_fattr_bitmap_maxsz + \
8083 +                               nfs4_fattr_value_maxsz)
8084 +#define decode_getattr_maxsz    (op_decode_hdr_maxsz + nfs4_fattr_maxsz)
8085  #define encode_savefh_maxsz     (op_encode_hdr_maxsz)
8086  #define decode_savefh_maxsz     (op_decode_hdr_maxsz)
8087  #define encode_fsinfo_maxsz    (op_encode_hdr_maxsz + 2)
8088 @@ -122,11 +126,11 @@
8089  #define encode_symlink_maxsz   (op_encode_hdr_maxsz + \
8090                                 1 + nfs4_name_maxsz + \
8091                                 nfs4_path_maxsz + \
8092 -                               nfs4_fattr_bitmap_maxsz)
8093 +                               nfs4_fattr_maxsz)
8094  #define decode_symlink_maxsz   (op_decode_hdr_maxsz + 8)
8095  #define encode_create_maxsz    (op_encode_hdr_maxsz + \
8096                                 2 + nfs4_name_maxsz + \
8097 -                               nfs4_fattr_bitmap_maxsz)
8098 +                               nfs4_fattr_maxsz)
8099  #define decode_create_maxsz    (op_decode_hdr_maxsz + 8)
8100  #define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4)
8101  #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
8102 @@ -205,7 +209,7 @@
8103  #define NFS4_enc_setattr_sz     (compound_encode_hdr_maxsz + \
8104                                  encode_putfh_maxsz + \
8105                                  op_encode_hdr_maxsz + 4 + \
8106 -                                nfs4_fattr_bitmap_maxsz + \
8107 +                                nfs4_fattr_maxsz + \
8108                                  encode_getattr_maxsz)
8109  #define NFS4_dec_setattr_sz     (compound_decode_hdr_maxsz + \
8110                                  decode_putfh_maxsz + \
8111 @@ -360,6 +364,20 @@
8112                                 encode_delegreturn_maxsz)
8113  #define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \
8114                                 decode_delegreturn_maxsz)
8115 +#define NFS4_enc_getacl_sz     (compound_encode_hdr_maxsz + \
8116 +                               encode_putfh_maxsz + \
8117 +                               encode_getattr_maxsz)
8118 +#define NFS4_dec_getacl_sz     (compound_decode_hdr_maxsz + \
8119 +                               decode_putfh_maxsz + \
8120 +                               op_decode_hdr_maxsz + \
8121 +                               nfs4_fattr_bitmap_maxsz + 1)
8122 +#define NFS4_enc_setacl_sz     (compound_encode_hdr_maxsz + \
8123 +                               encode_putfh_maxsz + \
8124 +                               op_encode_hdr_maxsz + 4 + \
8125 +                               nfs4_fattr_bitmap_maxsz + 1)
8126 +#define NFS4_dec_setacl_sz     (compound_decode_hdr_maxsz + \
8127 +                               decode_putfh_maxsz + \
8128 +                               op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
8129  
8130  static struct {
8131         unsigned int    mode;
8132 @@ -459,7 +477,7 @@
8133          * In the worst-case, this would be
8134          *   12(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime)
8135          *          = 36 bytes, plus any contribution from variable-length fields
8136 -        *            such as owner/group/acl's.
8137 +        *            such as owner/group.
8138          */
8139         len = 16;
8140  
8141 @@ -1083,6 +1101,27 @@
8142         return 0;
8143  }
8144  
8145 +extern nfs4_stateid zero_stateid;
8146 +
8147 +static int
8148 +encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg)
8149 +{
8150 +       uint32_t *p;    /* presumably the cursor used by the RESERVE_SPACE/WRITE* macros */
8151 +
8152 +       RESERVE_SPACE(4+sizeof(zero_stateid.data));
8153 +       WRITE32(OP_SETATTR);            /* the ACL is sent as a SETATTR op */
8154 +       WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data));
8155 +       RESERVE_SPACE(2*4);
8156 +       WRITE32(1);                     /* presumably the bitmap word count -- confirm */
8157 +       WRITE32(FATTR4_WORD0_ACL);
8158 +       if (arg->acl_len % 4)           /* only multiples of four bytes are accepted */
8159 +               return -EINVAL;
8160 +       RESERVE_SPACE(4);
8161 +       WRITE32(arg->acl_len);
8162 +       xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len);    /* ACL body comes from the page array */
8163 +       return 0;
8164 +}
8165 +
8166  static int
8167  encode_savefh(struct xdr_stream *xdr)
8168  {
8169 @@ -1627,6 +1666,34 @@
8170  }
8171  
8172  /*
8173 + * Encode a GETACL request
8174 + */
8175 +static int
8176 +nfs4_xdr_enc_getacl(struct rpc_rqst *req, uint32_t *p,
8177 +               struct nfs_getaclargs *args)
8178 +{
8179 +       struct xdr_stream xdr;
8180 +       struct rpc_auth *auth = req->rq_task->tk_auth;
8181 +       struct compound_hdr hdr = {
8182 +               .nops   = 2,
8183 +       };
8184 +       int replen, status;
8185 +
8186 +       xdr_init_encode(&xdr, &req->rq_snd_buf, p);
8187 +       encode_compound_hdr(&xdr, &hdr);
8188 +       status = encode_putfh(&xdr, args->fh);
8189 +       if (status)
8190 +               goto out;
8191 +       status = encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0);
8192 +       /* set up reply buffer: */
8193 +       replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_getacl_sz) << 2;
8194 +       xdr_inline_pages(&req->rq_rcv_buf, replen,
8195 +               args->acl_pages, args->acl_pgbase, args->acl_len);
8196 +out:
8197 +       return status;
8198 +}
8199 +
8200 +/*
8201   * Encode a WRITE request
8202   */
8203  static int nfs4_xdr_enc_write(struct rpc_rqst *req, uint32_t *p, struct nfs_writeargs *args)
8204 @@ -3122,6 +3189,46 @@
8205         return decode_op_hdr(xdr, OP_RENEW);
8206  }
8207  
8208 +static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
8209 +               ssize_t *acl_len)
8210 +{
8211 +       uint32_t *savep;
8212 +       uint32_t attrlen,
8213 +                bitmap[2] = {0};
8214 +       struct kvec *iov = req->rq_rcv_buf.head;
8215 +       int status;
8216 +
8217 +       if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
8218 +               goto out;
8219 +       if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
8220 +               goto out;
8221 +       if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
8222 +               goto out;
8223 +
8224 +       if (unlikely(bitmap[0] & (FATTR4_WORD0_ACL - 1U)))      /* a bit below the ACL bit is set */
8225 +               return -EIO;
8226 +       if (likely(bitmap[0] & FATTR4_WORD0_ACL)) {
8227 +               int hdrlen, recvd;
8228 +
8229 +               /* We ignore &savep and don't do consistency checks on
8230 +                * the attr length.  Let userspace figure it out.... */
8231 +               hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base;    /* offset of xdr->p within the head iovec */
8232 +               recvd = req->rq_rcv_buf.len - hdrlen;
8233 +               if (attrlen > recvd) {
8234 +                       printk(KERN_WARNING "NFS: server cheating in getattr"
8235 +                                       " acl reply: attrlen %u > recvd %u\n",
8236 +                                       attrlen, recvd);
8237 +                       return -EINVAL;
8238 +               }
8239 +               if (attrlen <= *acl_len)        /* presumably *acl_len is the caller's capacity on entry -- confirm */
8240 +                       xdr_read_pages(xdr, attrlen);
8241 +               *acl_len = attrlen;     /* report the server's length whether or not pages were read */
8242 +       }
8243 +
8244 +out:
8245 +       return status;
8246 +}
8247 +
8248  static int
8249  decode_savefh(struct xdr_stream *xdr)
8250  {
8251 @@ -3413,6 +3520,71 @@
8252  
8253  }
8254  
8255 +/*
8256 + * Encode a SETACL request: PUTFH on args->fh, then the SETATTR op
8257 + * produced by encode_setacl().  Returns the first non-zero status.
8258 + */
8259 +static int
8260 +nfs4_xdr_enc_setacl(struct rpc_rqst *req, uint32_t *p, struct nfs_setaclargs *args)
8261 +{
8262 +        struct compound_hdr hdr = { .nops = 2 };
8263 +        struct xdr_stream xdr;
8264 +        int err;
8265 +
8266 +        xdr_init_encode(&xdr, &req->rq_snd_buf, p);
8267 +        encode_compound_hdr(&xdr, &hdr);
8268 +
8269 +        /* bail out before the SETATTR if the filehandle cannot be encoded */
8270 +        err = encode_putfh(&xdr, args->fh);
8271 +        if (err != 0)
8272 +                return err;
8273 +
8274 +        return encode_setacl(&xdr, args);
8275 +}
8276 +/*
8277 + * Decode SETACL response: a PUTFH + SETATTR compound.  Stops at the
8278 + * first op that fails to decode and returns that op's status.
8279 + */
8280 +static int
8281 +nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, uint32_t *p, void *res)
8282 +{
8283 +       struct compound_hdr hdr;
8284 +       struct xdr_stream xdr;
8285 +       int err;
8286 +
8287 +       xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
8288 +       err = decode_compound_hdr(&xdr, &hdr);
8289 +       if (err != 0)
8290 +               return err;
8291 +       err = decode_putfh(&xdr);
8292 +       if (err != 0)
8293 +               return err;
8294 +       /* res is handed through untouched to the SETATTR decoder */
8295 +       return decode_setattr(&xdr, res);
8296 +}
8297 +
8298 +/*
8299 + * Decode GETACL response: compound header, PUTFH, then the ACL-bearing
8300 + * GETATTR.  acl_len is passed straight through to decode_getacl().
8301 + */
8302 +static int
8303 +nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, uint32_t *p, ssize_t *acl_len)
8304 +{
8305 +       struct compound_hdr hdr;
8306 +       struct xdr_stream xdr;
8307 +       int err;
8308 +
8309 +       xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
8310 +
8311 +       err = decode_compound_hdr(&xdr, &hdr);
8312 +       if (err != 0)
8313 +               return err;
8314 +       err = decode_putfh(&xdr);
8315 +       if (err != 0)
8316 +               return err;
8317 +
8318 +       return decode_getacl(&xdr, rqstp, acl_len);
8319 +}
8320  
8321  /*
8322   * Decode CLOSE response
8323 @@ -4009,6 +4181,8 @@
8324    PROC(READDIR,                enc_readdir,    dec_readdir),
8325    PROC(SERVER_CAPS,    enc_server_caps, dec_server_caps),
8326    PROC(DELEGRETURN,    enc_delegreturn, dec_delegreturn),
8327 +  PROC(GETACL,         enc_getacl,     dec_getacl),
8328 +  PROC(SETACL,         enc_setacl,     dec_setacl),
8329  };
8330  
8331  struct rpc_version             nfs_version4 = {
8332 Index: linux-2.6.10/fs/nfs/inode.c
8333 ===================================================================
8334 --- linux-2.6.10.orig/fs/nfs/inode.c    2004-12-25 05:35:24.000000000 +0800
8335 +++ linux-2.6.10/fs/nfs/inode.c 2005-04-05 14:49:13.445685264 +0800
8336 @@ -486,13 +486,27 @@
8337         if (error < 0)
8338                 goto out_err;
8339  
8340 -       buf->f_frsize = server->wtmult;
8341 +       /*
8342 +        * Current versions of glibc do not correctly handle the
8343 +        * case where f_frsize != f_bsize.  Eventually we want to
8344 +        * report the value of wtmult in this field.
8345 +        */
8346 +       buf->f_frsize = sb->s_blocksize;
8347 +
8348 +       /*
8349 +        * On most *nix systems, f_blocks, f_bfree, and f_bavail
8350 +        * are reported in units of f_frsize.  Linux hasn't had
8351 +        * an f_frsize field in its statfs struct until recently,
8352 +        * thus historically Linux's sys_statfs reports these
8353 +        * fields in units of f_bsize.
8354 +        */
8355         buf->f_bsize = sb->s_blocksize;
8356         blockbits = sb->s_blocksize_bits;
8357         blockres = (1 << blockbits) - 1;
8358         buf->f_blocks = (res.tbytes + blockres) >> blockbits;
8359         buf->f_bfree = (res.fbytes + blockres) >> blockbits;
8360         buf->f_bavail = (res.abytes + blockres) >> blockbits;
8361 +
8362         buf->f_files = res.tfiles;
8363         buf->f_ffree = res.afiles;
8364  
8365 @@ -565,9 +579,9 @@
8366  
8367         memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
8368         if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
8369 -               nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
8370 +               nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS;
8371         else
8372 -               nfsi->flags |= NFS_INO_INVALID_ATTR;
8373 +               nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS;
8374  }
8375  
8376  /*
8377 @@ -605,7 +619,7 @@
8378                 return 0;
8379         if (nfs_compare_fh(NFS_FH(inode), fh))
8380                 return 0;
8381 -       if (is_bad_inode(inode))
8382 +       if (is_bad_inode(inode) || NFS_STALE(inode))
8383                 return 0;
8384         return 1;
8385  }
8386 @@ -664,7 +678,7 @@
8387                 /* Why so? Because we want revalidate for devices/FIFOs, and
8388                  * that's precisely what we have in nfs_file_inode_operations.
8389                  */
8390 -               inode->i_op = &nfs_file_inode_operations;
8391 +               inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops;
8392                 if (S_ISREG(inode->i_mode)) {
8393                         inode->i_fop = &nfs_file_operations;
8394                         inode->i_data.a_ops = &nfs_file_aops;
8395 @@ -766,13 +780,8 @@
8396                         vmtruncate(inode, attr->ia_size);
8397                 }
8398         }
8399 -       if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) {
8400 -               struct rpc_cred **cred = &NFS_I(inode)->cache_access.cred;
8401 -               if (*cred) {
8402 -                       put_rpccred(*cred);
8403 -                       *cred = NULL;
8404 -               }
8405 -       }
8406 +       if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0)
8407 +               NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS;
8408         nfs_end_data_update(inode);
8409         unlock_kernel();
8410         return error;
8411 @@ -949,14 +958,14 @@
8412         lock_kernel();
8413         if (!inode || is_bad_inode(inode))
8414                 goto out_nowait;
8415 -       if (NFS_STALE(inode) && inode != inode->i_sb->s_root->d_inode)
8416 +       if (NFS_STALE(inode))
8417                 goto out_nowait;
8418  
8419         while (NFS_REVALIDATING(inode)) {
8420                 status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING);
8421                 if (status < 0)
8422                         goto out_nowait;
8423 -               if (NFS_SERVER(inode)->flags & NFS_MOUNT_NOAC)
8424 +               if (NFS_ATTRTIMEO(inode) == 0)
8425                         continue;
8426                 if (NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME))
8427                         continue;
8428 @@ -968,14 +977,14 @@
8429         /* Protect against RPC races by saving the change attribute */
8430         verifier = nfs_save_change_attribute(inode);
8431         status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr);
8432 -       if (status) {
8433 +       if (status != 0) {
8434                 dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n",
8435                          inode->i_sb->s_id,
8436                          (long long)NFS_FILEID(inode), status);
8437                 if (status == -ESTALE) {
8438 -                       NFS_FLAGS(inode) |= NFS_INO_STALE;
8439 -                       if (inode != inode->i_sb->s_root->d_inode)
8440 -                               remove_inode_hash(inode);
8441 +                       nfs_zap_caches(inode);
8442 +                       if (!S_ISDIR(inode->i_mode))
8443 +                               NFS_FLAGS(inode) |= NFS_INO_STALE;
8444                 }
8445                 goto out;
8446         }
8447 @@ -1014,7 +1023,6 @@
8448                 inode->i_sb->s_id,
8449                 (long long)NFS_FILEID(inode));
8450  
8451 -       NFS_FLAGS(inode) &= ~NFS_INO_STALE;
8452  out:
8453         NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING;
8454         wake_up(&nfsi->nfs_i_wait);
8455 @@ -1161,7 +1169,7 @@
8456         if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
8457                         || inode->i_uid != fattr->uid
8458                         || inode->i_gid != fattr->gid)
8459 -               nfsi->flags |= NFS_INO_INVALID_ATTR;
8460 +               nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS;
8461  
8462         /* Has the link count changed? */
8463         if (inode->i_nlink != fattr->nlink)
8464 @@ -1270,7 +1278,7 @@
8465  #endif
8466                 nfsi->change_attr = fattr->change_attr;
8467                 if (!data_unstable)
8468 -                       invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
8469 +                       invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS;
8470         }
8471  
8472         memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
8473 @@ -1278,14 +1286,8 @@
8474  
8475         if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) ||
8476             inode->i_uid != fattr->uid ||
8477 -           inode->i_gid != fattr->gid) {
8478 -               struct rpc_cred **cred = &NFS_I(inode)->cache_access.cred;
8479 -               if (*cred) {
8480 -                       put_rpccred(*cred);
8481 -                       *cred = NULL;
8482 -               }
8483 -               invalid |= NFS_INO_INVALID_ATTR;
8484 -       }
8485 +           inode->i_gid != fattr->gid)
8486 +               invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS;
8487  
8488         inode->i_mode = fattr->mode;
8489         inode->i_nlink = fattr->nlink;
8490 @@ -1335,7 +1337,8 @@
8491          */
8492         nfs_invalidate_inode(inode);
8493   out_err:
8494 -       return -EIO;
8495 +       NFS_FLAGS(inode) |= NFS_INO_STALE;
8496 +       return -ESTALE;
8497  }
8498  
8499  /*
8500 @@ -1449,8 +1452,6 @@
8501  
8502         kill_anon_super(s);
8503  
8504 -       nfs4_renewd_prepare_shutdown(server);
8505 -
8506         if (server->client != NULL && !IS_ERR(server->client))
8507                 rpc_shutdown_client(server->client);
8508         if (server->client_sys != NULL && !IS_ERR(server->client_sys))
8509 @@ -1461,8 +1462,6 @@
8510  
8511         rpciod_down();          /* release rpciod */
8512  
8513 -       destroy_nfsv4_state(server);
8514 -
8515         if (server->hostname != NULL)
8516                 kfree(server->hostname);
8517         kfree(server);
8518 @@ -1478,8 +1477,53 @@
8519  
8520  #ifdef CONFIG_NFS_V4
8521  
8522 -static void nfs4_clear_inode(struct inode *);
8523 +#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
8524 +
8525 +/* Set the NFSv4 ACL xattr (regular files and non-sticky dirs only); flags is unused. */
8526 +int
8527 +nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf,
8528 +               size_t buflen, int flags)
8529 +{
8530 +       struct inode *inode = dentry->d_inode;
8531 +
8532 +       if (strcmp(key, XATTR_NAME_NFSV4_ACL))
8533 +               return -EINVAL;
8534 +       if (S_ISREG(inode->i_mode) ||
8535 +           (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX)))
8536 +               return nfs4_proc_set_acl(inode, buf, buflen);
8537 +
8538 +       return -EPERM;
8539 +}
8540 +
8541 +/* The getxattr man page suggests returning -ENODATA for unknown attributes,
8542 + * and that's what we'll do for e.g. user attributes that haven't been set.
8543 + * But we'll follow ext2/ext3's lead by returning -EOPNOTSUPP for unsupported
8544 + * attributes in kernel-managed attribute namespaces. */
8545 +ssize_t
8546 +nfs4_getxattr(struct dentry *dentry, const char *key, void *buf,
8547 +               size_t buflen)
8548 +{
8549 +       int is_acl = strcmp(key, XATTR_NAME_NFSV4_ACL) == 0;
8550  
8551 +       if (!is_acl)
8552 +               return -EOPNOTSUPP;
8553 +       /* the ACL is the only attribute served here */
8554 +       return nfs4_proc_get_acl(dentry->d_inode, buf, buflen);
8555 +}
8556 +
8557 +ssize_t
8558 +nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen)
8559 +{
8560 +       ssize_t needed = strlen(XATTR_NAME_NFSV4_ACL) + 1;      /* name plus its NUL */
8561 +       if (buf == NULL)        /* size probe: nothing is copied */
8562 +               return needed;
8563 +       if (buflen < needed)
8564 +               return -ERANGE;
8565 +       memcpy(buf, XATTR_NAME_NFSV4_ACL, needed);
8566 +       return needed;
8567 +}
8568 +
8569 +static void nfs4_clear_inode(struct inode *);
8570  
8571  static struct super_operations nfs4_sops = { 
8572         .alloc_inode    = nfs_alloc_inode,
8573 @@ -1543,9 +1587,6 @@
8574                 server->wsize = nfs_block_size(data->wsize, NULL);
8575         server->flags = data->flags & NFS_MOUNT_FLAGMASK;
8576  
8577 -       /* NFSv4 doesn't use NLM locking */
8578 -       server->flags |= NFS_MOUNT_NONLM;
8579 -
8580         server->acregmin = data->acregmin*HZ;
8581         server->acregmax = data->acregmax*HZ;
8582         server->acdirmin = data->acdirmin*HZ;
8583 @@ -1790,8 +1831,22 @@
8584  
8585  static void nfs4_kill_super(struct super_block *sb)
8586  {
8587 +       struct nfs_server *server = NFS_SB(sb);
8588 +
8589         nfs_return_all_delegations(sb);
8590 -       nfs_kill_super(sb);
8591 +       kill_anon_super(sb);
8592 +
8593 +       nfs4_renewd_prepare_shutdown(server);
8594 +
8595 +       if (server->client != NULL && !IS_ERR(server->client))
8596 +               rpc_shutdown_client(server->client);
8597 +       rpciod_down();          /* release rpciod */
8598 +
8599 +       destroy_nfsv4_state(server);
8600 +
8601 +       if (server->hostname != NULL)
8602 +               kfree(server->hostname);
8603 +       kfree(server);
8604  }
8605  
8606  static struct file_system_type nfs4_fs_type = {
8607 @@ -1821,9 +1876,13 @@
8608  extern int nfs_init_nfspagecache(void);
8609  extern void nfs_destroy_nfspagecache(void);
8610  extern int nfs_init_readpagecache(void);
8611 -extern int nfs_destroy_readpagecache(void);
8612 +extern void nfs_destroy_readpagecache(void);
8613  extern int nfs_init_writepagecache(void);
8614 -extern int nfs_destroy_writepagecache(void);
8615 +extern void nfs_destroy_writepagecache(void);
8616 +#ifdef CONFIG_NFS_DIRECTIO
8617 +extern int nfs_init_directcache(void);
8618 +extern void nfs_destroy_directcache(void);
8619 +#endif
8620  
8621  static kmem_cache_t * nfs_inode_cachep;
8622  
8623 @@ -1904,6 +1963,12 @@
8624         if (err)
8625                 goto out1;
8626  
8627 +#ifdef CONFIG_NFS_DIRECTIO
8628 +       err = nfs_init_directcache();
8629 +       if (err)
8630 +               goto out0;
8631 +#endif
8632 +
8633  #ifdef CONFIG_PROC_FS
8634         rpc_proc_register(&nfs_rpcstat);
8635  #endif
8636 @@ -1914,8 +1979,14 @@
8637                 goto out;
8638         return 0;
8639  out:
8640 +#ifdef CONFIG_PROC_FS
8641         rpc_proc_unregister("nfs");
8642 +#endif
8643 +#ifdef CONFIG_NFS_DIRECTIO
8644 +       nfs_destroy_directcache();
8645 +out0:  /* nfs_init_directcache() failed: undo only the caches created before it */
8646 +#endif
8647         nfs_destroy_writepagecache();
8648  out1:
8649         nfs_destroy_readpagecache();
8650  out2:
8651 @@ -1928,6 +1999,9 @@
8652  
8653  static void __exit exit_nfs_fs(void)
8654  {
8655 +#ifdef CONFIG_NFS_DIRECTIO
8656 +       nfs_destroy_directcache();
8657 +#endif
8658         nfs_destroy_writepagecache();
8659         nfs_destroy_readpagecache();
8660         nfs_destroy_inodecache();
8661 Index: linux-2.6.10/fs/nfs/nfs4state.c
8662 ===================================================================
8663 --- linux-2.6.10.orig/fs/nfs/nfs4state.c        2004-12-25 05:33:49.000000000 +0800
8664 +++ linux-2.6.10/fs/nfs/nfs4state.c     2005-04-05 14:49:13.446685112 +0800
8665 @@ -445,7 +445,7 @@
8666                 state->owner = owner;
8667                 atomic_inc(&owner->so_count);
8668                 list_add(&state->inode_states, &nfsi->open_states);
8669 -               state->inode = inode;
8670 +               state->inode = igrab(inode);
8671                 spin_unlock(&inode->i_lock);
8672         } else {
8673                 spin_unlock(&inode->i_lock);
8674 @@ -471,6 +471,7 @@
8675                 list_del(&state->inode_states);
8676         spin_unlock(&inode->i_lock);
8677         list_del(&state->open_states);
8678 +       iput(inode);
8679         BUG_ON (state->state != 0);
8680         nfs4_free_open_state(state);
8681         nfs4_put_state_owner(owner);
8682 @@ -486,7 +487,6 @@
8683         struct nfs4_state_owner *owner = state->owner;
8684         struct nfs4_client *clp = owner->so_client;
8685         int newstate;
8686 -       int status = 0;
8687  
8688         atomic_inc(&owner->so_count);
8689         down_read(&clp->cl_sem);
8690 @@ -508,10 +508,8 @@
8691                         newstate |= FMODE_WRITE;
8692                 if (state->state == newstate)
8693                         goto out;
8694 -               if (newstate != 0)
8695 -                       status = nfs4_do_downgrade(inode, state, newstate);
8696 -               else
8697 -                       status = nfs4_do_close(inode, state);
8698 +               if (nfs4_do_close(inode, state, newstate) == -EINPROGRESS)
8699 +                       return;
8700         }
8701  out:
8702         nfs4_put_open_state(state);
8703 Index: linux-2.6.10/fs/nfs/idmap.c
8704 ===================================================================
8705 --- linux-2.6.10.orig/fs/nfs/idmap.c    2004-12-25 05:34:26.000000000 +0800
8706 +++ linux-2.6.10/fs/nfs/idmap.c 2005-04-05 14:49:13.454683896 +0800
8707 @@ -80,6 +80,7 @@
8708  static ssize_t   idmap_pipe_downcall(struct file *, const char __user *,
8709                      size_t);
8710  void             idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
8711 +static void    idmap_pipe_release(struct inode *inode);
8712  
8713  static unsigned int fnvhash32(const void *, size_t);
8714  
8715 @@ -87,6 +88,7 @@
8716          .upcall         = idmap_pipe_upcall,
8717          .downcall       = idmap_pipe_downcall,
8718          .destroy_msg    = idmap_pipe_destroy_msg,
8719 +       .release_pipe   = idmap_pipe_release,
8720  };
8721  
8722  void
8723 @@ -448,6 +450,19 @@
8724         up(&idmap->idmap_im_lock);
8725  }
8726  
8727 +static void
8728 +idmap_pipe_release(struct inode *inode)
8729 +{
8730 +       struct rpc_inode *rpci = RPC_I(inode);
8731 +       struct idmap *idmap = (struct idmap *)rpci->private;
8732 +       struct idmap_msg *im = &idmap->idmap_im;
8733 +
8734 +       down(&idmap->idmap_im_lock);
8735 +       im->im_status = IDMAP_STATUS_LOOKUPFAIL;
8736 +       wake_up(&idmap->idmap_wq);
8737 +       up(&idmap->idmap_im_lock);
8738 +}
8739 +
8740  /* 
8741   * Fowler/Noll/Vo hash
8742   *    http://www.isthe.com/chongo/tech/comp/fnv/
8743 Index: linux-2.6.10/fs/nfs/dir.c
8744 ===================================================================
8745 --- linux-2.6.10.orig/fs/nfs/dir.c      2005-03-31 15:35:26.000000000 +0800
8746 +++ linux-2.6.10/fs/nfs/dir.c   2005-04-05 14:49:13.439686176 +0800
8747 @@ -40,8 +40,6 @@
8748  static int nfs_opendir(struct inode *, struct file *);
8749  static int nfs_readdir(struct file *, void *, filldir_t);
8750  static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *);
8751 -static int nfs_cached_lookup(struct inode *, struct dentry *,
8752 -                               struct nfs_fh *, struct nfs_fattr *);
8753  static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *);
8754  static int nfs_mkdir(struct inode *, struct dentry *, int);
8755  static int nfs_rmdir(struct inode *, struct dentry *);
8756 @@ -92,6 +90,9 @@
8757         .permission     = nfs_permission,
8758         .getattr        = nfs_getattr,
8759         .setattr        = nfs_setattr,
8760 +       .getxattr       = nfs4_getxattr,
8761 +       .setxattr       = nfs4_setxattr,
8762 +       .listxattr      = nfs4_listxattr,
8763  };
8764  
8765  #endif /* CONFIG_NFS_V4 */
8766 @@ -294,24 +295,13 @@
8767         return res;
8768  }
8769  
8770 -static unsigned int nfs_type2dtype[] = {
8771 -       DT_UNKNOWN,
8772 -       DT_REG,
8773 -       DT_DIR,
8774 -       DT_BLK,
8775 -       DT_CHR,
8776 -       DT_LNK,
8777 -       DT_SOCK,
8778 -       DT_UNKNOWN,
8779 -       DT_FIFO
8780 -};
8781 -
8782 -static inline
8783 -unsigned int nfs_type_to_d_type(enum nfs_ftype type)
8784 +static inline unsigned int dt_type(struct inode *inode)
8785  {
8786 -       return nfs_type2dtype[type];
8787 +       return (inode->i_mode >> 12) & 15;
8788  }
8789  
8790 +static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc);
8791 +
8792  /*
8793   * Once we've found the start of the dirent within a page: fill 'er up...
8794   */
8795 @@ -321,6 +311,7 @@
8796  {
8797         struct file     *file = desc->file;
8798         struct nfs_entry *entry = desc->entry;
8799 +       struct dentry   *dentry = NULL;
8800         unsigned long   fileid;
8801         int             loop_count = 0,
8802                         res;
8803 @@ -333,9 +324,16 @@
8804                  *       retrieving the current dirent on the server */
8805                 fileid = nfs_fileid_to_ino_t(entry->ino);
8806  
8807 +               /* Get a dentry if we have one */
8808 +               if (dentry != NULL)
8809 +                       dput(dentry);
8810 +               dentry = nfs_readdir_lookup(desc);
8811 +
8812                 /* Use readdirplus info */
8813 -               if (desc->plus && (entry->fattr->valid & NFS_ATTR_FATTR))
8814 -                       d_type = nfs_type_to_d_type(entry->fattr->type);
8815 +               if (dentry != NULL && dentry->d_inode != NULL) {
8816 +                       d_type = dt_type(dentry->d_inode);
8817 +                       fileid = dentry->d_inode->i_ino;
8818 +               }
8819  
8820                 res = filldir(dirent, entry->name, entry->len, 
8821                               entry->prev_cookie, fileid, d_type);
8822 @@ -352,7 +350,8 @@
8823                 }
8824         }
8825         dir_page_release(desc);
8826 -
8827 +       if (dentry != NULL)
8828 +               dput(dentry);
8829         dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (long long)desc->target, res);
8830         return res;
8831  }
8832 @@ -615,24 +614,10 @@
8833                 goto out_valid;
8834         }
8835  
8836 -       /*
8837 -        * Note: we're not holding inode->i_sem and so may be racing with
8838 -        * operations that change the directory. We therefore save the
8839 -        * change attribute *before* we do the RPC call.
8840 -        */
8841 -       verifier = nfs_save_change_attribute(dir);
8842 -       error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr);
8843 -       if (!error) {
8844 -               if (nfs_compare_fh(NFS_FH(inode), &fhandle))
8845 -                       goto out_bad;
8846 -               if (nfs_lookup_verify_inode(inode, isopen))
8847 -                       goto out_zap_parent;
8848 -               goto out_valid_renew;
8849 -       }
8850 -
8851         if (NFS_STALE(inode))
8852                 goto out_bad;
8853  
8854 +       verifier = nfs_save_change_attribute(dir);
8855         error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
8856         if (error)
8857                 goto out_bad;
8858 @@ -641,7 +626,6 @@
8859         if ((error = nfs_refresh_inode(inode, &fattr)) != 0)
8860                 goto out_bad;
8861  
8862 - out_valid_renew:
8863         nfs_renew_times(dentry);
8864         nfs_set_verifier(dentry, verifier);
8865   out_valid:
8866 @@ -723,6 +707,7 @@
8867  
8868  static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
8869  {
8870 +       struct dentry *res;
8871         struct inode *inode = NULL;
8872         int error;
8873         struct nfs_fh fhandle;
8874 @@ -731,11 +716,11 @@
8875         dfprintk(VFS, "NFS: lookup(%s/%s)\n",
8876                 dentry->d_parent->d_name.name, dentry->d_name.name);
8877  
8878 -       error = -ENAMETOOLONG;
8879 +       res = ERR_PTR(-ENAMETOOLONG);
8880         if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
8881                 goto out;
8882  
8883 -       error = -ENOMEM;
8884 +       res = ERR_PTR(-ENOMEM);
8885         dentry->d_op = NFS_PROTO(dir)->dentry_ops;
8886  
8887         lock_kernel();
8888 @@ -746,29 +731,27 @@
8889         if (nfs_is_exclusive_create(dir, nd))
8890                 goto no_entry;
8891  
8892 -       error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr);
8893 -       if (error != 0) {
8894 -               error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name,
8895 -                               &fhandle, &fattr);
8896 -               if (error == -ENOENT)
8897 -                       goto no_entry;
8898 -               if (error != 0)
8899 -                       goto out_unlock;
8900 +       error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
8901 +       if (error == -ENOENT)
8902 +               goto no_entry;
8903 +       if (error < 0) {
8904 +               res = ERR_PTR(error);
8905 +               goto out_unlock;
8906         }
8907 -       error = -EACCES;
8908 +       res = ERR_PTR(-EACCES);
8909         inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
8910         if (!inode)
8911                 goto out_unlock;
8912  no_entry:
8913 -       error = 0;
8914 -       d_add(dentry, inode);
8915 +       res = d_add_unique(dentry, inode);
8916 +       if (res != NULL)
8917 +               dentry = res;
8918         nfs_renew_times(dentry);
8919         nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
8920  out_unlock:
8921         unlock_kernel();
8922  out:
8923 -       BUG_ON(error > 0);
8924 -       return ERR_PTR(error);
8925 +       return res;
8926  }
8927  
8928  #ifdef CONFIG_NFS_V4
8929 @@ -798,15 +781,15 @@
8930  
8931  static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
8932  {
8933 +       struct dentry *res = NULL;
8934         struct inode *inode = NULL;
8935 -       int error = 0;
8936  
8937         /* Check that we are indeed trying to open this file */
8938         if (!is_atomic_open(dir, nd))
8939                 goto no_open;
8940  
8941         if (dentry->d_name.len > NFS_SERVER(dir)->namelen) {
8942 -               error = -ENAMETOOLONG;
8943 +               res = ERR_PTR(-ENAMETOOLONG);
8944                 goto out;
8945         }
8946         dentry->d_op = NFS_PROTO(dir)->dentry_ops;
8947 @@ -828,7 +811,7 @@
8948                 inode = nfs4_atomic_open(dir, dentry, nd);
8949         unlock_kernel();
8950         if (IS_ERR(inode)) {
8951 -               error = PTR_ERR(inode);
8952 +               int error = PTR_ERR(inode);
8953                 switch (error) {
8954                         /* Make a negative dentry */
8955                         case -ENOENT:
8956 @@ -841,16 +824,18 @@
8957                         /* case -EISDIR: */
8958                         /* case -EINVAL: */
8959                         default:
8960 +                               res = ERR_PTR(error);
8961                                 goto out;
8962                 }
8963         }
8964  no_entry:
8965 -       d_add(dentry, inode);
8966 +       res = d_add_unique(dentry, inode);
8967 +       if (res != NULL)
8968 +               dentry = res;
8969         nfs_renew_times(dentry);
8970         nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
8971  out:
8972 -       BUG_ON(error > 0);
8973 -       return ERR_PTR(error);
8974 +       return res;
8975  no_open:
8976         return nfs_lookup(dir, dentry, nd);
8977  }
8978 @@ -906,83 +891,51 @@
8979  }
8980  #endif /* CONFIG_NFSV4 */
8981  
8982 -static inline
8983 -int find_dirent_name(nfs_readdir_descriptor_t *desc, struct page *page, struct dentry *dentry)
8984 +static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
8985  {
8986 +       struct dentry *parent = desc->file->f_dentry;
8987 +       struct inode *dir = parent->d_inode;
8988         struct nfs_entry *entry = desc->entry;
8989 -       int              status;
8990 -
8991 -       while((status = dir_decode(desc)) == 0) {
8992 -               if (entry->len != dentry->d_name.len)
8993 -                       continue;
8994 -               if (memcmp(entry->name, dentry->d_name.name, entry->len))
8995 -                       continue;
8996 -               if (!(entry->fattr->valid & NFS_ATTR_FATTR))
8997 -                       continue;
8998 -               break;
8999 -       }
9000 -       return status;
9001 -}
9002 -
9003 -/*
9004 - * Use the cached Readdirplus results in order to avoid a LOOKUP call
9005 - * whenever we believe that the parent directory has not changed.
9006 - *
9007 - * We assume that any file creation/rename changes the directory mtime.
9008 - * As this results in a page cache invalidation whenever it occurs,
9009 - * we don't require any other tests for cache coherency.
9010 - */
9011 -static
9012 -int nfs_cached_lookup(struct inode *dir, struct dentry *dentry,
9013 -                       struct nfs_fh *fh, struct nfs_fattr *fattr)
9014 -{
9015 -       nfs_readdir_descriptor_t desc;
9016 -       struct nfs_server *server;
9017 -       struct nfs_entry entry;
9018 -       struct page *page;
9019 -       unsigned long timestamp;
9020 -       int res;
9021 -
9022 -       if (!NFS_USE_READDIRPLUS(dir))
9023 -               return -ENOENT;
9024 -       server = NFS_SERVER(dir);
9025 -       /* Don't use readdirplus unless the cache is stable */
9026 -       if ((server->flags & NFS_MOUNT_NOAC) != 0
9027 -                       || nfs_caches_unstable(dir)
9028 -                       || nfs_attribute_timeout(dir))
9029 -               return -ENOENT;
9030 -       if ((NFS_FLAGS(dir) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) != 0)
9031 -               return -ENOENT;
9032 -       timestamp = NFS_I(dir)->readdir_timestamp;
9033 -
9034 -       entry.fh = fh;
9035 -       entry.fattr = fattr;
9036 -
9037 -       desc.decode = NFS_PROTO(dir)->decode_dirent;
9038 -       desc.entry = &entry;
9039 -       desc.page_index = 0;
9040 -       desc.plus = 1;
9041 -
9042 -       for(;(page = find_get_page(dir->i_mapping, desc.page_index)); desc.page_index++) {
9043 -
9044 -               res = -EIO;
9045 -               if (PageUptodate(page)) {
9046 -                       void * kaddr = kmap_atomic(page, KM_USER0);
9047 -                       desc.ptr = kaddr;
9048 -                       res = find_dirent_name(&desc, page, dentry);
9049 -                       kunmap_atomic(kaddr, KM_USER0);
9050 -               }
9051 -               page_cache_release(page);
9052 +       struct dentry *dentry, *alias;
9053 +       struct qstr name = {
9054 +               .name = entry->name,
9055 +               .len = entry->len,
9056 +       };
9057 +       struct inode *inode;
9058  
9059 -               if (res == 0)
9060 -                       goto out_found;
9061 -               if (res != -EAGAIN)
9062 +       switch (name.len) {
9063 +               case 2:
9064 +                       if (name.name[0] == '.' && name.name[1] == '.')
9065 +                               return dget_parent(parent);
9066                         break;
9067 +               case 1:
9068 +                       if (name.name[0] == '.')
9069 +                               return dget(parent);
9070 +       }
9071 +       name.hash = full_name_hash(name.name, name.len);
9072 +       dentry = d_lookup(parent, &name);
9073 +       if (dentry != NULL)
9074 +               return dentry;
9075 +       if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR))
9076 +               return NULL;
9077 +       /* Note: caller is already holding the dir->i_sem! */
9078 +       dentry = d_alloc(parent, &name);
9079 +       if (dentry == NULL)
9080 +               return NULL;
9081 +       dentry->d_op = NFS_PROTO(dir)->dentry_ops;
9082 +       inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
9083 +       if (!inode) {
9084 +               dput(dentry);
9085 +               return NULL;
9086         }
9087 -       return -ENOENT;
9088 - out_found:
9089 -       fattr->timestamp = timestamp;
9090 -       return 0;
9091 +       alias = d_add_unique(dentry, inode);
9092 +       if (alias != NULL) {
9093 +               dput(dentry);
9094 +               dentry = alias;
9095 +       }
9096 +       nfs_renew_times(dentry);
9097 +       nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
9098 +       return dentry;
9099  }
9100  
9101  /*
9102 @@ -1045,15 +998,9 @@
9103         if (nd && (nd->flags & LOOKUP_CREATE))
9104                 open_flags = nd->intent.open.flags;
9105  
9106 -       /*
9107 -        * The 0 argument passed into the create function should one day
9108 -        * contain the O_EXCL flag if requested. This allows NFSv3 to
9109 -        * select the appropriate create strategy. Currently open_namei
9110 -        * does not pass the create flags.
9111 -        */
9112         lock_kernel();
9113         nfs_begin_data_update(dir);
9114 -       inode = NFS_PROTO(dir)->create(dir, &dentry->d_name, &attr, open_flags);
9115 +       inode = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
9116         nfs_end_data_update(dir);
9117         if (!IS_ERR(inode)) {
9118                 d_instantiate(dentry, inode);
9119 @@ -1508,7 +1455,7 @@
9120  
9121         if (cache->cred != cred
9122                         || time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))
9123 -                       || (NFS_FLAGS(inode) & NFS_INO_INVALID_ATTR))
9124 +                       || (NFS_FLAGS(inode) & NFS_INO_INVALID_ACCESS))
9125                 return -ENOENT;
9126         memcpy(res, cache, sizeof(*res));
9127         return 0;
9128 @@ -1522,6 +1469,7 @@
9129                 if (cache->cred)
9130                         put_rpccred(cache->cred);
9131                 cache->cred = get_rpccred(set->cred);
9132 +               NFS_FLAGS(inode) &= ~NFS_INO_INVALID_ACCESS;
9133         }
9134         cache->jiffies = set->jiffies;
9135         cache->mask = set->mask;
9136 Index: linux-2.6.10/fs/nfs/unlink.c
9137 ===================================================================
9138 --- linux-2.6.10.orig/fs/nfs/unlink.c   2004-12-25 05:35:29.000000000 +0800
9139 +++ linux-2.6.10/fs/nfs/unlink.c        2005-04-05 14:49:13.435686784 +0800
9140 @@ -215,7 +215,6 @@
9141         spin_lock(&dentry->d_lock);
9142         dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
9143         spin_unlock(&dentry->d_lock);
9144 -       if (data->task.tk_rpcwait == &nfs_delete_queue)
9145 -               rpc_wake_up_task(&data->task);
9146 +       rpc_wake_up_task(&data->task);
9147         nfs_put_unlinkdata(data);
9148  }
9149 Index: linux-2.6.10/fs/nfs/write.c
9150 ===================================================================
9151 --- linux-2.6.10.orig/fs/nfs/write.c    2004-12-25 05:35:23.000000000 +0800
9152 +++ linux-2.6.10/fs/nfs/write.c 2005-04-05 14:49:13.443685568 +0800
9153 @@ -61,7 +61,6 @@
9154  #include <linux/nfs_page.h>
9155  #include <asm/uaccess.h>
9156  #include <linux/smp_lock.h>
9157 -#include <linux/mempool.h>
9158  
9159  #include "delegation.h"
9160  
9161 @@ -83,49 +82,17 @@
9162  static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
9163  
9164  static kmem_cache_t *nfs_wdata_cachep;
9165 -static mempool_t *nfs_wdata_mempool;
9166 -static mempool_t *nfs_commit_mempool;
9167 +mempool_t *nfs_wdata_mempool;
9168 +mempool_t *nfs_commit_mempool;
9169  
9170  static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
9171  
9172 -static __inline__ struct nfs_write_data *nfs_writedata_alloc(void)
9173 -{
9174 -       struct nfs_write_data   *p;
9175 -       p = (struct nfs_write_data *)mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
9176 -       if (p) {
9177 -               memset(p, 0, sizeof(*p));
9178 -               INIT_LIST_HEAD(&p->pages);
9179 -       }
9180 -       return p;
9181 -}
9182 -
9183 -static __inline__ void nfs_writedata_free(struct nfs_write_data *p)
9184 -{
9185 -       mempool_free(p, nfs_wdata_mempool);
9186 -}
9187 -
9188 -static void nfs_writedata_release(struct rpc_task *task)
9189 +void nfs_writedata_release(struct rpc_task *task)
9190  {
9191         struct nfs_write_data   *wdata = (struct nfs_write_data *)task->tk_calldata;
9192         nfs_writedata_free(wdata);
9193  }
9194  
9195 -static __inline__ struct nfs_write_data *nfs_commit_alloc(void)
9196 -{
9197 -       struct nfs_write_data   *p;
9198 -       p = (struct nfs_write_data *)mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
9199 -       if (p) {
9200 -               memset(p, 0, sizeof(*p));
9201 -               INIT_LIST_HEAD(&p->pages);
9202 -       }
9203 -       return p;
9204 -}
9205 -
9206 -static __inline__ void nfs_commit_free(struct nfs_write_data *p)
9207 -{
9208 -       mempool_free(p, nfs_commit_mempool);
9209 -}
9210 -
9211  /* Adjust the file length if we're writing beyond the end */
9212  static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
9213  {
9214 @@ -184,11 +151,10 @@
9215         int             result, written = 0;
9216         struct nfs_write_data *wdata;
9217  
9218 -       wdata = kmalloc(sizeof(*wdata), GFP_NOFS);
9219 +       wdata = nfs_writedata_alloc();
9220         if (!wdata)
9221                 return -ENOMEM;
9222  
9223 -       memset(wdata, 0, sizeof(*wdata));
9224         wdata->flags = how;
9225         wdata->cred = ctx->cred;
9226         wdata->inode = inode;
9227 @@ -238,8 +204,7 @@
9228  
9229  io_error:
9230         nfs_end_data_update_defer(inode);
9231 -
9232 -       kfree(wdata);
9233 +       nfs_writedata_free(wdata);
9234         return written ? written : result;
9235  }
9236  
9237 @@ -1199,7 +1164,8 @@
9238                 }
9239                 if (time_before(complain, jiffies)) {
9240                         printk(KERN_WARNING
9241 -                              "NFS: Server wrote less than requested.\n");
9242 +                              "NFS: Server wrote zero bytes, expected %u.\n",
9243 +                                       argp->count);
9244                         complain = jiffies + 300 * HZ;
9245                 }
9246                 /* Can't do anything about it except throw an error. */
9247 Index: linux-2.6.10/fs/nfs/proc.c
9248 ===================================================================
9249 --- linux-2.6.10.orig/fs/nfs/proc.c     2004-12-25 05:35:28.000000000 +0800
9250 +++ linux-2.6.10/fs/nfs/proc.c  2005-04-05 14:49:13.440686024 +0800
9251 @@ -63,12 +63,12 @@
9252         dprintk("%s: call getattr\n", __FUNCTION__);
9253         fattr->valid = 0;
9254         status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0);
9255 -       dprintk("%s: reply getattr %d\n", __FUNCTION__, status);
9256 +       dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
9257         if (status)
9258                 return status;
9259         dprintk("%s: call statfs\n", __FUNCTION__);
9260         status = rpc_call(server->client_sys, NFSPROC_STATFS, fhandle, &fsinfo, 0);
9261 -       dprintk("%s: reply statfs %d\n", __FUNCTION__, status);
9262 +       dprintk("%s: reply statfs: %d\n", __FUNCTION__, status);
9263         if (status)
9264                 return status;
9265         info->rtmax  = NFS_MAXDATA;
9266 @@ -96,7 +96,7 @@
9267         fattr->valid = 0;
9268         status = rpc_call(server->client, NFSPROC_GETATTR,
9269                                 fhandle, fattr, 0);
9270 -       dprintk("NFS reply getattr\n");
9271 +       dprintk("NFS reply getattr: %d\n", status);
9272         return status;
9273  }
9274  
9275 @@ -114,7 +114,7 @@
9276         dprintk("NFS call  setattr\n");
9277         fattr->valid = 0;
9278         status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0);
9279 -       dprintk("NFS reply setattr\n");
9280 +       dprintk("NFS reply setattr: %d\n", status);
9281         return status;
9282  }
9283  
9284 @@ -213,15 +213,15 @@
9285  }
9286  
9287  static struct inode *
9288 -nfs_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
9289 +nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
9290                 int flags)
9291  {
9292         struct nfs_fh           fhandle;
9293         struct nfs_fattr        fattr;
9294         struct nfs_createargs   arg = {
9295                 .fh             = NFS_FH(dir),
9296 -               .name           = name->name,
9297 -               .len            = name->len,
9298 +               .name           = dentry->d_name.name,
9299 +               .len            = dentry->d_name.len,
9300                 .sattr          = sattr
9301         };
9302         struct nfs_diropok      res = {
9303 @@ -231,7 +231,7 @@
9304         int                     status;
9305  
9306         fattr.valid = 0;
9307 -       dprintk("NFS call  create %s\n", name->name);
9308 +       dprintk("NFS call  create %s\n", dentry->d_name.name);
9309         status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
9310         dprintk("NFS reply create: %d\n", status);
9311         if (status == 0) {
9312 @@ -620,6 +620,7 @@
9313         .version        = 2,                   /* protocol version */
9314         .dentry_ops     = &nfs_dentry_operations,
9315         .dir_inode_ops  = &nfs_dir_inode_operations,
9316 +       .file_inode_ops = &nfs_file_inode_operations,
9317         .getroot        = nfs_proc_get_root,
9318         .getattr        = nfs_proc_getattr,
9319         .setattr        = nfs_proc_setattr,
9320 Index: linux-2.6.10/fs/nfs/callback.c
9321 ===================================================================
9322 --- linux-2.6.10.orig/fs/nfs/callback.c 2004-12-25 05:34:57.000000000 +0800
9323 +++ linux-2.6.10/fs/nfs/callback.c      2005-04-05 14:49:13.436686632 +0800
9324 @@ -139,133 +139,10 @@
9325         return ret;
9326  }
9327  
9328 -/*
9329 - * AUTH_NULL authentication
9330 - */
9331 -static int nfs_callback_null_accept(struct svc_rqst *rqstp, u32 *authp)
9332 -{
9333 -       struct kvec    *argv = &rqstp->rq_arg.head[0];
9334 -       struct kvec    *resv = &rqstp->rq_res.head[0];
9335 -
9336 -       if (argv->iov_len < 3*4)
9337 -               return SVC_GARBAGE;
9338 -
9339 -       if (svc_getu32(argv) != 0) {
9340 -               dprintk("svc: bad null cred\n");
9341 -               *authp = rpc_autherr_badcred;
9342 -               return SVC_DENIED;
9343 -       }
9344 -       if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) {
9345 -               dprintk("svc: bad null verf\n");
9346 -                *authp = rpc_autherr_badverf;
9347 -                return SVC_DENIED;
9348 -       }
9349 -
9350 -       /* Signal that mapping to nobody uid/gid is required */
9351 -       rqstp->rq_cred.cr_uid = (uid_t) -1;
9352 -       rqstp->rq_cred.cr_gid = (gid_t) -1;
9353 -       rqstp->rq_cred.cr_group_info = groups_alloc(0);
9354 -       if (rqstp->rq_cred.cr_group_info == NULL)
9355 -               return SVC_DROP; /* kmalloc failure - client must retry */
9356 -
9357 -       /* Put NULL verifier */
9358 -       svc_putu32(resv, RPC_AUTH_NULL);
9359 -       svc_putu32(resv, 0);
9360 -       dprintk("%s: success, returning %d!\n", __FUNCTION__, SVC_OK);
9361 -       return SVC_OK;
9362 -}
9363 -
9364 -static int nfs_callback_null_release(struct svc_rqst *rqstp)
9365 -{
9366 -       if (rqstp->rq_cred.cr_group_info)
9367 -               put_group_info(rqstp->rq_cred.cr_group_info);
9368 -       rqstp->rq_cred.cr_group_info = NULL;
9369 -       return 0; /* don't drop */
9370 -}
9371 -
9372 -static struct auth_ops nfs_callback_auth_null = {
9373 -       .name = "null",
9374 -       .flavour = RPC_AUTH_NULL,
9375 -       .accept = nfs_callback_null_accept,
9376 -       .release = nfs_callback_null_release,
9377 -};
9378 -
9379 -/*
9380 - * AUTH_SYS authentication
9381 - */
9382 -static int nfs_callback_unix_accept(struct svc_rqst *rqstp, u32 *authp)
9383 -{
9384 -       struct kvec    *argv = &rqstp->rq_arg.head[0];
9385 -       struct kvec    *resv = &rqstp->rq_res.head[0];
9386 -       struct svc_cred *cred = &rqstp->rq_cred;
9387 -       u32 slen, i;
9388 -       int len = argv->iov_len;
9389 -
9390 -       dprintk("%s: start\n", __FUNCTION__);
9391 -       cred->cr_group_info = NULL;
9392 -       rqstp->rq_client = NULL;
9393 -       if ((len -= 3*4) < 0)
9394 -               return SVC_GARBAGE;
9395 -
9396 -       /* Get length, time stamp and machine name */
9397 -       svc_getu32(argv);
9398 -       svc_getu32(argv);
9399 -       slen = XDR_QUADLEN(ntohl(svc_getu32(argv)));
9400 -       if (slen > 64 || (len -= (slen + 3)*4) < 0)
9401 -               goto badcred;
9402 -       argv->iov_base = (void*)((u32*)argv->iov_base + slen);
9403 -       argv->iov_len -= slen*4;
9404 -
9405 -       cred->cr_uid = ntohl(svc_getu32(argv));
9406 -       cred->cr_gid = ntohl(svc_getu32(argv));
9407 -       slen = ntohl(svc_getu32(argv));
9408 -       if (slen > 16 || (len -= (slen + 2)*4) < 0)
9409 -               goto badcred;
9410 -       cred->cr_group_info = groups_alloc(slen);
9411 -       if (cred->cr_group_info == NULL)
9412 -               return SVC_DROP;
9413 -       for (i = 0; i < slen; i++)
9414 -               GROUP_AT(cred->cr_group_info, i) = ntohl(svc_getu32(argv));
9415 -
9416 -       if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) {
9417 -               *authp = rpc_autherr_badverf;
9418 -               return SVC_DENIED;
9419 -       }
9420 -       /* Put NULL verifier */
9421 -       svc_putu32(resv, RPC_AUTH_NULL);
9422 -       svc_putu32(resv, 0);
9423 -       dprintk("%s: success, returning %d!\n", __FUNCTION__, SVC_OK);
9424 -       return SVC_OK;
9425 -badcred:
9426 -       *authp = rpc_autherr_badcred;
9427 -       return SVC_DENIED;
9428 -}
9429 -
9430 -static int nfs_callback_unix_release(struct svc_rqst *rqstp)
9431 -{
9432 -       if (rqstp->rq_cred.cr_group_info)
9433 -               put_group_info(rqstp->rq_cred.cr_group_info);
9434 -       rqstp->rq_cred.cr_group_info = NULL;
9435 -       return 0;
9436 -}
9437 -
9438 -static struct auth_ops nfs_callback_auth_unix = {
9439 -       .name = "unix",
9440 -       .flavour = RPC_AUTH_UNIX,
9441 -       .accept = nfs_callback_unix_accept,
9442 -       .release = nfs_callback_unix_release,
9443 -};
9444 -
9445 -/*
9446 - * Hook the authentication protocol
9447 - */
9448 -static int nfs_callback_auth(struct svc_rqst *rqstp, u32 *authp)
9449 +static int nfs_callback_authenticate(struct svc_rqst *rqstp)
9450  {
9451         struct in_addr *addr = &rqstp->rq_addr.sin_addr;
9452         struct nfs4_client *clp;
9453 -       struct kvec *argv = &rqstp->rq_arg.head[0];
9454 -       int flavour;
9455 -       int retval;
9456  
9457         /* Don't talk to strangers */
9458         clp = nfs4_find_client(addr);
9459 @@ -273,34 +150,19 @@
9460                 return SVC_DROP;
9461         dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr));
9462         nfs4_put_client(clp);
9463 -       flavour = ntohl(svc_getu32(argv));
9464 -       switch(flavour) {
9465 +       switch (rqstp->rq_authop->flavour) {
9466                 case RPC_AUTH_NULL:
9467 -                       if (rqstp->rq_proc != CB_NULL) {
9468 -                               *authp = rpc_autherr_tooweak;
9469 -                               retval = SVC_DENIED;
9470 -                               break;
9471 -                       }
9472 -                       rqstp->rq_authop = &nfs_callback_auth_null;
9473 -                       retval = nfs_callback_null_accept(rqstp, authp);
9474 +                       if (rqstp->rq_proc != CB_NULL)
9475 +                               return SVC_DENIED;
9476                         break;
9477                 case RPC_AUTH_UNIX:
9478 -                       /* Eat the authentication flavour */
9479 -                       rqstp->rq_authop = &nfs_callback_auth_unix;
9480 -                       retval = nfs_callback_unix_accept(rqstp, authp);
9481                         break;
9482 +               case RPC_AUTH_GSS:
9483 +                       /* FIXME: RPCSEC_GSS handling? */
9484                 default:
9485 -                       /* FIXME: need to add RPCSEC_GSS upcalls */
9486 -#if 0
9487 -                       svc_ungetu32(argv);
9488 -                       retval = svc_authenticate(rqstp, authp);
9489 -#else
9490 -                       *authp = rpc_autherr_rejectedcred;
9491 -                       retval = SVC_DENIED;
9492 -#endif
9493 +                       return SVC_DENIED;
9494         }
9495 -       dprintk("%s: flavour %d returning error %d\n", __FUNCTION__, flavour, retval);
9496 -       return retval;
9497 +       return SVC_OK;
9498  }
9499  
9500  /*
9501 @@ -321,5 +183,5 @@
9502         .pg_name = "NFSv4 callback",                    /* service name */
9503         .pg_class = "nfs",                              /* authentication class */
9504         .pg_stats = &nfs4_callback_stats,
9505 -       .pg_authenticate = nfs_callback_auth,
9506 +       .pg_authenticate = nfs_callback_authenticate,
9507  };
9508 Index: linux-2.6.10/fs/nfs/file.c
9509 ===================================================================
9510 --- linux-2.6.10.orig/fs/nfs/file.c     2004-12-25 05:35:01.000000000 +0800
9511 +++ linux-2.6.10/fs/nfs/file.c  2005-04-05 14:49:13.453684048 +0800
9512 @@ -67,6 +67,19 @@
9513         .setattr        = nfs_setattr,
9514  };
9515  
9516 +#ifdef CONFIG_NFS_V4
9517 +
9518 +struct inode_operations nfs4_file_inode_operations = {
9519 +       .permission     = nfs_permission,
9520 +       .getattr        = nfs_getattr,
9521 +       .setattr        = nfs_setattr,
9522 +       .getxattr       = nfs4_getxattr,
9523 +       .setxattr       = nfs4_setxattr,
9524 +       .listxattr      = nfs4_listxattr,
9525 +};
9526 +
9527 +#endif /* CONFIG_NFS_V4 */
9528 +
9529  /* Hack for future NFS swap support */
9530  #ifndef IS_SWAPFILE
9531  # define IS_SWAPFILE(inode)    (0)
9532 @@ -295,10 +308,19 @@
9533  static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
9534  {
9535         struct inode *inode = filp->f_mapping->host;
9536 -       int status;
9537 +       int status = 0;
9538  
9539         lock_kernel();
9540 -       status = NFS_PROTO(inode)->lock(filp, cmd, fl);
9541 +       /* Use local locking if mounted with "-onolock" */
9542 +       if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
9543 +               status = NFS_PROTO(inode)->lock(filp, cmd, fl);
9544 +       else {
9545 +               struct file_lock *cfl = posix_test_lock(filp, fl);
9546 +               /* Report F_UNLCK unless a conflicting lock exists */
9547 +               fl->fl_type = F_UNLCK;
9548 +               if (cfl != NULL)
9549 +                       memcpy(fl, cfl, sizeof(*fl));
9550 +       }
9551         unlock_kernel();
9552         return status;
9553  }
9554 @@ -325,7 +347,11 @@
9555          *      still need to complete the unlock.
9556          */
9557         lock_kernel();
9558 -       status = NFS_PROTO(inode)->lock(filp, cmd, fl);
9559 +       /* Use local locking if mounted with "-onolock" */
9560 +       if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
9561 +               status = NFS_PROTO(inode)->lock(filp, cmd, fl);
9562 +       else
9563 +               status = posix_lock_file_wait(filp, fl);
9564         rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
9565         return status;
9566  }
9567 @@ -351,15 +377,19 @@
9568                 return status;
9569  
9570         lock_kernel();
9571 -       status = NFS_PROTO(inode)->lock(filp, cmd, fl);
9572 -       /* If we were signalled we still need to ensure that
9573 -        * we clean up any state on the server. We therefore
9574 -        * record the lock call as having succeeded in order to
9575 -        * ensure that locks_remove_posix() cleans it out when
9576 -        * the process exits.
9577 -        */
9578 -       if (status == -EINTR || status == -ERESTARTSYS)
9579 -               posix_lock_file(filp, fl);
9580 +       /* Use local locking if mounted with "-onolock" */
9581 +       if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) {
9582 +               status = NFS_PROTO(inode)->lock(filp, cmd, fl);
9583 +               /* If we were signalled we still need to ensure that
9584 +                * we clean up any state on the server. We therefore
9585 +                * record the lock call as having succeeded in order to
9586 +                * ensure that locks_remove_posix() cleans it out when
9587 +                * the process exits.
9588 +                */
9589 +               if (status == -EINTR || status == -ERESTARTSYS)
9590 +                       posix_lock_file(filp, fl);
9591 +       } else
9592 +               status = posix_lock_file_wait(filp, fl);
9593         unlock_kernel();
9594         if (status < 0)
9595                 return status;
9596 @@ -396,15 +426,6 @@
9597         if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
9598                 return -ENOLCK;
9599  
9600 -       if (NFS_PROTO(inode)->version != 4) {
9601 -               /* Fake OK code if mounted without NLM support */
9602 -               if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) {
9603 -                       if (IS_GETLK(cmd))
9604 -                               return LOCK_USE_CLNT;
9605 -                       return 0;
9606 -               }
9607 -       }
9608 -
9609         /*
9610          * No BSD flocks over NFS allowed.
9611          * Note: we could try to fake a POSIX lock request here by
9612 Index: linux-2.6.10/fs/nfs/nfs3proc.c
9613 ===================================================================
9614 --- linux-2.6.10.orig/fs/nfs/nfs3proc.c 2004-12-25 05:34:45.000000000 +0800
9615 +++ linux-2.6.10/fs/nfs/nfs3proc.c      2005-04-05 14:49:13.441685872 +0800
9616 @@ -80,10 +80,10 @@
9617         dprintk("%s: call  fsinfo\n", __FUNCTION__);
9618         info->fattr->valid = 0;
9619         status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
9620 -       dprintk("%s: reply fsinfo %d\n", __FUNCTION__, status);
9621 +       dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status);
9622         if (!(info->fattr->valid & NFS_ATTR_FATTR)) {
9623                 status = rpc_call(server->client_sys, NFS3PROC_GETATTR, fhandle, info->fattr, 0);
9624 -               dprintk("%s: reply getattr %d\n", __FUNCTION__, status);
9625 +               dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
9626         }
9627         return status;
9628  }
9629 @@ -101,7 +101,7 @@
9630         fattr->valid = 0;
9631         status = rpc_call(server->client, NFS3PROC_GETATTR,
9632                           fhandle, fattr, 0);
9633 -       dprintk("NFS reply getattr\n");
9634 +       dprintk("NFS reply getattr: %d\n", status);
9635         return status;
9636  }
9637  
9638 @@ -119,7 +119,7 @@
9639         dprintk("NFS call  setattr\n");
9640         fattr->valid = 0;
9641         status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0);
9642 -       dprintk("NFS reply setattr\n");
9643 +       dprintk("NFS reply setattr: %d\n", status);
9644         return status;
9645  }
9646  
9647 @@ -198,7 +198,7 @@
9648                 if (res.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE))
9649                         entry->mask |= MAY_EXEC;
9650         }
9651 -       dprintk("NFS reply access, status = %d\n", status);
9652 +       dprintk("NFS reply access: %d\n", status);
9653         return status;
9654  }
9655  
9656 @@ -296,7 +296,7 @@
9657   * For now, we don't implement O_EXCL.
9658   */
9659  static struct inode *
9660 -nfs3_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
9661 +nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
9662                  int flags)
9663  {
9664         struct nfs_fh           fhandle;
9665 @@ -304,8 +304,8 @@
9666         struct nfs_fattr        dir_attr;
9667         struct nfs3_createargs  arg = {
9668                 .fh             = NFS_FH(dir),
9669 -               .name           = name->name,
9670 -               .len            = name->len,
9671 +               .name           = dentry->d_name.name,
9672 +               .len            = dentry->d_name.len,
9673                 .sattr          = sattr,
9674         };
9675         struct nfs3_diropres    res = {
9676 @@ -315,7 +315,7 @@
9677         };
9678         int                     status;
9679  
9680 -       dprintk("NFS call  create %s\n", name->name);
9681 +       dprintk("NFS call  create %s\n", dentry->d_name.name);
9682         arg.createmode = NFS3_CREATE_UNCHECKED;
9683         if (flags & O_EXCL) {
9684                 arg.createmode  = NFS3_CREATE_EXCLUSIVE;
9685 @@ -353,7 +353,7 @@
9686         if (status != 0)
9687                 goto out;
9688         if (fhandle.size == 0 || !(fattr.valid & NFS_ATTR_FATTR)) {
9689 -               status = nfs3_proc_lookup(dir, name, &fhandle, &fattr);
9690 +               status = nfs3_proc_lookup(dir, &dentry->d_name, &fhandle, &fattr);
9691                 if (status != 0)
9692                         goto out;
9693         }
9694 @@ -838,6 +838,7 @@
9695         .version        = 3,                    /* protocol version */
9696         .dentry_ops     = &nfs_dentry_operations,
9697         .dir_inode_ops  = &nfs_dir_inode_operations,
9698 +       .file_inode_ops = &nfs_file_inode_operations,
9699         .getroot        = nfs3_proc_get_root,
9700         .getattr        = nfs3_proc_getattr,
9701         .setattr        = nfs3_proc_setattr,
9702 Index: linux-2.6.10/fs/nfs/nfs4proc.c
9703 ===================================================================
9704 --- linux-2.6.10.orig/fs/nfs/nfs4proc.c 2004-12-25 05:35:23.000000000 +0800
9705 +++ linux-2.6.10/fs/nfs/nfs4proc.c      2005-04-05 14:49:13.456683592 +0800
9706 @@ -477,7 +477,7 @@
9707  /*
9708   * Returns an nfs4_state + an referenced inode
9709   */
9710 -static int _nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
9711 +static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
9712  {
9713         struct nfs4_state_owner  *sp;
9714         struct nfs4_state     *state = NULL;
9715 @@ -491,7 +491,7 @@
9716         struct nfs_openargs o_arg = {
9717                 .fh             = NFS_FH(dir),
9718                 .open_flags     = flags,
9719 -               .name           = name,
9720 +               .name           = &dentry->d_name,
9721                 .server         = server,
9722                 .bitmask = server->attr_bitmask,
9723                 .claim = NFS4_OPEN_CLAIM_NULL,
9724 @@ -581,14 +581,14 @@
9725  }
9726  
9727  
9728 -struct nfs4_state *nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr, struct rpc_cred *cred)
9729 +struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred)
9730  {
9731         struct nfs4_exception exception = { };
9732         struct nfs4_state *res;
9733         int status;
9734  
9735         do {
9736 -               status = _nfs4_do_open(dir, name, flags, sattr, cred, &res);
9737 +               status = _nfs4_do_open(dir, dentry, flags, sattr, cred, &res);
9738                 if (status == 0)
9739                         break;
9740                 /* NOTE: BAD_SEQID means the server and client disagree about the
9741 @@ -635,6 +635,8 @@
9742  
9743          fattr->valid = 0;
9744  
9745 +       if (state != NULL)
9746 +               msg.rpc_cred = state->owner->so_cred;
9747         if (sattr->ia_valid & ATTR_SIZE)
9748                 nfs4_copy_stateid(&arg.stateid, state, NULL);
9749         else
9750 @@ -658,6 +660,61 @@
9751         return err;
9752  }
9753  
9754 +struct nfs4_closedata {
9755 +       struct inode *inode;
9756 +       struct nfs4_state *state;
9757 +       struct nfs_closeargs arg;
9758 +       struct nfs_closeres res;
9759 +};
9760 +
9761 +static void nfs4_close_done(struct rpc_task *task)
9762 +{
9763 +       struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata;
9764 +       struct nfs4_state *state = calldata->state;
9765 +       struct nfs4_state_owner *sp = state->owner;
9766 +       struct nfs_server *server = NFS_SERVER(calldata->inode);
9767 +
9768 +        /* hmm. we are done with the inode, and in the process of freeing
9769 +        * the state_owner. we keep this around to process errors
9770 +        */
9771 +       nfs4_increment_seqid(task->tk_status, sp);
9772 +       switch (task->tk_status) {
9773 +               case 0:
9774 +                       state->state = calldata->arg.open_flags;
9775 +                       memcpy(&state->stateid, &calldata->res.stateid,
9776 +                                       sizeof(state->stateid));
9777 +                       break;
9778 +               case -NFS4ERR_STALE_STATEID:
9779 +               case -NFS4ERR_EXPIRED:
9780 +                       state->state = calldata->arg.open_flags;
9781 +                       nfs4_schedule_state_recovery(server->nfs4_state);
9782 +                       break;
9783 +               default:
9784 +                       if (nfs4_async_handle_error(task, server) == -EAGAIN) {
9785 +                               rpc_restart_call(task);
9786 +                               return;
9787 +                       }
9788 +       }
9789 +       nfs4_put_open_state(state);
9790 +       up(&sp->so_sema);
9791 +       nfs4_put_state_owner(sp);
9792 +       up_read(&server->nfs4_state->cl_sem);
9793 +       kfree(calldata);
9794 +}
9795 +
9796 +static inline int nfs4_close_call(struct rpc_clnt *clnt, struct nfs4_closedata *calldata)
9797 +{
9798 +       struct rpc_message msg = {
9799 +               .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE],
9800 +               .rpc_argp = &calldata->arg,
9801 +               .rpc_resp = &calldata->res,
9802 +               .rpc_cred = calldata->state->owner->so_cred,
9803 +       };
9804 +       if (calldata->arg.open_flags != 0)
9805 +               msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
9806 +       return rpc_call_async(clnt, &msg, 0, nfs4_close_done, calldata);
9807 +}
9808 +
9809  /* 
9810   * It is possible for data to be read/written from a mem-mapped file 
9811   * after the sys_close call (which hits the vfs layer as a flush).
9812 @@ -669,102 +726,34 @@
9813   *
9814   * NOTE: Caller must be holding the sp->so_owner semaphore!
9815   */
9816 -static int _nfs4_do_close(struct inode *inode, struct nfs4_state *state) 
9817 +int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode) 
9818  {
9819 -       struct nfs4_state_owner *sp = state->owner;
9820 -       int status = 0;
9821 -       struct nfs_closeargs arg = {
9822 -               .fh             = NFS_FH(inode),
9823 -       };
9824 -       struct nfs_closeres res;
9825 -       struct rpc_message msg = {
9826 -               .rpc_proc       = &nfs4_procedures[NFSPROC4_CLNT_CLOSE],
9827 -               .rpc_argp       = &arg,
9828 -               .rpc_resp       = &res,
9829 -       };
9830 +       struct nfs4_closedata *calldata;
9831 +       int status;
9832  
9833 -       if (test_bit(NFS_DELEGATED_STATE, &state->flags))
9834 +       /* Tell caller we're done */
9835 +       if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
9836 +               state->state = mode;
9837                 return 0;
9838 -       memcpy(&arg.stateid, &state->stateid, sizeof(arg.stateid));
9839 +       }
9840 +       calldata = (struct nfs4_closedata *)kmalloc(sizeof(*calldata), GFP_KERNEL);
9841 +       if (calldata == NULL)
9842 +               return -ENOMEM;
9843 +       calldata->inode = inode;
9844 +       calldata->state = state;
9845 +       calldata->arg.fh = NFS_FH(inode);
9846         /* Serialization for the sequence id */
9847 -       arg.seqid = sp->so_seqid,
9848 -       status = rpc_call_sync(NFS_SERVER(inode)->client, &msg, RPC_TASK_NOINTR);
9849 -
9850 -        /* hmm. we are done with the inode, and in the process of freeing
9851 -        * the state_owner. we keep this around to process errors
9852 +       calldata->arg.seqid = state->owner->so_seqid;
9853 +       calldata->arg.open_flags = mode;
9854 +       memcpy(&calldata->arg.stateid, &state->stateid,
9855 +                       sizeof(calldata->arg.stateid));
9856 +       status = nfs4_close_call(NFS_SERVER(inode)->client, calldata);
9857 +       /*
9858 +        * Return -EINPROGRESS on success in order to indicate to the
9859 +        * caller that an asynchronous RPC call has been launched, and
9860 +        * that it will release the semaphores on completion.
9861          */
9862 -       nfs4_increment_seqid(status, sp);
9863 -       if (!status)
9864 -               memcpy(&state->stateid, &res.stateid, sizeof(state->stateid));
9865 -
9866 -       return status;
9867 -}
9868 -
9869 -int nfs4_do_close(struct inode *inode, struct nfs4_state *state) 
9870 -{
9871 -       struct nfs_server *server = NFS_SERVER(state->inode);
9872 -       struct nfs4_exception exception = { };
9873 -       int err;
9874 -       do {
9875 -               err = _nfs4_do_close(inode, state);
9876 -               switch (err) {
9877 -                       case -NFS4ERR_STALE_STATEID:
9878 -                       case -NFS4ERR_EXPIRED:
9879 -                               nfs4_schedule_state_recovery(server->nfs4_state);
9880 -                               err = 0;
9881 -                       default:
9882 -                               state->state = 0;
9883 -               }
9884 -               err = nfs4_handle_exception(server, err, &exception);
9885 -       } while (exception.retry);
9886 -       return err;
9887 -}
9888 -
9889 -static int _nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode) 
9890 -{
9891 -       struct nfs4_state_owner *sp = state->owner;
9892 -       int status = 0;
9893 -       struct nfs_closeargs arg = {
9894 -               .fh             = NFS_FH(inode),
9895 -               .seqid          = sp->so_seqid,
9896 -               .open_flags     = mode,
9897 -       };
9898 -       struct nfs_closeres res;
9899 -       struct rpc_message msg = {
9900 -               .rpc_proc       = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE],
9901 -               .rpc_argp       = &arg,
9902 -               .rpc_resp       = &res,
9903 -       };
9904 -
9905 -       if (test_bit(NFS_DELEGATED_STATE, &state->flags))
9906 -               return 0;
9907 -       memcpy(&arg.stateid, &state->stateid, sizeof(arg.stateid));
9908 -       status = rpc_call_sync(NFS_SERVER(inode)->client, &msg, RPC_TASK_NOINTR);
9909 -       nfs4_increment_seqid(status, sp);
9910 -       if (!status)
9911 -               memcpy(&state->stateid, &res.stateid, sizeof(state->stateid));
9912 -
9913 -       return status;
9914 -}
9915 -
9916 -int nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode) 
9917 -{
9918 -       struct nfs_server *server = NFS_SERVER(state->inode);
9919 -       struct nfs4_exception exception = { };
9920 -       int err;
9921 -       do {
9922 -               err = _nfs4_do_downgrade(inode, state, mode);
9923 -               switch (err) {
9924 -                       case -NFS4ERR_STALE_STATEID:
9925 -                       case -NFS4ERR_EXPIRED:
9926 -                               nfs4_schedule_state_recovery(server->nfs4_state);
9927 -                               err = 0;
9928 -                       default:
9929 -                               state->state = mode;
9930 -               }
9931 -               err = nfs4_handle_exception(server, err, &exception);
9932 -       } while (exception.retry);
9933 -       return err;
9934 +       return (status == 0) ? -EINPROGRESS : status;
9935  }
9936  
9937  struct inode *
9938 @@ -785,7 +774,7 @@
9939         }
9940  
9941         cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
9942 -       state = nfs4_do_open(dir, &dentry->d_name, nd->intent.open.flags, &attr, cred);
9943 +       state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
9944         put_rpccred(cred);
9945         if (IS_ERR(state))
9946                 return (struct inode *)state;
9947 @@ -802,7 +791,7 @@
9948         cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
9949         state = nfs4_open_delegated(dentry->d_inode, openflags, cred);
9950         if (IS_ERR(state))
9951 -               state = nfs4_do_open(dir, &dentry->d_name, openflags, NULL, cred);
9952 +               state = nfs4_do_open(dir, dentry, openflags, NULL, cred);
9953         put_rpccred(cred);
9954         if (state == ERR_PTR(-ENOENT) && dentry->d_inode == 0)
9955                 return 1;
9956 @@ -1026,7 +1015,7 @@
9957                                         FMODE_WRITE, cred);
9958                         if (IS_ERR(state))
9959                                 state = nfs4_do_open(dentry->d_parent->d_inode,
9960 -                                               &dentry->d_name, FMODE_WRITE,
9961 +                                               dentry, FMODE_WRITE,
9962                                                 NULL, cred);
9963                         need_iput = 1;
9964                 }
9965 @@ -1327,7 +1316,7 @@
9966   */
9967  
9968  static struct inode *
9969 -nfs4_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
9970 +nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
9971                   int flags)
9972  {
9973         struct inode *inode;
9974 @@ -1335,7 +1324,7 @@
9975         struct rpc_cred *cred;
9976  
9977         cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
9978 -       state = nfs4_do_open(dir, name, flags, sattr, cred);
9979 +       state = nfs4_do_open(dir, dentry, flags, sattr, cred);
9980         put_rpccred(cred);
9981         if (!IS_ERR(state)) {
9982                 inode = state->inode;
9983 @@ -2049,6 +2038,86 @@
9984  }
9985  
9986  static int
9987 +nfs4_server_supports_acls(struct nfs_server *server)
9988 +{
9989 +       return (server->caps & NFS_CAP_ACLS)
9990 +               && (server->acl_bitmask & ACL4_SUPPORT_ALLOW_ACL)
9991 +               && (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL);
9992 +}
9993 +
9994 +/* XXX: assuming XATTR_SIZE_MAX is a multiple of PAGE_CACHE_SIZE,
9995 + * and that it's OK to put sizeof(void *) * (XATTR_SIZE_MAX/PAGE_CACHE_SIZE)
9996 + * bytes on the stack.  (Currently probably both true.)
9997 + */
9998 +#define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT)
9999 +
10000 +static void buf_to_pages(const void *buf, ssize_t buflen,
10001 +               struct page **pages, unsigned int *pgbase)
10002 +{
10003 +       const void *p = buf;
10004 +
10005 +       *pgbase = offset_in_page(buf);
10006 +       p -= *pgbase;
10007 +       while (p < buf + buflen) {
10008 +               *(pages++) = virt_to_page(p);
10009 +               p += PAGE_CACHE_SIZE;
10010 +       }
10011 +}
10012 +
10013 +ssize_t
10014 +nfs4_proc_get_acl(struct inode *inode, void *buf, ssize_t buflen)
10015 +{
10016 +       struct nfs_server *server = NFS_SERVER(inode);
10017 +       struct page *pages[NFS4ACL_MAXPAGES];
10018 +       struct nfs_getaclargs args = {
10019 +               .fh = NFS_FH(inode),
10020 +               .acl_pages = pages,
10021 +               .acl_len = buflen,
10022 +       };
10023 +       ssize_t acl_len = buflen;
10024 +       struct rpc_message msg = {
10025 +               .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL],
10026 +               .rpc_argp = &args,
10027 +               .rpc_resp = &acl_len,
10028 +       };
10029 +       int ret;
10030 +
10031 +       if (!nfs4_server_supports_acls(server))
10032 +               return -EOPNOTSUPP;
10033 +       buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase);
10034 +       ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
10035 +       if (buflen && acl_len > buflen)
10036 +               return -ERANGE;
10037 +       if (ret == 0)
10038 +               ret = acl_len;
10039 +       return ret;
10040 +}
10041 +
10042 +int
10043 +nfs4_proc_set_acl(struct inode *inode, const void *buf, ssize_t buflen)
10044 +{
10045 +       struct nfs_server *server = NFS_SERVER(inode);
10046 +       struct page *pages[NFS4ACL_MAXPAGES];
10047 +       struct nfs_setaclargs arg = {
10048 +               .fh             = NFS_FH(inode),
10049 +               .acl_pages      = pages,
10050 +               .acl_len        = buflen,
10051 +       };
10052 +       struct rpc_message msg = {
10053 +               .rpc_proc       = &nfs4_procedures[NFSPROC4_CLNT_SETACL],
10054 +               .rpc_argp       = &arg,
10055 +               .rpc_resp       = NULL,
10056 +       };
10057 +       int ret;
10058 +
10059 +       if (!nfs4_server_supports_acls(server))
10060 +               return -EOPNOTSUPP;
10061 +       buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
10062 +       ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0);
10063 +       return ret;
10064 +}
10065 +
10066 +static int
10067  nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server)
10068  {
10069         struct nfs4_client *clp = server->nfs4_state;
10070 @@ -2589,6 +2658,7 @@
10071         .version        = 4,                    /* protocol version */
10072         .dentry_ops     = &nfs4_dentry_operations,
10073         .dir_inode_ops  = &nfs4_dir_inode_operations,
10074 +       .file_inode_ops = &nfs4_file_inode_operations,
10075         .getroot        = nfs4_proc_get_root,
10076         .getattr        = nfs4_proc_getattr,
10077         .setattr        = nfs4_proc_setattr,
10078 Index: linux-2.6.10/fs/nfs/direct.c
10079 ===================================================================
10080 --- linux-2.6.10.orig/fs/nfs/direct.c   2005-03-31 15:35:23.000000000 +0800
10081 +++ linux-2.6.10/fs/nfs/direct.c        2005-04-05 14:49:13.448684808 +0800
10082 @@ -33,6 +33,7 @@
10083   * 08 Jul 2002 Version for 2.4.19, with bug fixes --trondmy
10084   * 08 Jun 2003 Port to 2.5 APIs  --cel
10085   * 31 Mar 2004 Handle direct I/O without VFS support  --cel
10086 + * 15 Sep 2004 Parallel async reads  --cel
10087   *
10088   */
10089  
10090 @@ -43,6 +44,7 @@
10091  #include <linux/smp_lock.h>
10092  #include <linux/file.h>
10093  #include <linux/pagemap.h>
10094 +#include <linux/kref.h>
10095  
10096  #include <linux/nfs_fs.h>
10097  #include <linux/nfs_page.h>
10098 @@ -50,11 +52,27 @@
10099  
10100  #include <asm/system.h>
10101  #include <asm/uaccess.h>
10102 +#include <asm/atomic.h>
10103  
10104  #define NFSDBG_FACILITY                NFSDBG_VFS
10105 -#define VERF_SIZE              (2 * sizeof(__u32))
10106  #define MAX_DIRECTIO_SIZE      (4096UL << PAGE_SHIFT)
10107  
10108 +static kmem_cache_t *nfs_direct_cachep;
10109 +
10110 +/*
10111 + * This represents a set of asynchronous requests that we're waiting on
10112 + */
10113 +struct nfs_direct_req {
10114 +       struct kref             kref;           /* release manager */
10115 +       struct list_head        list;           /* nfs_read_data structs */
10116 +       wait_queue_head_t       wait;           /* wait for i/o completion */
10117 +       struct page **          pages;          /* pages in our buffer */
10118 +       unsigned int            npages;         /* count of pages */
10119 +       atomic_t                complete,       /* i/os we're waiting for */
10120 +                               count,          /* bytes actually processed */
10121 +                               error;          /* any reported error */
10122 +};
10123 +
10124  
10125  /**
10126   * nfs_get_user_pages - find and set up pages underlying user's buffer
10127 @@ -71,7 +89,8 @@
10128         unsigned long page_count;
10129         size_t array_size;
10130  
10131 -       /* set an arbitrary limit to prevent arithmetic overflow */
10132 +       /* set an arbitrary limit to prevent type overflow */
10133 +       /* XXX: this can probably be as large as INT_MAX */
10134         if (size > MAX_DIRECTIO_SIZE) {
10135                 *pages = NULL;
10136                 return -EFBIG;
10137 @@ -95,6 +114,8 @@
10138  /**
10139   * nfs_free_user_pages - tear down page struct array
10140   * @pages: array of page struct pointers underlying target buffer
10141 + * @npages: number of pages in the array
10142 + * @do_dirty: dirty the pages as we release them
10143   */
10144  static void
10145  nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
10146 @@ -109,77 +130,231 @@
10147  }
10148  
10149  /**
10150 - * nfs_direct_read_seg - Read in one iov segment.  Generate separate
10151 - *                        read RPCs for each "rsize" bytes.
10152 + * nfs_direct_req_release - release nfs_direct_req structure for direct read
10153 + * @kref: kref object embedded in an nfs_direct_req structure
10154 + *
10155 + */
10156 +static void nfs_direct_req_release(struct kref *kref)
10157 +{
10158 +       struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
10159 +       kmem_cache_free(nfs_direct_cachep, dreq);
10160 +}
10161 +
10162 +/**
10163 + * nfs_direct_read_alloc - allocate nfs_read_data structures for direct read
10164 + * @nbytes: count of bytes for the read request
10165 + * @rsize: local rsize setting
10166 + *
10167 + * Note we also set the number of requests we have in the dreq when we are
10168 + * done.  This prevents races with I/O completion so we will always wait
10169 + * until all requests have been dispatched and completed.
10170 + */
10171 +static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int rsize)
10172 +{
10173 +       struct list_head *list;
10174 +       struct nfs_direct_req *dreq;
10175 +       unsigned int reads = 0;
10176 +
10177 +       dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
10178 +       if (!dreq)
10179 +               return NULL;
10180 +
10181 +       kref_init(&dreq->kref);
10182 +       init_waitqueue_head(&dreq->wait);
10183 +       INIT_LIST_HEAD(&dreq->list);
10184 +       atomic_set(&dreq->count, 0);
10185 +       atomic_set(&dreq->error, 0);
10186 +
10187 +       list = &dreq->list;
10188 +       for(;;) {
10189 +               struct nfs_read_data *data = nfs_readdata_alloc();
10190 +
10191 +               if (unlikely(!data)) {
10192 +                       while (!list_empty(list)) {
10193 +                               data = list_entry(list->next,
10194 +                                                 struct nfs_read_data, pages);
10195 +                               list_del(&data->pages);
10196 +                               nfs_readdata_free(data);
10197 +                       }
10198 +                       kref_put(&dreq->kref, nfs_direct_req_release);
10199 +                       return NULL;
10200 +               }
10201 +
10202 +               INIT_LIST_HEAD(&data->pages);
10203 +               list_add(&data->pages, list);
10204 +
10205 +               data->req = (struct nfs_page *) dreq;
10206 +               reads++;
10207 +               if (nbytes <= rsize)
10208 +                       break;
10209 +               nbytes -= rsize;
10210 +       }
10211 +       kref_get(&dreq->kref);
10212 +       atomic_set(&dreq->complete, reads);
10213 +       return dreq;
10214 +}
10215 +
10216 +/**
10217 + * nfs_direct_read_result - handle a read reply for a direct read request
10218 + * @data: address of NFS READ operation control block
10219 + * @status: status of this NFS READ operation
10220 + *
10221 + * We must hold a reference to all the pages in this direct read request
10222 + * until the RPCs complete.  This could be long *after* we are woken up in
10223 + * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server).
10224 + */
10225 +static void nfs_direct_read_result(struct nfs_read_data *data, int status)
10226 +{
10227 +       struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
10228 +
10229 +       if (likely(status >= 0))
10230 +               atomic_add(data->res.count, &dreq->count);
10231 +       else
10232 +               atomic_set(&dreq->error, status);
10233 +
10234 +       if (unlikely(atomic_dec_and_test(&dreq->complete))) {
10235 +               nfs_free_user_pages(dreq->pages, dreq->npages, 1);
10236 +               wake_up(&dreq->wait);
10237 +               kref_put(&dreq->kref, nfs_direct_req_release);
10238 +       }
10239 +}
10240 +
10241 +/**
10242 + * nfs_direct_read_schedule - dispatch NFS READ operations for a direct read
10243 + * @dreq: address of nfs_direct_req struct for this request
10244   * @inode: target inode
10245   * @ctx: target file open context
10246 - * user_addr: starting address of this segment of user's buffer
10247 - * count: size of this segment
10248 - * file_offset: offset in file to begin the operation
10249 - * @pages: array of addresses of page structs defining user's buffer
10250 - * nr_pages: size of pages array
10251 + * @user_addr: starting address of this segment of user's buffer
10252 + * @count: size of this segment
10253 + * @file_offset: offset in file to begin the operation
10254 + *
10255 + * For each nfs_read_data struct that was allocated on the list, dispatch
10256 + * an NFS READ operation
10257   */
10258 -static int
10259 -nfs_direct_read_seg(struct inode *inode, struct nfs_open_context *ctx,
10260 -               unsigned long user_addr, size_t count, loff_t file_offset,
10261 -               struct page **pages, int nr_pages)
10262 -{
10263 -       const unsigned int rsize = NFS_SERVER(inode)->rsize;
10264 -       int tot_bytes = 0;
10265 -       int curpage = 0;
10266 -       struct nfs_read_data    rdata = {
10267 -               .inode          = inode,
10268 -               .cred           = ctx->cred,
10269 -               .args           = {
10270 -                       .fh             = NFS_FH(inode),
10271 -                       .context        = ctx,
10272 -               },
10273 -               .res            = {
10274 -                       .fattr          = &rdata.fattr,
10275 -               },
10276 -       };
10277 +static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
10278 +               struct inode *inode, struct nfs_open_context *ctx,
10279 +               unsigned long user_addr, size_t count, loff_t file_offset)
10280 +{
10281 +       struct list_head *list = &dreq->list;
10282 +       struct page **pages = dreq->pages;
10283 +       unsigned int curpage, pgbase;
10284 +       unsigned int rsize = NFS_SERVER(inode)->rsize;
10285  
10286 -       rdata.args.pgbase = user_addr & ~PAGE_MASK;
10287 -       rdata.args.offset = file_offset;
10288 -        do {
10289 -               int result;
10290 -
10291 -               rdata.args.count = count;
10292 -                if (rdata.args.count > rsize)
10293 -                        rdata.args.count = rsize;
10294 -               rdata.args.pages = &pages[curpage];
10295 -
10296 -               dprintk("NFS: direct read: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n",
10297 -                       rdata.args.count, (long long) rdata.args.offset,
10298 -                       user_addr + tot_bytes, rdata.args.pgbase, curpage);
10299 +       curpage = 0;
10300 +       pgbase = user_addr & ~PAGE_MASK;
10301 +       do {
10302 +               struct nfs_read_data *data;
10303 +               unsigned int bytes;
10304 +
10305 +               bytes = rsize;
10306 +               if (count < rsize)
10307 +                       bytes = count;
10308 +
10309 +               data = list_entry(list->next, struct nfs_read_data, pages);
10310 +               list_del_init(&data->pages);
10311 +
10312 +               data->inode = inode;
10313 +               data->cred = ctx->cred;
10314 +               data->args.fh = NFS_FH(inode);
10315 +               data->args.context = ctx;
10316 +               data->args.offset = file_offset;
10317 +               data->args.pgbase = pgbase;
10318 +               data->args.pages = &pages[curpage];
10319 +               data->args.count = bytes;
10320 +               data->res.fattr = &data->fattr;
10321 +               data->res.eof = 0;
10322 +               data->res.count = bytes;
10323 +
10324 +               NFS_PROTO(inode)->read_setup(data);
10325 +
10326 +               data->task.tk_cookie = (unsigned long) inode;
10327 +               data->task.tk_calldata = data;
10328 +               data->task.tk_release = nfs_readdata_release;
10329 +               data->complete = nfs_direct_read_result;
10330  
10331                 lock_kernel();
10332 -               result = NFS_PROTO(inode)->read(&rdata);
10333 +               rpc_execute(&data->task);
10334                 unlock_kernel();
10335  
10336 -               if (result <= 0) {
10337 -                       if (tot_bytes > 0)
10338 -                               break;
10339 -                       if (result == -EISDIR)
10340 -                               result = -EINVAL;
10341 -                       return result;
10342 -               }
10343 +               dfprintk(VFS, "NFS: %4d initiated direct read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
10344 +                               data->task.tk_pid,
10345 +                               inode->i_sb->s_id,
10346 +                               (long long)NFS_FILEID(inode),
10347 +                               bytes,
10348 +                               (unsigned long long)data->args.offset);
10349 +
10350 +               file_offset += bytes;
10351 +               pgbase += bytes;
10352 +               curpage += pgbase >> PAGE_SHIFT;
10353 +               pgbase &= ~PAGE_MASK;
10354  
10355 -                tot_bytes += result;
10356 -               if (rdata.res.eof)
10357 -                       break;
10358 -
10359 -                rdata.args.offset += result;
10360 -               rdata.args.pgbase += result;
10361 -               curpage += rdata.args.pgbase >> PAGE_SHIFT;
10362 -               rdata.args.pgbase &= ~PAGE_MASK;
10363 -               count -= result;
10364 +               count -= bytes;
10365         } while (count != 0);
10366 +}
10367  
10368 -       /* XXX: should we zero the rest of the user's buffer if we
10369 -        *      hit eof? */
10370 +/**
10371 + * nfs_direct_read_wait - wait for I/O completion for direct reads
10372 + * @dreq: request on which we are to wait
10373 + * @intr: whether or not this wait can be interrupted
10374 + *
10375 + * Collects and returns the final error value/byte-count.
10376 + */
10377 +static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr)
10378 +{
10379 +       int result = 0;
10380  
10381 -       return tot_bytes;
10382 +       if (intr) {
10383 +               result = wait_event_interruptible(dreq->wait,
10384 +                                       (atomic_read(&dreq->complete) == 0));
10385 +       } else {
10386 +               wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0));
10387 +       }
10388 +
10389 +       if (!result)
10390 +               result = atomic_read(&dreq->error);
10391 +       if (!result)
10392 +               result = atomic_read(&dreq->count);
10393 +
10394 +       kref_put(&dreq->kref, nfs_direct_req_release);
10395 +       return (ssize_t) result;
10396 +}
10397 +
10398 +/**
10399 + * nfs_direct_read_seg - Read in one iov segment.  Generate separate
10400 + *                        read RPCs for each "rsize" bytes.
10401 + * @inode: target inode
10402 + * @ctx: target file open context
10403 + * @user_addr: starting address of this segment of user's buffer
10404 + * @count: size of this segment
10405 + * @file_offset: offset in file to begin the operation
10406 + * @pages: array of addresses of page structs defining user's buffer
10407 + * @nr_pages: number of pages in the array
10408 + *
10409 + */
10410 +static ssize_t nfs_direct_read_seg(struct inode *inode,
10411 +               struct nfs_open_context *ctx, unsigned long user_addr,
10412 +               size_t count, loff_t file_offset, struct page **pages,
10413 +               unsigned int nr_pages)
10414 +{
10415 +       ssize_t result;
10416 +       sigset_t oldset;
10417 +       struct rpc_clnt *clnt = NFS_CLIENT(inode);
10418 +       struct nfs_direct_req *dreq;
10419 +
10420 +       dreq = nfs_direct_read_alloc(count, NFS_SERVER(inode)->rsize);
10421 +       if (!dreq)
10422 +               return -ENOMEM;
10423 +
10424 +       dreq->pages = pages;
10425 +       dreq->npages = nr_pages;
10426 +
10427 +       rpc_clnt_sigmask(clnt, &oldset);
10428 +       nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count,
10429 +                                file_offset);
10430 +       result = nfs_direct_read_wait(dreq, clnt->cl_intr);
10431 +       rpc_clnt_sigunmask(clnt, &oldset);
10432 +
10433 +       return result;
10434  }
10435  
10436  /**
10437 @@ -191,9 +366,8 @@
10438   * file_offset: offset in file to begin the operation
10439   * nr_segs: size of iovec array
10440   *
10441 - * generic_file_direct_IO has already pushed out any non-direct
10442 - * writes so that this read will see them when we read from the
10443 - * server.
10444 + * We've already pushed out any non-direct writes so that this read
10445 + * will see them when we read from the server.
10446   */
10447  static ssize_t
10448  nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx,
10449 @@ -222,8 +396,6 @@
10450                 result = nfs_direct_read_seg(inode, ctx, user_addr, size,
10451                                 file_offset, pages, page_count);
10452  
10453 -               nfs_free_user_pages(pages, page_count, 1);
10454 -
10455                 if (result <= 0) {
10456                         if (tot_bytes > 0)
10457                                 break;
10458 @@ -249,31 +421,31 @@
10459   * @pages: array of addresses of page structs defining user's buffer
10460   * nr_pages: size of pages array
10461   */
10462 -static int
10463 -nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx,
10464 -               unsigned long user_addr, size_t count, loff_t file_offset,
10465 -               struct page **pages, int nr_pages)
10466 +static ssize_t nfs_direct_write_seg(struct inode *inode,
10467 +               struct nfs_open_context *ctx, unsigned long user_addr,
10468 +               size_t count, loff_t file_offset, struct page **pages,
10469 +               int nr_pages)
10470  {
10471         const unsigned int wsize = NFS_SERVER(inode)->wsize;
10472         size_t request;
10473 -       int curpage, need_commit, result, tot_bytes;
10474 +       int curpage, need_commit;
10475 +       ssize_t result, tot_bytes;
10476         struct nfs_writeverf first_verf;
10477 -       struct nfs_write_data   wdata = {
10478 -               .inode          = inode,
10479 -               .cred           = ctx->cred,
10480 -               .args           = {
10481 -                       .fh             = NFS_FH(inode),
10482 -                       .context        = ctx,
10483 -               },
10484 -               .res            = {
10485 -                       .fattr          = &wdata.fattr,
10486 -                       .verf           = &wdata.verf,
10487 -               },
10488 -       };
10489 +       struct nfs_write_data *wdata;
10490  
10491 -       wdata.args.stable = NFS_UNSTABLE;
10492 +       wdata = nfs_writedata_alloc();
10493 +       if (!wdata)
10494 +               return -ENOMEM;
10495 +
10496 +       wdata->inode = inode;
10497 +       wdata->cred = ctx->cred;
10498 +       wdata->args.fh = NFS_FH(inode);
10499 +       wdata->args.context = ctx;
10500 +       wdata->args.stable = NFS_UNSTABLE;
10501         if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize)
10502 -               wdata.args.stable = NFS_FILE_SYNC;
10503 +               wdata->args.stable = NFS_FILE_SYNC;
10504 +       wdata->res.fattr = &wdata->fattr;
10505 +       wdata->res.verf = &wdata->verf;
10506  
10507         nfs_begin_data_update(inode);
10508  retry:
10509 @@ -281,20 +453,20 @@
10510         tot_bytes = 0;
10511         curpage = 0;
10512         request = count;
10513 -       wdata.args.pgbase = user_addr & ~PAGE_MASK;
10514 -       wdata.args.offset = file_offset;
10515 -        do {
10516 -               wdata.args.count = request;
10517 -                if (wdata.args.count > wsize)
10518 -                        wdata.args.count = wsize;
10519 -               wdata.args.pages = &pages[curpage];
10520 +       wdata->args.pgbase = user_addr & ~PAGE_MASK;
10521 +       wdata->args.offset = file_offset;
10522 +       do {
10523 +               wdata->args.count = request;
10524 +               if (wdata->args.count > wsize)
10525 +                       wdata->args.count = wsize;
10526 +               wdata->args.pages = &pages[curpage];
10527  
10528                 dprintk("NFS: direct write: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n",
10529 -                       wdata.args.count, (long long) wdata.args.offset,
10530 -                       user_addr + tot_bytes, wdata.args.pgbase, curpage);
10531 +                       wdata->args.count, (long long) wdata->args.offset,
10532 +                       user_addr + tot_bytes, wdata->args.pgbase, curpage);
10533  
10534                 lock_kernel();
10535 -               result = NFS_PROTO(inode)->write(&wdata);
10536 +               result = NFS_PROTO(inode)->write(wdata);
10537                 unlock_kernel();
10538  
10539                 if (result <= 0) {
10540 @@ -304,20 +476,25 @@
10541                 }
10542  
10543                 if (tot_bytes == 0)
10544 -                       memcpy(&first_verf.verifier, &wdata.verf.verifier,
10545 -                                                               VERF_SIZE);
10546 -               if (wdata.verf.committed != NFS_FILE_SYNC) {
10547 +                       memcpy(&first_verf.verifier, &wdata->verf.verifier,
10548 +                                               sizeof(first_verf.verifier));
10549 +               if (wdata->verf.committed != NFS_FILE_SYNC) {
10550                         need_commit = 1;
10551 -                       if (memcmp(&first_verf.verifier,
10552 -                                       &wdata.verf.verifier, VERF_SIZE))
10553 +                       if (memcmp(&first_verf.verifier, &wdata->verf.verifier,
10554 +                                       sizeof(first_verf.verifier)))
10555                                 goto sync_retry;
10556                 }
10557  
10558 -                tot_bytes += result;
10559 -                wdata.args.offset += result;
10560 -               wdata.args.pgbase += result;
10561 -               curpage += wdata.args.pgbase >> PAGE_SHIFT;
10562 -               wdata.args.pgbase &= ~PAGE_MASK;
10563 +               tot_bytes += result;
10564 +
10565 +               /* in case of a short write: stop now, let the app recover */
10566 +               if (result < wdata->args.count)
10567 +                       break;
10568 +
10569 +               wdata->args.offset += result;
10570 +               wdata->args.pgbase += result;
10571 +               curpage += wdata->args.pgbase >> PAGE_SHIFT;
10572 +               wdata->args.pgbase &= ~PAGE_MASK;
10573                 request -= result;
10574         } while (request != 0);
10575  
10576 @@ -325,27 +502,27 @@
10577          * Commit data written so far, even in the event of an error
10578          */
10579         if (need_commit) {
10580 -               wdata.args.count = tot_bytes;
10581 -               wdata.args.offset = file_offset;
10582 +               wdata->args.count = tot_bytes;
10583 +               wdata->args.offset = file_offset;
10584  
10585                 lock_kernel();
10586 -               result = NFS_PROTO(inode)->commit(&wdata);
10587 +               result = NFS_PROTO(inode)->commit(wdata);
10588                 unlock_kernel();
10589  
10590                 if (result < 0 || memcmp(&first_verf.verifier,
10591 -                                               &wdata.verf.verifier,
10592 -                                               VERF_SIZE) != 0)
10593 +                                        &wdata->verf.verifier,
10594 +                                        sizeof(first_verf.verifier)) != 0)
10595                         goto sync_retry;
10596         }
10597         result = tot_bytes;
10598  
10599  out:
10600         nfs_end_data_update_defer(inode);
10601 -
10602 +       nfs_writedata_free(wdata);
10603         return result;
10604  
10605  sync_retry:
10606 -       wdata.args.stable = NFS_FILE_SYNC;
10607 +       wdata->args.stable = NFS_FILE_SYNC;
10608         goto retry;
10609  }
10610  
10611 @@ -362,9 +539,9 @@
10612   * that non-direct readers might access, so they will pick up these
10613   * writes immediately.
10614   */
10615 -static int nfs_direct_write(struct inode *inode, struct nfs_open_context *ctx,
10616 -               const struct iovec *iov, loff_t file_offset,
10617 -               unsigned long nr_segs)
10618 +static ssize_t nfs_direct_write(struct inode *inode,
10619 +               struct nfs_open_context *ctx, const struct iovec *iov,
10620 +               loff_t file_offset, unsigned long nr_segs)
10621  {
10622         ssize_t tot_bytes = 0;
10623         unsigned long seg = 0;
10624 @@ -504,6 +681,8 @@
10625         if (mapping->nrpages) {
10626                 retval = filemap_fdatawrite(mapping);
10627                 if (retval == 0)
10628 +                       retval = nfs_wb_all(inode);
10629 +               if (retval == 0)
10630                         retval = filemap_fdatawait(mapping);
10631                 if (retval)
10632                         goto out;
10633 @@ -593,6 +772,8 @@
10634         if (mapping->nrpages) {
10635                 retval = filemap_fdatawrite(mapping);
10636                 if (retval == 0)
10637 +                       retval = nfs_wb_all(inode);
10638 +               if (retval == 0)
10639                         retval = filemap_fdatawait(mapping);
10640                 if (retval)
10641                         goto out;
10642 @@ -607,3 +788,21 @@
10643  out:
10644         return retval;
10645  }
10646 +
10647 +int nfs_init_directcache(void)
10648 +{
10649 +       nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
10650 +                                               sizeof(struct nfs_direct_req),
10651 +                                               0, SLAB_RECLAIM_ACCOUNT,
10652 +                                               NULL, NULL);
10653 +       if (nfs_direct_cachep == NULL)
10654 +               return -ENOMEM;
10655 +
10656 +       return 0;
10657 +}
10658 +
10659 +void nfs_destroy_directcache(void)
10660 +{
10661 +       if (kmem_cache_destroy(nfs_direct_cachep))
10662 +               printk(KERN_INFO "nfs_direct_cache: not all structures were freed\n");
10663 +}
10664 Index: linux-2.6.10/fs/nfs/read.c
10665 ===================================================================
10666 --- linux-2.6.10.orig/fs/nfs/read.c     2004-12-25 05:33:47.000000000 +0800
10667 +++ linux-2.6.10/fs/nfs/read.c  2005-04-05 14:49:13.437686480 +0800
10668 @@ -24,7 +24,6 @@
10669  #include <linux/mm.h>
10670  #include <linux/slab.h>
10671  #include <linux/pagemap.h>
10672 -#include <linux/mempool.h>
10673  #include <linux/sunrpc/clnt.h>
10674  #include <linux/nfs_fs.h>
10675  #include <linux/nfs_page.h>
10676 @@ -39,25 +38,11 @@
10677  static void nfs_readpage_result_full(struct nfs_read_data *, int);
10678  
10679  static kmem_cache_t *nfs_rdata_cachep;
10680 -static mempool_t *nfs_rdata_mempool;
10681 +mempool_t *nfs_rdata_mempool;
10682  
10683  #define MIN_POOL_READ  (32)
10684  
10685 -static struct nfs_read_data *nfs_readdata_alloc(void)
10686 -{
10687 -       struct nfs_read_data   *p;
10688 -       p = (struct nfs_read_data *)mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
10689 -       if (p)
10690 -               memset(p, 0, sizeof(*p));
10691 -       return p;
10692 -}
10693 -
10694 -static __inline__ void nfs_readdata_free(struct nfs_read_data *p)
10695 -{
10696 -       mempool_free(p, nfs_rdata_mempool);
10697 -}
10698 -
10699 -static void nfs_readdata_release(struct rpc_task *task)
10700 +void nfs_readdata_release(struct rpc_task *task)
10701  {
10702          struct nfs_read_data   *data = (struct nfs_read_data *)task->tk_calldata;
10703          nfs_readdata_free(data);