3 The complete set of CITI NFSv4 patches, combined into a single patch.
5 Changes since 2.6.10-rc3-CITI_NFS4_ALL-3:
6 * minor adjustments to xdr buffer length calculations in fs/nfs4xdr.c
7 * client ACL revisions: pass ACLs in the page array of xdr bufs, removing
8 arbitrary length restrictions. ACL caching is temporarily disabled.
10 Index: linux-2.6.10/include/linux/nfsd/state.h
11 ===================================================================
12 --- linux-2.6.10.orig/include/linux/nfsd/state.h 2004-12-25 05:33:50.000000000 +0800
13 +++ linux-2.6.10/include/linux/nfsd/state.h 2005-04-05 14:49:13.465682224 +0800
15 #define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t)))
16 #define ONE_STATEID(stateid) (!memcmp((stateid), &onestateid, sizeof(stateid_t)))
18 +/* Delegation recall states */
19 +#define NFS4_NO_RECALL 0x000
20 +#define NFS4_RECALL_IN_PROGRESS 0x001
21 +#define NFS4_RECALL_COMPLETE 0x002
24 +/* Delegation flags */
25 +#define NFS4_DELAY_CLOSE 0x001
27 +struct nfs4_cb_recall {
30 + stateid_t cbr_stateid;
32 + u32 cbr_fhval[NFS4_FHSIZE];
33 + struct nfs4_delegation *cbr_dp;
36 +struct nfs4_delegation {
37 + struct list_head dl_del_perfile; /* nfs4_file->fi_del_perfile */
38 + struct list_head dl_del_perclnt; /* nfs4_client->cl_del_perclnt*/
39 + struct list_head dl_recall_lru; /* delegation recalled */
40 + atomic_t dl_recall_cnt; /* resend cb_recall only once */
41 + atomic_t dl_count; /* ref count */
42 + atomic_t dl_state; /* recall state */
43 + struct nfs4_client *dl_client;
44 + struct nfs4_file *dl_file;
45 + struct file_lock *dl_flock;
46 + struct nfs4_stateid *dl_stp;
50 + struct nfs4_cb_recall dl_recall;
53 +#define dl_stateid dl_recall.cbr_stateid
54 +#define dl_fhlen dl_recall.cbr_fhlen
55 +#define dl_fhval dl_recall.cbr_fhval
57 /* client delegation callback info */
58 struct nfs4_callback {
59 /* SETCLIENTID info */
61 unsigned short cb_port;
64 - struct xdr_netobj cb_netid;
66 - u32 cb_set; /* successful CB_NULL call */
67 + atomic_t cb_set; /* successful CB_NULL call */
68 struct rpc_program cb_program;
69 struct rpc_stat cb_stat;
70 struct rpc_clnt * cb_client;
72 struct list_head cl_idhash; /* hash by cl_clientid.id */
73 struct list_head cl_strhash; /* hash by cl_name */
74 struct list_head cl_perclient; /* list: stateowners */
75 + struct list_head cl_del_perclnt; /* list: delegations */
76 struct list_head cl_lru; /* tail queue */
77 struct xdr_netobj cl_name; /* id generated by client */
78 nfs4_verifier cl_verifier; /* generated by client */
80 clientid_t cl_clientid; /* generated by server */
81 nfs4_verifier cl_confirm; /* generated by server */
82 struct nfs4_callback cl_callback; /* callback info */
83 - time_t cl_first_state; /* first state aquisition*/
84 + atomic_t cl_count; /* ref count */
85 + u32 cl_firststate; /* recovery file creation */
88 /* struct nfs4_client_reset
90 struct nfs4_client_reclaim {
91 struct list_head cr_strhash; /* hash by cr_name */
92 struct xdr_netobj cr_name; /* id generated by client */
93 - time_t cr_first_state; /* first state aquisition */
94 - u32 cr_expired; /* boolean: lease expired? */
100 struct list_head fi_hash; /* hash by "struct inode *" */
101 struct list_head fi_perfile; /* list: nfs4_stateid */
102 + struct list_head fi_del_perfile; /* list: nfs4_delegation */
103 struct inode *fi_inode;
104 u32 fi_id; /* used with stateowner->so_id
105 * for stateid_hashtbl hash */
107 #define CONFIRM 0x00000002
108 #define OPEN_STATE 0x00000004
109 #define LOCK_STATE 0x00000008
110 -#define RDWR_STATE 0x00000010
111 -#define CLOSE_STATE 0x00000020
112 +#define RD_STATE 0x00000010
113 +#define WR_STATE 0x00000020
114 +#define CLOSE_STATE 0x00000040
115 +#define DELEG_RET 0x00000080
117 #define seqid_mutating_err(err) \
118 (((err) != nfserr_stale_clientid) && \
119 @@ -243,14 +284,24 @@
120 extern time_t nfs4_laundromat(void);
121 extern int nfsd4_renew(clientid_t *clid);
122 extern int nfs4_preprocess_stateid_op(struct svc_fh *current_fh,
123 - stateid_t *stateid, int flags, struct nfs4_stateid **stpp);
124 + stateid_t *stateid, int flags, struct file **filp);
125 extern int nfs4_share_conflict(struct svc_fh *current_fh,
126 unsigned int deny_type);
127 extern void nfs4_lock_state(void);
128 extern void nfs4_unlock_state(void);
129 extern int nfs4_in_grace(void);
130 extern int nfs4_check_open_reclaim(clientid_t *clid);
131 +extern void put_nfs4_client(struct nfs4_client *clp);
132 extern void nfs4_free_stateowner(struct kref *kref);
133 +extern void nfsd4_probe_callback(struct nfs4_client *clp);
134 +extern int nfsd4_cb_recall(struct nfs4_delegation *dp);
135 +extern int nfsd4_create_clid_file(struct nfs4_client *clp);
136 +extern void nfsd4_remove_clid_file(struct nfs4_client *clp);
137 +extern int nfsd4_list_rec_dir(int clear);
138 +extern void nfsd4_init_rec_dir(char *rec_dirname);
139 +extern void nfsd4_shutdown_rec_dir(void);
140 +extern int nfs4_client_to_reclaim(char *name, int namlen);
144 nfs4_put_stateowner(struct nfs4_stateowner *so)
145 Index: linux-2.6.10/include/linux/nfsd/nfsd.h
146 ===================================================================
147 --- linux-2.6.10.orig/include/linux/nfsd/nfsd.h 2004-12-25 05:35:39.000000000 +0800
148 +++ linux-2.6.10/include/linux/nfsd/nfsd.h 2005-04-05 14:49:13.464682376 +0800
150 void nfsd_close(struct file *);
151 int nfsd_read(struct svc_rqst *, struct svc_fh *,
152 loff_t, struct kvec *,int, unsigned long *);
153 +int nfsd_vfs_read(struct svc_rqst *, struct svc_fh *, struct file *,
154 + loff_t, struct kvec *, int, unsigned long *);
155 int nfsd_write(struct svc_rqst *, struct svc_fh *,
156 loff_t, struct kvec *,int, unsigned long, int *);
157 +int nfsd_vfs_write(struct svc_rqst *, struct svc_fh *,struct file *,
158 + loff_t, struct kvec *,int, unsigned long, int *);
159 int nfsd_readlink(struct svc_rqst *, struct svc_fh *,
161 int nfsd_symlink(struct svc_rqst *, struct svc_fh *,
162 Index: linux-2.6.10/include/linux/nfsd/xdr4.h
163 ===================================================================
164 --- linux-2.6.10.orig/include/linux/nfsd/xdr4.h 2004-12-25 05:34:01.000000000 +0800
165 +++ linux-2.6.10/include/linux/nfsd/xdr4.h 2005-04-05 14:49:13.466682072 +0800
167 #define NFSD4_MAX_TAGLEN 128
168 #define XDR_LEN(n) (((n) + 3) & ~3)
170 -typedef u32 delegation_zero_t;
171 -typedef u32 delegation_boot_t;
172 -typedef u64 delegation_id_t;
175 - delegation_zero_t ds_zero;
176 - delegation_boot_t ds_boot;
177 - delegation_id_t ds_id;
178 -} delegation_stateid_t;
180 struct nfsd4_change_info {
182 u32 before_ctime_sec;
184 #define cr_specdata1 u.dev.specdata1
185 #define cr_specdata2 u.dev.specdata2
187 +struct nfsd4_delegreturn {
188 + stateid_t dr_stateid;
191 struct nfsd4_getattr {
192 u32 ga_bmval[2]; /* request */
193 struct svc_fh *ga_fhp; /* response */
194 @@ -202,13 +196,13 @@
195 u32 op_claim_type; /* request */
196 struct xdr_netobj op_fname; /* request - everything but CLAIM_PREV */
197 u32 op_delegate_type; /* request - CLAIM_PREV only */
198 - delegation_stateid_t op_delegate_stateid; /* request - CLAIM_DELEGATE_CUR only */
199 + stateid_t op_delegate_stateid; /* request - response */
200 u32 op_create; /* request */
201 u32 op_createmode; /* request */
202 u32 op_bmval[2]; /* request */
203 union { /* request */
204 - struct iattr iattr; /* UNCHECKED4,GUARDED4 */
205 - nfs4_verifier verf; /* EXCLUSIVE4 */
206 + struct iattr iattr; /* UNCHECKED4,GUARDED4 */
207 + nfs4_verifier verf; /* EXCLUSIVE4 */
209 clientid_t op_clientid; /* request */
210 struct xdr_netobj op_owner; /* request */
212 u32 rd_length; /* request */
213 struct kvec rd_iov[RPCSVC_MAXPAGES];
215 + struct file *rd_filp;
217 struct svc_rqst *rd_rqstp; /* response */
218 struct svc_fh * rd_fhp; /* response */
220 struct nfsd4_close close;
221 struct nfsd4_commit commit;
222 struct nfsd4_create create;
223 + struct nfsd4_delegreturn delegreturn;
224 struct nfsd4_getattr getattr;
225 struct svc_fh * getfh;
226 struct nfsd4_link link;
228 nfsd4_release_lockowner(struct svc_rqst *rqstp,
229 struct nfsd4_release_lockowner *rlockowner);
230 extern void nfsd4_release_compoundargs(struct nfsd4_compoundargs *);
231 +extern int nfsd4_delegreturn(struct svc_rqst *rqstp,
232 + struct svc_fh *current_fh, struct nfsd4_delegreturn *dr);
236 Index: linux-2.6.10/include/linux/fs.h
237 ===================================================================
238 --- linux-2.6.10.orig/include/linux/fs.h 2005-03-31 15:35:26.000000000 +0800
239 +++ linux-2.6.10/include/linux/fs.h 2005-04-05 14:49:13.461682832 +0800
240 @@ -1185,11 +1185,6 @@
242 extern int vfs_statfs(struct super_block *, struct kstatfs *);
244 -/* Return value for VFS lock functions - tells locks.c to lock conventionally
245 - * REALLY kosha for root NFS and nfs_lock
247 -#define LOCK_USE_CLNT 1
249 #define FLOCK_VERIFY_READ 1
250 #define FLOCK_VERIFY_WRITE 2
252 Index: linux-2.6.10/include/linux/dcache.h
253 ===================================================================
254 --- linux-2.6.10.orig/include/linux/dcache.h 2005-03-31 15:35:26.000000000 +0800
255 +++ linux-2.6.10/include/linux/dcache.h 2005-04-05 14:49:13.460682984 +0800
257 * These are the low-level FS interfaces to the dcache..
259 extern void d_instantiate(struct dentry *, struct inode *);
260 +extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *);
261 extern void d_delete(struct dentry *);
263 /* allocate/de-allocate */
269 + * d_add_unique - add dentry to hash queues without aliasing
270 + * @entry: dentry to add
271 + * @inode: The inode to attach to this dentry
273 + * This adds the entry to the hash queues and initializes @inode.
274 + * The entry was actually filled in earlier during d_alloc().
276 +static inline struct dentry *d_add_unique(struct dentry *entry, struct inode *inode)
278 + struct dentry *res;
280 + res = d_instantiate_unique(entry, inode);
281 + d_rehash(res != NULL ? res : entry);
285 /* used for rename() and baskets */
286 extern void d_move(struct dentry *, struct dentry *);
288 Index: linux-2.6.10/include/linux/nfs_fs.h
289 ===================================================================
290 --- linux-2.6.10.orig/include/linux/nfs_fs.h 2004-12-25 05:34:31.000000000 +0800
291 +++ linux-2.6.10/include/linux/nfs_fs.h 2005-04-05 14:49:13.463682528 +0800
293 #include <linux/nfs_xdr.h>
294 #include <linux/rwsem.h>
295 #include <linux/workqueue.h>
296 +#include <linux/mempool.h>
299 * Enable debugging support for nfs client.
301 #define NFS_INO_INVALID_ATTR 0x0008 /* cached attrs are invalid */
302 #define NFS_INO_INVALID_DATA 0x0010 /* cached data is invalid */
303 #define NFS_INO_INVALID_ATIME 0x0020 /* cached atime is invalid */
304 +#define NFS_INO_INVALID_ACCESS 0x0040 /* cached access cred invalid */
306 static inline struct nfs_inode *NFS_I(struct inode *inode)
309 static inline void NFS_CACHEINV(struct inode *inode)
311 if (!nfs_caches_unstable(inode))
312 - NFS_FLAGS(inode) |= NFS_INO_INVALID_ATTR;
313 + NFS_FLAGS(inode) |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS;
316 static inline int nfs_server_capable(struct inode *inode, int cap)
318 return nfs_wb_page_priority(inode, page, 0);
322 + * Allocate and free nfs_write_data structures
324 +extern mempool_t *nfs_wdata_mempool;
325 +extern mempool_t *nfs_commit_mempool;
327 +static inline struct nfs_write_data *nfs_writedata_alloc(void)
329 + struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
331 + memset(p, 0, sizeof(*p));
332 + INIT_LIST_HEAD(&p->pages);
337 +static inline void nfs_writedata_free(struct nfs_write_data *p)
339 + mempool_free(p, nfs_wdata_mempool);
342 +extern void nfs_writedata_release(struct rpc_task *task);
344 +static inline struct nfs_write_data *nfs_commit_alloc(void)
346 + struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
348 + memset(p, 0, sizeof(*p));
349 + INIT_LIST_HEAD(&p->pages);
354 +static inline void nfs_commit_free(struct nfs_write_data *p)
356 + mempool_free(p, nfs_commit_mempool);
359 /* Hack for future NFS swap support */
361 # define IS_SWAPFILE(inode) (0)
363 extern void nfs_readpage_result(struct rpc_task *);
366 + * Allocate and free nfs_read_data structures
368 +extern mempool_t *nfs_rdata_mempool;
370 +static inline struct nfs_read_data *nfs_readdata_alloc(void)
372 + struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
374 + memset(p, 0, sizeof(*p));
378 +static inline void nfs_readdata_free(struct nfs_read_data *p)
380 + mempool_free(p, nfs_rdata_mempool);
383 +extern void nfs_readdata_release(struct rpc_task *task);
386 * linux/fs/mount_clnt.c
387 * (Used only by nfsroot module)
391 extern struct dentry_operations nfs4_dentry_operations;
392 extern struct inode_operations nfs4_dir_inode_operations;
393 +extern struct inode_operations nfs4_file_inode_operations;
396 +extern ssize_t nfs4_getxattr(struct dentry *, const char *, void *, size_t);
397 +extern int nfs4_setxattr(struct dentry *, const char *, const void *, size_t, int);
398 +extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t);
401 extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short);
402 @@ -651,13 +717,14 @@
403 extern int nfs4_open_reclaim(struct nfs4_state_owner *, struct nfs4_state *);
404 extern int nfs4_proc_async_renew(struct nfs4_client *);
405 extern int nfs4_proc_renew(struct nfs4_client *);
406 -extern int nfs4_do_close(struct inode *, struct nfs4_state *);
407 -extern int nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode);
408 +extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode);
409 extern int nfs4_wait_clnt_recover(struct rpc_clnt *, struct nfs4_client *);
410 extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
411 extern int nfs4_open_revalidate(struct inode *, struct dentry *, int);
412 extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *);
413 extern int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request);
414 +extern ssize_t nfs4_proc_get_acl(struct inode *, void *buf, ssize_t buflen);
415 +extern int nfs4_proc_set_acl(struct inode *, const void *buf, ssize_t buflen);
418 extern void nfs4_schedule_state_renewal(struct nfs4_client *);
419 Index: linux-2.6.10/include/linux/nfs4.h
420 ===================================================================
421 --- linux-2.6.10.orig/include/linux/nfs4.h 2004-12-25 05:34:45.000000000 +0800
422 +++ linux-2.6.10/include/linux/nfs4.h 2005-04-05 14:49:13.474680856 +0800
424 #define NFS4_ACCESS_DELETE 0x0010
425 #define NFS4_ACCESS_EXECUTE 0x0020
427 -#define NFS4_FH_PERISTENT 0x0000
428 +#define NFS4_FH_PERSISTENT 0x0000
429 #define NFS4_FH_NOEXPIRE_WITH_OPEN 0x0001
430 #define NFS4_FH_VOLATILE_ANY 0x0002
431 #define NFS4_FH_VOL_MIGRATION 0x0004
433 NFSPROC4_CLNT_READDIR,
434 NFSPROC4_CLNT_SERVER_CAPS,
435 NFSPROC4_CLNT_DELEGRETURN,
436 + NFSPROC4_CLNT_GETACL,
437 + NFSPROC4_CLNT_SETACL,
441 Index: linux-2.6.10/include/linux/sunrpc/auth.h
442 ===================================================================
443 --- linux-2.6.10.orig/include/linux/sunrpc/auth.h 2004-12-25 05:34:57.000000000 +0800
444 +++ linux-2.6.10/include/linux/sunrpc/auth.h 2005-04-05 14:49:13.468681768 +0800
447 #define RPCAUTH_CRED_LOCKED 0x0001
448 #define RPCAUTH_CRED_UPTODATE 0x0002
449 -#define RPCAUTH_CRED_DEAD 0x0004
451 #define RPCAUTH_CRED_MAGIC 0x0f4aa4f0
454 int rpcauth_refreshcred(struct rpc_task *);
455 void rpcauth_invalcred(struct rpc_task *);
456 int rpcauth_uptodatecred(struct rpc_task *);
457 -int rpcauth_deadcred(struct rpc_task *);
458 void rpcauth_init_credcache(struct rpc_auth *);
459 void rpcauth_free_credcache(struct rpc_auth *);
461 Index: linux-2.6.10/include/linux/sunrpc/svc.h
462 ===================================================================
463 --- linux-2.6.10.orig/include/linux/sunrpc/svc.h 2004-12-25 05:34:58.000000000 +0800
464 +++ linux-2.6.10/include/linux/sunrpc/svc.h 2005-04-05 14:49:13.467681920 +0800
466 char * pg_name; /* service name */
467 char * pg_class; /* class name: services sharing authentication */
468 struct svc_stat * pg_stats; /* rpc statistics */
469 - /* Override authentication. NULL means use default */
470 - int (*pg_authenticate)(struct svc_rqst *, u32 *);
471 + int (*pg_authenticate)(struct svc_rqst *);
475 Index: linux-2.6.10/include/linux/sunrpc/cache.h
476 ===================================================================
477 --- linux-2.6.10.orig/include/linux/sunrpc/cache.h 2004-12-25 05:34:57.000000000 +0800
478 +++ linux-2.6.10/include/linux/sunrpc/cache.h 2005-04-05 14:49:13.470681464 +0800
479 @@ -128,20 +128,17 @@
480 * just like a template in C++, this macro does cache lookup
482 * The function is passed some sort of HANDLE from which a cache_detail
483 - * structure can be determined (via SETUP, DETAIL), a template
484 + * structure can be determined (via DETAIL), a template
485 * cache entry (type RTN*), and a "set" flag. Using the HASHFN and the
486 * TEST, the function will try to find a matching cache entry in the cache.
488 * If an entry is found, it is returned
489 * If no entry is found, a new non-VALID entry is created.
490 - * If "set" == 1 and INPLACE == 0 :
492 * If no entry is found a new one is inserted with data from "template"
493 * If a non-CACHE_VALID entry is found, it is updated from template using UPDATE
494 * If a CACHE_VALID entry is found, a new entry is swapped in with data
496 - * If set == 1, and INPLACE == 1 :
497 - * As above, except that if a CACHE_VALID entry is found, we UPDATE in place
498 - * instead of swapping in a new entry.
500 * If the passed handle has the CACHE_NEGATIVE flag set, then UPDATE is not
501 * run but insteead CACHE_NEGATIVE is set in any new item.
502 @@ -153,25 +150,22 @@
503 * MEMBER is the member of the cache which is cache_head, which must be first
504 * FNAME is the name for the function
505 * ARGS are arguments to function and must contain RTN *item, int set. May
506 - * also contain something to be usedby SETUP or DETAIL to find cache_detail.
507 - * SETUP locates the cache detail and makes it available as...
508 - * DETAIL identifies the cache detail, possibly set up by SETUP
509 + * also contain something to be used by DETAIL to find cache_detail.
510 + * DETAIL identifies the cache detail
511 * HASHFN returns a hash value of the cache entry "item"
512 * TEST tests if "tmp" matches "item"
513 * INIT copies key information from "item" to "new"
514 * UPDATE copies content information from "item" to "tmp"
515 - * INPLACE is true if updates can happen inplace rather than allocating a new structure
517 * WARNING: any substantial changes to this must be reflected in
518 * net/sunrpc/svcauth.c(auth_domain_lookup)
519 * which is a similar routine that is open-coded.
521 -#define DefineCacheLookup(RTN,MEMBER,FNAME,ARGS,SETUP,DETAIL,HASHFN,TEST,INIT,UPDATE,INPLACE) \
522 +#define DefineCacheLookup(RTN,MEMBER,FNAME,ARGS,DETAIL,HASHFN,TEST,INIT,UPDATE) \
525 RTN *tmp, *new=NULL; \
526 struct cache_head **hp, **head; \
528 head = &(DETAIL)->hash_table[HASHFN]; \
530 if (set||new) write_lock(&(DETAIL)->hash_lock); \
531 @@ -180,14 +174,14 @@
532 tmp = container_of(*hp, RTN, MEMBER); \
533 if (TEST) { /* found a match */ \
535 - if (set && !INPLACE && test_bit(CACHE_VALID, &tmp->MEMBER.flags) && !new) \
536 + if (set && test_bit(CACHE_VALID, &tmp->MEMBER.flags) && !new) \
541 cache_get(&tmp->MEMBER); \
543 - if (!INPLACE && test_bit(CACHE_VALID, &tmp->MEMBER.flags))\
544 + if (test_bit(CACHE_VALID, &tmp->MEMBER.flags))\
545 { /* need to swap in new */ \
549 else read_unlock(&(DETAIL)->hash_lock); \
551 cache_fresh(DETAIL, &tmp->MEMBER, item->MEMBER.expiry_time); \
552 - if (set && !INPLACE && new) cache_fresh(DETAIL, &new->MEMBER, 0); \
553 + if (set && new) cache_fresh(DETAIL, &new->MEMBER, 0); \
554 if (new) (DETAIL)->cache_put(&new->MEMBER, DETAIL); \
557 @@ -242,10 +236,10 @@
561 -#define DefineSimpleCacheLookup(STRUCT,INPLACE) \
562 - DefineCacheLookup(struct STRUCT, h, STRUCT##_lookup, (struct STRUCT *item, int set), /*no setup */, \
563 +#define DefineSimpleCacheLookup(STRUCT) \
564 + DefineCacheLookup(struct STRUCT, h, STRUCT##_lookup, (struct STRUCT *item, int set), \
565 & STRUCT##_cache, STRUCT##_hash(item), STRUCT##_match(item, tmp),\
566 - STRUCT##_init(new, item), STRUCT##_update(tmp, item),INPLACE)
567 + STRUCT##_init(new, item), STRUCT##_update(tmp, item))
569 #define cache_for_each(pos, detail, index, member) \
570 for (({read_lock(&(detail)->hash_lock); index = (detail)->hash_size;}) ; \
571 Index: linux-2.6.10/include/linux/sunrpc/sched.h
572 ===================================================================
573 --- linux-2.6.10.orig/include/linux/sunrpc/sched.h 2004-12-25 05:35:01.000000000 +0800
574 +++ linux-2.6.10/include/linux/sunrpc/sched.h 2005-04-05 14:49:13.472681160 +0800
577 #include <linux/timer.h>
578 #include <linux/sunrpc/types.h>
579 +#include <linux/spinlock.h>
580 #include <linux/wait.h>
581 +#include <linux/workqueue.h>
582 #include <linux/sunrpc/xdr.h>
586 struct rpc_cred * rpc_cred; /* Credentials */
589 +struct rpc_wait_queue;
591 + struct list_head list; /* wait queue links */
592 + struct list_head links; /* Links to related tasks */
593 + wait_queue_head_t waitq; /* sync: sleep on this q */
594 + struct rpc_wait_queue * rpc_waitq; /* RPC wait queue we're on */
598 * This is the RPC task struct
601 - struct list_head tk_list; /* wait queue links */
603 unsigned long tk_magic; /* 0xf00baa */
606 struct rpc_clnt * tk_client; /* RPC client */
607 struct rpc_rqst * tk_rqstp; /* RPC request */
608 int tk_status; /* result of last operation */
609 - struct rpc_wait_queue * tk_rpcwait; /* RPC wait queue we're on */
614 * you have a pathological interest in kernel oopses.
616 struct timer_list tk_timer; /* kernel timer */
617 - wait_queue_head_t tk_wait; /* sync: sleep on this q */
618 unsigned long tk_timeout; /* timeout for rpc_sleep() */
619 unsigned short tk_flags; /* misc flags */
620 unsigned char tk_active : 1;/* Task has been activated */
621 unsigned char tk_priority : 2;/* Task priority */
622 unsigned long tk_runstate; /* Task run status */
623 - struct list_head tk_links; /* links to related tasks */
624 + struct workqueue_struct *tk_workqueue; /* Normally rpciod, but could
628 + struct work_struct tk_work; /* Async task work queue */
629 + struct rpc_wait tk_wait; /* RPC wait */
632 unsigned short tk_pid; /* debugging aid */
635 /* support walking a list of tasks on a wait queue */
636 #define task_for_each(task, pos, head) \
637 list_for_each(pos, head) \
638 - if ((task=list_entry(pos, struct rpc_task, tk_list)),1)
639 + if ((task=list_entry(pos, struct rpc_task, u.tk_wait.list)),1)
641 #define task_for_first(task, head) \
642 if (!list_empty(head) && \
643 - ((task=list_entry((head)->next, struct rpc_task, tk_list)),1))
644 + ((task=list_entry((head)->next, struct rpc_task, u.tk_wait.list)),1))
646 /* .. and walking list of all tasks */
647 #define alltask_for_each(task, pos, head) \
648 @@ -126,22 +139,39 @@
649 #define RPC_IS_SOFT(t) ((t)->tk_flags & RPC_TASK_SOFT)
650 #define RPC_TASK_UNINTERRUPTIBLE(t) ((t)->tk_flags & RPC_TASK_NOINTR)
652 -#define RPC_TASK_SLEEPING 0
653 -#define RPC_TASK_RUNNING 1
654 -#define RPC_IS_SLEEPING(t) (test_bit(RPC_TASK_SLEEPING, &(t)->tk_runstate))
655 -#define RPC_IS_RUNNING(t) (test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
656 +#define RPC_TASK_RUNNING 0
657 +#define RPC_TASK_QUEUED 1
658 +#define RPC_TASK_WAKEUP 2
659 +#define RPC_TASK_HAS_TIMER 3
661 +#define RPC_IS_RUNNING(t) (test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
662 #define rpc_set_running(t) (set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
663 -#define rpc_clear_running(t) (clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
664 +#define rpc_test_and_set_running(t) \
665 + (test_and_set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
666 +#define rpc_clear_running(t) \
668 + smp_mb__before_clear_bit(); \
669 + clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate); \
670 + smp_mb__after_clear_bit(); \
673 -#define rpc_set_sleeping(t) (set_bit(RPC_TASK_SLEEPING, &(t)->tk_runstate))
674 +#define RPC_IS_QUEUED(t) (test_bit(RPC_TASK_QUEUED, &(t)->tk_runstate))
675 +#define rpc_set_queued(t) (set_bit(RPC_TASK_QUEUED, &(t)->tk_runstate))
676 +#define rpc_clear_queued(t) \
678 + smp_mb__before_clear_bit(); \
679 + clear_bit(RPC_TASK_QUEUED, &(t)->tk_runstate); \
680 + smp_mb__after_clear_bit(); \
683 -#define rpc_clear_sleeping(t) \
684 +#define rpc_start_wakeup(t) \
685 + (test_and_set_bit(RPC_TASK_WAKEUP, &(t)->tk_runstate) == 0)
686 +#define rpc_finish_wakeup(t) \
688 smp_mb__before_clear_bit(); \
689 - clear_bit(RPC_TASK_SLEEPING, &(t)->tk_runstate); \
690 + clear_bit(RPC_TASK_WAKEUP, &(t)->tk_runstate); \
691 smp_mb__after_clear_bit(); \
698 * RPC synchronization objects
700 struct rpc_wait_queue {
702 struct list_head tasks[RPC_NR_PRIORITY]; /* task queue for each priority level */
703 unsigned long cookie; /* cookie of last task serviced */
704 unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */
708 # define RPC_WAITQ_INIT(var,qname) { \
709 + .lock = SPIN_LOCK_UNLOCKED, \
711 [0] = LIST_HEAD_INIT(var.tasks[0]), \
712 [1] = LIST_HEAD_INIT(var.tasks[1]), \
716 # define RPC_WAITQ_INIT(var,qname) { \
717 + .lock = SPIN_LOCK_UNLOCKED, \
719 [0] = LIST_HEAD_INIT(var.tasks[0]), \
720 [1] = LIST_HEAD_INIT(var.tasks[1]), \
721 @@ -209,13 +242,10 @@
722 int rpc_execute(struct rpc_task *);
723 void rpc_run_child(struct rpc_task *parent, struct rpc_task *child,
725 -int rpc_add_wait_queue(struct rpc_wait_queue *, struct rpc_task *);
726 -void rpc_remove_wait_queue(struct rpc_task *);
727 void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *);
728 void rpc_init_wait_queue(struct rpc_wait_queue *, const char *);
729 void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *,
730 rpc_action action, rpc_action timer);
731 -void rpc_add_timer(struct rpc_task *, rpc_action);
732 void rpc_wake_up_task(struct rpc_task *);
733 void rpc_wake_up(struct rpc_wait_queue *);
734 struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *);
735 Index: linux-2.6.10/include/linux/sunrpc/gss_krb5.h
736 ===================================================================
737 --- linux-2.6.10.orig/include/linux/sunrpc/gss_krb5.h 2004-12-25 05:34:57.000000000 +0800
738 +++ linux-2.6.10/include/linux/sunrpc/gss_krb5.h 2005-04-05 14:49:13.473681008 +0800
740 struct xdr_netobj mech_used;
743 +extern spinlock_t krb5_seq_lock;
745 #define KG_TOK_MIC_MSG 0x0101
746 #define KG_TOK_WRAP_MSG 0x0201
748 @@ -116,18 +118,25 @@
751 make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
752 - struct xdr_netobj *cksum);
753 + int body_offset, struct xdr_netobj *cksum);
756 krb5_make_token(struct krb5_ctx *context_handle, int qop_req,
757 struct xdr_buf *input_message_buffer,
758 - struct xdr_netobj *output_message_buffer, int toktype);
759 + struct xdr_netobj *output_message_buffer);
762 krb5_read_token(struct krb5_ctx *context_handle,
763 struct xdr_netobj *input_token_buffer,
764 - struct xdr_buf *message_buffer,
765 - int *qop_state, int toktype);
766 + struct xdr_buf *message_buffer, int *qop_state);
769 +gss_wrap_kerberos(struct gss_ctx *ctx_id, u32 qop, int offset,
770 + struct xdr_buf *outbuf, struct page **pages);
773 +gss_unwrap_kerberos(struct gss_ctx *ctx_id, u32 *qop, int offset,
774 + struct xdr_buf *buf, int *out_offset);
777 krb5_encrypt(struct crypto_tfm * key,
779 krb5_decrypt(struct crypto_tfm * key,
780 void *iv, void *in, void *out, int length);
783 +gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *outbuf, int offset,
784 + struct page **pages);
787 +gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *inbuf, int offset);
790 krb5_make_seq_num(struct crypto_tfm * key,
792 Index: linux-2.6.10/include/linux/sunrpc/xdr.h
793 ===================================================================
794 --- linux-2.6.10.orig/include/linux/sunrpc/xdr.h 2004-12-25 05:35:40.000000000 +0800
795 +++ linux-2.6.10/include/linux/sunrpc/xdr.h 2005-04-05 14:49:13.467681920 +0800
797 extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p);
798 extern uint32_t *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
799 extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
800 +extern void truncate_xdr_buf(struct xdr_buf *xdr, int len);
802 #endif /* __KERNEL__ */
804 Index: linux-2.6.10/include/linux/sunrpc/gss_api.h
805 ===================================================================
806 --- linux-2.6.10.orig/include/linux/sunrpc/gss_api.h 2004-12-25 05:35:28.000000000 +0800
807 +++ linux-2.6.10/include/linux/sunrpc/gss_api.h 2005-04-05 14:49:13.471681312 +0800
809 struct xdr_buf *message,
810 struct xdr_netobj *mic_token,
813 + struct gss_ctx *ctx_id,
816 + struct xdr_buf *outbuf,
817 + struct page **inpages);
819 + struct gss_ctx *ctx_id,
822 + struct xdr_buf *inbuf,
824 u32 gss_delete_sec_context(
825 struct gss_ctx **ctx_id);
828 struct xdr_buf *message,
829 struct xdr_netobj *mic_token,
832 + struct gss_ctx *ctx_id,
835 + struct xdr_buf *outbuf,
836 + struct page **inpages);
838 + struct gss_ctx *ctx_id,
841 + struct xdr_buf *buf,
843 void (*gss_delete_sec_context)(
844 void *internal_ctx_id);
846 Index: linux-2.6.10/include/linux/sunrpc/svcauth.h
847 ===================================================================
848 --- linux-2.6.10.orig/include/linux/sunrpc/svcauth.h 2004-12-25 05:34:31.000000000 +0800
849 +++ linux-2.6.10/include/linux/sunrpc/svcauth.h 2005-04-05 14:49:13.469681616 +0800
851 struct svc_rqst; /* forward decl */
853 /* Authentication is done in the context of a domain.
854 - * For a server, a domain represents a group of clients using
856 + * Currently, the nfs server uses the auth_domain to stand
857 + * for the "client" listed in /etc/exports.
859 + * More generally, a domain might represent a group of clients using
860 * a common mechanism for authentication and having a common mapping
861 * between local identity (uid) and network identity. All clients
862 * in a domain have similar general access rights. Each domain can
863 * contain multiple principals which will have different specific right
864 * based on normal Discretionary Access Control.
866 - * For a client, a domain represents a number of servers which all
867 - * use a common authentication mechanism and network identity name space.
869 * A domain is created by an authentication flavour module based on name
870 * only. Userspace then fills in detail on demand.
872 - * The creation of a domain typically implies creation of one or
873 - * more caches for storing domain specific information.
874 + * In the case of auth_unix and auth_null, the auth_domain is also
875 + * associated with entries in another cache representing the mapping
876 + * of ip addresses to the given client.
881 int (*accept)(struct svc_rqst *rq, u32 *authp);
882 int (*release)(struct svc_rqst *rq);
883 void (*domain_release)(struct auth_domain *);
884 + int (*set_client)(struct svc_rqst *rq);
887 #define SVC_GARBAGE 1
890 extern int svc_authenticate(struct svc_rqst *rqstp, u32 *authp);
891 extern int svc_authorise(struct svc_rqst *rqstp);
892 +extern int svc_set_client(struct svc_rqst *rqstp);
893 extern int svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops);
894 extern void svc_auth_unregister(rpc_authflavor_t flavor);
896 Index: linux-2.6.10/include/linux/sunrpc/xprt.h
897 ===================================================================
898 --- linux-2.6.10.orig/include/linux/sunrpc/xprt.h 2004-12-25 05:35:23.000000000 +0800
899 +++ linux-2.6.10/include/linux/sunrpc/xprt.h 2005-04-05 14:49:13.471681312 +0800
901 int rq_cong; /* has incremented xprt->cong */
902 int rq_received; /* receive completed */
903 u32 rq_seqno; /* gss seq no. used on req. */
905 + int rq_enc_pages_num;
906 + struct page **rq_enc_pages; /* scratch pages for use by
907 + gss privacy code */
908 + void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */
909 struct list_head rq_list;
911 struct xdr_buf rq_private_buf; /* The receive buffer
912 Index: linux-2.6.10/include/linux/nfs_xdr.h
913 ===================================================================
914 --- linux-2.6.10.orig/include/linux/nfs_xdr.h 2004-12-25 05:35:24.000000000 +0800
915 +++ linux-2.6.10/include/linux/nfs_xdr.h 2005-04-05 14:49:13.459683136 +0800
920 +struct nfs_setaclargs {
921 + struct nfs_fh * fh;
923 + unsigned int acl_pgbase;
924 + struct page ** acl_pages;
927 +struct nfs_getaclargs {
928 + struct nfs_fh * fh;
930 + unsigned int acl_pgbase;
931 + struct page ** acl_pages;
934 struct nfs_setattrres {
935 struct nfs_fattr * fattr;
936 const struct nfs_server * server;
938 int version; /* Protocol version */
939 struct dentry_operations *dentry_ops;
940 struct inode_operations *dir_inode_ops;
941 + struct inode_operations *file_inode_ops;
943 int (*getroot) (struct nfs_server *, struct nfs_fh *,
944 struct nfs_fsinfo *);
946 int (*read) (struct nfs_read_data *);
947 int (*write) (struct nfs_write_data *);
948 int (*commit) (struct nfs_write_data *);
949 - struct inode * (*create) (struct inode *, struct qstr *,
950 + struct inode * (*create) (struct inode *, struct dentry *,
951 struct iattr *, int);
952 int (*remove) (struct inode *, struct qstr *);
953 int (*unlink_setup) (struct rpc_message *,
954 Index: linux-2.6.10/net/sunrpc/xprt.c
955 ===================================================================
956 --- linux-2.6.10.orig/net/sunrpc/xprt.c 2004-12-25 05:35:14.000000000 +0800
957 +++ linux-2.6.10/net/sunrpc/xprt.c 2005-04-05 14:49:13.393693168 +0800
959 xprt->tcp_flags &= ~XPRT_COPY_XID;
960 xprt->tcp_flags |= XPRT_COPY_DATA;
961 xprt->tcp_copied = 4;
962 - dprintk("RPC: reading reply for XID %08x\n", xprt->tcp_xid);
963 + dprintk("RPC: reading reply for XID %08x\n",
964 + ntohl(xprt->tcp_xid));
965 tcp_check_recm(xprt);
970 xprt->tcp_flags &= ~XPRT_COPY_DATA;
971 dprintk("RPC: XID %08x request not found!\n",
973 + ntohl(xprt->tcp_xid));
974 spin_unlock(&xprt->sock_lock);
977 @@ -1101,7 +1102,7 @@
980 spin_lock_bh(&xprt->sock_lock);
981 - if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->pending)
982 + if (xprt->snd_task)
983 rpc_wake_up_task(xprt->snd_task);
984 spin_unlock_bh(&xprt->sock_lock);
986 @@ -1359,8 +1360,9 @@
989 req->rq_xid = xprt_alloc_xid(xprt);
990 + req->rq_release_snd_buf = NULL;
991 dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid,
993 + req, ntohl(req->rq_xid));
997 @@ -1384,6 +1386,8 @@
998 mod_timer(&xprt->timer, xprt->last_used + XPRT_IDLE_TIMEOUT);
999 spin_unlock_bh(&xprt->sock_lock);
1000 task->tk_rqstp = NULL;
1001 + if (req->rq_release_snd_buf)
1002 + req->rq_release_snd_buf(req);
1003 memset(req, 0, sizeof(*req)); /* mark unused */
1005 dprintk("RPC: %4d release request %p\n", task->tk_pid, req);
1006 Index: linux-2.6.10/net/sunrpc/auth.c
1007 ===================================================================
1008 --- linux-2.6.10.orig/net/sunrpc/auth.c 2004-12-25 05:34:57.000000000 +0800
1009 +++ linux-2.6.10/net/sunrpc/auth.c 2005-04-05 14:49:13.394693016 +0800
1011 list_for_each_safe(pos, next, &auth->au_credcache[nr]) {
1012 struct rpc_cred *entry;
1013 entry = list_entry(pos, struct rpc_cred, cr_hash);
1014 - if (entry->cr_flags & RPCAUTH_CRED_DEAD)
1016 if (rpcauth_prune_expired(entry, &free))
1018 if (entry->cr_ops->crmatch(acred, entry, taskflags)) {
1020 if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock))
1023 - if ((cred->cr_flags & RPCAUTH_CRED_DEAD) && !list_empty(&cred->cr_hash))
1024 - list_del_init(&cred->cr_hash);
1026 if (list_empty(&cred->cr_hash)) {
1027 spin_unlock(&rpc_credcache_lock);
1028 rpcauth_crdestroy(cred);
1029 @@ -413,10 +408,3 @@
1030 return !(task->tk_msg.rpc_cred) ||
1031 (task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_UPTODATE);
1035 -rpcauth_deadcred(struct rpc_task *task)
1037 - return !(task->tk_msg.rpc_cred) ||
1038 - (task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_DEAD);
1040 Index: linux-2.6.10/net/sunrpc/svcauth_unix.c
1041 ===================================================================
1042 --- linux-2.6.10.orig/net/sunrpc/svcauth_unix.c 2004-12-25 05:35:00.000000000 +0800
1043 +++ linux-2.6.10/net/sunrpc/svcauth_unix.c 2005-04-05 14:49:13.395692864 +0800
1046 static struct cache_head *ip_table[IP_HASHMAX];
1048 -void ip_map_put(struct cache_head *item, struct cache_detail *cd)
1049 +static void ip_map_put(struct cache_head *item, struct cache_detail *cd)
1051 struct ip_map *im = container_of(item, struct ip_map,h);
1052 if (cache_put(item, cd)) {
1054 .cache_show = ip_map_show,
1057 -static DefineSimpleCacheLookup(ip_map, 0)
1058 +static DefineSimpleCacheLookup(ip_map)
1061 int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom)
1062 @@ -329,14 +329,49 @@
1063 cache_purge(&auth_domain_cache);
1067 +svcauth_unix_set_client(struct svc_rqst *rqstp)
1069 + struct ip_map key, *ipm;
1071 + rqstp->rq_client = NULL;
1072 + if (rqstp->rq_proc == 0)
1075 + strcpy(key.m_class, rqstp->rq_server->sv_program->pg_class);
1076 + key.m_addr = rqstp->rq_addr.sin_addr;
1078 + ipm = ip_map_lookup(&key, 0);
1081 + return SVC_DENIED;
1083 + switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) {
1087 + return SVC_DENIED;
1089 + rqstp->rq_client = &ipm->m_client->h;
1090 + cache_get(&rqstp->rq_client->h);
1091 + ip_map_put(&ipm->h, &ip_map_cache);
1096 + /* shut up gcc: */
1101 svcauth_null_accept(struct svc_rqst *rqstp, u32 *authp)
1103 struct kvec *argv = &rqstp->rq_arg.head[0];
1104 struct kvec *resv = &rqstp->rq_res.head[0];
1106 - struct ip_map key, *ipm;
1107 + struct svc_cred *cred = &rqstp->rq_cred;
1109 + cred->cr_group_info = NULL;
1110 + rqstp->rq_client = NULL;
1112 if (argv->iov_len < 3*4)
1114 @@ -353,45 +388,17 @@
1117 /* Signal that mapping to nobody uid/gid is required */
1118 - rqstp->rq_cred.cr_uid = (uid_t) -1;
1119 - rqstp->rq_cred.cr_gid = (gid_t) -1;
1120 - rqstp->rq_cred.cr_group_info = groups_alloc(0);
1121 - if (rqstp->rq_cred.cr_group_info == NULL)
1122 + cred->cr_uid = (uid_t) -1;
1123 + cred->cr_gid = (gid_t) -1;
1124 + cred->cr_group_info = groups_alloc(0);
1125 + if (cred->cr_group_info == NULL)
1126 return SVC_DROP; /* kmalloc failure - client must retry */
1128 /* Put NULL verifier */
1129 svc_putu32(resv, RPC_AUTH_NULL);
1130 svc_putu32(resv, 0);
1132 - strcpy(key.m_class, rqstp->rq_server->sv_program->pg_class);
1133 - key.m_addr = rqstp->rq_addr.sin_addr;
1135 - ipm = ip_map_lookup(&key, 0);
1137 - rqstp->rq_client = NULL;
1140 - switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) {
1145 - rv = SVC_OK; /* rq_client is NULL */
1148 - rqstp->rq_client = &ipm->m_client->h;
1149 - cache_get(&rqstp->rq_client->h);
1150 - ip_map_put(&ipm->h, &ip_map_cache);
1155 - else rv = SVC_DROP;
1157 - if (rqstp->rq_client == NULL && rqstp->rq_proc != 0)
1158 - *authp = rpc_autherr_badcred;
1166 .flavour = RPC_AUTH_NULL,
1167 .accept = svcauth_null_accept,
1168 .release = svcauth_null_release,
1169 + .set_client = svcauth_unix_set_client,
1174 struct svc_cred *cred = &rqstp->rq_cred;
1176 int len = argv->iov_len;
1178 - struct ip_map key, *ipm;
1180 cred->cr_group_info = NULL;
1181 rqstp->rq_client = NULL;
1182 @@ -458,39 +464,11 @@
1187 - strcpy(key.m_class, rqstp->rq_server->sv_program->pg_class);
1188 - key.m_addr = rqstp->rq_addr.sin_addr;
1191 - ipm = ip_map_lookup(&key, 0);
1194 - switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) {
1199 - rv = SVC_OK; /* rq_client is NULL */
1202 - rqstp->rq_client = &ipm->m_client->h;
1203 - cache_get(&rqstp->rq_client->h);
1204 - ip_map_put(&ipm->h, &ip_map_cache);
1209 - else rv = SVC_DROP;
1211 - if (rv == SVC_OK && rqstp->rq_client == NULL && rqstp->rq_proc != 0)
1214 /* Put NULL verifier */
1215 svc_putu32(resv, RPC_AUTH_NULL);
1216 svc_putu32(resv, 0);
1222 *authp = rpc_autherr_badcred;
1224 .accept = svcauth_unix_accept,
1225 .release = svcauth_unix_release,
1226 .domain_release = svcauth_unix_domain_release,
1227 + .set_client = svcauth_unix_set_client,
1230 Index: linux-2.6.10/net/sunrpc/clnt.c
1231 ===================================================================
1232 --- linux-2.6.10.orig/net/sunrpc/clnt.c 2005-03-31 15:35:26.000000000 +0800
1233 +++ linux-2.6.10/net/sunrpc/clnt.c 2005-04-05 14:49:13.410690584 +0800
1234 @@ -636,8 +636,14 @@
1235 rpc_exit(task, -EIO);
1238 - if (encode && (status = rpcauth_wrap_req(task, encode, req, p,
1239 - task->tk_msg.rpc_argp)) < 0) {
1240 + if (encode == NULL)
1243 + status = rpcauth_wrap_req(task, encode, req, p, task->tk_msg.rpc_argp);
1244 + if (status == -EAGAIN) {
1245 + printk("XXXJBF: out of memory? Should retry here!!!\n");
1248 printk(KERN_WARNING "%s: can't encode arguments: %d\n",
1249 clnt->cl_protname, -status);
1250 rpc_exit(task, status);
1252 task->tk_action = call_reserve;
1253 if (status >= 0 && rpcauth_uptodatecred(task))
1255 - if (rpcauth_deadcred(task)) {
1256 + if (status == -EACCES) {
1257 rpc_exit(task, -EACCES);
1262 if ((n = ntohl(*p++)) != RPC_AUTH_ERROR) {
1263 printk(KERN_WARNING "call_verify: RPC call rejected: %x\n", n);
1264 - } else if (--len < 0)
1265 + } else if (--len == 0)
1266 switch ((n = ntohl(*p++))) {
1267 case RPC_AUTH_REJECTEDCRED:
1268 case RPC_AUTH_REJECTEDVERF:
1269 Index: linux-2.6.10/net/sunrpc/svcauth.c
1270 ===================================================================
1271 --- linux-2.6.10.orig/net/sunrpc/svcauth.c 2004-12-25 05:35:23.000000000 +0800
1272 +++ linux-2.6.10/net/sunrpc/svcauth.c 2005-04-05 14:49:13.392693320 +0800
1274 return aops->accept(rqstp, authp);
1277 +int svc_set_client(struct svc_rqst *rqstp)
1279 + return rqstp->rq_authop->set_client(rqstp);
1282 /* A request, which was authenticated, has now executed.
1283 * Time to finalise the the credentials and verifier
1284 * and release and resources
1285 Index: linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_unseal.c
1286 ===================================================================
1287 --- linux-2.6.10.orig/net/sunrpc/auth_gss/gss_krb5_unseal.c 2004-12-25 05:35:24.000000000 +0800
1288 +++ linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_unseal.c 2005-04-05 14:49:13.401691952 +0800
1293 -/* message_buffer is an input if toktype is MIC and an output if it is WRAP:
1294 - * If toktype is MIC: read_token is a mic token, and message_buffer is the
1295 - * data that the mic was supposedly taken over.
1296 - * If toktype is WRAP: read_token is a wrap token, and message_buffer is used
1297 - * to return the decrypted data.
1299 +/* read_token is a mic token, and message_buffer is the data that the mic was
1300 + * supposedly taken over. */
1302 -/* XXX will need to change prototype and/or just split into a separate function
1303 - * when we add privacy (because read_token will be in pages too). */
1305 krb5_read_token(struct krb5_ctx *ctx,
1306 struct xdr_netobj *read_token,
1307 - struct xdr_buf *message_buffer,
1308 - int *qop_state, int toktype)
1309 + struct xdr_buf *message_buffer, int *qop_state)
1313 @@ -100,16 +93,12 @@
1317 - if ((*ptr++ != ((toktype>>8)&0xff)) || (*ptr++ != (toktype&0xff)))
1318 + if ((*ptr++ != ((KG_TOK_MIC_MSG>>8)&0xff)) ||
1319 + (*ptr++ != ( KG_TOK_MIC_MSG &0xff)) )
1322 /* XXX sanity-check bodysize?? */
1324 - if (toktype == KG_TOK_WRAP_MSG) {
1329 /* get the sign and seal algorithms */
1331 signalg = ptr[0] + (ptr[1] << 8);
1332 @@ -120,14 +109,7 @@
1333 if ((ptr[4] != 0xff) || (ptr[5] != 0xff))
1336 - if (((toktype != KG_TOK_WRAP_MSG) && (sealalg != 0xffff)) ||
1337 - ((toktype == KG_TOK_WRAP_MSG) && (sealalg == 0xffff)))
1340 - /* in the current spec, there is only one valid seal algorithm per
1341 - key type, so a simple comparison is ok */
1343 - if ((toktype == KG_TOK_WRAP_MSG) && !(sealalg == ctx->sealalg))
1344 + if (sealalg != 0xffff)
1347 /* there are several mappings of seal algorithms to sign algorithms,
1350 case SGN_ALG_DES_MAC_MD5:
1351 ret = make_checksum(checksum_type, ptr - 2, 8,
1352 - message_buffer, &md5cksum);
1353 + message_buffer, 0, &md5cksum);
1357 Index: linux-2.6.10/net/sunrpc/auth_gss/gss_mech_switch.c
1358 ===================================================================
1359 --- linux-2.6.10.orig/net/sunrpc/auth_gss/gss_mech_switch.c 2004-12-25 05:35:01.000000000 +0800
1360 +++ linux-2.6.10/net/sunrpc/auth_gss/gss_mech_switch.c 2005-04-05 14:49:13.408690888 +0800
1361 @@ -279,6 +279,29 @@
1366 +gss_wrap(struct gss_ctx *ctx_id,
1369 + struct xdr_buf *buf,
1370 + struct page **inpages)
1372 + return ctx_id->mech_type->gm_ops
1373 + ->gss_wrap(ctx_id, qop, offset, buf, inpages);
1377 +gss_unwrap(struct gss_ctx *ctx_id,
1380 + struct xdr_buf *buf,
1383 + return ctx_id->mech_type->gm_ops
1384 + ->gss_unwrap(ctx_id, qop, offset, buf, out_offset);
1388 /* gss_delete_sec_context: free all resources associated with context_handle.
1389 * Note this differs from the RFC 2744-specified prototype in that we don't
1390 * bother returning an output token, since it would never be used anyway. */
1391 Index: linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_wrap.c
1392 ===================================================================
1393 --- linux-2.6.10.orig/net/sunrpc/auth_gss/gss_krb5_wrap.c 2005-04-05 19:01:49.158500672 +0800
1394 +++ linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_wrap.c 2005-04-05 14:49:13.397692560 +0800
1396 +#include <linux/types.h>
1397 +#include <linux/slab.h>
1398 +#include <linux/jiffies.h>
1399 +#include <linux/sunrpc/gss_krb5.h>
1400 +#include <linux/random.h>
1401 +#include <linux/pagemap.h>
1402 +#include <asm/scatterlist.h>
1403 +#include <linux/crypto.h>
1406 +# define RPCDBG_FACILITY RPCDBG_AUTH
1410 +gss_krb5_padding(int blocksize, int length)
1412 + /* Most of the code is block-size independent but currently we
1414 + BUG_ON(blocksize != 8);
1415 + return 8 - (length & 7);
1419 +gss_krb5_add_padding(struct xdr_buf *buf, int offset, int blocksize)
1421 + int padding = gss_krb5_padding(blocksize, buf->len - offset);
1425 + if (buf->page_len || buf->tail[0].iov_len)
1426 + iov = &buf->tail[0];
1428 + iov = &buf->head[0];
1429 + p = iov->iov_base + iov->iov_len;
1430 + iov->iov_len += padding;
1431 + buf->len += padding;
1432 + memset(p, padding, padding);
1436 +gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize)
1440 + int len = buf->len;
1442 + if (len <= buf->head[0].iov_len) {
1443 + pad = *(u8 *)(buf->head[0].iov_base + len - 1);
1446 + len -= buf->head[0].iov_len;
1447 + if (len <= buf->page_len) {
1448 + int last = (buf->page_base + len - 1)
1449 + >>PAGE_CACHE_SHIFT;
1450 + int offset = (buf->page_base + len - 1)
1451 + & (PAGE_CACHE_SIZE - 1);
1452 + ptr = kmap_atomic(buf->pages[last], KM_SKB_SUNRPC_DATA);
1453 + pad = *(ptr + offset);
1454 + kunmap_atomic(ptr, KM_SKB_SUNRPC_DATA);
1457 + len -= buf->page_len;
1458 + BUG_ON(len > buf->tail[0].iov_len);
1459 + pad = *(u8 *)(buf->tail[0].iov_base + len - 1);
1461 + if (pad > blocksize)
1468 +make_confounder(char *p, int blocksize)
1470 + /* XXX? Is this OK to do on every packet? */
1471 + get_random_bytes(p, blocksize);
1474 +/* Assumptions: the head and tail of buf are ours to play with.
1475 + * The pages, however, may be real pages in the page cache and we replace
1476 + * them with scratch pages from **pages before writing to them. */
1477 +/* XXX: obviously the above should be documentation of wrap interface,
1478 + * and shouldn't be in this kerberos-specific file. */
1480 +/* XXX factor out common code with seal/unseal. */
1483 +gss_wrap_kerberos(struct gss_ctx *ctx, u32 qop, int offset,
1484 + struct xdr_buf *buf, struct page **pages)
1486 + struct krb5_ctx *kctx = ctx->internal_ctx_id;
1487 + s32 checksum_type;
1488 + struct xdr_netobj md5cksum = {.len = 0, .data = NULL};
1489 + int blocksize = 0, plainlen;
1490 + unsigned char *ptr, *krb5_hdr, *msg_start;
1493 + struct page **tmp_pages;
1496 + dprintk("RPC: gss_wrap_kerberos\n");
1498 + now = get_seconds();
1503 + switch (kctx->signalg) {
1504 + case SGN_ALG_DES_MAC_MD5:
1505 + checksum_type = CKSUMTYPE_RSA_MD5;
1508 + dprintk("RPC: gss_krb5_seal: kctx->signalg %d not"
1509 + " supported\n", kctx->signalg);
1512 + if (kctx->sealalg != SEAL_ALG_NONE && kctx->sealalg != SEAL_ALG_DES) {
1513 + dprintk("RPC: gss_krb5_seal: kctx->sealalg %d not supported\n",
1518 + blocksize = crypto_tfm_alg_blocksize(kctx->enc);
1519 + gss_krb5_add_padding(buf, offset, blocksize);
1520 + BUG_ON((buf->len - offset) % blocksize);
1521 + plainlen = blocksize + buf->len - offset;
1523 + headlen = g_token_size(&kctx->mech_used, 22 + plainlen) -
1524 + (buf->len - offset);
1526 + ptr = buf->head[0].iov_base + offset;
1527 + /* shift data to make room for header. */
1528 + /* XXX Would be cleverer to encrypt while copying. */
1529 + /* XXX bounds checking, slack, etc. */
1530 + memmove(ptr + headlen, ptr, buf->head[0].iov_len - offset);
1531 + buf->head[0].iov_len += headlen;
1532 + buf->len += headlen;
1533 + BUG_ON((buf->len - offset - headlen) % blocksize);
1535 + g_make_token_header(&kctx->mech_used, 22 + plainlen, &ptr);
1538 + *ptr++ = (unsigned char) ((KG_TOK_WRAP_MSG>>8)&0xff);
1539 + *ptr++ = (unsigned char) (KG_TOK_WRAP_MSG&0xff);
1541 + /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */
1542 + krb5_hdr = ptr - 2;
1543 + msg_start = krb5_hdr + 24;
1544 + /* XXXJBF: */ BUG_ON(buf->head[0].iov_base + offset + headlen != msg_start + blocksize);
1546 + *(u16 *)(krb5_hdr + 2) = htons(kctx->signalg);
1547 + memset(krb5_hdr + 4, 0xff, 4);
1548 + *(u16 *)(krb5_hdr + 4) = htons(kctx->sealalg);
1550 + make_confounder(msg_start, blocksize);
1552 + /* XXXJBF: UGH!: */
1553 + tmp_pages = buf->pages;
1554 + buf->pages = pages;
1555 + if (make_checksum(checksum_type, krb5_hdr, 8, buf,
1556 + offset + headlen - blocksize, &md5cksum))
1558 + buf->pages = tmp_pages;
1560 + switch (kctx->signalg) {
1561 + case SGN_ALG_DES_MAC_MD5:
1562 + if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
1563 + md5cksum.data, md5cksum.len))
1565 + memcpy(krb5_hdr + 16,
1566 + md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH,
1567 + KRB5_CKSUM_LENGTH);
1569 + dprintk("RPC: make_seal_token: cksum data: \n");
1570 + print_hexl((u32 *) (krb5_hdr + 16), KRB5_CKSUM_LENGTH, 0);
1576 + kfree(md5cksum.data);
1578 + spin_lock(&krb5_seq_lock);
1579 + seq_send = kctx->seq_send++;
1580 + spin_unlock(&krb5_seq_lock);
1582 + /* XXX would probably be more efficient to compute checksum
1583 + * and encrypt at the same time: */
1584 + if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff,
1585 + seq_send, krb5_hdr + 16, krb5_hdr + 8)))
1588 + if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize,
1592 + return ((kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE);
1594 + if (md5cksum.data) kfree(md5cksum.data);
1595 + return GSS_S_FAILURE;
1599 +gss_unwrap_kerberos(struct gss_ctx *ctx, u32 *qop, int offset,
1600 + struct xdr_buf *buf, int *out_offset)
1602 + struct krb5_ctx *kctx = ctx->internal_ctx_id;
1605 + s32 checksum_type;
1606 + struct xdr_netobj md5cksum = {.len = 0, .data = NULL};
1610 + unsigned char *ptr;
1612 + u32 ret = GSS_S_DEFECTIVE_TOKEN;
1616 + dprintk("RPC: gss_unwrap_kerberos\n");
1618 + ptr = (u8 *)buf->head[0].iov_base + offset;
1619 + if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr,
1620 + buf->len - offset))
1623 + if ((*ptr++ != ((KG_TOK_WRAP_MSG>>8)&0xff)) ||
1624 + (*ptr++ != (KG_TOK_WRAP_MSG &0xff)) )
1627 + /* XXX sanity-check bodysize?? */
1629 + /* get the sign and seal algorithms */
1631 + signalg = ptr[0] + (ptr[1] << 8);
1632 + sealalg = ptr[2] + (ptr[3] << 8);
1634 + /* Sanity checks */
1636 + if ((ptr[4] != 0xff) || (ptr[5] != 0xff))
1639 + if (sealalg == 0xffff)
1642 + /* in the current spec, there is only one valid seal algorithm per
1643 + key type, so a simple comparison is ok */
1645 + if (sealalg != kctx->sealalg)
1648 + /* there are several mappings of seal algorithms to sign algorithms,
1649 + but few enough that we can try them all. */
1651 + if ((kctx->sealalg == SEAL_ALG_NONE && signalg > 1) ||
1652 + (kctx->sealalg == SEAL_ALG_1 && signalg != SGN_ALG_3) ||
1653 + (kctx->sealalg == SEAL_ALG_DES3KD &&
1654 + signalg != SGN_ALG_HMAC_SHA1_DES3_KD))
1657 + if (gss_decrypt_xdr_buf(kctx->enc, buf,
1658 + ptr + 22 - (unsigned char *)buf->head[0].iov_base))
1661 + /* compute the checksum of the message */
1663 + /* initialize the cksum */
1664 + switch (signalg) {
1665 + case SGN_ALG_DES_MAC_MD5:
1666 + checksum_type = CKSUMTYPE_RSA_MD5;
1669 + ret = GSS_S_DEFECTIVE_TOKEN;
1673 + switch (signalg) {
1674 + case SGN_ALG_DES_MAC_MD5:
1675 + ret = make_checksum(checksum_type, ptr - 2, 8, buf,
1676 + ptr + 22 - (unsigned char *)buf->head[0].iov_base, &md5cksum);
1680 + ret = krb5_encrypt(kctx->seq, NULL, md5cksum.data,
1681 + md5cksum.data, md5cksum.len);
1685 + if (memcmp(md5cksum.data + 8, ptr + 14, 8)) {
1686 + ret = GSS_S_BAD_SIG;
1691 + ret = GSS_S_DEFECTIVE_TOKEN;
1695 + /* it got through unscathed. Make sure the context is unexpired */
1698 + *qop = GSS_C_QOP_DEFAULT;
1700 + now = get_seconds();
1702 + ret = GSS_S_CONTEXT_EXPIRED;
1703 + if (now > kctx->endtime)
1706 + /* do sequencing checks */
1708 + ret = GSS_S_BAD_SIG;
1709 + if ((ret = krb5_get_seq_num(kctx->seq, ptr + 14, ptr + 6, &direction,
1713 + if ((kctx->initiate && direction != 0xff) ||
1714 + (!kctx->initiate && direction != 0))
1717 + /* Copy the data back to the right position. XXX: Would probably be
1718 + * better to copy and encrypt at the same time. */
1720 + blocksize = crypto_tfm_alg_blocksize(kctx->enc);
1721 + data_start = ptr + 22 + blocksize;
1722 + *out_offset = data_start - (u8 *)buf->head[0].iov_base;
1724 + ret = GSS_S_DEFECTIVE_TOKEN;
1725 + if (gss_krb5_remove_padding(buf, blocksize))
1728 + ret = GSS_S_COMPLETE;
1730 + if (md5cksum.data) kfree(md5cksum.data);
1733 Index: linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_crypto.c
1734 ===================================================================
1735 --- linux-2.6.10.orig/net/sunrpc/auth_gss/gss_krb5_crypto.c 2004-12-25 05:33:50.000000000 +0800
1736 +++ linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_crypto.c 2005-04-05 14:49:13.398692408 +0800
1737 @@ -139,17 +139,91 @@
1742 +process_xdr_buf(struct xdr_buf *buf, int offset, int len,
1743 + int (*actor)(struct scatterlist *, void *), void *data)
1745 + int i, page_len, thislen, page_offset, ret = 0;
1746 + struct scatterlist sg[1];
1748 + if (offset >= buf->head[0].iov_len) {
1749 + offset -= buf->head[0].iov_len;
1751 + thislen = buf->head[0].iov_len - offset;
1752 + if (thislen > len)
1754 + buf_to_sg(sg, buf->head[0].iov_base + offset, thislen);
1755 + ret = actor(sg, data);
1764 + if (offset >= buf->page_len) {
1765 + offset -= buf->page_len;
1767 + page_len = buf->page_len - offset;
1768 + if (page_len > len)
1771 + page_offset = (offset + buf->page_base) & (PAGE_CACHE_SIZE - 1);
1772 + i = (offset + buf->page_base) >> PAGE_CACHE_SHIFT;
1773 + thislen = PAGE_CACHE_SIZE - page_offset;
1775 + if (thislen > page_len)
1776 + thislen = page_len;
1777 + sg->page = buf->pages[i];
1778 + sg->offset = page_offset;
1779 + sg->length = thislen;
1780 + ret = actor(sg, data);
1783 + page_len -= thislen;
1786 + thislen = PAGE_CACHE_SIZE;
1787 + } while (page_len != 0);
1793 + if (offset < buf->tail[0].iov_len) {
1794 + thislen = buf->tail[0].iov_len - offset;
1795 + if (thislen > len)
1797 + buf_to_sg(sg, buf->tail[0].iov_base + offset, thislen);
1798 + ret = actor(sg, data);
1808 +checksummer(struct scatterlist *sg, void *data)
1810 + struct crypto_tfm *tfm = (struct crypto_tfm *)data;
1812 + crypto_digest_update(tfm, sg, 1);
1817 /* checksum the plaintext data and hdrlen bytes of the token header */
1819 make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
1820 - struct xdr_netobj *cksum)
1821 + int body_offset, struct xdr_netobj *cksum)
1824 struct crypto_tfm *tfm = NULL; /* XXX add to ctx? */
1825 struct scatterlist sg[1];
1826 u32 code = GSS_S_FAILURE;
1827 - int len, thislen, offset;
1830 switch (cksumtype) {
1831 case CKSUMTYPE_RSA_MD5:
1832 @@ -169,35 +243,8 @@
1833 crypto_digest_init(tfm);
1834 buf_to_sg(sg, header, hdrlen);
1835 crypto_digest_update(tfm, sg, 1);
1836 - if (body->head[0].iov_len) {
1837 - buf_to_sg(sg, body->head[0].iov_base, body->head[0].iov_len);
1838 - crypto_digest_update(tfm, sg, 1);
1841 - len = body->page_len;
1843 - offset = body->page_base & (PAGE_CACHE_SIZE - 1);
1844 - i = body->page_base >> PAGE_CACHE_SHIFT;
1845 - thislen = PAGE_CACHE_SIZE - offset;
1847 - if (thislen > len)
1849 - sg->page = body->pages[i];
1850 - sg->offset = offset;
1851 - sg->length = thislen;
1852 - kmap(sg->page); /* XXX kmap_atomic? */
1853 - crypto_digest_update(tfm, sg, 1);
1858 - thislen = PAGE_CACHE_SIZE;
1859 - } while(len != 0);
1861 - if (body->tail[0].iov_len) {
1862 - buf_to_sg(sg, body->tail[0].iov_base, body->tail[0].iov_len);
1863 - crypto_digest_update(tfm, sg, 1);
1865 + process_xdr_buf(body, body_offset, body->len - body_offset,
1866 + checksummer, tfm);
1867 crypto_digest_final(tfm, cksum->data);
1870 @@ -207,3 +254,154 @@
1873 EXPORT_SYMBOL(make_checksum);
1875 +struct encryptor_desc {
1876 + u8 iv[8]; /* XXX hard-coded blocksize */
1877 + struct crypto_tfm *tfm;
1879 + struct xdr_buf *outbuf;
1880 + struct page **pages;
1881 + struct scatterlist infrags[4];
1882 + struct scatterlist outfrags[4];
1888 +encryptor(struct scatterlist *sg, void *data)
1890 + struct encryptor_desc *desc = data;
1891 + struct xdr_buf *outbuf = desc->outbuf;
1892 + struct page *in_page;
1893 + int thislen = desc->fraglen + sg->length;
1897 + /* Worst case is 4 fragments: head, end of page 1, start
1898 + * of page 2, tail. Anything more is a bug. */
1899 + BUG_ON(desc->fragno > 3);
1900 + desc->infrags[desc->fragno] = *sg;
1901 + desc->outfrags[desc->fragno] = *sg;
1903 + page_pos = desc->pos - outbuf->head[0].iov_len;
1904 + if (page_pos >= 0 && page_pos < outbuf->page_len) {
1905 + /* pages are not in place: */
1906 + int i = (page_pos + outbuf->page_base) >> PAGE_CACHE_SHIFT;
1907 + in_page = desc->pages[i];
1909 + in_page = sg->page;
1911 + desc->infrags[desc->fragno].page = in_page;
1913 + desc->fraglen += sg->length;
1914 + desc->pos += sg->length;
1916 + fraglen = thislen & 7; /* XXX hardcoded blocksize */
1917 + thislen -= fraglen;
1922 + ret = crypto_cipher_encrypt_iv(desc->tfm, desc->outfrags, desc->infrags,
1923 + thislen, desc->iv);
1927 + desc->outfrags[0].page = sg->page;
1928 + desc->outfrags[0].offset = sg->offset + sg->length - fraglen;
1929 + desc->outfrags[0].length = fraglen;
1930 + desc->infrags[0] = desc->outfrags[0];
1931 + desc->infrags[0].page = in_page;
1933 + desc->fraglen = fraglen;
1936 + desc->fraglen = 0;
1942 +gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset,
1943 + struct page **pages)
1946 + struct encryptor_desc desc;
1948 + BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0);
1950 + memset(desc.iv, 0, sizeof(desc.iv));
1952 + desc.pos = offset;
1953 + desc.outbuf = buf;
1954 + desc.pages = pages;
1958 + ret = process_xdr_buf(buf, offset, buf->len - offset, encryptor, &desc);
1962 +EXPORT_SYMBOL(gss_encrypt_xdr_buf);
1964 +struct decryptor_desc {
1965 + u8 iv[8]; /* XXX hard-coded blocksize */
1966 + struct crypto_tfm *tfm;
1967 + struct scatterlist frags[4];
1973 +decryptor(struct scatterlist *sg, void *data)
1975 + struct decryptor_desc *desc = data;
1976 + int thislen = desc->fraglen + sg->length;
1979 + /* Worst case is 4 fragments: head, end of page 1, start
1980 + * of page 2, tail. Anything more is a bug. */
1981 + BUG_ON(desc->fragno > 3);
1982 + desc->frags[desc->fragno] = *sg;
1984 + desc->fraglen += sg->length;
1986 + fraglen = thislen & 7; /* XXX hardcoded blocksize */
1987 + thislen -= fraglen;
1992 + ret = crypto_cipher_decrypt_iv(desc->tfm, desc->frags, desc->frags,
1993 + thislen, desc->iv);
1997 + desc->frags[0].page = sg->page;
1998 + desc->frags[0].offset = sg->offset + sg->length - fraglen;
1999 + desc->frags[0].length = fraglen;
2001 + desc->fraglen = fraglen;
2004 + desc->fraglen = 0;
2010 +gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset)
2012 + struct decryptor_desc desc;
2015 + BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0);
2017 + memset(desc.iv, 0, sizeof(desc.iv));
2021 + return process_xdr_buf(buf, offset, buf->len - offset, decryptor, &desc);
2024 +EXPORT_SYMBOL(gss_decrypt_xdr_buf);
2025 Index: linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_seal.c
2026 ===================================================================
2027 --- linux-2.6.10.orig/net/sunrpc/auth_gss/gss_krb5_seal.c 2004-12-25 05:33:47.000000000 +0800
2028 +++ linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_seal.c 2005-04-05 14:49:13.402691800 +0800
2030 # define RPCDBG_FACILITY RPCDBG_AUTH
2034 -gss_krb5_padding(int blocksize, int length) {
2035 - /* Most of the code is block-size independent but in practice we
2037 - BUG_ON(blocksize != 8);
2038 - return 8 - (length & 7);
2040 +spinlock_t krb5_seq_lock = SPIN_LOCK_UNLOCKED;
2043 krb5_make_token(struct krb5_ctx *ctx, int qop_req,
2044 - struct xdr_buf *text, struct xdr_netobj *token,
2046 + struct xdr_buf *text, struct xdr_netobj *token)
2049 struct xdr_netobj md5cksum = {.len = 0, .data = NULL};
2050 - int blocksize = 0, tmsglen;
2051 unsigned char *ptr, *krb5_hdr, *msg_start;
2055 dprintk("RPC: gss_krb5_seal\n");
2057 @@ -111,21 +104,13 @@
2061 - if (toktype == KG_TOK_WRAP_MSG) {
2062 - blocksize = crypto_tfm_alg_blocksize(ctx->enc);
2063 - tmsglen = blocksize + text->len
2064 - + gss_krb5_padding(blocksize, blocksize + text->len);
2069 - token->len = g_token_size(&ctx->mech_used, 22 + tmsglen);
2070 + token->len = g_token_size(&ctx->mech_used, 22);
2073 - g_make_token_header(&ctx->mech_used, 22 + tmsglen, &ptr);
2074 + g_make_token_header(&ctx->mech_used, 22, &ptr);
2076 - *ptr++ = (unsigned char) ((toktype>>8)&0xff);
2077 - *ptr++ = (unsigned char) (toktype&0xff);
2078 + *ptr++ = (unsigned char) ((KG_TOK_MIC_MSG>>8)&0xff);
2079 + *ptr++ = (unsigned char) (KG_TOK_MIC_MSG&0xff);
2081 /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */
2083 @@ -133,17 +118,9 @@
2085 *(u16 *)(krb5_hdr + 2) = htons(ctx->signalg);
2086 memset(krb5_hdr + 4, 0xff, 4);
2087 - if (toktype == KG_TOK_WRAP_MSG)
2088 - *(u16 *)(krb5_hdr + 4) = htons(ctx->sealalg);
2090 - if (toktype == KG_TOK_WRAP_MSG) {
2091 - /* XXX removing support for now */
2093 - } else { /* Sign only. */
2094 - if (make_checksum(checksum_type, krb5_hdr, 8, text,
2096 + if (make_checksum(checksum_type, krb5_hdr, 8, text, 0, &md5cksum))
2100 switch (ctx->signalg) {
2101 case SGN_ALG_DES_MAC_MD5:
2102 @@ -163,12 +140,14 @@
2104 kfree(md5cksum.data);
2106 + spin_lock(&krb5_seq_lock);
2107 + seq_send = ctx->seq_send++;
2108 + spin_unlock(&krb5_seq_lock);
2110 if ((krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff,
2111 - ctx->seq_send, krb5_hdr + 16, krb5_hdr + 8)))
2112 + seq_send, krb5_hdr + 16, krb5_hdr + 8)))
2117 return ((ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE);
2119 if (md5cksum.data) kfree(md5cksum.data);
2120 Index: linux-2.6.10/net/sunrpc/auth_gss/gss_pseudoflavors.c
2121 ===================================================================
2122 --- linux-2.6.10.orig/net/sunrpc/auth_gss/gss_pseudoflavors.c 2004-12-25 05:34:45.000000000 +0800
2123 +++ linux-2.6.10/net/sunrpc/auth_gss/gss_pseudoflavors.c 2005-04-05 19:01:49.158500672 +0800
2126 - * linux/net/sunrpc/gss_union.c
2128 - * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/generic code
2130 - * Copyright (c) 2001 The Regents of the University of Michigan.
2131 - * All rights reserved.
2133 - * Andy Adamson <andros@umich.edu>
2138 - * Copyright 1993 by OpenVision Technologies, Inc.
2140 - * Permission to use, copy, modify, distribute, and sell this software
2141 - * and its documentation for any purpose is hereby granted without fee,
2142 - * provided that the above copyright notice appears in all copies and
2143 - * that both that copyright notice and this permission notice appear in
2144 - * supporting documentation, and that the name of OpenVision not be used
2145 - * in advertising or publicity pertaining to distribution of the software
2146 - * without specific, written prior permission. OpenVision makes no
2147 - * representations about the suitability of this software for any
2148 - * purpose. It is provided "as is" without express or implied warranty.
2150 - * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
2151 - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
2152 - * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
2153 - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
2154 - * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
2155 - * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
2156 - * PERFORMANCE OF THIS SOFTWARE.
2159 -#include <linux/types.h>
2160 -#include <linux/slab.h>
2161 -#include <linux/socket.h>
2162 -#include <linux/sunrpc/gss_asn1.h>
2163 -#include <linux/sunrpc/auth_gss.h>
2166 -# define RPCDBG_FACILITY RPCDBG_AUTH
2169 -static LIST_HEAD(registered_triples);
2170 -static spinlock_t registered_triples_lock = SPIN_LOCK_UNLOCKED;
2172 -/* The following must be called with spinlock held: */
2173 -static struct sup_sec_triple *
2174 -do_lookup_triple_by_pseudoflavor(u32 pseudoflavor)
2176 - struct sup_sec_triple *pos, *triple = NULL;
2178 - list_for_each_entry(pos, &registered_triples, triples) {
2179 - if (pos->pseudoflavor == pseudoflavor) {
2187 -/* XXX Need to think about reference counting of triples and of mechs.
2188 - * Currently we do no reference counting of triples, and I think that's
2189 - * probably OK given the reference counting on mechs, but there's probably
2190 - * a better way to do all this. */
2193 -gss_register_triple(u32 pseudoflavor, struct gss_api_mech *mech,
2194 - u32 qop, u32 service)
2196 - struct sup_sec_triple *triple;
2198 - if (!(triple = kmalloc(sizeof(*triple), GFP_KERNEL))) {
2199 - printk("Alloc failed in gss_register_triple");
2202 - triple->pseudoflavor = pseudoflavor;
2203 - triple->mech = gss_mech_get_by_OID(&mech->gm_oid);
2204 - triple->qop = qop;
2205 - triple->service = service;
2207 - spin_lock(&registered_triples_lock);
2208 - if (do_lookup_triple_by_pseudoflavor(pseudoflavor)) {
2209 - printk(KERN_WARNING "RPC: Registered pseudoflavor %d again\n",
2213 - list_add(&triple->triples, &registered_triples);
2214 - spin_unlock(&registered_triples_lock);
2215 - dprintk("RPC: registered pseudoflavor %d\n", pseudoflavor);
2221 - spin_unlock(&registered_triples_lock);
2227 -gss_unregister_triple(u32 pseudoflavor)
2229 - struct sup_sec_triple *triple;
2231 - spin_lock(&registered_triples_lock);
2232 - if (!(triple = do_lookup_triple_by_pseudoflavor(pseudoflavor))) {
2233 - spin_unlock(&registered_triples_lock);
2234 - printk("Can't unregister unregistered pseudoflavor %d\n",
2238 - list_del(&triple->triples);
2239 - spin_unlock(&registered_triples_lock);
2240 - gss_mech_put(triple->mech);
2247 -print_sec_triple(struct xdr_netobj *oid,u32 qop,u32 service)
2249 - dprintk("RPC: print_sec_triple:\n");
2250 - dprintk(" oid_len %d\n oid :\n",oid->len);
2251 - print_hexl((u32 *)oid->data,oid->len,0);
2252 - dprintk(" qop %d\n",qop);
2253 - dprintk(" service %d\n",service);
2256 -/* Function: gss_get_cmp_triples
2258 - * Description: search sec_triples for a matching security triple
2259 - * return pseudoflavor if match, else 0
2260 - * (Note that 0 is a valid pseudoflavor, but not for any gss pseudoflavor
2261 - * (0 means auth_null), so this shouldn't cause confusion.)
2264 -gss_cmp_triples(u32 oid_len, char *oid_data, u32 qop, u32 service)
2266 - struct sup_sec_triple *triple;
2267 - u32 pseudoflavor = 0;
2268 - struct xdr_netobj oid;
2270 - oid.len = oid_len;
2271 - oid.data = oid_data;
2273 - dprintk("RPC: gss_cmp_triples\n");
2274 - print_sec_triple(&oid,qop,service);
2276 - spin_lock(&registered_triples_lock);
2277 - list_for_each_entry(triple, &registered_triples, triples) {
2278 - if((g_OID_equal(&oid, &triple->mech->gm_oid))
2279 - && (qop == triple->qop)
2280 - && (service == triple->service)) {
2281 - pseudoflavor = triple->pseudoflavor;
2285 - spin_unlock(&registered_triples_lock);
2286 - dprintk("RPC: gss_cmp_triples return %d\n", pseudoflavor);
2287 - return pseudoflavor;
2291 -gss_get_pseudoflavor(struct gss_ctx *ctx, u32 qop, u32 service)
2293 - return gss_cmp_triples(ctx->mech_type->gm_oid.len,
2294 - ctx->mech_type->gm_oid.data,
2298 -/* Returns nonzero iff the given pseudoflavor is in the supported list.
2299 - * (Note that without incrementing a reference count or anything, this
2300 - * doesn't give any guarantees.) */
2302 -gss_pseudoflavor_supported(u32 pseudoflavor)
2304 - struct sup_sec_triple *triple;
2306 - spin_lock(&registered_triples_lock);
2307 - triple = do_lookup_triple_by_pseudoflavor(pseudoflavor);
2308 - spin_unlock(&registered_triples_lock);
2309 - return (triple ? 1 : 0);
2313 -gss_pseudoflavor_to_service(u32 pseudoflavor)
2315 - struct sup_sec_triple *triple;
2317 - spin_lock(&registered_triples_lock);
2318 - triple = do_lookup_triple_by_pseudoflavor(pseudoflavor);
2319 - spin_unlock(&registered_triples_lock);
2321 - dprintk("RPC: gss_pseudoflavor_to_service called with unsupported pseudoflavor %d\n",
2325 - return triple->service;
2328 -struct gss_api_mech *
2329 -gss_pseudoflavor_to_mech(u32 pseudoflavor) {
2330 - struct sup_sec_triple *triple;
2331 - struct gss_api_mech *mech = NULL;
2333 - spin_lock(&registered_triples_lock);
2334 - triple = do_lookup_triple_by_pseudoflavor(pseudoflavor);
2335 - spin_unlock(&registered_triples_lock);
2337 - mech = gss_mech_get(triple->mech);
2339 - dprintk("RPC: gss_pseudoflavor_to_mech called with unsupported pseudoflavor %d\n",
2345 -gss_pseudoflavor_to_mechOID(u32 pseudoflavor, struct xdr_netobj * oid)
2347 - struct gss_api_mech *mech;
2349 - mech = gss_pseudoflavor_to_mech(pseudoflavor);
2351 - dprintk("RPC: gss_pseudoflavor_to_mechOID called with unsupported pseudoflavor %d\n",
2355 - oid->len = mech->gm_oid.len;
2356 - if (!(oid->data = kmalloc(oid->len, GFP_KERNEL)))
2358 - memcpy(oid->data, mech->gm_oid.data, oid->len);
2359 - gss_mech_put(mech);
2362 Index: linux-2.6.10/net/sunrpc/auth_gss/svcauth_gss.c
2363 ===================================================================
2364 --- linux-2.6.10.orig/net/sunrpc/auth_gss/svcauth_gss.c 2004-12-25 05:34:44.000000000 +0800
2365 +++ linux-2.6.10/net/sunrpc/auth_gss/svcauth_gss.c 2005-04-05 14:49:13.407691040 +0800
2370 +#include <asm/bitops.h>
2371 #include <linux/types.h>
2372 #include <linux/module.h>
2373 #include <linux/pagemap.h>
2376 static struct cache_head *rsi_table[RSI_HASHMAX];
2377 static struct cache_detail rsi_cache;
2378 -static struct rsi *rsi_lookup(struct rsi *item, int set);
2380 static void rsi_free(struct rsi *rsii)
2382 @@ -125,38 +125,6 @@
2383 return dup_to_netobj(dst, src->data, src->len);
2386 -static inline void rsi_init(struct rsi *new, struct rsi *item)
2388 - new->out_handle.data = NULL;
2389 - new->out_handle.len = 0;
2390 - new->out_token.data = NULL;
2391 - new->out_token.len = 0;
2392 - new->in_handle.len = item->in_handle.len;
2393 - item->in_handle.len = 0;
2394 - new->in_token.len = item->in_token.len;
2395 - item->in_token.len = 0;
2396 - new->in_handle.data = item->in_handle.data;
2397 - item->in_handle.data = NULL;
2398 - new->in_token.data = item->in_token.data;
2399 - item->in_token.data = NULL;
2402 -static inline void rsi_update(struct rsi *new, struct rsi *item)
2404 - BUG_ON(new->out_handle.data || new->out_token.data);
2405 - new->out_handle.len = item->out_handle.len;
2406 - item->out_handle.len = 0;
2407 - new->out_token.len = item->out_token.len;
2408 - item->out_token.len = 0;
2409 - new->out_handle.data = item->out_handle.data;
2410 - item->out_handle.data = NULL;
2411 - new->out_token.data = item->out_token.data;
2412 - item->out_token.data = NULL;
2414 - new->major_status = item->major_status;
2415 - new->minor_status = item->minor_status;
2418 static void rsi_request(struct cache_detail *cd,
2419 struct cache_head *h,
2420 char **bpp, int *blen)
2421 @@ -168,6 +136,75 @@
2426 +gssd_reply(struct rsi *item)
2429 + struct cache_head **hp, **head;
2431 + head = &rsi_cache.hash_table[rsi_hash(item)];
2432 + write_lock(&rsi_cache.hash_lock);
2433 + for (hp = head; *hp != NULL; hp = &tmp->h.next) {
2434 + tmp = container_of(*hp, struct rsi, h);
2435 + if (rsi_match(tmp, item)) {
2436 + cache_get(&tmp->h);
2437 + clear_bit(CACHE_HASHED, &tmp->h.flags);
2438 + *hp = tmp->h.next;
2439 + tmp->h.next = NULL;
2440 + rsi_cache.entries--;
2441 + if (test_bit(CACHE_VALID, &tmp->h.flags)) {
2442 + write_unlock(&rsi_cache.hash_lock);
2443 + rsi_put(&tmp->h, &rsi_cache);
2446 + set_bit(CACHE_HASHED, &item->h.flags);
2447 + item->h.next = *hp;
2449 + rsi_cache.entries++;
2450 + set_bit(CACHE_VALID, &item->h.flags);
2451 + item->h.last_refresh = get_seconds();
2452 + write_unlock(&rsi_cache.hash_lock);
2453 + cache_fresh(&rsi_cache, &tmp->h, 0);
2454 + rsi_put(&tmp->h, &rsi_cache);
2458 + write_unlock(&rsi_cache.hash_lock);
2462 +static inline struct rsi *
2463 +gssd_upcall(struct rsi *item, struct svc_rqst *rqstp)
2466 + struct cache_head **hp, **head;
2468 + head = &rsi_cache.hash_table[rsi_hash(item)];
2469 + read_lock(&rsi_cache.hash_lock);
2470 + for (hp = head; *hp != NULL; hp = &tmp->h.next) {
2471 + tmp = container_of(*hp, struct rsi, h);
2472 + if (rsi_match(tmp, item)) {
2473 + if (!test_bit(CACHE_VALID, &tmp->h.flags)) {
2474 + read_unlock(&rsi_cache.hash_lock);
2477 + *hp = tmp->h.next;
2478 + tmp->h.next = NULL;
2479 + rsi_cache.entries--;
2480 + read_unlock(&rsi_cache.hash_lock);
2484 + cache_get(&item->h);
2485 + item->h.next = *head;
2487 + rsi_cache.entries++;
2488 + read_unlock(&rsi_cache.hash_lock);
2489 + cache_get(&item->h);
2490 + if (cache_check(&rsi_cache, &item->h, &rqstp->rq_chandle))
2495 static int rsi_parse(struct cache_detail *cd,
2496 char *mesg, int mlen)
2497 @@ -176,17 +213,22 @@
2501 - struct rsi rsii, *rsip = NULL;
2504 int status = -EINVAL;
2506 - memset(&rsii, 0, sizeof(rsii));
2507 + rsii = kmalloc(sizeof(*rsii), GFP_KERNEL);
2510 + memset(rsii, 0, sizeof(*rsii));
2511 + cache_init(&rsii->h);
2514 len = qword_get(&mesg, buf, mlen);
2518 - if (dup_to_netobj(&rsii.in_handle, buf, len))
2519 + if (dup_to_netobj(&rsii->in_handle, buf, len))
2523 @@ -195,10 +237,9 @@
2527 - if (dup_to_netobj(&rsii.in_token, buf, len))
2528 + if (dup_to_netobj(&rsii->in_token, buf, len))
2533 expiry = get_expiry(&mesg);
2535 @@ -212,13 +253,13 @@
2539 - rsii.major_status = simple_strtoul(buf, &ep, 10);
2540 + rsii->major_status = simple_strtoul(buf, &ep, 10);
2543 len = qword_get(&mesg, buf, mlen);
2546 - rsii.minor_status = simple_strtoul(buf, &ep, 10);
2547 + rsii->minor_status = simple_strtoul(buf, &ep, 10);
2555 - if (dup_to_netobj(&rsii.out_handle, buf, len))
2556 + if (dup_to_netobj(&rsii->out_handle, buf, len))
2560 @@ -236,16 +277,14 @@
2564 - if (dup_to_netobj(&rsii.out_token, buf, len))
2565 + if (dup_to_netobj(&rsii->out_token, buf, len))
2568 - rsii.h.expiry_time = expiry;
2569 - rsip = rsi_lookup(&rsii, 1);
2571 + rsii->h.expiry_time = expiry;
2572 + status = gssd_reply(rsii);
2576 - rsi_put(&rsip->h, &rsi_cache);
2578 + rsi_put(&rsii->h, &rsi_cache);
2583 .cache_parse = rsi_parse,
2586 -static DefineSimpleCacheLookup(rsi, 0)
2589 * The rpcsec_context cache is used to store a context that is
2590 * used in data exchange.
2593 static struct cache_head *rsc_table[RSC_HASHMAX];
2594 static struct cache_detail rsc_cache;
2595 -static struct rsc *rsc_lookup(struct rsc *item, int set);
2597 static void rsc_free(struct rsc *rsci)
2599 @@ -325,26 +361,46 @@
2600 return netobj_equal(&new->handle, &tmp->handle);
2604 -rsc_init(struct rsc *new, struct rsc *tmp)
2605 +static struct rsc *rsc_lookup(struct rsc *item, int set)
2607 - new->handle.len = tmp->handle.len;
2608 - tmp->handle.len = 0;
2609 - new->handle.data = tmp->handle.data;
2610 - tmp->handle.data = NULL;
2611 - new->mechctx = NULL;
2612 - new->cred.cr_group_info = NULL;
2616 -rsc_update(struct rsc *new, struct rsc *tmp)
2618 - new->mechctx = tmp->mechctx;
2619 - tmp->mechctx = NULL;
2620 - memset(&new->seqdata, 0, sizeof(new->seqdata));
2621 - spin_lock_init(&new->seqdata.sd_lock);
2622 - new->cred = tmp->cred;
2623 - tmp->cred.cr_group_info = NULL;
2624 + struct rsc *tmp = NULL;
2625 + struct cache_head **hp, **head;
2626 + head = &rsc_cache.hash_table[rsc_hash(item)];
2629 + write_lock(&rsc_cache.hash_lock);
2631 + read_lock(&rsc_cache.hash_lock);
2632 + for (hp = head; *hp != NULL; hp = &tmp->h.next) {
2633 + tmp = container_of(*hp, struct rsc, h);
2634 + if (!rsc_match(tmp, item))
2636 + cache_get(&tmp->h);
2639 + *hp = tmp->h.next;
2640 + tmp->h.next = NULL;
2641 + clear_bit(CACHE_HASHED, &tmp->h.flags);
2642 + rsc_put(&tmp->h, &rsc_cache);
2645 + /* Didn't find anything */
2648 + rsc_cache.entries++;
2650 + set_bit(CACHE_HASHED, &item->h.flags);
2651 + item->h.next = *head;
2653 + write_unlock(&rsc_cache.hash_lock);
2654 + cache_fresh(&rsc_cache, &item->h, item->h.expiry_time);
2655 + cache_get(&item->h);
2660 + read_unlock(&rsc_cache.hash_lock);
2664 static int rsc_parse(struct cache_detail *cd,
2665 @@ -353,19 +409,22 @@
2666 /* contexthandle expiry [ uid gid N <n gids> mechname ...mechdata... ] */
2669 - struct rsc rsci, *rscp = NULL;
2670 + struct rsc *rsci, *res = NULL;
2672 int status = -EINVAL;
2674 - memset(&rsci, 0, sizeof(rsci));
2675 + rsci = kmalloc(sizeof(*rsci), GFP_KERNEL);
2678 + memset(rsci, 0, sizeof(*rsci));
2679 + cache_init(&rsci->h);
2680 /* context handle */
2681 len = qword_get(&mesg, buf, mlen);
2682 if (len < 0) goto out;
2684 - if (dup_to_netobj(&rsci.handle, buf, len))
2685 + if (dup_to_netobj(&rsci->handle, buf, len))
2690 expiry = get_expiry(&mesg);
2692 @@ -373,26 +432,26 @@
2695 /* uid, or NEGATIVE */
2696 - rv = get_int(&mesg, &rsci.cred.cr_uid);
2697 + rv = get_int(&mesg, &rsci->cred.cr_uid);
2701 - set_bit(CACHE_NEGATIVE, &rsci.h.flags);
2702 + set_bit(CACHE_NEGATIVE, &rsci->h.flags);
2705 struct gss_api_mech *gm;
2706 struct xdr_netobj tmp_buf;
2709 - if (get_int(&mesg, &rsci.cred.cr_gid))
2710 + if (get_int(&mesg, &rsci->cred.cr_gid))
2713 /* number of additional gid's */
2714 if (get_int(&mesg, &N))
2717 - rsci.cred.cr_group_info = groups_alloc(N);
2718 - if (rsci.cred.cr_group_info == NULL)
2719 + rsci->cred.cr_group_info = groups_alloc(N);
2720 + if (rsci->cred.cr_group_info == NULL)
2726 if (get_int(&mesg, &gid))
2728 - GROUP_AT(rsci.cred.cr_group_info, i) = gid;
2729 + GROUP_AT(rsci->cred.cr_group_info, i) = gid;
2733 @@ -422,19 +481,21 @@
2737 - if (gss_import_sec_context(&tmp_buf, gm, &rsci.mechctx)) {
2738 + if (gss_import_sec_context(&tmp_buf, gm, &rsci->mechctx)) {
2744 - rsci.h.expiry_time = expiry;
2745 - rscp = rsc_lookup(&rsci, 1);
2746 + rsci->h.expiry_time = expiry;
2747 + spin_lock_init(&rsci->seqdata.sd_lock);
2748 + res = rsc_lookup(rsci, 1);
2749 + rsc_put(&res->h, &rsc_cache);
2755 - rsc_put(&rscp->h, &rsc_cache);
2757 + rsc_put(&rsci->h, &rsc_cache);
2761 @@ -446,19 +507,14 @@
2762 .cache_parse = rsc_parse,
2765 -static DefineSimpleCacheLookup(rsc, 0);
2768 gss_svc_searchbyctx(struct xdr_netobj *handle)
2773 - memset(&rsci, 0, sizeof(rsci));
2774 - if (dup_to_netobj(&rsci.handle, handle->data, handle->len))
2776 + rsci.handle = *handle;
2777 found = rsc_lookup(&rsci, 0);
2781 if (cache_check(&rsc_cache, &found->h, NULL))
2782 @@ -721,6 +777,45 @@
2787 +unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx)
2789 + int stat = -EINVAL;
2792 + u32 priv_len, maj_stat;
2795 + lenp = buf->head[0].iov_base;
2796 + priv_len = ntohl(svc_getu32(&buf->head[0]));
2797 + if (priv_len > buf->len) /* XXXJBF: wrong check */
2799 + /* XXXJBF: bizarre hack: to handle revisits (and not decrypt
2800 + * twice), the first time through we write an offset
2801 + * telling us where to skip to find the already-decrypted data */
2802 + if (rqstp->rq_deferred) {
2803 + buf->head[0].iov_base += priv_len;
2804 + buf->head[0].iov_len -= priv_len;
2807 + saved_len = buf->len; /* XXX HACK */
2808 + buf->len = priv_len;
2809 + maj_stat = gss_unwrap(ctx, NULL, 0, buf, &out_offset);
2810 + buf->len = saved_len;
2811 + buf->head[0].iov_base += out_offset;
2812 + buf->head[0].iov_len -= out_offset;
2813 + BUG_ON(buf->head[0].iov_len <= 0);
2814 + if (maj_stat != GSS_S_COMPLETE)
2816 + if (ntohl(svc_getu32(&buf->head[0])) != seq)
2818 + /* XXXJBF: see "bizarre hack", above. */
2819 + *lenp = htonl(out_offset + 4);
2825 struct gss_svc_data {
2826 /* decoded gss client cred: */
2827 struct rpc_gss_wire_cred clcred;
2828 @@ -730,6 +825,19 @@
2833 +svcauth_gss_set_client(struct svc_rqst *rqstp)
2835 + struct gss_svc_data *svcdata = rqstp->rq_auth_data;
2836 + struct rsc *rsci = svcdata->rsci;
2837 + struct rpc_gss_wire_cred *gc = &svcdata->clcred;
2839 + rqstp->rq_client = find_gss_auth_domain(rsci->mechctx, gc->gc_svc);
2840 + if (rqstp->rq_client == NULL)
2841 + return SVC_DENIED;
2846 * Accept an rpcsec packet.
2847 * If context establishment, punt to user space
2849 struct gss_svc_data *svcdata = rqstp->rq_auth_data;
2850 struct rpc_gss_wire_cred *gc;
2851 struct rsc *rsci = NULL;
2852 - struct rsi *rsip, rsikey;
2853 + struct rsi *rsip, *rsikey = NULL;
2855 u32 *reject_stat = resv->iov_base + resv->iov_len;
2857 @@ -841,30 +949,23 @@
2858 *authp = rpc_autherr_badcred;
2859 if (gc->gc_proc == RPC_GSS_PROC_INIT && gc->gc_ctx.len != 0)
2861 - memset(&rsikey, 0, sizeof(rsikey));
2862 - if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx))
2863 + rsikey = kmalloc(sizeof(*rsikey), GFP_KERNEL);
2866 + memset(rsikey, 0, sizeof(*rsikey));
2867 + cache_init(&rsikey->h);
2868 + if (dup_netobj(&rsikey->in_handle, &gc->gc_ctx))
2870 *authp = rpc_autherr_badverf;
2871 - if (svc_safe_getnetobj(argv, &tmpobj)) {
2872 - kfree(rsikey.in_handle.data);
2873 + if (svc_safe_getnetobj(argv, &tmpobj))
2876 - if (dup_netobj(&rsikey.in_token, &tmpobj)) {
2877 - kfree(rsikey.in_handle.data);
2878 + if (dup_netobj(&rsikey->in_token, &tmpobj))
2882 - rsip = rsi_lookup(&rsikey, 0);
2883 - rsi_free(&rsikey);
2887 - switch(cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) {
2889 + rsip = gssd_upcall(rsikey, rqstp);
2896 rsci = gss_svc_searchbyctx(&rsip->out_handle);
2899 @@ -893,11 +994,6 @@
2900 svc_putu32(resv, rpc_success);
2902 case RPC_GSS_PROC_DATA:
2903 - *authp = rpc_autherr_badcred;
2904 - rqstp->rq_client =
2905 - find_gss_auth_domain(rsci->mechctx, gc->gc_svc);
2906 - if (rqstp->rq_client == NULL)
2908 *authp = rpcsec_gsserr_ctxproblem;
2909 if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
2911 @@ -911,6 +1007,15 @@
2912 if (unwrap_integ_data(&rqstp->rq_arg,
2913 gc->gc_seq, rsci->mechctx))
2915 + /* placeholders for length and seq. number: */
2916 + svcdata->body_start = resv->iov_base + resv->iov_len;
2917 + svc_putu32(resv, 0);
2918 + svc_putu32(resv, 0);
2920 + case RPC_GSS_SVC_PRIVACY:
2921 + if (unwrap_priv_data(rqstp, &rqstp->rq_arg,
2922 + gc->gc_seq, rsci->mechctx))
2924 svcdata->rsci = rsci;
2925 cache_get(&rsci->h);
2926 /* placeholders for length and seq. number: */
2927 @@ -918,11 +1023,11 @@
2928 svc_putu32(resv, 0);
2929 svc_putu32(resv, 0);
2931 - case RPC_GSS_SVC_PRIVACY:
2932 - /* currently unsupported */
2936 + svcdata->rsci = rsci;
2937 + cache_get(&rsci->h);
2941 @@ -937,13 +1042,15 @@
2946 + rsi_put(&rsikey->h, &rsi_cache);
2948 rsc_put(&rsci->h, &rsc_cache);
2953 -svcauth_gss_release(struct svc_rqst *rqstp)
2955 +svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp)
2957 struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data;
2958 struct rpc_gss_wire_cred *gc = &gsd->clcred;
2959 @@ -955,10 +1062,160 @@
2960 int integ_offset, integ_len;
2963 + p = gsd->body_start;
2964 + gsd->body_start = NULL;
2965 + /* move accept_stat to right place: */
2966 + memcpy(p, p + 2, 4);
2967 + /* Don't wrap in failure case: */
2968 + /* Counting on not getting here if call was not even accepted! */
2969 + if (*p != rpc_success) {
2970 + resbuf->head[0].iov_len -= 2 * 4;
2974 + integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base;
2975 + integ_len = resbuf->len - integ_offset;
2976 + BUG_ON(integ_len % 4);
2977 + *p++ = htonl(integ_len);
2978 + *p++ = htonl(gc->gc_seq);
2979 + if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset,
2982 + if (resbuf->page_len == 0
2983 + && resbuf->tail[0].iov_len + RPC_MAX_AUTH_SIZE
2985 + BUG_ON(resbuf->tail[0].iov_len);
2986 + /* Use head for everything */
2987 + resv = &resbuf->head[0];
2988 + } else if (resbuf->tail[0].iov_base == NULL) {
2989 + /* copied from nfsd4_encode_read */
2990 + svc_take_page(rqstp);
2991 + resbuf->tail[0].iov_base = page_address(rqstp
2992 + ->rq_respages[rqstp->rq_resused-1]);
2993 + rqstp->rq_restailpage = rqstp->rq_resused-1;
2994 + resbuf->tail[0].iov_len = 0;
2995 + resv = &resbuf->tail[0];
2997 + resv = &resbuf->tail[0];
2999 + mic.data = (u8 *)resv->iov_base + resv->iov_len + 4;
3000 + if (gss_get_mic(gsd->rsci->mechctx, 0, &integ_buf, &mic))
3002 + svc_putu32(resv, htonl(mic.len));
3003 + memset(mic.data + mic.len, 0,
3004 + round_up_to_quad(mic.len) - mic.len);
3005 + resv->iov_len += XDR_QUADLEN(mic.len) << 2;
3006 + /* not strictly required: */
3007 + resbuf->len += XDR_QUADLEN(mic.len) << 2;
3008 + BUG_ON(resv->iov_len > PAGE_SIZE);
3015 +/* XXXJBF: Look for chances to share code with client */
3016 +/* XXXJBF: Do we need to preallocate these pages somehow? E.g. see
3017 + * buffer size calculations in svcsock.c */
3018 +/* XXXJBF: how does reference counting on pages work? */
3019 +static struct page **
3020 +svc_alloc_enc_pages(struct xdr_buf *buf)
3022 + struct page **ret;
3025 + if (buf->page_len == 0)
3027 + BUG_ON(buf->page_base >> PAGE_CACHE_SHIFT);
3028 + last = (buf->page_base + buf->page_len - 1) >> PAGE_CACHE_SHIFT;
3029 + ret = kmalloc((last + 1) * sizeof(struct page *), GFP_KERNEL);
3032 + for (i = 0; i<= last; i++) {
3033 + ret[i] = alloc_page(GFP_KERNEL);
3034 + if (ret[i] == NULL)
3040 + for (i--; i >= 0; i--) {
3041 + __free_page(ret[i]);
3047 +svcauth_gss_wrap_resp_priv(struct svc_rqst *rqstp)
3049 + struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data;
3050 + struct rpc_gss_wire_cred *gc = &gsd->clcred;
3051 + struct xdr_buf *resbuf = &rqstp->rq_res;
3052 + struct page **inpages;
3056 + int stat = -EINVAL;
3058 + p = gsd->body_start;
3059 + gsd->body_start = NULL;
3060 + /* move accept_stat to right place: */
3061 + memcpy(p, p + 2, 4);
3062 + /* Don't wrap in failure case: */
3063 + /* Counting on not getting here if call was not even accepted! */
3064 + if (*p != rpc_success) {
3065 + resbuf->head[0].iov_len -= 2 * 4;
3070 + offset = (u8 *)p - (u8 *)resbuf->head[0].iov_base;
3071 + *p++ = htonl(gc->gc_seq);
3073 + inpages = resbuf->pages;
3074 + /* XXXJBF: huge memory leaks here: allocated pages probably aren't
3075 + * freed, and neither is memory used to hold page array. */
3076 + resbuf->pages = svc_alloc_enc_pages(resbuf);
3077 + if (resbuf->page_len && !resbuf->pages)
3078 + goto out_err; /* XXX sleep and retry? Reserve ahead of time
3080 + if (resbuf->tail[0].iov_len == 0 || resbuf->tail[0].iov_base == NULL) {
3081 + /* copied from nfsd4_encode_read */
3082 + {int i = svc_take_page(rqstp); BUG_ON(i); }
3083 + resbuf->tail[0].iov_base = page_address(rqstp
3084 + ->rq_respages[rqstp->rq_resused-1]);
3085 + rqstp->rq_restailpage = rqstp->rq_resused-1;
3086 + resbuf->tail[0].iov_len = 0;
3088 + /* XXX: Will svc code attempt to free stuff in xdr_buf->pages?
3089 + * Or can we leave it in any old state on error?? */
3091 + if (gss_wrap(gsd->rsci->mechctx, GSS_C_QOP_DEFAULT, offset,
3094 + *len = htonl(resbuf->len - offset);
3095 + pad = 3 - ((resbuf->len - offset - 1)&3);
3096 + p = (u32 *)(resbuf->tail[0].iov_base + resbuf->tail[0].iov_len);
3097 + memset(p, 0, pad);
3098 + resbuf->tail[0].iov_len += pad;
3106 +svcauth_gss_release(struct svc_rqst *rqstp)
3108 + struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data;
3109 + struct rpc_gss_wire_cred *gc = &gsd->clcred;
3110 + struct xdr_buf *resbuf = &rqstp->rq_res;
3111 + int stat = -EINVAL;
3113 if (gc->gc_proc != RPC_GSS_PROC_DATA)
3115 /* Release can be called twice, but we only wrap once. */
3116 - if (gsd->body_start == 0)
3117 + if (gsd->body_start == NULL)
3119 /* normally not set till svc_send, but we need it here: */
3120 resbuf->len = resbuf->head[0].iov_len
3121 @@ -967,55 +1224,15 @@
3122 case RPC_GSS_SVC_NONE:
3124 case RPC_GSS_SVC_INTEGRITY:
3125 - p = gsd->body_start;
3126 - gsd->body_start = NULL;
3127 - /* move accept_stat to right place: */
3128 - memcpy(p, p + 2, 4);
3129 - /* don't wrap in failure case: */
3130 - /* Note: counting on not getting here if call was not even
3132 - if (*p != rpc_success) {
3133 - resbuf->head[0].iov_len -= 2 * 4;
3137 - integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base;
3138 - integ_len = resbuf->len - integ_offset;
3139 - BUG_ON(integ_len % 4);
3140 - *p++ = htonl(integ_len);
3141 - *p++ = htonl(gc->gc_seq);
3142 - if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset,
3145 - if (resbuf->page_len == 0
3146 - && resbuf->tail[0].iov_len + RPC_MAX_AUTH_SIZE
3148 - BUG_ON(resbuf->tail[0].iov_len);
3149 - /* Use head for everything */
3150 - resv = &resbuf->head[0];
3151 - } else if (resbuf->tail[0].iov_base == NULL) {
3152 - /* copied from nfsd4_encode_read */
3153 - svc_take_page(rqstp);
3154 - resbuf->tail[0].iov_base = page_address(rqstp
3155 - ->rq_respages[rqstp->rq_resused-1]);
3156 - rqstp->rq_restailpage = rqstp->rq_resused-1;
3157 - resbuf->tail[0].iov_len = 0;
3158 - resv = &resbuf->tail[0];
3160 - resv = &resbuf->tail[0];
3162 - mic.data = (u8 *)resv->iov_base + resv->iov_len + 4;
3163 - if (gss_get_mic(gsd->rsci->mechctx, 0, &integ_buf, &mic))
3164 + stat = svcauth_gss_wrap_resp_integ(rqstp);
3167 - svc_putu32(resv, htonl(mic.len));
3168 - memset(mic.data + mic.len, 0,
3169 - round_up_to_quad(mic.len) - mic.len);
3170 - resv->iov_len += XDR_QUADLEN(mic.len) << 2;
3171 - /* not strictly required: */
3172 - resbuf->len += XDR_QUADLEN(mic.len) << 2;
3173 - BUG_ON(resv->iov_len > PAGE_SIZE);
3175 case RPC_GSS_SVC_PRIVACY:
3176 + stat = svcauth_gss_wrap_resp_priv(rqstp);
3183 @@ -1052,6 +1269,7 @@
3184 .accept = svcauth_gss_accept,
3185 .release = svcauth_gss_release,
3186 .domain_release = svcauth_gss_domain_release,
3187 + .set_client = svcauth_gss_set_client,
3191 Index: linux-2.6.10/net/sunrpc/auth_gss/sunrpcgss_syms.c
3192 ===================================================================
3193 --- linux-2.6.10.orig/net/sunrpc/auth_gss/sunrpcgss_syms.c 2004-12-25 05:35:23.000000000 +0800
3194 +++ linux-2.6.10/net/sunrpc/auth_gss/sunrpcgss_syms.c 2005-04-05 19:01:49.158500672 +0800
3196 -#include <linux/config.h>
3197 -#include <linux/module.h>
3199 -#include <linux/types.h>
3200 -#include <linux/socket.h>
3201 -#include <linux/sched.h>
3202 -#include <linux/uio.h>
3203 -#include <linux/unistd.h>
3205 -#include <linux/sunrpc/auth_gss.h>
3206 -#include <linux/sunrpc/svcauth_gss.h>
3207 -#include <linux/sunrpc/gss_asn1.h>
3208 -#include <linux/sunrpc/gss_krb5.h>
3210 -/* svcauth_gss.c: */
3211 -EXPORT_SYMBOL(svcauth_gss_register_pseudoflavor);
3213 -/* registering gss mechanisms to the mech switching code: */
3214 -EXPORT_SYMBOL(gss_mech_register);
3215 -EXPORT_SYMBOL(gss_mech_unregister);
3216 -EXPORT_SYMBOL(gss_mech_get);
3217 -EXPORT_SYMBOL(gss_mech_get_by_pseudoflavor);
3218 -EXPORT_SYMBOL(gss_mech_get_by_name);
3219 -EXPORT_SYMBOL(gss_mech_put);
3220 -EXPORT_SYMBOL(gss_pseudoflavor_to_service);
3221 -EXPORT_SYMBOL(gss_service_to_auth_domain_name);
3223 -/* generic functionality in gss code: */
3224 -EXPORT_SYMBOL(g_make_token_header);
3225 -EXPORT_SYMBOL(g_verify_token_header);
3226 -EXPORT_SYMBOL(g_token_size);
3227 -EXPORT_SYMBOL(make_checksum);
3228 -EXPORT_SYMBOL(krb5_encrypt);
3229 -EXPORT_SYMBOL(krb5_decrypt);
3232 -EXPORT_SYMBOL(print_hexl);
3233 Index: linux-2.6.10/net/sunrpc/auth_gss/Makefile
3234 ===================================================================
3235 --- linux-2.6.10.orig/net/sunrpc/auth_gss/Makefile 2004-12-25 05:34:33.000000000 +0800
3236 +++ linux-2.6.10/net/sunrpc/auth_gss/Makefile 2005-04-05 14:49:13.408690888 +0800
3238 obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
3240 rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
3242 + gss_krb5_seqnum.o gss_krb5_wrap.o
3244 obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o
3246 Index: linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_mech.c
3247 ===================================================================
3248 --- linux-2.6.10.orig/net/sunrpc/auth_gss/gss_krb5_mech.c 2004-12-25 05:35:23.000000000 +0800
3249 +++ linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_mech.c 2005-04-05 14:49:13.400692104 +0800
3254 +/* XXX the following wrappers have become pointless; kill them. */
3256 gss_verify_mic_kerberos(struct gss_ctx *ctx,
3257 struct xdr_buf *message,
3260 struct krb5_ctx *kctx = ctx->internal_ctx_id;
3262 - maj_stat = krb5_read_token(kctx, mic_token, message, &qop_state,
3264 + maj_stat = krb5_read_token(kctx, mic_token, message, &qop_state);
3265 if (!maj_stat && qop_state)
3266 *qstate = qop_state;
3270 struct krb5_ctx *kctx = ctx->internal_ctx_id;
3272 - err = krb5_make_token(kctx, qop, message, mic_token, KG_TOK_MIC_MSG);
3273 + err = krb5_make_token(kctx, qop, message, mic_token);
3275 dprintk("RPC: gss_get_mic_kerberos returning %d\n",err);
3278 .gss_import_sec_context = gss_import_sec_context_kerberos,
3279 .gss_get_mic = gss_get_mic_kerberos,
3280 .gss_verify_mic = gss_verify_mic_kerberos,
3281 + .gss_wrap = gss_wrap_kerberos,
3282 + .gss_unwrap = gss_unwrap_kerberos,
3283 .gss_delete_sec_context = gss_delete_sec_context_kerberos,
3286 @@ -233,6 +235,11 @@
3287 .service = RPC_GSS_SVC_INTEGRITY,
3291 + .pseudoflavor = RPC_AUTH_GSS_KRB5P,
3292 + .service = RPC_GSS_SVC_PRIVACY,
3297 static struct gss_api_mech gss_kerberos_mech = {
3298 Index: linux-2.6.10/net/sunrpc/auth_gss/auth_gss.c
3299 ===================================================================
3300 --- linux-2.6.10.orig/net/sunrpc/auth_gss/auth_gss.c 2004-12-25 05:34:44.000000000 +0800
3301 +++ linux-2.6.10/net/sunrpc/auth_gss/auth_gss.c 2005-04-05 14:49:13.404691496 +0800
3303 #include <linux/socket.h>
3304 #include <linux/in.h>
3305 #include <linux/sched.h>
3306 +#include <linux/pagemap.h>
3307 #include <linux/sunrpc/clnt.h>
3308 #include <linux/sunrpc/auth.h>
3309 #include <linux/sunrpc/auth_gss.h>
3310 @@ -480,12 +481,14 @@
3314 - cred->cr_flags |= RPCAUTH_CRED_DEAD;
3315 + cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
3317 gss_cred_set_ctx(cred, ctx);
3318 spin_lock(&gss_auth->lock);
3319 gss_msg = __gss_find_upcall(gss_auth, acred.uid);
3322 + gss_msg->msg.errno = -EACCES;
3323 __gss_unhash_msg(gss_msg);
3324 spin_unlock(&gss_auth->lock);
3325 gss_release_msg(gss_msg);
3327 maj_stat = gss_get_mic(ctx->gc_gss_ctx,
3330 - if(maj_stat != 0){
3331 + if (maj_stat == GSS_S_CONTEXT_EXPIRED) {
3332 + cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
3333 + } else if (maj_stat != 0) {
3334 printk("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat);
3338 struct xdr_netobj mic;
3343 dprintk("RPC: %4u gss_validate\n", task->tk_pid);
3345 @@ -794,8 +800,11 @@
3349 - if (gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state))
3351 + maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state);
3352 + if (maj_stat == GSS_S_CONTEXT_EXPIRED)
3353 + cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
3356 service = gss_pseudoflavor_to_service(ctx->gc_gss_ctx->mech_type,
3357 gss_cred->gc_flavor);
3359 @@ -807,6 +816,11 @@
3360 /* verifier data, flavor, length, length, sequence number: */
3361 task->tk_auth->au_rslack = XDR_QUADLEN(len) + 4;
3363 + case RPC_GSS_SVC_PRIVACY:
3364 + /* XXXJBF: Ugh. Going for a wild overestimate.
3365 + * Need some info from krb5 layer? */
3366 + task->tk_auth->au_rslack = XDR_QUADLEN(len) + 32;
3371 @@ -821,11 +835,10 @@
3375 -gss_wrap_req_integ(struct gss_cl_ctx *ctx,
3376 - kxdrproc_t encode, void *rqstp, u32 *p, void *obj)
3377 +gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
3378 + kxdrproc_t encode, struct rpc_rqst *rqstp, u32 *p, void *obj)
3380 - struct rpc_rqst *req = (struct rpc_rqst *)rqstp;
3381 - struct xdr_buf *snd_buf = &req->rq_snd_buf;
3382 + struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
3383 struct xdr_buf integ_buf;
3384 u32 *integ_len = NULL;
3385 struct xdr_netobj mic;
3389 offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
3390 - *p++ = htonl(req->rq_seqno);
3391 + *p++ = htonl(rqstp->rq_seqno);
3393 status = encode(rqstp, p, obj);
3396 *integ_len = htonl(integ_buf.len);
3398 /* guess whether we're in the head or the tail: */
3399 - if (snd_buf->page_len || snd_buf->tail[0].iov_len)
3400 + if (snd_buf->page_len || snd_buf->tail[0].iov_len)
3401 iov = snd_buf->tail;
3403 iov = snd_buf->head;
3405 maj_stat = gss_get_mic(ctx->gc_gss_ctx,
3406 GSS_C_QOP_DEFAULT, &integ_buf, &mic);
3407 status = -EIO; /* XXX? */
3409 + if (maj_stat == GSS_S_CONTEXT_EXPIRED)
3410 + cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
3411 + else if (maj_stat)
3413 q = xdr_encode_opaque(p, NULL, mic.len);
3415 @@ -868,6 +883,112 @@
3420 +priv_release_snd_buf(struct rpc_rqst *rqstp)
3424 + for (i=0; i < rqstp->rq_enc_pages_num; i++)
3425 + __free_page(rqstp->rq_enc_pages[i]);
3426 + kfree(rqstp->rq_enc_pages);
3430 +alloc_enc_pages(struct rpc_rqst *rqstp)
3432 + struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
3433 + int first, last, i;
3435 + if (snd_buf->page_len == 0) {
3436 + rqstp->rq_enc_pages_num = 0;
3440 + first = snd_buf->page_base >> PAGE_CACHE_SHIFT;
3441 + last = (snd_buf->page_base + snd_buf->page_len - 1) >> PAGE_CACHE_SHIFT;
3442 + rqstp->rq_enc_pages_num = last - first + 1 + 1;
3443 + rqstp->rq_enc_pages
3444 + = kmalloc(rqstp->rq_enc_pages_num * sizeof(struct page *),
3446 + if (!rqstp->rq_enc_pages)
3448 + for (i=0; i < rqstp->rq_enc_pages_num; i++) {
3449 + rqstp->rq_enc_pages[i] = alloc_page(GFP_NOFS);
3450 + if (rqstp->rq_enc_pages[i] == NULL)
3453 + rqstp->rq_release_snd_buf = priv_release_snd_buf;
3456 + for (i--; i >= 0; i--) {
3457 + __free_page(rqstp->rq_enc_pages[i]);
3464 +gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
3465 + kxdrproc_t encode, struct rpc_rqst *rqstp, u32 *p, void *obj)
3467 + struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
3472 + struct page **inpages;
3479 + offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
3480 + *p++ = htonl(rqstp->rq_seqno);
3482 + status = encode(rqstp, p, obj);
3486 + status = alloc_enc_pages(rqstp);
3489 + /* XXXJBF: Oops! Do we really need rq_enc_pages any more? */
3490 + first = snd_buf->page_base >> PAGE_CACHE_SHIFT;
3491 + inpages = snd_buf->pages + first;
3492 + snd_buf->pages = rqstp->rq_enc_pages;
3493 + snd_buf->page_base -= first << PAGE_CACHE_SHIFT;
3494 + /* XXX?: tail needs to be separate if we want to be able to expand
3495 + * the head (since it's often put right after the head). But is
3496 + * expanding the head safe in any case? */
3497 + if (snd_buf->page_len || snd_buf->tail[0].iov_len) {
3498 + tmp = page_address(rqstp->rq_enc_pages[rqstp->rq_enc_pages_num - 1]);
3499 + memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len);
3500 + snd_buf->tail[0].iov_base = tmp;
3502 + maj_stat = gss_wrap(ctx->gc_gss_ctx, GSS_C_QOP_DEFAULT, offset,
3503 + snd_buf, inpages);
3504 + status = -EIO; /* XXX? */
3505 + if (maj_stat == GSS_S_CONTEXT_EXPIRED)
3506 + cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
3507 + else if (maj_stat)
3510 + *opaque_len = htonl(snd_buf->len - offset);
3511 + /* guess whether we're in the head or the tail: */
3512 + if (snd_buf->page_len || snd_buf->tail[0].iov_len)
3513 + iov = snd_buf->tail;
3515 + iov = snd_buf->head;
3516 + p = iov->iov_base + iov->iov_len;
3517 + pad = 3 - ((snd_buf->len - offset - 1) & 3);
3518 + memset(p, 0, pad);
3519 + iov->iov_len += pad;
3520 + snd_buf->len += pad;
3526 gss_wrap_req(struct rpc_task *task,
3527 kxdrproc_t encode, void *rqstp, u32 *p, void *obj)
3528 @@ -894,9 +1015,13 @@
3529 status = encode(rqstp, p, obj);
3531 case RPC_GSS_SVC_INTEGRITY:
3532 - status = gss_wrap_req_integ(ctx, encode, rqstp, p, obj);
3533 + status = gss_wrap_req_integ(cred, ctx, encode,
3536 case RPC_GSS_SVC_PRIVACY:
3537 + status = gss_wrap_req_priv(cred, ctx, encode,
3543 @@ -907,11 +1032,10 @@
3547 -gss_unwrap_resp_integ(struct gss_cl_ctx *ctx,
3548 - kxdrproc_t decode, void *rqstp, u32 **p, void *obj)
3549 +gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
3550 + struct rpc_rqst *rqstp, u32 **p)
3552 - struct rpc_rqst *req = (struct rpc_rqst *)rqstp;
3553 - struct xdr_buf *rcv_buf = &req->rq_rcv_buf;
3554 + struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
3555 struct xdr_buf integ_buf;
3556 struct xdr_netobj mic;
3557 u32 data_offset, mic_offset;
3558 @@ -926,7 +1050,7 @@
3559 mic_offset = integ_len + data_offset;
3560 if (mic_offset > rcv_buf->len)
3562 - if (ntohl(*(*p)++) != req->rq_seqno)
3563 + if (ntohl(*(*p)++) != rqstp->rq_seqno)
3566 if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset,
3567 @@ -938,11 +1062,44 @@
3569 maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf,
3571 + if (maj_stat == GSS_S_CONTEXT_EXPIRED)
3572 + cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
3573 + if (maj_stat != GSS_S_COMPLETE)
3579 +gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
3580 + struct rpc_rqst *rqstp, u32 **p)
3582 + struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
3583 + u32 offset, out_offset;
3586 + int status = -EIO;
3588 + opaque_len = ntohl(*(*p)++);
3589 + offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base;
3590 + if (offset + opaque_len > rcv_buf->len)
3592 + /* remove padding: */
3593 + rcv_buf->len = offset + opaque_len;
3595 + maj_stat = gss_unwrap(ctx->gc_gss_ctx, NULL,
3596 + offset, rcv_buf, &out_offset);
3597 + if (maj_stat == GSS_S_CONTEXT_EXPIRED)
3598 + cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
3599 if (maj_stat != GSS_S_COMPLETE)
3601 + *p = (u32 *)(rcv_buf->head[0].iov_base + out_offset);
3602 + if (ntohl(*(*p)++) != rqstp->rq_seqno)
3610 gss_unwrap_resp(struct rpc_task *task,
3611 kxdrproc_t decode, void *rqstp, u32 *p, void *obj)
3612 @@ -962,12 +1119,16 @@
3613 case RPC_GSS_SVC_NONE:
3615 case RPC_GSS_SVC_INTEGRITY:
3616 - status = gss_unwrap_resp_integ(ctx, decode,
3618 + status = gss_unwrap_resp_integ(cred, ctx, rqstp, &p);
3622 case RPC_GSS_SVC_PRIVACY:
3623 + status = gss_unwrap_resp_priv(cred, ctx, rqstp, &p);
3631 Index: linux-2.6.10/net/sunrpc/svc.c
3632 ===================================================================
3633 --- linux-2.6.10.orig/net/sunrpc/svc.c 2004-12-25 05:35:28.000000000 +0800
3634 +++ linux-2.6.10/net/sunrpc/svc.c 2005-04-05 14:49:13.409690736 +0800
3636 u32 dir, prog, vers, proc,
3637 auth_stat, rpc_stat;
3639 + u32 *accept_statp;
3641 rpc_stat = rpc_success;
3644 if (vers != 2) /* RPC version number */
3647 + /* Save position in case we later decide to reject: */
3648 + accept_statp = resv->iov_base + resv->iov_len;
3650 svc_putu32(resv, xdr_zero); /* ACCEPT */
3652 rqstp->rq_prog = prog = ntohl(svc_getu32(argv)); /* program number */
3653 @@ -311,10 +315,12 @@
3654 * We do this before anything else in order to get a decent
3657 - if (progp->pg_authenticate != NULL)
3658 - auth_res = progp->pg_authenticate(rqstp, &auth_stat);
3660 - auth_res = svc_authenticate(rqstp, &auth_stat);
3661 + auth_res = svc_authenticate(rqstp, &auth_stat);
3662 + /* Also give the program a chance to reject this call: */
3663 + if (auth_res == SVC_OK) {
3664 + auth_stat = rpc_autherr_badcred;
3665 + auth_res = progp->pg_authenticate(rqstp);
3672 dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat));
3673 serv->sv_stats->rpcbadauth++;
3674 - resv->iov_len -= 4;
3675 + /* Restore write pointer to location of accept status: */
3676 + xdr_ressize_check(rqstp, accept_statp);
3677 svc_putu32(resv, xdr_one); /* REJECT */
3678 svc_putu32(resv, xdr_one); /* AUTH_ERROR */
3679 svc_putu32(resv, auth_stat); /* status */
3680 Index: linux-2.6.10/net/sunrpc/sched.c
3681 ===================================================================
3682 --- linux-2.6.10.orig/net/sunrpc/sched.c 2004-12-25 05:34:58.000000000 +0800
3683 +++ linux-2.6.10/net/sunrpc/sched.c 2005-04-05 14:49:13.391693472 +0800
3686 static void __rpc_default_timer(struct rpc_task *task);
3687 static void rpciod_killall(void);
3690 - * When an asynchronous RPC task is activated within a bottom half
3691 - * handler, or while executing another RPC task, it is put on
3692 - * schedq, and rpciod is woken up.
3694 -static RPC_WAITQ(schedq, "schedq");
3695 +static void rpc_async_schedule(void *);
3698 * RPC tasks that create another task (e.g. for contacting the portmapper)
3701 * rpciod-related stuff
3703 -static DECLARE_WAIT_QUEUE_HEAD(rpciod_idle);
3704 -static DECLARE_COMPLETION(rpciod_killer);
3705 static DECLARE_MUTEX(rpciod_sema);
3706 static unsigned int rpciod_users;
3707 -static pid_t rpciod_pid;
3708 -static int rpc_inhibit;
3709 +static struct workqueue_struct *rpciod_workqueue;
3712 - * Spinlock for wait queues. Access to the latter also has to be
3713 - * interrupt-safe in order to allow timers to wake up sleeping tasks.
3715 -static spinlock_t rpc_queue_lock = SPIN_LOCK_UNLOCKED;
3717 * Spinlock for other critical sections of code.
3719 static spinlock_t rpc_sched_lock = SPIN_LOCK_UNLOCKED;
3722 * Disable the timer for a given RPC task. Should be called with
3723 - * rpc_queue_lock and bh_disabled in order to avoid races within
3724 + * queue->lock and bh_disabled in order to avoid races within
3728 @@ -105,19 +91,19 @@
3729 * without calling del_timer_sync(). The latter could cause a
3730 * deadlock if called while we're holding spinlocks...
3733 -rpc_run_timer(struct rpc_task *task)
3734 +static void rpc_run_timer(struct rpc_task *task)
3736 void (*callback)(struct rpc_task *);
3738 - spin_lock_bh(&rpc_queue_lock);
3739 callback = task->tk_timeout_fn;
3740 task->tk_timeout_fn = NULL;
3741 - spin_unlock_bh(&rpc_queue_lock);
3743 + if (callback && RPC_IS_QUEUED(task)) {
3744 dprintk("RPC: %4d running timer\n", task->tk_pid);
3747 + smp_mb__before_clear_bit();
3748 + clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate);
3749 + smp_mb__after_clear_bit();
3753 @@ -136,29 +122,21 @@
3754 task->tk_timeout_fn = timer;
3756 task->tk_timeout_fn = __rpc_default_timer;
3757 + set_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate);
3758 mod_timer(&task->tk_timer, jiffies + task->tk_timeout);
3762 - * Set up a timer for an already sleeping task.
3764 -void rpc_add_timer(struct rpc_task *task, rpc_action timer)
3766 - spin_lock_bh(&rpc_queue_lock);
3767 - if (!RPC_IS_RUNNING(task))
3768 - __rpc_add_timer(task, timer);
3769 - spin_unlock_bh(&rpc_queue_lock);
3773 * Delete any timer for the current task. Because we use del_timer_sync(),
3774 - * this function should never be called while holding rpc_queue_lock.
3775 + * this function should never be called while holding queue->lock.
3778 rpc_delete_timer(struct rpc_task *task)
3780 - if (del_timer_sync(&task->tk_timer))
3781 + if (test_and_clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate)) {
3782 + del_singleshot_timer_sync(&task->tk_timer);
3783 dprintk("RPC: %4d deleting timer\n", task->tk_pid);
3788 @@ -169,16 +147,17 @@
3789 struct list_head *q;
3792 + INIT_LIST_HEAD(&task->u.tk_wait.links);
3793 q = &queue->tasks[task->tk_priority];
3794 if (unlikely(task->tk_priority > queue->maxpriority))
3795 q = &queue->tasks[queue->maxpriority];
3796 - list_for_each_entry(t, q, tk_list) {
3797 + list_for_each_entry(t, q, u.tk_wait.list) {
3798 if (t->tk_cookie == task->tk_cookie) {
3799 - list_add_tail(&task->tk_list, &t->tk_links);
3800 + list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links);
3804 - list_add_tail(&task->tk_list, q);
3805 + list_add_tail(&task->u.tk_wait.list, q);
3809 @@ -189,37 +168,21 @@
3810 * improve overall performance.
3811 * Everyone else gets appended to the queue to ensure proper FIFO behavior.
3813 -static int __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
3814 +static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
3816 - if (task->tk_rpcwait == queue)
3818 + BUG_ON (RPC_IS_QUEUED(task));
3820 - if (task->tk_rpcwait) {
3821 - printk(KERN_WARNING "RPC: doubly enqueued task!\n");
3822 - return -EWOULDBLOCK;
3824 if (RPC_IS_PRIORITY(queue))
3825 __rpc_add_wait_queue_priority(queue, task);
3826 else if (RPC_IS_SWAPPER(task))
3827 - list_add(&task->tk_list, &queue->tasks[0]);
3828 + list_add(&task->u.tk_wait.list, &queue->tasks[0]);
3830 - list_add_tail(&task->tk_list, &queue->tasks[0]);
3831 - task->tk_rpcwait = queue;
3832 + list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]);
3833 + task->u.tk_wait.rpc_waitq = queue;
3834 + rpc_set_queued(task);
3836 dprintk("RPC: %4d added to queue %p \"%s\"\n",
3837 task->tk_pid, queue, rpc_qname(queue));
3842 -int rpc_add_wait_queue(struct rpc_wait_queue *q, struct rpc_task *task)
3846 - spin_lock_bh(&rpc_queue_lock);
3847 - result = __rpc_add_wait_queue(q, task);
3848 - spin_unlock_bh(&rpc_queue_lock);
3853 @@ -229,12 +192,12 @@
3857 - if (!list_empty(&task->tk_links)) {
3858 - t = list_entry(task->tk_links.next, struct rpc_task, tk_list);
3859 - list_move(&t->tk_list, &task->tk_list);
3860 - list_splice_init(&task->tk_links, &t->tk_links);
3861 + if (!list_empty(&task->u.tk_wait.links)) {
3862 + t = list_entry(task->u.tk_wait.links.next, struct rpc_task, u.tk_wait.list);
3863 + list_move(&t->u.tk_wait.list, &task->u.tk_wait.list);
3864 + list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links);
3866 - list_del(&task->tk_list);
3867 + list_del(&task->u.tk_wait.list);
3871 @@ -243,31 +206,17 @@
3873 static void __rpc_remove_wait_queue(struct rpc_task *task)
3875 - struct rpc_wait_queue *queue = task->tk_rpcwait;
3879 + struct rpc_wait_queue *queue;
3880 + queue = task->u.tk_wait.rpc_waitq;
3882 if (RPC_IS_PRIORITY(queue))
3883 __rpc_remove_wait_queue_priority(task);
3885 - list_del(&task->tk_list);
3886 - task->tk_rpcwait = NULL;
3888 + list_del(&task->u.tk_wait.list);
3889 dprintk("RPC: %4d removed from queue %p \"%s\"\n",
3890 task->tk_pid, queue, rpc_qname(queue));
3894 -rpc_remove_wait_queue(struct rpc_task *task)
3896 - if (!task->tk_rpcwait)
3898 - spin_lock_bh(&rpc_queue_lock);
3899 - __rpc_remove_wait_queue(task);
3900 - spin_unlock_bh(&rpc_queue_lock);
3903 static inline void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
3905 queue->priority = priority;
3910 + spin_lock_init(&queue->lock);
3911 for (i = 0; i < ARRAY_SIZE(queue->tasks); i++)
3912 INIT_LIST_HEAD(&queue->tasks[i]);
3913 queue->maxpriority = maxprio;
3914 @@ -316,34 +266,31 @@
3915 * Note: If the task is ASYNC, this must be called with
3916 * the spinlock held to protect the wait queue operation.
3919 -rpc_make_runnable(struct rpc_task *task)
3920 +static void rpc_make_runnable(struct rpc_task *task)
3922 - if (task->tk_timeout_fn) {
3923 - printk(KERN_ERR "RPC: task w/ running timer in rpc_make_runnable!!\n");
3926 + BUG_ON(task->tk_timeout_fn);
3927 + do_ret = rpc_test_and_set_running(task);
3928 + rpc_clear_queued(task);
3932 - rpc_set_running(task);
3933 if (RPC_IS_ASYNC(task)) {
3934 - if (RPC_IS_SLEEPING(task)) {
3936 - status = __rpc_add_wait_queue(&schedq, task);
3938 - printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
3939 - task->tk_status = status;
3942 - rpc_clear_sleeping(task);
3943 - wake_up(&rpciod_idle);
3946 + INIT_WORK(&task->u.tk_work, rpc_async_schedule, (void *)task);
3947 + status = queue_work(task->tk_workqueue, &task->u.tk_work);
3949 + printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
3950 + task->tk_status = status;
3954 - rpc_clear_sleeping(task);
3955 - wake_up(&task->tk_wait);
3958 + wake_up(&task->u.tk_wait.waitq);
3962 - * Place a newly initialized task on the schedq.
3963 + * Place a newly initialized task on the workqueue.
3966 rpc_schedule_run(struct rpc_task *task)
3967 @@ -352,33 +299,18 @@
3968 if (RPC_IS_ACTIVATED(task))
3970 task->tk_active = 1;
3971 - rpc_set_sleeping(task);
3972 rpc_make_runnable(task);
3976 - * For other people who may need to wake the I/O daemon
3977 - * but should (for now) know nothing about its innards
3979 -void rpciod_wake_up(void)
3982 - printk(KERN_ERR "rpciod: wot no daemon?\n");
3983 - wake_up(&rpciod_idle);
3987 * Prepare for sleeping on a wait queue.
3988 * By always appending tasks to the list we ensure FIFO behavior.
3989 * NB: An RPC task will only receive interrupt-driven events as long
3990 * as it's on a wait queue.
3993 -__rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
3994 +static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
3995 rpc_action action, rpc_action timer)
3999 dprintk("RPC: %4d sleep_on(queue \"%s\" time %ld)\n", task->tk_pid,
4000 rpc_qname(q), jiffies);
4002 @@ -388,49 +320,36 @@
4005 /* Mark the task as being activated if so needed */
4006 - if (!RPC_IS_ACTIVATED(task)) {
4007 + if (!RPC_IS_ACTIVATED(task))
4008 task->tk_active = 1;
4009 - rpc_set_sleeping(task);
4012 - status = __rpc_add_wait_queue(q, task);
4014 - printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
4015 - task->tk_status = status;
4017 - rpc_clear_running(task);
4018 - if (task->tk_callback) {
4019 - dprintk(KERN_ERR "RPC: %4d overwrites an active callback\n", task->tk_pid);
4022 - task->tk_callback = action;
4023 - __rpc_add_timer(task, timer);
4025 + __rpc_add_wait_queue(q, task);
4027 + BUG_ON(task->tk_callback != NULL);
4028 + task->tk_callback = action;
4029 + __rpc_add_timer(task, timer);
4033 -rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
4034 +void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
4035 rpc_action action, rpc_action timer)
4038 * Protect the queue operations.
4040 - spin_lock_bh(&rpc_queue_lock);
4041 + spin_lock_bh(&q->lock);
4042 __rpc_sleep_on(q, task, action, timer);
4043 - spin_unlock_bh(&rpc_queue_lock);
4044 + spin_unlock_bh(&q->lock);
4048 - * __rpc_wake_up_task - wake up a single rpc_task
4049 + * __rpc_do_wake_up_task - wake up a single rpc_task
4050 * @task: task to be woken up
4052 - * Caller must hold rpc_queue_lock
4053 + * Caller must hold queue->lock, and have cleared the task queued flag.
4056 -__rpc_wake_up_task(struct rpc_task *task)
4057 +static void __rpc_do_wake_up_task(struct rpc_task *task)
4059 - dprintk("RPC: %4d __rpc_wake_up_task (now %ld inh %d)\n",
4060 - task->tk_pid, jiffies, rpc_inhibit);
4061 + dprintk("RPC: %4d __rpc_wake_up_task (now %ld)\n", task->tk_pid, jiffies);
4064 if (task->tk_magic != 0xf00baa) {
4065 @@ -445,12 +364,9 @@
4066 printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task);
4069 - if (RPC_IS_RUNNING(task))
4072 __rpc_disable_timer(task);
4073 - if (task->tk_rpcwait != &schedq)
4074 - __rpc_remove_wait_queue(task);
4075 + __rpc_remove_wait_queue(task);
4077 rpc_make_runnable(task);
4079 @@ -458,6 +374,18 @@
4083 + * Wake up the specified task
4085 +static void __rpc_wake_up_task(struct rpc_task *task)
4087 + if (rpc_start_wakeup(task)) {
4088 + if (RPC_IS_QUEUED(task))
4089 + __rpc_do_wake_up_task(task);
4090 + rpc_finish_wakeup(task);
4095 * Default timeout handler if none specified by user
4098 @@ -471,14 +399,18 @@
4100 * Wake up the specified task
4103 -rpc_wake_up_task(struct rpc_task *task)
4104 +void rpc_wake_up_task(struct rpc_task *task)
4106 - if (RPC_IS_RUNNING(task))
4108 - spin_lock_bh(&rpc_queue_lock);
4109 - __rpc_wake_up_task(task);
4110 - spin_unlock_bh(&rpc_queue_lock);
4111 + if (rpc_start_wakeup(task)) {
4112 + if (RPC_IS_QUEUED(task)) {
4113 + struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq;
4115 + spin_lock_bh(&queue->lock);
4116 + __rpc_do_wake_up_task(task);
4117 + spin_unlock_bh(&queue->lock);
4119 + rpc_finish_wakeup(task);
4124 @@ -494,11 +426,11 @@
4126 q = &queue->tasks[queue->priority];
4127 if (!list_empty(q)) {
4128 - task = list_entry(q->next, struct rpc_task, tk_list);
4129 + task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
4130 if (queue->cookie == task->tk_cookie) {
4133 - list_move_tail(&task->tk_list, q);
4134 + list_move_tail(&task->u.tk_wait.list, q);
4137 * Check if we need to switch queues.
4141 if (!list_empty(q)) {
4142 - task = list_entry(q->next, struct rpc_task, tk_list);
4143 + task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
4146 } while (q != &queue->tasks[queue->priority]);
4147 @@ -541,14 +473,14 @@
4148 struct rpc_task *task = NULL;
4150 dprintk("RPC: wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue));
4151 - spin_lock_bh(&rpc_queue_lock);
4152 + spin_lock_bh(&queue->lock);
4153 if (RPC_IS_PRIORITY(queue))
4154 task = __rpc_wake_up_next_priority(queue);
4156 task_for_first(task, &queue->tasks[0])
4157 __rpc_wake_up_task(task);
4159 - spin_unlock_bh(&rpc_queue_lock);
4160 + spin_unlock_bh(&queue->lock);
4164 @@ -557,25 +489,25 @@
4165 * rpc_wake_up - wake up all rpc_tasks
4166 * @queue: rpc_wait_queue on which the tasks are sleeping
4168 - * Grabs rpc_queue_lock
4169 + * Grabs queue->lock
4171 void rpc_wake_up(struct rpc_wait_queue *queue)
4173 struct rpc_task *task;
4175 struct list_head *head;
4176 - spin_lock_bh(&rpc_queue_lock);
4177 + spin_lock_bh(&queue->lock);
4178 head = &queue->tasks[queue->maxpriority];
4180 while (!list_empty(head)) {
4181 - task = list_entry(head->next, struct rpc_task, tk_list);
4182 + task = list_entry(head->next, struct rpc_task, u.tk_wait.list);
4183 __rpc_wake_up_task(task);
4185 if (head == &queue->tasks[0])
4189 - spin_unlock_bh(&rpc_queue_lock);
4190 + spin_unlock_bh(&queue->lock);
4194 @@ -583,18 +515,18 @@
4195 * @queue: rpc_wait_queue on which the tasks are sleeping
4196 * @status: status value to set
4198 - * Grabs rpc_queue_lock
4199 + * Grabs queue->lock
4201 void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
4203 struct list_head *head;
4204 struct rpc_task *task;
4206 - spin_lock_bh(&rpc_queue_lock);
4207 + spin_lock_bh(&queue->lock);
4208 head = &queue->tasks[queue->maxpriority];
4210 while (!list_empty(head)) {
4211 - task = list_entry(head->next, struct rpc_task, tk_list);
4212 + task = list_entry(head->next, struct rpc_task, u.tk_wait.list);
4213 task->tk_status = status;
4214 __rpc_wake_up_task(task);
4220 - spin_unlock_bh(&rpc_queue_lock);
4221 + spin_unlock_bh(&queue->lock);
4225 @@ -626,22 +558,23 @@
4227 * This is the RPC `scheduler' (or rather, the finite state machine).
4230 -__rpc_execute(struct rpc_task *task)
4231 +static int __rpc_execute(struct rpc_task *task)
4235 dprintk("RPC: %4d rpc_execute flgs %x\n",
4236 task->tk_pid, task->tk_flags);
4238 - if (!RPC_IS_RUNNING(task)) {
4239 - printk(KERN_WARNING "RPC: rpc_execute called for sleeping task!!\n");
4242 + BUG_ON(RPC_IS_QUEUED(task));
4247 + * Garbage collection of pending timers...
4249 + rpc_delete_timer(task);
4252 * Execute any pending callback.
4254 if (RPC_DO_CALLBACK(task)) {
4257 save_callback=task->tk_callback;
4258 task->tk_callback=NULL;
4260 save_callback(task);
4265 @@ -665,43 +600,35 @@
4266 * tk_action may be NULL when the task has been killed
4269 - if (RPC_IS_RUNNING(task)) {
4271 - * Garbage collection of pending timers...
4273 - rpc_delete_timer(task);
4274 + if (!RPC_IS_QUEUED(task)) {
4275 if (!task->tk_action)
4278 task->tk_action(task);
4279 - /* micro-optimization to avoid spinlock */
4280 - if (RPC_IS_RUNNING(task))
4286 - * Check whether task is sleeping.
4287 + * Lockless check for whether task is sleeping or not.
4289 - spin_lock_bh(&rpc_queue_lock);
4290 - if (!RPC_IS_RUNNING(task)) {
4291 - rpc_set_sleeping(task);
4292 - if (RPC_IS_ASYNC(task)) {
4293 - spin_unlock_bh(&rpc_queue_lock);
4294 + if (!RPC_IS_QUEUED(task))
4296 + rpc_clear_running(task);
4297 + if (RPC_IS_ASYNC(task)) {
4298 + /* Careful! We may have raced... */
4299 + if (RPC_IS_QUEUED(task))
4302 + if (rpc_test_and_set_running(task))
4306 - spin_unlock_bh(&rpc_queue_lock);
4308 - if (!RPC_IS_SLEEPING(task))
4310 /* sync task: sleep here */
4311 dprintk("RPC: %4d sync task going to sleep\n", task->tk_pid);
4312 - if (current->pid == rpciod_pid)
4313 - printk(KERN_ERR "RPC: rpciod waiting on sync task!\n");
4315 if (RPC_TASK_UNINTERRUPTIBLE(task)) {
4316 - __wait_event(task->tk_wait, !RPC_IS_SLEEPING(task));
4317 + __wait_event(task->u.tk_wait.waitq, !RPC_IS_QUEUED(task));
4319 - __wait_event_interruptible(task->tk_wait, !RPC_IS_SLEEPING(task), status);
4320 + __wait_event_interruptible(task->u.tk_wait.waitq, !RPC_IS_QUEUED(task), status);
4322 * When a sync task receives a signal, it exits with
4323 * -ERESTARTSYS. In order to catch any callbacks that
4324 @@ -715,11 +642,14 @@
4325 rpc_wake_up_task(task);
4328 + rpc_set_running(task);
4329 dprintk("RPC: %4d sync task resuming\n", task->tk_pid);
4332 if (task->tk_exit) {
4334 task->tk_exit(task);
4336 /* If tk_action is non-null, the user wants us to restart */
4337 if (task->tk_action) {
4338 if (!RPC_ASSASSINATED(task)) {
4341 /* Release all resources associated with the task */
4342 rpc_release_task(task);
4347 @@ -754,57 +683,16 @@
4349 rpc_execute(struct rpc_task *task)
4351 - int status = -EIO;
4352 - if (rpc_inhibit) {
4353 - printk(KERN_INFO "RPC: execution inhibited!\n");
4357 - status = -EWOULDBLOCK;
4358 - if (task->tk_active) {
4359 - printk(KERN_ERR "RPC: active task was run twice!\n");
4362 + BUG_ON(task->tk_active);
4364 task->tk_active = 1;
4365 rpc_set_running(task);
4366 return __rpc_execute(task);
4368 - rpc_release_task(task);
4374 - * This is our own little scheduler for async RPC tasks.
4377 -__rpc_schedule(void)
4378 +static void rpc_async_schedule(void *arg)
4380 - struct rpc_task *task;
4383 - dprintk("RPC: rpc_schedule enter\n");
4386 - task_for_first(task, &schedq.tasks[0]) {
4387 - __rpc_remove_wait_queue(task);
4388 - spin_unlock_bh(&rpc_queue_lock);
4390 - __rpc_execute(task);
4391 - spin_lock_bh(&rpc_queue_lock);
4396 - if (++count >= 200 || need_resched()) {
4398 - spin_unlock_bh(&rpc_queue_lock);
4400 - spin_lock_bh(&rpc_queue_lock);
4403 - dprintk("RPC: rpc_schedule leave\n");
4404 + __rpc_execute((struct rpc_task *)arg);
4409 task->tk_client = clnt;
4410 task->tk_flags = flags;
4411 task->tk_exit = callback;
4412 - init_waitqueue_head(&task->tk_wait);
4413 if (current->uid != current->fsuid || current->gid != current->fsgid)
4414 task->tk_flags |= RPC_TASK_SETUID;
4416 @@ -873,7 +760,11 @@
4418 task->tk_priority = RPC_PRIORITY_NORMAL;
4419 task->tk_cookie = (unsigned long)current;
4420 - INIT_LIST_HEAD(&task->tk_links);
4422 + /* Initialize workqueue for async tasks */
4423 + task->tk_workqueue = rpciod_workqueue;
4424 + if (!RPC_IS_ASYNC(task))
4425 + init_waitqueue_head(&task->u.tk_wait.waitq);
4427 /* Add to global list of all tasks */
4428 spin_lock(&rpc_sched_lock);
4434 -rpc_release_task(struct rpc_task *task)
4435 +void rpc_release_task(struct rpc_task *task)
4437 dprintk("RPC: %4d release task\n", task->tk_pid);
4439 @@ -963,19 +853,9 @@
4440 list_del(&task->tk_task);
4441 spin_unlock(&rpc_sched_lock);
4443 - /* Protect the execution below. */
4444 - spin_lock_bh(&rpc_queue_lock);
4446 - /* Disable timer to prevent zombie wakeup */
4447 - __rpc_disable_timer(task);
4449 - /* Remove from any wait queue we're still on */
4450 - __rpc_remove_wait_queue(task);
4452 + BUG_ON (RPC_IS_QUEUED(task));
4453 task->tk_active = 0;
4455 - spin_unlock_bh(&rpc_queue_lock);
4457 /* Synchronously delete any running timer */
4458 rpc_delete_timer(task);
4460 @@ -1005,10 +885,9 @@
4461 * queue 'childq'. If so returns a pointer to the parent.
4462 * Upon failure returns NULL.
4464 - * Caller must hold rpc_queue_lock
4465 + * Caller must hold childq.lock
4467 -static inline struct rpc_task *
4468 -rpc_find_parent(struct rpc_task *child)
4469 +static inline struct rpc_task *rpc_find_parent(struct rpc_task *child)
4471 struct rpc_task *task, *parent;
4472 struct list_head *le;
4473 @@ -1021,17 +900,16 @@
4478 -rpc_child_exit(struct rpc_task *child)
4479 +static void rpc_child_exit(struct rpc_task *child)
4481 struct rpc_task *parent;
4483 - spin_lock_bh(&rpc_queue_lock);
4484 + spin_lock_bh(&childq.lock);
4485 if ((parent = rpc_find_parent(child)) != NULL) {
4486 parent->tk_status = child->tk_status;
4487 __rpc_wake_up_task(parent);
4489 - spin_unlock_bh(&rpc_queue_lock);
4490 + spin_unlock_bh(&childq.lock);
4494 @@ -1054,22 +932,20 @@
4499 -rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func)
4500 +void rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func)
4502 - spin_lock_bh(&rpc_queue_lock);
4503 + spin_lock_bh(&childq.lock);
4504 /* N.B. Is it possible for the child to have already finished? */
4505 __rpc_sleep_on(&childq, task, func, NULL);
4506 rpc_schedule_run(child);
4507 - spin_unlock_bh(&rpc_queue_lock);
4508 + spin_unlock_bh(&childq.lock);
4512 * Kill all tasks for the given client.
4513 * XXX: kill their descendants as well?
4516 -rpc_killall_tasks(struct rpc_clnt *clnt)
4517 +void rpc_killall_tasks(struct rpc_clnt *clnt)
4519 struct rpc_task *rovr;
4520 struct list_head *le;
4521 @@ -1091,93 +967,14 @@
4523 static DECLARE_MUTEX_LOCKED(rpciod_running);
4526 -rpciod_task_pending(void)
4528 - return !list_empty(&schedq.tasks[0]);
4533 - * This is the rpciod kernel thread
4542 - * Let our maker know we're running ...
4544 - rpciod_pid = current->pid;
4545 - up(&rpciod_running);
4547 - daemonize("rpciod");
4548 - allow_signal(SIGKILL);
4550 - dprintk("RPC: rpciod starting (pid %d)\n", rpciod_pid);
4551 - spin_lock_bh(&rpc_queue_lock);
4552 - while (rpciod_users) {
4553 - DEFINE_WAIT(wait);
4554 - if (signalled()) {
4555 - spin_unlock_bh(&rpc_queue_lock);
4557 - flush_signals(current);
4558 - spin_lock_bh(&rpc_queue_lock);
4561 - if (current->flags & PF_FREEZE) {
4562 - spin_unlock_bh(&rpc_queue_lock);
4563 - refrigerator(PF_FREEZE);
4564 - spin_lock_bh(&rpc_queue_lock);
4567 - if (++rounds >= 64) { /* safeguard */
4568 - spin_unlock_bh(&rpc_queue_lock);
4571 - spin_lock_bh(&rpc_queue_lock);
4574 - dprintk("RPC: rpciod back to sleep\n");
4575 - prepare_to_wait(&rpciod_idle, &wait, TASK_INTERRUPTIBLE);
4576 - if (!rpciod_task_pending() && !signalled()) {
4577 - spin_unlock_bh(&rpc_queue_lock);
4580 - spin_lock_bh(&rpc_queue_lock);
4582 - finish_wait(&rpciod_idle, &wait);
4583 - dprintk("RPC: switch to rpciod\n");
4585 - spin_unlock_bh(&rpc_queue_lock);
4587 - dprintk("RPC: rpciod shutdown commences\n");
4588 - if (!list_empty(&all_tasks)) {
4589 - printk(KERN_ERR "rpciod: active tasks at shutdown?!\n");
4593 - dprintk("RPC: rpciod exiting\n");
4597 - complete_and_exit(&rpciod_killer, 0);
4602 -rpciod_killall(void)
4603 +static void rpciod_killall(void)
4605 unsigned long flags;
4607 while (!list_empty(&all_tasks)) {
4608 clear_thread_flag(TIF_SIGPENDING);
4609 rpc_killall_tasks(NULL);
4610 - spin_lock_bh(&rpc_queue_lock);
4612 - spin_unlock_bh(&rpc_queue_lock);
4613 + flush_workqueue(rpciod_workqueue);
4614 if (!list_empty(&all_tasks)) {
4615 dprintk("rpciod_killall: waiting for tasks to exit\n");
4617 @@ -1195,28 +992,30 @@
4621 + struct workqueue_struct *wq;
4625 - dprintk("rpciod_up: pid %d, users %d\n", rpciod_pid, rpciod_users);
4626 + dprintk("rpciod_up: users %d\n", rpciod_users);
4629 + if (rpciod_workqueue)
4632 * If there's no pid, we should be the first user.
4634 if (rpciod_users > 1)
4635 - printk(KERN_WARNING "rpciod_up: no pid, %d users??\n", rpciod_users);
4636 + printk(KERN_WARNING "rpciod_up: no workqueue, %d users??\n", rpciod_users);
4638 * Create the rpciod thread and wait for it to start.
4640 - error = kernel_thread(rpciod, NULL, 0);
4642 - printk(KERN_WARNING "rpciod_up: create thread failed, error=%d\n", error);
4644 + wq = create_workqueue("rpciod");
4646 + printk(KERN_WARNING "rpciod_up: create workqueue failed, error=%d\n", error);
4650 - down(&rpciod_running);
4651 + rpciod_workqueue = wq;
4655 @@ -1227,20 +1026,21 @@
4659 - dprintk("rpciod_down pid %d sema %d\n", rpciod_pid, rpciod_users);
4660 + dprintk("rpciod_down sema %d\n", rpciod_users);
4665 - printk(KERN_WARNING "rpciod_down: pid=%d, no users??\n", rpciod_pid);
4666 + printk(KERN_WARNING "rpciod_down: no users??\n");
4668 - if (!rpciod_pid) {
4669 + if (!rpciod_workqueue) {
4670 dprintk("rpciod_down: Nothing to do!\n");
4675 - kill_proc(rpciod_pid, SIGKILL, 1);
4676 - wait_for_completion(&rpciod_killer);
4677 + destroy_workqueue(rpciod_workqueue);
4678 + rpciod_workqueue = NULL;
4682 @@ -1258,7 +1058,12 @@
4684 printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout "
4685 "-rpcwait -action- --exit--\n");
4686 - alltask_for_each(t, le, &all_tasks)
4687 + alltask_for_each(t, le, &all_tasks) {
4688 + const char *rpc_waitq = "none";
4690 + if (RPC_IS_QUEUED(t))
4691 + rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq);
4693 printk("%05d %04d %04x %06d %8p %6d %8p %08ld %8s %8p %8p\n",
4695 (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1),
4696 @@ -1266,8 +1071,9 @@
4698 (t->tk_client ? t->tk_client->cl_prog : 0),
4699 t->tk_rqstp, t->tk_timeout,
4700 - rpc_qname(t->tk_rpcwait),
4702 t->tk_action, t->tk_exit);
4704 spin_unlock(&rpc_sched_lock);
4707 Index: linux-2.6.10/net/sunrpc/sunrpc_syms.c
4708 ===================================================================
4709 --- linux-2.6.10.orig/net/sunrpc/sunrpc_syms.c 2004-12-25 05:35:25.000000000 +0800
4710 +++ linux-2.6.10/net/sunrpc/sunrpc_syms.c 2005-04-05 14:49:13.411690432 +0800
4712 EXPORT_SYMBOL(rpc_wake_up);
4713 EXPORT_SYMBOL(rpc_queue_upcall);
4714 EXPORT_SYMBOL(rpc_mkpipe);
4715 +EXPORT_SYMBOL(rpc_mkdir);
4716 +EXPORT_SYMBOL(rpc_rmdir);
4719 /* Client transport */
4720 EXPORT_SYMBOL(xprt_create_proto);
4722 EXPORT_SYMBOL(svc_auth_register);
4723 EXPORT_SYMBOL(auth_domain_lookup);
4724 EXPORT_SYMBOL(svc_authenticate);
4725 +EXPORT_SYMBOL(svc_set_client);
4727 /* RPC statistics */
4728 #ifdef CONFIG_PROC_FS
4729 Index: linux-2.6.10/kernel/exit.c
4730 ===================================================================
4731 --- linux-2.6.10.orig/kernel/exit.c 2005-04-05 14:48:52.534864192 +0800
4732 +++ linux-2.6.10/kernel/exit.c 2005-04-05 14:50:57.737830448 +0800
4737 +EXPORT_SYMBOL(do_exit);
4739 NORET_TYPE void complete_and_exit(struct completion *comp, long code)
4742 Index: linux-2.6.10/fs/locks.c
4743 ===================================================================
4744 --- linux-2.6.10.orig/fs/locks.c 2004-12-25 05:35:28.000000000 +0800
4745 +++ linux-2.6.10/fs/locks.c 2005-04-05 14:49:13.434686936 +0800
4746 @@ -1096,15 +1096,13 @@
4748 void remove_lease(struct file_lock *fl)
4750 - if (!IS_LEASE(fl))
4755 + if (!fl || !IS_LEASE(fl))
4757 fl->fl_type = F_UNLCK | F_INPROGRESS;
4758 fl->fl_break_time = jiffies - 10;
4759 time_out_leases(fl->fl_file->f_dentry->d_inode);
4765 @@ -1563,9 +1561,6 @@
4766 error = filp->f_op->lock(filp, F_GETLK, &file_lock);
4769 - else if (error == LOCK_USE_CLNT)
4770 - /* Bypass for NFS with no locking - 2.0.36 compat */
4771 - fl = posix_test_lock(filp, &file_lock);
4773 fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
4775 @@ -1708,9 +1703,6 @@
4776 error = filp->f_op->lock(filp, F_GETLK, &file_lock);
4779 - else if (error == LOCK_USE_CLNT)
4780 - /* Bypass for NFS with no locking - 2.0.36 compat */
4781 - fl = posix_test_lock(filp, &file_lock);
4783 fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
4785 Index: linux-2.6.10/fs/dcache.c
4786 ===================================================================
4787 --- linux-2.6.10.orig/fs/dcache.c 2005-03-31 15:35:26.000000000 +0800
4788 +++ linux-2.6.10/fs/dcache.c 2005-04-05 14:49:13.413690128 +0800
4789 @@ -789,6 +789,54 @@
4793 + * d_instantiate_unique - instantiate a non-aliased dentry
4794 + * @entry: dentry to instantiate
4795 + * @inode: inode to attach to this dentry
4797 + * Fill in inode information in the entry. On success, it returns NULL.
4798 + * If an unhashed alias of "entry" already exists, then we return the
4799 + * aliased dentry instead.
4801 + * Note that in order to avoid conflicts with rename() etc, the caller
4802 + * had better be holding the parent directory semaphore.
4804 +struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
4806 + struct dentry *alias;
4807 + int len = entry->d_name.len;
4808 + const char *name = entry->d_name.name;
4809 + unsigned int hash = entry->d_name.hash;
4811 + BUG_ON(!list_empty(&entry->d_alias));
4812 + spin_lock(&dcache_lock);
4815 + list_for_each_entry(alias, &inode->i_dentry, d_alias) {
4816 + struct qstr *qstr = &alias->d_name;
4818 + if (qstr->hash != hash)
4820 + if (alias->d_parent != entry->d_parent)
4822 + if (qstr->len != len)
4824 + if (memcmp(qstr->name, name, len))
4826 + dget_locked(alias);
4827 + spin_unlock(&dcache_lock);
4828 + BUG_ON(!d_unhashed(alias));
4831 + list_add(&entry->d_alias, &inode->i_dentry);
4833 + entry->d_inode = inode;
4834 + spin_unlock(&dcache_lock);
4835 + security_d_instantiate(entry, inode);
4838 +EXPORT_SYMBOL(d_instantiate_unique);
4841 * d_alloc_root - allocate root dentry
4842 * @root_inode: inode to allocate the root for
4844 Index: linux-2.6.10/fs/lockd/svc.c
4845 ===================================================================
4846 --- linux-2.6.10.orig/fs/lockd/svc.c 2005-03-31 15:35:26.000000000 +0800
4847 +++ linux-2.6.10/fs/lockd/svc.c 2005-04-05 14:49:13.458683288 +0800
4848 @@ -418,6 +418,38 @@
4852 +static inline int is_callback(u32 proc)
4854 + return proc == NLMPROC_GRANTED
4855 + || proc == NLMPROC_GRANTED_MSG
4856 + || proc == NLMPROC_TEST_RES
4857 + || proc == NLMPROC_LOCK_RES
4858 + || proc == NLMPROC_CANCEL_RES
4859 + || proc == NLMPROC_UNLOCK_RES
4860 + || proc == NLMPROC_NSM_NOTIFY;
4864 +static int lockd_authenticate(struct svc_rqst *rqstp)
4866 + rqstp->rq_client = NULL;
4867 + switch (rqstp->rq_authop->flavour) {
4868 + case RPC_AUTH_NULL:
4869 + case RPC_AUTH_UNIX:
4870 + if (rqstp->rq_proc == 0)
4872 + if (is_callback(rqstp->rq_proc)) {
4873 + /* Leave it to individual procedures to
4874 + * call nlmsvc_lookup_host(rqstp)
4878 + return svc_set_client(rqstp);
4880 + return SVC_DENIED;
4884 param_set_min_max(port, int, simple_strtol, 0, 65535)
4885 param_set_min_max(grace_period, unsigned long, simple_strtoul,
4886 nlm_grace_period_min, nlm_grace_period_max)
4888 .pg_name = "lockd", /* service name */
4889 .pg_class = "nfsd", /* share authentication with nfsd */
4890 .pg_stats = &nlmsvc_stats, /* stats table */
4891 + .pg_authenticate = &lockd_authenticate /* export authentication */
4893 Index: linux-2.6.10/fs/nfsd/nfs4xdr.c
4894 ===================================================================
4895 --- linux-2.6.10.orig/fs/nfsd/nfs4xdr.c 2004-12-25 05:35:24.000000000 +0800
4896 +++ linux-2.6.10/fs/nfsd/nfs4xdr.c 2005-04-05 14:49:13.425688304 +0800
4899 #define NFSDDBG_FACILITY NFSDDBG_XDR
4901 -static const char utf8_byte_len[256] = {
4902 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
4903 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
4904 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
4905 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
4906 - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
4907 - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
4908 - 0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
4909 - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4,5,5,5,5,6,6,0,0
4913 -is_legal_utf8_sequence(unsigned char *source, int length)
4915 - unsigned char *ptr;
4918 - if (length==1) return 1;
4920 - /* Check for overlong sequence, and check second byte */
4921 - c = *(source + 1);
4922 - switch (*source) {
4923 - case 0xE0: /* 3 bytes */
4924 - if ( c < 0xA0 ) return 0;
4926 - case 0xF0: /* 4 bytes */
4927 - if ( c < 0x90 ) return 0;
4929 - case 0xF8: /* 5 bytes */
4930 - if ( c < 0xC8 ) return 0;
4932 - case 0xFC: /* 6 bytes */
4933 - if ( c < 0x84 ) return 0;
4936 - if ( (c & 0xC0) != 0x80) return 0;
4939 - /* Check that trailing bytes look like 10xxxxxx */
4940 - for (ptr = source++ + length - 1; ptr>source; ptr--)
4941 - if ( ((*ptr) & 0xC0) != 0x80 ) return 0;
4945 -/* This does some screening on disallowed unicode characters. It is NOT
4949 -is_allowed_utf8_char(unsigned char *source, int length)
4951 - /* We assume length and source point to a valid utf8 sequence */
4954 - /* Disallow F0000 and up (in utf8, F3B08080) */
4955 - if (*source > 0xF3 ) return 0;
4956 - c = *(source + 1);
4957 - switch (*source) {
4959 - if (c >= 0xB0) return 0;
4961 - /* Disallow D800-F8FF (in utf8, EDA080-EFA3BF */
4963 - if (c >= 0xA0) return 0;
4969 - if (c <= 0xA3) return 0;
4970 - /* Disallow FFF9-FFFF (EFBFB9-EFBFBF) */
4972 - /* Don't need to check <=0xBF, since valid utf8 */
4973 - if ( *(source+2) >= 0xB9) return 0;
4979 -/* This routine should really check to see that the proper stringprep
4980 - * mappings have been applied. Instead, we do a simple screen of some
4981 - * of the more obvious illegal values by calling is_allowed_utf8_char.
4982 - * This will allow many illegal strings through, but if a client behaves,
4983 - * it will get full functionality. The other option (apart from full
4984 - * stringprep checking) is to limit everything to an easily handled subset,
4985 - * such as 7-bit ascii.
4987 - * Note - currently calling routines ignore return value except as boolean.
4990 -check_utf8(char *str, int len)
4992 - unsigned char *chunk, *sourceend;
4996 - sourceend = str + len;
4998 - while (chunk < sourceend) {
4999 - chunklen = utf8_byte_len[*chunk];
5001 - return nfserr_inval;
5002 - if (chunk + chunklen > sourceend)
5003 - return nfserr_inval;
5004 - if (!is_legal_utf8_sequence(chunk, chunklen))
5005 - return nfserr_inval;
5006 - if (!is_allowed_utf8_char(chunk, chunklen))
5007 - return nfserr_inval;
5008 - if ( (chunklen==1) && (!*chunk) )
5009 - return nfserr_inval; /* Disallow embedded nulls */
5010 - chunk += chunklen;
5017 check_filename(char *str, int len, int err)
5020 for (i = 0; i < len; i++)
5023 - return check_utf8(str, len);
5030 len += XDR_QUADLEN(dummy32) << 2;
5031 READMEM(buf, dummy32);
5032 - if (check_utf8(buf, dummy32))
5033 - return nfserr_inval;
5034 ace.whotype = nfs4_acl_get_whotype(buf, dummy32);
5036 if (ace.whotype != NFS4_ACL_WHO_NAMED)
5039 len += (XDR_QUADLEN(dummy32) << 2);
5040 READMEM(buf, dummy32);
5041 - if (check_utf8(buf, dummy32))
5042 - return nfserr_inval;
5043 if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid)))
5045 iattr->ia_valid |= ATTR_UID;
5048 len += (XDR_QUADLEN(dummy32) << 2);
5049 READMEM(buf, dummy32);
5050 - if (check_utf8(buf, dummy32))
5051 - return nfserr_inval;
5052 if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid)))
5054 iattr->ia_valid |= ATTR_GID;
5058 if (len != expected_len)
5060 + printk("nfsd: funky nfs4 client sent extra bytes in setattr\n");
5065 READ32(create->cr_linklen);
5066 READ_BUF(create->cr_linklen);
5067 SAVEMEM(create->cr_linkname, create->cr_linklen);
5068 - if (check_utf8(create->cr_linkname, create->cr_linklen))
5069 - return nfserr_inval;
5073 @@ -615,6 +492,18 @@
5077 +nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr)
5081 + READ_BUF(sizeof(stateid_t));
5082 + READ32(dr->dr_stateid.si_generation);
5083 + COPYMEM(&dr->dr_stateid.si_opaque, sizeof(stateid_opaque_t));
5089 nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *getattr)
5091 return nfsd4_decode_bitmap(argp, getattr->ga_bmval);
5093 READ32(open->op_delegate_type);
5095 case NFS4_OPEN_CLAIM_DELEGATE_CUR:
5096 - READ_BUF(sizeof(delegation_stateid_t) + 4);
5097 - COPYMEM(&open->op_delegate_stateid, sizeof(delegation_stateid_t));
5098 + READ_BUF(sizeof(stateid_t) + 4);
5099 + COPYMEM(&open->op_delegate_stateid, sizeof(stateid_t));
5100 READ32(open->op_fname.len);
5101 READ_BUF(open->op_fname.len);
5102 SAVEMEM(open->op_fname.data, open->op_fname.len);
5106 open_down->od_stateowner = NULL;
5107 - READ_BUF(4 + sizeof(stateid_t));
5108 + READ_BUF(12 + sizeof(stateid_t));
5109 READ32(open_down->od_stateid.si_generation);
5110 COPYMEM(&open_down->od_stateid.si_opaque, sizeof(stateid_opaque_t));
5111 READ32(open_down->od_seqid);
5112 @@ -1170,6 +1059,9 @@
5114 op->status = nfsd4_decode_create(argp, &op->u.create);
5116 + case OP_DELEGRETURN:
5117 + op->status = nfsd4_decode_delegreturn(argp, &op->u.delegreturn);
5120 op->status = nfsd4_decode_getattr(argp, &op->u.getattr);
5122 @@ -1425,7 +1317,7 @@
5126 - if ((bmval0 & FATTR4_WORD0_FILEHANDLE) && !fhp) {
5127 + if ((bmval0 & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) && !fhp) {
5128 fh_init(&tempfh, NFS4_FHSIZE);
5129 status = fh_compose(&tempfh, exp, dentry, NULL);
5131 @@ -1471,7 +1363,10 @@
5132 if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) {
5133 if ((buflen -= 4) < 0)
5135 - WRITE32( NFS4_FH_NOEXPIRE_WITH_OPEN | NFS4_FH_VOL_RENAME );
5136 + if (exp->ex_flags & NFSEXP_NOSUBTREECHECK)
5137 + WRITE32(NFS4_FH_PERSISTENT);
5139 + WRITE32(NFS4_FH_VOL_RENAME);
5141 if (bmval0 & FATTR4_WORD0_CHANGE) {
5143 @@ -1508,10 +1403,15 @@
5144 if (bmval0 & FATTR4_WORD0_FSID) {
5145 if ((buflen -= 16) < 0)
5148 - WRITE32(MAJOR(stat.dev));
5150 - WRITE32(MINOR(stat.dev));
5151 + if (is_fsid(fhp, rqstp->rq_reffh)) {
5152 + WRITE64((u64)exp->ex_fsid);
5156 + WRITE32(MAJOR(stat.dev));
5158 + WRITE32(MINOR(stat.dev));
5161 if (bmval0 & FATTR4_WORD0_UNIQUE_HANDLES) {
5162 if ((buflen -= 4) < 0)
5163 @@ -1765,17 +1665,65 @@
5167 +nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
5168 + const char *name, int namlen, u32 *p, int *buflen)
5170 + struct svc_export *exp = cd->rd_fhp->fh_export;
5171 + struct dentry *dentry;
5174 + dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen);
5175 + if (IS_ERR(dentry))
5176 + return nfserrno(PTR_ERR(dentry));
5179 + if (d_mountpoint(dentry)) {
5180 + if (nfsd_cross_mnt(cd->rd_rqstp, &dentry, &exp)) {
5182 + * -EAGAIN is the only error returned from
5183 + * nfsd_cross_mnt() and it indicates that an
5184 + * up-call has been initiated to fill in the export
5185 + * options on exp. When the answer comes back,
5186 + * this call will be retried.
5188 + nfserr = nfserr_dropit;
5193 + nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval,
5202 +nfsd4_encode_rdattr_error(u32 *p, int buflen, int nfserr)
5209 + *p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */
5210 + *p++ = htonl(0); /* bmval1 */
5213 + *p++ = nfserr; /* no htonl */
5214 + *attrlenp = htonl((char *)p - (char *)attrlenp - 4);
5219 nfsd4_encode_dirent(struct readdir_cd *ccd, const char *name, int namlen,
5220 loff_t offset, ino_t ino, unsigned int d_type)
5222 struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common);
5224 u32 *p = cd->buffer;
5226 - struct dentry *dentry;
5227 - struct svc_export *exp = cd->rd_fhp->fh_export;
5228 - u32 bmval0, bmval1;
5230 + int nfserr = nfserr_toosmall;
5232 /* In nfsv4, "." and ".." never make it onto the wire.. */
5233 if (name && isdotent(name, namlen)) {
5234 @@ -1788,106 +1736,44 @@
5236 buflen = cd->buflen - 4 - XDR_QUADLEN(namlen);
5241 *p++ = xdr_one; /* mark entry present */
5242 cd->offset = p; /* remember pointer */
5243 p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */
5244 p = xdr_encode_array(p, name, namlen); /* name length & name */
5247 - * Now we come to the ugly part: writing the fattr for this entry.
5249 - bmval0 = cd->rd_bmval[0];
5250 - bmval1 = cd->rd_bmval[1];
5251 - if ((bmval0 & ~(FATTR4_WORD0_RDATTR_ERROR | FATTR4_WORD0_FILEID)) || bmval1) {
5253 - * "Heavyweight" case: we have no choice except to
5254 - * call nfsd4_encode_fattr().
5256 - dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen);
5257 - if (IS_ERR(dentry)) {
5258 - nfserr = nfserrno(PTR_ERR(dentry));
5263 - if (d_mountpoint(dentry)) {
5264 - if ((nfserr = nfsd_cross_mnt(cd->rd_rqstp, &dentry,
5267 - * -EAGAIN is the only error returned from
5268 - * nfsd_cross_mnt() and it indicates that an
5269 - * up-call has been initiated to fill in the export
5270 - * options on exp. When the answer comes back,
5271 - * this call will be retried.
5275 - nfserr = nfserr_dropit;
5281 - nfserr = nfsd4_encode_fattr(NULL, exp,
5282 - dentry, p, &buflen, cd->rd_bmval,
5290 - if (nfserr == nfserr_resource)
5294 + nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, p, &buflen);
5299 + case nfserr_resource:
5300 + nfserr = nfserr_toosmall;
5302 + case nfserr_dropit:
5306 - * If we get here, we experienced a miscellaneous
5307 - * failure while writing the attributes. If the
5308 - * client requested the RDATTR_ERROR attribute,
5309 + * If the client requested the RDATTR_ERROR attribute,
5310 * we stuff the error code into this attribute
5311 * and continue. If this attribute was not requested,
5312 * then in accordance with the spec, we fail the
5313 * entire READDIR operation(!)
5315 - if (!(bmval0 & FATTR4_WORD0_RDATTR_ERROR)) {
5316 - cd->common.err = nfserr;
5320 - bmval0 = FATTR4_WORD0_RDATTR_ERROR;
5322 - /* falling through here will do the right thing... */
5323 + if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR))
5325 + nfserr = nfserr_toosmall;
5326 + p = nfsd4_encode_rdattr_error(p, buflen, nfserr);
5332 - * In the common "lightweight" case, we avoid
5333 - * the overhead of nfsd4_encode_fattr() by assembling
5334 - * a small fattr by hand.
5339 - *p++ = htonl(bmval0);
5340 - *p++ = htonl(bmval1);
5343 - if (bmval0 & FATTR4_WORD0_RDATTR_ERROR)
5344 - *p++ = nfserr; /* no htonl */
5345 - if (bmval0 & FATTR4_WORD0_FILEID)
5346 - p = xdr_encode_hyper(p, (u64)ino);
5347 - *attrlenp = htonl((char *)p - (char *)attrlenp - 4);
5350 cd->buflen -= (p - cd->buffer);
5352 cd->common.err = nfs_ok;
5356 - cd->common.err = nfserr_toosmall;
5358 + cd->common.err = nfserr;
5362 @@ -2081,8 +1967,8 @@
5363 case NFS4_OPEN_DELEGATE_NONE:
5365 case NFS4_OPEN_DELEGATE_READ:
5366 - RESERVE_SPACE(20 + sizeof(delegation_stateid_t));
5367 - WRITEMEM(&open->op_delegate_stateid, sizeof(delegation_stateid_t));
5368 + RESERVE_SPACE(20 + sizeof(stateid_t));
5369 + WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t));
5373 @@ -2095,8 +1981,8 @@
5376 case NFS4_OPEN_DELEGATE_WRITE:
5377 - RESERVE_SPACE(32 + sizeof(delegation_stateid_t));
5378 - WRITEMEM(&open->op_delegate_stateid, sizeof(delegation_stateid_t));
5379 + RESERVE_SPACE(32 + sizeof(stateid_t));
5380 + WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t));
5384 @@ -2185,10 +2071,17 @@
5388 - nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp,
5390 - read->rd_iov, read->rd_vlen,
5392 + if (read->rd_filp)
5393 + nfserr = nfsd_vfs_read(read->rd_rqstp, read->rd_fhp,
5394 + read->rd_filp, read->rd_offset,
5395 + read->rd_iov, read->rd_vlen,
5398 + nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp,
5400 + read->rd_iov, read->rd_vlen,
5403 if (nfserr == nfserr_symlink)
5404 nfserr = nfserr_inval;
5406 @@ -2460,6 +2353,8 @@
5408 nfsd4_encode_create(resp, op->status, &op->u.create);
5410 + case OP_DELEGRETURN:
5413 op->status = nfsd4_encode_getattr(resp, op->status, &op->u.getattr);
5415 Index: linux-2.6.10/fs/nfsd/nfs4state.c
5416 ===================================================================
5417 --- linux-2.6.10.orig/fs/nfsd/nfs4state.c 2004-12-25 05:35:23.000000000 +0800
5418 +++ linux-2.6.10/fs/nfsd/nfs4state.c 2005-04-05 14:49:13.421688912 +0800
5420 #include <linux/mount.h>
5421 #include <linux/workqueue.h>
5422 #include <linux/smp_lock.h>
5423 +#include <linux/kthread.h>
5424 #include <linux/nfs4.h>
5425 #include <linux/nfsd/state.h>
5426 #include <linux/nfsd/xdr4.h>
5428 static u32 nfs4_reclaim_init = 0;
5430 static time_t grace_end = 0;
5431 +static u32 first_run = 1; /* laundromat threads first run */
5432 static u32 current_clientid = 1;
5433 -static u32 current_ownerid;
5434 -static u32 current_fileid;
5435 +static u32 current_ownerid = 1;
5436 +static u32 current_fileid = 1;
5437 +static u32 current_delegid = 1;
5438 static u32 nfs4_init;
5439 stateid_t zerostateid; /* bits all 0 */
5440 stateid_t onestateid; /* bits all 1 */
5442 u32 del_perclient = 0;
5445 -u32 alloc_sowner = 0;
5446 -u32 free_sowner = 0;
5449 -u32 alloc_lsowner= 0;
5450 +u32 alloc_delegation= 0;
5451 +u32 free_delegation= 0;
5453 /* forward declarations */
5454 struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
5455 +static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
5456 +static void release_delegation(struct nfs4_delegation *dp);
5457 +static void release_stateid_lockowner(struct nfs4_stateid *open_stp);
5458 +extern char recovery_dirname[];
5462 @@ -117,6 +123,112 @@
5463 static void release_stateid(struct nfs4_stateid *stp, int flags);
5464 static void release_file(struct nfs4_file *fp);
5467 + * Delegation state
5470 +/* recall_lock protects the del_recall_lru */
5471 +spinlock_t recall_lock;
5472 +static struct list_head del_recall_lru;
5474 +static struct nfs4_delegation *
5475 +alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type)
5477 + struct nfs4_delegation *dp;
5478 + struct nfs4_file *fp = stp->st_file;
5480 + dprintk("NFSD alloc_init_deleg\n");
5481 + if ((dp = kmalloc(sizeof(struct nfs4_delegation),
5482 + GFP_KERNEL)) == NULL)
5484 + INIT_LIST_HEAD(&dp->dl_del_perfile);
5485 + INIT_LIST_HEAD(&dp->dl_del_perclnt);
5486 + INIT_LIST_HEAD(&dp->dl_recall_lru);
5487 + dp->dl_client = clp;
5489 + dp->dl_flock = NULL;
5492 + dp->dl_type = type;
5493 + dp->dl_recall.cbr_dp = NULL;
5494 + dp->dl_recall.cbr_ident = 0;
5495 + dp->dl_recall.cbr_trunc = 0;
5496 + dp->dl_stateid.si_boot = boot_time;
5497 + dp->dl_stateid.si_stateownerid = current_delegid++;
5498 + dp->dl_stateid.si_fileid = 0;
5499 + dp->dl_stateid.si_generation = 0;
5500 + dp->dl_fhlen = current_fh->fh_handle.fh_size;
5501 + memcpy(dp->dl_fhval, &current_fh->fh_handle.fh_base,
5502 + current_fh->fh_handle.fh_size);
5504 + atomic_set(&dp->dl_state, NFS4_NO_RECALL);
5505 + atomic_set(&dp->dl_count, 1);
5506 + atomic_set(&dp->dl_recall_cnt, 0);
5507 + list_add(&dp->dl_del_perfile, &fp->fi_del_perfile);
5508 + list_add(&dp->dl_del_perclnt, &clp->cl_del_perclnt);
5509 + alloc_delegation++;
5514 + * Free the delegation structure.
5517 +nfs4_free_delegation(struct nfs4_delegation *dp)
5519 + dprintk("NFSD: nfs4_free_delegation freeing dp %p\n",dp);
5520 + list_del(&dp->dl_recall_lru);
5522 + free_delegation++;
5525 +/* release_delegation:
5527 + * lease_modify() is called to remove the FS_LEASE file_lock from
5528 + * the i_flock list, eventually calling nfsd's lock_manager
5529 + * fl_release_callback.
5532 + * nfsd_close : if last close, locks_remove_flock calls lease_modify.
5533 + * otherwise, recalled state set to NFS4_RECALL_COMPLETE
5534 + * so that it will be reaped by the laundromat service.
5536 + * remove_lease (calls time_out_leases which calls lease_modify).
5537 + * and nfs4_free_delegation.
5539 + * lock_kernel() protects dp->dl_flock which is set under the kernel lock
5540 + * by nfsd_copy_lock_deleg_callback and nfsd_release_deleg_callback.
5545 +release_delegation(struct nfs4_delegation *dp)
5547 + /* delayed nfsd_close: test the NFS4_DELAY_CLOSE bit itself, not merely non-zero flags */
5548 + if (dp->dl_flags & NFS4_DELAY_CLOSE) {
5549 + struct file *filp = dp->dl_stp->st_vfs_file;
5551 + dprintk("NFSD: release_delegation CLOSE\n");
5552 + release_stateid_lockowner(dp->dl_stp);
5553 + kfree(dp->dl_stp);
5554 + dp->dl_flags &= ~NFS4_DELAY_CLOSE;
5555 + dp->dl_stp = NULL;
5556 + atomic_set(&dp->dl_state, NFS4_RECALL_COMPLETE);
5560 + dprintk("NFSD: release_delegation remove lease dl_flock %p\n",
5562 + remove_lease(dp->dl_flock);
5563 + list_del_init(&dp->dl_del_perfile);
5564 + list_del_init(&dp->dl_del_perclnt);
5565 + /* dl_count > 0 => outstanding recall rpc; the last reference frees dp */
5566 + dprintk("NFSD: release_delegation free deleg dl_count %d\n",
5567 + atomic_read(&dp->dl_count));
5568 + if (atomic_dec_and_test(&dp->dl_count))
5569 + nfs4_free_delegation(dp);
5576 * for last close replay.
5578 static struct list_head reclaim_str_hashtbl[CLIENT_HASH_SIZE];
5579 -static int reclaim_str_hashtbl_size;
5580 +static int reclaim_str_hashtbl_size = 0;
5581 static struct list_head conf_id_hashtbl[CLIENT_HASH_SIZE];
5582 static struct list_head conf_str_hashtbl[CLIENT_HASH_SIZE];
5583 static struct list_head unconf_str_hashtbl[CLIENT_HASH_SIZE];
5584 @@ -213,12 +325,38 @@
5589 +put_nfs4_client(struct nfs4_client *clp)
5591 + if (atomic_dec_and_test(&clp->cl_count))
5596 expire_client(struct nfs4_client *clp)
5598 struct nfs4_stateowner *sop;
5599 + struct nfs4_delegation *dp;
5600 + struct nfs4_callback *cb = &clp->cl_callback;
5601 + struct rpc_clnt *clnt = clp->cl_callback.cb_client;
5603 + dprintk("NFSD: expire_client cl_count %d\n",
5604 + atomic_read(&clp->cl_count));
5606 - dprintk("NFSD: expire_client\n");
5607 + /* shutdown rpc client, ending any outstanding recall rpcs */
5608 + if (atomic_read(&cb->cb_set) == 1 && clnt) {
5609 + rpc_shutdown_client(clnt);
5610 + clnt = clp->cl_callback.cb_client = NULL;
5612 + while (!list_empty(&clp->cl_del_perclnt)) {
5613 + dp = list_entry(clp->cl_del_perclnt.next, struct nfs4_delegation, dl_del_perclnt);
5614 + dprintk("NFSD: expire client. dp %p, dl_state %d, fp %p\n",
5615 + dp, atomic_read(&dp->dl_state), dp->dl_flock);
5617 + /* force release of delegation. */
5618 + atomic_set(&dp->dl_state, NFS4_RECALL_COMPLETE);
5619 + release_delegation(dp);
5621 list_del(&clp->cl_idhash);
5622 list_del(&clp->cl_strhash);
5623 list_del(&clp->cl_lru);
5625 sop = list_entry(clp->cl_perclient.next, struct nfs4_stateowner, so_perclient);
5626 release_stateowner(sop);
5629 + put_nfs4_client(clp);
5632 static struct nfs4_client *
5633 @@ -235,9 +373,13 @@
5635 if (!(clp = alloc_client(name)))
5637 + atomic_set(&clp->cl_count, 1);
5638 + atomic_set(&clp->cl_callback.cb_set, 0);
5639 + clp->cl_callback.cb_parsed = 0;
5640 INIT_LIST_HEAD(&clp->cl_idhash);
5641 INIT_LIST_HEAD(&clp->cl_strhash);
5642 INIT_LIST_HEAD(&clp->cl_perclient);
5643 + INIT_LIST_HEAD(&clp->cl_del_perclnt);
5644 INIT_LIST_HEAD(&clp->cl_lru);
5647 @@ -420,17 +562,24 @@
5649 struct nfs4_callback *cb = &clp->cl_callback;
5651 + /* Currently, we only support tcp for the callback channel */
5652 + if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3))
5655 if ( !(parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val,
5656 - &cb->cb_addr, &cb->cb_port))) {
5657 - printk(KERN_INFO "NFSD: BAD callback address. client will not receive delegations\n");
5658 - cb->cb_parsed = 0;
5661 - cb->cb_netid.len = se->se_callback_netid_len;
5662 - cb->cb_netid.data = se->se_callback_netid_val;
5663 + &cb->cb_addr, &cb->cb_port)))
5665 cb->cb_prog = se->se_callback_prog;
5666 cb->cb_ident = se->se_callback_ident;
5670 + printk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
5671 + "will not receive delegations\n",
5672 + clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
5674 + cb->cb_parsed = 0;
5680 status = nfserr_clid_inuse;
5682 expire_client(conf);
5684 move_to_confirmed(unconf, idhashval);
5688 if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred)) {
5689 status = nfserr_clid_inuse;
5696 status = nfserr_clid_inuse;
5700 move_to_confirmed(unconf, idhashval);
5704 status = nfserr_inval;
5707 - /* XXX if status == nfs_ok, probe callback path */
5709 + nfsd4_probe_callback(clp);
5710 nfs4_unlock_state();
5714 if ((fp = kmalloc(sizeof(struct nfs4_file),GFP_KERNEL))) {
5715 INIT_LIST_HEAD(&fp->fi_hash);
5716 INIT_LIST_HEAD(&fp->fi_perfile);
5717 + INIT_LIST_HEAD(&fp->fi_del_perfile);
5718 list_add(&fp->fi_hash, &file_hashtbl[hashval]);
5719 fp->fi_inode = igrab(ino);
5720 fp->fi_id = current_fileid++;
5722 while (!list_empty(&file_hashtbl[i])) {
5723 fp = list_entry(file_hashtbl[i].next, struct nfs4_file, fi_hash);
5724 /* this should never be more than once... */
5725 - if (!list_empty(&fp->fi_perfile)) {
5726 + if (!list_empty(&fp->fi_perfile) || !list_empty(&fp->fi_del_perfile)) {
5727 printk("ERROR: release_all_files: file %p is open, creating dangling state !!!\n",fp);
5730 @@ -830,15 +984,36 @@
5734 -/* should use a slab cache */
5735 +kmem_cache_t *stateowner_slab = NULL;
5738 +nfsd4_init_slabs(void)
5740 + stateowner_slab = kmem_cache_create("nfsd4_stateowners",
5741 + sizeof(struct nfs4_stateowner), 0, 0, NULL, NULL);
5742 + if (stateowner_slab == NULL)
5748 +nfsd4_free_slabs(void)
5752 + if (stateowner_slab)
5753 + status = kmem_cache_destroy(stateowner_slab);
5754 + stateowner_slab = NULL;
5759 nfs4_free_stateowner(struct kref *kref)
5761 struct nfs4_stateowner *sop =
5762 container_of(kref, struct nfs4_stateowner, so_ref);
5763 kfree(sop->so_owner.data);
5766 + kmem_cache_free(stateowner_slab, sop);
5769 static inline struct nfs4_stateowner *
5770 @@ -846,14 +1021,14 @@
5772 struct nfs4_stateowner *sop;
5774 - if ((sop = kmalloc(sizeof(struct nfs4_stateowner),GFP_KERNEL))) {
5775 + if ((sop = kmem_cache_alloc(stateowner_slab, GFP_KERNEL))) {
5776 if ((sop->so_owner.data = kmalloc(owner->len, GFP_KERNEL))) {
5777 memcpy(sop->so_owner.data, owner->data, owner->len);
5778 sop->so_owner.len = owner->len;
5779 kref_init(&sop->so_ref);
5783 + kmem_cache_free(stateowner_slab, sop);
5787 @@ -887,7 +1062,6 @@
5788 rp->rp_status = NFSERR_SERVERFAULT;
5790 rp->rp_buf = rp->rp_ibuf;
5795 @@ -957,14 +1131,29 @@
5796 __set_bit(open->op_share_deny, &stp->st_deny_bmap);
5800 +* Because nfsd_close() can call locks_remove_flock() which removes leases,
5801 +* delay nfsd_close() for delegations from the nfsd_open() clientid
5802 +* until the delegation is reaped.
5805 -release_stateid(struct nfs4_stateid *stp, int flags) {
5806 +release_stateid(struct nfs4_stateid *stp, int flags)
5808 + struct nfs4_delegation *dp;
5809 + struct nfs4_file *fp = stp->st_file;
5811 list_del(&stp->st_hash);
5813 list_del(&stp->st_perfile);
5814 list_del(&stp->st_perfilestate);
5815 if ((stp->st_vfs_set) && (flags & OPEN_STATE)) {
5816 + list_for_each_entry(dp, &fp->fi_del_perfile, dl_del_perfile) {
5817 + if(cmp_clid(&dp->dl_client->cl_clientid,
5818 + &stp->st_stateowner->so_client->cl_clientid)) {
5819 + dp->dl_flags |= NFS4_DELAY_CLOSE;
5823 release_stateid_lockowner(stp);
5824 nfsd_close(stp->st_vfs_file);
5826 @@ -1013,7 +1202,7 @@
5827 if (sop->so_confirmed && list_empty(&sop->so_perfilestate))
5828 move_to_close_lru(sop);
5829 /* unused nfs4_file's are releseed. XXX slab cache? */
5830 - if (list_empty(&fp->fi_perfile)) {
5831 + if (list_empty(&fp->fi_perfile) && list_empty(&fp->fi_del_perfile)) {
5835 @@ -1141,6 +1330,100 @@
5840 + * Recall a delegation
5843 +do_recall(void *__dp)
5845 + struct nfs4_delegation *dp = __dp;
5847 + atomic_inc(&dp->dl_count);
5848 + nfsd4_cb_recall(dp);
5854 + * Spawn a thread to perform a recall on the delegation represented
5855 + * by the lease (file_lock)
5857 + * Called from break_lease() with lock_kernel() held,
5861 +void nfsd_break_deleg_cb(struct file_lock *fl)
5863 + struct nfs4_delegation *dp= (struct nfs4_delegation *)fl->fl_owner;
5864 + struct task_struct *t;
5866 + dprintk("NFSD nfsd_break_deleg_cb: dp %p fl %p\n",dp,fl);
5870 + /* schedule delegation for recall: mark it in progress and queue it on del_recall_lru */
5871 + spin_lock(&recall_lock);
5872 + atomic_set(&dp->dl_state, NFS4_RECALL_IN_PROGRESS);
5873 + list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
5874 + spin_unlock(&recall_lock);
5876 + /* only place dl_time is set. protected by lock_kernel */
5877 + dp->dl_time = get_seconds();
5879 + /* XXX need to merge NFSD_LEASE_TIME with fs/locks.c:lease_break_time */
5880 + fl->fl_break_time = jiffies + NFSD_LEASE_TIME * HZ;
5882 + t = kthread_run(do_recall, dp, "%s", "nfs4_cb_recall");
5884 + struct nfs4_client *clp = dp->dl_client;
5886 + printk(KERN_INFO "NFSD: Callback thread failed for "
5887 + "client (clientid %08x/%08x)\n",
5888 + clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
5893 + * The file_lock is being reaped.
5895 + * Called by locks_free_lock() with lock_kernel() held.
5898 +void nfsd_release_deleg_cb(struct file_lock *fl)
5900 + struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner; /* fl_owner carries the delegation pointer */
5902 + dprintk("NFSD nfsd_release_deleg_cb: fl %p dp %p dl_count %d, dl_state %d\n", fl,dp, atomic_read(&dp->dl_count), atomic_read(&dp->dl_state)); /* NOTE(review): dp is dereferenced here, before the !dp test in the next statement */
5904 + if (!(fl->fl_flags & FL_LEASE) || !dp)
5906 + atomic_set(&dp->dl_state,NFS4_RECALL_COMPLETE); /* mark the delegation's recall state as complete */
5907 + dp->dl_flock = NULL; /* clear the delegation's pointer to this file_lock */
5911 + * Set the delegation file_lock back pointer.
5913 + * Called from __setlease() with lock_kernel() held.
5916 +void nfsd_copy_lock_deleg_cb(struct file_lock *new, struct file_lock *fl)
5918 + struct nfs4_delegation *dp = (struct nfs4_delegation *)new->fl_owner; /* the delegation is taken from the new lock's fl_owner; fl is not used in the visible lines */
5920 + dprintk("NFSD: nfsd_copy_lock_deleg_cb: new fl %p dp %p\n", new, dp);
5923 + dp->dl_flock = new; /* point the delegation at the new file_lock */
5926 +struct lock_manager_operations nfsd_lease_mng_ops = {
5927 + .fl_break = nfsd_break_deleg_cb,
5928 + .fl_release_private = nfsd_release_deleg_cb,
5929 + .fl_copy_lock = nfsd_copy_lock_deleg_cb,
5935 * nfsd4_process_open1()
5936 @@ -1238,6 +1521,43 @@
5940 +nfs4_deleg_conflict(u32 share, u32 dtype)
5942 + return (((share & NFS4_SHARE_ACCESS_WRITE) &&
5943 + dtype == NFS4_OPEN_DELEGATE_READ) || /* write access requested while the delegation is a read delegation */
5944 + ((share & NFS4_SHARE_ACCESS_READ) &&
5945 + dtype == NFS4_OPEN_DELEGATE_WRITE)); /* read access requested while the delegation is a write delegation */
5948 +#define DONT_DELEGATE 8
5951 + * nfs4_check_deleg_recall()
5953 + * Test any delegation that is currently in an incomplete recall
5954 + * state, and return NFSERR_DELAY for a conflicting open share.
5955 + * flag is set to DONT_DELEGATE for shares that match the deleg type.
5958 +nfs4_check_deleg_recall(struct nfs4_file *fp, struct nfsd4_open *op, int *flag)
5960 + struct nfs4_delegation *dp;
5963 + list_for_each_entry(dp, &fp->fi_del_perfile, dl_del_perfile) { /* walk every delegation linked on this file */
5964 + dprintk("NFSD: found delegation %p with dl_state %d\n",
5965 + dp, atomic_read(&dp->dl_state));
5966 + if (atomic_read(&dp->dl_state) == NFS4_RECALL_IN_PROGRESS) { /* only delegations whose recall is still in progress are examined */
5967 + if(nfs4_deleg_conflict(op->op_share_access, dp->dl_type))
5968 + status = nfserr_jukebox; /* the requested share access conflicts with the delegation type */
5970 + *flag = DONT_DELEGATE; /* nfs4_open_delegation() turns this into NFS4_OPEN_DELEGATE_NONE */
5977 nfs4_check_open(struct nfs4_file *fp, struct nfs4_stateowner *sop, struct nfsd4_open *open, struct nfs4_stateid **stpp)
5979 struct nfs4_stateid *local;
5980 @@ -1339,6 +1659,65 @@
5984 + * Attempt to hand out a delegation.
5987 +nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_stateid *stp, int *flag)
5989 + struct nfs4_delegation *dp;
5990 + struct nfs4_stateowner *sop = stp->st_stateowner;
5991 + struct nfs4_callback *cb = &sop->so_client->cl_callback;
5992 + struct file_lock fl, *flp = &fl;
5995 + if (*flag == DONT_DELEGATE) {
5996 + *flag = NFS4_OPEN_DELEGATE_NONE;
6001 + *flag = NFS4_OPEN_DELEGATE_NONE;
6002 + if (open->op_claim_type != NFS4_OPEN_CLAIM_NULL
6003 + || !atomic_read(&cb->cb_set) || !sop->so_confirmed)
6006 + if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
6007 + *flag = NFS4_OPEN_DELEGATE_READ;
6009 + else if (!(open->op_share_access & NFS4_SHARE_ACCESS_READ))
6010 + *flag = NFS4_OPEN_DELEGATE_WRITE;
6012 + if (!(dp = alloc_init_deleg(sop->so_client, stp, fh, *flag)))
6014 + locks_init_lock(&fl);
6015 + fl.fl_lmops = &nfsd_lease_mng_ops;
6016 + fl.fl_flags = FL_LEASE;
6017 + fl.fl_end = OFFSET_MAX;
6018 + fl.fl_owner = (fl_owner_t)dp;
6019 + fl.fl_file = stp->st_vfs_file;
6020 + fl.fl_pid = current->tgid;
6022 + if ((status = setlease(stp->st_vfs_file,
6023 + *flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK, &flp))) {
6024 + dprintk("NFSD: setlease failed [%d], no delegation\n", status);
6025 + list_del(&dp->dl_del_perfile);
6026 + list_del(&dp->dl_del_perclnt);
6028 + free_delegation++;
6029 + *flag = NFS4_OPEN_DELEGATE_NONE;
6033 + memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid));
6035 + dprintk("NFSD: delegation stateid=(%08x/%08x/%08x/%08x)\n\n",
6036 + dp->dl_stateid.si_boot,
6037 + dp->dl_stateid.si_stateownerid,
6038 + dp->dl_stateid.si_fileid,
6039 + dp->dl_stateid.si_generation);
6043 * called with nfs4_lock_state() held.
6046 @@ -1346,28 +1725,24 @@
6048 struct nfs4_stateowner *sop = open->op_stateowner;
6049 struct nfs4_file *fp = NULL;
6050 - struct inode *ino;
6051 + struct inode *ino = current_fh->fh_dentry->d_inode;
6052 unsigned int fi_hashval;
6053 struct nfs4_stateid *stp = NULL;
6056 - status = nfserr_resource;
6060 - ino = current_fh->fh_dentry->d_inode;
6061 + int status, delegflag = 0;
6063 status = nfserr_inval;
6064 if (!TEST_ACCESS(open->op_share_access) || !TEST_DENY(open->op_share_deny))
6067 - * Lookup file; if found, lookup stateid and check open request;
6068 - * not found, create
6069 + * Lookup file; if found, lookup stateid and check open request,
6070 + * and check for delegations in the process of being recalled.
6071 + * If not found, create the nfs4_file struct
6073 fi_hashval = file_hashval(ino);
6074 if (find_file(fi_hashval, ino, &fp)) {
6075 - status = nfs4_check_open(fp, sop, open, &stp);
6077 + if ((status = nfs4_check_open(fp, sop, open, &stp)))
6079 + if ((status = nfs4_check_deleg_recall(fp, open, &delegflag)))
6082 status = nfserr_resource;
6083 @@ -1407,14 +1782,20 @@
6087 - dprintk("nfs4_process_open2: stateid=(%08x/%08x/%08x/%08x)\n",
6088 - stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid,
6089 - stp->st_stateid.si_fileid, stp->st_stateid.si_generation);
6091 memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t));
6093 - open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE;
6095 + * Attempt to hand out a delegation. No error return, because the
6096 + * OPEN succeeds even if we fail.
6098 + nfs4_open_delegation(current_fh, open, stp, &delegflag);
6099 + open->op_delegate_type = delegflag;
6103 + dprintk("nfs4_process_open2: stateid=(%08x/%08x/%08x/%08x)\n",
6104 + stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid,
6105 + stp->st_stateid.si_fileid, stp->st_stateid.si_generation);
6107 /* take the opportunity to clean up unused state */
6108 if (fp && list_empty(&fp->fi_perfile))
6109 @@ -1480,14 +1861,26 @@
6111 struct nfs4_client *clp;
6112 struct nfs4_stateowner *sop;
6113 + struct nfs4_delegation *dp;
6114 struct list_head *pos, *next;
6115 time_t cutoff = get_seconds() - NFSD_LEASE_TIME;
6116 time_t t, clientid_val = NFSD_LEASE_TIME;
6117 - time_t u, close_val = NFSD_LEASE_TIME;
6118 + time_t u, test_val = NFSD_LEASE_TIME;
6122 - dprintk("NFSD: laundromat service - starting, examining clients\n");
6123 + dprintk("NFSD: laundromat service - starting\n");
6124 + /* Remove clientid's from recovery directory */
6128 + dprintk("NFSD: laundromat service - FIRST_RUN\n");
6129 + status = nfsd4_list_rec_dir(1);
6131 + printk("NFSD: error clearing recovery directory %s\n",
6132 + recovery_dirname);
6135 list_for_each_safe(pos, next, &client_lru) {
6136 clp = list_entry(pos, struct nfs4_client, cl_lru);
6137 if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
6138 @@ -1498,14 +1891,34 @@
6140 dprintk("NFSD: purging unused client (clientid %08x)\n",
6141 clp->cl_clientid.cl_id);
6142 + if (clp->cl_firststate)
6143 + nfsd4_remove_clid_file(clp);
6146 + spin_lock(&recall_lock);
6147 + list_for_each_safe(pos, next, &del_recall_lru) {
6148 + dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
6149 + if (atomic_read(&dp->dl_state) == NFS4_RECALL_COMPLETE)
6151 + if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) {
6152 + u = dp->dl_time - cutoff;
6158 + dprintk("NFSD: purging unused delegation dp %p, fp %p\n",
6159 + dp, dp->dl_flock);
6160 + release_delegation(dp);
6162 + spin_unlock(&recall_lock);
6163 + test_val = NFSD_LEASE_TIME;
6164 list_for_each_safe(pos, next, &close_lru) {
6165 sop = list_entry(pos, struct nfs4_stateowner, so_close_lru);
6166 if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) {
6167 u = sop->so_time - cutoff;
6168 - if (close_val > u)
6174 dprintk("NFSD: purging unused open stateowner (so_id %d)\n",
6175 @@ -1564,21 +1977,81 @@
6180 +access_permit_read(unsigned long access_bmap)
6182 + return test_bit(NFS4_SHARE_ACCESS_READ, &access_bmap) || /* the share-access constant is used as a bit number in access_bmap */
6183 + test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap); /* an ACCESS_BOTH bit also permits reading */
6187 +access_permit_write(unsigned long access_bmap)
6189 + return test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap) || /* the share-access constant is used as a bit number in access_bmap */
6190 + test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap); /* an ACCESS_BOTH bit also permits writing */
6194 +int nfs4_check_openmode(struct nfs4_stateid *stp, int flags)
6196 + int status = nfserr_openmode;
6198 + if ((flags & WR_STATE) && (!access_permit_write(stp->st_access_bmap)))
6200 + if ((flags & RD_STATE) && (!access_permit_read(stp->st_access_bmap)))
6208 +nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
6210 + int status = nfserr_openmode; /* start from the error value used when the mode tests below match */
6212 + if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ)) /* logical AND: the old bitwise '&' combined a flag mask with a 0/1 comparison result, so the test could only fire if WR_STATE were bit 0 */
6214 + if ((flags & RD_STATE) & (dp->dl_type == NFS4_OPEN_DELEGATE_WRITE)) /* NOTE(review): same bitwise '&' pattern, so this is effectively dead unless RD_STATE is bit 0; left unchanged because enabling it would reject reads done under a write delegation - confirm intended policy */
6222 +nfs4_rw_grace(int flags)
6224 + return (nfs4_in_grace() && ((flags & RD_STATE) || (flags & WR_STATE)));
6228 + * Allow READ/WRITE during grace period on recovered state only for files
6229 + * that are not able to provide mandatory locking.
6232 +nfs4_check_rw_grace(umode_t mode, int flags)
6234 + return (nfs4_rw_grace(flags) && ((mode & S_IXGRP) && (mode & S_ISGID)));
6238 * Checks for stateid operations
6241 -nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int flags, struct nfs4_stateid **stpp)
6242 +nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int flags, struct file **filpp)
6244 - struct nfs4_stateid *stp;
6245 + struct nfs4_stateid *stp = NULL;
6246 + struct nfs4_delegation *dp = NULL;
6248 + struct inode *ino = current_fh->fh_dentry->d_inode;
6251 dprintk("NFSD: preprocess_stateid_op: stateid = (%08x/%08x/%08x/%08x)\n",
6252 stateid->si_boot, stateid->si_stateownerid,
6253 stateid->si_fileid, stateid->si_generation);
6260 status = nfserr_stale_stateid;
6261 @@ -1587,33 +2060,58 @@
6264 status = nfserr_bad_stateid;
6265 - if (!(stp = find_stateid(stateid, flags))) {
6266 - dprintk("NFSD: preprocess_stateid_op: no open stateid!\n");
6269 - if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) {
6270 - dprintk("NFSD: preprocess_stateid_op: fh-stateid mismatch!\n");
6271 - stp->st_vfs_set = 0;
6274 - if (!stp->st_stateowner->so_confirmed) {
6275 - dprintk("preprocess_stateid_op: lockowner not confirmed yet!\n");
6277 + if (!stateid->si_fileid) { /* delegation stateid */
6279 + if(!(dp = find_delegation_stateid(ino, stateid))) {
6280 + dprintk("NFSD: delegation stateid not found\n");
6281 + if (nfs4_rw_grace(flags))
6282 + status = nfserr_grace;
6285 + stidp = &dp->dl_stateid;
6286 + } else { /* open or lock stateid */
6287 + if (!(stp = find_stateid(stateid, flags))) {
6288 + dprintk("NFSD: open or lock stateid not found\n");
6289 + if (nfs4_rw_grace(flags))
6290 + status = nfserr_grace;
6293 + if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp))
6295 + if (!stp->st_stateowner->so_confirmed)
6297 + stidp = &stp->st_stateid;
6299 - if (stateid->si_generation > stp->st_stateid.si_generation) {
6300 - dprintk("preprocess_stateid_op: future stateid?!\n");
6301 + if (stateid->si_generation > stidp->si_generation)
6306 status = nfserr_old_stateid;
6307 - if (stateid->si_generation < stp->st_stateid.si_generation) {
6308 - dprintk("preprocess_stateid_op: old stateid!\n");
6309 + if (stateid->si_generation < stidp->si_generation)
6312 + status = nfserr_grace;
6313 + if (nfs4_check_rw_grace(ino->i_mode, flags))
6317 + renew_client(stp->st_stateowner->so_client);
6318 + if ((status = nfs4_check_openmode(stp,flags)))
6321 + *filpp = stp->st_vfs_file;
6323 + renew_client(dp->dl_client);
6324 + if ((status = nfs4_check_delegmode(dp, flags)))
6326 + if (flags & DELEG_RET) {
6327 + atomic_set(&dp->dl_state,NFS4_RECALL_COMPLETE);
6328 + release_delegation(dp);
6330 + if (filpp && dp && dp->dl_stp)
6331 + *filpp = dp->dl_stp->st_vfs_file;
6335 - renew_client(stp->st_stateowner->so_client);
6339 @@ -1750,17 +2248,6 @@
6344 - * eventually, this will perform an upcall to the 'state daemon' as well as
6345 - * set the cl_first_state field.
6348 -first_state(struct nfs4_client *clp)
6350 - if (!clp->cl_first_state)
6351 - clp->cl_first_state = get_seconds();
6355 nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_confirm *oc)
6357 @@ -1793,8 +2280,16 @@
6358 stp->st_stateid.si_stateownerid,
6359 stp->st_stateid.si_fileid,
6360 stp->st_stateid.si_generation);
6362 - first_state(sop->so_client);
6364 + if (!sop->so_client->cl_firststate) {
6365 + int err = nfsd4_create_clid_file(sop->so_client);
6367 + sop->so_client->cl_firststate = 1;
6368 + dprintk("NFSD: OPEN_CONFIRM firststate set [%.*s]\n",
6369 + sop->so_client->cl_name.len,
6370 + sop->so_client->cl_name.data);
6374 if (oc->oc_stateowner)
6375 nfs4_get_stateowner(oc->oc_stateowner);
6376 @@ -1912,6 +2407,22 @@
6381 +nfsd4_delegreturn(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_delegreturn *dr)
6385 + if ((status = fh_verify(rqstp, current_fh, S_IFREG, 0)))
6388 + nfs4_lock_state();
6389 + status = nfs4_preprocess_stateid_op(current_fh, &dr->dr_stateid, DELEG_RET, NULL);
6390 + nfs4_unlock_state();
6397 * Lock owner state (byte-range locks)
6399 @@ -1938,7 +2449,7 @@
6400 unsigned int hashval;
6402 dprintk("NFSD: find_stateid flags 0x%x\n",flags);
6403 - if ((flags & LOCK_STATE) || (flags & RDWR_STATE)) {
6404 + if ((flags & LOCK_STATE) || (flags & RD_STATE) || (flags & WR_STATE)) {
6405 hashval = stateid_hashval(st_id, f_id);
6406 list_for_each_entry(local, &lockstateid_hashtbl[hashval], st_hash) {
6407 if ((local->st_stateid.si_stateownerid == st_id) &&
6408 @@ -1946,7 +2457,7 @@
6412 - if ((flags & OPEN_STATE) || (flags & RDWR_STATE)) {
6413 + if ((flags & OPEN_STATE) || (flags & RD_STATE) || (flags & WR_STATE)) {
6414 hashval = stateid_hashval(st_id, f_id);
6415 list_for_each_entry(local, &stateid_hashtbl[hashval], st_hash) {
6416 if ((local->st_stateid.si_stateownerid == st_id) &&
6417 @@ -1958,6 +2469,30 @@
6421 +static struct nfs4_delegation *
6422 +find_delegation_stateid(struct inode *ino, stateid_t *stid)
6424 + struct nfs4_delegation *dp = NULL;
6425 + struct nfs4_file *fp = NULL;
6427 + unsigned int fi_hashval;
6429 + dprintk("NFSD:find_delegation_stateid ino %p, stid %p\n",ino,stid);
6433 + st_id = stid->si_stateownerid;
6434 + fi_hashval = file_hashval(ino);
6435 + if (find_file(fi_hashval, ino, &fp)) {
6436 + list_for_each_entry(dp, &fp->fi_del_perfile, dl_del_perfile) {
6437 + if(dp->dl_stateid.si_stateownerid == st_id) {
6438 + dprintk("NFSD: find_delegation dp %p\n",dp);
6447 * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that
6448 @@ -2085,7 +2620,6 @@
6449 rp->rp_status = NFSERR_SERVERFAULT;
6451 rp->rp_buf = rp->rp_ibuf;
6456 @@ -2558,22 +3092,22 @@
6458 * failure => all reset bets are off, nfserr_no_grace...
6461 -nfs4_client_to_reclaim(struct nfs4_client *clp)
6463 +nfs4_client_to_reclaim(char *name, int namlen)
6465 unsigned int strhashval;
6466 struct nfs4_client_reclaim *crp = NULL;
6468 - crp = alloc_reclaim(clp->cl_name.len);
6469 + dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", namlen, name);
6470 + crp = alloc_reclaim(namlen);
6473 - strhashval = clientstr_hashval(clp->cl_name.data, clp->cl_name.len);
6474 + strhashval = clientstr_hashval(name, namlen);
6475 INIT_LIST_HEAD(&crp->cr_strhash);
6476 list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]);
6477 - memcpy(crp->cr_name.data, clp->cl_name.data, clp->cl_name.len);
6478 - crp->cr_name.len = clp->cl_name.len;
6479 - crp->cr_first_state = clp->cl_first_state;
6480 - crp->cr_expired = 0;
6481 + memcpy(crp->cr_name.data, name, namlen);
6482 + crp->cr_name.len = namlen;
6483 + reclaim_str_hashtbl_size++;
6487 @@ -2618,6 +3152,9 @@
6491 + dprintk("NFSD: nfs4_find_reclaim_client for %.*s\n",
6492 + clp->cl_name.len, clp->cl_name.data);
6494 /* find clp->cl_name in reclaim_str_hashtbl */
6495 strhashval = clientstr_hashval(client->cl_name.data,
6496 client->cl_name.len);
6497 @@ -2639,8 +3176,6 @@
6499 if ((crp = nfs4_find_reclaim_client(clid)) == NULL)
6500 return nfserr_reclaim_bad;
6501 - if (crp->cr_expired)
6502 - return nfserr_no_grace;
6506 @@ -2657,10 +3192,18 @@
6510 + if (nfsd4_init_slabs())
6511 + BUG(); /* XXXXXX!!! */
6512 if (!nfs4_reclaim_init) {
6515 for (i = 0; i < CLIENT_HASH_SIZE; i++)
6516 INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
6517 reclaim_str_hashtbl_size = 0;
6518 + nfsd4_init_rec_dir(recovery_dirname);
6519 + status = nfsd4_list_rec_dir(0);
6521 + printk("NFSD: Failure in reading recovery data\n");
6522 nfs4_reclaim_init = 1;
6524 for (i = 0; i < CLIENT_HASH_SIZE; i++) {
6525 @@ -2689,6 +3232,8 @@
6527 INIT_LIST_HEAD(&close_lru);
6528 INIT_LIST_HEAD(&client_lru);
6529 + INIT_LIST_HEAD(&del_recall_lru);
6530 + spin_lock_init(&recall_lock);
6531 boot_time = get_seconds();
6532 grace_time = max(old_lease_time, lease_time);
6533 if (reclaim_str_hashtbl_size == 0)
6534 @@ -2725,6 +3270,15 @@
6537 struct nfs4_client *clp = NULL;
6538 + struct nfs4_delegation *dp = NULL;
6539 + struct nfs4_stateowner *sop = NULL;
6540 + struct list_head *pos, *next;
6542 + list_for_each_safe(pos, next, &close_lru) {
6543 + sop = list_entry(pos, struct nfs4_stateowner, so_close_lru);
6544 + list_del(&sop->so_close_lru);
6545 + nfs4_put_stateowner(sop);
6548 for (i = 0; i < CLIENT_HASH_SIZE; i++) {
6549 while (!list_empty(&conf_id_hashtbl[i])) {
6550 @@ -2736,20 +3290,31 @@
6554 + spin_lock(&recall_lock);
6555 + list_for_each_safe(pos, next, &del_recall_lru) {
6556 + dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
6557 + atomic_set(&dp->dl_state, NFS4_RECALL_COMPLETE);
6558 + release_delegation(dp);
6560 + spin_unlock(&recall_lock);
6562 release_all_files();
6563 cancel_delayed_work(&laundromat_work);
6564 flush_scheduled_work();
6566 + nfs4_reclaim_init = 0;
6567 dprintk("NFSD: list_add_perfile %d list_del_perfile %d\n",
6568 list_add_perfile, list_del_perfile);
6569 dprintk("NFSD: add_perclient %d del_perclient %d\n",
6570 add_perclient, del_perclient);
6571 dprintk("NFSD: alloc_file %d free_file %d\n",
6572 alloc_file, free_file);
6573 - dprintk("NFSD: alloc_sowner %d alloc_lsowner %d free_sowner %d\n",
6574 - alloc_sowner, alloc_lsowner, free_sowner);
6575 dprintk("NFSD: vfsopen %d vfsclose %d\n",
6577 + dprintk("NFSD: alloc_delegation %d free_delegation %d\n",
6578 + alloc_delegation, free_delegation);
6579 + if (nfsd4_free_slabs())
6584 @@ -2801,11 +3366,10 @@
6585 /* populate reclaim_str_hashtbl with current confirmed nfs4_clientid */
6586 for (i = 0; i < CLIENT_HASH_SIZE; i++) {
6587 list_for_each_entry(clp, &conf_id_hashtbl[i], cl_idhash) {
6588 - if (!nfs4_client_to_reclaim(clp)) {
6589 + if (!nfs4_client_to_reclaim(clp->cl_name.data, clp->cl_name.len)) {
6590 nfs4_release_reclaim();
6593 - reclaim_str_hashtbl_size++;
6597 Index: linux-2.6.10/fs/nfsd/nfsproc.c
6598 ===================================================================
6599 --- linux-2.6.10.orig/fs/nfsd/nfsproc.c 2004-12-25 05:34:30.000000000 +0800
6600 +++ linux-2.6.10/fs/nfsd/nfsproc.c 2005-04-05 14:49:13.426688152 +0800
6602 { nfserr_dquot, -EDQUOT },
6604 { nfserr_stale, -ESTALE },
6605 - { nfserr_jukebox, -EWOULDBLOCK },
6606 { nfserr_jukebox, -ETIMEDOUT },
6607 { nfserr_dropit, -EAGAIN },
6608 { nfserr_dropit, -ENOMEM },
6609 Index: linux-2.6.10/fs/nfsd/nfs4acl.c
6610 ===================================================================
6611 --- linux-2.6.10.orig/fs/nfsd/nfs4acl.c 2004-12-25 05:34:29.000000000 +0800
6612 +++ linux-2.6.10/fs/nfsd/nfs4acl.c 2005-04-05 14:49:13.429687696 +0800
6617 +/* modify functions to take NFS errors */
6620 mode_from_nfs4(u32 perm, unsigned short *mode, unsigned int flags)
6622 Index: linux-2.6.10/fs/nfsd/nfs4idmap.c
6623 ===================================================================
6624 --- linux-2.6.10.orig/fs/nfsd/nfs4idmap.c 2004-12-25 05:35:23.000000000 +0800
6625 +++ linux-2.6.10/fs/nfsd/nfs4idmap.c 2005-04-05 14:49:13.414689976 +0800
6628 #define DefineSimpleCacheLookupMap(STRUCT, FUNC) \
6629 DefineCacheLookup(struct STRUCT, h, FUNC##_lookup, \
6630 - (struct STRUCT *item, int set), /*no setup */, \
6631 + (struct STRUCT *item, int set), \
6632 & FUNC##_cache, FUNC##_hash(item), FUNC##_match(item, tmp), \
6633 - STRUCT##_init(new, item), STRUCT##_update(tmp, item), 0)
6634 + STRUCT##_init(new, item), STRUCT##_update(tmp, item))
6636 /* Common entry handling */
6638 Index: linux-2.6.10/fs/nfsd/vfs.c
6639 ===================================================================
6640 --- linux-2.6.10.orig/fs/nfsd/vfs.c 2005-03-31 15:35:26.000000000 +0800
6641 +++ linux-2.6.10/fs/nfsd/vfs.c 2005-04-05 14:49:13.417689520 +0800
6643 * we need to break all leases.
6645 err = break_lease(inode, FMODE_WRITE | O_NONBLOCK);
6646 + if (err == -EWOULDBLOCK)
6648 if (err) /* ENOMEM or EWOULDBLOCK */
6652 * This may block while leases are broken.
6654 err = break_lease(inode, O_NONBLOCK | ((access & MAY_WRITE) ? FMODE_WRITE : 0));
6655 + if (err == -EWOULDBLOCK)
6657 if (err) /* NOMEM or WOULDBLOCK */
6660 @@ -822,21 +826,34 @@
6661 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
6662 struct kvec *vec, int vlen, unsigned long *count)
6664 - struct raparms *ra;
6665 - mm_segment_t oldfs;
6668 - struct inode *inode;
6670 err = nfsd_open(rqstp, fhp, S_IFREG, MAY_READ, &file);
6673 + err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
6681 +nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
6682 + loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
6684 + struct inode *inode;
6685 + struct raparms *ra;
6686 + mm_segment_t oldfs;
6690 inode = file->f_dentry->d_inode;
6692 if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
6693 (!lock_may_read(inode, offset, *count)))
6698 /* Get readahead parameters */
6700 dnotify_parent(file->f_dentry, DN_ACCESS);
6702 err = nfserrno(err);
6708 @@ -888,25 +903,40 @@
6709 struct kvec *vec, int vlen,
6710 unsigned long cnt, int *stablep)
6712 - struct svc_export *exp;
6714 - struct dentry *dentry;
6715 - struct inode *inode;
6716 - mm_segment_t oldfs;
6718 - int stable = *stablep;
6720 err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file);
6726 + err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stablep);
6734 +nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
6735 + loff_t offset, struct kvec *vec, int vlen,
6736 + unsigned long cnt, int *stablep)
6738 + struct svc_export *exp;
6739 + struct dentry *dentry;
6740 + struct inode *inode;
6741 + mm_segment_t oldfs;
6743 + int stable = *stablep;
6748 if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
6749 (!lock_may_write(file->f_dentry->d_inode, offset, cnt)))
6754 dentry = file->f_dentry;
6755 @@ -993,13 +1023,10 @@
6758 err = nfserrno(err);
6766 #ifdef CONFIG_NFSD_V3
6768 * Commit all pending writes to stable storage.
6769 Index: linux-2.6.10/fs/nfsd/nfs4callback.c
6770 ===================================================================
6771 --- linux-2.6.10.orig/fs/nfsd/nfs4callback.c 2005-04-05 19:01:49.158500672 +0800
6772 +++ linux-2.6.10/fs/nfsd/nfs4callback.c 2005-04-05 14:49:13.428687848 +0800
6775 + * linux/fs/nfsd/nfs4callback.c
6777 + * Copyright (c) 2001 The Regents of the University of Michigan.
6778 + * All rights reserved.
6780 + * Kendrick Smith <kmsmith@umich.edu>
6781 + * Andy Adamson <andros@umich.edu>
6783 + * Redistribution and use in source and binary forms, with or without
6784 + * modification, are permitted provided that the following conditions
6787 + * 1. Redistributions of source code must retain the above copyright
6788 + * notice, this list of conditions and the following disclaimer.
6789 + * 2. Redistributions in binary form must reproduce the above copyright
6790 + * notice, this list of conditions and the following disclaimer in the
6791 + * documentation and/or other materials provided with the distribution.
6792 + * 3. Neither the name of the University nor the names of its
6793 + * contributors may be used to endorse or promote products derived
6794 + * from this software without specific prior written permission.
6796 + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
6797 + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
6798 + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
6799 + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
6800 + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
6801 + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
6802 + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
6803 + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
6804 + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
6805 + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
6806 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
6809 +#include <linux/config.h>
6810 +#include <linux/module.h>
6811 +#include <linux/list.h>
6812 +#include <linux/inet.h>
6813 +#include <linux/errno.h>
6814 +#include <linux/sunrpc/xdr.h>
6815 +#include <linux/sunrpc/svc.h>
6816 +#include <linux/sunrpc/clnt.h>
6817 +#include <linux/nfsd/nfsd.h>
6818 +#include <linux/nfsd/state.h>
6819 +#include <linux/sunrpc/sched.h>
6820 +#include <linux/nfs4.h>
6822 +#define NFSDDBG_FACILITY NFSDDBG_PROC
6824 +#define NFSPROC4_CB_NULL 0
6825 +#define NFSPROC4_CB_COMPOUND 1
6828 +static void nfs4_cb_null(struct rpc_task *task);
6829 +extern spinlock_t recall_lock;
6831 +/* Index of predefined Linux callback client operations */
6834 + NFSPROC4_CLNT_CB_NULL = 0,
6835 + NFSPROC4_CLNT_CB_RECALL,
6838 +enum nfs_cb_opnum4 {
6842 +#define NFS4_MAXTAGLEN 20
6844 +#define NFS4_enc_cb_null_sz 0
6845 +#define NFS4_dec_cb_null_sz 0
6846 +#define cb_compound_enc_hdr_sz 4
6847 +#define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2))
6848 +#define op_enc_sz 1
6849 +#define op_dec_sz 2
6850 +#define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2))
6851 +#define enc_stateid_sz 16
6852 +#define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \
6853 + 1 + enc_stateid_sz + \
6856 +#define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \
6860 +* Generic encode routines from fs/nfs/nfs4xdr.c
6862 +static inline u32 *
6863 +xdr_writemem(u32 *p, const void *ptr, int nbytes)
6865 + int tmp = XDR_QUADLEN(nbytes);
6869 + memcpy(p, ptr, nbytes);
6873 +#define WRITE32(n) *p++ = htonl(n)
6874 +#define WRITEMEM(ptr,nbytes) do { \
6875 + p = xdr_writemem(p, ptr, nbytes); \
6877 +#define RESERVE_SPACE(nbytes) do { \
6878 + p = xdr_reserve_space(xdr, nbytes); \
6879 + if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __FUNCTION__); \
6884 + * Generic decode routines from fs/nfs/nfs4xdr.c
6886 +#define DECODE_TAIL \
6891 + dprintk("NFSD: xdr error! (%s:%d)\n", __FILE__, __LINE__); \
6895 +#define READ32(x) (x) = ntohl(*p++)
6896 +#define READ64(x) do { \
6897 + (x) = (u64)ntohl(*p++) << 32; \
6898 + (x) |= ntohl(*p++); \
6900 +#define READTIME(x) do { \
6902 + (x.tv_sec) = ntohl(*p++); \
6903 + (x.tv_nsec) = ntohl(*p++); \
6905 +#define READ_BUF(nbytes) do { \
6906 + p = xdr_inline_decode(xdr, nbytes); \
6908 + dprintk("NFSD: %s: reply buffer overflowed in line %d.", \
6909 + __FUNCTION__, __LINE__); \
6914 +struct nfs4_cb_compound_hdr {
6925 +} nfs_cb_errtbl[] = {
6927 + { NFS4ERR_PERM, EPERM },
6928 + { NFS4ERR_NOENT, ENOENT },
6929 + { NFS4ERR_IO, EIO },
6930 + { NFS4ERR_NXIO, ENXIO },
6931 + { NFS4ERR_ACCESS, EACCES },
6932 + { NFS4ERR_EXIST, EEXIST },
6933 + { NFS4ERR_XDEV, EXDEV },
6934 + { NFS4ERR_NOTDIR, ENOTDIR },
6935 + { NFS4ERR_ISDIR, EISDIR },
6936 + { NFS4ERR_INVAL, EINVAL },
6937 + { NFS4ERR_FBIG, EFBIG },
6938 + { NFS4ERR_NOSPC, ENOSPC },
6939 + { NFS4ERR_ROFS, EROFS },
6940 + { NFS4ERR_MLINK, EMLINK },
6941 + { NFS4ERR_NAMETOOLONG, ENAMETOOLONG },
6942 + { NFS4ERR_NOTEMPTY, ENOTEMPTY },
6943 + { NFS4ERR_DQUOT, EDQUOT },
6944 + { NFS4ERR_STALE, ESTALE },
6945 + { NFS4ERR_BADHANDLE, EBADHANDLE },
6946 + { NFS4ERR_BAD_COOKIE, EBADCOOKIE },
6947 + { NFS4ERR_NOTSUPP, ENOTSUPP },
6948 + { NFS4ERR_TOOSMALL, ETOOSMALL },
6949 + { NFS4ERR_SERVERFAULT, ESERVERFAULT },
6950 + { NFS4ERR_BADTYPE, EBADTYPE },
6951 + { NFS4ERR_LOCKED, EAGAIN },
6952 + { NFS4ERR_RESOURCE, EREMOTEIO },
6953 + { NFS4ERR_SYMLINK, ELOOP },
6954 + { NFS4ERR_OP_ILLEGAL, EOPNOTSUPP },
6955 + { NFS4ERR_DEADLOCK, EDEADLK },
6960 +nfs_cb_stat_to_errno(int stat)
6963 + for (i = 0; nfs_cb_errtbl[i].stat != -1; i++) { /* scan nfs_cb_errtbl until the entry whose stat is -1 */
6964 + if (nfs_cb_errtbl[i].stat == stat)
6965 + return nfs_cb_errtbl[i].errno; /* first entry matching the NFS status supplies the errno */
6967 + /* If we cannot translate the error, the recovery routines should
6969 + * Note: remaining NFSv4 error codes have values > 10000, so should
6970 + * not conflict with native Linux error codes.
6980 +encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr)
6984 + RESERVE_SPACE(16); /* room for the four 32-bit words written below */
6985 + WRITE32(0); /* tag length is always 0 */
6986 + WRITE32(NFS4_MINOR_VERSION);
6987 + WRITE32(hdr->ident);
6988 + WRITE32(hdr->nops);
6993 +encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec)
6996 + int len = cb_rec->cbr_fhlen; /* number of file handle bytes to emit */
6998 + RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len); /* 12 bytes of 32-bit words + stateid + file handle; presumably the third word is the handle length, written on a line not shown in this hunk */
6999 + WRITE32(OP_CB_RECALL);
7000 + WRITEMEM(&cb_rec->cbr_stateid, sizeof(stateid_t));
7001 + WRITE32(cb_rec->cbr_trunc);
7003 + WRITEMEM(cb_rec->cbr_fhval, len);
7008 +nfs4_xdr_enc_cb_null(struct rpc_rqst *req, u32 *p)
7010 + struct xdr_stream xdrs, *xdr = &xdrs;
7012 + xdr_init_encode(&xdrs, &req->rq_snd_buf, p);
7018 +nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, u32 *p, struct nfs4_cb_recall *args)
7020 + struct xdr_stream xdr;
7021 + struct nfs4_cb_compound_hdr hdr = {
7025 + xdr_init_encode(&xdr, &req->rq_snd_buf, p);
7026 + encode_cb_compound_hdr(&xdr, &hdr);
7027 + return (encode_cb_recall(&xdr, args));
7032 +decode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr){
7036 + READ32(hdr->status);
7037 + READ32(hdr->taglen);
7038 + READ_BUF(hdr->taglen + 4);
7039 + hdr->tag = (char *)p;
7040 + p += XDR_QUADLEN(hdr->taglen);
7041 + READ32(hdr->nops);
7046 +decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
7054 + if (op != expected) {
7055 + dprintk("NFSD: decode_cb_op_hdr: Callback server returned "
7056 + " operation %d but we issued a request for %d\n",
7061 + if (nfserr != NFS_OK)
7062 + return -nfs_cb_stat_to_errno(nfserr);
7067 +nfs4_xdr_dec_cb_null(struct rpc_rqst *req, u32 *p)
7073 +nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, u32 *p)
7075 + struct xdr_stream xdr;
7076 + struct nfs4_cb_compound_hdr hdr;
7079 + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
7080 + status = decode_cb_compound_hdr(&xdr, &hdr);
7083 + status = decode_cb_op_hdr(&xdr, OP_CB_RECALL);
7089 + * RPC procedure tables
7092 +# define MAX(a, b) (((a) > (b))? (a) : (b))
7095 +#define PROC(proc, call, argtype, restype) \
7096 +[NFSPROC4_CLNT_##proc] = { \
7097 + .p_proc = NFSPROC4_CB_##call, \
7098 + .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \
7099 + .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \
7100 + .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \
7103 +struct rpc_procinfo nfs4_cb_procedures[] = {
7104 + PROC(CB_NULL, NULL, enc_cb_null, dec_cb_null),
7105 + PROC(CB_RECALL, COMPOUND, enc_cb_recall, dec_cb_recall),
7108 +struct rpc_version nfs_cb_version4 = {
7110 + .nrprocs = sizeof(nfs4_cb_procedures)/sizeof(nfs4_cb_procedures[0]),
7111 + .procs = nfs4_cb_procedures
7114 +static struct rpc_version * nfs_cb_version[] = {
7120 + * Use the SETCLIENTID credential
7123 +nfsd4_lookupcred(struct nfs4_client *clp, int taskflags)
7125 + struct auth_cred acred;
7126 + struct rpc_clnt *clnt = clp->cl_callback.cb_client; /* RPC client stored in the client's callback info */
7127 + struct rpc_cred *ret = NULL;
7131 + get_group_info(clp->cl_cred.cr_group_info); /* hold the group info while acred refers to it */
7132 + acred.uid = clp->cl_cred.cr_uid;
7133 + acred.gid = clp->cl_cred.cr_gid;
7134 + acred.group_info = clp->cl_cred.cr_group_info;
7136 + dprintk("NFSD: looking up %s cred\n",
7137 + clnt->cl_auth->au_ops->au_name);
7138 + ret = rpcauth_lookup_credcache(clnt->cl_auth, &acred, taskflags); /* look the credential up in the callback client's auth cache */
7139 + put_group_info(clp->cl_cred.cr_group_info); /* balances the get_group_info() above */
7145 + * Set up the callback client and put a NFSPROC4_CB_NULL on the wire...
7148 +nfsd4_probe_callback(struct nfs4_client *clp)
7150 + struct sockaddr_in addr;
7151 + struct nfs4_callback *cb = &clp->cl_callback;
7152 + struct rpc_timeout timeparms;
7153 + struct rpc_xprt * xprt;
7154 + struct rpc_program * program = &cb->cb_program;
7155 + struct rpc_stat * stat = &cb->cb_stat;
7156 + struct rpc_clnt * clnt;
7157 + struct rpc_message msg = {
7158 + .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
7161 + char hostname[32];
7164 + dprintk("NFSD: probe_callback. cb_parsed %d cb_set %d\n",
7165 + cb->cb_parsed, atomic_read(&cb->cb_set));
7166 + if (!cb->cb_parsed || atomic_read(&cb->cb_set))
7169 + /* Initialize address */
7170 + memset(&addr, 0, sizeof(addr));
7171 + addr.sin_family = AF_INET;
7172 + addr.sin_port = htons(cb->cb_port);
7173 + addr.sin_addr.s_addr = htonl(cb->cb_addr);
7175 + /* Initialize timeout */
7176 + timeparms.to_initval = (NFSD_LEASE_TIME/4) * HZ;
7177 + timeparms.to_retries = 5;
7178 + timeparms.to_maxval = (NFSD_LEASE_TIME/2) * HZ;
7179 + timeparms.to_exponential = 1;
7181 + /* Create RPC transport */
7182 + if (!(xprt = xprt_create_proto(IPPROTO_TCP, &addr, &timeparms))) {
7183 + dprintk("NFSD: couldn't create callback transport!\n");
7187 + /* Initialize rpc_program */
7188 + program->name = "nfs4_cb";
7189 + program->number = cb->cb_prog;
7190 + program->nrvers = sizeof(nfs_cb_version)/sizeof(nfs_cb_version[0]);
7191 + program->version = nfs_cb_version;
7192 + program->stats = stat;
7194 + /* Initialize rpc_stat */
7195 + memset(stat, 0, sizeof(struct rpc_stat));
7196 + stat->program = program;
7198 + /* Create RPC client
7200 + * XXX AUTH_UNIX only - need AUTH_GSS....
7202 + sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr.sin_addr.s_addr));
7203 + if (!(clnt = rpc_create_client(xprt, hostname, program, 1, RPC_AUTH_UNIX))) {
7204 + dprintk("NFSD: couldn't create callback client\n");
7207 + clnt->cl_intr = 1;
7208 + clnt->cl_softrtry = 1;
7209 + clnt->cl_chatty = 1;
7211 + /* Kick rpciod, put the call on the wire. */
7213 + if (rpciod_up() != 0) {
7214 + dprintk("nfsd: couldn't start rpciod for callbacks!\n");
7218 + /* the task holds a reference to the nfs4_client struct */
7219 + cb->cb_client = clnt;
7220 + atomic_inc(&clp->cl_count);
7222 + msg.rpc_cred = nfsd4_lookupcred(clp,0);
7223 + status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, nfs4_cb_null, NULL);
7225 + if (status != 0) {
7226 + dprintk("NFSD: asynchronous NFSPROC4_CB_NULL failed!\n");
7232 + atomic_dec(&clp->cl_count);
7235 + rpc_shutdown_client(clnt);
7238 + xprt_destroy(xprt);
7240 + dprintk("NFSD: warning: no callback path to client %.*s\n",
7241 + clp->cl_name.len, clp->cl_name.data);
7242 + cb->cb_client = NULL;
7246 +nfs4_cb_null(struct rpc_task *task)
7248 + struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp;
7249 + struct nfs4_callback *cb = &clp->cl_callback;
7250 + u32 addr = htonl(cb->cb_addr);
7252 + dprintk("NFSD: nfs4_cb_null task->tk_status %d\n", task->tk_status);
7254 + if (task->tk_status < 0) {
7255 + dprintk("NFSD: callback establishment to client %.*s failed\n",
7256 + clp->cl_name.len, clp->cl_name.data);
7259 + atomic_set(&cb->cb_set, 1);
7260 + dprintk("NFSD: callback set to client %u.%u.%u.%u\n", NIPQUAD(addr));
7262 + put_nfs4_client(clp);
7266 + * Called with dp->dl_count incremented
7269 +nfs4_cb_recall_done(struct rpc_task *task)
7271 + struct nfs4_cb_recall *cbr = (struct nfs4_cb_recall *)task->tk_calldata;
7272 + struct nfs4_delegation *dp = cbr->cbr_dp;
7275 + spin_lock(&recall_lock);
7277 + /* all is well... */
7278 + if (task->tk_status == 0)
7281 + /* network partition, retry nfsd4_cb_recall once. */
7282 + if (task->tk_status == -EIO) {
7283 + if (atomic_read(&dp->dl_recall_cnt) == 0)
7286 + /* callback channel no longer available */
7287 + atomic_set(&dp->dl_client->cl_callback.cb_set, 0);
7290 + /* Race: a recall occurred milliseconds after a delegation was granted.
7291 + * Client may have received recall prior to delegation. retry recall
7293 + * XXX what about nfserr_bad_stateid?
7295 + if (task->tk_status == -EBADHANDLE) {
7296 + if (atomic_read(&dp->dl_recall_cnt) == 0)
7300 + /* nfs4_laundromat will reap delegation */
7301 + atomic_set(&dp->dl_state, NFS4_RECALL_COMPLETE);
7304 + atomic_dec(&dp->dl_count);
7305 + BUG_ON(atomic_read(&dp->dl_count) < 0);
7306 + spin_unlock(&recall_lock);
7310 + atomic_inc(&dp->dl_recall_cnt);
7311 + spin_unlock(&recall_lock);
7312 + /* sleep 2 seconds before retrying recall */
7313 + set_current_state(TASK_UNINTERRUPTIBLE);
7314 + schedule_timeout(2*HZ);
7315 + status = nfsd4_cb_recall(dp);
7316 + dprintk("NFSD: nfs4_cb_recall_done: retry status: %d dp %p dl_flock %p\n",status,dp, dp->dl_flock);
7320 + * called with dp->dl_count inc'ed.
7321 + * nfs4_lock_state() may or may not have been called.
7324 +nfsd4_cb_recall(struct nfs4_delegation *dp)
7326 + struct nfs4_client *clp;
7327 + struct rpc_clnt *clnt;
7328 + struct rpc_message msg = {
7329 + .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
7331 + struct nfs4_cb_recall *cbr = &dp->dl_recall;
7334 + dprintk("NFSD: nfsd4_cb_recall NFS4_enc_cb_recall_sz %d NFS4_dec_cb_recall_sz %d \n",NFS4_enc_cb_recall_sz,NFS4_dec_cb_recall_sz);
7336 + clp = dp->dl_client;
7337 + clnt = clp->cl_callback.cb_client;
7339 + if ((!atomic_read(&clp->cl_callback.cb_set)) || !clnt)
7342 + msg.rpc_argp = cbr;
7343 + msg.rpc_resp = cbr;
7344 + msg.rpc_cred = nfsd4_lookupcred(clp,0);
7346 + cbr->cbr_trunc = 0; /* XXX need to implement truncate optimization */
7349 + if ((status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
7350 + nfs4_cb_recall_done, cbr ))) {
7351 + dprintk("NFSD: recall_delegation: rpc_call_async failed %d\n",
7358 + status = nfserrno(status);
7363 Index: linux-2.6.10/fs/nfsd/nfs4proc.c
7364 ===================================================================
7365 --- linux-2.6.10.orig/fs/nfsd/nfs4proc.c 2004-12-25 05:35:40.000000000 +0800
7366 +++ linux-2.6.10/fs/nfsd/nfs4proc.c 2005-04-05 14:49:13.432687240 +0800
7367 @@ -461,28 +461,12 @@
7371 -access_bits_permit_read(unsigned long access_bmap)
7373 - return test_bit(NFS4_SHARE_ACCESS_READ, &access_bmap) ||
7374 - test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap);
7378 -access_bits_permit_write(unsigned long access_bmap)
7380 - return test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap) ||
7381 - test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap);
7385 nfsd4_read(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_read *read)
7387 - struct nfs4_stateid *stp;
7389 + struct file *filp;
7391 /* no need to check permission - this will be done in nfsd_read() */
7392 - if (nfs4_in_grace())
7393 - return nfserr_grace;
7395 if (read->rd_offset >= OFFSET_MAX)
7396 return nfserr_inval;
7397 @@ -508,21 +492,17 @@
7401 - if ((status = nfs4_preprocess_stateid_op(current_fh, &read->rd_stateid,
7402 - CHECK_FH | RDWR_STATE, &stp))) {
7403 + if ((status = nfs4_preprocess_stateid_op(current_fh, &read->rd_stateid,
7404 + CHECK_FH | RD_STATE, &filp))) {
7405 dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
7408 - status = nfserr_openmode;
7409 - if (!access_bits_permit_read(stp->st_access_bmap)) {
7410 - dprintk("NFSD: nfsd4_read: file not opened for read!\n");
7415 nfs4_unlock_state();
7416 read->rd_rqstp = rqstp;
7417 read->rd_fhp = current_fh;
7418 + read->rd_filp = filp;
7426 + if (nfs4_in_grace())
7427 + return nfserr_grace;
7428 status = nfsd_unlink(rqstp, current_fh, 0, remove->rm_name, remove->rm_namelen);
7429 if (status == nfserr_symlink)
7430 return nfserr_notdir;
7433 if (!save_fh->fh_dentry)
7435 + if (nfs4_in_grace() && !(save_fh->fh_export->ex_flags
7436 + & NFSEXP_NOSUBTREECHECK))
7437 + return nfserr_grace;
7438 status = nfsd_rename(rqstp, save_fh, rename->rn_sname,
7439 rename->rn_snamelen, current_fh,
7440 rename->rn_tname, rename->rn_tnamelen);
7441 @@ -605,12 +590,8 @@
7443 nfsd4_setattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_setattr *setattr)
7445 - struct nfs4_stateid *stp;
7446 int status = nfs_ok;
7448 - if (nfs4_in_grace())
7449 - return nfserr_grace;
7451 if (!current_fh->fh_dentry)
7452 return nfserr_nofilehandle;
7454 @@ -626,15 +607,10 @@
7456 if ((status = nfs4_preprocess_stateid_op(current_fh,
7457 &setattr->sa_stateid,
7458 - CHECK_FH | RDWR_STATE, &stp))) {
7459 + CHECK_FH | WR_STATE, NULL))) {
7460 dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
7463 - status = nfserr_openmode;
7464 - if (!access_bits_permit_write(stp->st_access_bmap)) {
7465 - dprintk("NFSD: nfsd4_setattr: not opened for write!\n");
7468 nfs4_unlock_state();
7471 @@ -654,14 +630,11 @@
7473 nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_write *write)
7475 - struct nfs4_stateid *stp;
7476 stateid_t *stateid = &write->wr_stateid;
7477 + struct file *filp;
7479 int status = nfs_ok;
7481 - if (nfs4_in_grace())
7482 - return nfserr_grace;
7484 /* no need to check permission - this will be done in nfsd_write() */
7486 if (write->wr_offset >= OFFSET_MAX)
7487 @@ -677,18 +650,13 @@
7490 if ((status = nfs4_preprocess_stateid_op(current_fh, stateid,
7491 - CHECK_FH | RDWR_STATE, &stp))) {
7492 + CHECK_FH | WR_STATE, &filp))) {
7493 dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
7497 - status = nfserr_openmode;
7498 - if (!access_bits_permit_write(stp->st_access_bmap)) {
7499 - dprintk("NFSD: nfsd4_write: file not open for write!\n");
7505 nfs4_unlock_state();
7506 write->wr_bytes_written = write->wr_buflen;
7507 write->wr_how_written = write->wr_stable_how;
7508 @@ -696,9 +664,16 @@
7509 *p++ = nfssvc_boot.tv_sec;
7510 *p++ = nfssvc_boot.tv_usec;
7512 - status = nfsd_write(rqstp, current_fh, write->wr_offset,
7513 - write->wr_vec, write->wr_vlen, write->wr_buflen,
7514 - &write->wr_how_written);
7516 + status = nfsd_vfs_write(rqstp, current_fh, filp,
7517 + write->wr_offset, write->wr_vec,
7518 + write->wr_vlen, write->wr_buflen,
7519 + &write->wr_how_written);
7521 + status = nfsd_write(rqstp, current_fh, write->wr_offset,
7522 + write->wr_vec, write->wr_vlen, write->wr_buflen,
7523 + &write->wr_how_written);
7525 if (status == nfserr_symlink)
7526 status = nfserr_inval;
7530 op->status = nfsd4_create(rqstp, current_fh, &op->u.create);
7532 + case OP_DELEGRETURN:
7533 + op->status = nfsd4_delegreturn(rqstp, current_fh, &op->u.delegreturn);
7536 op->status = nfsd4_getattr(rqstp, current_fh, &op->u.getattr);
7538 Index: linux-2.6.10/fs/nfsd/export.c
7539 ===================================================================
7540 --- linux-2.6.10.orig/fs/nfsd/export.c 2004-12-25 05:34:58.000000000 +0800
7541 +++ linux-2.6.10/fs/nfsd/export.c 2005-04-05 14:49:13.415689824 +0800
7543 new->ek_export = item->ek_export;
7546 -static DefineSimpleCacheLookup(svc_expkey,0) /* no inplace updates */
7547 +static DefineSimpleCacheLookup(svc_expkey)
7549 #define EXPORT_HASHBITS 8
7550 #define EXPORT_HASHMAX (1<< EXPORT_HASHBITS)
7551 @@ -492,8 +492,72 @@
7552 new->ex_fsid = item->ex_fsid;
7555 -static DefineSimpleCacheLookup(svc_export,1) /* allow inplace updates */
7556 +struct svc_export *
7557 +svc_export_lookup(struct svc_export *item, int set)
7559 + struct svc_export *tmp, *new = NULL;
7560 + struct cache_head **hp, **head;
7562 + head = &svc_export_cache.hash_table[svc_export_hash(item)];
7565 + write_lock(&svc_export_cache.hash_lock);
7567 + read_lock(&svc_export_cache.hash_lock);
7568 + for(hp=head; *hp != NULL; hp = &tmp->h.next) {
7569 + tmp = container_of(*hp, struct svc_export, h);
7570 + if (svc_export_match(item, tmp)) { /* found a match */
7571 + cache_get(&tmp->h);
7573 + if (test_bit(CACHE_NEGATIVE, &item->h.flags))
7574 + set_bit(CACHE_NEGATIVE, &tmp->h.flags);
7576 + clear_bit(CACHE_NEGATIVE, &tmp->h.flags);
7577 + svc_export_update(tmp, item);
7581 + write_unlock(&svc_export_cache.hash_lock);
7583 + read_unlock(&svc_export_cache.hash_lock);
7585 + cache_fresh(&svc_export_cache, &tmp->h,
7586 + item->h.expiry_time);
7588 + svc_export_put(&new->h, &svc_export_cache);
7592 + /* Didn't find anything */
7594 + svc_export_init(new, item);
7595 + new->h.next = *head;
7597 + set_bit(CACHE_HASHED, &new->h.flags);
7598 + svc_export_cache.entries++;
7601 + if (test_bit(CACHE_NEGATIVE, &item->h.flags))
7602 + set_bit(CACHE_NEGATIVE, &tmp->h.flags);
7604 + svc_export_update(tmp, item);
7608 + write_unlock(&svc_export_cache.hash_lock);
7610 + read_unlock(&svc_export_cache.hash_lock);
7612 + cache_fresh(&svc_export_cache, &new->h, item->h.expiry_time);
7615 + new = kmalloc(sizeof(*new), GFP_KERNEL);
7617 + cache_init(&new->h);
7624 exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp)
7625 Index: linux-2.6.10/fs/nfsd/nfssvc.c
7626 ===================================================================
7627 --- linux-2.6.10.orig/fs/nfsd/nfssvc.c 2004-12-25 05:34:58.000000000 +0800
7628 +++ linux-2.6.10/fs/nfsd/nfssvc.c 2005-04-05 14:49:13.422688760 +0800
7630 .pg_name = "nfsd", /* program name */
7631 .pg_class = "nfsd", /* authentication class */
7632 .pg_stats = &nfsd_svcstats, /* version table */
7633 + .pg_authenticate = &svc_set_client, /* export authentication */
7636 Index: linux-2.6.10/fs/nfsd/nfs4recover.c
7637 ===================================================================
7638 --- linux-2.6.10.orig/fs/nfsd/nfs4recover.c 2005-04-05 19:01:49.158500672 +0800
7639 +++ linux-2.6.10/fs/nfsd/nfs4recover.c 2005-04-05 14:49:13.430687544 +0800
7642 +* linux/fs/nfsd/nfs4recover.c
7644 +* Copyright (c) 2004 The Regents of the University of Michigan.
7645 +* All rights reserved.
7647 +* Andy Adamson <andros@umich.edu>
7649 +* Redistribution and use in source and binary forms, with or without
7650 +* modification, are permitted provided that the following conditions
7653 +* 1. Redistributions of source code must retain the above copyright
7654 +* notice, this list of conditions and the following disclaimer.
7655 +* 2. Redistributions in binary form must reproduce the above copyright
7656 +* notice, this list of conditions and the following disclaimer in the
7657 +* documentation and/or other materials provided with the distribution.
7658 +* 3. Neither the name of the University nor the names of its
7659 +* contributors may be used to endorse or promote products derived
7660 +* from this software without specific prior written permission.
7662 +* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
7663 +* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
7664 +* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
7665 +* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
7666 +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
7667 +* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
7668 +* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
7669 +* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
7670 +* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
7671 +* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
7672 +* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
7677 +#include <linux/param.h>
7678 +#include <linux/sunrpc/svc.h>
7679 +#include <linux/nfsd/nfsd.h>
7680 +#include <linux/nfs4.h>
7681 +#include <linux/nfsd/state.h>
7682 +#include <linux/nfsd/xdr4.h>
7683 +#include <linux/file.h>
7684 +#include <linux/namei.h>
7685 +#include <asm/uaccess.h>
7687 +#define NFSDDBG_FACILITY NFSDDBG_PROC
7689 +/* MAX_FILE_LEN/2 = max client id name length due to changing name
7692 +#define MAX_FILE_LEN 256
7695 +char recovery_dirname[] = "/var/lib/nfs/v4recovery";
7696 +static uid_t saveuid;
7697 +static gid_t savegid;
7698 +static struct nameidata nd_rec_init;
7699 +static int rec_dir_init = 0;
7702 +nfs4_save_set_user(void)
7704 + saveuid = current->fsuid;
7705 + savegid = current->fsgid;
7706 + current->fsuid = 0;
7707 + current->fsgid = 0;
7711 +nfs4_reset_user(void)
7713 + current->fsuid = saveuid;
7714 + current->fsgid = savegid;
7718 +nfs4_make_rec_filename(char **filename, struct nfs4_client *clp)
7720 + char *fname = *filename;
7721 + int flen = MAX_FILE_LEN;
7723 + memset(fname, 0, flen);
7724 + qword_addhex(&fname, &flen, clp->cl_name.data, clp->cl_name.len);
7727 +/* XXX need to check dput() mntput ?? */
7729 +nfsd4_create_clid_file(struct nfs4_client *clp)
7731 + struct file *filp = NULL;
7732 + struct dentry *dentry;
7733 + mm_segment_t oldfs;
7734 + loff_t offset = 0;
7735 + char fbuf[MAX_FILE_LEN], *fname = fbuf;
7739 + if (!rec_dir_init)
7741 + nfs4_save_set_user();
7743 + dprintk("NFSD: nfsd4_create_clid_file IN recdir [d:mnt] count %d:%d\n",
7744 + atomic_read(&nd_rec_init.dentry->d_count),
7745 + atomic_read(&nd_rec_init.mnt->mnt_count));
7747 + /* lock the parent */
7748 + down(&nd_rec_init.dentry->d_inode->i_sem);
7750 + nfs4_make_rec_filename(&fname, clp);
7751 + /* dentry->d_count will be 1 */
7752 + dentry = lookup_one_len(fname, nd_rec_init.dentry, strlen(fname));
7753 + status = PTR_ERR(dentry);
7754 + if (IS_ERR(dentry))
7758 + if (dentry->d_inode){
7759 + dprintk("NFSD: nfsd4_create_clid_file: FILE EXISTS\n");
7763 + /* nd_rec_init.dentry->d_count is bumped */
7764 + status = vfs_create(nd_rec_init.dentry->d_inode, dentry, S_IRWXU, NULL);
7768 + up(&nd_rec_init.dentry->d_inode->i_sem);
7770 + filp = dentry_open(dget(dentry), mntget(nd_rec_init.mnt), O_RDWR);
7771 + status = PTR_ERR(filp);
7775 + oldfs = get_fs(); set_fs(KERNEL_DS);
7776 + status = vfs_write(filp, clp->cl_name.data, clp->cl_name.len, &offset);
7779 + dprintk("NFSD: nfsd4_create_clid_file vfs_write returns %d\n",status);
7783 + if (filp->f_op && filp->f_op->flush) {
7784 + int err = filp->f_op->flush(filp);
7785 + dprintk("NFSD: nfsd4_create_clid_file called flush\n");
7789 + /* dget and mntget in dentry_open call */
7792 + /* dentry->d_count will be 0 */
7795 + /* dget in vfs_create call */
7796 + dput(nd_rec_init.dentry);
7799 + nfs4_reset_user();
7801 + dprintk("NFSD: nfsd4_create_clid_file OUT recdir [d:mnt] count %d:%d\n",
7802 + atomic_read(&nd_rec_init.dentry->d_count),
7803 + atomic_read(&nd_rec_init.mnt->mnt_count));
7804 + dprintk("NFSD: nfsd4_create_clid_file returns %d\n",status);
7809 + up(&nd_rec_init.dentry->d_inode->i_sem);
7814 + * called with pdentry->d_inode->i_sem held ?
7817 +nfsd4_unlink_rec_file(char *name, int namlen)
7819 + struct dentry *dentry;
7822 + dprintk("NFSD: nfsd4_unlink_rec_file. name %.*s\n", namlen, name);
7824 + dentry = lookup_one_len(name, nd_rec_init.dentry, namlen);
7825 + dprintk("NFSD: nfsd4_unlink_rec_file POST LOOKUP nd_rec d_count %d\n",
7826 + atomic_read(&nd_rec_init.dentry->d_count));
7827 + status = PTR_ERR(dentry);
7828 + if (IS_ERR(dentry))
7832 + if (!dentry->d_inode) {
7837 + /* should only be files here! */
7838 + type = dentry->d_inode->i_mode & S_IFMT;
7840 + if (!(type & S_IFREG)) {
7845 + dprintk("NFSD: nfsd4_unlink_rec_file PRE VFS UNLINK [%d:%d]\n",
7846 + atomic_read(&nd_rec_init.dentry->d_count),
7847 + atomic_read(&nd_rec_init.mnt->mnt_count));
7849 + status = vfs_unlink(nd_rec_init.dentry->d_inode, dentry);
7851 + dprintk("NFSD: nfsd4_unlink_rec_file POST VFS UNLINK [%d:%d]\n",
7852 + atomic_read(&nd_rec_init.dentry->d_count),
7853 + atomic_read(&nd_rec_init.mnt->mnt_count));
7855 + dprintk("NFSD: nfsd4_unlink_rec_file FILE dentry->d_count %d\n",
7856 + atomic_read(&dentry->d_count));
7858 + dprintk("NFSD: nfsd4_unlink_rec_file returns %d\n",status);
7863 +nfsd4_remove_clid_file(struct nfs4_client *clp)
7865 + char fbuf[MAX_FILE_LEN], *fname = fbuf;
7868 + if (!rec_dir_init)
7871 + dprintk("NFSD: nfsd4_remove_clid_file client %.*s\n",
7872 + clp->cl_name.len,clp->cl_name.data);
7874 + nfs4_save_set_user();
7876 + dprintk("NFSD: nfsd4_remove_clid_file IN recdir [d:mnt] count %d:%d\n",
7877 + atomic_read(&nd_rec_init.dentry->d_count),
7878 + atomic_read(&nd_rec_init.mnt->mnt_count));
7880 + nfs4_make_rec_filename(&fname, clp);
7881 + status = nfsd4_unlink_rec_file(fname, strlen(fname));
7882 + nfs4_reset_user();
7883 + if (status != nfs_ok)
7884 + printk("NFSD: Failed to remove expired client state file %.*s from %s\n", strlen(fname), fname, recovery_dirname);
7886 + dprintk("NFSD: nfsd4_remove_clid_file OUT recdir [d:mnt] count %d:%d\n",
7887 + atomic_read(&nd_rec_init.dentry->d_count),
7888 + atomic_read(&nd_rec_init.mnt->mnt_count));
7892 +struct rec_dirent {
7897 + * on reboot, stuff the reclaim hash with known client IDs.
7899 + * the filename may not equal the clid. the clid might be the first
7900 + * (and so far only) line of data in the file.
7902 + * i will probably end up writing data such as the setclientid principal
7903 + * to each clid file. if i do, i will always put the clid as the
7904 + * first line of data.
7908 +nfsd4_get_recdir_dirent(struct rec_dirent *rdirent, const char *name,
7909 + int namlen, loff_t offset, ino_t ino, unsigned int d_type)
7911 + struct dentry *dclid;
7912 + struct file *filp;
7913 + mm_segment_t oldfs;
7914 + int status = nfs_ok;
7916 + dprintk("NFSD: nfsd4_get_recdir_dirent IN recdir [d:mnt] count %d:%d\n",
7917 + atomic_read(&nd_rec_init.dentry->d_count),
7918 + atomic_read(&nd_rec_init.mnt->mnt_count));
7920 + dprintk("NFSD: nfsd4_get_recdir_dirent name %.*s, clear %d\n",
7921 + namlen, name, rdirent->clear);
7923 + if (name && isdotent(name, namlen))
7926 + dclid = lookup_one_len(name, nd_rec_init.dentry, namlen);
7927 + status = PTR_ERR(dclid);
7931 + if (rdirent->clear){
7932 + dprintk("NFSD: nfsd4_get_recdir_dirent REMOVE\n");
7934 + dprintk("NFSD: nfsd4_get_recdir_dirent PRE VFS_UNLINK [%d:%d]\n",
7935 + atomic_read(&nd_rec_init.dentry->d_count),
7936 + atomic_read(&nd_rec_init.mnt->mnt_count));
7938 + status = vfs_unlink(nd_rec_init.dentry->d_inode, dclid);
7940 + dprintk("NFSD: nfsd4_get_recdir_dirent POST VFS_UNLINK [%d:%d]\n",
7941 + atomic_read(&nd_rec_init.dentry->d_count),
7942 + atomic_read(&nd_rec_init.mnt->mnt_count));
7945 + char buf[MAX_FILE_LEN];
7947 + dprintk("NFSD: nfsd4_get_recdir_dirent READ\n");
7949 + filp = dentry_open(dclid, mntget(nd_rec_init.mnt), O_RDWR);
7950 + if (IS_ERR(filp)) {
7951 + status = PTR_ERR(filp);
7955 + memset(buf, 0, MAX_FILE_LEN);
7956 + oldfs = get_fs(); set_fs(KERNEL_DS);
7957 + status = vfs_read(filp, buf, MAX_FILE_LEN, &filp->f_pos);
7960 + dprintk("NFSD: nfsd4_get_recdir_dirent vfs_read returns %d\n",
7963 + status = nfs4_client_to_reclaim(buf, status);
7967 + dprintk("NFSD:nfsd4_get_recdir_dirent OUT recdir [d:mnt] count %d:%d\n",
7968 + atomic_read(&nd_rec_init.dentry->d_count),
7969 + atomic_read(&nd_rec_init.mnt->mnt_count));
7971 + dprintk("NFSD: nfsd4_get_recdir_dirent returns %d\n",status);
7976 +nfsd4_list_rec_dir(int clear)
7978 + struct file *filp;
7979 + struct rec_dirent rdirent;
7982 + if (!rec_dir_init)
7985 + nfs4_save_set_user();
7987 + dprintk("NFSD: nfsd4_list_rec_dir IN recdir [d:mnt] count %d:%d\n",
7988 + atomic_read(&nd_rec_init.dentry->d_count),
7989 + atomic_read(&nd_rec_init.mnt->mnt_count));
7991 + /* open directory */
7992 + filp = dentry_open(dget(nd_rec_init.dentry), mntget(nd_rec_init.mnt),
7994 + status = PTR_ERR(filp);
7997 + rdirent.clear = clear;
7999 + /* read the directory entries into memory */
8000 + status = vfs_readdir(filp, (filldir_t) nfsd4_get_recdir_dirent,
8005 + dprintk("NFSD: nfsd4_list_rec_dir OUT recdir [d:mnt] count %d:%d\n",
8006 + atomic_read(&nd_rec_init.dentry->d_count),
8007 + atomic_read(&nd_rec_init.mnt->mnt_count));
8009 + dprintk("NFSD: nfsd4_list_rec_dir DONE status: %d\n", status);
8011 + nfs4_reset_user();
8017 + * Hold reference to the recovery directory.
8021 +nfsd4_init_rec_dir(char *rec_dirname)
8025 + printk("NFSD: Using %s as the NFSv4 state recovery directory\n",
8028 + nfs4_save_set_user();
8030 + status = path_lookup(rec_dirname, LOOKUP_FOLLOW, &nd_rec_init);
8032 + printk("NFSD: nfsd4_init_rec_dir INITIAL recdir [d:mnt] count %d:%d\n",
8033 + atomic_read(&nd_rec_init.dentry->d_count),
8034 + atomic_read(&nd_rec_init.mnt->mnt_count));
8038 + nfs4_reset_user();
8039 + printk("NFSD: nfsd4_init_rec_dir rec_dir_init %d\n", rec_dir_init);
8043 +nfsd4_shutdown_rec_dir(void)
8046 + path_release(&nd_rec_init);
8048 + printk("NFSD: nfsd4_shutdown_rec_dir FINAL recdir [d:mnt] count %d:%d\n",
8049 + atomic_read(&nd_rec_init.dentry->d_count),
8050 + atomic_read(&nd_rec_init.mnt->mnt_count));
8052 Index: linux-2.6.10/fs/nfsd/Makefile
8053 ===================================================================
8054 --- linux-2.6.10.orig/fs/nfsd/Makefile 2004-12-25 05:35:50.000000000 +0800
8055 +++ linux-2.6.10/fs/nfsd/Makefile 2005-04-05 14:49:13.431687392 +0800
8057 export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o
8058 nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
8059 nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
8061 + nfs4acl.o nfs4callback.o nfs4recover.o
8062 nfsd-objs := $(nfsd-y)
8063 Index: linux-2.6.10/fs/nfs/nfs4xdr.c
8064 ===================================================================
8065 --- linux-2.6.10.orig/fs/nfs/nfs4xdr.c 2004-12-25 05:35:40.000000000 +0800
8066 +++ linux-2.6.10/fs/nfs/nfs4xdr.c 2005-04-05 14:49:13.452684200 +0800
8068 #define encode_getfh_maxsz (op_encode_hdr_maxsz)
8069 #define decode_getfh_maxsz (op_decode_hdr_maxsz + 1 + \
8070 ((3+NFS4_FHSIZE) >> 2))
8071 -#define encode_getattr_maxsz (op_encode_hdr_maxsz + 3)
8072 +#define nfs4_fattr_bitmap_maxsz 3
8073 +#define encode_getattr_maxsz (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
8074 #define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2))
8075 #define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2))
8076 -#define nfs4_fattr_bitmap_maxsz (36 + 2 * nfs4_name_maxsz)
8077 -#define decode_getattr_maxsz (op_decode_hdr_maxsz + 3 + \
8078 - nfs4_fattr_bitmap_maxsz)
8079 +/* This is based on getfattr, which uses the most attributes: */
8080 +#define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \
8081 + 3 + 3 + 3 + 2 * nfs4_name_maxsz))
8082 +#define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \
8083 + nfs4_fattr_value_maxsz)
8084 +#define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz)
8085 #define encode_savefh_maxsz (op_encode_hdr_maxsz)
8086 #define decode_savefh_maxsz (op_decode_hdr_maxsz)
8087 #define encode_fsinfo_maxsz (op_encode_hdr_maxsz + 2)
8088 @@ -122,11 +126,11 @@
8089 #define encode_symlink_maxsz (op_encode_hdr_maxsz + \
8090 1 + nfs4_name_maxsz + \
8092 - nfs4_fattr_bitmap_maxsz)
8094 #define decode_symlink_maxsz (op_decode_hdr_maxsz + 8)
8095 #define encode_create_maxsz (op_encode_hdr_maxsz + \
8096 2 + nfs4_name_maxsz + \
8097 - nfs4_fattr_bitmap_maxsz)
8099 #define decode_create_maxsz (op_decode_hdr_maxsz + 8)
8100 #define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4)
8101 #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
8103 #define NFS4_enc_setattr_sz (compound_encode_hdr_maxsz + \
8104 encode_putfh_maxsz + \
8105 op_encode_hdr_maxsz + 4 + \
8106 - nfs4_fattr_bitmap_maxsz + \
8107 + nfs4_fattr_maxsz + \
8108 encode_getattr_maxsz)
8109 #define NFS4_dec_setattr_sz (compound_decode_hdr_maxsz + \
8110 decode_putfh_maxsz + \
8111 @@ -360,6 +364,20 @@
8112 encode_delegreturn_maxsz)
8113 #define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \
8114 decode_delegreturn_maxsz)
8115 +#define NFS4_enc_getacl_sz (compound_encode_hdr_maxsz + \
8116 + encode_putfh_maxsz + \
8117 + encode_getattr_maxsz)
8118 +#define NFS4_dec_getacl_sz (compound_decode_hdr_maxsz + \
8119 + decode_putfh_maxsz + \
8120 + op_decode_hdr_maxsz + \
8121 + nfs4_fattr_bitmap_maxsz + 1)
8122 +#define NFS4_enc_setacl_sz (compound_encode_hdr_maxsz + \
8123 + encode_putfh_maxsz + \
8124 + op_encode_hdr_maxsz + 4 + \
8125 + nfs4_fattr_bitmap_maxsz + 1)
8126 +#define NFS4_dec_setacl_sz (compound_decode_hdr_maxsz + \
8127 + decode_putfh_maxsz + \
8128 + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
8133 * In the worst-case, this would be
8134 * 12(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime)
8135 * = 36 bytes, plus any contribution from variable-length fields
8136 - * such as owner/group/acl's.
8137 + * such as owner/group.
8141 @@ -1083,6 +1101,27 @@
8145 +extern nfs4_stateid zero_stateid;
8148 +encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg)
8152 + RESERVE_SPACE(4+sizeof(zero_stateid.data));
8153 + WRITE32(OP_SETATTR);
8154 + WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data));
8155 + RESERVE_SPACE(2*4);
8157 + WRITE32(FATTR4_WORD0_ACL);
8158 + if (arg->acl_len % 4)
8161 + WRITE32(arg->acl_len);
8162 + xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len);
8167 encode_savefh(struct xdr_stream *xdr)
8169 @@ -1627,6 +1666,34 @@
8173 + * Encode a GETACL request
8176 +nfs4_xdr_enc_getacl(struct rpc_rqst *req, uint32_t *p,
8177 + struct nfs_getaclargs *args)
8179 + struct xdr_stream xdr;
8180 + struct rpc_auth *auth = req->rq_task->tk_auth;
8181 + struct compound_hdr hdr = {
8184 + int replen, status;
8186 + xdr_init_encode(&xdr, &req->rq_snd_buf, p);
8187 + encode_compound_hdr(&xdr, &hdr);
8188 + status = encode_putfh(&xdr, args->fh);
8191 + status = encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0);
8192 + /* set up reply buffer: */
8193 + replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_getacl_sz) << 2;
8194 + xdr_inline_pages(&req->rq_rcv_buf, replen,
8195 + args->acl_pages, args->acl_pgbase, args->acl_len);
8201 * Encode a WRITE request
8203 static int nfs4_xdr_enc_write(struct rpc_rqst *req, uint32_t *p, struct nfs_writeargs *args)
8204 @@ -3122,6 +3189,46 @@
8205 return decode_op_hdr(xdr, OP_RENEW);
8208 +static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
8214 + struct kvec *iov = req->rq_rcv_buf.head;
8217 + if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
8219 + if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
8221 + if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
8224 + if (unlikely(bitmap[0] & (FATTR4_WORD0_ACL - 1U)))
8226 + if (likely(bitmap[0] & FATTR4_WORD0_ACL)) {
8227 + int hdrlen, recvd;
8229 + /* We ignore &savep and don't do consistency checks on
8230 + * the attr length. Let userspace figure it out.... */
8231 + hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base;
8232 + recvd = req->rq_rcv_buf.len - hdrlen;
8233 + if (attrlen > recvd) {
8234 + printk(KERN_WARNING "NFS: server cheating in getattr"
8235 + " acl reply: attrlen %u > recvd %u\n",
8239 + if (attrlen <= *acl_len)
8240 + xdr_read_pages(xdr, attrlen);
8241 + *acl_len = attrlen;
8249 decode_savefh(struct xdr_stream *xdr)
8251 @@ -3413,6 +3520,71 @@
8256 + * Encode a SETACL request
8259 +nfs4_xdr_enc_setacl(struct rpc_rqst *req, uint32_t *p, struct nfs_setaclargs *args)
8261 + struct xdr_stream xdr;
8262 + struct compound_hdr hdr = {
8267 + xdr_init_encode(&xdr, &req->rq_snd_buf, p);
8268 + encode_compound_hdr(&xdr, &hdr);
8269 + status = encode_putfh(&xdr, args->fh);
8272 + status = encode_setacl(&xdr, args);
8277 + * Decode SETACL response
8280 +nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, uint32_t *p, void *res)
8282 + struct xdr_stream xdr;
8283 + struct compound_hdr hdr;
8286 + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
8287 + status = decode_compound_hdr(&xdr, &hdr);
8290 + status = decode_putfh(&xdr);
8293 + status = decode_setattr(&xdr, res);
8299 + * Decode GETACL response
8302 +nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, uint32_t *p, ssize_t *acl_len)
8304 + struct xdr_stream xdr;
8305 + struct compound_hdr hdr;
8308 + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
8309 + status = decode_compound_hdr(&xdr, &hdr);
8312 + status = decode_putfh(&xdr);
8315 + status = decode_getacl(&xdr, rqstp, acl_len);
8322 * Decode CLOSE response
8323 @@ -4009,6 +4181,8 @@
8324 PROC(READDIR, enc_readdir, dec_readdir),
8325 PROC(SERVER_CAPS, enc_server_caps, dec_server_caps),
8326 PROC(DELEGRETURN, enc_delegreturn, dec_delegreturn),
8327 + PROC(GETACL, enc_getacl, dec_getacl),
8328 + PROC(SETACL, enc_setacl, dec_setacl),
8331 struct rpc_version nfs_version4 = {
8332 Index: linux-2.6.10/fs/nfs/inode.c
8333 ===================================================================
8334 --- linux-2.6.10.orig/fs/nfs/inode.c 2004-12-25 05:35:24.000000000 +0800
8335 +++ linux-2.6.10/fs/nfs/inode.c 2005-04-05 14:49:13.445685264 +0800
8336 @@ -486,13 +486,27 @@
8340 - buf->f_frsize = server->wtmult;
8342 + * Current versions of glibc do not correctly handle the
8343 + * case where f_frsize != f_bsize. Eventually we want to
8344 + * report the value of wtmult in this field.
8346 + buf->f_frsize = sb->s_blocksize;
8349 + * On most *nix systems, f_blocks, f_bfree, and f_bavail
8350 + * are reported in units of f_frsize. Linux hasn't had
8351 + * an f_frsize field in its statfs struct until recently,
8352 + * thus historically Linux's sys_statfs reports these
8353 + * fields in units of f_bsize.
8355 buf->f_bsize = sb->s_blocksize;
8356 blockbits = sb->s_blocksize_bits;
8357 blockres = (1 << blockbits) - 1;
8358 buf->f_blocks = (res.tbytes + blockres) >> blockbits;
8359 buf->f_bfree = (res.fbytes + blockres) >> blockbits;
8360 buf->f_bavail = (res.abytes + blockres) >> blockbits;
8362 buf->f_files = res.tfiles;
8363 buf->f_ffree = res.afiles;
8367 memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
8368 if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
8369 - nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
8370 + nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS;
8372 - nfsi->flags |= NFS_INO_INVALID_ATTR;
8373 + nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS;
8379 if (nfs_compare_fh(NFS_FH(inode), fh))
8381 - if (is_bad_inode(inode))
8382 + if (is_bad_inode(inode) || NFS_STALE(inode))
8387 /* Why so? Because we want revalidate for devices/FIFOs, and
8388 * that's precisely what we have in nfs_file_inode_operations.
8390 - inode->i_op = &nfs_file_inode_operations;
8391 + inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops;
8392 if (S_ISREG(inode->i_mode)) {
8393 inode->i_fop = &nfs_file_operations;
8394 inode->i_data.a_ops = &nfs_file_aops;
8395 @@ -766,13 +780,8 @@
8396 vmtruncate(inode, attr->ia_size);
8399 - if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) {
8400 - struct rpc_cred **cred = &NFS_I(inode)->cache_access.cred;
8402 - put_rpccred(*cred);
8406 + if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0)
8407 + NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS;
8408 nfs_end_data_update(inode);
8411 @@ -949,14 +958,14 @@
8413 if (!inode || is_bad_inode(inode))
8415 - if (NFS_STALE(inode) && inode != inode->i_sb->s_root->d_inode)
8416 + if (NFS_STALE(inode))
8419 while (NFS_REVALIDATING(inode)) {
8420 status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING);
8423 - if (NFS_SERVER(inode)->flags & NFS_MOUNT_NOAC)
8424 + if (NFS_ATTRTIMEO(inode) == 0)
8426 if (NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME))
8428 @@ -968,14 +977,14 @@
8429 /* Protect against RPC races by saving the change attribute */
8430 verifier = nfs_save_change_attribute(inode);
8431 status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr);
8433 + if (status != 0) {
8434 dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n",
8436 (long long)NFS_FILEID(inode), status);
8437 if (status == -ESTALE) {
8438 - NFS_FLAGS(inode) |= NFS_INO_STALE;
8439 - if (inode != inode->i_sb->s_root->d_inode)
8440 - remove_inode_hash(inode);
8441 + nfs_zap_caches(inode);
8442 + if (!S_ISDIR(inode->i_mode))
8443 + NFS_FLAGS(inode) |= NFS_INO_STALE;
8447 @@ -1014,7 +1023,6 @@
8449 (long long)NFS_FILEID(inode));
8451 - NFS_FLAGS(inode) &= ~NFS_INO_STALE;
8453 NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING;
8454 wake_up(&nfsi->nfs_i_wait);
8455 @@ -1161,7 +1169,7 @@
8456 if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
8457 || inode->i_uid != fattr->uid
8458 || inode->i_gid != fattr->gid)
8459 - nfsi->flags |= NFS_INO_INVALID_ATTR;
8460 + nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS;
8462 /* Has the link count changed? */
8463 if (inode->i_nlink != fattr->nlink)
8464 @@ -1270,7 +1278,7 @@
8466 nfsi->change_attr = fattr->change_attr;
8468 - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
8469 + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS;
8472 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
8473 @@ -1278,14 +1286,8 @@
8475 if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) ||
8476 inode->i_uid != fattr->uid ||
8477 - inode->i_gid != fattr->gid) {
8478 - struct rpc_cred **cred = &NFS_I(inode)->cache_access.cred;
8480 - put_rpccred(*cred);
8483 - invalid |= NFS_INO_INVALID_ATTR;
8485 + inode->i_gid != fattr->gid)
8486 + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS;
8488 inode->i_mode = fattr->mode;
8489 inode->i_nlink = fattr->nlink;
8490 @@ -1335,7 +1337,8 @@
8492 nfs_invalidate_inode(inode);
8495 + NFS_FLAGS(inode) |= NFS_INO_STALE;
8500 @@ -1449,8 +1452,6 @@
8504 - nfs4_renewd_prepare_shutdown(server);
8506 if (server->client != NULL && !IS_ERR(server->client))
8507 rpc_shutdown_client(server->client);
8508 if (server->client_sys != NULL && !IS_ERR(server->client_sys))
8509 @@ -1461,8 +1462,6 @@
8511 rpciod_down(); /* release rpciod */
8513 - destroy_nfsv4_state(server);
8515 if (server->hostname != NULL)
8516 kfree(server->hostname);
8518 @@ -1478,8 +1477,53 @@
8520 #ifdef CONFIG_NFS_V4
8522 -static void nfs4_clear_inode(struct inode *);
8523 +#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
8526 +nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf,
8527 + size_t buflen, int flags)
8529 + struct inode *inode = dentry->d_inode;
8531 + if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0)
8534 + if (!S_ISREG(inode->i_mode) &&
8535 + (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
8538 + return nfs4_proc_set_acl(inode, buf, buflen);
8541 +/* The getxattr man page suggests returning -ENODATA for unknown attributes,
8542 + * and that's what we'll do for e.g. user attributes that haven't been set.
8543 + * But we'll follow ext2/ext3's lead by returning -EOPNOTSUPP for unsupported
8544 + * attributes in kernel-managed attribute namespaces. */
8546 +nfs4_getxattr(struct dentry *dentry, const char *key, void *buf,
8549 + struct inode *inode = dentry->d_inode;
8551 + if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0)
8552 + return -EOPNOTSUPP;
8554 + return nfs4_proc_get_acl(inode, buf, buflen);
8558 +nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen)
8560 + ssize_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1;
8562 + if (buf && buflen < len)
8565 + memcpy(buf, XATTR_NAME_NFSV4_ACL, len);
8569 +static void nfs4_clear_inode(struct inode *);
8571 static struct super_operations nfs4_sops = {
8572 .alloc_inode = nfs_alloc_inode,
8573 @@ -1543,9 +1587,6 @@
8574 server->wsize = nfs_block_size(data->wsize, NULL);
8575 server->flags = data->flags & NFS_MOUNT_FLAGMASK;
8577 - /* NFSv4 doesn't use NLM locking */
8578 - server->flags |= NFS_MOUNT_NONLM;
8580 server->acregmin = data->acregmin*HZ;
8581 server->acregmax = data->acregmax*HZ;
8582 server->acdirmin = data->acdirmin*HZ;
8583 @@ -1790,8 +1831,22 @@
8585 static void nfs4_kill_super(struct super_block *sb)
8587 + struct nfs_server *server = NFS_SB(sb);
8589 nfs_return_all_delegations(sb);
8590 - nfs_kill_super(sb);
8591 + kill_anon_super(sb);
8593 + nfs4_renewd_prepare_shutdown(server);
8595 + if (server->client != NULL && !IS_ERR(server->client))
8596 + rpc_shutdown_client(server->client);
8597 + rpciod_down(); /* release rpciod */
8599 + destroy_nfsv4_state(server);
8601 + if (server->hostname != NULL)
8602 + kfree(server->hostname);
8606 static struct file_system_type nfs4_fs_type = {
8607 @@ -1821,9 +1876,13 @@
8608 extern int nfs_init_nfspagecache(void);
8609 extern void nfs_destroy_nfspagecache(void);
8610 extern int nfs_init_readpagecache(void);
8611 -extern int nfs_destroy_readpagecache(void);
8612 +extern void nfs_destroy_readpagecache(void);
8613 extern int nfs_init_writepagecache(void);
8614 -extern int nfs_destroy_writepagecache(void);
8615 +extern void nfs_destroy_writepagecache(void);
8616 +#ifdef CONFIG_NFS_DIRECTIO
8617 +extern int nfs_init_directcache(void);
8618 +extern void nfs_destroy_directcache(void);
8621 static kmem_cache_t * nfs_inode_cachep;
8623 @@ -1904,6 +1963,12 @@
8627 +#ifdef CONFIG_NFS_DIRECTIO
8628 + err = nfs_init_directcache();
8633 #ifdef CONFIG_PROC_FS
8634 rpc_proc_register(&nfs_rpcstat);
8636 @@ -1914,8 +1979,14 @@
8640 +#ifdef CONFIG_PROC_FS
8641 rpc_proc_unregister("nfs");
8643 nfs_destroy_writepagecache();
8644 +#ifdef CONFIG_NFS_DIRECTIO
8646 + nfs_destroy_directcache();
8649 nfs_destroy_readpagecache();
8651 @@ -1928,6 +1999,9 @@
8653 static void __exit exit_nfs_fs(void)
8655 +#ifdef CONFIG_NFS_DIRECTIO
8656 + nfs_destroy_directcache();
8658 nfs_destroy_writepagecache();
8659 nfs_destroy_readpagecache();
8660 nfs_destroy_inodecache();
8661 Index: linux-2.6.10/fs/nfs/nfs4state.c
8662 ===================================================================
8663 --- linux-2.6.10.orig/fs/nfs/nfs4state.c 2004-12-25 05:33:49.000000000 +0800
8664 +++ linux-2.6.10/fs/nfs/nfs4state.c 2005-04-05 14:49:13.446685112 +0800
8666 state->owner = owner;
8667 atomic_inc(&owner->so_count);
8668 list_add(&state->inode_states, &nfsi->open_states);
8669 - state->inode = inode;
8670 + state->inode = igrab(inode);
8671 spin_unlock(&inode->i_lock);
8673 spin_unlock(&inode->i_lock);
8675 list_del(&state->inode_states);
8676 spin_unlock(&inode->i_lock);
8677 list_del(&state->open_states);
8679 BUG_ON (state->state != 0);
8680 nfs4_free_open_state(state);
8681 nfs4_put_state_owner(owner);
8683 struct nfs4_state_owner *owner = state->owner;
8684 struct nfs4_client *clp = owner->so_client;
8688 atomic_inc(&owner->so_count);
8689 down_read(&clp->cl_sem);
8690 @@ -508,10 +508,8 @@
8691 newstate |= FMODE_WRITE;
8692 if (state->state == newstate)
8694 - if (newstate != 0)
8695 - status = nfs4_do_downgrade(inode, state, newstate);
8697 - status = nfs4_do_close(inode, state);
8698 + if (nfs4_do_close(inode, state, newstate) == -EINPROGRESS)
8702 nfs4_put_open_state(state);
8703 Index: linux-2.6.10/fs/nfs/idmap.c
8704 ===================================================================
8705 --- linux-2.6.10.orig/fs/nfs/idmap.c 2004-12-25 05:34:26.000000000 +0800
8706 +++ linux-2.6.10/fs/nfs/idmap.c 2005-04-05 14:49:13.454683896 +0800
8708 static ssize_t idmap_pipe_downcall(struct file *, const char __user *,
8710 void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
8711 +static void idmap_pipe_release(struct inode *inode);
8713 static unsigned int fnvhash32(const void *, size_t);
8716 .upcall = idmap_pipe_upcall,
8717 .downcall = idmap_pipe_downcall,
8718 .destroy_msg = idmap_pipe_destroy_msg,
8719 + .release_pipe = idmap_pipe_release,
8723 @@ -448,6 +450,19 @@
8724 up(&idmap->idmap_im_lock);
8728 +idmap_pipe_release(struct inode *inode)
8730 + struct rpc_inode *rpci = RPC_I(inode);
8731 + struct idmap *idmap = (struct idmap *)rpci->private;
8732 + struct idmap_msg *im = &idmap->idmap_im;
8734 + down(&idmap->idmap_im_lock);
8735 + im->im_status = IDMAP_STATUS_LOOKUPFAIL;
8736 + wake_up(&idmap->idmap_wq);
8737 + up(&idmap->idmap_im_lock);
8741 * Fowler/Noll/Vo hash
8742 * http://www.isthe.com/chongo/tech/comp/fnv/
8743 Index: linux-2.6.10/fs/nfs/dir.c
8744 ===================================================================
8745 --- linux-2.6.10.orig/fs/nfs/dir.c 2005-03-31 15:35:26.000000000 +0800
8746 +++ linux-2.6.10/fs/nfs/dir.c 2005-04-05 14:49:13.439686176 +0800
8748 static int nfs_opendir(struct inode *, struct file *);
8749 static int nfs_readdir(struct file *, void *, filldir_t);
8750 static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *);
8751 -static int nfs_cached_lookup(struct inode *, struct dentry *,
8752 - struct nfs_fh *, struct nfs_fattr *);
8753 static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *);
8754 static int nfs_mkdir(struct inode *, struct dentry *, int);
8755 static int nfs_rmdir(struct inode *, struct dentry *);
8757 .permission = nfs_permission,
8758 .getattr = nfs_getattr,
8759 .setattr = nfs_setattr,
8760 + .getxattr = nfs4_getxattr,
8761 + .setxattr = nfs4_setxattr,
8762 + .listxattr = nfs4_listxattr,
8765 #endif /* CONFIG_NFS_V4 */
8766 @@ -294,24 +295,13 @@
8770 -static unsigned int nfs_type2dtype[] = {
8783 -unsigned int nfs_type_to_d_type(enum nfs_ftype type)
8784 +static inline unsigned int dt_type(struct inode *inode)
8786 - return nfs_type2dtype[type];
8787 + return (inode->i_mode >> 12) & 15;
8790 +static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc);
8793 * Once we've found the start of the dirent within a page: fill 'er up...
8797 struct file *file = desc->file;
8798 struct nfs_entry *entry = desc->entry;
8799 + struct dentry *dentry = NULL;
8800 unsigned long fileid;
8803 @@ -333,9 +324,16 @@
8804 * retrieving the current dirent on the server */
8805 fileid = nfs_fileid_to_ino_t(entry->ino);
8807 + /* Get a dentry if we have one */
8808 + if (dentry != NULL)
8810 + dentry = nfs_readdir_lookup(desc);
8812 /* Use readdirplus info */
8813 - if (desc->plus && (entry->fattr->valid & NFS_ATTR_FATTR))
8814 - d_type = nfs_type_to_d_type(entry->fattr->type);
8815 + if (dentry != NULL && dentry->d_inode != NULL) {
8816 + d_type = dt_type(dentry->d_inode);
8817 + fileid = dentry->d_inode->i_ino;
8820 res = filldir(dirent, entry->name, entry->len,
8821 entry->prev_cookie, fileid, d_type);
8825 dir_page_release(desc);
8827 + if (dentry != NULL)
8829 dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (long long)desc->target, res);
8832 @@ -615,24 +614,10 @@
8837 - * Note: we're not holding inode->i_sem and so may be racing with
8838 - * operations that change the directory. We therefore save the
8839 - * change attribute *before* we do the RPC call.
8841 - verifier = nfs_save_change_attribute(dir);
8842 - error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr);
8844 - if (nfs_compare_fh(NFS_FH(inode), &fhandle))
8846 - if (nfs_lookup_verify_inode(inode, isopen))
8847 - goto out_zap_parent;
8848 - goto out_valid_renew;
8851 if (NFS_STALE(inode))
8854 + verifier = nfs_save_change_attribute(dir);
8855 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
8859 if ((error = nfs_refresh_inode(inode, &fattr)) != 0)
8863 nfs_renew_times(dentry);
8864 nfs_set_verifier(dentry, verifier);
8868 static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
8870 + struct dentry *res;
8871 struct inode *inode = NULL;
8873 struct nfs_fh fhandle;
8874 @@ -731,11 +716,11 @@
8875 dfprintk(VFS, "NFS: lookup(%s/%s)\n",
8876 dentry->d_parent->d_name.name, dentry->d_name.name);
8878 - error = -ENAMETOOLONG;
8879 + res = ERR_PTR(-ENAMETOOLONG);
8880 if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
8884 + res = ERR_PTR(-ENOMEM);
8885 dentry->d_op = NFS_PROTO(dir)->dentry_ops;
8888 @@ -746,29 +731,27 @@
8889 if (nfs_is_exclusive_create(dir, nd))
8892 - error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr);
8894 - error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name,
8895 - &fhandle, &fattr);
8896 - if (error == -ENOENT)
8900 + error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
8901 + if (error == -ENOENT)
8904 + res = ERR_PTR(error);
8908 + res = ERR_PTR(-EACCES);
8909 inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
8914 - d_add(dentry, inode);
8915 + res = d_add_unique(dentry, inode);
8918 nfs_renew_times(dentry);
8919 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
8923 - BUG_ON(error > 0);
8924 - return ERR_PTR(error);
8928 #ifdef CONFIG_NFS_V4
8929 @@ -798,15 +781,15 @@
8931 static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
8933 + struct dentry *res = NULL;
8934 struct inode *inode = NULL;
8937 /* Check that we are indeed trying to open this file */
8938 if (!is_atomic_open(dir, nd))
8941 if (dentry->d_name.len > NFS_SERVER(dir)->namelen) {
8942 - error = -ENAMETOOLONG;
8943 + res = ERR_PTR(-ENAMETOOLONG);
8946 dentry->d_op = NFS_PROTO(dir)->dentry_ops;
8948 inode = nfs4_atomic_open(dir, dentry, nd);
8950 if (IS_ERR(inode)) {
8951 - error = PTR_ERR(inode);
8952 + int error = PTR_ERR(inode);
8954 /* Make a negative dentry */
8956 @@ -841,16 +824,18 @@
8960 + res = ERR_PTR(error);
8965 - d_add(dentry, inode);
8966 + res = d_add_unique(dentry, inode);
8969 nfs_renew_times(dentry);
8970 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
8972 - BUG_ON(error > 0);
8973 - return ERR_PTR(error);
8976 return nfs_lookup(dir, dentry, nd);
8978 @@ -906,83 +891,51 @@
8980 #endif /* CONFIG_NFSV4 */
8983 -int find_dirent_name(nfs_readdir_descriptor_t *desc, struct page *page, struct dentry *dentry)
8984 +static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
8986 + struct dentry *parent = desc->file->f_dentry;
8987 + struct inode *dir = parent->d_inode;
8988 struct nfs_entry *entry = desc->entry;
8991 - while((status = dir_decode(desc)) == 0) {
8992 - if (entry->len != dentry->d_name.len)
8994 - if (memcmp(entry->name, dentry->d_name.name, entry->len))
8996 - if (!(entry->fattr->valid & NFS_ATTR_FATTR))
9004 - * Use the cached Readdirplus results in order to avoid a LOOKUP call
9005 - * whenever we believe that the parent directory has not changed.
9007 - * We assume that any file creation/rename changes the directory mtime.
9008 - * As this results in a page cache invalidation whenever it occurs,
9009 - * we don't require any other tests for cache coherency.
9012 -int nfs_cached_lookup(struct inode *dir, struct dentry *dentry,
9013 - struct nfs_fh *fh, struct nfs_fattr *fattr)
9015 - nfs_readdir_descriptor_t desc;
9016 - struct nfs_server *server;
9017 - struct nfs_entry entry;
9018 - struct page *page;
9019 - unsigned long timestamp;
9022 - if (!NFS_USE_READDIRPLUS(dir))
9024 - server = NFS_SERVER(dir);
9025 - /* Don't use readdirplus unless the cache is stable */
9026 - if ((server->flags & NFS_MOUNT_NOAC) != 0
9027 - || nfs_caches_unstable(dir)
9028 - || nfs_attribute_timeout(dir))
9030 - if ((NFS_FLAGS(dir) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) != 0)
9032 - timestamp = NFS_I(dir)->readdir_timestamp;
9035 - entry.fattr = fattr;
9037 - desc.decode = NFS_PROTO(dir)->decode_dirent;
9038 - desc.entry = &entry;
9039 - desc.page_index = 0;
9042 - for(;(page = find_get_page(dir->i_mapping, desc.page_index)); desc.page_index++) {
9045 - if (PageUptodate(page)) {
9046 - void * kaddr = kmap_atomic(page, KM_USER0);
9048 - res = find_dirent_name(&desc, page, dentry);
9049 - kunmap_atomic(kaddr, KM_USER0);
9051 - page_cache_release(page);
9052 + struct dentry *dentry, *alias;
9053 + struct qstr name = {
9054 + .name = entry->name,
9055 + .len = entry->len,
9057 + struct inode *inode;
9061 - if (res != -EAGAIN)
9062 + switch (name.len) {
9064 + if (name.name[0] == '.' && name.name[1] == '.')
9065 + return dget_parent(parent);
9068 + if (name.name[0] == '.')
9069 + return dget(parent);
9071 + name.hash = full_name_hash(name.name, name.len);
9072 + dentry = d_lookup(parent, &name);
9073 + if (dentry != NULL)
9075 + if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR))
9077 + /* Note: caller is already holding the dir->i_sem! */
9078 + dentry = d_alloc(parent, &name);
9079 + if (dentry == NULL)
9081 + dentry->d_op = NFS_PROTO(dir)->dentry_ops;
9082 + inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
9089 - fattr->timestamp = timestamp;
9091 + alias = d_add_unique(dentry, inode);
9092 + if (alias != NULL) {
9096 + nfs_renew_times(dentry);
9097 + nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
9102 @@ -1045,15 +998,9 @@
9103 if (nd && (nd->flags & LOOKUP_CREATE))
9104 open_flags = nd->intent.open.flags;
9107 - * The 0 argument passed into the create function should one day
9108 - * contain the O_EXCL flag if requested. This allows NFSv3 to
9109 - * select the appropriate create strategy. Currently open_namei
9110 - * does not pass the create flags.
9113 nfs_begin_data_update(dir);
9114 - inode = NFS_PROTO(dir)->create(dir, &dentry->d_name, &attr, open_flags);
9115 + inode = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
9116 nfs_end_data_update(dir);
9117 if (!IS_ERR(inode)) {
9118 d_instantiate(dentry, inode);
9119 @@ -1508,7 +1455,7 @@
9121 if (cache->cred != cred
9122 || time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))
9123 - || (NFS_FLAGS(inode) & NFS_INO_INVALID_ATTR))
9124 + || (NFS_FLAGS(inode) & NFS_INO_INVALID_ACCESS))
9126 memcpy(res, cache, sizeof(*res));
9128 @@ -1522,6 +1469,7 @@
9130 put_rpccred(cache->cred);
9131 cache->cred = get_rpccred(set->cred);
9132 + NFS_FLAGS(inode) &= ~NFS_INO_INVALID_ACCESS;
9134 cache->jiffies = set->jiffies;
9135 cache->mask = set->mask;
9136 Index: linux-2.6.10/fs/nfs/unlink.c
9137 ===================================================================
9138 --- linux-2.6.10.orig/fs/nfs/unlink.c 2004-12-25 05:35:29.000000000 +0800
9139 +++ linux-2.6.10/fs/nfs/unlink.c 2005-04-05 14:49:13.435686784 +0800
9141 spin_lock(&dentry->d_lock);
9142 dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
9143 spin_unlock(&dentry->d_lock);
9144 - if (data->task.tk_rpcwait == &nfs_delete_queue)
9145 - rpc_wake_up_task(&data->task);
9146 + rpc_wake_up_task(&data->task);
9147 nfs_put_unlinkdata(data);
9149 Index: linux-2.6.10/fs/nfs/write.c
9150 ===================================================================
9151 --- linux-2.6.10.orig/fs/nfs/write.c 2004-12-25 05:35:23.000000000 +0800
9152 +++ linux-2.6.10/fs/nfs/write.c 2005-04-05 14:49:13.443685568 +0800
9154 #include <linux/nfs_page.h>
9155 #include <asm/uaccess.h>
9156 #include <linux/smp_lock.h>
9157 -#include <linux/mempool.h>
9159 #include "delegation.h"
9162 static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
9164 static kmem_cache_t *nfs_wdata_cachep;
9165 -static mempool_t *nfs_wdata_mempool;
9166 -static mempool_t *nfs_commit_mempool;
9167 +mempool_t *nfs_wdata_mempool;
9168 +mempool_t *nfs_commit_mempool;
9170 static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
9172 -static __inline__ struct nfs_write_data *nfs_writedata_alloc(void)
9174 - struct nfs_write_data *p;
9175 - p = (struct nfs_write_data *)mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
9177 - memset(p, 0, sizeof(*p));
9178 - INIT_LIST_HEAD(&p->pages);
9183 -static __inline__ void nfs_writedata_free(struct nfs_write_data *p)
9185 - mempool_free(p, nfs_wdata_mempool);
9188 -static void nfs_writedata_release(struct rpc_task *task)
9189 +void nfs_writedata_release(struct rpc_task *task)
9191 struct nfs_write_data *wdata = (struct nfs_write_data *)task->tk_calldata;
9192 nfs_writedata_free(wdata);
9195 -static __inline__ struct nfs_write_data *nfs_commit_alloc(void)
9197 - struct nfs_write_data *p;
9198 - p = (struct nfs_write_data *)mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
9200 - memset(p, 0, sizeof(*p));
9201 - INIT_LIST_HEAD(&p->pages);
9206 -static __inline__ void nfs_commit_free(struct nfs_write_data *p)
9208 - mempool_free(p, nfs_commit_mempool);
9211 /* Adjust the file length if we're writing beyond the end */
9212 static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
9214 @@ -184,11 +151,10 @@
9215 int result, written = 0;
9216 struct nfs_write_data *wdata;
9218 - wdata = kmalloc(sizeof(*wdata), GFP_NOFS);
9219 + wdata = nfs_writedata_alloc();
9223 - memset(wdata, 0, sizeof(*wdata));
9225 wdata->cred = ctx->cred;
9226 wdata->inode = inode;
9230 nfs_end_data_update_defer(inode);
9233 + nfs_writedata_free(wdata);
9234 return written ? written : result;
9237 @@ -1199,7 +1164,8 @@
9239 if (time_before(complain, jiffies)) {
9241 - "NFS: Server wrote less than requested.\n");
9242 + "NFS: Server wrote zero bytes, expected %u.\n",
9244 complain = jiffies + 300 * HZ;
9246 /* Can't do anything about it except throw an error. */
9247 Index: linux-2.6.10/fs/nfs/proc.c
9248 ===================================================================
9249 --- linux-2.6.10.orig/fs/nfs/proc.c 2004-12-25 05:35:28.000000000 +0800
9250 +++ linux-2.6.10/fs/nfs/proc.c 2005-04-05 14:49:13.440686024 +0800
9252 dprintk("%s: call getattr\n", __FUNCTION__);
9254 status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0);
9255 - dprintk("%s: reply getattr %d\n", __FUNCTION__, status);
9256 + dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
9259 dprintk("%s: call statfs\n", __FUNCTION__);
9260 status = rpc_call(server->client_sys, NFSPROC_STATFS, fhandle, &fsinfo, 0);
9261 - dprintk("%s: reply statfs %d\n", __FUNCTION__, status);
9262 + dprintk("%s: reply statfs: %d\n", __FUNCTION__, status);
9265 info->rtmax = NFS_MAXDATA;
9268 status = rpc_call(server->client, NFSPROC_GETATTR,
9270 - dprintk("NFS reply getattr\n");
9271 + dprintk("NFS reply getattr: %d\n", status);
9276 dprintk("NFS call setattr\n");
9278 status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0);
9279 - dprintk("NFS reply setattr\n");
9280 + dprintk("NFS reply setattr: %d\n", status);
9284 @@ -213,15 +213,15 @@
9287 static struct inode *
9288 -nfs_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
9289 +nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
9292 struct nfs_fh fhandle;
9293 struct nfs_fattr fattr;
9294 struct nfs_createargs arg = {
9296 - .name = name->name,
9298 + .name = dentry->d_name.name,
9299 + .len = dentry->d_name.len,
9302 struct nfs_diropok res = {
9307 - dprintk("NFS call create %s\n", name->name);
9308 + dprintk("NFS call create %s\n", dentry->d_name.name);
9309 status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
9310 dprintk("NFS reply create: %d\n", status);
9313 .version = 2, /* protocol version */
9314 .dentry_ops = &nfs_dentry_operations,
9315 .dir_inode_ops = &nfs_dir_inode_operations,
9316 + .file_inode_ops = &nfs_file_inode_operations,
9317 .getroot = nfs_proc_get_root,
9318 .getattr = nfs_proc_getattr,
9319 .setattr = nfs_proc_setattr,
9320 Index: linux-2.6.10/fs/nfs/callback.c
9321 ===================================================================
9322 --- linux-2.6.10.orig/fs/nfs/callback.c 2004-12-25 05:34:57.000000000 +0800
9323 +++ linux-2.6.10/fs/nfs/callback.c 2005-04-05 14:49:13.436686632 +0800
9324 @@ -139,133 +139,10 @@
9329 - * AUTH_NULL authentication
9331 -static int nfs_callback_null_accept(struct svc_rqst *rqstp, u32 *authp)
9333 - struct kvec *argv = &rqstp->rq_arg.head[0];
9334 - struct kvec *resv = &rqstp->rq_res.head[0];
9336 - if (argv->iov_len < 3*4)
9337 - return SVC_GARBAGE;
9339 - if (svc_getu32(argv) != 0) {
9340 - dprintk("svc: bad null cred\n");
9341 - *authp = rpc_autherr_badcred;
9342 - return SVC_DENIED;
9344 - if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) {
9345 - dprintk("svc: bad null verf\n");
9346 - *authp = rpc_autherr_badverf;
9347 - return SVC_DENIED;
9350 - /* Signal that mapping to nobody uid/gid is required */
9351 - rqstp->rq_cred.cr_uid = (uid_t) -1;
9352 - rqstp->rq_cred.cr_gid = (gid_t) -1;
9353 - rqstp->rq_cred.cr_group_info = groups_alloc(0);
9354 - if (rqstp->rq_cred.cr_group_info == NULL)
9355 - return SVC_DROP; /* kmalloc failure - client must retry */
9357 - /* Put NULL verifier */
9358 - svc_putu32(resv, RPC_AUTH_NULL);
9359 - svc_putu32(resv, 0);
9360 - dprintk("%s: success, returning %d!\n", __FUNCTION__, SVC_OK);
9364 -static int nfs_callback_null_release(struct svc_rqst *rqstp)
9366 - if (rqstp->rq_cred.cr_group_info)
9367 - put_group_info(rqstp->rq_cred.cr_group_info);
9368 - rqstp->rq_cred.cr_group_info = NULL;
9369 - return 0; /* don't drop */
9372 -static struct auth_ops nfs_callback_auth_null = {
9374 - .flavour = RPC_AUTH_NULL,
9375 - .accept = nfs_callback_null_accept,
9376 - .release = nfs_callback_null_release,
9380 - * AUTH_SYS authentication
9382 -static int nfs_callback_unix_accept(struct svc_rqst *rqstp, u32 *authp)
9384 - struct kvec *argv = &rqstp->rq_arg.head[0];
9385 - struct kvec *resv = &rqstp->rq_res.head[0];
9386 - struct svc_cred *cred = &rqstp->rq_cred;
9388 - int len = argv->iov_len;
9390 - dprintk("%s: start\n", __FUNCTION__);
9391 - cred->cr_group_info = NULL;
9392 - rqstp->rq_client = NULL;
9393 - if ((len -= 3*4) < 0)
9394 - return SVC_GARBAGE;
9396 - /* Get length, time stamp and machine name */
9399 - slen = XDR_QUADLEN(ntohl(svc_getu32(argv)));
9400 - if (slen > 64 || (len -= (slen + 3)*4) < 0)
9402 - argv->iov_base = (void*)((u32*)argv->iov_base + slen);
9403 - argv->iov_len -= slen*4;
9405 - cred->cr_uid = ntohl(svc_getu32(argv));
9406 - cred->cr_gid = ntohl(svc_getu32(argv));
9407 - slen = ntohl(svc_getu32(argv));
9408 - if (slen > 16 || (len -= (slen + 2)*4) < 0)
9410 - cred->cr_group_info = groups_alloc(slen);
9411 - if (cred->cr_group_info == NULL)
9413 - for (i = 0; i < slen; i++)
9414 - GROUP_AT(cred->cr_group_info, i) = ntohl(svc_getu32(argv));
9416 - if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) {
9417 - *authp = rpc_autherr_badverf;
9418 - return SVC_DENIED;
9420 - /* Put NULL verifier */
9421 - svc_putu32(resv, RPC_AUTH_NULL);
9422 - svc_putu32(resv, 0);
9423 - dprintk("%s: success, returning %d!\n", __FUNCTION__, SVC_OK);
9426 - *authp = rpc_autherr_badcred;
9427 - return SVC_DENIED;
9430 -static int nfs_callback_unix_release(struct svc_rqst *rqstp)
9432 - if (rqstp->rq_cred.cr_group_info)
9433 - put_group_info(rqstp->rq_cred.cr_group_info);
9434 - rqstp->rq_cred.cr_group_info = NULL;
9438 -static struct auth_ops nfs_callback_auth_unix = {
9440 - .flavour = RPC_AUTH_UNIX,
9441 - .accept = nfs_callback_unix_accept,
9442 - .release = nfs_callback_unix_release,
9446 - * Hook the authentication protocol
9448 -static int nfs_callback_auth(struct svc_rqst *rqstp, u32 *authp)
9449 +static int nfs_callback_authenticate(struct svc_rqst *rqstp)
9451 struct in_addr *addr = &rqstp->rq_addr.sin_addr;
9452 struct nfs4_client *clp;
9453 - struct kvec *argv = &rqstp->rq_arg.head[0];
9457 /* Don't talk to strangers */
9458 clp = nfs4_find_client(addr);
9459 @@ -273,34 +150,19 @@
9461 dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr));
9462 nfs4_put_client(clp);
9463 - flavour = ntohl(svc_getu32(argv));
9465 + switch (rqstp->rq_authop->flavour) {
9467 - if (rqstp->rq_proc != CB_NULL) {
9468 - *authp = rpc_autherr_tooweak;
9469 - retval = SVC_DENIED;
9472 - rqstp->rq_authop = &nfs_callback_auth_null;
9473 - retval = nfs_callback_null_accept(rqstp, authp);
9474 + if (rqstp->rq_proc != CB_NULL)
9475 + return SVC_DENIED;
9478 - /* Eat the authentication flavour */
9479 - rqstp->rq_authop = &nfs_callback_auth_unix;
9480 - retval = nfs_callback_unix_accept(rqstp, authp);
9482 + case RPC_AUTH_GSS:
9483 + /* FIXME: RPCSEC_GSS handling? */
9485 - /* FIXME: need to add RPCSEC_GSS upcalls */
9487 - svc_ungetu32(argv);
9488 - retval = svc_authenticate(rqstp, authp);
9490 - *authp = rpc_autherr_rejectedcred;
9491 - retval = SVC_DENIED;
9493 + return SVC_DENIED;
9495 - dprintk("%s: flavour %d returning error %d\n", __FUNCTION__, flavour, retval);
9502 .pg_name = "NFSv4 callback", /* service name */
9503 .pg_class = "nfs", /* authentication class */
9504 .pg_stats = &nfs4_callback_stats,
9505 - .pg_authenticate = nfs_callback_auth,
9506 + .pg_authenticate = nfs_callback_authenticate,
9508 Index: linux-2.6.10/fs/nfs/file.c
9509 ===================================================================
9510 --- linux-2.6.10.orig/fs/nfs/file.c 2004-12-25 05:35:01.000000000 +0800
9511 +++ linux-2.6.10/fs/nfs/file.c 2005-04-05 14:49:13.453684048 +0800
9513 .setattr = nfs_setattr,
9516 +#ifdef CONFIG_NFS_V4
9518 +struct inode_operations nfs4_file_inode_operations = {
9519 + .permission = nfs_permission,
9520 + .getattr = nfs_getattr,
9521 + .setattr = nfs_setattr,
9522 + .getxattr = nfs4_getxattr,
9523 + .setxattr = nfs4_setxattr,
9524 + .listxattr = nfs4_listxattr,
9527 +#endif /* CONFIG_NFS_V4 */
9529 /* Hack for future NFS swap support */
9531 # define IS_SWAPFILE(inode) (0)
9532 @@ -295,10 +308,19 @@
9533 static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
9535 struct inode *inode = filp->f_mapping->host;
9540 - status = NFS_PROTO(inode)->lock(filp, cmd, fl);
9541 + /* Use local locking if mounted with "-onolock" */
9542 + if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
9543 + status = NFS_PROTO(inode)->lock(filp, cmd, fl);
9545 + struct file_lock *cfl = posix_test_lock(filp, fl);
9546 + fl->fl_type = F_UNLCK; /* default: no conflict; set only after the test has read the requested type */
9547 + if (cfl != NULL) {
9548 + memcpy(fl, cfl, sizeof(*fl)); /* report the conflicting lock, keeping its own fl_type */
9554 @@ -325,7 +347,11 @@
9555 * still need to complete the unlock.
9558 - status = NFS_PROTO(inode)->lock(filp, cmd, fl);
9559 + /* Use local locking if mounted with "-onolock" */
9560 + if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
9561 + status = NFS_PROTO(inode)->lock(filp, cmd, fl);
9563 + status = posix_lock_file_wait(filp, fl);
9564 rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
9567 @@ -351,15 +377,19 @@
9571 - status = NFS_PROTO(inode)->lock(filp, cmd, fl);
9572 - /* If we were signalled we still need to ensure that
9573 - * we clean up any state on the server. We therefore
9574 - * record the lock call as having succeeded in order to
9575 - * ensure that locks_remove_posix() cleans it out when
9576 - * the process exits.
9578 - if (status == -EINTR || status == -ERESTARTSYS)
9579 - posix_lock_file(filp, fl);
9580 + /* Use local locking if mounted with "-onolock" */
9581 + if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) {
9582 + status = NFS_PROTO(inode)->lock(filp, cmd, fl);
9583 + /* If we were signalled we still need to ensure that
9584 + * we clean up any state on the server. We therefore
9585 + * record the lock call as having succeeded in order to
9586 + * ensure that locks_remove_posix() cleans it out when
9587 + * the process exits.
9589 + if (status == -EINTR || status == -ERESTARTSYS)
9590 + posix_lock_file(filp, fl);
9592 + status = posix_lock_file_wait(filp, fl);
9596 @@ -396,15 +426,6 @@
9597 if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
9600 - if (NFS_PROTO(inode)->version != 4) {
9601 - /* Fake OK code if mounted without NLM support */
9602 - if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) {
9603 - if (IS_GETLK(cmd))
9604 - return LOCK_USE_CLNT;
9610 * No BSD flocks over NFS allowed.
9611 * Note: we could try to fake a POSIX lock request here by
9612 Index: linux-2.6.10/fs/nfs/nfs3proc.c
9613 ===================================================================
9614 --- linux-2.6.10.orig/fs/nfs/nfs3proc.c 2004-12-25 05:34:45.000000000 +0800
9615 +++ linux-2.6.10/fs/nfs/nfs3proc.c 2005-04-05 14:49:13.441685872 +0800
9617 dprintk("%s: call fsinfo\n", __FUNCTION__);
9618 info->fattr->valid = 0;
9619 status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
9620 - dprintk("%s: reply fsinfo %d\n", __FUNCTION__, status);
9621 + dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status);
9622 if (!(info->fattr->valid & NFS_ATTR_FATTR)) {
9623 status = rpc_call(server->client_sys, NFS3PROC_GETATTR, fhandle, info->fattr, 0);
9624 - dprintk("%s: reply getattr %d\n", __FUNCTION__, status);
9625 + dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
9631 status = rpc_call(server->client, NFS3PROC_GETATTR,
9633 - dprintk("NFS reply getattr\n");
9634 + dprintk("NFS reply getattr: %d\n", status);
9639 dprintk("NFS call setattr\n");
9641 status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0);
9642 - dprintk("NFS reply setattr\n");
9643 + dprintk("NFS reply setattr: %d\n", status);
9648 if (res.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE))
9649 entry->mask |= MAY_EXEC;
9651 - dprintk("NFS reply access, status = %d\n", status);
9652 + dprintk("NFS reply access: %d\n", status);
9657 * For now, we don't implement O_EXCL.
9659 static struct inode *
9660 -nfs3_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
9661 +nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
9664 struct nfs_fh fhandle;
9666 struct nfs_fattr dir_attr;
9667 struct nfs3_createargs arg = {
9669 - .name = name->name,
9671 + .name = dentry->d_name.name,
9672 + .len = dentry->d_name.len,
9675 struct nfs3_diropres res = {
9680 - dprintk("NFS call create %s\n", name->name);
9681 + dprintk("NFS call create %s\n", dentry->d_name.name);
9682 arg.createmode = NFS3_CREATE_UNCHECKED;
9683 if (flags & O_EXCL) {
9684 arg.createmode = NFS3_CREATE_EXCLUSIVE;
9688 if (fhandle.size == 0 || !(fattr.valid & NFS_ATTR_FATTR)) {
9689 - status = nfs3_proc_lookup(dir, name, &fhandle, &fattr);
9690 + status = nfs3_proc_lookup(dir, &dentry->d_name, &fhandle, &fattr);
9695 .version = 3, /* protocol version */
9696 .dentry_ops = &nfs_dentry_operations,
9697 .dir_inode_ops = &nfs_dir_inode_operations,
9698 + .file_inode_ops = &nfs_file_inode_operations,
9699 .getroot = nfs3_proc_get_root,
9700 .getattr = nfs3_proc_getattr,
9701 .setattr = nfs3_proc_setattr,
9702 Index: linux-2.6.10/fs/nfs/nfs4proc.c
9703 ===================================================================
9704 --- linux-2.6.10.orig/fs/nfs/nfs4proc.c 2004-12-25 05:35:23.000000000 +0800
9705 +++ linux-2.6.10/fs/nfs/nfs4proc.c 2005-04-05 14:49:13.456683592 +0800
9708 * Returns an nfs4_state + an referenced inode
9710 -static int _nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
9711 +static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
9713 struct nfs4_state_owner *sp;
9714 struct nfs4_state *state = NULL;
9716 struct nfs_openargs o_arg = {
9718 .open_flags = flags,
9720 + .name = &dentry->d_name,
9722 .bitmask = server->attr_bitmask,
9723 .claim = NFS4_OPEN_CLAIM_NULL,
9724 @@ -581,14 +581,14 @@
9728 -struct nfs4_state *nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr, struct rpc_cred *cred)
9729 +struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred)
9731 struct nfs4_exception exception = { };
9732 struct nfs4_state *res;
9736 - status = _nfs4_do_open(dir, name, flags, sattr, cred, &res);
9737 + status = _nfs4_do_open(dir, dentry, flags, sattr, cred, &res);
9740 /* NOTE: BAD_SEQID means the server and client disagree about the
9745 + if (state != NULL)
9746 + msg.rpc_cred = state->owner->so_cred;
9747 if (sattr->ia_valid & ATTR_SIZE)
9748 nfs4_copy_stateid(&arg.stateid, state, NULL);
9750 @@ -658,6 +660,61 @@
9754 +struct nfs4_closedata {
9755 + struct inode *inode;
9756 + struct nfs4_state *state;
9757 + struct nfs_closeargs arg;
9758 + struct nfs_closeres res;
9761 +static void nfs4_close_done(struct rpc_task *task)
9763 + struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata;
9764 + struct nfs4_state *state = calldata->state;
9765 + struct nfs4_state_owner *sp = state->owner;
9766 + struct nfs_server *server = NFS_SERVER(calldata->inode);
9768 + /* hmm. we are done with the inode, and in the process of freeing
9769 + * the state_owner. we keep this around to process errors
9771 + nfs4_increment_seqid(task->tk_status, sp);
9772 + switch (task->tk_status) {
9774 + state->state = calldata->arg.open_flags;
9775 + memcpy(&state->stateid, &calldata->res.stateid,
9776 + sizeof(state->stateid));
9778 + case -NFS4ERR_STALE_STATEID:
9779 + case -NFS4ERR_EXPIRED:
9780 + state->state = calldata->arg.open_flags;
9781 + nfs4_schedule_state_recovery(server->nfs4_state);
9784 + if (nfs4_async_handle_error(task, server) == -EAGAIN) {
9785 + rpc_restart_call(task);
9789 + nfs4_put_open_state(state);
9791 + nfs4_put_state_owner(sp);
9792 + up_read(&server->nfs4_state->cl_sem);
9796 +static inline int nfs4_close_call(struct rpc_clnt *clnt, struct nfs4_closedata *calldata)
9798 + struct rpc_message msg = {
9799 + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE],
9800 + .rpc_argp = &calldata->arg,
9801 + .rpc_resp = &calldata->res,
9802 + .rpc_cred = calldata->state->owner->so_cred,
9804 + if (calldata->arg.open_flags != 0)
9805 + msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
9806 + return rpc_call_async(clnt, &msg, 0, nfs4_close_done, calldata);
9810 * It is possible for data to be read/written from a mem-mapped file
9811 * after the sys_close call (which hits the vfs layer as a flush).
9812 @@ -669,102 +726,34 @@
9814 * NOTE: Caller must be holding the sp->so_owner semaphore!
9816 -static int _nfs4_do_close(struct inode *inode, struct nfs4_state *state)
9817 +int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode)
9819 - struct nfs4_state_owner *sp = state->owner;
9821 - struct nfs_closeargs arg = {
9822 - .fh = NFS_FH(inode),
9824 - struct nfs_closeres res;
9825 - struct rpc_message msg = {
9826 - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE],
9830 + struct nfs4_closedata *calldata;
9833 - if (test_bit(NFS_DELEGATED_STATE, &state->flags))
9834 + /* Tell caller we're done */
9835 + if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
9836 + state->state = mode;
9838 - memcpy(&arg.stateid, &state->stateid, sizeof(arg.stateid));
9840 + calldata = (struct nfs4_closedata *)kmalloc(sizeof(*calldata), GFP_KERNEL);
9841 + if (calldata == NULL)
9843 + calldata->inode = inode;
9844 + calldata->state = state;
9845 + calldata->arg.fh = NFS_FH(inode);
9846 /* Serialization for the sequence id */
9847 - arg.seqid = sp->so_seqid,
9848 - status = rpc_call_sync(NFS_SERVER(inode)->client, &msg, RPC_TASK_NOINTR);
9850 - /* hmm. we are done with the inode, and in the process of freeing
9851 - * the state_owner. we keep this around to process errors
9852 + calldata->arg.seqid = state->owner->so_seqid;
9853 + calldata->arg.open_flags = mode;
9854 + memcpy(&calldata->arg.stateid, &state->stateid,
9855 + sizeof(calldata->arg.stateid));
9856 + status = nfs4_close_call(NFS_SERVER(inode)->client, calldata);
9858 + * Return -EINPROGRESS on success in order to indicate to the
9859 + * caller that an asynchronous RPC call has been launched, and
9860 + * that it will release the semaphores on completion.
9862 - nfs4_increment_seqid(status, sp);
9864 - memcpy(&state->stateid, &res.stateid, sizeof(state->stateid));
9869 -int nfs4_do_close(struct inode *inode, struct nfs4_state *state)
9871 - struct nfs_server *server = NFS_SERVER(state->inode);
9872 - struct nfs4_exception exception = { };
9875 - err = _nfs4_do_close(inode, state);
9877 - case -NFS4ERR_STALE_STATEID:
9878 - case -NFS4ERR_EXPIRED:
9879 - nfs4_schedule_state_recovery(server->nfs4_state);
9884 - err = nfs4_handle_exception(server, err, &exception);
9885 - } while (exception.retry);
9889 -static int _nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode)
9891 - struct nfs4_state_owner *sp = state->owner;
9893 - struct nfs_closeargs arg = {
9894 - .fh = NFS_FH(inode),
9895 - .seqid = sp->so_seqid,
9896 - .open_flags = mode,
9898 - struct nfs_closeres res;
9899 - struct rpc_message msg = {
9900 - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE],
9905 - if (test_bit(NFS_DELEGATED_STATE, &state->flags))
9907 - memcpy(&arg.stateid, &state->stateid, sizeof(arg.stateid));
9908 - status = rpc_call_sync(NFS_SERVER(inode)->client, &msg, RPC_TASK_NOINTR);
9909 - nfs4_increment_seqid(status, sp);
9911 - memcpy(&state->stateid, &res.stateid, sizeof(state->stateid));
9916 -int nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode)
9918 - struct nfs_server *server = NFS_SERVER(state->inode);
9919 - struct nfs4_exception exception = { };
9922 - err = _nfs4_do_downgrade(inode, state, mode);
9924 - case -NFS4ERR_STALE_STATEID:
9925 - case -NFS4ERR_EXPIRED:
9926 - nfs4_schedule_state_recovery(server->nfs4_state);
9929 - state->state = mode;
9931 - err = nfs4_handle_exception(server, err, &exception);
9932 - } while (exception.retry);
9934 + return (status == 0) ? -EINPROGRESS : status;
9941 cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
9942 - state = nfs4_do_open(dir, &dentry->d_name, nd->intent.open.flags, &attr, cred);
9943 + state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
9946 return (struct inode *)state;
9948 cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
9949 state = nfs4_open_delegated(dentry->d_inode, openflags, cred);
9951 - state = nfs4_do_open(dir, &dentry->d_name, openflags, NULL, cred);
9952 + state = nfs4_do_open(dir, dentry, openflags, NULL, cred);
9954 if (state == ERR_PTR(-ENOENT) && dentry->d_inode == 0)
9956 @@ -1026,7 +1015,7 @@
9959 state = nfs4_do_open(dentry->d_parent->d_inode,
9960 - &dentry->d_name, FMODE_WRITE,
9961 + dentry, FMODE_WRITE,
9965 @@ -1327,7 +1316,7 @@
9968 static struct inode *
9969 -nfs4_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
9970 +nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
9973 struct inode *inode;
9974 @@ -1335,7 +1324,7 @@
9975 struct rpc_cred *cred;
9977 cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
9978 - state = nfs4_do_open(dir, name, flags, sattr, cred);
9979 + state = nfs4_do_open(dir, dentry, flags, sattr, cred);
9981 if (!IS_ERR(state)) {
9982 inode = state->inode;
9983 @@ -2049,6 +2038,86 @@
9987 +nfs4_server_supports_acls(struct nfs_server *server)
9989 + return (server->caps & NFS_CAP_ACLS)
9990 + && (server->acl_bitmask & ACL4_SUPPORT_ALLOW_ACL)
9991 + && (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL);
9994 +/* XXX: assuming XATTR_SIZE_MAX is a multiple of PAGE_CACHE_SIZE,
9995 + * and that it's OK to put sizeof(void *) * (XATTR_SIZE_MAX/PAGE_CACHE_SIZE)
9996 + * bytes on the stack. (Currently probably both true.)
9998 +#define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT)
10000 +static void buf_to_pages(const void *buf, ssize_t buflen,
10001 + struct page **pages, unsigned int *pgbase)
10003 + const void *p = buf;
10005 + *pgbase = offset_in_page(buf);
10007 + while (p < buf + buflen) {
10008 + *(pages++) = virt_to_page(p);
10009 + p += PAGE_CACHE_SIZE;
10014 +nfs4_proc_get_acl(struct inode *inode, void *buf, ssize_t buflen)
10016 + struct nfs_server *server = NFS_SERVER(inode);
10017 + struct page *pages[NFS4ACL_MAXPAGES];
10018 + struct nfs_getaclargs args = {
10019 + .fh = NFS_FH(inode),
10020 + .acl_pages = pages,
10021 + .acl_len = buflen,
10023 + ssize_t acl_len = buflen;
10024 + struct rpc_message msg = {
10025 + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL],
10026 + .rpc_argp = &args,
10027 + .rpc_resp = &acl_len,
10031 + if (!nfs4_server_supports_acls(server))
10032 + return -EOPNOTSUPP;
10033 + buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase);
10034 + ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
10035 + if (buflen && acl_len > buflen)
10043 +nfs4_proc_set_acl(struct inode *inode, const void *buf, ssize_t buflen)
10045 + struct nfs_server *server = NFS_SERVER(inode);
10046 + struct page *pages[NFS4ACL_MAXPAGES];
10047 + struct nfs_setaclargs arg = {
10048 + .fh = NFS_FH(inode),
10049 + .acl_pages = pages,
10050 + .acl_len = buflen,
10052 + struct rpc_message msg = {
10053 + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETACL],
10054 + .rpc_argp = &arg,
10055 + .rpc_resp = NULL,
10059 + if (!nfs4_server_supports_acls(server))
10060 + return -EOPNOTSUPP;
10061 + buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
10062 + ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0);
10067 nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server)
10069 struct nfs4_client *clp = server->nfs4_state;
10070 @@ -2589,6 +2658,7 @@
10071 .version = 4, /* protocol version */
10072 .dentry_ops = &nfs4_dentry_operations,
10073 .dir_inode_ops = &nfs4_dir_inode_operations,
10074 + .file_inode_ops = &nfs4_file_inode_operations,
10075 .getroot = nfs4_proc_get_root,
10076 .getattr = nfs4_proc_getattr,
10077 .setattr = nfs4_proc_setattr,
10078 Index: linux-2.6.10/fs/nfs/direct.c
10079 ===================================================================
10080 --- linux-2.6.10.orig/fs/nfs/direct.c 2005-03-31 15:35:23.000000000 +0800
10081 +++ linux-2.6.10/fs/nfs/direct.c 2005-04-05 14:49:13.448684808 +0800
10083 * 08 Jul 2002 Version for 2.4.19, with bug fixes --trondmy
10084 * 08 Jun 2003 Port to 2.5 APIs --cel
10085 * 31 Mar 2004 Handle direct I/O without VFS support --cel
10086 + * 15 Sep 2004 Parallel async reads --cel
10091 #include <linux/smp_lock.h>
10092 #include <linux/file.h>
10093 #include <linux/pagemap.h>
10094 +#include <linux/kref.h>
10096 #include <linux/nfs_fs.h>
10097 #include <linux/nfs_page.h>
10098 @@ -50,11 +52,27 @@
10100 #include <asm/system.h>
10101 #include <asm/uaccess.h>
10102 +#include <asm/atomic.h>
10104 #define NFSDBG_FACILITY NFSDBG_VFS
10105 -#define VERF_SIZE (2 * sizeof(__u32))
10106 #define MAX_DIRECTIO_SIZE (4096UL << PAGE_SHIFT)
10108 +static kmem_cache_t *nfs_direct_cachep;
10111 + * This represents a set of asynchronous requests that we're waiting on
10113 +struct nfs_direct_req {
10114 + struct kref kref; /* release manager */
10115 + struct list_head list; /* nfs_read_data structs */
10116 + wait_queue_head_t wait; /* wait for i/o completion */
10117 + struct page ** pages; /* pages in our buffer */
10118 + unsigned int npages; /* count of pages */
10119 + atomic_t complete, /* i/os we're waiting for */
10120 + count, /* bytes actually processed */
10121 + error; /* any reported error */
10126 * nfs_get_user_pages - find and set up pages underlying user's buffer
10128 unsigned long page_count;
10131 - /* set an arbitrary limit to prevent arithmetic overflow */
10132 + /* set an arbitrary limit to prevent type overflow */
10133 + /* XXX: this can probably be as large as INT_MAX */
10134 if (size > MAX_DIRECTIO_SIZE) {
10139 * nfs_free_user_pages - tear down page struct array
10140 * @pages: array of page struct pointers underlying target buffer
10141 + * @npages: number of pages in the array
10142 + * @do_dirty: dirty the pages as we release them
10145 nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
10146 @@ -109,77 +130,231 @@
10150 - * nfs_direct_read_seg - Read in one iov segment. Generate separate
10151 - * read RPCs for each "rsize" bytes.
10152 + * nfs_direct_req_release - release nfs_direct_req structure for direct read
10153 + * @kref: kref object embedded in an nfs_direct_req structure
10156 +static void nfs_direct_req_release(struct kref *kref)
10158 + struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
10159 + kmem_cache_free(nfs_direct_cachep, dreq);
10163 + * nfs_direct_read_alloc - allocate nfs_read_data structures for direct read
10164 + * @nbytes: count of bytes for the read request
10165 + * @rsize: local rsize setting
10167 + * Note we also set the number of requests we have in the dreq when we are
10168 + * done. This prevents races with I/O completion so we will always wait
10169 + * until all requests have been dispatched and completed.
10171 +static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int rsize)
10173 + struct list_head *list;
10174 + struct nfs_direct_req *dreq;
10175 + unsigned int reads = 0;
10177 + dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
10181 + kref_init(&dreq->kref);
10182 + init_waitqueue_head(&dreq->wait);
10183 + INIT_LIST_HEAD(&dreq->list);
10184 + atomic_set(&dreq->count, 0);
10185 + atomic_set(&dreq->error, 0);
10187 + list = &dreq->list;
10189 + struct nfs_read_data *data = nfs_readdata_alloc();
10191 + if (unlikely(!data)) {
10192 + while (!list_empty(list)) {
10193 + data = list_entry(list->next,
10194 + struct nfs_read_data, pages);
10195 + list_del(&data->pages);
10196 + nfs_readdata_free(data);
10198 + kref_put(&dreq->kref, nfs_direct_req_release);
10202 + INIT_LIST_HEAD(&data->pages);
10203 + list_add(&data->pages, list);
10205 + data->req = (struct nfs_page *) dreq;
10207 + if (nbytes <= rsize)
10211 + kref_get(&dreq->kref);
10212 + atomic_set(&dreq->complete, reads);
10217 + * nfs_direct_read_result - handle a read reply for a direct read request
10218 + * @data: address of NFS READ operation control block
10219 + * @status: status of this NFS READ operation
10221 + * We must hold a reference to all the pages in this direct read request
10222 + * until the RPCs complete. This could be long *after* we are woken up in
10223 + * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server).
10225 +static void nfs_direct_read_result(struct nfs_read_data *data, int status)
10227 + struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
10229 + if (likely(status >= 0))
10230 + atomic_add(data->res.count, &dreq->count);
10232 + atomic_set(&dreq->error, status);
10234 + if (unlikely(atomic_dec_and_test(&dreq->complete))) {
10235 + nfs_free_user_pages(dreq->pages, dreq->npages, 1);
10236 + wake_up(&dreq->wait);
10237 + kref_put(&dreq->kref, nfs_direct_req_release);
10242 + * nfs_direct_read_schedule - dispatch NFS READ operations for a direct read
10243 + * @dreq: address of nfs_direct_req struct for this request
10244 * @inode: target inode
10245 * @ctx: target file open context
10246 - * user_addr: starting address of this segment of user's buffer
10247 - * count: size of this segment
10248 - * file_offset: offset in file to begin the operation
10249 - * @pages: array of addresses of page structs defining user's buffer
10250 - * nr_pages: size of pages array
10251 + * @user_addr: starting address of this segment of user's buffer
10252 + * @count: size of this segment
10253 + * @file_offset: offset in file to begin the operation
10255 + * For each nfs_read_data struct that was allocated on the list, dispatch
10256 + * an NFS READ operation
10259 -nfs_direct_read_seg(struct inode *inode, struct nfs_open_context *ctx,
10260 - unsigned long user_addr, size_t count, loff_t file_offset,
10261 - struct page **pages, int nr_pages)
10263 - const unsigned int rsize = NFS_SERVER(inode)->rsize;
10264 - int tot_bytes = 0;
10266 - struct nfs_read_data rdata = {
10268 - .cred = ctx->cred,
10270 - .fh = NFS_FH(inode),
10274 - .fattr = &rdata.fattr,
10277 +static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
10278 + struct inode *inode, struct nfs_open_context *ctx,
10279 + unsigned long user_addr, size_t count, loff_t file_offset)
10281 + struct list_head *list = &dreq->list;
10282 + struct page **pages = dreq->pages;
10283 + unsigned int curpage, pgbase;
10284 + unsigned int rsize = NFS_SERVER(inode)->rsize;
10286 - rdata.args.pgbase = user_addr & ~PAGE_MASK;
10287 - rdata.args.offset = file_offset;
10291 - rdata.args.count = count;
10292 - if (rdata.args.count > rsize)
10293 - rdata.args.count = rsize;
10294 - rdata.args.pages = &pages[curpage];
10296 - dprintk("NFS: direct read: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n",
10297 - rdata.args.count, (long long) rdata.args.offset,
10298 - user_addr + tot_bytes, rdata.args.pgbase, curpage);
10300 + pgbase = user_addr & ~PAGE_MASK;
10302 + struct nfs_read_data *data;
10303 + unsigned int bytes;
10306 + if (count < rsize)
10309 + data = list_entry(list->next, struct nfs_read_data, pages);
10310 + list_del_init(&data->pages);
10312 + data->inode = inode;
10313 + data->cred = ctx->cred;
10314 + data->args.fh = NFS_FH(inode);
10315 + data->args.context = ctx;
10316 + data->args.offset = file_offset;
10317 + data->args.pgbase = pgbase;
10318 + data->args.pages = &pages[curpage];
10319 + data->args.count = bytes;
10320 + data->res.fattr = &data->fattr;
10321 + data->res.eof = 0;
10322 + data->res.count = bytes;
10324 + NFS_PROTO(inode)->read_setup(data);
10326 + data->task.tk_cookie = (unsigned long) inode;
10327 + data->task.tk_calldata = data;
10328 + data->task.tk_release = nfs_readdata_release;
10329 + data->complete = nfs_direct_read_result;
10332 - result = NFS_PROTO(inode)->read(&rdata);
10333 + rpc_execute(&data->task);
10336 - if (result <= 0) {
10337 - if (tot_bytes > 0)
10339 - if (result == -EISDIR)
10340 - result = -EINVAL;
10343 + dfprintk(VFS, "NFS: %4d initiated direct read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
10344 + data->task.tk_pid,
10345 + inode->i_sb->s_id,
10346 + (long long)NFS_FILEID(inode),
10348 + (unsigned long long)data->args.offset);
10350 + file_offset += bytes;
10352 + curpage += pgbase >> PAGE_SHIFT;
10353 + pgbase &= ~PAGE_MASK;
10355 - tot_bytes += result;
10356 - if (rdata.res.eof)
10359 - rdata.args.offset += result;
10360 - rdata.args.pgbase += result;
10361 - curpage += rdata.args.pgbase >> PAGE_SHIFT;
10362 - rdata.args.pgbase &= ~PAGE_MASK;
10365 } while (count != 0);
10368 - /* XXX: should we zero the rest of the user's buffer if we
10371 + * nfs_direct_read_wait - wait for I/O completion for direct reads
10372 + * @dreq: request on which we are to wait
10373 + * @intr: whether or not this wait can be interrupted
10375 + * Collects and returns the final error value/byte-count.
10377 +static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr)
10381 - return tot_bytes;
10383 + result = wait_event_interruptible(dreq->wait,
10384 + (atomic_read(&dreq->complete) == 0));
10386 + wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0));
10390 + result = atomic_read(&dreq->error);
10392 + result = atomic_read(&dreq->count);
10394 + kref_put(&dreq->kref, nfs_direct_req_release);
10395 + return (ssize_t) result;
10399 + * nfs_direct_read_seg - Read in one iov segment. Generate separate
10400 + * read RPCs for each "rsize" bytes.
10401 + * @inode: target inode
10402 + * @ctx: target file open context
10403 + * @user_addr: starting address of this segment of user's buffer
10404 + * @count: size of this segment
10405 + * @file_offset: offset in file to begin the operation
10406 + * @pages: array of addresses of page structs defining user's buffer
10407 + * @nr_pages: number of pages in the array
10410 +static ssize_t nfs_direct_read_seg(struct inode *inode,
10411 + struct nfs_open_context *ctx, unsigned long user_addr,
10412 + size_t count, loff_t file_offset, struct page **pages,
10413 + unsigned int nr_pages)
10417 + struct rpc_clnt *clnt = NFS_CLIENT(inode);
10418 + struct nfs_direct_req *dreq;
10420 + dreq = nfs_direct_read_alloc(count, NFS_SERVER(inode)->rsize);
10424 + dreq->pages = pages;
10425 + dreq->npages = nr_pages;
10427 + rpc_clnt_sigmask(clnt, &oldset);
10428 + nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count,
10430 + result = nfs_direct_read_wait(dreq, clnt->cl_intr);
10431 + rpc_clnt_sigunmask(clnt, &oldset);
10437 @@ -191,9 +366,8 @@
10438 * file_offset: offset in file to begin the operation
10439 * nr_segs: size of iovec array
10441 - * generic_file_direct_IO has already pushed out any non-direct
10442 - * writes so that this read will see them when we read from the
10444 + * We've already pushed out any non-direct writes so that this read
10445 + * will see them when we read from the server.
10448 nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx,
10449 @@ -222,8 +396,6 @@
10450 result = nfs_direct_read_seg(inode, ctx, user_addr, size,
10451 file_offset, pages, page_count);
10453 - nfs_free_user_pages(pages, page_count, 1);
10458 @@ -249,31 +421,31 @@
10459 * @pages: array of addresses of page structs defining user's buffer
10460 * nr_pages: size of pages array
10463 -nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx,
10464 - unsigned long user_addr, size_t count, loff_t file_offset,
10465 - struct page **pages, int nr_pages)
10466 +static ssize_t nfs_direct_write_seg(struct inode *inode,
10467 + struct nfs_open_context *ctx, unsigned long user_addr,
10468 + size_t count, loff_t file_offset, struct page **pages,
10471 const unsigned int wsize = NFS_SERVER(inode)->wsize;
10473 - int curpage, need_commit, result, tot_bytes;
10474 + int curpage, need_commit;
10475 + ssize_t result, tot_bytes;
10476 struct nfs_writeverf first_verf;
10477 - struct nfs_write_data wdata = {
10479 - .cred = ctx->cred,
10481 - .fh = NFS_FH(inode),
10485 - .fattr = &wdata.fattr,
10486 - .verf = &wdata.verf,
10489 + struct nfs_write_data *wdata;
10491 - wdata.args.stable = NFS_UNSTABLE;
10492 + wdata = nfs_writedata_alloc();
10496 + wdata->inode = inode;
10497 + wdata->cred = ctx->cred;
10498 + wdata->args.fh = NFS_FH(inode);
10499 + wdata->args.context = ctx;
10500 + wdata->args.stable = NFS_UNSTABLE;
10501 if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize)
10502 - wdata.args.stable = NFS_FILE_SYNC;
10503 + wdata->args.stable = NFS_FILE_SYNC;
10504 + wdata->res.fattr = &wdata->fattr;
10505 + wdata->res.verf = &wdata->verf;
10507 nfs_begin_data_update(inode);
10509 @@ -281,20 +453,20 @@
10513 - wdata.args.pgbase = user_addr & ~PAGE_MASK;
10514 - wdata.args.offset = file_offset;
10516 - wdata.args.count = request;
10517 - if (wdata.args.count > wsize)
10518 - wdata.args.count = wsize;
10519 - wdata.args.pages = &pages[curpage];
10520 + wdata->args.pgbase = user_addr & ~PAGE_MASK;
10521 + wdata->args.offset = file_offset;
10523 + wdata->args.count = request;
10524 + if (wdata->args.count > wsize)
10525 + wdata->args.count = wsize;
10526 + wdata->args.pages = &pages[curpage];
10528 dprintk("NFS: direct write: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n",
10529 - wdata.args.count, (long long) wdata.args.offset,
10530 - user_addr + tot_bytes, wdata.args.pgbase, curpage);
10531 + wdata->args.count, (long long) wdata->args.offset,
10532 + user_addr + tot_bytes, wdata->args.pgbase, curpage);
10535 - result = NFS_PROTO(inode)->write(&wdata);
10536 + result = NFS_PROTO(inode)->write(wdata);
10540 @@ -304,20 +476,25 @@
10543 if (tot_bytes == 0)
10544 - memcpy(&first_verf.verifier, &wdata.verf.verifier,
10546 - if (wdata.verf.committed != NFS_FILE_SYNC) {
10547 + memcpy(&first_verf.verifier, &wdata->verf.verifier,
10548 + sizeof(first_verf.verifier));
10549 + if (wdata->verf.committed != NFS_FILE_SYNC) {
10551 - if (memcmp(&first_verf.verifier,
10552 - &wdata.verf.verifier, VERF_SIZE))
10553 + if (memcmp(&first_verf.verifier, &wdata->verf.verifier,
10554 + sizeof(first_verf.verifier)))
10558 - tot_bytes += result;
10559 - wdata.args.offset += result;
10560 - wdata.args.pgbase += result;
10561 - curpage += wdata.args.pgbase >> PAGE_SHIFT;
10562 - wdata.args.pgbase &= ~PAGE_MASK;
10563 + tot_bytes += result;
10565 + /* in case of a short write: stop now, let the app recover */
10566 + if (result < wdata->args.count)
10569 + wdata->args.offset += result;
10570 + wdata->args.pgbase += result;
10571 + curpage += wdata->args.pgbase >> PAGE_SHIFT;
10572 + wdata->args.pgbase &= ~PAGE_MASK;
10574 } while (request != 0);
10576 @@ -325,27 +502,27 @@
10577 * Commit data written so far, even in the event of an error
10580 - wdata.args.count = tot_bytes;
10581 - wdata.args.offset = file_offset;
10582 + wdata->args.count = tot_bytes;
10583 + wdata->args.offset = file_offset;
10586 - result = NFS_PROTO(inode)->commit(&wdata);
10587 + result = NFS_PROTO(inode)->commit(wdata);
10590 if (result < 0 || memcmp(&first_verf.verifier,
10591 - &wdata.verf.verifier,
10593 + &wdata->verf.verifier,
10594 + sizeof(first_verf.verifier)) != 0)
10597 result = tot_bytes;
10600 nfs_end_data_update_defer(inode);
10602 + nfs_writedata_free(wdata);
10606 - wdata.args.stable = NFS_FILE_SYNC;
10607 + wdata->args.stable = NFS_FILE_SYNC;
10611 @@ -362,9 +539,9 @@
10612 * that non-direct readers might access, so they will pick up these
10613 * writes immediately.
10615 -static int nfs_direct_write(struct inode *inode, struct nfs_open_context *ctx,
10616 - const struct iovec *iov, loff_t file_offset,
10617 - unsigned long nr_segs)
10618 +static ssize_t nfs_direct_write(struct inode *inode,
10619 + struct nfs_open_context *ctx, const struct iovec *iov,
10620 + loff_t file_offset, unsigned long nr_segs)
10622 ssize_t tot_bytes = 0;
10623 unsigned long seg = 0;
10624 @@ -504,6 +681,8 @@
10625 if (mapping->nrpages) {
10626 retval = filemap_fdatawrite(mapping);
10628 + retval = nfs_wb_all(inode);
10630 retval = filemap_fdatawait(mapping);
10633 @@ -593,6 +772,8 @@
10634 if (mapping->nrpages) {
10635 retval = filemap_fdatawrite(mapping);
10637 + retval = nfs_wb_all(inode);
10639 retval = filemap_fdatawait(mapping);
10642 @@ -607,3 +788,21 @@
10647 +int nfs_init_directcache(void)
10649 + nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
10650 + sizeof(struct nfs_direct_req),
10651 + 0, SLAB_RECLAIM_ACCOUNT,
10653 + if (nfs_direct_cachep == NULL)
10659 +void nfs_destroy_directcache(void)
10661 + if (kmem_cache_destroy(nfs_direct_cachep))
10662 + printk(KERN_INFO "nfs_direct_cache: not all structures were freed\n");
10664 Index: linux-2.6.10/fs/nfs/read.c
10665 ===================================================================
10666 --- linux-2.6.10.orig/fs/nfs/read.c 2004-12-25 05:33:47.000000000 +0800
10667 +++ linux-2.6.10/fs/nfs/read.c 2005-04-05 14:49:13.437686480 +0800
10669 #include <linux/mm.h>
10670 #include <linux/slab.h>
10671 #include <linux/pagemap.h>
10672 -#include <linux/mempool.h>
10673 #include <linux/sunrpc/clnt.h>
10674 #include <linux/nfs_fs.h>
10675 #include <linux/nfs_page.h>
10676 @@ -39,25 +38,11 @@
10677 static void nfs_readpage_result_full(struct nfs_read_data *, int);
10679 static kmem_cache_t *nfs_rdata_cachep;
10680 -static mempool_t *nfs_rdata_mempool;
10681 +mempool_t *nfs_rdata_mempool;
10683 #define MIN_POOL_READ (32)
10685 -static struct nfs_read_data *nfs_readdata_alloc(void)
10687 - struct nfs_read_data *p;
10688 - p = (struct nfs_read_data *)mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
10690 - memset(p, 0, sizeof(*p));
10694 -static __inline__ void nfs_readdata_free(struct nfs_read_data *p)
10696 - mempool_free(p, nfs_rdata_mempool);
10699 -static void nfs_readdata_release(struct rpc_task *task)
10700 +void nfs_readdata_release(struct rpc_task *task)
10702 struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata;
10703 nfs_readdata_free(data);