The complete set of citi nfsv4 patches combined into one patch. Changes since 2.6.10-rc3-CITI_NFS4_ALL-3 * minor adjustments to xdr buffer length calculations in fs/nfs4xdr.c * client acl revisions: pass acls in page array of xdr bufs, removing arbitrary length restrictions. Temporarily disable acl caching. Index: linux-2.6.10/include/linux/nfsd/state.h =================================================================== --- linux-2.6.10.orig/include/linux/nfsd/state.h 2004-12-25 05:33:50.000000000 +0800 +++ linux-2.6.10/include/linux/nfsd/state.h 2005-04-05 14:49:13.465682224 +0800 @@ -67,6 +67,45 @@ #define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t))) #define ONE_STATEID(stateid) (!memcmp((stateid), &onestateid, sizeof(stateid_t))) +/* Delegation recall states */ +#define NFS4_NO_RECALL 0x000 +#define NFS4_RECALL_IN_PROGRESS 0x001 +#define NFS4_RECALL_COMPLETE 0x002 + + +/* Delegation flags */ +#define NFS4_DELAY_CLOSE 0x001 + +struct nfs4_cb_recall { + u32 cbr_ident; + int cbr_trunc; + stateid_t cbr_stateid; + u32 cbr_fhlen; + u32 cbr_fhval[NFS4_FHSIZE]; + struct nfs4_delegation *cbr_dp; +}; + +struct nfs4_delegation { + struct list_head dl_del_perfile; /* nfs4_file->fi_del_perfile */ + struct list_head dl_del_perclnt; /* nfs4_client->cl_del_perclnt*/ + struct list_head dl_recall_lru; /* delegation recalled */ + atomic_t dl_recall_cnt; /* resend cb_recall only once */ + atomic_t dl_count; /* ref count */ + atomic_t dl_state; /* recall state */ + struct nfs4_client *dl_client; + struct nfs4_file *dl_file; + struct file_lock *dl_flock; + struct nfs4_stateid *dl_stp; + u32 dl_flags; + u32 dl_type; + time_t dl_time; + struct nfs4_cb_recall dl_recall; +}; + +#define dl_stateid dl_recall.cbr_stateid +#define dl_fhlen dl_recall.cbr_fhlen +#define dl_fhval dl_recall.cbr_fhval + /* client delegation callback info */ struct nfs4_callback { /* SETCLIENTID info */ @@ -75,9 +114,8 @@ unsigned short cb_port; u32 cb_prog; u32 cb_ident; - struct 
xdr_netobj cb_netid; /* RPC client info */ - u32 cb_set; /* successful CB_NULL call */ + atomic_t cb_set; /* successful CB_NULL call */ struct rpc_program cb_program; struct rpc_stat cb_stat; struct rpc_clnt * cb_client; @@ -97,6 +135,7 @@ struct list_head cl_idhash; /* hash by cl_clientid.id */ struct list_head cl_strhash; /* hash by cl_name */ struct list_head cl_perclient; /* list: stateowners */ + struct list_head cl_del_perclnt; /* list: delegations */ struct list_head cl_lru; /* tail queue */ struct xdr_netobj cl_name; /* id generated by client */ nfs4_verifier cl_verifier; /* generated by client */ @@ -106,7 +145,8 @@ clientid_t cl_clientid; /* generated by server */ nfs4_verifier cl_confirm; /* generated by server */ struct nfs4_callback cl_callback; /* callback info */ - time_t cl_first_state; /* first state aquisition*/ + atomic_t cl_count; /* ref count */ + u32 cl_firststate; /* recovery file creation */ }; /* struct nfs4_client_reset @@ -117,8 +157,6 @@ struct nfs4_client_reclaim { struct list_head cr_strhash; /* hash by cr_name */ struct xdr_netobj cr_name; /* id generated by client */ - time_t cr_first_state; /* first state aquisition */ - u32 cr_expired; /* boolean: lease expired? 
*/ }; static inline void @@ -194,6 +232,7 @@ struct nfs4_file { struct list_head fi_hash; /* hash by "struct inode *" */ struct list_head fi_perfile; /* list: nfs4_stateid */ + struct list_head fi_del_perfile; /* list: nfs4_delegation */ struct inode *fi_inode; u32 fi_id; /* used with stateowner->so_id * for stateid_hashtbl hash */ @@ -231,8 +270,10 @@ #define CONFIRM 0x00000002 #define OPEN_STATE 0x00000004 #define LOCK_STATE 0x00000008 -#define RDWR_STATE 0x00000010 -#define CLOSE_STATE 0x00000020 +#define RD_STATE 0x00000010 +#define WR_STATE 0x00000020 +#define CLOSE_STATE 0x00000040 +#define DELEG_RET 0x00000080 #define seqid_mutating_err(err) \ (((err) != nfserr_stale_clientid) && \ @@ -243,14 +284,24 @@ extern time_t nfs4_laundromat(void); extern int nfsd4_renew(clientid_t *clid); extern int nfs4_preprocess_stateid_op(struct svc_fh *current_fh, - stateid_t *stateid, int flags, struct nfs4_stateid **stpp); + stateid_t *stateid, int flags, struct file **filp); extern int nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type); extern void nfs4_lock_state(void); extern void nfs4_unlock_state(void); extern int nfs4_in_grace(void); extern int nfs4_check_open_reclaim(clientid_t *clid); +extern void put_nfs4_client(struct nfs4_client *clp); extern void nfs4_free_stateowner(struct kref *kref); +extern void nfsd4_probe_callback(struct nfs4_client *clp); +extern int nfsd4_cb_recall(struct nfs4_delegation *dp); +extern int nfsd4_create_clid_file(struct nfs4_client *clp); +extern void nfsd4_remove_clid_file(struct nfs4_client *clp); +extern int nfsd4_list_rec_dir(int clear); +extern void nfsd4_init_rec_dir(char *rec_dirname); +extern void nfsd4_shutdown_rec_dir(void); +extern int nfs4_client_to_reclaim(char *name, int namlen); + static inline void nfs4_put_stateowner(struct nfs4_stateowner *so) Index: linux-2.6.10/include/linux/nfsd/nfsd.h =================================================================== --- linux-2.6.10.orig/include/linux/nfsd/nfsd.h 
2004-12-25 05:35:39.000000000 +0800 +++ linux-2.6.10/include/linux/nfsd/nfsd.h 2005-04-05 14:49:13.464682376 +0800 @@ -98,8 +98,12 @@ void nfsd_close(struct file *); int nfsd_read(struct svc_rqst *, struct svc_fh *, loff_t, struct kvec *,int, unsigned long *); +int nfsd_vfs_read(struct svc_rqst *, struct svc_fh *, struct file *, + loff_t, struct kvec *, int, unsigned long *); int nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t, struct kvec *,int, unsigned long, int *); +int nfsd_vfs_write(struct svc_rqst *, struct svc_fh *,struct file *, + loff_t, struct kvec *,int, unsigned long, int *); int nfsd_readlink(struct svc_rqst *, struct svc_fh *, char *, int *); int nfsd_symlink(struct svc_rqst *, struct svc_fh *, Index: linux-2.6.10/include/linux/nfsd/xdr4.h =================================================================== --- linux-2.6.10.orig/include/linux/nfsd/xdr4.h 2004-12-25 05:34:01.000000000 +0800 +++ linux-2.6.10/include/linux/nfsd/xdr4.h 2005-04-05 14:49:13.466682072 +0800 @@ -44,16 +44,6 @@ #define NFSD4_MAX_TAGLEN 128 #define XDR_LEN(n) (((n) + 3) & ~3) -typedef u32 delegation_zero_t; -typedef u32 delegation_boot_t; -typedef u64 delegation_id_t; - -typedef struct { - delegation_zero_t ds_zero; - delegation_boot_t ds_boot; - delegation_id_t ds_id; -} delegation_stateid_t; - struct nfsd4_change_info { u32 atomic; u32 before_ctime_sec; @@ -104,6 +94,10 @@ #define cr_specdata1 u.dev.specdata1 #define cr_specdata2 u.dev.specdata2 +struct nfsd4_delegreturn { + stateid_t dr_stateid; +}; + struct nfsd4_getattr { u32 ga_bmval[2]; /* request */ struct svc_fh *ga_fhp; /* response */ @@ -202,13 +196,13 @@ u32 op_claim_type; /* request */ struct xdr_netobj op_fname; /* request - everything but CLAIM_PREV */ u32 op_delegate_type; /* request - CLAIM_PREV only */ - delegation_stateid_t op_delegate_stateid; /* request - CLAIM_DELEGATE_CUR only */ + stateid_t op_delegate_stateid; /* request - response */ u32 op_create; /* request */ u32 op_createmode; /* request */ 
u32 op_bmval[2]; /* request */ union { /* request */ - struct iattr iattr; /* UNCHECKED4,GUARDED4 */ - nfs4_verifier verf; /* EXCLUSIVE4 */ + struct iattr iattr; /* UNCHECKED4,GUARDED4 */ + nfs4_verifier verf; /* EXCLUSIVE4 */ } u; clientid_t op_clientid; /* request */ struct xdr_netobj op_owner; /* request */ @@ -247,6 +241,7 @@ u32 rd_length; /* request */ struct kvec rd_iov[RPCSVC_MAXPAGES]; int rd_vlen; + struct file *rd_filp; struct svc_rqst *rd_rqstp; /* response */ struct svc_fh * rd_fhp; /* response */ @@ -345,6 +340,7 @@ struct nfsd4_close close; struct nfsd4_commit commit; struct nfsd4_create create; + struct nfsd4_delegreturn delegreturn; struct nfsd4_getattr getattr; struct svc_fh * getfh; struct nfsd4_link link; @@ -456,6 +452,8 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *rlockowner); extern void nfsd4_release_compoundargs(struct nfsd4_compoundargs *); +extern int nfsd4_delegreturn(struct svc_rqst *rqstp, + struct svc_fh *current_fh, struct nfsd4_delegreturn *dr); #endif /* Index: linux-2.6.10/include/linux/fs.h =================================================================== --- linux-2.6.10.orig/include/linux/fs.h 2005-03-31 15:35:26.000000000 +0800 +++ linux-2.6.10/include/linux/fs.h 2005-04-05 14:49:13.461682832 +0800 @@ -1185,11 +1185,6 @@ extern int vfs_statfs(struct super_block *, struct kstatfs *); -/* Return value for VFS lock functions - tells locks.c to lock conventionally - * REALLY kosha for root NFS and nfs_lock - */ -#define LOCK_USE_CLNT 1 - #define FLOCK_VERIFY_READ 1 #define FLOCK_VERIFY_WRITE 2 Index: linux-2.6.10/include/linux/dcache.h =================================================================== --- linux-2.6.10.orig/include/linux/dcache.h 2005-03-31 15:35:26.000000000 +0800 +++ linux-2.6.10/include/linux/dcache.h 2005-04-05 14:49:13.460682984 +0800 @@ -200,6 +200,7 @@ * These are the low-level FS interfaces to the dcache.. 
*/ extern void d_instantiate(struct dentry *, struct inode *); +extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *); extern void d_delete(struct dentry *); /* allocate/de-allocate */ @@ -244,6 +245,23 @@ d_rehash(entry); } +/** + * d_add_unique - add dentry to hash queues without aliasing + * @entry: dentry to add + * @inode: The inode to attach to this dentry + * + * This adds the entry to the hash queues and initializes @inode. + * The entry was actually filled in earlier during d_alloc(). + */ +static inline struct dentry *d_add_unique(struct dentry *entry, struct inode *inode) +{ + struct dentry *res; + + res = d_instantiate_unique(entry, inode); + d_rehash(res != NULL ? res : entry); + return res; +} + /* used for rename() and baskets */ extern void d_move(struct dentry *, struct dentry *); Index: linux-2.6.10/include/linux/nfs_fs.h =================================================================== --- linux-2.6.10.orig/include/linux/nfs_fs.h 2004-12-25 05:34:31.000000000 +0800 +++ linux-2.6.10/include/linux/nfs_fs.h 2005-04-05 14:49:13.463682528 +0800 @@ -30,6 +30,7 @@ #include #include #include +#include /* * Enable debugging support for nfs client. 
@@ -201,6 +202,7 @@ #define NFS_INO_INVALID_ATTR 0x0008 /* cached attrs are invalid */ #define NFS_INO_INVALID_DATA 0x0010 /* cached data is invalid */ #define NFS_INO_INVALID_ATIME 0x0020 /* cached atime is invalid */ +#define NFS_INO_INVALID_ACCESS 0x0040 /* cached access cred invalid */ static inline struct nfs_inode *NFS_I(struct inode *inode) { @@ -239,7 +241,7 @@ static inline void NFS_CACHEINV(struct inode *inode) { if (!nfs_caches_unstable(inode)) - NFS_FLAGS(inode) |= NFS_INO_INVALID_ATTR; + NFS_FLAGS(inode) |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS; } static inline int nfs_server_capable(struct inode *inode, int cap) @@ -424,6 +426,44 @@ return nfs_wb_page_priority(inode, page, 0); } +/* + * Allocate and free nfs_write_data structures + */ +extern mempool_t *nfs_wdata_mempool; +extern mempool_t *nfs_commit_mempool; + +static inline struct nfs_write_data *nfs_writedata_alloc(void) +{ + struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS); + if (p) { + memset(p, 0, sizeof(*p)); + INIT_LIST_HEAD(&p->pages); + } + return p; +} + +static inline void nfs_writedata_free(struct nfs_write_data *p) +{ + mempool_free(p, nfs_wdata_mempool); +} + +extern void nfs_writedata_release(struct rpc_task *task); + +static inline struct nfs_write_data *nfs_commit_alloc(void) +{ + struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS); + if (p) { + memset(p, 0, sizeof(*p)); + INIT_LIST_HEAD(&p->pages); + } + return p; +} + +static inline void nfs_commit_free(struct nfs_write_data *p) +{ + mempool_free(p, nfs_commit_mempool); +} + /* Hack for future NFS swap support */ #ifndef IS_SWAPFILE # define IS_SWAPFILE(inode) (0) @@ -439,6 +479,26 @@ extern void nfs_readpage_result(struct rpc_task *); /* + * Allocate and free nfs_read_data structures + */ +extern mempool_t *nfs_rdata_mempool; + +static inline struct nfs_read_data *nfs_readdata_alloc(void) +{ + struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS); + if (p) + 
memset(p, 0, sizeof(*p)); + return p; +} + +static inline void nfs_readdata_free(struct nfs_read_data *p) +{ + mempool_free(p, nfs_rdata_mempool); +} + +extern void nfs_readdata_release(struct rpc_task *task); + +/* * linux/fs/mount_clnt.c * (Used only by nfsroot module) */ @@ -644,6 +704,12 @@ extern struct dentry_operations nfs4_dentry_operations; extern struct inode_operations nfs4_dir_inode_operations; +extern struct inode_operations nfs4_file_inode_operations; + +/* inode.c */ +extern ssize_t nfs4_getxattr(struct dentry *, const char *, void *, size_t); +extern int nfs4_setxattr(struct dentry *, const char *, const void *, size_t, int); +extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t); /* nfs4proc.c */ extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short); @@ -651,13 +717,14 @@ extern int nfs4_open_reclaim(struct nfs4_state_owner *, struct nfs4_state *); extern int nfs4_proc_async_renew(struct nfs4_client *); extern int nfs4_proc_renew(struct nfs4_client *); -extern int nfs4_do_close(struct inode *, struct nfs4_state *); -extern int nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode); +extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode); extern int nfs4_wait_clnt_recover(struct rpc_clnt *, struct nfs4_client *); extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); extern int nfs4_open_revalidate(struct inode *, struct dentry *, int); extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *); extern int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request); +extern ssize_t nfs4_proc_get_acl(struct inode *, void *buf, ssize_t buflen); +extern int nfs4_proc_set_acl(struct inode *, const void *buf, ssize_t buflen); /* nfs4renewd.c */ extern void nfs4_schedule_state_renewal(struct nfs4_client *); Index: linux-2.6.10/include/linux/nfs4.h 
=================================================================== --- linux-2.6.10.orig/include/linux/nfs4.h 2004-12-25 05:34:45.000000000 +0800 +++ linux-2.6.10/include/linux/nfs4.h 2005-04-05 14:49:13.474680856 +0800 @@ -28,7 +28,7 @@ #define NFS4_ACCESS_DELETE 0x0010 #define NFS4_ACCESS_EXECUTE 0x0020 -#define NFS4_FH_PERISTENT 0x0000 +#define NFS4_FH_PERSISTENT 0x0000 #define NFS4_FH_NOEXPIRE_WITH_OPEN 0x0001 #define NFS4_FH_VOLATILE_ANY 0x0002 #define NFS4_FH_VOL_MIGRATION 0x0004 @@ -382,6 +382,8 @@ NFSPROC4_CLNT_READDIR, NFSPROC4_CLNT_SERVER_CAPS, NFSPROC4_CLNT_DELEGRETURN, + NFSPROC4_CLNT_GETACL, + NFSPROC4_CLNT_SETACL, }; #endif Index: linux-2.6.10/include/linux/sunrpc/auth.h =================================================================== --- linux-2.6.10.orig/include/linux/sunrpc/auth.h 2004-12-25 05:34:57.000000000 +0800 +++ linux-2.6.10/include/linux/sunrpc/auth.h 2005-04-05 14:49:13.468681768 +0800 @@ -51,7 +51,6 @@ }; #define RPCAUTH_CRED_LOCKED 0x0001 #define RPCAUTH_CRED_UPTODATE 0x0002 -#define RPCAUTH_CRED_DEAD 0x0004 #define RPCAUTH_CRED_MAGIC 0x0f4aa4f0 @@ -133,7 +132,6 @@ int rpcauth_refreshcred(struct rpc_task *); void rpcauth_invalcred(struct rpc_task *); int rpcauth_uptodatecred(struct rpc_task *); -int rpcauth_deadcred(struct rpc_task *); void rpcauth_init_credcache(struct rpc_auth *); void rpcauth_free_credcache(struct rpc_auth *); Index: linux-2.6.10/include/linux/sunrpc/svc.h =================================================================== --- linux-2.6.10.orig/include/linux/sunrpc/svc.h 2004-12-25 05:34:58.000000000 +0800 +++ linux-2.6.10/include/linux/sunrpc/svc.h 2005-04-05 14:49:13.467681920 +0800 @@ -251,8 +251,7 @@ char * pg_name; /* service name */ char * pg_class; /* class name: services sharing authentication */ struct svc_stat * pg_stats; /* rpc statistics */ - /* Override authentication. 
NULL means use default */ - int (*pg_authenticate)(struct svc_rqst *, u32 *); + int (*pg_authenticate)(struct svc_rqst *); }; /* Index: linux-2.6.10/include/linux/sunrpc/cache.h =================================================================== --- linux-2.6.10.orig/include/linux/sunrpc/cache.h 2004-12-25 05:34:57.000000000 +0800 +++ linux-2.6.10/include/linux/sunrpc/cache.h 2005-04-05 14:49:13.470681464 +0800 @@ -128,20 +128,17 @@ * just like a template in C++, this macro does cache lookup * for us. * The function is passed some sort of HANDLE from which a cache_detail - * structure can be determined (via SETUP, DETAIL), a template + * structure can be determined (via DETAIL), a template * cache entry (type RTN*), and a "set" flag. Using the HASHFN and the * TEST, the function will try to find a matching cache entry in the cache. * If "set" == 0 : * If an entry is found, it is returned * If no entry is found, a new non-VALID entry is created. - * If "set" == 1 and INPLACE == 0 : + * If "set" == 1: * If no entry is found a new one is inserted with data from "template" * If a non-CACHE_VALID entry is found, it is updated from template using UPDATE * If a CACHE_VALID entry is found, a new entry is swapped in with data * from "template" - * If set == 1, and INPLACE == 1 : - * As above, except that if a CACHE_VALID entry is found, we UPDATE in place - * instead of swapping in a new entry. * * If the passed handle has the CACHE_NEGATIVE flag set, then UPDATE is not * run but insteead CACHE_NEGATIVE is set in any new item. @@ -153,25 +150,22 @@ * MEMBER is the member of the cache which is cache_head, which must be first * FNAME is the name for the function * ARGS are arguments to function and must contain RTN *item, int set. May - * also contain something to be usedby SETUP or DETAIL to find cache_detail. - * SETUP locates the cache detail and makes it available as... 
- * DETAIL identifies the cache detail, possibly set up by SETUP + * also contain something to be used by DETAIL to find cache_detail. + * DETAIL identifies the cache detail * HASHFN returns a hash value of the cache entry "item" * TEST tests if "tmp" matches "item" * INIT copies key information from "item" to "new" * UPDATE copies content information from "item" to "tmp" - * INPLACE is true if updates can happen inplace rather than allocating a new structure * * WARNING: any substantial changes to this must be reflected in * net/sunrpc/svcauth.c(auth_domain_lookup) * which is a similar routine that is open-coded. */ -#define DefineCacheLookup(RTN,MEMBER,FNAME,ARGS,SETUP,DETAIL,HASHFN,TEST,INIT,UPDATE,INPLACE) \ +#define DefineCacheLookup(RTN,MEMBER,FNAME,ARGS,DETAIL,HASHFN,TEST,INIT,UPDATE) \ RTN *FNAME ARGS \ { \ RTN *tmp, *new=NULL; \ struct cache_head **hp, **head; \ - SETUP; \ head = &(DETAIL)->hash_table[HASHFN]; \ retry: \ if (set||new) write_lock(&(DETAIL)->hash_lock); \ @@ -180,14 +174,14 @@ tmp = container_of(*hp, RTN, MEMBER); \ if (TEST) { /* found a match */ \ \ - if (set && !INPLACE && test_bit(CACHE_VALID, &tmp->MEMBER.flags) && !new) \ + if (set && test_bit(CACHE_VALID, &tmp->MEMBER.flags) && !new) \ break; \ \ if (new) \ {INIT;} \ cache_get(&tmp->MEMBER); \ if (set) { \ - if (!INPLACE && test_bit(CACHE_VALID, &tmp->MEMBER.flags))\ + if (test_bit(CACHE_VALID, &tmp->MEMBER.flags))\ { /* need to swap in new */ \ RTN *t2; \ \ @@ -209,7 +203,7 @@ else read_unlock(&(DETAIL)->hash_lock); \ if (set) \ cache_fresh(DETAIL, &tmp->MEMBER, item->MEMBER.expiry_time); \ - if (set && !INPLACE && new) cache_fresh(DETAIL, &new->MEMBER, 0); \ + if (set && new) cache_fresh(DETAIL, &new->MEMBER, 0); \ if (new) (DETAIL)->cache_put(&new->MEMBER, DETAIL); \ return tmp; \ } \ @@ -242,10 +236,10 @@ return NULL; \ } -#define DefineSimpleCacheLookup(STRUCT,INPLACE) \ - DefineCacheLookup(struct STRUCT, h, STRUCT##_lookup, (struct STRUCT *item, int set), /*no setup */, \ 
+#define DefineSimpleCacheLookup(STRUCT) \ + DefineCacheLookup(struct STRUCT, h, STRUCT##_lookup, (struct STRUCT *item, int set), \ & STRUCT##_cache, STRUCT##_hash(item), STRUCT##_match(item, tmp),\ - STRUCT##_init(new, item), STRUCT##_update(tmp, item),INPLACE) + STRUCT##_init(new, item), STRUCT##_update(tmp, item)) #define cache_for_each(pos, detail, index, member) \ for (({read_lock(&(detail)->hash_lock); index = (detail)->hash_size;}) ; \ Index: linux-2.6.10/include/linux/sunrpc/sched.h =================================================================== --- linux-2.6.10.orig/include/linux/sunrpc/sched.h 2004-12-25 05:35:01.000000000 +0800 +++ linux-2.6.10/include/linux/sunrpc/sched.h 2005-04-05 14:49:13.472681160 +0800 @@ -11,7 +11,9 @@ #include #include +#include #include +#include #include /* @@ -25,11 +27,18 @@ struct rpc_cred * rpc_cred; /* Credentials */ }; +struct rpc_wait_queue; +struct rpc_wait { + struct list_head list; /* wait queue links */ + struct list_head links; /* Links to related tasks */ + wait_queue_head_t waitq; /* sync: sleep on this q */ + struct rpc_wait_queue * rpc_waitq; /* RPC wait queue we're on */ +}; + /* * This is the RPC task struct */ struct rpc_task { - struct list_head tk_list; /* wait queue links */ #ifdef RPC_DEBUG unsigned long tk_magic; /* 0xf00baa */ #endif @@ -37,7 +46,6 @@ struct rpc_clnt * tk_client; /* RPC client */ struct rpc_rqst * tk_rqstp; /* RPC request */ int tk_status; /* result of last operation */ - struct rpc_wait_queue * tk_rpcwait; /* RPC wait queue we're on */ /* * RPC call state @@ -70,13 +78,18 @@ * you have a pathological interest in kernel oopses. 
*/ struct timer_list tk_timer; /* kernel timer */ - wait_queue_head_t tk_wait; /* sync: sleep on this q */ unsigned long tk_timeout; /* timeout for rpc_sleep() */ unsigned short tk_flags; /* misc flags */ unsigned char tk_active : 1;/* Task has been activated */ unsigned char tk_priority : 2;/* Task priority */ unsigned long tk_runstate; /* Task run status */ - struct list_head tk_links; /* links to related tasks */ + struct workqueue_struct *tk_workqueue; /* Normally rpciod, but could + * be any workqueue + */ + union { + struct work_struct tk_work; /* Async task work queue */ + struct rpc_wait tk_wait; /* RPC wait */ + } u; #ifdef RPC_DEBUG unsigned short tk_pid; /* debugging aid */ #endif @@ -87,11 +100,11 @@ /* support walking a list of tasks on a wait queue */ #define task_for_each(task, pos, head) \ list_for_each(pos, head) \ - if ((task=list_entry(pos, struct rpc_task, tk_list)),1) + if ((task=list_entry(pos, struct rpc_task, u.tk_wait.list)),1) #define task_for_first(task, head) \ if (!list_empty(head) && \ - ((task=list_entry((head)->next, struct rpc_task, tk_list)),1)) + ((task=list_entry((head)->next, struct rpc_task, u.tk_wait.list)),1)) /* .. 
and walking list of all tasks */ #define alltask_for_each(task, pos, head) \ @@ -126,22 +139,39 @@ #define RPC_IS_SOFT(t) ((t)->tk_flags & RPC_TASK_SOFT) #define RPC_TASK_UNINTERRUPTIBLE(t) ((t)->tk_flags & RPC_TASK_NOINTR) -#define RPC_TASK_SLEEPING 0 -#define RPC_TASK_RUNNING 1 -#define RPC_IS_SLEEPING(t) (test_bit(RPC_TASK_SLEEPING, &(t)->tk_runstate)) -#define RPC_IS_RUNNING(t) (test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)) +#define RPC_TASK_RUNNING 0 +#define RPC_TASK_QUEUED 1 +#define RPC_TASK_WAKEUP 2 +#define RPC_TASK_HAS_TIMER 3 +#define RPC_IS_RUNNING(t) (test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)) #define rpc_set_running(t) (set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)) -#define rpc_clear_running(t) (clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)) +#define rpc_test_and_set_running(t) \ + (test_and_set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)) +#define rpc_clear_running(t) \ + do { \ + smp_mb__before_clear_bit(); \ + clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate); \ + smp_mb__after_clear_bit(); \ + } while (0) -#define rpc_set_sleeping(t) (set_bit(RPC_TASK_SLEEPING, &(t)->tk_runstate)) +#define RPC_IS_QUEUED(t) (test_bit(RPC_TASK_QUEUED, &(t)->tk_runstate)) +#define rpc_set_queued(t) (set_bit(RPC_TASK_QUEUED, &(t)->tk_runstate)) +#define rpc_clear_queued(t) \ + do { \ + smp_mb__before_clear_bit(); \ + clear_bit(RPC_TASK_QUEUED, &(t)->tk_runstate); \ + smp_mb__after_clear_bit(); \ + } while (0) -#define rpc_clear_sleeping(t) \ +#define rpc_start_wakeup(t) \ + (test_and_set_bit(RPC_TASK_WAKEUP, &(t)->tk_runstate) == 0) +#define rpc_finish_wakeup(t) \ do { \ smp_mb__before_clear_bit(); \ - clear_bit(RPC_TASK_SLEEPING, &(t)->tk_runstate); \ + clear_bit(RPC_TASK_WAKEUP, &(t)->tk_runstate); \ smp_mb__after_clear_bit(); \ - } while(0) + } while (0) /* * Task priorities. 
@@ -157,6 +187,7 @@ * RPC synchronization objects */ struct rpc_wait_queue { + spinlock_t lock; struct list_head tasks[RPC_NR_PRIORITY]; /* task queue for each priority level */ unsigned long cookie; /* cookie of last task serviced */ unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */ @@ -177,6 +208,7 @@ #ifndef RPC_DEBUG # define RPC_WAITQ_INIT(var,qname) { \ + .lock = SPIN_LOCK_UNLOCKED, \ .tasks = { \ [0] = LIST_HEAD_INIT(var.tasks[0]), \ [1] = LIST_HEAD_INIT(var.tasks[1]), \ @@ -185,6 +217,7 @@ } #else # define RPC_WAITQ_INIT(var,qname) { \ + .lock = SPIN_LOCK_UNLOCKED, \ .tasks = { \ [0] = LIST_HEAD_INIT(var.tasks[0]), \ [1] = LIST_HEAD_INIT(var.tasks[1]), \ @@ -209,13 +242,10 @@ int rpc_execute(struct rpc_task *); void rpc_run_child(struct rpc_task *parent, struct rpc_task *child, rpc_action action); -int rpc_add_wait_queue(struct rpc_wait_queue *, struct rpc_task *); -void rpc_remove_wait_queue(struct rpc_task *); void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *); void rpc_init_wait_queue(struct rpc_wait_queue *, const char *); void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *, rpc_action action, rpc_action timer); -void rpc_add_timer(struct rpc_task *, rpc_action); void rpc_wake_up_task(struct rpc_task *); void rpc_wake_up(struct rpc_wait_queue *); struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *); Index: linux-2.6.10/include/linux/sunrpc/gss_krb5.h =================================================================== --- linux-2.6.10.orig/include/linux/sunrpc/gss_krb5.h 2004-12-25 05:34:57.000000000 +0800 +++ linux-2.6.10/include/linux/sunrpc/gss_krb5.h 2005-04-05 14:49:13.473681008 +0800 @@ -53,6 +53,8 @@ struct xdr_netobj mech_used; }; +extern spinlock_t krb5_seq_lock; + #define KG_TOK_MIC_MSG 0x0101 #define KG_TOK_WRAP_MSG 0x0201 @@ -116,18 +118,25 @@ s32 make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, - struct xdr_netobj *cksum); + int body_offset, 
struct xdr_netobj *cksum); u32 krb5_make_token(struct krb5_ctx *context_handle, int qop_req, struct xdr_buf *input_message_buffer, - struct xdr_netobj *output_message_buffer, int toktype); + struct xdr_netobj *output_message_buffer); u32 krb5_read_token(struct krb5_ctx *context_handle, struct xdr_netobj *input_token_buffer, - struct xdr_buf *message_buffer, - int *qop_state, int toktype); + struct xdr_buf *message_buffer, int *qop_state); + +u32 +gss_wrap_kerberos(struct gss_ctx *ctx_id, u32 qop, int offset, + struct xdr_buf *outbuf, struct page **pages); + +u32 +gss_unwrap_kerberos(struct gss_ctx *ctx_id, u32 *qop, int offset, + struct xdr_buf *buf, int *out_offset); u32 krb5_encrypt(struct crypto_tfm * key, @@ -137,6 +146,13 @@ krb5_decrypt(struct crypto_tfm * key, void *iv, void *in, void *out, int length); +int +gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *outbuf, int offset, + struct page **pages); + +int +gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *inbuf, int offset); + s32 krb5_make_seq_num(struct crypto_tfm * key, int direction, Index: linux-2.6.10/include/linux/sunrpc/xdr.h =================================================================== --- linux-2.6.10.orig/include/linux/sunrpc/xdr.h 2004-12-25 05:35:40.000000000 +0800 +++ linux-2.6.10/include/linux/sunrpc/xdr.h 2005-04-05 14:49:13.467681920 +0800 @@ -192,6 +192,7 @@ extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p); extern uint32_t *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len); +extern void truncate_xdr_buf(struct xdr_buf *xdr, int len); #endif /* __KERNEL__ */ Index: linux-2.6.10/include/linux/sunrpc/gss_api.h =================================================================== --- linux-2.6.10.orig/include/linux/sunrpc/gss_api.h 2004-12-25 05:35:28.000000000 +0800 +++ linux-2.6.10/include/linux/sunrpc/gss_api.h 2005-04-05 14:49:13.471681312 +0800 @@ 
-47,6 +47,18 @@ struct xdr_buf *message, struct xdr_netobj *mic_token, u32 *qstate); +u32 gss_wrap( + struct gss_ctx *ctx_id, + u32 qop, + int offset, + struct xdr_buf *outbuf, + struct page **inpages); +u32 gss_unwrap( + struct gss_ctx *ctx_id, + u32 *qop, + int offset, + struct xdr_buf *inbuf, + int *out_offset); u32 gss_delete_sec_context( struct gss_ctx **ctx_id); @@ -93,6 +105,18 @@ struct xdr_buf *message, struct xdr_netobj *mic_token, u32 *qstate); + u32 (*gss_wrap)( + struct gss_ctx *ctx_id, + u32 qop, + int offset, + struct xdr_buf *outbuf, + struct page **inpages); + u32 (*gss_unwrap)( + struct gss_ctx *ctx_id, + u32 *qop, + int offset, + struct xdr_buf *buf, + int *out_offset); void (*gss_delete_sec_context)( void *internal_ctx_id); }; Index: linux-2.6.10/include/linux/sunrpc/svcauth.h =================================================================== --- linux-2.6.10.orig/include/linux/sunrpc/svcauth.h 2004-12-25 05:34:31.000000000 +0800 +++ linux-2.6.10/include/linux/sunrpc/svcauth.h 2005-04-05 14:49:13.469681616 +0800 @@ -26,21 +26,23 @@ struct svc_rqst; /* forward decl */ /* Authentication is done in the context of a domain. - * For a server, a domain represents a group of clients using + * + * Currently, the nfs server uses the auth_domain to stand + * for the "client" listed in /etc/exports. + * + * More generally, a domain might represent a group of clients using * a common mechanism for authentication and having a common mapping * between local identity (uid) and network identity. All clients * in a domain have similar general access rights. Each domain can * contain multiple principals which will have different specific right * based on normal Discretionary Access Control. * - * For a client, a domain represents a number of servers which all - * use a common authentication mechanism and network identity name space. - * * A domain is created by an authentication flavour module based on name * only. Userspace then fills in detail on demand. 
* - * The creation of a domain typically implies creation of one or - * more caches for storing domain specific information. + * In the case of auth_unix and auth_null, the auth_domain is also + * associated with entries in another cache representing the mapping + * of ip addresses to the given client. */ struct auth_domain { struct cache_head h; @@ -92,6 +94,7 @@ int (*accept)(struct svc_rqst *rq, u32 *authp); int (*release)(struct svc_rqst *rq); void (*domain_release)(struct auth_domain *); + int (*set_client)(struct svc_rqst *rq); }; #define SVC_GARBAGE 1 @@ -107,6 +110,7 @@ extern int svc_authenticate(struct svc_rqst *rqstp, u32 *authp); extern int svc_authorise(struct svc_rqst *rqstp); +extern int svc_set_client(struct svc_rqst *rqstp); extern int svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops); extern void svc_auth_unregister(rpc_authflavor_t flavor); Index: linux-2.6.10/include/linux/sunrpc/xprt.h =================================================================== --- linux-2.6.10.orig/include/linux/sunrpc/xprt.h 2004-12-25 05:35:23.000000000 +0800 +++ linux-2.6.10/include/linux/sunrpc/xprt.h 2005-04-05 14:49:13.471681312 +0800 @@ -95,7 +95,10 @@ int rq_cong; /* has incremented xprt->cong */ int rq_received; /* receive completed */ u32 rq_seqno; /* gss seq no. used on req. 
*/ - + int rq_enc_pages_num; + struct page **rq_enc_pages; /* scratch pages for use by + gss privacy code */ + void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */ struct list_head rq_list; struct xdr_buf rq_private_buf; /* The receive buffer Index: linux-2.6.10/include/linux/nfs_xdr.h =================================================================== --- linux-2.6.10.orig/include/linux/nfs_xdr.h 2004-12-25 05:35:24.000000000 +0800 +++ linux-2.6.10/include/linux/nfs_xdr.h 2005-04-05 14:49:13.459683136 +0800 @@ -326,6 +326,20 @@ const u32 * bitmask; }; +struct nfs_setaclargs { + struct nfs_fh * fh; + ssize_t acl_len; + unsigned int acl_pgbase; + struct page ** acl_pages; +}; + +struct nfs_getaclargs { + struct nfs_fh * fh; + ssize_t acl_len; + unsigned int acl_pgbase; + struct page ** acl_pages; +}; + struct nfs_setattrres { struct nfs_fattr * fattr; const struct nfs_server * server; @@ -666,6 +680,7 @@ int version; /* Protocol version */ struct dentry_operations *dentry_ops; struct inode_operations *dir_inode_ops; + struct inode_operations *file_inode_ops; int (*getroot) (struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); @@ -681,7 +696,7 @@ int (*read) (struct nfs_read_data *); int (*write) (struct nfs_write_data *); int (*commit) (struct nfs_write_data *); - struct inode * (*create) (struct inode *, struct qstr *, + struct inode * (*create) (struct inode *, struct dentry *, struct iattr *, int); int (*remove) (struct inode *, struct qstr *); int (*unlink_setup) (struct rpc_message *, Index: linux-2.6.10/net/sunrpc/xprt.c =================================================================== --- linux-2.6.10.orig/net/sunrpc/xprt.c 2004-12-25 05:35:14.000000000 +0800 +++ linux-2.6.10/net/sunrpc/xprt.c 2005-04-05 14:49:13.393693168 +0800 @@ -891,7 +891,8 @@ xprt->tcp_flags &= ~XPRT_COPY_XID; xprt->tcp_flags |= XPRT_COPY_DATA; xprt->tcp_copied = 4; - dprintk("RPC: reading reply for XID %08x\n", xprt->tcp_xid); + dprintk("RPC: reading reply 
for XID %08x\n", + ntohl(xprt->tcp_xid)); tcp_check_recm(xprt); } @@ -911,7 +912,7 @@ if (!req) { xprt->tcp_flags &= ~XPRT_COPY_DATA; dprintk("RPC: XID %08x request not found!\n", - xprt->tcp_xid); + ntohl(xprt->tcp_xid)); spin_unlock(&xprt->sock_lock); return; } @@ -1101,7 +1102,7 @@ goto out; spin_lock_bh(&xprt->sock_lock); - if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->pending) + if (xprt->snd_task) rpc_wake_up_task(xprt->snd_task); spin_unlock_bh(&xprt->sock_lock); out: @@ -1359,8 +1360,9 @@ req->rq_task = task; req->rq_xprt = xprt; req->rq_xid = xprt_alloc_xid(xprt); + req->rq_release_snd_buf = NULL; dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, - req, req->rq_xid); + req, ntohl(req->rq_xid)); } /* @@ -1384,6 +1386,8 @@ mod_timer(&xprt->timer, xprt->last_used + XPRT_IDLE_TIMEOUT); spin_unlock_bh(&xprt->sock_lock); task->tk_rqstp = NULL; + if (req->rq_release_snd_buf) + req->rq_release_snd_buf(req); memset(req, 0, sizeof(*req)); /* mark unused */ dprintk("RPC: %4d release request %p\n", task->tk_pid, req); Index: linux-2.6.10/net/sunrpc/auth.c =================================================================== --- linux-2.6.10.orig/net/sunrpc/auth.c 2004-12-25 05:34:57.000000000 +0800 +++ linux-2.6.10/net/sunrpc/auth.c 2005-04-05 14:49:13.394693016 +0800 @@ -214,8 +214,6 @@ list_for_each_safe(pos, next, &auth->au_credcache[nr]) { struct rpc_cred *entry; entry = list_entry(pos, struct rpc_cred, cr_hash); - if (entry->cr_flags & RPCAUTH_CRED_DEAD) - continue; if (rpcauth_prune_expired(entry, &free)) continue; if (entry->cr_ops->crmatch(acred, entry, taskflags)) { @@ -307,9 +305,6 @@ if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock)) return; - if ((cred->cr_flags & RPCAUTH_CRED_DEAD) && !list_empty(&cred->cr_hash)) - list_del_init(&cred->cr_hash); - if (list_empty(&cred->cr_hash)) { spin_unlock(&rpc_credcache_lock); rpcauth_crdestroy(cred); @@ -413,10 +408,3 @@ return !(task->tk_msg.rpc_cred) || 
(task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_UPTODATE); } - -int -rpcauth_deadcred(struct rpc_task *task) -{ - return !(task->tk_msg.rpc_cred) || - (task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_DEAD); -} Index: linux-2.6.10/net/sunrpc/svcauth_unix.c =================================================================== --- linux-2.6.10.orig/net/sunrpc/svcauth_unix.c 2004-12-25 05:35:00.000000000 +0800 +++ linux-2.6.10/net/sunrpc/svcauth_unix.c 2005-04-05 14:49:13.395692864 +0800 @@ -97,7 +97,7 @@ }; static struct cache_head *ip_table[IP_HASHMAX]; -void ip_map_put(struct cache_head *item, struct cache_detail *cd) +static void ip_map_put(struct cache_head *item, struct cache_detail *cd) { struct ip_map *im = container_of(item, struct ip_map,h); if (cache_put(item, cd)) { @@ -258,7 +258,7 @@ .cache_show = ip_map_show, }; -static DefineSimpleCacheLookup(ip_map, 0) +static DefineSimpleCacheLookup(ip_map) int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom) @@ -329,14 +329,49 @@ cache_purge(&auth_domain_cache); } +int +svcauth_unix_set_client(struct svc_rqst *rqstp) +{ + struct ip_map key, *ipm; + + rqstp->rq_client = NULL; + if (rqstp->rq_proc == 0) + return SVC_OK; + + strcpy(key.m_class, rqstp->rq_server->sv_program->pg_class); + key.m_addr = rqstp->rq_addr.sin_addr; + + ipm = ip_map_lookup(&key, 0); + + if (ipm == NULL) + return SVC_DENIED; + + switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) { + case -EAGAIN: + return SVC_DROP; + case -ENOENT: + return SVC_DENIED; + case 0: + rqstp->rq_client = &ipm->m_client->h; + cache_get(&rqstp->rq_client->h); + ip_map_put(&ipm->h, &ip_map_cache); + return SVC_OK; + default: + BUG(); + } + /* shut up gcc: */ + return -1; +} static int svcauth_null_accept(struct svc_rqst *rqstp, u32 *authp) { struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; - int rv=0; - struct ip_map key, *ipm; + struct svc_cred *cred = &rqstp->rq_cred; + + cred->cr_group_info = NULL; + 
rqstp->rq_client = NULL; if (argv->iov_len < 3*4) return SVC_GARBAGE; @@ -353,45 +388,17 @@ } /* Signal that mapping to nobody uid/gid is required */ - rqstp->rq_cred.cr_uid = (uid_t) -1; - rqstp->rq_cred.cr_gid = (gid_t) -1; - rqstp->rq_cred.cr_group_info = groups_alloc(0); - if (rqstp->rq_cred.cr_group_info == NULL) + cred->cr_uid = (uid_t) -1; + cred->cr_gid = (gid_t) -1; + cred->cr_group_info = groups_alloc(0); + if (cred->cr_group_info == NULL) return SVC_DROP; /* kmalloc failure - client must retry */ /* Put NULL verifier */ svc_putu32(resv, RPC_AUTH_NULL); svc_putu32(resv, 0); - strcpy(key.m_class, rqstp->rq_server->sv_program->pg_class); - key.m_addr = rqstp->rq_addr.sin_addr; - - ipm = ip_map_lookup(&key, 0); - - rqstp->rq_client = NULL; - - if (ipm) - switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) { - case -EAGAIN: - rv = SVC_DROP; - break; - case -ENOENT: - rv = SVC_OK; /* rq_client is NULL */ - break; - case 0: - rqstp->rq_client = &ipm->m_client->h; - cache_get(&rqstp->rq_client->h); - ip_map_put(&ipm->h, &ip_map_cache); - rv = SVC_OK; - break; - default: BUG(); - } - else rv = SVC_DROP; - - if (rqstp->rq_client == NULL && rqstp->rq_proc != 0) - *authp = rpc_autherr_badcred; - - return rv; + return SVC_OK; } static int @@ -414,6 +421,7 @@ .flavour = RPC_AUTH_NULL, .accept = svcauth_null_accept, .release = svcauth_null_release, + .set_client = svcauth_unix_set_client, }; @@ -425,8 +433,6 @@ struct svc_cred *cred = &rqstp->rq_cred; u32 slen, i; int len = argv->iov_len; - int rv=0; - struct ip_map key, *ipm; cred->cr_group_info = NULL; rqstp->rq_client = NULL; @@ -458,39 +464,11 @@ return SVC_DENIED; } - - strcpy(key.m_class, rqstp->rq_server->sv_program->pg_class); - key.m_addr = rqstp->rq_addr.sin_addr; - - - ipm = ip_map_lookup(&key, 0); - - if (ipm) - switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) { - case -EAGAIN: - rv = SVC_DROP; - break; - case -ENOENT: - rv = SVC_OK; /* rq_client is NULL */ - break; - case 0: - 
rqstp->rq_client = &ipm->m_client->h; - cache_get(&rqstp->rq_client->h); - ip_map_put(&ipm->h, &ip_map_cache); - rv = SVC_OK; - break; - default: BUG(); - } - else rv = SVC_DROP; - - if (rv == SVC_OK && rqstp->rq_client == NULL && rqstp->rq_proc != 0) - goto badcred; - /* Put NULL verifier */ svc_putu32(resv, RPC_AUTH_NULL); svc_putu32(resv, 0); - return rv; + return SVC_OK; badcred: *authp = rpc_autherr_badcred; @@ -520,5 +498,6 @@ .accept = svcauth_unix_accept, .release = svcauth_unix_release, .domain_release = svcauth_unix_domain_release, + .set_client = svcauth_unix_set_client, }; Index: linux-2.6.10/net/sunrpc/clnt.c =================================================================== --- linux-2.6.10.orig/net/sunrpc/clnt.c 2005-03-31 15:35:26.000000000 +0800 +++ linux-2.6.10/net/sunrpc/clnt.c 2005-04-05 14:49:13.410690584 +0800 @@ -636,8 +636,14 @@ rpc_exit(task, -EIO); return; } - if (encode && (status = rpcauth_wrap_req(task, encode, req, p, - task->tk_msg.rpc_argp)) < 0) { + if (encode == NULL) + return; + + status = rpcauth_wrap_req(task, encode, req, p, task->tk_msg.rpc_argp); + if (status == -EAGAIN) { + printk("XXXJBF: out of memory? 
Should retry here!!!\n"); + } + if (status < 0) { printk(KERN_WARNING "%s: can't encode arguments: %d\n", clnt->cl_protname, -status); rpc_exit(task, status); @@ -935,7 +941,7 @@ task->tk_action = call_reserve; if (status >= 0 && rpcauth_uptodatecred(task)) return; - if (rpcauth_deadcred(task)) { + if (status == -EACCES) { rpc_exit(task, -EACCES); return; } @@ -993,7 +999,7 @@ goto garbage; if ((n = ntohl(*p++)) != RPC_AUTH_ERROR) { printk(KERN_WARNING "call_verify: RPC call rejected: %x\n", n); - } else if (--len < 0) + } else if (--len == 0) switch ((n = ntohl(*p++))) { case RPC_AUTH_REJECTEDCRED: case RPC_AUTH_REJECTEDVERF: Index: linux-2.6.10/net/sunrpc/svcauth.c =================================================================== --- linux-2.6.10.orig/net/sunrpc/svcauth.c 2004-12-25 05:35:23.000000000 +0800 +++ linux-2.6.10/net/sunrpc/svcauth.c 2005-04-05 14:49:13.392693320 +0800 @@ -59,6 +59,11 @@ return aops->accept(rqstp, authp); } +int svc_set_client(struct svc_rqst *rqstp) +{ + return rqstp->rq_authop->set_client(rqstp); +} + /* A request, which was authenticated, has now executed. * Time to finalise the the credentials and verifier * and release and resources Index: linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_unseal.c =================================================================== --- linux-2.6.10.orig/net/sunrpc/auth_gss/gss_krb5_unseal.c 2004-12-25 05:35:24.000000000 +0800 +++ linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_unseal.c 2005-04-05 14:49:13.401691952 +0800 @@ -68,20 +68,13 @@ #endif -/* message_buffer is an input if toktype is MIC and an output if it is WRAP: - * If toktype is MIC: read_token is a mic token, and message_buffer is the - * data that the mic was supposedly taken over. - * If toktype is WRAP: read_token is a wrap token, and message_buffer is used - * to return the decrypted data. - */ +/* read_token is a mic token, and message_buffer is the data that the mic was + * supposedly taken over. 
*/ -/* XXX will need to change prototype and/or just split into a separate function - * when we add privacy (because read_token will be in pages too). */ u32 krb5_read_token(struct krb5_ctx *ctx, struct xdr_netobj *read_token, - struct xdr_buf *message_buffer, - int *qop_state, int toktype) + struct xdr_buf *message_buffer, int *qop_state) { int signalg; int sealalg; @@ -100,16 +93,12 @@ read_token->len)) goto out; - if ((*ptr++ != ((toktype>>8)&0xff)) || (*ptr++ != (toktype&0xff))) + if ((*ptr++ != ((KG_TOK_MIC_MSG>>8)&0xff)) || + (*ptr++ != ( KG_TOK_MIC_MSG &0xff)) ) goto out; /* XXX sanity-check bodysize?? */ - if (toktype == KG_TOK_WRAP_MSG) { - /* XXX gone */ - goto out; - } - /* get the sign and seal algorithms */ signalg = ptr[0] + (ptr[1] << 8); @@ -120,14 +109,7 @@ if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) goto out; - if (((toktype != KG_TOK_WRAP_MSG) && (sealalg != 0xffff)) || - ((toktype == KG_TOK_WRAP_MSG) && (sealalg == 0xffff))) - goto out; - - /* in the current spec, there is only one valid seal algorithm per - key type, so a simple comparison is ok */ - - if ((toktype == KG_TOK_WRAP_MSG) && !(sealalg == ctx->sealalg)) + if (sealalg != 0xffff) goto out; /* there are several mappings of seal algorithms to sign algorithms, @@ -154,7 +136,7 @@ switch (signalg) { case SGN_ALG_DES_MAC_MD5: ret = make_checksum(checksum_type, ptr - 2, 8, - message_buffer, &md5cksum); + message_buffer, 0, &md5cksum); if (ret) goto out; Index: linux-2.6.10/net/sunrpc/auth_gss/gss_mech_switch.c =================================================================== --- linux-2.6.10.orig/net/sunrpc/auth_gss/gss_mech_switch.c 2004-12-25 05:35:01.000000000 +0800 +++ linux-2.6.10/net/sunrpc/auth_gss/gss_mech_switch.c 2005-04-05 14:49:13.408690888 +0800 @@ -279,6 +279,29 @@ qstate); } +u32 +gss_wrap(struct gss_ctx *ctx_id, + u32 qop, + int offset, + struct xdr_buf *buf, + struct page **inpages) +{ + return ctx_id->mech_type->gm_ops + ->gss_wrap(ctx_id, qop, offset, buf, inpages); +} + 
+u32 +gss_unwrap(struct gss_ctx *ctx_id, + u32 *qop, + int offset, + struct xdr_buf *buf, + int *out_offset) +{ + return ctx_id->mech_type->gm_ops + ->gss_unwrap(ctx_id, qop, offset, buf, out_offset); +} + + /* gss_delete_sec_context: free all resources associated with context_handle. * Note this differs from the RFC 2744-specified prototype in that we don't * bother returning an output token, since it would never be used anyway. */ Index: linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_wrap.c =================================================================== --- linux-2.6.10.orig/net/sunrpc/auth_gss/gss_krb5_wrap.c 2005-04-05 19:01:49.158500672 +0800 +++ linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_wrap.c 2005-04-05 14:49:13.397692560 +0800 @@ -0,0 +1,337 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_AUTH +#endif + +static inline int +gss_krb5_padding(int blocksize, int length) +{ + /* Most of the code is block-size independent but currently we + * use only 8: */ + BUG_ON(blocksize != 8); + return 8 - (length & 7); +} + +static inline void +gss_krb5_add_padding(struct xdr_buf *buf, int offset, int blocksize) +{ + int padding = gss_krb5_padding(blocksize, buf->len - offset); + char *p; + struct kvec *iov; + + if (buf->page_len || buf->tail[0].iov_len) + iov = &buf->tail[0]; + else + iov = &buf->head[0]; + p = iov->iov_base + iov->iov_len; + iov->iov_len += padding; + buf->len += padding; + memset(p, padding, padding); +} + +static inline int +gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize) +{ + u8 *ptr; + u8 pad; + int len = buf->len; + + if (len <= buf->head[0].iov_len) { + pad = *(u8 *)(buf->head[0].iov_base + len - 1); + goto out; + } else + len -= buf->head[0].iov_len; + if (len <= buf->page_len) { + int last = (buf->page_base + len - 1) + >>PAGE_CACHE_SHIFT; + int offset = (buf->page_base + len - 1) + & (PAGE_CACHE_SIZE - 1); + ptr = kmap_atomic(buf->pages[last], 
KM_SKB_SUNRPC_DATA); + pad = *(ptr + offset); + kunmap_atomic(ptr, KM_SKB_SUNRPC_DATA); + goto out; + } else + len -= buf->page_len; + BUG_ON(len > buf->tail[0].iov_len); + pad = *(u8 *)(buf->tail[0].iov_base + len - 1); +out: + if (pad > blocksize) + return -EINVAL; + buf->len -= pad; + return 0; +} + +static inline void +make_confounder(char *p, int blocksize) +{ + /* XXX? Is this OK to do on every packet? */ + get_random_bytes(p, blocksize); +} + +/* Assumptions: the head and tail of inbuf are ours to play with. + * The pages, however, may be real pages in the page cache and we replace + * them with scratch pages from **pages before writing to them. */ +/* XXX: obviously the above should be documentation of wrap interface, + * and shouldn't be in this kerberos-specific file. */ + +/* XXX factor out common code with seal/unseal. */ + +u32 +gss_wrap_kerberos(struct gss_ctx *ctx, u32 qop, int offset, + struct xdr_buf *buf, struct page **pages) +{ + struct krb5_ctx *kctx = ctx->internal_ctx_id; + s32 checksum_type; + struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; + int blocksize = 0, plainlen; + unsigned char *ptr, *krb5_hdr, *msg_start; + s32 now; + int headlen; + struct page **tmp_pages; + u32 seq_send; + + dprintk("RPC: gss_wrap_kerberos\n"); + + now = get_seconds(); + + if (qop != 0) + goto out_err; + + switch (kctx->signalg) { + case SGN_ALG_DES_MAC_MD5: + checksum_type = CKSUMTYPE_RSA_MD5; + break; + default: + dprintk("RPC: gss_krb5_seal: kctx->signalg %d not" + " supported\n", kctx->signalg); + goto out_err; + } + if (kctx->sealalg != SEAL_ALG_NONE && kctx->sealalg != SEAL_ALG_DES) { + dprintk("RPC: gss_krb5_seal: kctx->sealalg %d not supported\n", + kctx->sealalg); + goto out_err; + } + + blocksize = crypto_tfm_alg_blocksize(kctx->enc); + gss_krb5_add_padding(buf, offset, blocksize); + BUG_ON((buf->len - offset) % blocksize); + plainlen = blocksize + buf->len - offset; + + headlen = g_token_size(&kctx->mech_used, 22 + plainlen) - + (buf->len - 
offset); + + ptr = buf->head[0].iov_base + offset; + /* shift data to make room for header. */ + /* XXX Would be cleverer to encrypt while copying. */ + /* XXX bounds checking, slack, etc. */ + memmove(ptr + headlen, ptr, buf->head[0].iov_len - offset); + buf->head[0].iov_len += headlen; + buf->len += headlen; + BUG_ON((buf->len - offset - headlen) % blocksize); + + g_make_token_header(&kctx->mech_used, 22 + plainlen, &ptr); + + + *ptr++ = (unsigned char) ((KG_TOK_WRAP_MSG>>8)&0xff); + *ptr++ = (unsigned char) (KG_TOK_WRAP_MSG&0xff); + + /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */ + krb5_hdr = ptr - 2; + msg_start = krb5_hdr + 24; + /* XXXJBF: */ BUG_ON(buf->head[0].iov_base + offset + headlen != msg_start + blocksize); + + *(u16 *)(krb5_hdr + 2) = htons(kctx->signalg); + memset(krb5_hdr + 4, 0xff, 4); + *(u16 *)(krb5_hdr + 4) = htons(kctx->sealalg); + + make_confounder(msg_start, blocksize); + + /* XXXJBF: UGH!: */ + tmp_pages = buf->pages; + buf->pages = pages; + if (make_checksum(checksum_type, krb5_hdr, 8, buf, + offset + headlen - blocksize, &md5cksum)) + goto out_err; + buf->pages = tmp_pages; + + switch (kctx->signalg) { + case SGN_ALG_DES_MAC_MD5: + if (krb5_encrypt(kctx->seq, NULL, md5cksum.data, + md5cksum.data, md5cksum.len)) + goto out_err; + memcpy(krb5_hdr + 16, + md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH, + KRB5_CKSUM_LENGTH); + + dprintk("RPC: make_seal_token: cksum data: \n"); + print_hexl((u32 *) (krb5_hdr + 16), KRB5_CKSUM_LENGTH, 0); + break; + default: + BUG(); + } + + kfree(md5cksum.data); + + spin_lock(&krb5_seq_lock); + seq_send = kctx->seq_send++; + spin_unlock(&krb5_seq_lock); + + /* XXX would probably be more efficient to compute checksum + * and encrypt at the same time: */ + if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 
0 : 0xff, + seq_send, krb5_hdr + 16, krb5_hdr + 8))) + goto out_err; + + if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize, + pages)) + goto out_err; + + return ((kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE); +out_err: + if (md5cksum.data) kfree(md5cksum.data); + return GSS_S_FAILURE; +} + +u32 +gss_unwrap_kerberos(struct gss_ctx *ctx, u32 *qop, int offset, + struct xdr_buf *buf, int *out_offset) +{ + struct krb5_ctx *kctx = ctx->internal_ctx_id; + int signalg; + int sealalg; + s32 checksum_type; + struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; + s32 now; + int direction; + s32 seqnum; + unsigned char *ptr; + int bodysize; + u32 ret = GSS_S_DEFECTIVE_TOKEN; + u8 *data_start; + int blocksize; + + dprintk("RPC: gss_unwrap_kerberos\n"); + + ptr = (u8 *)buf->head[0].iov_base + offset; + if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr, + buf->len - offset)) + goto out; + + if ((*ptr++ != ((KG_TOK_WRAP_MSG>>8)&0xff)) || + (*ptr++ != (KG_TOK_WRAP_MSG &0xff)) ) + goto out; + + /* XXX sanity-check bodysize?? */ + + /* get the sign and seal algorithms */ + + signalg = ptr[0] + (ptr[1] << 8); + sealalg = ptr[2] + (ptr[3] << 8); + + /* Sanity checks */ + + if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) + goto out; + + if (sealalg == 0xffff) + goto out; + + /* in the current spec, there is only one valid seal algorithm per + key type, so a simple comparison is ok */ + + if (sealalg != kctx->sealalg) + goto out; + + /* there are several mappings of seal algorithms to sign algorithms, + but few enough that we can try them all. 
*/ + + if ((kctx->sealalg == SEAL_ALG_NONE && signalg > 1) || + (kctx->sealalg == SEAL_ALG_1 && signalg != SGN_ALG_3) || + (kctx->sealalg == SEAL_ALG_DES3KD && + signalg != SGN_ALG_HMAC_SHA1_DES3_KD)) + goto out; + + if (gss_decrypt_xdr_buf(kctx->enc, buf, + ptr + 22 - (unsigned char *)buf->head[0].iov_base)) + goto out; + + /* compute the checksum of the message */ + + /* initialize the cksum */ + switch (signalg) { + case SGN_ALG_DES_MAC_MD5: + checksum_type = CKSUMTYPE_RSA_MD5; + break; + default: + ret = GSS_S_DEFECTIVE_TOKEN; + goto out; + } + + switch (signalg) { + case SGN_ALG_DES_MAC_MD5: + ret = make_checksum(checksum_type, ptr - 2, 8, buf, + ptr + 22 - (unsigned char *)buf->head[0].iov_base, &md5cksum); + if (ret) + goto out; + + ret = krb5_encrypt(kctx->seq, NULL, md5cksum.data, + md5cksum.data, md5cksum.len); + if (ret) + goto out; + + if (memcmp(md5cksum.data + 8, ptr + 14, 8)) { + ret = GSS_S_BAD_SIG; + goto out; + } + break; + default: + ret = GSS_S_DEFECTIVE_TOKEN; + goto out; + } + + /* it got through unscathed. Make sure the context is unexpired */ + + if (qop) + *qop = GSS_C_QOP_DEFAULT; + + now = get_seconds(); + + ret = GSS_S_CONTEXT_EXPIRED; + if (now > kctx->endtime) + goto out; + + /* do sequencing checks */ + + ret = GSS_S_BAD_SIG; + if ((ret = krb5_get_seq_num(kctx->seq, ptr + 14, ptr + 6, &direction, + &seqnum))) + goto out; + + if ((kctx->initiate && direction != 0xff) || + (!kctx->initiate && direction != 0)) + goto out; + + /* Copy the data back to the right position. XXX: Would probably be + * better to copy and encrypt at the same time. 
*/ + + blocksize = crypto_tfm_alg_blocksize(kctx->enc); + data_start = ptr + 22 + blocksize; + *out_offset = data_start - (u8 *)buf->head[0].iov_base; + + ret = GSS_S_DEFECTIVE_TOKEN; + if (gss_krb5_remove_padding(buf, blocksize)) + goto out; + + ret = GSS_S_COMPLETE; +out: + if (md5cksum.data) kfree(md5cksum.data); + return ret; +} Index: linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_crypto.c =================================================================== --- linux-2.6.10.orig/net/sunrpc/auth_gss/gss_krb5_crypto.c 2004-12-25 05:33:50.000000000 +0800 +++ linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_crypto.c 2005-04-05 14:49:13.398692408 +0800 @@ -139,17 +139,91 @@ sg->length = len; } +static int +process_xdr_buf(struct xdr_buf *buf, int offset, int len, + int (*actor)(struct scatterlist *, void *), void *data) +{ + int i, page_len, thislen, page_offset, ret = 0; + struct scatterlist sg[1]; + + if (offset >= buf->head[0].iov_len) { + offset -= buf->head[0].iov_len; + } else { + thislen = buf->head[0].iov_len - offset; + if (thislen > len) + thislen = len; + buf_to_sg(sg, buf->head[0].iov_base + offset, thislen); + ret = actor(sg, data); + if (ret) + goto out; + offset = 0; + len -= thislen; + } + if (len == 0) + goto out; + + if (offset >= buf->page_len) { + offset -= buf->page_len; + } else { + page_len = buf->page_len - offset; + if (page_len > len) + page_len = len; + len -= page_len; + page_offset = (offset + buf->page_base) & (PAGE_CACHE_SIZE - 1); + i = (offset + buf->page_base) >> PAGE_CACHE_SHIFT; + thislen = PAGE_CACHE_SIZE - page_offset; + do { + if (thislen > page_len) + thislen = page_len; + sg->page = buf->pages[i]; + sg->offset = page_offset; + sg->length = thislen; + ret = actor(sg, data); + if (ret) + goto out; + page_len -= thislen; + i++; + page_offset = 0; + thislen = PAGE_CACHE_SIZE; + } while (page_len != 0); + offset = 0; + } + if (len == 0) + goto out; + + if (offset < buf->tail[0].iov_len) { + thislen = buf->tail[0].iov_len - offset; + if 
(thislen > len) + thislen = len; + buf_to_sg(sg, buf->tail[0].iov_base + offset, thislen); + ret = actor(sg, data); + len -= thislen; + } + if (len != 0) + ret = -EINVAL; +out: + return ret; +} + +static int +checksummer(struct scatterlist *sg, void *data) +{ + struct crypto_tfm *tfm = (struct crypto_tfm *)data; + + crypto_digest_update(tfm, sg, 1); + + return 0; +} + /* checksum the plaintext data and hdrlen bytes of the token header */ s32 make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, - struct xdr_netobj *cksum) + int body_offset, struct xdr_netobj *cksum) { char *cksumname; struct crypto_tfm *tfm = NULL; /* XXX add to ctx? */ struct scatterlist sg[1]; u32 code = GSS_S_FAILURE; - int len, thislen, offset; - int i; switch (cksumtype) { case CKSUMTYPE_RSA_MD5: @@ -169,35 +243,8 @@ crypto_digest_init(tfm); buf_to_sg(sg, header, hdrlen); crypto_digest_update(tfm, sg, 1); - if (body->head[0].iov_len) { - buf_to_sg(sg, body->head[0].iov_base, body->head[0].iov_len); - crypto_digest_update(tfm, sg, 1); - } - - len = body->page_len; - if (len != 0) { - offset = body->page_base & (PAGE_CACHE_SIZE - 1); - i = body->page_base >> PAGE_CACHE_SHIFT; - thislen = PAGE_CACHE_SIZE - offset; - do { - if (thislen > len) - thislen = len; - sg->page = body->pages[i]; - sg->offset = offset; - sg->length = thislen; - kmap(sg->page); /* XXX kmap_atomic? 
*/ - crypto_digest_update(tfm, sg, 1); - kunmap(sg->page); - len -= thislen; - i++; - offset = 0; - thislen = PAGE_CACHE_SIZE; - } while(len != 0); - } - if (body->tail[0].iov_len) { - buf_to_sg(sg, body->tail[0].iov_base, body->tail[0].iov_len); - crypto_digest_update(tfm, sg, 1); - } + process_xdr_buf(body, body_offset, body->len - body_offset, + checksummer, tfm); crypto_digest_final(tfm, cksum->data); code = 0; out: @@ -207,3 +254,154 @@ } EXPORT_SYMBOL(make_checksum); + +struct encryptor_desc { + u8 iv[8]; /* XXX hard-coded blocksize */ + struct crypto_tfm *tfm; + int pos; + struct xdr_buf *outbuf; + struct page **pages; + struct scatterlist infrags[4]; + struct scatterlist outfrags[4]; + int fragno; + int fraglen; +}; + +static int +encryptor(struct scatterlist *sg, void *data) +{ + struct encryptor_desc *desc = data; + struct xdr_buf *outbuf = desc->outbuf; + struct page *in_page; + int thislen = desc->fraglen + sg->length; + int fraglen, ret; + int page_pos; + + /* Worst case is 4 fragments: head, end of page 1, start + * of page 2, tail. Anything more is a bug. 
*/ + BUG_ON(desc->fragno > 3); + desc->infrags[desc->fragno] = *sg; + desc->outfrags[desc->fragno] = *sg; + + page_pos = desc->pos - outbuf->head[0].iov_len; + if (page_pos >= 0 && page_pos < outbuf->page_len) { + /* pages are not in place: */ + int i = (page_pos + outbuf->page_base) >> PAGE_CACHE_SHIFT; + in_page = desc->pages[i]; + } else { + in_page = sg->page; + } + desc->infrags[desc->fragno].page = in_page; + desc->fragno++; + desc->fraglen += sg->length; + desc->pos += sg->length; + + fraglen = thislen & 7; /* XXX hardcoded blocksize */ + thislen -= fraglen; + + if (thislen == 0) + return 0; + + ret = crypto_cipher_encrypt_iv(desc->tfm, desc->outfrags, desc->infrags, + thislen, desc->iv); + if (ret) + return ret; + if (fraglen) { + desc->outfrags[0].page = sg->page; + desc->outfrags[0].offset = sg->offset + sg->length - fraglen; + desc->outfrags[0].length = fraglen; + desc->infrags[0] = desc->outfrags[0]; + desc->infrags[0].page = in_page; + desc->fragno = 1; + desc->fraglen = fraglen; + } else { + desc->fragno = 0; + desc->fraglen = 0; + } + return 0; +} + +int +gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset, + struct page **pages) +{ + int ret; + struct encryptor_desc desc; + + BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0); + + memset(desc.iv, 0, sizeof(desc.iv)); + desc.tfm = tfm; + desc.pos = offset; + desc.outbuf = buf; + desc.pages = pages; + desc.fragno = 0; + desc.fraglen = 0; + + ret = process_xdr_buf(buf, offset, buf->len - offset, encryptor, &desc); + return ret; +} + +EXPORT_SYMBOL(gss_encrypt_xdr_buf); + +struct decryptor_desc { + u8 iv[8]; /* XXX hard-coded blocksize */ + struct crypto_tfm *tfm; + struct scatterlist frags[4]; + int fragno; + int fraglen; +}; + +static int +decryptor(struct scatterlist *sg, void *data) +{ + struct decryptor_desc *desc = data; + int thislen = desc->fraglen + sg->length; + int fraglen, ret; + + /* Worst case is 4 fragments: head, end of page 1, start + * of page 2, 
tail. Anything more is a bug. */ + BUG_ON(desc->fragno > 3); + desc->frags[desc->fragno] = *sg; + desc->fragno++; + desc->fraglen += sg->length; + + fraglen = thislen & 7; /* XXX hardcoded blocksize */ + thislen -= fraglen; + + if (thislen == 0) + return 0; + + ret = crypto_cipher_decrypt_iv(desc->tfm, desc->frags, desc->frags, + thislen, desc->iv); + if (ret) + return ret; + if (fraglen) { + desc->frags[0].page = sg->page; + desc->frags[0].offset = sg->offset + sg->length - fraglen; + desc->frags[0].length = fraglen; + desc->fragno = 1; + desc->fraglen = fraglen; + } else { + desc->fragno = 0; + desc->fraglen = 0; + } + return 0; +} + +int +gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset) +{ + struct decryptor_desc desc; + + /* XXXJBF: */ + BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0); + + memset(desc.iv, 0, sizeof(desc.iv)); + desc.tfm = tfm; + desc.fragno = 0; + desc.fraglen = 0; + return process_xdr_buf(buf, offset, buf->len - offset, decryptor, &desc); +} + +EXPORT_SYMBOL(gss_decrypt_xdr_buf); Index: linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_seal.c =================================================================== --- linux-2.6.10.orig/net/sunrpc/auth_gss/gss_krb5_seal.c 2004-12-25 05:33:47.000000000 +0800 +++ linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_seal.c 2005-04-05 14:49:13.402691800 +0800 @@ -70,24 +70,17 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif -static inline int -gss_krb5_padding(int blocksize, int length) { - /* Most of the code is block-size independent but in practice we - * use only 8: */ - BUG_ON(blocksize != 8); - return 8 - (length & 7); -} +spinlock_t krb5_seq_lock = SPIN_LOCK_UNLOCKED; u32 krb5_make_token(struct krb5_ctx *ctx, int qop_req, - struct xdr_buf *text, struct xdr_netobj *token, - int toktype) + struct xdr_buf *text, struct xdr_netobj *token) { s32 checksum_type; struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; - int blocksize = 0, tmsglen; unsigned char *ptr, *krb5_hdr, 
*msg_start; s32 now; + u32 seq_send; dprintk("RPC: gss_krb5_seal\n"); @@ -111,21 +104,13 @@ goto out_err; } - if (toktype == KG_TOK_WRAP_MSG) { - blocksize = crypto_tfm_alg_blocksize(ctx->enc); - tmsglen = blocksize + text->len - + gss_krb5_padding(blocksize, blocksize + text->len); - } else { - tmsglen = 0; - } - - token->len = g_token_size(&ctx->mech_used, 22 + tmsglen); + token->len = g_token_size(&ctx->mech_used, 22); ptr = token->data; - g_make_token_header(&ctx->mech_used, 22 + tmsglen, &ptr); + g_make_token_header(&ctx->mech_used, 22, &ptr); - *ptr++ = (unsigned char) ((toktype>>8)&0xff); - *ptr++ = (unsigned char) (toktype&0xff); + *ptr++ = (unsigned char) ((KG_TOK_MIC_MSG>>8)&0xff); + *ptr++ = (unsigned char) (KG_TOK_MIC_MSG&0xff); /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */ krb5_hdr = ptr - 2; @@ -133,17 +118,9 @@ *(u16 *)(krb5_hdr + 2) = htons(ctx->signalg); memset(krb5_hdr + 4, 0xff, 4); - if (toktype == KG_TOK_WRAP_MSG) - *(u16 *)(krb5_hdr + 4) = htons(ctx->sealalg); - if (toktype == KG_TOK_WRAP_MSG) { - /* XXX removing support for now */ - goto out_err; - } else { /* Sign only. */ - if (make_checksum(checksum_type, krb5_hdr, 8, text, - &md5cksum)) + if (make_checksum(checksum_type, krb5_hdr, 8, text, 0, &md5cksum)) goto out_err; - } switch (ctx->signalg) { case SGN_ALG_DES_MAC_MD5: @@ -163,12 +140,14 @@ kfree(md5cksum.data); + spin_lock(&krb5_seq_lock); + seq_send = ctx->seq_send++; + spin_unlock(&krb5_seq_lock); + if ((krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff, - ctx->seq_send, krb5_hdr + 16, krb5_hdr + 8))) + seq_send, krb5_hdr + 16, krb5_hdr + 8))) goto out_err; - ctx->seq_send++; - return ((ctx->endtime < now) ? 
GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE); out_err: if (md5cksum.data) kfree(md5cksum.data); Index: linux-2.6.10/net/sunrpc/auth_gss/gss_pseudoflavors.c =================================================================== --- linux-2.6.10.orig/net/sunrpc/auth_gss/gss_pseudoflavors.c 2004-12-25 05:34:45.000000000 +0800 +++ linux-2.6.10/net/sunrpc/auth_gss/gss_pseudoflavors.c 2005-04-05 19:01:49.158500672 +0800 @@ -1,237 +0,0 @@ -/* - * linux/net/sunrpc/gss_union.c - * - * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/generic code - * - * Copyright (c) 2001 The Regents of the University of Michigan. - * All rights reserved. - * - * Andy Adamson - * - */ - -/* - * Copyright 1993 by OpenVision Technologies, Inc. - * - * Permission to use, copy, modify, distribute, and sell this software - * and its documentation for any purpose is hereby granted without fee, - * provided that the above copyright notice appears in all copies and - * that both that copyright notice and this permission notice appear in - * supporting documentation, and that the name of OpenVision not be used - * in advertising or publicity pertaining to distribution of the software - * without specific, written prior permission. OpenVision makes no - * representations about the suitability of this software for any - * purpose. It is provided "as is" without express or implied warranty. - * - * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO - * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF - * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR - * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR - * PERFORMANCE OF THIS SOFTWARE. 
- */ - -#include -#include -#include -#include -#include - -#ifdef RPC_DEBUG -# define RPCDBG_FACILITY RPCDBG_AUTH -#endif - -static LIST_HEAD(registered_triples); -static spinlock_t registered_triples_lock = SPIN_LOCK_UNLOCKED; - -/* The following must be called with spinlock held: */ -static struct sup_sec_triple * -do_lookup_triple_by_pseudoflavor(u32 pseudoflavor) -{ - struct sup_sec_triple *pos, *triple = NULL; - - list_for_each_entry(pos, &registered_triples, triples) { - if (pos->pseudoflavor == pseudoflavor) { - triple = pos; - break; - } - } - return triple; -} - -/* XXX Need to think about reference counting of triples and of mechs. - * Currently we do no reference counting of triples, and I think that's - * probably OK given the reference counting on mechs, but there's probably - * a better way to do all this. */ - -int -gss_register_triple(u32 pseudoflavor, struct gss_api_mech *mech, - u32 qop, u32 service) -{ - struct sup_sec_triple *triple; - - if (!(triple = kmalloc(sizeof(*triple), GFP_KERNEL))) { - printk("Alloc failed in gss_register_triple"); - goto err; - } - triple->pseudoflavor = pseudoflavor; - triple->mech = gss_mech_get_by_OID(&mech->gm_oid); - triple->qop = qop; - triple->service = service; - - spin_lock(&registered_triples_lock); - if (do_lookup_triple_by_pseudoflavor(pseudoflavor)) { - printk(KERN_WARNING "RPC: Registered pseudoflavor %d again\n", - pseudoflavor); - goto err_unlock; - } - list_add(&triple->triples, &registered_triples); - spin_unlock(&registered_triples_lock); - dprintk("RPC: registered pseudoflavor %d\n", pseudoflavor); - - return 0; - -err_unlock: - kfree(triple); - spin_unlock(&registered_triples_lock); -err: - return -1; -} - -int -gss_unregister_triple(u32 pseudoflavor) -{ - struct sup_sec_triple *triple; - - spin_lock(&registered_triples_lock); - if (!(triple = do_lookup_triple_by_pseudoflavor(pseudoflavor))) { - spin_unlock(&registered_triples_lock); - printk("Can't unregister unregistered pseudoflavor %d\n", - pseudoflavor); - return 
-1; - } - list_del(&triple->triples); - spin_unlock(&registered_triples_lock); - gss_mech_put(triple->mech); - kfree(triple); - return 0; - -} - -void -print_sec_triple(struct xdr_netobj *oid,u32 qop,u32 service) -{ - dprintk("RPC: print_sec_triple:\n"); - dprintk(" oid_len %d\n oid :\n",oid->len); - print_hexl((u32 *)oid->data,oid->len,0); - dprintk(" qop %d\n",qop); - dprintk(" service %d\n",service); -} - -/* Function: gss_get_cmp_triples - * - * Description: search sec_triples for a matching security triple - * return pseudoflavor if match, else 0 - * (Note that 0 is a valid pseudoflavor, but not for any gss pseudoflavor - * (0 means auth_null), so this shouldn't cause confusion.) - */ -u32 -gss_cmp_triples(u32 oid_len, char *oid_data, u32 qop, u32 service) -{ - struct sup_sec_triple *triple; - u32 pseudoflavor = 0; - struct xdr_netobj oid; - - oid.len = oid_len; - oid.data = oid_data; - - dprintk("RPC: gss_cmp_triples\n"); - print_sec_triple(&oid,qop,service); - - spin_lock(&registered_triples_lock); - list_for_each_entry(triple, &registered_triples, triples) { - if((g_OID_equal(&oid, &triple->mech->gm_oid)) - && (qop == triple->qop) - && (service == triple->service)) { - pseudoflavor = triple->pseudoflavor; - break; - } - } - spin_unlock(&registered_triples_lock); - dprintk("RPC: gss_cmp_triples return %d\n", pseudoflavor); - return pseudoflavor; -} - -u32 -gss_get_pseudoflavor(struct gss_ctx *ctx, u32 qop, u32 service) -{ - return gss_cmp_triples(ctx->mech_type->gm_oid.len, - ctx->mech_type->gm_oid.data, - qop, service); -} - -/* Returns nonzero iff the given pseudoflavor is in the supported list. - * (Note that without incrementing a reference count or anything, this - * doesn't give any guarantees.) */ -int -gss_pseudoflavor_supported(u32 pseudoflavor) -{ - struct sup_sec_triple *triple; - - spin_lock(&registered_triples_lock); - triple = do_lookup_triple_by_pseudoflavor(pseudoflavor); - spin_unlock(&registered_triples_lock); - return (triple ? 
1 : 0); -} - -u32 -gss_pseudoflavor_to_service(u32 pseudoflavor) -{ - struct sup_sec_triple *triple; - - spin_lock(&registered_triples_lock); - triple = do_lookup_triple_by_pseudoflavor(pseudoflavor); - spin_unlock(&registered_triples_lock); - if (!triple) { - dprintk("RPC: gss_pseudoflavor_to_service called with unsupported pseudoflavor %d\n", - pseudoflavor); - return 0; - } - return triple->service; -} - -struct gss_api_mech * -gss_pseudoflavor_to_mech(u32 pseudoflavor) { - struct sup_sec_triple *triple; - struct gss_api_mech *mech = NULL; - - spin_lock(&registered_triples_lock); - triple = do_lookup_triple_by_pseudoflavor(pseudoflavor); - spin_unlock(&registered_triples_lock); - if (triple) - mech = gss_mech_get(triple->mech); - else - dprintk("RPC: gss_pseudoflavor_to_mech called with unsupported pseudoflavor %d\n", - pseudoflavor); - return mech; -} - -int -gss_pseudoflavor_to_mechOID(u32 pseudoflavor, struct xdr_netobj * oid) -{ - struct gss_api_mech *mech; - - mech = gss_pseudoflavor_to_mech(pseudoflavor); - if (!mech) { - dprintk("RPC: gss_pseudoflavor_to_mechOID called with unsupported pseudoflavor %d\n", - pseudoflavor); - return -1; - } - oid->len = mech->gm_oid.len; - if (!(oid->data = kmalloc(oid->len, GFP_KERNEL))) - return -1; - memcpy(oid->data, mech->gm_oid.data, oid->len); - gss_mech_put(mech); - return 0; -} Index: linux-2.6.10/net/sunrpc/auth_gss/svcauth_gss.c =================================================================== --- linux-2.6.10.orig/net/sunrpc/auth_gss/svcauth_gss.c 2004-12-25 05:34:44.000000000 +0800 +++ linux-2.6.10/net/sunrpc/auth_gss/svcauth_gss.c 2005-04-05 14:49:13.407691040 +0800 @@ -37,6 +37,7 @@ * */ +#include #include #include #include @@ -78,7 +79,6 @@ static struct cache_head *rsi_table[RSI_HASHMAX]; static struct cache_detail rsi_cache; -static struct rsi *rsi_lookup(struct rsi *item, int set); static void rsi_free(struct rsi *rsii) { @@ -125,38 +125,6 @@ return dup_to_netobj(dst, src->data, src->len); } -static inline void 
rsi_init(struct rsi *new, struct rsi *item) -{ - new->out_handle.data = NULL; - new->out_handle.len = 0; - new->out_token.data = NULL; - new->out_token.len = 0; - new->in_handle.len = item->in_handle.len; - item->in_handle.len = 0; - new->in_token.len = item->in_token.len; - item->in_token.len = 0; - new->in_handle.data = item->in_handle.data; - item->in_handle.data = NULL; - new->in_token.data = item->in_token.data; - item->in_token.data = NULL; -} - -static inline void rsi_update(struct rsi *new, struct rsi *item) -{ - BUG_ON(new->out_handle.data || new->out_token.data); - new->out_handle.len = item->out_handle.len; - item->out_handle.len = 0; - new->out_token.len = item->out_token.len; - item->out_token.len = 0; - new->out_handle.data = item->out_handle.data; - item->out_handle.data = NULL; - new->out_token.data = item->out_token.data; - item->out_token.data = NULL; - - new->major_status = item->major_status; - new->minor_status = item->minor_status; -} - static void rsi_request(struct cache_detail *cd, struct cache_head *h, char **bpp, int *blen) @@ -168,6 +136,75 @@ (*bpp)[-1] = '\n'; } +static inline int +gssd_reply(struct rsi *item) +{ + struct rsi *tmp; + struct cache_head **hp, **head; + + head = &rsi_cache.hash_table[rsi_hash(item)]; + write_lock(&rsi_cache.hash_lock); + for (hp = head; *hp != NULL; hp = &tmp->h.next) { + tmp = container_of(*hp, struct rsi, h); + if (rsi_match(tmp, item)) { + cache_get(&tmp->h); + clear_bit(CACHE_HASHED, &tmp->h.flags); + *hp = tmp->h.next; + tmp->h.next = NULL; + rsi_cache.entries--; + if (test_bit(CACHE_VALID, &tmp->h.flags)) { + write_unlock(&rsi_cache.hash_lock); + rsi_put(&tmp->h, &rsi_cache); + return -EINVAL; + } + set_bit(CACHE_HASHED, &item->h.flags); + item->h.next = *hp; + *hp = &item->h; + rsi_cache.entries++; + set_bit(CACHE_VALID, &item->h.flags); + item->h.last_refresh = get_seconds(); + write_unlock(&rsi_cache.hash_lock); + cache_fresh(&rsi_cache, &tmp->h, 0); + rsi_put(&tmp->h, &rsi_cache); + return 0; + 
} + } + write_unlock(&rsi_cache.hash_lock); + return -EINVAL; +} + +static inline struct rsi * +gssd_upcall(struct rsi *item, struct svc_rqst *rqstp) +{ + struct rsi *tmp; + struct cache_head **hp, **head; + + head = &rsi_cache.hash_table[rsi_hash(item)]; + read_lock(&rsi_cache.hash_lock); + for (hp = head; *hp != NULL; hp = &tmp->h.next) { + tmp = container_of(*hp, struct rsi, h); + if (rsi_match(tmp, item)) { + if (!test_bit(CACHE_VALID, &tmp->h.flags)) { + read_unlock(&rsi_cache.hash_lock); + return NULL; + } + *hp = tmp->h.next; + tmp->h.next = NULL; + rsi_cache.entries--; + read_unlock(&rsi_cache.hash_lock); + return tmp; + } + } + cache_get(&item->h); + item->h.next = *head; + *head = &item->h; + rsi_cache.entries++; + read_unlock(&rsi_cache.hash_lock); + cache_get(&item->h); + if (cache_check(&rsi_cache, &item->h, &rqstp->rq_chandle)) + return NULL; + return item; +} static int rsi_parse(struct cache_detail *cd, char *mesg, int mlen) @@ -176,17 +213,22 @@ char *buf = mesg; char *ep; int len; - struct rsi rsii, *rsip = NULL; + struct rsi *rsii; time_t expiry; int status = -EINVAL; - memset(&rsii, 0, sizeof(rsii)); + rsii = kmalloc(sizeof(*rsii), GFP_KERNEL); + if (!rsii) + return -ENOMEM; + memset(rsii, 0, sizeof(*rsii)); + cache_init(&rsii->h); + /* handle */ len = qword_get(&mesg, buf, mlen); if (len < 0) goto out; status = -ENOMEM; - if (dup_to_netobj(&rsii.in_handle, buf, len)) + if (dup_to_netobj(&rsii->in_handle, buf, len)) goto out; /* token */ @@ -195,10 +237,9 @@ if (len < 0) goto out; status = -ENOMEM; - if (dup_to_netobj(&rsii.in_token, buf, len)) + if (dup_to_netobj(&rsii->in_token, buf, len)) goto out; - rsii.h.flags = 0; /* expiry */ expiry = get_expiry(&mesg); status = -EINVAL; @@ -212,13 +253,13 @@ if (len == 0) { goto out; } else { - rsii.major_status = simple_strtoul(buf, &ep, 10); + rsii->major_status = simple_strtoul(buf, &ep, 10); if (*ep) goto out; len = qword_get(&mesg, buf, mlen); if (len <= 0) goto out; - rsii.minor_status = 
simple_strtoul(buf, &ep, 10); + rsii->minor_status = simple_strtoul(buf, &ep, 10); if (*ep) goto out; @@ -227,7 +268,7 @@ if (len < 0) goto out; status = -ENOMEM; - if (dup_to_netobj(&rsii.out_handle, buf, len)) + if (dup_to_netobj(&rsii->out_handle, buf, len)) goto out; /* out_token */ @@ -236,16 +277,14 @@ if (len < 0) goto out; status = -ENOMEM; - if (dup_to_netobj(&rsii.out_token, buf, len)) + if (dup_to_netobj(&rsii->out_token, buf, len)) goto out; } - rsii.h.expiry_time = expiry; - rsip = rsi_lookup(&rsii, 1); - status = 0; + rsii->h.expiry_time = expiry; + status = gssd_reply(rsii); out: - rsi_free(&rsii); - if (rsip) - rsi_put(&rsip->h, &rsi_cache); + if (rsii) + rsi_put(&rsii->h, &rsi_cache); return status; } @@ -258,8 +297,6 @@ .cache_parse = rsi_parse, }; -static DefineSimpleCacheLookup(rsi, 0) - /* * The rpcsec_context cache is used to store a context that is * used in data exchange. @@ -292,7 +329,6 @@ static struct cache_head *rsc_table[RSC_HASHMAX]; static struct cache_detail rsc_cache; -static struct rsc *rsc_lookup(struct rsc *item, int set); static void rsc_free(struct rsc *rsci) { @@ -325,26 +361,46 @@ return netobj_equal(&new->handle, &tmp->handle); } -static inline void -rsc_init(struct rsc *new, struct rsc *tmp) +static struct rsc *rsc_lookup(struct rsc *item, int set) { - new->handle.len = tmp->handle.len; - tmp->handle.len = 0; - new->handle.data = tmp->handle.data; - tmp->handle.data = NULL; - new->mechctx = NULL; - new->cred.cr_group_info = NULL; -} - -static inline void -rsc_update(struct rsc *new, struct rsc *tmp) -{ - new->mechctx = tmp->mechctx; - tmp->mechctx = NULL; - memset(&new->seqdata, 0, sizeof(new->seqdata)); - spin_lock_init(&new->seqdata.sd_lock); - new->cred = tmp->cred; - tmp->cred.cr_group_info = NULL; + struct rsc *tmp = NULL; + struct cache_head **hp, **head; + head = &rsc_cache.hash_table[rsc_hash(item)]; + + if (set) + write_lock(&rsc_cache.hash_lock); + else + read_lock(&rsc_cache.hash_lock); + for (hp = head; *hp != 
NULL; hp = &tmp->h.next) { + tmp = container_of(*hp, struct rsc, h); + if (!rsc_match(tmp, item)) + continue; + cache_get(&tmp->h); + if (!set) + goto out_noset; + *hp = tmp->h.next; + tmp->h.next = NULL; + clear_bit(CACHE_HASHED, &tmp->h.flags); + rsc_put(&tmp->h, &rsc_cache); + goto out_set; + } + /* Didn't find anything */ + if (!set) + goto out_nada; + rsc_cache.entries++; +out_set: + set_bit(CACHE_HASHED, &item->h.flags); + item->h.next = *head; + *head = &item->h; + write_unlock(&rsc_cache.hash_lock); + cache_fresh(&rsc_cache, &item->h, item->h.expiry_time); + cache_get(&item->h); + return item; +out_nada: + tmp = NULL; +out_noset: + read_unlock(&rsc_cache.hash_lock); + return tmp; } static int rsc_parse(struct cache_detail *cd, @@ -353,19 +409,22 @@ /* contexthandle expiry [ uid gid N mechname ...mechdata... ] */ char *buf = mesg; int len, rv; - struct rsc rsci, *rscp = NULL; + struct rsc *rsci, *res = NULL; time_t expiry; int status = -EINVAL; - memset(&rsci, 0, sizeof(rsci)); + rsci = kmalloc(sizeof(*rsci), GFP_KERNEL); + if (!rsci) + return -ENOMEM; + memset(rsci, 0, sizeof(*rsci)); + cache_init(&rsci->h); /* context handle */ len = qword_get(&mesg, buf, mlen); if (len < 0) goto out; status = -ENOMEM; - if (dup_to_netobj(&rsci.handle, buf, len)) + if (dup_to_netobj(&rsci->handle, buf, len)) goto out; - rsci.h.flags = 0; /* expiry */ expiry = get_expiry(&mesg); status = -EINVAL; @@ -373,26 +432,26 @@ goto out; /* uid, or NEGATIVE */ - rv = get_int(&mesg, &rsci.cred.cr_uid); + rv = get_int(&mesg, &rsci->cred.cr_uid); if (rv == -EINVAL) goto out; if (rv == -ENOENT) - set_bit(CACHE_NEGATIVE, &rsci.h.flags); + set_bit(CACHE_NEGATIVE, &rsci->h.flags); else { int N, i; struct gss_api_mech *gm; struct xdr_netobj tmp_buf; /* gid */ - if (get_int(&mesg, &rsci.cred.cr_gid)) + if (get_int(&mesg, &rsci->cred.cr_gid)) goto out; /* number of additional gid's */ if (get_int(&mesg, &N)) goto out; status = -ENOMEM; - rsci.cred.cr_group_info = groups_alloc(N); - if 
(rsci.cred.cr_group_info == NULL) + rsci->cred.cr_group_info = groups_alloc(N); + if (rsci->cred.cr_group_info == NULL) goto out; /* gid's */ @@ -401,7 +460,7 @@ gid_t gid; if (get_int(&mesg, &gid)) goto out; - GROUP_AT(rsci.cred.cr_group_info, i) = gid; + GROUP_AT(rsci->cred.cr_group_info, i) = gid; } /* mech name */ @@ -422,19 +481,21 @@ } tmp_buf.len = len; tmp_buf.data = buf; - if (gss_import_sec_context(&tmp_buf, gm, &rsci.mechctx)) { + if (gss_import_sec_context(&tmp_buf, gm, &rsci->mechctx)) { gss_mech_put(gm); goto out; } gss_mech_put(gm); } - rsci.h.expiry_time = expiry; - rscp = rsc_lookup(&rsci, 1); + rsci->h.expiry_time = expiry; + spin_lock_init(&rsci->seqdata.sd_lock); + res = rsc_lookup(rsci, 1); + rsc_put(&res->h, &rsc_cache); + rsci = NULL; status = 0; out: - rsc_free(&rsci); - if (rscp) - rsc_put(&rscp->h, &rsc_cache); + if (rsci) + rsc_put(&rsci->h, &rsc_cache); return status; } @@ -446,19 +507,14 @@ .cache_parse = rsc_parse, }; -static DefineSimpleCacheLookup(rsc, 0); - struct rsc * gss_svc_searchbyctx(struct xdr_netobj *handle) { struct rsc rsci; struct rsc *found; - memset(&rsci, 0, sizeof(rsci)); - if (dup_to_netobj(&rsci.handle, handle->data, handle->len)) - return NULL; + rsci.handle = *handle; found = rsc_lookup(&rsci, 0); - rsc_free(&rsci); if (!found) return NULL; if (cache_check(&rsc_cache, &found->h, NULL)) @@ -721,6 +777,45 @@ return stat; } +static int +unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) +{ + int stat = -EINVAL; + int out_offset; + u32 * lenp; + u32 priv_len, maj_stat; + int saved_len; + + lenp = buf->head[0].iov_base; + priv_len = ntohl(svc_getu32(&buf->head[0])); + if (priv_len > buf->len) /* XXXJBF: wrong check */ + goto out; + /* XXXJBF: bizarre hack: to handle revisits (and not decrypt + * twice), the first time through we write an offset + * telling us where to skip to find the already-decrypted data */ + if (rqstp->rq_deferred) { + buf->head[0].iov_base += priv_len; + 
buf->head[0].iov_len -= priv_len; + return 0; + } + saved_len = buf->len; /* XXX HACK */ + buf->len = priv_len; + maj_stat = gss_unwrap(ctx, NULL, 0, buf, &out_offset); + buf->len = saved_len; + buf->head[0].iov_base += out_offset; + buf->head[0].iov_len -= out_offset; + BUG_ON(buf->head[0].iov_len <= 0); + if (maj_stat != GSS_S_COMPLETE) + goto out; + if (ntohl(svc_getu32(&buf->head[0])) != seq) + goto out; + /* XXXJBF: see "bizarre hack", above. */ + *lenp = htonl(out_offset + 4); + stat = 0; +out: + return stat; +} + struct gss_svc_data { /* decoded gss client cred: */ struct rpc_gss_wire_cred clcred; @@ -730,6 +825,19 @@ struct rsc *rsci; }; +static int +svcauth_gss_set_client(struct svc_rqst *rqstp) +{ + struct gss_svc_data *svcdata = rqstp->rq_auth_data; + struct rsc *rsci = svcdata->rsci; + struct rpc_gss_wire_cred *gc = &svcdata->clcred; + + rqstp->rq_client = find_gss_auth_domain(rsci->mechctx, gc->gc_svc); + if (rqstp->rq_client == NULL) + return SVC_DENIED; + return SVC_OK; +} + /* * Accept an rpcsec packet. 
* If context establishment, punt to user space @@ -748,7 +856,7 @@ struct gss_svc_data *svcdata = rqstp->rq_auth_data; struct rpc_gss_wire_cred *gc; struct rsc *rsci = NULL; - struct rsi *rsip, rsikey; + struct rsi *rsip, *rsikey = NULL; u32 *rpcstart; u32 *reject_stat = resv->iov_base + resv->iov_len; int ret; @@ -841,30 +949,23 @@ *authp = rpc_autherr_badcred; if (gc->gc_proc == RPC_GSS_PROC_INIT && gc->gc_ctx.len != 0) goto auth_err; - memset(&rsikey, 0, sizeof(rsikey)); - if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx)) + rsikey = kmalloc(sizeof(*rsikey), GFP_KERNEL); + if (!rsikey) + goto drop; + memset(rsikey, 0, sizeof(*rsikey)); + cache_init(&rsikey->h); + if (dup_netobj(&rsikey->in_handle, &gc->gc_ctx)) goto drop; *authp = rpc_autherr_badverf; - if (svc_safe_getnetobj(argv, &tmpobj)) { - kfree(rsikey.in_handle.data); + if (svc_safe_getnetobj(argv, &tmpobj)) goto auth_err; - } - if (dup_netobj(&rsikey.in_token, &tmpobj)) { - kfree(rsikey.in_handle.data); + if (dup_netobj(&rsikey->in_token, &tmpobj)) goto drop; - } - rsip = rsi_lookup(&rsikey, 0); - rsi_free(&rsikey); - if (!rsip) { - goto drop; - } - switch(cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) { - case -EAGAIN: + rsip = gssd_upcall(rsikey, rqstp); + if (!rsip) goto drop; - case -ENOENT: - goto drop; - case 0: + else { rsci = gss_svc_searchbyctx(&rsip->out_handle); if (!rsci) { goto drop; @@ -893,11 +994,6 @@ svc_putu32(resv, rpc_success); goto complete; case RPC_GSS_PROC_DATA: - *authp = rpc_autherr_badcred; - rqstp->rq_client = - find_gss_auth_domain(rsci->mechctx, gc->gc_svc); - if (rqstp->rq_client == NULL) - goto auth_err; *authp = rpcsec_gsserr_ctxproblem; if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq)) goto auth_err; @@ -911,6 +1007,15 @@ if (unwrap_integ_data(&rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) goto auth_err; + /* placeholders for length and seq. 
number: */ + svcdata->body_start = resv->iov_base + resv->iov_len; + svc_putu32(resv, 0); + svc_putu32(resv, 0); + break; + case RPC_GSS_SVC_PRIVACY: + if (unwrap_priv_data(rqstp, &rqstp->rq_arg, + gc->gc_seq, rsci->mechctx)) + goto auth_err; svcdata->rsci = rsci; cache_get(&rsci->h); /* placeholders for length and seq. number: */ @@ -918,11 +1023,11 @@ svc_putu32(resv, 0); svc_putu32(resv, 0); break; - case RPC_GSS_SVC_PRIVACY: - /* currently unsupported */ default: goto auth_err; } + svcdata->rsci = rsci; + cache_get(&rsci->h); ret = SVC_OK; goto out; } @@ -937,13 +1042,15 @@ drop: ret = SVC_DROP; out: + if (rsikey) + rsi_put(&rsikey->h, &rsi_cache); if (rsci) rsc_put(&rsci->h, &rsc_cache); return ret; } -static int -svcauth_gss_release(struct svc_rqst *rqstp) +static inline int +svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp) { struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data; struct rpc_gss_wire_cred *gc = &gsd->clcred; @@ -955,10 +1062,160 @@ int integ_offset, integ_len; int stat = -EINVAL; + p = gsd->body_start; + gsd->body_start = NULL; + /* move accept_stat to right place: */ + memcpy(p, p + 2, 4); + /* Don't wrap in failure case: */ + /* Counting on not getting here if call was not even accepted! 
*/ + if (*p != rpc_success) { + resbuf->head[0].iov_len -= 2 * 4; + goto out; + } + p++; + integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base; + integ_len = resbuf->len - integ_offset; + BUG_ON(integ_len % 4); + *p++ = htonl(integ_len); + *p++ = htonl(gc->gc_seq); + if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, + integ_len)) + BUG(); + if (resbuf->page_len == 0 + && resbuf->tail[0].iov_len + RPC_MAX_AUTH_SIZE + < PAGE_SIZE) { + BUG_ON(resbuf->tail[0].iov_len); + /* Use head for everything */ + resv = &resbuf->head[0]; + } else if (resbuf->tail[0].iov_base == NULL) { + /* copied from nfsd4_encode_read */ + svc_take_page(rqstp); + resbuf->tail[0].iov_base = page_address(rqstp + ->rq_respages[rqstp->rq_resused-1]); + rqstp->rq_restailpage = rqstp->rq_resused-1; + resbuf->tail[0].iov_len = 0; + resv = &resbuf->tail[0]; + } else { + resv = &resbuf->tail[0]; + } + mic.data = (u8 *)resv->iov_base + resv->iov_len + 4; + if (gss_get_mic(gsd->rsci->mechctx, 0, &integ_buf, &mic)) + goto out_err; + svc_putu32(resv, htonl(mic.len)); + memset(mic.data + mic.len, 0, + round_up_to_quad(mic.len) - mic.len); + resv->iov_len += XDR_QUADLEN(mic.len) << 2; + /* not strictly required: */ + resbuf->len += XDR_QUADLEN(mic.len) << 2; + BUG_ON(resv->iov_len > PAGE_SIZE); +out: + stat = 0; +out_err: + return stat; +} + +/* XXXJBF: Look for chances to share code with client */ +/* XXXJBF: Do we need to preallocate these pages somehow? E.g. see + * buffer size calculations in svcsock.c */ +/* XXXJBF: how does reference counting on pages work? 
*/ +static struct page ** +svc_alloc_enc_pages(struct xdr_buf *buf) +{ + struct page **ret; + int last, i; + + if (buf->page_len == 0) + return NULL; + BUG_ON(buf->page_base >> PAGE_CACHE_SHIFT); + last = (buf->page_base + buf->page_len - 1) >> PAGE_CACHE_SHIFT; + ret = kmalloc((last + 1) * sizeof(struct page *), GFP_KERNEL); + if (!ret) + goto out; + for (i = 0; i<= last; i++) { + ret[i] = alloc_page(GFP_KERNEL); + if (ret[i] == NULL) + goto out_free; + } +out: + return ret; +out_free: + for (i--; i >= 0; i--) { + __free_page(ret[i]); + } + return NULL; +} + +static inline int +svcauth_gss_wrap_resp_priv(struct svc_rqst *rqstp) +{ + struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data; + struct rpc_gss_wire_cred *gc = &gsd->clcred; + struct xdr_buf *resbuf = &rqstp->rq_res; + struct page **inpages; + u32 *p; + int offset, *len; + int pad; + int stat = -EINVAL; + + p = gsd->body_start; + gsd->body_start = NULL; + /* move accept_stat to right place: */ + memcpy(p, p + 2, 4); + /* Don't wrap in failure case: */ + /* Counting on not getting here if call was not even accepted! */ + if (*p != rpc_success) { + resbuf->head[0].iov_len -= 2 * 4; + goto out; + } + p++; + len = p++; + offset = (u8 *)p - (u8 *)resbuf->head[0].iov_base; + *p++ = htonl(gc->gc_seq); + stat = -ENOMEM; + inpages = resbuf->pages; + /* XXXJBF: huge memory leaks here: allocated pages probably aren't + * freed, and neither is memory used to hold page array. */ + resbuf->pages = svc_alloc_enc_pages(resbuf); + if (resbuf->page_len && !resbuf->pages) + goto out_err; /* XXX sleep and retry? Reserve ahead of time + and BUG_ON? 
*/ + if (resbuf->tail[0].iov_len == 0 || resbuf->tail[0].iov_base == NULL) { + /* copied from nfsd4_encode_read */ + {int i = svc_take_page(rqstp); BUG_ON(i); } + resbuf->tail[0].iov_base = page_address(rqstp + ->rq_respages[rqstp->rq_resused-1]); + rqstp->rq_restailpage = rqstp->rq_resused-1; + resbuf->tail[0].iov_len = 0; + } + /* XXX: Will svc code attempt to free stuff in xdr_buf->pages? + * Or can we leave it in any old state on error?? */ + stat = -EINVAL; + if (gss_wrap(gsd->rsci->mechctx, GSS_C_QOP_DEFAULT, offset, + resbuf, inpages)) + goto out_err; + *len = htonl(resbuf->len - offset); + pad = 3 - ((resbuf->len - offset - 1)&3); + p = (u32 *)(resbuf->tail[0].iov_base + resbuf->tail[0].iov_len); + memset(p, 0, pad); + resbuf->tail[0].iov_len += pad; +out: + return 0; +out_err: + return stat; +} + +static int +svcauth_gss_release(struct svc_rqst *rqstp) +{ + struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data; + struct rpc_gss_wire_cred *gc = &gsd->clcred; + struct xdr_buf *resbuf = &rqstp->rq_res; + int stat = -EINVAL; + if (gc->gc_proc != RPC_GSS_PROC_DATA) goto out; /* Release can be called twice, but we only wrap once. */ - if (gsd->body_start == 0) + if (gsd->body_start == NULL) goto out; /* normally not set till svc_send, but we need it here: */ resbuf->len = resbuf->head[0].iov_len @@ -967,55 +1224,15 @@ case RPC_GSS_SVC_NONE: break; case RPC_GSS_SVC_INTEGRITY: - p = gsd->body_start; - gsd->body_start = NULL; - /* move accept_stat to right place: */ - memcpy(p, p + 2, 4); - /* don't wrap in failure case: */ - /* Note: counting on not getting here if call was not even - * accepted! 
*/ - if (*p != rpc_success) { - resbuf->head[0].iov_len -= 2 * 4; - goto out; - } - p++; - integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base; - integ_len = resbuf->len - integ_offset; - BUG_ON(integ_len % 4); - *p++ = htonl(integ_len); - *p++ = htonl(gc->gc_seq); - if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, - integ_len)) - BUG(); - if (resbuf->page_len == 0 - && resbuf->tail[0].iov_len + RPC_MAX_AUTH_SIZE - < PAGE_SIZE) { - BUG_ON(resbuf->tail[0].iov_len); - /* Use head for everything */ - resv = &resbuf->head[0]; - } else if (resbuf->tail[0].iov_base == NULL) { - /* copied from nfsd4_encode_read */ - svc_take_page(rqstp); - resbuf->tail[0].iov_base = page_address(rqstp - ->rq_respages[rqstp->rq_resused-1]); - rqstp->rq_restailpage = rqstp->rq_resused-1; - resbuf->tail[0].iov_len = 0; - resv = &resbuf->tail[0]; - } else { - resv = &resbuf->tail[0]; - } - mic.data = (u8 *)resv->iov_base + resv->iov_len + 4; - if (gss_get_mic(gsd->rsci->mechctx, 0, &integ_buf, &mic)) + stat = svcauth_gss_wrap_resp_integ(rqstp); + if (stat) goto out_err; - svc_putu32(resv, htonl(mic.len)); - memset(mic.data + mic.len, 0, - round_up_to_quad(mic.len) - mic.len); - resv->iov_len += XDR_QUADLEN(mic.len) << 2; - /* not strictly required: */ - resbuf->len += XDR_QUADLEN(mic.len) << 2; - BUG_ON(resv->iov_len > PAGE_SIZE); break; case RPC_GSS_SVC_PRIVACY: + stat = svcauth_gss_wrap_resp_priv(rqstp); + if (stat) + goto out_err; + break; default: goto out_err; } @@ -1052,6 +1269,7 @@ .accept = svcauth_gss_accept, .release = svcauth_gss_release, .domain_release = svcauth_gss_domain_release, + .set_client = svcauth_gss_set_client, }; int Index: linux-2.6.10/net/sunrpc/auth_gss/sunrpcgss_syms.c =================================================================== --- linux-2.6.10.orig/net/sunrpc/auth_gss/sunrpcgss_syms.c 2004-12-25 05:35:23.000000000 +0800 +++ linux-2.6.10/net/sunrpc/auth_gss/sunrpcgss_syms.c 2005-04-05 19:01:49.158500672 +0800 @@ -1,37 +0,0 @@ -#include 
-#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -/* svcauth_gss.c: */ -EXPORT_SYMBOL(svcauth_gss_register_pseudoflavor); - -/* registering gss mechanisms to the mech switching code: */ -EXPORT_SYMBOL(gss_mech_register); -EXPORT_SYMBOL(gss_mech_unregister); -EXPORT_SYMBOL(gss_mech_get); -EXPORT_SYMBOL(gss_mech_get_by_pseudoflavor); -EXPORT_SYMBOL(gss_mech_get_by_name); -EXPORT_SYMBOL(gss_mech_put); -EXPORT_SYMBOL(gss_pseudoflavor_to_service); -EXPORT_SYMBOL(gss_service_to_auth_domain_name); - -/* generic functionality in gss code: */ -EXPORT_SYMBOL(g_make_token_header); -EXPORT_SYMBOL(g_verify_token_header); -EXPORT_SYMBOL(g_token_size); -EXPORT_SYMBOL(make_checksum); -EXPORT_SYMBOL(krb5_encrypt); -EXPORT_SYMBOL(krb5_decrypt); - -/* debug */ -EXPORT_SYMBOL(print_hexl); Index: linux-2.6.10/net/sunrpc/auth_gss/Makefile =================================================================== --- linux-2.6.10.orig/net/sunrpc/auth_gss/Makefile 2004-12-25 05:34:33.000000000 +0800 +++ linux-2.6.10/net/sunrpc/auth_gss/Makefile 2005-04-05 14:49:13.408690888 +0800 @@ -10,7 +10,7 @@ obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \ - gss_krb5_seqnum.o + gss_krb5_seqnum.o gss_krb5_wrap.o obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o Index: linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_mech.c =================================================================== --- linux-2.6.10.orig/net/sunrpc/auth_gss/gss_krb5_mech.c 2004-12-25 05:35:23.000000000 +0800 +++ linux-2.6.10/net/sunrpc/auth_gss/gss_krb5_mech.c 2005-04-05 14:49:13.400692104 +0800 @@ -182,6 +182,7 @@ kfree(kctx); } +/* XXX the following wrappers have become pointless; kill them. 
*/ static u32 gss_verify_mic_kerberos(struct gss_ctx *ctx, struct xdr_buf *message, @@ -191,8 +192,7 @@ int qop_state; struct krb5_ctx *kctx = ctx->internal_ctx_id; - maj_stat = krb5_read_token(kctx, mic_token, message, &qop_state, - KG_TOK_MIC_MSG); + maj_stat = krb5_read_token(kctx, mic_token, message, &qop_state); if (!maj_stat && qop_state) *qstate = qop_state; @@ -208,7 +208,7 @@ u32 err = 0; struct krb5_ctx *kctx = ctx->internal_ctx_id; - err = krb5_make_token(kctx, qop, message, mic_token, KG_TOK_MIC_MSG); + err = krb5_make_token(kctx, qop, message, mic_token); dprintk("RPC: gss_get_mic_kerberos returning %d\n",err); @@ -219,6 +219,8 @@ .gss_import_sec_context = gss_import_sec_context_kerberos, .gss_get_mic = gss_get_mic_kerberos, .gss_verify_mic = gss_verify_mic_kerberos, + .gss_wrap = gss_wrap_kerberos, + .gss_unwrap = gss_unwrap_kerberos, .gss_delete_sec_context = gss_delete_sec_context_kerberos, }; @@ -233,6 +235,11 @@ .service = RPC_GSS_SVC_INTEGRITY, .name = "krb5i", }, + [2] = { + .pseudoflavor = RPC_AUTH_GSS_KRB5P, + .service = RPC_GSS_SVC_PRIVACY, + .name = "krb5p", + }, }; static struct gss_api_mech gss_kerberos_mech = { Index: linux-2.6.10/net/sunrpc/auth_gss/auth_gss.c =================================================================== --- linux-2.6.10.orig/net/sunrpc/auth_gss/auth_gss.c 2004-12-25 05:34:44.000000000 +0800 +++ linux-2.6.10/net/sunrpc/auth_gss/auth_gss.c 2005-04-05 14:49:13.404691496 +0800 @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -480,12 +481,14 @@ if (!cred) goto err; if (gss_err) - cred->cr_flags |= RPCAUTH_CRED_DEAD; + cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; else gss_cred_set_ctx(cred, ctx); spin_lock(&gss_auth->lock); gss_msg = __gss_find_upcall(gss_auth, acred.uid); if (gss_msg) { + if (gss_err) + gss_msg->msg.errno = -EACCES; __gss_unhash_msg(gss_msg); spin_unlock(&gss_auth->lock); gss_release_msg(gss_msg); @@ -740,7 +743,9 @@ maj_stat = gss_get_mic(ctx->gc_gss_ctx, 
GSS_C_QOP_DEFAULT, &verf_buf, &mic); - if(maj_stat != 0){ + if (maj_stat == GSS_S_CONTEXT_EXPIRED) { + cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + } else if (maj_stat != 0) { printk("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat); goto out_put_ctx; } @@ -779,6 +784,7 @@ struct xdr_netobj mic; u32 flav,len; u32 service; + u32 maj_stat; dprintk("RPC: %4u gss_validate\n", task->tk_pid); @@ -794,8 +800,11 @@ mic.data = (u8 *)p; mic.len = len; - if (gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state)) - goto out_bad; + maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state); + if (maj_stat == GSS_S_CONTEXT_EXPIRED) + cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + if (maj_stat) + goto out_bad; service = gss_pseudoflavor_to_service(ctx->gc_gss_ctx->mech_type, gss_cred->gc_flavor); switch (service) { @@ -807,6 +816,11 @@ /* verifier data, flavor, length, length, sequence number: */ task->tk_auth->au_rslack = XDR_QUADLEN(len) + 4; break; + case RPC_GSS_SVC_PRIVACY: + /* XXXJBF: Ugh. Going for a wild overestimate. + * Need some info from krb5 layer? 
*/ + task->tk_auth->au_rslack = XDR_QUADLEN(len) + 32; + break; default: goto out_bad; } @@ -821,11 +835,10 @@ } static inline int -gss_wrap_req_integ(struct gss_cl_ctx *ctx, - kxdrproc_t encode, void *rqstp, u32 *p, void *obj) +gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, + kxdrproc_t encode, struct rpc_rqst *rqstp, u32 *p, void *obj) { - struct rpc_rqst *req = (struct rpc_rqst *)rqstp; - struct xdr_buf *snd_buf = &req->rq_snd_buf; + struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; struct xdr_buf integ_buf; u32 *integ_len = NULL; struct xdr_netobj mic; @@ -836,7 +849,7 @@ integ_len = p++; offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; - *p++ = htonl(req->rq_seqno); + *p++ = htonl(rqstp->rq_seqno); status = encode(rqstp, p, obj); if (status) @@ -848,7 +861,7 @@ *integ_len = htonl(integ_buf.len); /* guess whether we're in the head or the tail: */ - if (snd_buf->page_len || snd_buf->tail[0].iov_len) + if (snd_buf->page_len || snd_buf->tail[0].iov_len) iov = snd_buf->tail; else iov = snd_buf->head; @@ -858,7 +871,9 @@ maj_stat = gss_get_mic(ctx->gc_gss_ctx, GSS_C_QOP_DEFAULT, &integ_buf, &mic); status = -EIO; /* XXX? 
*/ - if (maj_stat) + if (maj_stat == GSS_S_CONTEXT_EXPIRED) + cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + else if (maj_stat) return status; q = xdr_encode_opaque(p, NULL, mic.len); @@ -868,6 +883,112 @@ return 0; } +static void +priv_release_snd_buf(struct rpc_rqst *rqstp) +{ + int i; + + for (i=0; i < rqstp->rq_enc_pages_num; i++) + __free_page(rqstp->rq_enc_pages[i]); + kfree(rqstp->rq_enc_pages); +} + +static int +alloc_enc_pages(struct rpc_rqst *rqstp) +{ + struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; + int first, last, i; + + if (snd_buf->page_len == 0) { + rqstp->rq_enc_pages_num = 0; + return 0; + } + + first = snd_buf->page_base >> PAGE_CACHE_SHIFT; + last = (snd_buf->page_base + snd_buf->page_len - 1) >> PAGE_CACHE_SHIFT; + rqstp->rq_enc_pages_num = last - first + 1 + 1; + rqstp->rq_enc_pages + = kmalloc(rqstp->rq_enc_pages_num * sizeof(struct page *), + GFP_NOFS); + if (!rqstp->rq_enc_pages) + goto out; + for (i=0; i < rqstp->rq_enc_pages_num; i++) { + rqstp->rq_enc_pages[i] = alloc_page(GFP_NOFS); + if (rqstp->rq_enc_pages[i] == NULL) + goto out_free; + } + rqstp->rq_release_snd_buf = priv_release_snd_buf; + return 0; +out_free: + for (i--; i >= 0; i--) { + __free_page(rqstp->rq_enc_pages[i]); + } +out: + return -EAGAIN; +} + +static inline int +gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, + kxdrproc_t encode, struct rpc_rqst *rqstp, u32 *p, void *obj) +{ + struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; + u32 offset; + u32 maj_stat; + int status; + u32 *opaque_len; + struct page **inpages; + int first; + int pad; + struct kvec *iov; + char *tmp; + + opaque_len = p++; + offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; + *p++ = htonl(rqstp->rq_seqno); + + status = encode(rqstp, p, obj); + if (status) + return status; + + status = alloc_enc_pages(rqstp); + if (status) + return status; + /* XXXJBF: Oops! Do we need rq_enc_pages really any more?? 
*/ + first = snd_buf->page_base >> PAGE_CACHE_SHIFT; + inpages = snd_buf->pages + first; + snd_buf->pages = rqstp->rq_enc_pages; + snd_buf->page_base -= first << PAGE_CACHE_SHIFT; + /* XXX?: tail needs to be separate if we want to be able to expand + * the head (since it's often put right after the head). But is + * expanding the head safe in any case? */ + if (snd_buf->page_len || snd_buf->tail[0].iov_len) { + tmp = page_address(rqstp->rq_enc_pages[rqstp->rq_enc_pages_num - 1]); + memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len); + snd_buf->tail[0].iov_base = tmp; + } + maj_stat = gss_wrap(ctx->gc_gss_ctx, GSS_C_QOP_DEFAULT, offset, + snd_buf, inpages); + status = -EIO; /* XXX? */ + if (maj_stat == GSS_S_CONTEXT_EXPIRED) + cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + else if (maj_stat) + return status; + + *opaque_len = htonl(snd_buf->len - offset); + /* guess whether we're in the head or the tail: */ + if (snd_buf->page_len || snd_buf->tail[0].iov_len) + iov = snd_buf->tail; + else + iov = snd_buf->head; + p = iov->iov_base + iov->iov_len; + pad = 3 - ((snd_buf->len - offset - 1) & 3); + memset(p, 0, pad); + iov->iov_len += pad; + snd_buf->len += pad; + + return 0; +} + static int gss_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, u32 *p, void *obj) @@ -894,9 +1015,13 @@ status = encode(rqstp, p, obj); goto out; case RPC_GSS_SVC_INTEGRITY: - status = gss_wrap_req_integ(ctx, encode, rqstp, p, obj); + status = gss_wrap_req_integ(cred, ctx, encode, + rqstp, p, obj); goto out; case RPC_GSS_SVC_PRIVACY: + status = gss_wrap_req_priv(cred, ctx, encode, + rqstp, p, obj); + goto out; default: goto out; } @@ -907,11 +1032,10 @@ } static inline int -gss_unwrap_resp_integ(struct gss_cl_ctx *ctx, - kxdrproc_t decode, void *rqstp, u32 **p, void *obj) +gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, + struct rpc_rqst *rqstp, u32 **p) { - struct rpc_rqst *req = (struct rpc_rqst *)rqstp; - struct xdr_buf *rcv_buf = 
&req->rq_rcv_buf; + struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf; struct xdr_buf integ_buf; struct xdr_netobj mic; u32 data_offset, mic_offset; @@ -926,7 +1050,7 @@ mic_offset = integ_len + data_offset; if (mic_offset > rcv_buf->len) return status; - if (ntohl(*(*p)++) != req->rq_seqno) + if (ntohl(*(*p)++) != rqstp->rq_seqno) return status; if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset, @@ -938,11 +1062,44 @@ maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic, NULL); + if (maj_stat == GSS_S_CONTEXT_EXPIRED) + cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + if (maj_stat != GSS_S_COMPLETE) + return status; + return 0; +} + +static inline int +gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, + struct rpc_rqst *rqstp, u32 **p) +{ + struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf; + u32 offset, out_offset; + u32 opaque_len; + u32 maj_stat; + int status = -EIO; + + opaque_len = ntohl(*(*p)++); + offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base; + if (offset + opaque_len > rcv_buf->len) + return status; + /* remove padding: */ + rcv_buf->len = offset + opaque_len; + + maj_stat = gss_unwrap(ctx->gc_gss_ctx, NULL, + offset, rcv_buf, &out_offset); + if (maj_stat == GSS_S_CONTEXT_EXPIRED) + cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; if (maj_stat != GSS_S_COMPLETE) return status; + *p = (u32 *)(rcv_buf->head[0].iov_base + out_offset); + if (ntohl(*(*p)++) != rqstp->rq_seqno) + return status; + return 0; } + static int gss_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, u32 *p, void *obj) @@ -962,12 +1119,16 @@ case RPC_GSS_SVC_NONE: goto out_decode; case RPC_GSS_SVC_INTEGRITY: - status = gss_unwrap_resp_integ(ctx, decode, - rqstp, &p, obj); + status = gss_unwrap_resp_integ(cred, ctx, rqstp, &p); if (status) goto out; break; case RPC_GSS_SVC_PRIVACY: + status = gss_unwrap_resp_priv(cred, ctx, rqstp, &p); + if (status) + goto out; + break; + default: goto out; } Index: linux-2.6.10/net/sunrpc/svc.c 
=================================================================== --- linux-2.6.10.orig/net/sunrpc/svc.c 2004-12-25 05:35:28.000000000 +0800 +++ linux-2.6.10/net/sunrpc/svc.c 2005-04-05 14:49:13.409690736 +0800 @@ -264,6 +264,7 @@ u32 dir, prog, vers, proc, auth_stat, rpc_stat; int auth_res; + u32 *accept_statp; rpc_stat = rpc_success; @@ -299,6 +300,9 @@ if (vers != 2) /* RPC version number */ goto err_bad_rpc; + /* Save position in case we later decide to reject: */ + accept_statp = resv->iov_base + resv->iov_len; + svc_putu32(resv, xdr_zero); /* ACCEPT */ rqstp->rq_prog = prog = ntohl(svc_getu32(argv)); /* program number */ @@ -311,10 +315,12 @@ * We do this before anything else in order to get a decent * auth verifier. */ - if (progp->pg_authenticate != NULL) - auth_res = progp->pg_authenticate(rqstp, &auth_stat); - else - auth_res = svc_authenticate(rqstp, &auth_stat); + auth_res = svc_authenticate(rqstp, &auth_stat); + /* Also give the program a chance to reject this call: */ + if (auth_res == SVC_OK) { + auth_stat = rpc_autherr_badcred; + auth_res = progp->pg_authenticate(rqstp); + } switch (auth_res) { case SVC_OK: break; @@ -437,7 +443,8 @@ err_bad_auth: dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat)); serv->sv_stats->rpcbadauth++; - resv->iov_len -= 4; + /* Restore write pointer to location of accept status: */ + xdr_ressize_check(rqstp, accept_statp); svc_putu32(resv, xdr_one); /* REJECT */ svc_putu32(resv, xdr_one); /* AUTH_ERROR */ svc_putu32(resv, auth_stat); /* status */ Index: linux-2.6.10/net/sunrpc/sched.c =================================================================== --- linux-2.6.10.orig/net/sunrpc/sched.c 2004-12-25 05:34:58.000000000 +0800 +++ linux-2.6.10/net/sunrpc/sched.c 2005-04-05 14:49:13.391693472 +0800 @@ -41,13 +41,7 @@ static void __rpc_default_timer(struct rpc_task *task); static void rpciod_killall(void); - -/* - * When an asynchronous RPC task is activated within a bottom half - * handler, or while executing 
another RPC task, it is put on - * schedq, and rpciod is woken up. - */ -static RPC_WAITQ(schedq, "schedq"); +static void rpc_async_schedule(void *); /* * RPC tasks that create another task (e.g. for contacting the portmapper) @@ -68,26 +62,18 @@ /* * rpciod-related stuff */ -static DECLARE_WAIT_QUEUE_HEAD(rpciod_idle); -static DECLARE_COMPLETION(rpciod_killer); static DECLARE_MUTEX(rpciod_sema); static unsigned int rpciod_users; -static pid_t rpciod_pid; -static int rpc_inhibit; +static struct workqueue_struct *rpciod_workqueue; /* - * Spinlock for wait queues. Access to the latter also has to be - * interrupt-safe in order to allow timers to wake up sleeping tasks. - */ -static spinlock_t rpc_queue_lock = SPIN_LOCK_UNLOCKED; -/* * Spinlock for other critical sections of code. */ static spinlock_t rpc_sched_lock = SPIN_LOCK_UNLOCKED; /* * Disable the timer for a given RPC task. Should be called with - * rpc_queue_lock and bh_disabled in order to avoid races within + * queue->lock and bh_disabled in order to avoid races within * rpc_run_timer(). */ static inline void @@ -105,19 +91,19 @@ * without calling del_timer_sync(). The latter could cause a * deadlock if called while we're holding spinlocks... */ -static void -rpc_run_timer(struct rpc_task *task) +static void rpc_run_timer(struct rpc_task *task) { void (*callback)(struct rpc_task *); - spin_lock_bh(&rpc_queue_lock); callback = task->tk_timeout_fn; task->tk_timeout_fn = NULL; - spin_unlock_bh(&rpc_queue_lock); - if (callback) { + if (callback && RPC_IS_QUEUED(task)) { dprintk("RPC: %4d running timer\n", task->tk_pid); callback(task); } + smp_mb__before_clear_bit(); + clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate); + smp_mb__after_clear_bit(); } /* @@ -136,29 +122,21 @@ task->tk_timeout_fn = timer; else task->tk_timeout_fn = __rpc_default_timer; + set_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate); mod_timer(&task->tk_timer, jiffies + task->tk_timeout); } /* - * Set up a timer for an already sleeping task. 
- */ -void rpc_add_timer(struct rpc_task *task, rpc_action timer) -{ - spin_lock_bh(&rpc_queue_lock); - if (!RPC_IS_RUNNING(task)) - __rpc_add_timer(task, timer); - spin_unlock_bh(&rpc_queue_lock); -} - -/* * Delete any timer for the current task. Because we use del_timer_sync(), - * this function should never be called while holding rpc_queue_lock. + * this function should never be called while holding queue->lock. */ static inline void rpc_delete_timer(struct rpc_task *task) { - if (del_timer_sync(&task->tk_timer)) + if (test_and_clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate)) { + del_singleshot_timer_sync(&task->tk_timer); dprintk("RPC: %4d deleting timer\n", task->tk_pid); + } } /* @@ -169,16 +147,17 @@ struct list_head *q; struct rpc_task *t; + INIT_LIST_HEAD(&task->u.tk_wait.links); q = &queue->tasks[task->tk_priority]; if (unlikely(task->tk_priority > queue->maxpriority)) q = &queue->tasks[queue->maxpriority]; - list_for_each_entry(t, q, tk_list) { + list_for_each_entry(t, q, u.tk_wait.list) { if (t->tk_cookie == task->tk_cookie) { - list_add_tail(&task->tk_list, &t->tk_links); + list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links); return; } } - list_add_tail(&task->tk_list, q); + list_add_tail(&task->u.tk_wait.list, q); } /* @@ -189,37 +168,21 @@ * improve overall performance. * Everyone else gets appended to the queue to ensure proper FIFO behavior. 
*/ -static int __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task) +static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task) { - if (task->tk_rpcwait == queue) - return 0; + BUG_ON (RPC_IS_QUEUED(task)); - if (task->tk_rpcwait) { - printk(KERN_WARNING "RPC: doubly enqueued task!\n"); - return -EWOULDBLOCK; - } if (RPC_IS_PRIORITY(queue)) __rpc_add_wait_queue_priority(queue, task); else if (RPC_IS_SWAPPER(task)) - list_add(&task->tk_list, &queue->tasks[0]); + list_add(&task->u.tk_wait.list, &queue->tasks[0]); else - list_add_tail(&task->tk_list, &queue->tasks[0]); - task->tk_rpcwait = queue; + list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]); + task->u.tk_wait.rpc_waitq = queue; + rpc_set_queued(task); dprintk("RPC: %4d added to queue %p \"%s\"\n", task->tk_pid, queue, rpc_qname(queue)); - - return 0; -} - -int rpc_add_wait_queue(struct rpc_wait_queue *q, struct rpc_task *task) -{ - int result; - - spin_lock_bh(&rpc_queue_lock); - result = __rpc_add_wait_queue(q, task); - spin_unlock_bh(&rpc_queue_lock); - return result; } /* @@ -229,12 +192,12 @@ { struct rpc_task *t; - if (!list_empty(&task->tk_links)) { - t = list_entry(task->tk_links.next, struct rpc_task, tk_list); - list_move(&t->tk_list, &task->tk_list); - list_splice_init(&task->tk_links, &t->tk_links); + if (!list_empty(&task->u.tk_wait.links)) { + t = list_entry(task->u.tk_wait.links.next, struct rpc_task, u.tk_wait.list); + list_move(&t->u.tk_wait.list, &task->u.tk_wait.list); + list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links); } - list_del(&task->tk_list); + list_del(&task->u.tk_wait.list); } /* @@ -243,31 +206,17 @@ */ static void __rpc_remove_wait_queue(struct rpc_task *task) { - struct rpc_wait_queue *queue = task->tk_rpcwait; - - if (!queue) - return; + struct rpc_wait_queue *queue; + queue = task->u.tk_wait.rpc_waitq; if (RPC_IS_PRIORITY(queue)) __rpc_remove_wait_queue_priority(task); else - list_del(&task->tk_list); - 
task->tk_rpcwait = NULL; - + list_del(&task->u.tk_wait.list); dprintk("RPC: %4d removed from queue %p \"%s\"\n", task->tk_pid, queue, rpc_qname(queue)); } -void -rpc_remove_wait_queue(struct rpc_task *task) -{ - if (!task->tk_rpcwait) - return; - spin_lock_bh(&rpc_queue_lock); - __rpc_remove_wait_queue(task); - spin_unlock_bh(&rpc_queue_lock); -} - static inline void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority) { queue->priority = priority; @@ -290,6 +239,7 @@ { int i; + spin_lock_init(&queue->lock); for (i = 0; i < ARRAY_SIZE(queue->tasks); i++) INIT_LIST_HEAD(&queue->tasks[i]); queue->maxpriority = maxprio; @@ -316,34 +266,31 @@ * Note: If the task is ASYNC, this must be called with * the spinlock held to protect the wait queue operation. */ -static inline void -rpc_make_runnable(struct rpc_task *task) +static void rpc_make_runnable(struct rpc_task *task) { - if (task->tk_timeout_fn) { - printk(KERN_ERR "RPC: task w/ running timer in rpc_make_runnable!!\n"); + int do_ret; + + BUG_ON(task->tk_timeout_fn); + do_ret = rpc_test_and_set_running(task); + rpc_clear_queued(task); + if (do_ret) return; - } - rpc_set_running(task); if (RPC_IS_ASYNC(task)) { - if (RPC_IS_SLEEPING(task)) { - int status; - status = __rpc_add_wait_queue(&schedq, task); - if (status < 0) { - printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status); - task->tk_status = status; - return; - } - rpc_clear_sleeping(task); - wake_up(&rpciod_idle); + int status; + + INIT_WORK(&task->u.tk_work, rpc_async_schedule, (void *)task); + status = queue_work(task->tk_workqueue, &task->u.tk_work); + if (status < 0) { + printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status); + task->tk_status = status; + return; } - } else { - rpc_clear_sleeping(task); - wake_up(&task->tk_wait); - } + } else + wake_up(&task->u.tk_wait.waitq); } /* - * Place a newly initialized task on the schedq. + * Place a newly initialized task on the workqueue. 
*/ static inline void rpc_schedule_run(struct rpc_task *task) @@ -352,33 +299,18 @@ if (RPC_IS_ACTIVATED(task)) return; task->tk_active = 1; - rpc_set_sleeping(task); rpc_make_runnable(task); } /* - * For other people who may need to wake the I/O daemon - * but should (for now) know nothing about its innards - */ -void rpciod_wake_up(void) -{ - if(rpciod_pid==0) - printk(KERN_ERR "rpciod: wot no daemon?\n"); - wake_up(&rpciod_idle); -} - -/* * Prepare for sleeping on a wait queue. * By always appending tasks to the list we ensure FIFO behavior. * NB: An RPC task will only receive interrupt-driven events as long * as it's on a wait queue. */ -static void -__rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, +static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, rpc_action action, rpc_action timer) { - int status; - dprintk("RPC: %4d sleep_on(queue \"%s\" time %ld)\n", task->tk_pid, rpc_qname(q), jiffies); @@ -388,49 +320,36 @@ } /* Mark the task as being activated if so needed */ - if (!RPC_IS_ACTIVATED(task)) { + if (!RPC_IS_ACTIVATED(task)) task->tk_active = 1; - rpc_set_sleeping(task); - } - status = __rpc_add_wait_queue(q, task); - if (status) { - printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status); - task->tk_status = status; - } else { - rpc_clear_running(task); - if (task->tk_callback) { - dprintk(KERN_ERR "RPC: %4d overwrites an active callback\n", task->tk_pid); - BUG(); - } - task->tk_callback = action; - __rpc_add_timer(task, timer); - } + __rpc_add_wait_queue(q, task); + + BUG_ON(task->tk_callback != NULL); + task->tk_callback = action; + __rpc_add_timer(task, timer); } -void -rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, +void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, rpc_action action, rpc_action timer) { /* * Protect the queue operations. 
*/ - spin_lock_bh(&rpc_queue_lock); + spin_lock_bh(&q->lock); __rpc_sleep_on(q, task, action, timer); - spin_unlock_bh(&rpc_queue_lock); + spin_unlock_bh(&q->lock); } /** - * __rpc_wake_up_task - wake up a single rpc_task + * __rpc_do_wake_up_task - wake up a single rpc_task * @task: task to be woken up * - * Caller must hold rpc_queue_lock + * Caller must hold queue->lock, and have cleared the task queued flag. */ -static void -__rpc_wake_up_task(struct rpc_task *task) +static void __rpc_do_wake_up_task(struct rpc_task *task) { - dprintk("RPC: %4d __rpc_wake_up_task (now %ld inh %d)\n", - task->tk_pid, jiffies, rpc_inhibit); + dprintk("RPC: %4d __rpc_wake_up_task (now %ld)\n", task->tk_pid, jiffies); #ifdef RPC_DEBUG if (task->tk_magic != 0xf00baa) { @@ -445,12 +364,9 @@ printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task); return; } - if (RPC_IS_RUNNING(task)) - return; __rpc_disable_timer(task); - if (task->tk_rpcwait != &schedq) - __rpc_remove_wait_queue(task); + __rpc_remove_wait_queue(task); rpc_make_runnable(task); @@ -458,6 +374,18 @@ } /* + * Wake up the specified task + */ +static void __rpc_wake_up_task(struct rpc_task *task) +{ + if (rpc_start_wakeup(task)) { + if (RPC_IS_QUEUED(task)) + __rpc_do_wake_up_task(task); + rpc_finish_wakeup(task); + } +} + +/* * Default timeout handler if none specified by user */ static void @@ -471,14 +399,18 @@ /* * Wake up the specified task */ -void -rpc_wake_up_task(struct rpc_task *task) +void rpc_wake_up_task(struct rpc_task *task) { - if (RPC_IS_RUNNING(task)) - return; - spin_lock_bh(&rpc_queue_lock); - __rpc_wake_up_task(task); - spin_unlock_bh(&rpc_queue_lock); + if (rpc_start_wakeup(task)) { + if (RPC_IS_QUEUED(task)) { + struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq; + + spin_lock_bh(&queue->lock); + __rpc_do_wake_up_task(task); + spin_unlock_bh(&queue->lock); + } + rpc_finish_wakeup(task); + } } /* @@ -494,11 +426,11 @@ */ q = &queue->tasks[queue->priority]; if (!list_empty(q)) { - 
task = list_entry(q->next, struct rpc_task, tk_list); + task = list_entry(q->next, struct rpc_task, u.tk_wait.list); if (queue->cookie == task->tk_cookie) { if (--queue->nr) goto out; - list_move_tail(&task->tk_list, q); + list_move_tail(&task->u.tk_wait.list, q); } /* * Check if we need to switch queues. @@ -516,7 +448,7 @@ else q = q - 1; if (!list_empty(q)) { - task = list_entry(q->next, struct rpc_task, tk_list); + task = list_entry(q->next, struct rpc_task, u.tk_wait.list); goto new_queue; } } while (q != &queue->tasks[queue->priority]); @@ -541,14 +473,14 @@ struct rpc_task *task = NULL; dprintk("RPC: wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue)); - spin_lock_bh(&rpc_queue_lock); + spin_lock_bh(&queue->lock); if (RPC_IS_PRIORITY(queue)) task = __rpc_wake_up_next_priority(queue); else { task_for_first(task, &queue->tasks[0]) __rpc_wake_up_task(task); } - spin_unlock_bh(&rpc_queue_lock); + spin_unlock_bh(&queue->lock); return task; } @@ -557,25 +489,25 @@ * rpc_wake_up - wake up all rpc_tasks * @queue: rpc_wait_queue on which the tasks are sleeping * - * Grabs rpc_queue_lock + * Grabs queue->lock */ void rpc_wake_up(struct rpc_wait_queue *queue) { struct rpc_task *task; struct list_head *head; - spin_lock_bh(&rpc_queue_lock); + spin_lock_bh(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { while (!list_empty(head)) { - task = list_entry(head->next, struct rpc_task, tk_list); + task = list_entry(head->next, struct rpc_task, u.tk_wait.list); __rpc_wake_up_task(task); } if (head == &queue->tasks[0]) break; head--; } - spin_unlock_bh(&rpc_queue_lock); + spin_unlock_bh(&queue->lock); } /** @@ -583,18 +515,18 @@ * @queue: rpc_wait_queue on which the tasks are sleeping * @status: status value to set * - * Grabs rpc_queue_lock + * Grabs queue->lock */ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) { struct list_head *head; struct rpc_task *task; - spin_lock_bh(&rpc_queue_lock); + spin_lock_bh(&queue->lock); head = 
&queue->tasks[queue->maxpriority]; for (;;) { while (!list_empty(head)) { - task = list_entry(head->next, struct rpc_task, tk_list); + task = list_entry(head->next, struct rpc_task, u.tk_wait.list); task->tk_status = status; __rpc_wake_up_task(task); } @@ -602,7 +534,7 @@ break; head--; } - spin_unlock_bh(&rpc_queue_lock); + spin_unlock_bh(&queue->lock); } /* @@ -626,22 +558,23 @@ /* * This is the RPC `scheduler' (or rather, the finite state machine). */ -static int -__rpc_execute(struct rpc_task *task) +static int __rpc_execute(struct rpc_task *task) { int status = 0; dprintk("RPC: %4d rpc_execute flgs %x\n", task->tk_pid, task->tk_flags); - if (!RPC_IS_RUNNING(task)) { - printk(KERN_WARNING "RPC: rpc_execute called for sleeping task!!\n"); - return 0; - } + BUG_ON(RPC_IS_QUEUED(task)); restarted: while (1) { /* + * Garbage collection of pending timers... + */ + rpc_delete_timer(task); + + /* * Execute any pending callback. */ if (RPC_DO_CALLBACK(task)) { @@ -657,7 +590,9 @@ */ save_callback=task->tk_callback; task->tk_callback=NULL; + lock_kernel(); save_callback(task); + unlock_kernel(); } /* @@ -665,43 +600,35 @@ * tk_action may be NULL when the task has been killed * by someone else. */ - if (RPC_IS_RUNNING(task)) { - /* - * Garbage collection of pending timers... - */ - rpc_delete_timer(task); + if (!RPC_IS_QUEUED(task)) { if (!task->tk_action) break; + lock_kernel(); task->tk_action(task); - /* micro-optimization to avoid spinlock */ - if (RPC_IS_RUNNING(task)) - continue; + unlock_kernel(); } /* - * Check whether task is sleeping. + * Lockless check for whether task is sleeping or not. */ - spin_lock_bh(&rpc_queue_lock); - if (!RPC_IS_RUNNING(task)) { - rpc_set_sleeping(task); - if (RPC_IS_ASYNC(task)) { - spin_unlock_bh(&rpc_queue_lock); + if (!RPC_IS_QUEUED(task)) + continue; + rpc_clear_running(task); + if (RPC_IS_ASYNC(task)) { + /* Careful! we may have raced... 
*/ + if (RPC_IS_QUEUED(task)) return 0; - } + if (rpc_test_and_set_running(task)) + return 0; + continue; } - spin_unlock_bh(&rpc_queue_lock); - if (!RPC_IS_SLEEPING(task)) - continue; /* sync task: sleep here */ dprintk("RPC: %4d sync task going to sleep\n", task->tk_pid); - if (current->pid == rpciod_pid) - printk(KERN_ERR "RPC: rpciod waiting on sync task!\n"); - if (RPC_TASK_UNINTERRUPTIBLE(task)) { - __wait_event(task->tk_wait, !RPC_IS_SLEEPING(task)); + __wait_event(task->u.tk_wait.waitq, !RPC_IS_QUEUED(task)); } else { - __wait_event_interruptible(task->tk_wait, !RPC_IS_SLEEPING(task), status); + __wait_event_interruptible(task->u.tk_wait.waitq, !RPC_IS_QUEUED(task), status); /* * When a sync task receives a signal, it exits with * -ERESTARTSYS. In order to catch any callbacks that @@ -715,11 +642,14 @@ rpc_wake_up_task(task); } } + rpc_set_running(task); dprintk("RPC: %4d sync task resuming\n", task->tk_pid); } if (task->tk_exit) { + lock_kernel(); task->tk_exit(task); + unlock_kernel(); /* If tk_action is non-null, the user wants us to restart */ if (task->tk_action) { if (!RPC_ASSASSINATED(task)) { @@ -738,7 +668,6 @@ /* Release all resources associated with the task */ rpc_release_task(task); - return status; } @@ -754,57 +683,16 @@ int rpc_execute(struct rpc_task *task) { - int status = -EIO; - if (rpc_inhibit) { - printk(KERN_INFO "RPC: execution inhibited!\n"); - goto out_release; - } - - status = -EWOULDBLOCK; - if (task->tk_active) { - printk(KERN_ERR "RPC: active task was run twice!\n"); - goto out_err; - } + BUG_ON(task->tk_active); task->tk_active = 1; rpc_set_running(task); return __rpc_execute(task); - out_release: - rpc_release_task(task); - out_err: - return status; } -/* - * This is our own little scheduler for async RPC tasks. 
- */ -static void -__rpc_schedule(void) +static void rpc_async_schedule(void *arg) { - struct rpc_task *task; - int count = 0; - - dprintk("RPC: rpc_schedule enter\n"); - while (1) { - - task_for_first(task, &schedq.tasks[0]) { - __rpc_remove_wait_queue(task); - spin_unlock_bh(&rpc_queue_lock); - - __rpc_execute(task); - spin_lock_bh(&rpc_queue_lock); - } else { - break; - } - - if (++count >= 200 || need_resched()) { - count = 0; - spin_unlock_bh(&rpc_queue_lock); - schedule(); - spin_lock_bh(&rpc_queue_lock); - } - } - dprintk("RPC: rpc_schedule leave\n"); + __rpc_execute((struct rpc_task *)arg); } /* @@ -862,7 +750,6 @@ task->tk_client = clnt; task->tk_flags = flags; task->tk_exit = callback; - init_waitqueue_head(&task->tk_wait); if (current->uid != current->fsuid || current->gid != current->fsgid) task->tk_flags |= RPC_TASK_SETUID; @@ -873,7 +760,11 @@ task->tk_priority = RPC_PRIORITY_NORMAL; task->tk_cookie = (unsigned long)current; - INIT_LIST_HEAD(&task->tk_links); + + /* Initialize workqueue for async tasks */ + task->tk_workqueue = rpciod_workqueue; + if (!RPC_IS_ASYNC(task)) + init_waitqueue_head(&task->u.tk_wait.waitq); /* Add to global list of all tasks */ spin_lock(&rpc_sched_lock); @@ -944,8 +835,7 @@ goto out; } -void -rpc_release_task(struct rpc_task *task) +void rpc_release_task(struct rpc_task *task) { dprintk("RPC: %4d release task\n", task->tk_pid); @@ -963,19 +853,9 @@ list_del(&task->tk_task); spin_unlock(&rpc_sched_lock); - /* Protect the execution below. */ - spin_lock_bh(&rpc_queue_lock); - - /* Disable timer to prevent zombie wakeup */ - __rpc_disable_timer(task); - - /* Remove from any wait queue we're still on */ - __rpc_remove_wait_queue(task); - + BUG_ON (RPC_IS_QUEUED(task)); task->tk_active = 0; - spin_unlock_bh(&rpc_queue_lock); - /* Synchronously delete any running timer */ rpc_delete_timer(task); @@ -1005,10 +885,9 @@ * queue 'childq'. If so returns a pointer to the parent. * Upon failure returns NULL. 
* - * Caller must hold rpc_queue_lock + * Caller must hold childq.lock */ -static inline struct rpc_task * -rpc_find_parent(struct rpc_task *child) +static inline struct rpc_task *rpc_find_parent(struct rpc_task *child) { struct rpc_task *task, *parent; struct list_head *le; @@ -1021,17 +900,16 @@ return NULL; } -static void -rpc_child_exit(struct rpc_task *child) +static void rpc_child_exit(struct rpc_task *child) { struct rpc_task *parent; - spin_lock_bh(&rpc_queue_lock); + spin_lock_bh(&childq.lock); if ((parent = rpc_find_parent(child)) != NULL) { parent->tk_status = child->tk_status; __rpc_wake_up_task(parent); } - spin_unlock_bh(&rpc_queue_lock); + spin_unlock_bh(&childq.lock); } /* @@ -1054,22 +932,20 @@ return NULL; } -void -rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func) +void rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func) { - spin_lock_bh(&rpc_queue_lock); + spin_lock_bh(&childq.lock); /* N.B. Is it possible for the child to have already finished? */ __rpc_sleep_on(&childq, task, func, NULL); rpc_schedule_run(child); - spin_unlock_bh(&rpc_queue_lock); + spin_unlock_bh(&childq.lock); } /* * Kill all tasks for the given client. * XXX: kill their descendants as well? */ -void -rpc_killall_tasks(struct rpc_clnt *clnt) +void rpc_killall_tasks(struct rpc_clnt *clnt) { struct rpc_task *rovr; struct list_head *le; @@ -1091,93 +967,14 @@ static DECLARE_MUTEX_LOCKED(rpciod_running); -static inline int -rpciod_task_pending(void) -{ - return !list_empty(&schedq.tasks[0]); -} - - -/* - * This is the rpciod kernel thread - */ -static int -rpciod(void *ptr) -{ - int rounds = 0; - - lock_kernel(); - /* - * Let our maker know we're running ... 
- */ - rpciod_pid = current->pid; - up(&rpciod_running); - - daemonize("rpciod"); - allow_signal(SIGKILL); - - dprintk("RPC: rpciod starting (pid %d)\n", rpciod_pid); - spin_lock_bh(&rpc_queue_lock); - while (rpciod_users) { - DEFINE_WAIT(wait); - if (signalled()) { - spin_unlock_bh(&rpc_queue_lock); - rpciod_killall(); - flush_signals(current); - spin_lock_bh(&rpc_queue_lock); - } - __rpc_schedule(); - if (current->flags & PF_FREEZE) { - spin_unlock_bh(&rpc_queue_lock); - refrigerator(PF_FREEZE); - spin_lock_bh(&rpc_queue_lock); - } - - if (++rounds >= 64) { /* safeguard */ - spin_unlock_bh(&rpc_queue_lock); - schedule(); - rounds = 0; - spin_lock_bh(&rpc_queue_lock); - } - - dprintk("RPC: rpciod back to sleep\n"); - prepare_to_wait(&rpciod_idle, &wait, TASK_INTERRUPTIBLE); - if (!rpciod_task_pending() && !signalled()) { - spin_unlock_bh(&rpc_queue_lock); - schedule(); - rounds = 0; - spin_lock_bh(&rpc_queue_lock); - } - finish_wait(&rpciod_idle, &wait); - dprintk("RPC: switch to rpciod\n"); - } - spin_unlock_bh(&rpc_queue_lock); - - dprintk("RPC: rpciod shutdown commences\n"); - if (!list_empty(&all_tasks)) { - printk(KERN_ERR "rpciod: active tasks at shutdown?!\n"); - rpciod_killall(); - } - - dprintk("RPC: rpciod exiting\n"); - unlock_kernel(); - - rpciod_pid = 0; - complete_and_exit(&rpciod_killer, 0); - return 0; -} - -static void -rpciod_killall(void) +static void rpciod_killall(void) { unsigned long flags; while (!list_empty(&all_tasks)) { clear_thread_flag(TIF_SIGPENDING); rpc_killall_tasks(NULL); - spin_lock_bh(&rpc_queue_lock); - __rpc_schedule(); - spin_unlock_bh(&rpc_queue_lock); + flush_workqueue(rpciod_workqueue); if (!list_empty(&all_tasks)) { dprintk("rpciod_killall: waiting for tasks to exit\n"); yield(); @@ -1195,28 +992,30 @@ int rpciod_up(void) { + struct workqueue_struct *wq; int error = 0; down(&rpciod_sema); - dprintk("rpciod_up: pid %d, users %d\n", rpciod_pid, rpciod_users); + dprintk("rpciod_up: users %d\n", rpciod_users); rpciod_users++; 
- if (rpciod_pid) + if (rpciod_workqueue) goto out; /* * If there's no pid, we should be the first user. */ if (rpciod_users > 1) - printk(KERN_WARNING "rpciod_up: no pid, %d users??\n", rpciod_users); + printk(KERN_WARNING "rpciod_up: no workqueue, %d users??\n", rpciod_users); /* * Create the rpciod thread and wait for it to start. */ - error = kernel_thread(rpciod, NULL, 0); - if (error < 0) { - printk(KERN_WARNING "rpciod_up: create thread failed, error=%d\n", error); + error = -ENOMEM; + wq = create_workqueue("rpciod"); + if (wq == NULL) { + printk(KERN_WARNING "rpciod_up: create workqueue failed, error=%d\n", error); rpciod_users--; goto out; } - down(&rpciod_running); + rpciod_workqueue = wq; error = 0; out: up(&rpciod_sema); @@ -1227,20 +1026,21 @@ rpciod_down(void) { down(&rpciod_sema); - dprintk("rpciod_down pid %d sema %d\n", rpciod_pid, rpciod_users); + dprintk("rpciod_down sema %d\n", rpciod_users); if (rpciod_users) { if (--rpciod_users) goto out; } else - printk(KERN_WARNING "rpciod_down: pid=%d, no users??\n", rpciod_pid); + printk(KERN_WARNING "rpciod_down: no users??\n"); - if (!rpciod_pid) { + if (!rpciod_workqueue) { dprintk("rpciod_down: Nothing to do!\n"); goto out; } + rpciod_killall(); - kill_proc(rpciod_pid, SIGKILL, 1); - wait_for_completion(&rpciod_killer); + destroy_workqueue(rpciod_workqueue); + rpciod_workqueue = NULL; out: up(&rpciod_sema); } @@ -1258,7 +1058,12 @@ } printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " "-rpcwait -action- --exit--\n"); - alltask_for_each(t, le, &all_tasks) + alltask_for_each(t, le, &all_tasks) { + const char *rpc_waitq = "none"; + + if (RPC_IS_QUEUED(t)) + rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq); + printk("%05d %04d %04x %06d %8p %6d %8p %08ld %8s %8p %8p\n", t->tk_pid, (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1), @@ -1266,8 +1071,9 @@ t->tk_client, (t->tk_client ? 
t->tk_client->cl_prog : 0), t->tk_rqstp, t->tk_timeout, - rpc_qname(t->tk_rpcwait), + rpc_waitq, t->tk_action, t->tk_exit); + } spin_unlock(&rpc_sched_lock); } #endif Index: linux-2.6.10/net/sunrpc/sunrpc_syms.c =================================================================== --- linux-2.6.10.orig/net/sunrpc/sunrpc_syms.c 2004-12-25 05:35:25.000000000 +0800 +++ linux-2.6.10/net/sunrpc/sunrpc_syms.c 2005-04-05 14:49:13.411690432 +0800 @@ -58,6 +58,9 @@ EXPORT_SYMBOL(rpc_wake_up); EXPORT_SYMBOL(rpc_queue_upcall); EXPORT_SYMBOL(rpc_mkpipe); +EXPORT_SYMBOL(rpc_mkdir); +EXPORT_SYMBOL(rpc_rmdir); + /* Client transport */ EXPORT_SYMBOL(xprt_create_proto); @@ -90,6 +93,7 @@ EXPORT_SYMBOL(svc_auth_register); EXPORT_SYMBOL(auth_domain_lookup); EXPORT_SYMBOL(svc_authenticate); +EXPORT_SYMBOL(svc_set_client); /* RPC statistics */ #ifdef CONFIG_PROC_FS Index: linux-2.6.10/kernel/exit.c =================================================================== --- linux-2.6.10.orig/kernel/exit.c 2005-04-05 14:48:52.534864192 +0800 +++ linux-2.6.10/kernel/exit.c 2005-04-05 14:50:57.737830448 +0800 @@ -848,6 +848,8 @@ for (;;) ; } +EXPORT_SYMBOL(do_exit); + NORET_TYPE void complete_and_exit(struct completion *comp, long code) { if (comp) Index: linux-2.6.10/fs/locks.c =================================================================== --- linux-2.6.10.orig/fs/locks.c 2004-12-25 05:35:28.000000000 +0800 +++ linux-2.6.10/fs/locks.c 2005-04-05 14:49:13.434686936 +0800 @@ -1096,15 +1096,13 @@ */ void remove_lease(struct file_lock *fl) { - if (!IS_LEASE(fl)) - return; - lock_kernel(); - + if (!fl || !IS_LEASE(fl)) + goto out; fl->fl_type = F_UNLCK | F_INPROGRESS; fl->fl_break_time = jiffies - 10; time_out_leases(fl->fl_file->f_dentry->d_inode); - +out: unlock_kernel(); } @@ -1563,9 +1561,6 @@ error = filp->f_op->lock(filp, F_GETLK, &file_lock); if (error < 0) goto out; - else if (error == LOCK_USE_CLNT) - /* Bypass for NFS with no locking - 2.0.36 compat */ - fl = posix_test_lock(filp, 
&file_lock); else fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock); } else { @@ -1708,9 +1703,6 @@ error = filp->f_op->lock(filp, F_GETLK, &file_lock); if (error < 0) goto out; - else if (error == LOCK_USE_CLNT) - /* Bypass for NFS with no locking - 2.0.36 compat */ - fl = posix_test_lock(filp, &file_lock); else fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock); } else { Index: linux-2.6.10/fs/dcache.c =================================================================== --- linux-2.6.10.orig/fs/dcache.c 2005-03-31 15:35:26.000000000 +0800 +++ linux-2.6.10/fs/dcache.c 2005-04-05 14:49:13.413690128 +0800 @@ -789,6 +789,54 @@ } /** + * d_instantiate_unique - instantiate a non-aliased dentry + * @entry: dentry to instantiate + * @inode: inode to attach to this dentry + * + * Fill in inode information in the entry. On success, it returns NULL. + * If an unhashed alias of "entry" already exists, then we return the + * aliased dentry instead. + * + * Note that in order to avoid conflicts with rename() etc, the caller + * had better be holding the parent directory semaphore. 
+ */ +struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) +{ + struct dentry *alias; + int len = entry->d_name.len; + const char *name = entry->d_name.name; + unsigned int hash = entry->d_name.hash; + + BUG_ON(!list_empty(&entry->d_alias)); + spin_lock(&dcache_lock); + if (!inode) + goto do_negative; + list_for_each_entry(alias, &inode->i_dentry, d_alias) { + struct qstr *qstr = &alias->d_name; + + if (qstr->hash != hash) + continue; + if (alias->d_parent != entry->d_parent) + continue; + if (qstr->len != len) + continue; + if (memcmp(qstr->name, name, len)) + continue; + dget_locked(alias); + spin_unlock(&dcache_lock); + BUG_ON(!d_unhashed(alias)); + return alias; + } + list_add(&entry->d_alias, &inode->i_dentry); +do_negative: + entry->d_inode = inode; + spin_unlock(&dcache_lock); + security_d_instantiate(entry, inode); + return NULL; +} +EXPORT_SYMBOL(d_instantiate_unique); + +/** * d_alloc_root - allocate root dentry * @root_inode: inode to allocate the root for * Index: linux-2.6.10/fs/lockd/svc.c =================================================================== --- linux-2.6.10.orig/fs/lockd/svc.c 2005-03-31 15:35:26.000000000 +0800 +++ linux-2.6.10/fs/lockd/svc.c 2005-04-05 14:49:13.458683288 +0800 @@ -418,6 +418,38 @@ return 0; \ } +static inline int is_callback(u32 proc) +{ + return proc == NLMPROC_GRANTED + || proc == NLMPROC_GRANTED_MSG + || proc == NLMPROC_TEST_RES + || proc == NLMPROC_LOCK_RES + || proc == NLMPROC_CANCEL_RES + || proc == NLMPROC_UNLOCK_RES + || proc == NLMPROC_NSM_NOTIFY; +} + + +static int lockd_authenticate(struct svc_rqst *rqstp) +{ + rqstp->rq_client = NULL; + switch (rqstp->rq_authop->flavour) { + case RPC_AUTH_NULL: + case RPC_AUTH_UNIX: + if (rqstp->rq_proc == 0) + return SVC_OK; + if (is_callback(rqstp->rq_proc)) { + /* Leave it to individual procedures to + * call nlmsvc_lookup_host(rqstp) + */ + return SVC_OK; + } + return svc_set_client(rqstp); + } + return SVC_DENIED; +} + + 
param_set_min_max(port, int, simple_strtol, 0, 65535) param_set_min_max(grace_period, unsigned long, simple_strtoul, nlm_grace_period_min, nlm_grace_period_max) @@ -498,4 +530,5 @@ .pg_name = "lockd", /* service name */ .pg_class = "nfsd", /* share authentication with nfsd */ .pg_stats = &nlmsvc_stats, /* stats table */ + .pg_authenticate = &lockd_authenticate /* export authentication */ }; Index: linux-2.6.10/fs/nfsd/nfs4xdr.c =================================================================== --- linux-2.6.10.orig/fs/nfsd/nfs4xdr.c 2004-12-25 05:35:24.000000000 +0800 +++ linux-2.6.10/fs/nfsd/nfs4xdr.c 2005-04-05 14:49:13.425688304 +0800 @@ -60,121 +60,6 @@ #define NFSDDBG_FACILITY NFSDDBG_XDR -static const char utf8_byte_len[256] = { - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4,5,5,5,5,6,6,0,0 -}; - -static inline int -is_legal_utf8_sequence(unsigned char *source, int length) -{ - unsigned char *ptr; - unsigned char c; - - if (length==1) return 1; - - /* Check for overlong sequence, and check second byte */ - c = *(source + 1); - switch (*source) { - case 0xE0: /* 3 bytes */ - if ( c < 0xA0 ) return 0; - break; - case 0xF0: /* 4 bytes */ - if ( c < 0x90 ) return 0; - break; - case 0xF8: /* 5 bytes */ - if ( c < 0xC8 ) return 0; - break; - case 0xFC: /* 6 bytes */ - if ( c < 0x84 ) return 0; - break; - default: - if ( (c & 0xC0) != 0x80) return 0; - } - - /* Check that trailing bytes look like 10xxxxxx */ - for (ptr = source++ + length - 1; ptr>source; ptr--) - if ( ((*ptr) & 0xC0) != 0x80 ) 
return 0; - return 1; -} - -/* This does some screening on disallowed unicode characters. It is NOT - * comprehensive. - */ -static int -is_allowed_utf8_char(unsigned char *source, int length) -{ - /* We assume length and source point to a valid utf8 sequence */ - unsigned char c; - - /* Disallow F0000 and up (in utf8, F3B08080) */ - if (*source > 0xF3 ) return 0; - c = *(source + 1); - switch (*source) { - case 0xF3: - if (c >= 0xB0) return 0; - break; - /* Disallow D800-F8FF (in utf8, EDA080-EFA3BF */ - case 0xED: - if (c >= 0xA0) return 0; - break; - case 0xEE: - return 0; - break; - case 0xEF: - if (c <= 0xA3) return 0; - /* Disallow FFF9-FFFF (EFBFB9-EFBFBF) */ - if (c==0xBF) - /* Don't need to check <=0xBF, since valid utf8 */ - if ( *(source+2) >= 0xB9) return 0; - break; - } - return 1; -} - -/* This routine should really check to see that the proper stringprep - * mappings have been applied. Instead, we do a simple screen of some - * of the more obvious illegal values by calling is_allowed_utf8_char. - * This will allow many illegal strings through, but if a client behaves, - * it will get full functionality. The other option (apart from full - * stringprep checking) is to limit everything to an easily handled subset, - * such as 7-bit ascii. - * - * Note - currently calling routines ignore return value except as boolean. 
- */ -static int -check_utf8(char *str, int len) -{ - unsigned char *chunk, *sourceend; - int chunklen; - - chunk = str; - sourceend = str + len; - - while (chunk < sourceend) { - chunklen = utf8_byte_len[*chunk]; - if (!chunklen) - return nfserr_inval; - if (chunk + chunklen > sourceend) - return nfserr_inval; - if (!is_legal_utf8_sequence(chunk, chunklen)) - return nfserr_inval; - if (!is_allowed_utf8_char(chunk, chunklen)) - return nfserr_inval; - if ( (chunklen==1) && (!*chunk) ) - return nfserr_inval; /* Disallow embedded nulls */ - chunk += chunklen; - } - - return 0; -} - static int check_filename(char *str, int len, int err) { @@ -187,7 +72,7 @@ for (i = 0; i < len; i++) if (str[i] == '/') return err; - return check_utf8(str, len); + return 0; } /* @@ -403,8 +288,6 @@ READ_BUF(dummy32); len += XDR_QUADLEN(dummy32) << 2; READMEM(buf, dummy32); - if (check_utf8(buf, dummy32)) - return nfserr_inval; ace.whotype = nfs4_acl_get_whotype(buf, dummy32); status = 0; if (ace.whotype != NFS4_ACL_WHO_NAMED) @@ -439,8 +322,6 @@ READ_BUF(dummy32); len += (XDR_QUADLEN(dummy32) << 2); READMEM(buf, dummy32); - if (check_utf8(buf, dummy32)) - return nfserr_inval; if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid))) goto out_nfserr; iattr->ia_valid |= ATTR_UID; @@ -452,8 +333,6 @@ READ_BUF(dummy32); len += (XDR_QUADLEN(dummy32) << 2); READMEM(buf, dummy32); - if (check_utf8(buf, dummy32)) - return nfserr_inval; if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid))) goto out_nfserr; iattr->ia_valid |= ATTR_GID; @@ -525,7 +404,7 @@ } } if (len != expected_len) - goto xdr_error; + printk("nfsd: funky nfs4 client sent extra bytes in setattr\n"); DECODE_TAIL; @@ -585,8 +464,6 @@ READ32(create->cr_linklen); READ_BUF(create->cr_linklen); SAVEMEM(create->cr_linkname, create->cr_linklen); - if (check_utf8(create->cr_linkname, create->cr_linklen)) - return nfserr_inval; break; case NF4BLK: case NF4CHR: @@ -615,6 +492,18 @@ } static 
inline int +nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr) +{ + DECODE_HEAD; + + READ_BUF(sizeof(stateid_t)); + READ32(dr->dr_stateid.si_generation); + COPYMEM(&dr->dr_stateid.si_opaque, sizeof(stateid_opaque_t)); + + DECODE_TAIL; +} + +static inline int nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *getattr) { return nfsd4_decode_bitmap(argp, getattr->ga_bmval); @@ -790,8 +679,8 @@ READ32(open->op_delegate_type); break; case NFS4_OPEN_CLAIM_DELEGATE_CUR: - READ_BUF(sizeof(delegation_stateid_t) + 4); - COPYMEM(&open->op_delegate_stateid, sizeof(delegation_stateid_t)); + READ_BUF(sizeof(stateid_t) + 4); + COPYMEM(&open->op_delegate_stateid, sizeof(stateid_t)); READ32(open->op_fname.len); READ_BUF(open->op_fname.len); SAVEMEM(open->op_fname.data, open->op_fname.len); @@ -825,7 +714,7 @@ DECODE_HEAD; open_down->od_stateowner = NULL; - READ_BUF(4 + sizeof(stateid_t)); + READ_BUF(12 + sizeof(stateid_t)); READ32(open_down->od_stateid.si_generation); COPYMEM(&open_down->od_stateid.si_opaque, sizeof(stateid_opaque_t)); READ32(open_down->od_seqid); @@ -1170,6 +1059,9 @@ case OP_CREATE: op->status = nfsd4_decode_create(argp, &op->u.create); break; + case OP_DELEGRETURN: + op->status = nfsd4_decode_delegreturn(argp, &op->u.delegreturn); + break; case OP_GETATTR: op->status = nfsd4_decode_getattr(argp, &op->u.getattr); break; @@ -1425,7 +1317,7 @@ if (status) goto out_nfserr; } - if ((bmval0 & FATTR4_WORD0_FILEHANDLE) && !fhp) { + if ((bmval0 & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) && !fhp) { fh_init(&tempfh, NFS4_FHSIZE); status = fh_compose(&tempfh, exp, dentry, NULL); if (status) @@ -1471,7 +1363,10 @@ if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) { if ((buflen -= 4) < 0) goto out_resource; - WRITE32( NFS4_FH_NOEXPIRE_WITH_OPEN | NFS4_FH_VOL_RENAME ); + if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) + WRITE32(NFS4_FH_PERSISTENT); + else + WRITE32(NFS4_FH_VOL_RENAME); } if (bmval0 & FATTR4_WORD0_CHANGE) { 
/* @@ -1508,10 +1403,15 @@ if (bmval0 & FATTR4_WORD0_FSID) { if ((buflen -= 16) < 0) goto out_resource; - WRITE32(0); - WRITE32(MAJOR(stat.dev)); - WRITE32(0); - WRITE32(MINOR(stat.dev)); + if (is_fsid(fhp, rqstp->rq_reffh)) { + WRITE64((u64)exp->ex_fsid); + WRITE64((u64)0); + } else { + WRITE32(0); + WRITE32(MAJOR(stat.dev)); + WRITE32(0); + WRITE32(MINOR(stat.dev)); + } } if (bmval0 & FATTR4_WORD0_UNIQUE_HANDLES) { if ((buflen -= 4) < 0) @@ -1765,17 +1665,65 @@ } static int +nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, + const char *name, int namlen, u32 *p, int *buflen) +{ + struct svc_export *exp = cd->rd_fhp->fh_export; + struct dentry *dentry; + int nfserr; + + dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen); + if (IS_ERR(dentry)) + return nfserrno(PTR_ERR(dentry)); + + exp_get(exp); + if (d_mountpoint(dentry)) { + if (nfsd_cross_mnt(cd->rd_rqstp, &dentry, &exp)) { + /* + * -EAGAIN is the only error returned from + * nfsd_cross_mnt() and it indicates that an + * up-call has been initiated to fill in the export + * options on exp. When the answer comes back, + * this call will be retried. 
+ */ + nfserr = nfserr_dropit; + goto out_put; + } + + } + nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval, + cd->rd_rqstp); +out_put: + dput(dentry); + exp_put(exp); + return nfserr; +} + +static u32 * +nfsd4_encode_rdattr_error(u32 *p, int buflen, int nfserr) +{ + u32 *attrlenp; + + if (buflen < 6) + return NULL; + *p++ = htonl(2); + *p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */ + *p++ = htonl(0); /* bmval1 */ + + attrlenp = p++; + *p++ = nfserr; /* no htonl */ + *attrlenp = htonl((char *)p - (char *)attrlenp - 4); + return p; +} + +static int nfsd4_encode_dirent(struct readdir_cd *ccd, const char *name, int namlen, loff_t offset, ino_t ino, unsigned int d_type) { struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common); int buflen; u32 *p = cd->buffer; - u32 *attrlenp; - struct dentry *dentry; - struct svc_export *exp = cd->rd_fhp->fh_export; - u32 bmval0, bmval1; - int nfserr = 0; + int nfserr = nfserr_toosmall; /* In nfsv4, "." and ".." never make it onto the wire.. */ if (name && isdotent(name, namlen)) { @@ -1788,106 +1736,44 @@ buflen = cd->buflen - 4 - XDR_QUADLEN(namlen); if (buflen < 0) - goto nospc; + goto fail; *p++ = xdr_one; /* mark entry present */ cd->offset = p; /* remember pointer */ p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */ p = xdr_encode_array(p, name, namlen); /* name length & name */ - /* - * Now we come to the ugly part: writing the fattr for this entry. - */ - bmval0 = cd->rd_bmval[0]; - bmval1 = cd->rd_bmval[1]; - if ((bmval0 & ~(FATTR4_WORD0_RDATTR_ERROR | FATTR4_WORD0_FILEID)) || bmval1) { - /* - * "Heavyweight" case: we have no choice except to - * call nfsd4_encode_fattr(). 
- */ - dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen); - if (IS_ERR(dentry)) { - nfserr = nfserrno(PTR_ERR(dentry)); - goto error; - } - - exp_get(exp); - if (d_mountpoint(dentry)) { - if ((nfserr = nfsd_cross_mnt(cd->rd_rqstp, &dentry, - &exp))) { - /* - * -EAGAIN is the only error returned from - * nfsd_cross_mnt() and it indicates that an - * up-call has been initiated to fill in the export - * options on exp. When the answer comes back, - * this call will be retried. - */ - dput(dentry); - exp_put(exp); - nfserr = nfserr_dropit; - goto error; - } - - } - - nfserr = nfsd4_encode_fattr(NULL, exp, - dentry, p, &buflen, cd->rd_bmval, - cd->rd_rqstp); - dput(dentry); - exp_put(exp); - if (!nfserr) { - p += buflen; - goto out; - } - if (nfserr == nfserr_resource) - goto nospc; - -error: + nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, p, &buflen); + switch (nfserr) { + case nfs_ok: + p += buflen; + break; + case nfserr_resource: + nfserr = nfserr_toosmall; + goto fail; + case nfserr_dropit: + goto fail; + default: /* - * If we get here, we experienced a miscellaneous - * failure while writing the attributes. If the - * client requested the RDATTR_ERROR attribute, + * If the client requested the RDATTR_ERROR attribute, * we stuff the error code into this attribute * and continue. If this attribute was not requested, * then in accordance with the spec, we fail the * entire READDIR operation(!) */ - if (!(bmval0 & FATTR4_WORD0_RDATTR_ERROR)) { - cd->common.err = nfserr; - return -EINVAL; - } - - bmval0 = FATTR4_WORD0_RDATTR_ERROR; - bmval1 = 0; - /* falling through here will do the right thing... */ + if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR)) + goto fail; + p = nfsd4_encode_rdattr_error(p, buflen, nfserr); + nfserr = nfserr_toosmall; + if (p == NULL) + goto fail; } - - /* - * In the common "lightweight" case, we avoid - * the overhead of nfsd4_encode_fattr() by assembling - * a small fattr by hand.
- */ - if (buflen < 6) - goto nospc; - *p++ = htonl(2); - *p++ = htonl(bmval0); - *p++ = htonl(bmval1); - - attrlenp = p++; - if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) - *p++ = nfserr; /* no htonl */ - if (bmval0 & FATTR4_WORD0_FILEID) - p = xdr_encode_hyper(p, (u64)ino); - *attrlenp = htonl((char *)p - (char *)attrlenp - 4); - -out: cd->buflen -= (p - cd->buffer); cd->buffer = p; cd->common.err = nfs_ok; return 0; - -nospc: - cd->common.err = nfserr_toosmall; +fail: + cd->common.err = nfserr; return -EINVAL; } @@ -2081,8 +1967,8 @@ case NFS4_OPEN_DELEGATE_NONE: break; case NFS4_OPEN_DELEGATE_READ: - RESERVE_SPACE(20 + sizeof(delegation_stateid_t)); - WRITEMEM(&open->op_delegate_stateid, sizeof(delegation_stateid_t)); + RESERVE_SPACE(20 + sizeof(stateid_t)); + WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t)); WRITE32(0); /* @@ -2095,8 +1981,8 @@ ADJUST_ARGS(); break; case NFS4_OPEN_DELEGATE_WRITE: - RESERVE_SPACE(32 + sizeof(delegation_stateid_t)); - WRITEMEM(&open->op_delegate_stateid, sizeof(delegation_stateid_t)); + RESERVE_SPACE(32 + sizeof(stateid_t)); + WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t)); WRITE32(0); /* @@ -2185,10 +2071,17 @@ } read->rd_vlen = v; - nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, - read->rd_offset, - read->rd_iov, read->rd_vlen, - &maxcount); + if (read->rd_filp) + nfserr = nfsd_vfs_read(read->rd_rqstp, read->rd_fhp, + read->rd_filp, read->rd_offset, + read->rd_iov, read->rd_vlen, + &maxcount); + else + nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, + read->rd_offset, + read->rd_iov, read->rd_vlen, + &maxcount); + if (nfserr == nfserr_symlink) nfserr = nfserr_inval; if (nfserr) @@ -2460,6 +2353,8 @@ case OP_CREATE: nfsd4_encode_create(resp, op->status, &op->u.create); break; + case OP_DELEGRETURN: + break; case OP_GETATTR: op->status = nfsd4_encode_getattr(resp, op->status, &op->u.getattr); break; Index: linux-2.6.10/fs/nfsd/nfs4state.c =================================================================== --- 
linux-2.6.10.orig/fs/nfsd/nfs4state.c 2004-12-25 05:35:23.000000000 +0800 +++ linux-2.6.10/fs/nfsd/nfs4state.c 2005-04-05 14:49:13.421688912 +0800 @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -56,9 +57,11 @@ static u32 nfs4_reclaim_init = 0; time_t boot_time; static time_t grace_end = 0; +static u32 first_run = 1; /* laundromat threads first run */ static u32 current_clientid = 1; -static u32 current_ownerid; -static u32 current_fileid; +static u32 current_ownerid = 1; +static u32 current_fileid = 1; +static u32 current_delegid = 1; static u32 nfs4_init; stateid_t zerostateid; /* bits all 0 */ stateid_t onestateid; /* bits all 1 */ @@ -70,14 +73,17 @@ u32 del_perclient = 0; u32 alloc_file = 0; u32 free_file = 0; -u32 alloc_sowner = 0; -u32 free_sowner = 0; u32 vfsopen = 0; u32 vfsclose = 0; -u32 alloc_lsowner= 0; +u32 alloc_delegation= 0; +u32 free_delegation= 0; /* forward declarations */ struct nfs4_stateid * find_stateid(stateid_t *stid, int flags); +static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid); +static void release_delegation(struct nfs4_delegation *dp); +static void release_stateid_lockowner(struct nfs4_stateid *open_stp); +extern char recovery_dirname[]; /* Locking: * @@ -117,6 +123,112 @@ static void release_stateid(struct nfs4_stateid *stp, int flags); static void release_file(struct nfs4_file *fp); +/* + * Delegation state + */ + +/* recall_lock protects the del_recall_lru */ +spinlock_t recall_lock; +static struct list_head del_recall_lru; + +static struct nfs4_delegation * +alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type) +{ + struct nfs4_delegation *dp; + struct nfs4_file *fp = stp->st_file; + + dprintk("NFSD alloc_init_deleg\n"); + if ((dp = kmalloc(sizeof(struct nfs4_delegation), + GFP_KERNEL)) == NULL) + return dp; + INIT_LIST_HEAD(&dp->dl_del_perfile); + INIT_LIST_HEAD(&dp->dl_del_perclnt); + 
INIT_LIST_HEAD(&dp->dl_recall_lru); + dp->dl_client = clp; + dp->dl_file = fp; + dp->dl_flock = NULL; + dp->dl_stp = stp; + dp->dl_flags = 0; + dp->dl_type = type; + dp->dl_recall.cbr_dp = NULL; + dp->dl_recall.cbr_ident = 0; + dp->dl_recall.cbr_trunc = 0; + dp->dl_stateid.si_boot = boot_time; + dp->dl_stateid.si_stateownerid = current_delegid++; + dp->dl_stateid.si_fileid = 0; + dp->dl_stateid.si_generation = 0; + dp->dl_fhlen = current_fh->fh_handle.fh_size; + memcpy(dp->dl_fhval, &current_fh->fh_handle.fh_base, + current_fh->fh_handle.fh_size); + dp->dl_time = 0; + atomic_set(&dp->dl_state, NFS4_NO_RECALL); + atomic_set(&dp->dl_count, 1); + atomic_set(&dp->dl_recall_cnt, 0); + list_add(&dp->dl_del_perfile, &fp->fi_del_perfile); + list_add(&dp->dl_del_perclnt, &clp->cl_del_perclnt); + alloc_delegation++; + return dp; +} + +/* + * Free the delegation structure. + */ +static void +nfs4_free_delegation(struct nfs4_delegation *dp) +{ + dprintk("NFSD: nfs4_free_delegation freeing dp %p\n",dp); + list_del(&dp->dl_recall_lru); + kfree(dp); + free_delegation++; +} + +/* release_delegation: + * + * lease_modify() is called to remove the FS_LEASE file_lock from + * the i_flock list, eventually calling nfsd's lock_manager + * fl_release_callback. + * + * call either: + * nfsd_close : if last close, locks_remove_flock calls lease_modify. + * otherwise, recalled state set to NFS4_RECALL_COMPLETE + * so that it will be reaped by the laundromat service. + * or + * remove_lease (calls time_out_lease which calls lease_modify). + * and nfs4_free_delegation. + * + * lock_kernel() protects dp->dl_flock which is set under the kernel lock + * by nfsd_copy_lock_deleg_callback and nfsd_release_deleg_callback.
+ * + */ + +static void +release_delegation(struct nfs4_delegation *dp) +{ + /* delayed nfsd_close */ + if (dp->dl_flags & NFS4_DELAY_CLOSE) { + struct file *filp = dp->dl_stp->st_vfs_file; + + dprintk("NFSD: release_delegation CLOSE\n"); + release_stateid_lockowner(dp->dl_stp); + kfree(dp->dl_stp); + dp->dl_flags &= ~NFS4_DELAY_CLOSE; + dp->dl_stp = NULL; + atomic_set(&dp->dl_state, NFS4_RECALL_COMPLETE); + nfsd_close(filp); + vfsclose++; + } else { + dprintk("NFSD: release_delegation remove lease dl_flock %p\n", + dp->dl_flock); + remove_lease(dp->dl_flock); + list_del_init(&dp->dl_del_perfile); + list_del_init(&dp->dl_del_perclnt); + /* dl_count > 0 => outstanding recall rpc */ + dprintk("NFSD: release_delegation free deleg dl_count %d\n", + atomic_read(&dp->dl_count)); + if (atomic_dec_and_test(&dp->dl_count)) + nfs4_free_delegation(dp); + } +} /* * SETCLIENTID state @@ -148,7 +260,7 @@ * for last close replay. */ static struct list_head reclaim_str_hashtbl[CLIENT_HASH_SIZE]; -static int reclaim_str_hashtbl_size; +static int reclaim_str_hashtbl_size = 0; static struct list_head conf_id_hashtbl[CLIENT_HASH_SIZE]; static struct list_head conf_str_hashtbl[CLIENT_HASH_SIZE]; static struct list_head unconf_str_hashtbl[CLIENT_HASH_SIZE]; @@ -213,12 +325,38 @@ kfree(clp); } +void +put_nfs4_client(struct nfs4_client *clp) +{ + if (atomic_dec_and_test(&clp->cl_count)) + free_client(clp); +} + static void expire_client(struct nfs4_client *clp) { struct nfs4_stateowner *sop; + struct nfs4_delegation *dp; + struct nfs4_callback *cb = &clp->cl_callback; + struct rpc_clnt *clnt = clp->cl_callback.cb_client; + + dprintk("NFSD: expire_client cl_count %d\n", + atomic_read(&clp->cl_count)); - dprintk("NFSD: expire_client\n"); + /* shutdown rpc client, ending any outstanding recall rpcs */ + if (atomic_read(&cb->cb_set) == 1 && clnt) { + rpc_shutdown_client(clnt); + clnt = clp->cl_callback.cb_client = NULL; + } + while (!list_empty(&clp->cl_del_perclnt)) { + dp =
list_entry(clp->cl_del_perclnt.next, struct nfs4_delegation, dl_del_perclnt); + dprintk("NFSD: expire client. dp %p, dl_state %d, fp %p\n", + dp, atomic_read(&dp->dl_state), dp->dl_flock); + + /* force release of delegation. */ + atomic_set(&dp->dl_state, NFS4_RECALL_COMPLETE); + release_delegation(dp); + } list_del(&clp->cl_idhash); list_del(&clp->cl_strhash); list_del(&clp->cl_lru); @@ -226,7 +364,7 @@ sop = list_entry(clp->cl_perclient.next, struct nfs4_stateowner, so_perclient); release_stateowner(sop); } - free_client(clp); + put_nfs4_client(clp); } static struct nfs4_client * @@ -235,9 +373,13 @@ if (!(clp = alloc_client(name))) goto out; + atomic_set(&clp->cl_count, 1); + atomic_set(&clp->cl_callback.cb_set, 0); + clp->cl_callback.cb_parsed = 0; INIT_LIST_HEAD(&clp->cl_idhash); INIT_LIST_HEAD(&clp->cl_strhash); INIT_LIST_HEAD(&clp->cl_perclient); + INIT_LIST_HEAD(&clp->cl_del_perclnt); INIT_LIST_HEAD(&clp->cl_lru); out: return clp; @@ -420,17 +562,24 @@ { struct nfs4_callback *cb = &clp->cl_callback; + /* Currently, we only support tcp for the callback channel */ + if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3)) + goto out_err; + if ( !(parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val, - &cb->cb_addr, &cb->cb_port))) { - printk(KERN_INFO "NFSD: BAD callback address. 
client will not receive delegations\n"); - cb->cb_parsed = 0; - return; - } - cb->cb_netid.len = se->se_callback_netid_len; - cb->cb_netid.data = se->se_callback_netid_val; + &cb->cb_addr, &cb->cb_port))) + goto out_err; cb->cb_prog = se->se_callback_prog; cb->cb_ident = se->se_callback_ident; cb->cb_parsed = 1; + return; +out_err: + printk(KERN_INFO "NFSD: this client (clientid %08x/%08x) " + "will not receive delegations\n", + clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); + + cb->cb_parsed = 0; + return; } /* @@ -707,6 +856,7 @@ status = nfserr_clid_inuse; else { expire_client(conf); + clp = unconf; move_to_confirmed(unconf, idhashval); status = nfs_ok; } @@ -724,6 +874,7 @@ if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred)) { status = nfserr_clid_inuse; } else { + clp = conf; status = nfs_ok; } goto out; @@ -738,6 +889,7 @@ status = nfserr_clid_inuse; } else { status = nfs_ok; + clp = unconf; move_to_confirmed(unconf, idhashval); } goto out; @@ -757,7 +909,8 @@ status = nfserr_inval; goto out; out: - /* XXX if status == nfs_ok, probe callback path */ + if (!status) + nfsd4_probe_callback(clp); nfs4_unlock_state(); return status; } @@ -803,6 +956,7 @@ if ((fp = kmalloc(sizeof(struct nfs4_file),GFP_KERNEL))) { INIT_LIST_HEAD(&fp->fi_hash); INIT_LIST_HEAD(&fp->fi_perfile); + INIT_LIST_HEAD(&fp->fi_del_perfile); list_add(&fp->fi_hash, &file_hashtbl[hashval]); fp->fi_inode = igrab(ino); fp->fi_id = current_fileid++; @@ -822,7 +976,7 @@ while (!list_empty(&file_hashtbl[i])) { fp = list_entry(file_hashtbl[i].next, struct nfs4_file, fi_hash); /* this should never be more than once... 
*/ - if (!list_empty(&fp->fi_perfile)) { + if (!list_empty(&fp->fi_perfile) || !list_empty(&fp->fi_del_perfile)) { printk("ERROR: release_all_files: file %p is open, creating dangling state !!!\n",fp); } release_file(fp); @@ -830,15 +984,36 @@ } } -/* should use a slab cache */ +kmem_cache_t *stateowner_slab = NULL; + +int +nfsd4_init_slabs(void) +{ + stateowner_slab = kmem_cache_create("nfsd4_stateowners", + sizeof(struct nfs4_stateowner), 0, 0, NULL, NULL); + if (stateowner_slab == NULL) + return -ENOMEM; + return 0; +} + +int +nfsd4_free_slabs(void) +{ + int status = 0; + + if (stateowner_slab) + status = kmem_cache_destroy(stateowner_slab); + stateowner_slab = NULL; + return status; +} + void nfs4_free_stateowner(struct kref *kref) { struct nfs4_stateowner *sop = container_of(kref, struct nfs4_stateowner, so_ref); kfree(sop->so_owner.data); - kfree(sop); - free_sowner++; + kmem_cache_free(stateowner_slab, sop); } static inline struct nfs4_stateowner * @@ -846,14 +1021,14 @@ { struct nfs4_stateowner *sop; - if ((sop = kmalloc(sizeof(struct nfs4_stateowner),GFP_KERNEL))) { + if ((sop = kmem_cache_alloc(stateowner_slab, GFP_KERNEL))) { if ((sop->so_owner.data = kmalloc(owner->len, GFP_KERNEL))) { memcpy(sop->so_owner.data, owner->data, owner->len); sop->so_owner.len = owner->len; kref_init(&sop->so_ref); return sop; } - kfree(sop); + kmem_cache_free(stateowner_slab, sop); } return NULL; } @@ -887,7 +1062,6 @@ rp->rp_status = NFSERR_SERVERFAULT; rp->rp_buflen = 0; rp->rp_buf = rp->rp_ibuf; - alloc_sowner++; return sop; } @@ -957,14 +1131,29 @@ __set_bit(open->op_share_deny, &stp->st_deny_bmap); } +/* +* Because nfsd_close() can call locks_remove_flock() which removes leases, +* delay nfsd_close() for delegations from the nfsd_open() clientid +* until the delegation is reaped. 
+*/ static void -release_stateid(struct nfs4_stateid *stp, int flags) { +release_stateid(struct nfs4_stateid *stp, int flags) +{ + struct nfs4_delegation *dp; + struct nfs4_file *fp = stp->st_file; list_del(&stp->st_hash); list_del_perfile++; list_del(&stp->st_perfile); list_del(&stp->st_perfilestate); if ((stp->st_vfs_set) && (flags & OPEN_STATE)) { + list_for_each_entry(dp, &fp->fi_del_perfile, dl_del_perfile) { + if(cmp_clid(&dp->dl_client->cl_clientid, + &stp->st_stateowner->so_client->cl_clientid)) { + dp->dl_flags |= NFS4_DELAY_CLOSE; + return; + } + } release_stateid_lockowner(stp); nfsd_close(stp->st_vfs_file); vfsclose++; @@ -1013,7 +1202,7 @@ if (sop->so_confirmed && list_empty(&sop->so_perfilestate)) move_to_close_lru(sop); /* unused nfs4_file's are releseed. XXX slab cache? */ - if (list_empty(&fp->fi_perfile)) { + if (list_empty(&fp->fi_perfile) && list_empty(&fp->fi_del_perfile)) { release_file(fp); } } @@ -1141,6 +1330,100 @@ } } +/* + * Recall a delegation + */ +static int +do_recall(void *__dp) +{ + struct nfs4_delegation *dp = __dp; + + atomic_inc(&dp->dl_count); + nfsd4_cb_recall(dp); + do_exit(0); + return 0; +} + +/* + * Spawn a thread to perform a recall on the delegation represented + * by the lease (file_lock) + * + * Called from break_lease() with lock_kernel() held, + * + */ +static +void nfsd_break_deleg_cb(struct file_lock *fl) +{ + struct nfs4_delegation *dp= (struct nfs4_delegation *)fl->fl_owner; + struct task_struct *t; + + dprintk("NFSD nfsd_break_deleg_cb: dp %p fl %p\n",dp,fl); + if (!dp) + return; + + /* schedule delegation for recall */ + spin_lock(&recall_lock); + atomic_set(&dp->dl_state, NFS4_RECALL_IN_PROGRESS); + list_add_tail(&dp->dl_recall_lru, &del_recall_lru); + spin_unlock(&recall_lock); + + /* only place dl_time is set. 
protected by lock_kernel*/ + dp->dl_time = get_seconds(); + + /* XXX need to merge NFSD_LEASE_TIME with fs/locks.c:lease_break_time */ + fl->fl_break_time = jiffies + NFSD_LEASE_TIME * HZ; + + t = kthread_run(do_recall, dp, "%s", "nfs4_cb_recall"); + if (IS_ERR(t)) { + struct nfs4_client *clp = dp->dl_client; + + printk(KERN_INFO "NFSD: Callback thread failed " + "for client (clientid %08x/%08x)\n", + clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); + } +} + +/* + * The file_lock is being reaped. + * + * Called by locks_free_lock() with lock_kernel() held. + */ +static +void nfsd_release_deleg_cb(struct file_lock *fl) +{ + struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner; + + if (!(fl->fl_flags & FL_LEASE) || !dp) + return; + + dprintk("NFSD nfsd_release_deleg_cb: fl %p dp %p dl_count %d, dl_state %d\n", fl,dp, atomic_read(&dp->dl_count), atomic_read(&dp->dl_state)); + atomic_set(&dp->dl_state,NFS4_RECALL_COMPLETE); + dp->dl_flock = NULL; +} + +/* + * Set the delegation file_lock back pointer. + * + * Called from __setlease() with lock_kernel() held.
+ */ +static +void nfsd_copy_lock_deleg_cb(struct file_lock *new, struct file_lock *fl) +{ + struct nfs4_delegation *dp = (struct nfs4_delegation *)new->fl_owner; + + dprintk("NFSD: nfsd_copy_lock_deleg_cb: new fl %p dp %p\n", new, dp); + if (!dp) + return; + dp->dl_flock = new; +} + +struct lock_manager_operations nfsd_lease_mng_ops = { + .fl_break = nfsd_break_deleg_cb, + .fl_release_private = nfsd_release_deleg_cb, + .fl_copy_lock = nfsd_copy_lock_deleg_cb, +}; + + /* * nfsd4_process_open1() @@ -1238,6 +1521,43 @@ } static int +nfs4_deleg_conflict(u32 share, u32 dtype) +{ + return (((share & NFS4_SHARE_ACCESS_WRITE) && + dtype == NFS4_OPEN_DELEGATE_READ) || + ((share & NFS4_SHARE_ACCESS_READ) && + dtype == NFS4_OPEN_DELEGATE_WRITE)); +} + +#define DONT_DELEGATE 8 + +/* + * nfs4_check_deleg_recall() + * + * Test every delegation whose recall is still in progress, and return + * NFSERR_DELAY for a conflicting open share. + * flag is set to DONT_DELEGATE for shares that match the deleg type. + */ +static int +nfs4_check_deleg_recall(struct nfs4_file *fp, struct nfsd4_open *op, int *flag) +{ + struct nfs4_delegation *dp; + int status = 0; + + list_for_each_entry(dp, &fp->fi_del_perfile, dl_del_perfile) { + dprintk("NFSD: found delegation %p with dl_state %d\n", + dp, atomic_read(&dp->dl_state)); + if (atomic_read(&dp->dl_state) == NFS4_RECALL_IN_PROGRESS) { + if(nfs4_deleg_conflict(op->op_share_access, dp->dl_type)) + status = nfserr_jukebox; + else + *flag = DONT_DELEGATE; + } + } + return status; +} + +static int nfs4_check_open(struct nfs4_file *fp, struct nfs4_stateowner *sop, struct nfsd4_open *open, struct nfs4_stateid **stpp) { struct nfs4_stateid *local; @@ -1339,6 +1659,65 @@ } /* + * Attempt to hand out a delegation.
+ */ +static void +nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_stateid *stp, int *flag) +{ + struct nfs4_delegation *dp; + struct nfs4_stateowner *sop = stp->st_stateowner; + struct nfs4_callback *cb = &sop->so_client->cl_callback; + struct file_lock fl, *flp = &fl; + int status; + + if (*flag == DONT_DELEGATE) { + *flag = NFS4_OPEN_DELEGATE_NONE; + return; + } + + /* set flag */ + *flag = NFS4_OPEN_DELEGATE_NONE; + if (open->op_claim_type != NFS4_OPEN_CLAIM_NULL + || !atomic_read(&cb->cb_set) || !sop->so_confirmed) + return; + + if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE)) + *flag = NFS4_OPEN_DELEGATE_READ; + + else if (!(open->op_share_access & NFS4_SHARE_ACCESS_READ)) + *flag = NFS4_OPEN_DELEGATE_WRITE; + + if (!(dp = alloc_init_deleg(sop->so_client, stp, fh, *flag))) + return; + locks_init_lock(&fl); + fl.fl_lmops = &nfsd_lease_mng_ops; + fl.fl_flags = FL_LEASE; + fl.fl_end = OFFSET_MAX; + fl.fl_owner = (fl_owner_t)dp; + fl.fl_file = stp->st_vfs_file; + fl.fl_pid = current->tgid; + + if ((status = setlease(stp->st_vfs_file, + *flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK, &flp))) { + dprintk("NFSD: setlease failed [%d], no delegation\n", status); + list_del(&dp->dl_del_perfile); + list_del(&dp->dl_del_perclnt); + kfree(dp); + free_delegation++; + *flag = NFS4_OPEN_DELEGATE_NONE; + return; + } + + memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid)); + + dprintk("NFSD: delegation stateid=(%08x/%08x/%08x/%08x)\n\n", + dp->dl_stateid.si_boot, + dp->dl_stateid.si_stateownerid, + dp->dl_stateid.si_fileid, + dp->dl_stateid.si_generation); +} + +/* * called with nfs4_lock_state() held. 
*/ int @@ -1346,28 +1725,24 @@ { struct nfs4_stateowner *sop = open->op_stateowner; struct nfs4_file *fp = NULL; - struct inode *ino; + struct inode *ino = current_fh->fh_dentry->d_inode; unsigned int fi_hashval; struct nfs4_stateid *stp = NULL; - int status; - - status = nfserr_resource; - if (!sop) - return status; - - ino = current_fh->fh_dentry->d_inode; + int status, delegflag = 0; status = nfserr_inval; if (!TEST_ACCESS(open->op_share_access) || !TEST_DENY(open->op_share_deny)) goto out; /* - * Lookup file; if found, lookup stateid and check open request; - * not found, create + * Lookup file; if found, lookup stateid and check open request, + * and check for delegations in the process of being recalled. + * If not found, create the nfs4_file struct */ fi_hashval = file_hashval(ino); if (find_file(fi_hashval, ino, &fp)) { - status = nfs4_check_open(fp, sop, open, &stp); - if (status) + if ((status = nfs4_check_open(fp, sop, open, &stp))) + goto out; + if ((status = nfs4_check_deleg_recall(fp, open, &delegflag))) goto out; } else { status = nfserr_resource; @@ -1407,14 +1782,20 @@ } } } - dprintk("nfs4_process_open2: stateid=(%08x/%08x/%08x/%08x)\n", - stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid, - stp->st_stateid.si_fileid, stp->st_stateid.si_generation); - memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t)); - open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE; + /* + * Attempt to hand out a delegation. No error return, because the + * OPEN succeeds even if we fail. 
+ */ + nfs4_open_delegation(current_fh, open, stp, &delegflag); + open->op_delegate_type = delegflag; + status = nfs_ok; + + dprintk("nfs4_process_open2: stateid=(%08x/%08x/%08x/%08x)\n", + stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid, + stp->st_stateid.si_fileid, stp->st_stateid.si_generation); out: /* take the opportunity to clean up unused state */ if (fp && list_empty(&fp->fi_perfile)) @@ -1480,14 +1861,26 @@ { struct nfs4_client *clp; struct nfs4_stateowner *sop; + struct nfs4_delegation *dp; struct list_head *pos, *next; time_t cutoff = get_seconds() - NFSD_LEASE_TIME; time_t t, clientid_val = NFSD_LEASE_TIME; - time_t u, close_val = NFSD_LEASE_TIME; + time_t u, test_val = NFSD_LEASE_TIME; nfs4_lock_state(); - dprintk("NFSD: laundromat service - starting, examining clients\n"); + dprintk("NFSD: laundromat service - starting\n"); + /* Remove clientid's from recovery directory */ + if (first_run) { + int status; + + dprintk("NFSD: laundromat service - FIRST_RUN\n"); + status = nfsd4_list_rec_dir(1); + if (status < 0) + printk("NFSD: error clearing recovery directory %s\n", + recovery_dirname); + first_run = 0; + } list_for_each_safe(pos, next, &client_lru) { clp = list_entry(pos, struct nfs4_client, cl_lru); if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { @@ -1498,14 +1891,34 @@ } dprintk("NFSD: purging unused client (clientid %08x)\n", clp->cl_clientid.cl_id); + if (clp->cl_firststate) + nfsd4_remove_clid_file(clp); expire_client(clp); } + spin_lock(&recall_lock); + list_for_each_safe(pos, next, &del_recall_lru) { + dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); + if (atomic_read(&dp->dl_state) == NFS4_RECALL_COMPLETE) + goto reap; + if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) { + u = dp->dl_time - cutoff; + if (test_val > u) + test_val = u; + break; + } +reap: + dprintk("NFSD: purging unused delegation dp %p, fp %p\n", + dp, dp->dl_flock); + release_delegation(dp); + } + 
spin_unlock(&recall_lock); + test_val = NFSD_LEASE_TIME; list_for_each_safe(pos, next, &close_lru) { sop = list_entry(pos, struct nfs4_stateowner, so_close_lru); if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) { u = sop->so_time - cutoff; - if (close_val > u) - close_val = u; + if (test_val > u) + test_val = u; break; } dprintk("NFSD: purging unused open stateowner (so_id %d)\n", @@ -1564,21 +1977,81 @@ return 1; } +static inline int +access_permit_read(unsigned long access_bmap) +{ + return test_bit(NFS4_SHARE_ACCESS_READ, &access_bmap) || + test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap); +} + +static inline int +access_permit_write(unsigned long access_bmap) +{ + return test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap) || + test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap); +} + +static +int nfs4_check_openmode(struct nfs4_stateid *stp, int flags) +{ + int status = nfserr_openmode; + + if ((flags & WR_STATE) && (!access_permit_write(stp->st_access_bmap))) + goto out; + if ((flags & RD_STATE) && (!access_permit_read(stp->st_access_bmap))) + goto out; + status = nfs_ok; +out: + return status; +} + +static int +nfs4_check_delegmode(struct nfs4_delegation *dp, int flags) +{ + int status = nfserr_openmode; + + if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ)) + goto out; + /* No RD_STATE check: a write delegation also permits reads, + * so reading is allowed under either delegation type. */ + status = nfs_ok; +out: + return status; +} + +static int +nfs4_rw_grace(int flags) +{ + return (nfs4_in_grace() && ((flags & RD_STATE) || (flags & WR_STATE))); +} + +/* + * Allow READ/WRITE during grace period on recovered state only for files + * that are not able to provide mandatory locking.
+ */ +static int +nfs4_check_rw_grace(umode_t mode, int flags) +{ + return (nfs4_rw_grace(flags) && ((mode & (S_ISGID | S_IXGRP)) == S_ISGID)); +} /* * Checks for stateid operations */ int -nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int flags, struct nfs4_stateid **stpp) +nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int flags, struct file **filpp) { - struct nfs4_stateid *stp; + struct nfs4_stateid *stp = NULL; + struct nfs4_delegation *dp = NULL; + stateid_t *stidp; + struct inode *ino = current_fh->fh_dentry->d_inode; int status; dprintk("NFSD: preprocess_stateid_op: stateid = (%08x/%08x/%08x/%08x)\n", stateid->si_boot, stateid->si_stateownerid, stateid->si_fileid, stateid->si_generation); - - *stpp = NULL; + if (filpp) + *filpp = NULL; /* STALE STATEID */ status = nfserr_stale_stateid; @@ -1587,33 +2060,58 @@ /* BAD STATEID */ status = nfserr_bad_stateid; - if (!(stp = find_stateid(stateid, flags))) { - dprintk("NFSD: preprocess_stateid_op: no open stateid!\n"); - goto out; - } - if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) { - dprintk("NFSD: preprocess_stateid_op: fh-stateid mismatch!\n"); - stp->st_vfs_set = 0; - goto out; - } - if (!stp->st_stateowner->so_confirmed) { - dprintk("preprocess_stateid_op: lockowner not confirmed yet!\n"); - goto out; + if (!stateid->si_fileid) { /* delegation stateid */ + + if(!(dp = find_delegation_stateid(ino, stateid))) { + dprintk("NFSD: delegation stateid not found\n"); + if (nfs4_rw_grace(flags)) + status = nfserr_grace; + goto out; + } + stidp = &dp->dl_stateid; + } else { /* open or lock stateid */ + if (!(stp = find_stateid(stateid, flags))) { + dprintk("NFSD: open or lock stateid not found\n"); + if (nfs4_rw_grace(flags)) + status = nfserr_grace; + goto out; + } + if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) + goto out; + if (!stp->st_stateowner->so_confirmed) + goto out; + stidp = &stp->st_stateid; } - if (stateid->si_generation >
stp->st_stateid.si_generation) { - dprintk("preprocess_stateid_op: future stateid?!\n"); + if (stateid->si_generation > stidp->si_generation) goto out; - } /* OLD STATEID */ status = nfserr_old_stateid; - if (stateid->si_generation < stp->st_stateid.si_generation) { - dprintk("preprocess_stateid_op: old stateid!\n"); + if (stateid->si_generation < stidp->si_generation) goto out; + + status = nfserr_grace; + if (nfs4_check_rw_grace(ino->i_mode, flags)) + goto out; + + if (stp) { + renew_client(stp->st_stateowner->so_client); + if ((status = nfs4_check_openmode(stp,flags))) + goto out; + if (filpp) + *filpp = stp->st_vfs_file; + } else if (dp) { + renew_client(dp->dl_client); + if ((status = nfs4_check_delegmode(dp, flags))) + goto out; + if (flags & DELEG_RET) { + atomic_set(&dp->dl_state,NFS4_RECALL_COMPLETE); + release_delegation(dp); + } + if (filpp && dp && dp->dl_stp) + *filpp = dp->dl_stp->st_vfs_file; } - *stpp = stp; status = nfs_ok; - renew_client(stp->st_stateowner->so_client); out: return status; } @@ -1750,17 +2248,6 @@ goto out; } -/* - * eventually, this will perform an upcall to the 'state daemon' as well as - * set the cl_first_state field. 
- */ -void -first_state(struct nfs4_client *clp) -{ - if (!clp->cl_first_state) - clp->cl_first_state = get_seconds(); -} - int nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_confirm *oc) { @@ -1793,8 +2280,16 @@ stp->st_stateid.si_stateownerid, stp->st_stateid.si_fileid, stp->st_stateid.si_generation); - status = nfs_ok; - first_state(sop->so_client); + + if (!sop->so_client->cl_firststate) { + int err = nfsd4_create_clid_file(sop->so_client); + if (!err) { + sop->so_client->cl_firststate = 1; + dprintk("NFSD: OPEN_CONFIRM firststate set [%.*s]\n", + sop->so_client->cl_name.len, + sop->so_client->cl_name.data); + } + } out: if (oc->oc_stateowner) nfs4_get_stateowner(oc->oc_stateowner); @@ -1912,6 +2407,22 @@ return status; } +int +nfsd4_delegreturn(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_delegreturn *dr) +{ + int status; + + if ((status = fh_verify(rqstp, current_fh, S_IFREG, 0))) + goto out; + + nfs4_lock_state(); + status = nfs4_preprocess_stateid_op(current_fh, &dr->dr_stateid, DELEG_RET, NULL); + nfs4_unlock_state(); +out: + return status; +} + + /* * Lock owner state (byte-range locks) */ @@ -1938,7 +2449,7 @@ unsigned int hashval; dprintk("NFSD: find_stateid flags 0x%x\n",flags); - if ((flags & LOCK_STATE) || (flags & RDWR_STATE)) { + if ((flags & LOCK_STATE) || (flags & RD_STATE) || (flags & WR_STATE)) { hashval = stateid_hashval(st_id, f_id); list_for_each_entry(local, &lockstateid_hashtbl[hashval], st_hash) { if ((local->st_stateid.si_stateownerid == st_id) && @@ -1946,7 +2457,7 @@ return local; } } - if ((flags & OPEN_STATE) || (flags & RDWR_STATE)) { + if ((flags & OPEN_STATE) || (flags & RD_STATE) || (flags & WR_STATE)) { hashval = stateid_hashval(st_id, f_id); list_for_each_entry(local, &stateid_hashtbl[hashval], st_hash) { if ((local->st_stateid.si_stateownerid == st_id) && @@ -1958,6 +2469,30 @@ return NULL; } +static struct nfs4_delegation * +find_delegation_stateid(struct inode *ino, 
stateid_t *stid) +{ + struct nfs4_delegation *dp = NULL; + struct nfs4_file *fp = NULL; + u32 st_id; + unsigned int fi_hashval; + + dprintk("NFSD:find_delegation_stateid ino %p, stid %p\n",ino,stid); + + if(!ino || !stid) + return NULL; + st_id = stid->si_stateownerid; + fi_hashval = file_hashval(ino); + if (find_file(fi_hashval, ino, &fp)) { + list_for_each_entry(dp, &fp->fi_del_perfile, dl_del_perfile) { + if(dp->dl_stateid.si_stateownerid == st_id) { + dprintk("NFSD: find_delegation dp %p\n",dp); + return dp; + } + } + } + return NULL; +} /* * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that @@ -2085,7 +2620,6 @@ rp->rp_status = NFSERR_SERVERFAULT; rp->rp_buflen = 0; rp->rp_buf = rp->rp_ibuf; - alloc_lsowner++; return sop; } @@ -2558,22 +3092,22 @@ /* * failure => all reset bets are off, nfserr_no_grace... */ -static int -nfs4_client_to_reclaim(struct nfs4_client *clp) +int +nfs4_client_to_reclaim(char *name, int namlen) { unsigned int strhashval; struct nfs4_client_reclaim *crp = NULL; - crp = alloc_reclaim(clp->cl_name.len); + dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", namlen, name); + crp = alloc_reclaim(namlen); if (!crp) return 0; - strhashval = clientstr_hashval(clp->cl_name.data, clp->cl_name.len); + strhashval = clientstr_hashval(name, namlen); INIT_LIST_HEAD(&crp->cr_strhash); list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]); - memcpy(crp->cr_name.data, clp->cl_name.data, clp->cl_name.len); - crp->cr_name.len = clp->cl_name.len; - crp->cr_first_state = clp->cl_first_state; - crp->cr_expired = 0; + memcpy(crp->cr_name.data, name, namlen); + crp->cr_name.len = namlen; + reclaim_str_hashtbl_size++; return 1; } @@ -2618,6 +3152,9 @@ if (!client) return NULL; + dprintk("NFSD: nfs4_find_reclaim_client for %.*s\n", + clp->cl_name.len, clp->cl_name.data); + /* find clp->cl_name in reclaim_str_hashtbl */ strhashval = clientstr_hashval(client->cl_name.data, client->cl_name.len); @@ -2639,8 +3176,6 @@ if ((crp = 
nfs4_find_reclaim_client(clid)) == NULL) return nfserr_reclaim_bad; - if (crp->cr_expired) - return nfserr_no_grace; return nfs_ok; } @@ -2657,10 +3192,18 @@ if (nfs4_init) return; + if (nfsd4_init_slabs()) + BUG(); /* XXXXXX!!! */ if (!nfs4_reclaim_init) { + int status; + for (i = 0; i < CLIENT_HASH_SIZE; i++) INIT_LIST_HEAD(&reclaim_str_hashtbl[i]); reclaim_str_hashtbl_size = 0; + nfsd4_init_rec_dir(recovery_dirname); + status = nfsd4_list_rec_dir(0); + if (status) + printk("NFSD: Failure in reading recovery data\n"); nfs4_reclaim_init = 1; } for (i = 0; i < CLIENT_HASH_SIZE; i++) { @@ -2689,6 +3232,8 @@ INIT_LIST_HEAD(&close_lru); INIT_LIST_HEAD(&client_lru); + INIT_LIST_HEAD(&del_recall_lru); + spin_lock_init(&recall_lock); boot_time = get_seconds(); grace_time = max(old_lease_time, lease_time); if (reclaim_str_hashtbl_size == 0) @@ -2725,6 +3270,15 @@ { int i; struct nfs4_client *clp = NULL; + struct nfs4_delegation *dp = NULL; + struct nfs4_stateowner *sop = NULL; + struct list_head *pos, *next; + + list_for_each_safe(pos, next, &close_lru) { + sop = list_entry(pos, struct nfs4_stateowner, so_close_lru); + list_del(&sop->so_close_lru); + nfs4_put_stateowner(sop); + } for (i = 0; i < CLIENT_HASH_SIZE; i++) { while (!list_empty(&conf_id_hashtbl[i])) { @@ -2736,20 +3290,31 @@ expire_client(clp); } } + spin_lock(&recall_lock); + list_for_each_safe(pos, next, &del_recall_lru) { + dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); + atomic_set(&dp->dl_state, NFS4_RECALL_COMPLETE); + release_delegation(dp); + } + spin_unlock(&recall_lock); + release_all_files(); cancel_delayed_work(&laundromat_work); flush_scheduled_work(); nfs4_init = 0; + nfs4_reclaim_init = 0; dprintk("NFSD: list_add_perfile %d list_del_perfile %d\n", list_add_perfile, list_del_perfile); dprintk("NFSD: add_perclient %d del_perclient %d\n", add_perclient, del_perclient); dprintk("NFSD: alloc_file %d free_file %d\n", alloc_file, free_file); - dprintk("NFSD: alloc_sowner %d alloc_lsowner 
%d free_sowner %d\n", - alloc_sowner, alloc_lsowner, free_sowner); dprintk("NFSD: vfsopen %d vfsclose %d\n", vfsopen, vfsclose); + dprintk("NFSD: alloc_delegation %d free_delegation %d\n", + alloc_delegation, free_delegation); + if (nfsd4_free_slabs()) + BUG(); /* XXX? */ } void @@ -2801,11 +3366,10 @@ /* populate reclaim_str_hashtbl with current confirmed nfs4_clientid */ for (i = 0; i < CLIENT_HASH_SIZE; i++) { list_for_each_entry(clp, &conf_id_hashtbl[i], cl_idhash) { - if (!nfs4_client_to_reclaim(clp)) { + if (!nfs4_client_to_reclaim(clp->cl_name.data, clp->cl_name.len)) { nfs4_release_reclaim(); goto init_state; } - reclaim_str_hashtbl_size++; } } init_state: Index: linux-2.6.10/fs/nfsd/nfsproc.c =================================================================== --- linux-2.6.10.orig/fs/nfsd/nfsproc.c 2004-12-25 05:34:30.000000000 +0800 +++ linux-2.6.10/fs/nfsd/nfsproc.c 2005-04-05 14:49:13.426688152 +0800 @@ -586,7 +586,6 @@ { nfserr_dquot, -EDQUOT }, #endif { nfserr_stale, -ESTALE }, - { nfserr_jukebox, -EWOULDBLOCK }, { nfserr_jukebox, -ETIMEDOUT }, { nfserr_dropit, -EAGAIN }, { nfserr_dropit, -ENOMEM }, Index: linux-2.6.10/fs/nfsd/nfs4acl.c =================================================================== --- linux-2.6.10.orig/fs/nfsd/nfs4acl.c 2004-12-25 05:34:29.000000000 +0800 +++ linux-2.6.10/fs/nfsd/nfs4acl.c 2005-04-05 14:49:13.429687696 +0800 @@ -89,6 +89,8 @@ return ret; } +/* modify functions to take NFS errors */ + static int mode_from_nfs4(u32 perm, unsigned short *mode, unsigned int flags) { Index: linux-2.6.10/fs/nfsd/nfs4idmap.c =================================================================== --- linux-2.6.10.orig/fs/nfsd/nfs4idmap.c 2004-12-25 05:35:23.000000000 +0800 +++ linux-2.6.10/fs/nfsd/nfs4idmap.c 2005-04-05 14:49:13.414689976 +0800 @@ -78,9 +78,9 @@ #define DefineSimpleCacheLookupMap(STRUCT, FUNC) \ DefineCacheLookup(struct STRUCT, h, FUNC##_lookup, \ - (struct STRUCT *item, int set), /*no setup */, \ + (struct STRUCT *item, 
int set), \ & FUNC##_cache, FUNC##_hash(item), FUNC##_match(item, tmp), \ - STRUCT##_init(new, item), STRUCT##_update(tmp, item), 0) + STRUCT##_init(new, item), STRUCT##_update(tmp, item)) /* Common entry handling */ Index: linux-2.6.10/fs/nfsd/vfs.c =================================================================== --- linux-2.6.10.orig/fs/nfsd/vfs.c 2005-03-31 15:35:26.000000000 +0800 +++ linux-2.6.10/fs/nfsd/vfs.c 2005-04-05 14:49:13.417689520 +0800 @@ -304,6 +304,8 @@ * we need to break all leases. */ err = break_lease(inode, FMODE_WRITE | O_NONBLOCK); + if (err == -EWOULDBLOCK) + err = -ETIMEDOUT; if (err) /* ENOMEM or EWOULDBLOCK */ goto out_nfserr; @@ -678,6 +680,8 @@ * This may block while leases are broken. */ err = break_lease(inode, O_NONBLOCK | ((access & MAY_WRITE) ? FMODE_WRITE : 0)); + if (err == -EWOULDBLOCK) + err = -ETIMEDOUT; if (err) /* NOMEM or WOULDBLOCK */ goto out_nfserr; @@ -822,21 +826,34 @@ nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, struct kvec *vec, int vlen, unsigned long *count) { - struct raparms *ra; - mm_segment_t oldfs; int err; struct file *file; - struct inode *inode; err = nfsd_open(rqstp, fhp, S_IFREG, MAY_READ, &file); if (err) goto out; + err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); + + nfsd_close(file); +out: + return err; +} + +int +nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, + loff_t offset, struct kvec *vec, int vlen, unsigned long *count) +{ + struct inode *inode; + struct raparms *ra; + mm_segment_t oldfs; + int err; + err = nfserr_perm; inode = file->f_dentry->d_inode; #ifdef MSNFS if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && (!lock_may_read(inode, offset, *count))) - goto out_close; + goto out; #endif /* Get readahead parameters */ @@ -872,8 +889,6 @@ dnotify_parent(file->f_dentry, DN_ACCESS); } else err = nfserrno(err); -out_close: - nfsd_close(file); out: return err; } @@ -888,25 +903,40 @@ struct kvec *vec, int vlen, unsigned long 
cnt, int *stablep) { - struct svc_export *exp; struct file *file; - struct dentry *dentry; - struct inode *inode; - mm_segment_t oldfs; int err = 0; - int stable = *stablep; err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file); if (err) goto out; if (!cnt) goto out_close; + + err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stablep); +out_close: + nfsd_close(file); +out: + return err; +} + +int +nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, + loff_t offset, struct kvec *vec, int vlen, + unsigned long cnt, int *stablep) +{ + struct svc_export *exp; + struct dentry *dentry; + struct inode *inode; + mm_segment_t oldfs; + int err = 0; + int stable = *stablep; + err = nfserr_perm; #ifdef MSNFS if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && (!lock_may_write(file->f_dentry->d_inode, offset, cnt))) - goto out_close; + goto out; #endif dentry = file->f_dentry; @@ -993,13 +1023,10 @@ err = 0; else err = nfserrno(err); -out_close: - nfsd_close(file); out: return err; } - #ifdef CONFIG_NFSD_V3 /* * Commit all pending writes to stable storage. Index: linux-2.6.10/fs/nfsd/nfs4callback.c =================================================================== --- linux-2.6.10.orig/fs/nfsd/nfs4callback.c 2005-04-05 19:01:49.158500672 +0800 +++ linux-2.6.10/fs/nfsd/nfs4callback.c 2005-04-05 14:49:13.428687848 +0800 @@ -0,0 +1,589 @@ +/* + * linux/fs/nfsd/nfs4callback.c + * + * Copyright (c) 2001 The Regents of the University of Michigan. + * All rights reserved. + * + * Kendrick Smith + * Andy Adamson + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NFSDDBG_FACILITY NFSDDBG_PROC + +#define NFSPROC4_CB_NULL 0 +#define NFSPROC4_CB_COMPOUND 1 + +/* declarations */ +static void nfs4_cb_null(struct rpc_task *task); +extern spinlock_t recall_lock; + +/* Index of predefined Linux callback client operations */ + +enum { + NFSPROC4_CLNT_CB_NULL = 0, + NFSPROC4_CLNT_CB_RECALL, +}; + +enum nfs_cb_opnum4 { + OP_CB_RECALL = 4, +}; + +#define NFS4_MAXTAGLEN 20 + +#define NFS4_enc_cb_null_sz 0 +#define NFS4_dec_cb_null_sz 0 +#define cb_compound_enc_hdr_sz 4 +#define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2)) +#define op_enc_sz 1 +#define op_dec_sz 2 +#define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2)) +#define enc_stateid_sz 16 +#define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \ + 1 + enc_stateid_sz + \ + enc_nfs4_fh_sz) + +#define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ + op_dec_sz) + +/* +* Generic encode routines from fs/nfs/nfs4xdr.c +*/ +static inline u32 * +xdr_writemem(u32 *p, const void *ptr, int nbytes) +{ + int tmp = XDR_QUADLEN(nbytes); + if (!tmp) + return p; + p[tmp-1] = 0; + memcpy(p, ptr, nbytes); + return p + tmp; +} + +#define WRITE32(n) *p++ = htonl(n) +#define WRITEMEM(ptr,nbytes) do { \ + p = xdr_writemem(p, ptr, nbytes); \ +} while (0) +#define RESERVE_SPACE(nbytes) do { \ + p = xdr_reserve_space(xdr, nbytes); \ + if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __FUNCTION__); \ + BUG_ON(!p); \ +} while (0) + +/* + * Generic decode routines from fs/nfs/nfs4xdr.c + */ +#define DECODE_TAIL \ + status = 0; \ +out: \ + return status; \ +xdr_error: \ + dprintk("NFSD: xdr error! 
(%s:%d)\n", __FILE__, __LINE__); \ + status = -EIO; \ + goto out + +#define READ32(x) (x) = ntohl(*p++) +#define READ64(x) do { \ + (x) = (u64)ntohl(*p++) << 32; \ + (x) |= ntohl(*p++); \ +} while (0) +#define READTIME(x) do { \ + p++; \ + (x.tv_sec) = ntohl(*p++); \ + (x.tv_nsec) = ntohl(*p++); \ +} while (0) +#define READ_BUF(nbytes) do { \ + p = xdr_inline_decode(xdr, nbytes); \ + if (!p) { \ + dprintk("NFSD: %s: reply buffer overflowed in line %d.", \ + __FUNCTION__, __LINE__); \ + return -EIO; \ + } \ +} while (0) + +struct nfs4_cb_compound_hdr { + int status; + u32 ident; + u32 nops; + u32 taglen; + char * tag; +}; + +static struct { +int stat; +int errno; +} nfs_cb_errtbl[] = { + { NFS4_OK, 0 }, + { NFS4ERR_PERM, EPERM }, + { NFS4ERR_NOENT, ENOENT }, + { NFS4ERR_IO, EIO }, + { NFS4ERR_NXIO, ENXIO }, + { NFS4ERR_ACCESS, EACCES }, + { NFS4ERR_EXIST, EEXIST }, + { NFS4ERR_XDEV, EXDEV }, + { NFS4ERR_NOTDIR, ENOTDIR }, + { NFS4ERR_ISDIR, EISDIR }, + { NFS4ERR_INVAL, EINVAL }, + { NFS4ERR_FBIG, EFBIG }, + { NFS4ERR_NOSPC, ENOSPC }, + { NFS4ERR_ROFS, EROFS }, + { NFS4ERR_MLINK, EMLINK }, + { NFS4ERR_NAMETOOLONG, ENAMETOOLONG }, + { NFS4ERR_NOTEMPTY, ENOTEMPTY }, + { NFS4ERR_DQUOT, EDQUOT }, + { NFS4ERR_STALE, ESTALE }, + { NFS4ERR_BADHANDLE, EBADHANDLE }, + { NFS4ERR_BAD_COOKIE, EBADCOOKIE }, + { NFS4ERR_NOTSUPP, ENOTSUPP }, + { NFS4ERR_TOOSMALL, ETOOSMALL }, + { NFS4ERR_SERVERFAULT, ESERVERFAULT }, + { NFS4ERR_BADTYPE, EBADTYPE }, + { NFS4ERR_LOCKED, EAGAIN }, + { NFS4ERR_RESOURCE, EREMOTEIO }, + { NFS4ERR_SYMLINK, ELOOP }, + { NFS4ERR_OP_ILLEGAL, EOPNOTSUPP }, + { NFS4ERR_DEADLOCK, EDEADLK }, + { -1, EIO } +}; + +static int +nfs_cb_stat_to_errno(int stat) +{ + int i; + for (i = 0; nfs_cb_errtbl[i].stat != -1; i++) { + if (nfs_cb_errtbl[i].stat == stat) + return nfs_cb_errtbl[i].errno; + } + /* If we cannot translate the error, the recovery routines should + * handle it. 
+ * Note: remaining NFSv4 error codes have values > 10000, so should + * not conflict with native Linux error codes. + */ + return stat; +} + +/* + * XDR encode + */ + +static int +encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr) +{ + u32 * p; + + RESERVE_SPACE(16); + WRITE32(0); /* tag length is always 0 */ + WRITE32(NFS4_MINOR_VERSION); + WRITE32(hdr->ident); + WRITE32(hdr->nops); + return 0; +} + +static int +encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec) +{ + u32 *p; + int len = cb_rec->cbr_fhlen; + + RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len); + WRITE32(OP_CB_RECALL); + WRITEMEM(&cb_rec->cbr_stateid, sizeof(stateid_t)); + WRITE32(cb_rec->cbr_trunc); + WRITE32(len); + WRITEMEM(cb_rec->cbr_fhval, len); + return 0; +} + +static int +nfs4_xdr_enc_cb_null(struct rpc_rqst *req, u32 *p) +{ + struct xdr_stream xdrs, *xdr = &xdrs; + + xdr_init_encode(&xdrs, &req->rq_snd_buf, p); + RESERVE_SPACE(0); + return 0; +} + +static int +nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, u32 *p, struct nfs4_cb_recall *args) +{ + struct xdr_stream xdr; + struct nfs4_cb_compound_hdr hdr = { + .nops = 1, + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_cb_compound_hdr(&xdr, &hdr); + return (encode_cb_recall(&xdr, args)); +} + + +static int +decode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr){ + u32 *p; + + READ_BUF(8); + READ32(hdr->status); + READ32(hdr->taglen); + READ_BUF(hdr->taglen + 4); + hdr->tag = (char *)p; + p += XDR_QUADLEN(hdr->taglen); + READ32(hdr->nops); + return 0; +} + +static int +decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) +{ + u32 *p; + u32 op; + int32_t nfserr; + + READ_BUF(8); + READ32(op); + if (op != expected) { + dprintk("NFSD: decode_cb_op_hdr: Callback server returned " + " operation %d but we issued a request for %d\n", + op, expected); + return -EIO; + } + READ32(nfserr); + if (nfserr != NFS_OK) + return 
-nfs_cb_stat_to_errno(nfserr); + return 0; +} + +static int +nfs4_xdr_dec_cb_null(struct rpc_rqst *req, u32 *p) +{ + return 0; +} + +static int +nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, u32 *p) +{ + struct xdr_stream xdr; + struct nfs4_cb_compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_cb_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_cb_op_hdr(&xdr, OP_CB_RECALL); +out : + return status; +} + +/* + * RPC procedure tables + */ +#ifndef MAX +# define MAX(a, b) (((a) > (b))? (a) : (b)) +#endif + +#define PROC(proc, call, argtype, restype) \ +[NFSPROC4_CLNT_##proc] = { \ + .p_proc = NFSPROC4_CB_##call, \ + .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \ + .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \ + .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \ +} + +struct rpc_procinfo nfs4_cb_procedures[] = { + PROC(CB_NULL, NULL, enc_cb_null, dec_cb_null), + PROC(CB_RECALL, COMPOUND, enc_cb_recall, dec_cb_recall), +}; + +struct rpc_version nfs_cb_version4 = { + .number = 1, + .nrprocs = sizeof(nfs4_cb_procedures)/sizeof(nfs4_cb_procedures[0]), + .procs = nfs4_cb_procedures +}; + +static struct rpc_version * nfs_cb_version[] = { + NULL, + &nfs_cb_version4, +}; + +/* + * Use the SETCLIENTID credential + */ +struct rpc_cred * +nfsd4_lookupcred(struct nfs4_client *clp, int taskflags) +{ + struct auth_cred acred; + struct rpc_clnt *clnt = clp->cl_callback.cb_client; + struct rpc_cred *ret = NULL; + + if (!clnt) + goto out; + get_group_info(clp->cl_cred.cr_group_info); + acred.uid = clp->cl_cred.cr_uid; + acred.gid = clp->cl_cred.cr_gid; + acred.group_info = clp->cl_cred.cr_group_info; + + dprintk("NFSD: looking up %s cred\n", + clnt->cl_auth->au_ops->au_name); + ret = rpcauth_lookup_credcache(clnt->cl_auth, &acred, taskflags); + put_group_info(clp->cl_cred.cr_group_info); +out: + return ret; +} + +/* + * Set up the callback client and put a NFSPROC4_CB_NULL on the wire... 
+ */ +void +nfsd4_probe_callback(struct nfs4_client *clp) +{ + struct sockaddr_in addr; + struct nfs4_callback *cb = &clp->cl_callback; + struct rpc_timeout timeparms; + struct rpc_xprt * xprt; + struct rpc_program * program = &cb->cb_program; + struct rpc_stat * stat = &cb->cb_stat; + struct rpc_clnt * clnt; + struct rpc_message msg = { + .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], + .rpc_argp = clp, + }; + char hostname[32]; + int status; + + dprintk("NFSD: probe_callback. cb_parsed %d cb_set %d\n", + cb->cb_parsed, atomic_read(&cb->cb_set)); + if (!cb->cb_parsed || atomic_read(&cb->cb_set)) + return; + + /* Initialize address */ + memset(&addr, 0, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_port = htons(cb->cb_port); + addr.sin_addr.s_addr = htonl(cb->cb_addr); + + /* Initialize timeout */ + timeparms.to_initval = (NFSD_LEASE_TIME/4) * HZ; + timeparms.to_retries = 5; + timeparms.to_maxval = (NFSD_LEASE_TIME/2) * HZ; + timeparms.to_exponential = 1; + + /* Create RPC transport */ + if (!(xprt = xprt_create_proto(IPPROTO_TCP, &addr, &timeparms))) { + dprintk("NFSD: couldn't create callback transport!\n"); + goto out_err; + } + + /* Initialize rpc_program */ + program->name = "nfs4_cb"; + program->number = cb->cb_prog; + program->nrvers = sizeof(nfs_cb_version)/sizeof(nfs_cb_version[0]); + program->version = nfs_cb_version; + program->stats = stat; + + /* Initialize rpc_stat */ + memset(stat, 0, sizeof(struct rpc_stat)); + stat->program = program; + + /* Create RPC client + * + * XXX AUTH_UNIX only - need AUTH_GSS.... + */ + sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr.sin_addr.s_addr)); + if (!(clnt = rpc_create_client(xprt, hostname, program, 1, RPC_AUTH_UNIX))) { + dprintk("NFSD: couldn't create callback client\n"); + goto out_xprt; + } + clnt->cl_intr = 1; + clnt->cl_softrtry = 1; + clnt->cl_chatty = 1; + + /* Kick rpciod, put the call on the wire. 
*/ + + if (rpciod_up() != 0) { + dprintk("nfsd: couldn't start rpciod for callbacks!\n"); + goto out_clnt; + } + + /* the task holds a reference to the nfs4_client struct */ + cb->cb_client = clnt; + atomic_inc(&clp->cl_count); + + msg.rpc_cred = nfsd4_lookupcred(clp,0); + status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, nfs4_cb_null, NULL); + + if (status != 0) { + dprintk("NFSD: asynchronous NFSPROC4_CB_NULL failed!\n"); + goto out_rpciod; + } + return; + +out_rpciod: + atomic_dec(&clp->cl_count); + rpciod_down(); +out_clnt: + rpc_shutdown_client(clnt); + goto out_err; +out_xprt: + xprt_destroy(xprt); +out_err: + dprintk("NFSD: warning: no callback path to client %.*s\n", + clp->cl_name.len, clp->cl_name.data); + cb->cb_client = NULL; +} + +static void +nfs4_cb_null(struct rpc_task *task) +{ + struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp; + struct nfs4_callback *cb = &clp->cl_callback; + u32 addr = htonl(cb->cb_addr); + + dprintk("NFSD: nfs4_cb_null task->tk_status %d\n", task->tk_status); + + if (task->tk_status < 0) { + dprintk("NFSD: callback establishment to client %.*s failed\n", + clp->cl_name.len, clp->cl_name.data); + goto out; + } + atomic_set(&cb->cb_set, 1); + dprintk("NFSD: callback set to client %u.%u.%u.%u\n", NIPQUAD(addr)); +out: + put_nfs4_client(clp); +} + +/* + * Called with dp->dl_count incremented + */ +static void +nfs4_cb_recall_done(struct rpc_task *task) +{ + struct nfs4_cb_recall *cbr = (struct nfs4_cb_recall *)task->tk_calldata; + struct nfs4_delegation *dp = cbr->cbr_dp; + int status; + + spin_lock(&recall_lock); + + /* all is well... */ + if (task->tk_status == 0) + goto out; + + /* network partition, retry nfsd4_cb_recall once. */ + if (task->tk_status == -EIO) { + if (atomic_read(&dp->dl_recall_cnt) == 0) + goto retry; + else + /* callback channel no longer available */ + atomic_set(&dp->dl_client->cl_callback.cb_set, 0); + } + + /* Race: a recall occurred miliseconds after a delegation was granted. 
+	 * Client may have received recall prior to delegation. retry recall
+	 * once.
+	 * XXX what about nfserr_bad_stateid?
+	 */
+	if (task->tk_status == -EBADHANDLE) {
+		if (atomic_read(&dp->dl_recall_cnt) == 0)
+			goto retry;
+	}
+
+	/* nfs4_laundromat will reap delegation */
+	atomic_set(&dp->dl_state, NFS4_RECALL_COMPLETE);
+
+out:
+	atomic_dec(&dp->dl_count);
+	BUG_ON(atomic_read(&dp->dl_count) < 0);
+	spin_unlock(&recall_lock);
+	return;
+
+retry:
+	atomic_inc(&dp->dl_recall_cnt);
+	spin_unlock(&recall_lock);
+	/* sleep 2 seconds before retrying recall */
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	schedule_timeout(2*HZ);
+	status = nfsd4_cb_recall(dp);
+	dprintk("NFSD: nfs4_cb_recall_done: retry status: %d dp %p dl_flock %p\n",status,dp, dp->dl_flock);
+}
+
+/*
+ * Queue an asynchronous CB_RECALL for dp; called with dp->dl_count inc'ed.
+ * nfs4_lock_state() may or may not have been called.  Returns 0 once queued.
+ */
+int
+nfsd4_cb_recall(struct nfs4_delegation *dp)
+{
+	struct nfs4_client *clp;
+	struct rpc_clnt *clnt;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
+	};
+	struct nfs4_cb_recall *cbr = &dp->dl_recall;	/* embedded in dp, not a separate allocation */
+	int status;
+
+	dprintk("NFSD: nfsd4_cb_recall NFS4_enc_cb_recall_sz %d NFS4_dec_cb_recall_sz %d \n",NFS4_enc_cb_recall_sz,NFS4_dec_cb_recall_sz);
+
+	clp = dp->dl_client;
+	clnt = clp->cl_callback.cb_client;
+	status = EIO;	/* NOTE(review): positive and not mapped by nfserrno() - confirm callers */
+	if ((!atomic_read(&clp->cl_callback.cb_set)) || !clnt)
+		goto out_free;
+
+	msg.rpc_argp = cbr;
+	msg.rpc_resp = cbr;
+	msg.rpc_cred = nfsd4_lookupcred(clp,0);	/* NOTE(review): this cred reference is never dropped here - confirm ownership */
+
+	cbr->cbr_trunc = 0; /* XXX need to implement truncate optimization */
+	cbr->cbr_dp = dp;
+
+	if ((status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
+		nfs4_cb_recall_done, cbr ))) {
+		dprintk("NFSD: recall_delegation: rpc_call_async failed %d\n",
+			status);
+		goto out_fail;
+	}
+out:
+	return status;
+out_fail:
+	status = nfserrno(status);
+	out_free:
+	/* nothing to free: cbr is &dp->dl_recall, so kfree(cbr) would free an interior pointer */
+	goto out;
+}
Index: linux-2.6.10/fs/nfsd/nfs4proc.c
=================================================================== --- linux-2.6.10.orig/fs/nfsd/nfs4proc.c 2004-12-25 05:35:40.000000000 +0800 +++ linux-2.6.10/fs/nfsd/nfs4proc.c 2005-04-05 14:49:13.432687240 +0800 @@ -461,28 +461,12 @@ } static inline int -access_bits_permit_read(unsigned long access_bmap) -{ - return test_bit(NFS4_SHARE_ACCESS_READ, &access_bmap) || - test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap); -} - -static inline int -access_bits_permit_write(unsigned long access_bmap) -{ - return test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap) || - test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap); -} - -static inline int nfsd4_read(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_read *read) { - struct nfs4_stateid *stp; int status; + struct file *filp = NULL; /* stays NULL on paths that reach out: before the stateid lookup */ /* no need to check permission - this will be done in nfsd_read() */ - if (nfs4_in_grace()) - return nfserr_grace; if (read->rd_offset >= OFFSET_MAX) return nfserr_inval; @@ -508,21 +492,17 @@ goto out; } /* check stateid */ - if ((status = nfs4_preprocess_stateid_op(current_fh, &read->rd_stateid, - CHECK_FH | RDWR_STATE, &stp))) { + if ((status = nfs4_preprocess_stateid_op(current_fh, &read->rd_stateid, + CHECK_FH | RD_STATE, &filp))) { dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); goto out; } - status = nfserr_openmode; - if (!access_bits_permit_read(stp->st_access_bmap)) { - dprintk("NFSD: nfsd4_read: file not opened for read!\n"); - goto out; - } status = nfs_ok; out: nfs4_unlock_state(); read->rd_rqstp = rqstp; read->rd_fhp = current_fh; + read->rd_filp = filp; return status; } @@ -562,6 +542,8 @@ { int status; + if (nfs4_in_grace()) + return nfserr_grace; status = nfsd_unlink(rqstp, current_fh, 0, remove->rm_name, remove->rm_namelen); if (status == nfserr_symlink) return nfserr_notdir; @@ -580,6 +562,9 @@ if (!save_fh->fh_dentry) return status; + if (nfs4_in_grace() && !(save_fh->fh_export->ex_flags + & NFSEXP_NOSUBTREECHECK)) + return nfserr_grace; status = 
nfsd_rename(rqstp, save_fh, rename->rn_sname, rename->rn_snamelen, current_fh, rename->rn_tname, rename->rn_tnamelen); @@ -605,12 +590,8 @@ static inline int nfsd4_setattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_setattr *setattr) { - struct nfs4_stateid *stp; int status = nfs_ok; - if (nfs4_in_grace()) - return nfserr_grace; - if (!current_fh->fh_dentry) return nfserr_nofilehandle; @@ -626,15 +607,10 @@ nfs4_lock_state(); if ((status = nfs4_preprocess_stateid_op(current_fh, &setattr->sa_stateid, - CHECK_FH | RDWR_STATE, &stp))) { + CHECK_FH | WR_STATE, NULL))) { dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); goto out_unlock; } - status = nfserr_openmode; - if (!access_bits_permit_write(stp->st_access_bmap)) { - dprintk("NFSD: nfsd4_setattr: not opened for write!\n"); - goto out_unlock; - } nfs4_unlock_state(); } status = nfs_ok; @@ -654,14 +630,11 @@ static inline int nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_write *write) { - struct nfs4_stateid *stp; stateid_t *stateid = &write->wr_stateid; + struct file *filp = NULL; /* stays NULL when we jump straight to zero_stateid */ u32 *p; int status = nfs_ok; - if (nfs4_in_grace()) - return nfserr_grace; - /* no need to check permission - this will be done in nfsd_write() */ if (write->wr_offset >= OFFSET_MAX) @@ -677,18 +650,13 @@ goto zero_stateid; } if ((status = nfs4_preprocess_stateid_op(current_fh, stateid, - CHECK_FH | RDWR_STATE, &stp))) { + CHECK_FH | WR_STATE, &filp))) { dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); goto out; } - status = nfserr_openmode; - if (!access_bits_permit_write(stp->st_access_bmap)) { - dprintk("NFSD: nfsd4_write: file not open for write!\n"); - goto out; - } - zero_stateid: + nfs4_unlock_state(); write->wr_bytes_written = write->wr_buflen; write->wr_how_written = write->wr_stable_how; @@ -696,9 +664,16 @@ *p++ = nfssvc_boot.tv_sec; *p++ = nfssvc_boot.tv_usec; - status = nfsd_write(rqstp, current_fh, write->wr_offset, - write->wr_vec, write->wr_vlen, 
write->wr_buflen, - &write->wr_how_written); + if (filp) + status = nfsd_vfs_write(rqstp, current_fh, filp, + write->wr_offset, write->wr_vec, + write->wr_vlen, write->wr_buflen, + &write->wr_how_written); + else + status = nfsd_write(rqstp, current_fh, write->wr_offset, + write->wr_vec, write->wr_vlen, write->wr_buflen, + &write->wr_how_written); + if (status == nfserr_symlink) status = nfserr_inval; return status; @@ -872,6 +847,9 @@ case OP_CREATE: op->status = nfsd4_create(rqstp, current_fh, &op->u.create); break; + case OP_DELEGRETURN: + op->status = nfsd4_delegreturn(rqstp, current_fh, &op->u.delegreturn); + break; case OP_GETATTR: op->status = nfsd4_getattr(rqstp, current_fh, &op->u.getattr); break; Index: linux-2.6.10/fs/nfsd/export.c =================================================================== --- linux-2.6.10.orig/fs/nfsd/export.c 2004-12-25 05:34:58.000000000 +0800 +++ linux-2.6.10/fs/nfsd/export.c 2005-04-05 14:49:13.415689824 +0800 @@ -255,7 +255,7 @@ new->ek_export = item->ek_export; } -static DefineSimpleCacheLookup(svc_expkey,0) /* no inplace updates */ +static DefineSimpleCacheLookup(svc_expkey) #define EXPORT_HASHBITS 8 #define EXPORT_HASHMAX (1<< EXPORT_HASHBITS) @@ -492,8 +492,72 @@ new->ex_fsid = item->ex_fsid; } -static DefineSimpleCacheLookup(svc_export,1) /* allow inplace updates */ +struct svc_export * +svc_export_lookup(struct svc_export *item, int set) +{ + struct svc_export *tmp, *new = NULL; + struct cache_head **hp, **head; + head = &svc_export_cache.hash_table[svc_export_hash(item)]; +retry: + if (set||new) + write_lock(&svc_export_cache.hash_lock); + else + read_lock(&svc_export_cache.hash_lock); + for(hp=head; *hp != NULL; hp = &tmp->h.next) { + tmp = container_of(*hp, struct svc_export, h); + if (svc_export_match(item, tmp)) { /* found a match */ + cache_get(&tmp->h); + if (set) { + if (test_bit(CACHE_NEGATIVE, &item->h.flags)) + set_bit(CACHE_NEGATIVE, &tmp->h.flags); + else { + clear_bit(CACHE_NEGATIVE, &tmp->h.flags); + 
svc_export_update(tmp, item); + } + } + if (set||new) + write_unlock(&svc_export_cache.hash_lock); + else + read_unlock(&svc_export_cache.hash_lock); + if (set) + cache_fresh(&svc_export_cache, &tmp->h, + item->h.expiry_time); + if (new) + svc_export_put(&new->h, &svc_export_cache); + return tmp; + } + } + /* Didn't find anything */ + if (new) { + svc_export_init(new, item); + new->h.next = *head; + *head = &new->h; + set_bit(CACHE_HASHED, &new->h.flags); + svc_export_cache.entries++; + if (set) { + tmp = new; + if (test_bit(CACHE_NEGATIVE, &item->h.flags)) + set_bit(CACHE_NEGATIVE, &tmp->h.flags); + else + svc_export_update(tmp, item); + } + } + if (set||new) + write_unlock(&svc_export_cache.hash_lock); + else + read_unlock(&svc_export_cache.hash_lock); + if (new && set) + cache_fresh(&svc_export_cache, &new->h, item->h.expiry_time); + if (new) + return new; + new = kmalloc(sizeof(*new), GFP_KERNEL); + if (new) { + cache_init(&new->h); + goto retry; + } + return NULL; +} struct svc_expkey * exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp) Index: linux-2.6.10/fs/nfsd/nfssvc.c =================================================================== --- linux-2.6.10.orig/fs/nfsd/nfssvc.c 2004-12-25 05:34:58.000000000 +0800 +++ linux-2.6.10/fs/nfsd/nfssvc.c 2005-04-05 14:49:13.422688760 +0800 @@ -378,4 +378,6 @@ .pg_name = "nfsd", /* program name */ .pg_class = "nfsd", /* authentication class */ .pg_stats = &nfsd_svcstats, /* version table */ + .pg_authenticate = &svc_set_client, /* export authentication */ + }; Index: linux-2.6.10/fs/nfsd/nfs4recover.c =================================================================== --- linux-2.6.10.orig/fs/nfsd/nfs4recover.c 2005-04-05 19:01:49.158500672 +0800 +++ linux-2.6.10/fs/nfsd/nfs4recover.c 2005-04-05 14:49:13.430687544 +0800 @@ -0,0 +1,411 @@ +/* +* linux/fs/nfsd/nfs4recover.c +* +* Copyright (c) 2004 The Regents of the University of Michigan. +* All rights reserved. 
+* +* Andy Adamson +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions +* are met: +* +* 1. Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* 2. Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution. +* 3. Neither the name of the University nor the names of its +* contributors may be used to endorse or promote products derived +* from this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED +* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+* +*/ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NFSDDBG_FACILITY NFSDDBG_PROC + +/* MAX_FILE_LEN/2 = max client id name length due to changing name + * into hex + */ +#define MAX_FILE_LEN 256 + +/* Globals */ +char recovery_dirname[] = "/var/lib/nfs/v4recovery"; +static uid_t saveuid; +static gid_t savegid; +static struct nameidata nd_rec_init; +static int rec_dir_init = 0; + +void +nfs4_save_set_user(void) +{ + saveuid = current->fsuid; + savegid = current->fsgid; + current->fsuid = 0; + current->fsgid = 0; +} + +void +nfs4_reset_user(void) +{ + current->fsuid = saveuid; + current->fsgid = savegid; +} + +void +nfs4_make_rec_filename(char **filename, struct nfs4_client *clp) +{ + char *fname = *filename; + int flen = MAX_FILE_LEN; + + memset(fname, 0, flen); + qword_addhex(&fname, &flen, clp->cl_name.data, clp->cl_name.len); +} + +/* XXX need to check dput() mntput ?? */ +int +nfsd4_create_clid_file(struct nfs4_client *clp) +{ + struct file *filp = NULL; + struct dentry *dentry; + mm_segment_t oldfs; + loff_t offset = 0; + char fbuf[MAX_FILE_LEN], *fname = fbuf; + int status; + + + if (!rec_dir_init) + return -EINVAL; + nfs4_save_set_user(); + + dprintk("NFSD: nfsd4_create_clid_file IN recdir [d:mnt] count %d:%d\n", + atomic_read(&nd_rec_init.dentry->d_count), + atomic_read(&nd_rec_init.mnt->mnt_count)); + + /* lock the parent */ + down(&nd_rec_init.dentry->d_inode->i_sem); + + nfs4_make_rec_filename(&fname, clp); + /* dentry->d_count will be 1 */ + dentry = lookup_one_len(fname, nd_rec_init.dentry, strlen(fname)); + status = PTR_ERR(dentry); + if (IS_ERR(dentry)) + goto out_unlock; + + status = -EEXIST; + if (dentry->d_inode){ + dprintk("NFSD: nfsd4_create_clid_file: FILE EXISTS\n"); + goto out_unlock; + } + + /* nd_rec_init.dentry->d_count is bumped */ + status = vfs_create(nd_rec_init.dentry->d_inode, dentry, S_IRWXU, NULL); + if (status < 0) + goto out_unlock; + + 
up(&nd_rec_init.dentry->d_inode->i_sem); + + filp = dentry_open(dget(dentry), mntget(nd_rec_init.mnt), O_RDWR); + status = PTR_ERR(filp); + if (IS_ERR(filp)) + goto out_mnt; + + oldfs = get_fs(); set_fs(KERNEL_DS); + status = vfs_write(filp, clp->cl_name.data, clp->cl_name.len, &offset); + set_fs(oldfs); + + dprintk("NFSD: nfsd4_create_clid_file vfs_write returns %d\n",status); + if (status >= 0) + status = nfs_ok; + + if (filp->f_op && filp->f_op->flush) { + int err = filp->f_op->flush(filp); + dprintk("NFSD: nfsd4_create_clid_file called flush\n"); + if (!status) + status = err; + } + /* dget and mntget in dentry_open call */ + fput(filp); + + /* dentry->d_count will be 0 */ + dput(dentry); +out_mnt: + /* dget in vfs_create call */ + dput(nd_rec_init.dentry); + +out: + nfs4_reset_user(); + + dprintk("NFSD: nfsd4_create_clid_file OUT recdir [d:mnt] count %d:%d\n", + atomic_read(&nd_rec_init.dentry->d_count), + atomic_read(&nd_rec_init.mnt->mnt_count)); + dprintk("NFSD: nfsd4_create_clid_file returns %d\n",status); + + return status; + +out_unlock: + up(&nd_rec_init.dentry->d_inode->i_sem); + goto out; +} + +/* + * called with pdentry->d_inode->i_sem held ? + */ +int +nfsd4_unlink_rec_file(char *name, int namlen) +{ + struct dentry *dentry; + int type, status; + + dprintk("NFSD: nfsd4_unlink_rec_file. name %.*s\n", namlen, name); + + dentry = lookup_one_len(name, nd_rec_init.dentry, namlen); + dprintk("NFSD: nfsd4_unlink_rec_file POST LOOKUP nd_rec d_count %d\n", + atomic_read(&nd_rec_init.dentry->d_count)); + status = PTR_ERR(dentry); + if (IS_ERR(dentry)) + goto out; + + status = -ENOENT; + if (!dentry->d_inode) { + dput(dentry); + goto out; + } + + /* should only be files here! 
*/
+	type = dentry->d_inode->i_mode & S_IFMT;
+	status = -EISDIR;
+	if (type != S_IFREG) {	/* compare the whole type field; S_IFREG is not a single-bit flag */
+		dput(dentry);
+		goto out;
+	}
+
+	dprintk("NFSD: nfsd4_unlink_rec_file PRE VFS UNLINK [%d:%d]\n",
+		atomic_read(&nd_rec_init.dentry->d_count),
+		atomic_read(&nd_rec_init.mnt->mnt_count));
+
+	status = vfs_unlink(nd_rec_init.dentry->d_inode, dentry);	/* NOTE(review): parent i_sem is not taken on this path - confirm */
+
+	dprintk("NFSD: nfsd4_unlink_rec_file POST VFS UNLINK [%d:%d]\n",
+		atomic_read(&nd_rec_init.dentry->d_count),
+		atomic_read(&nd_rec_init.mnt->mnt_count));
+	dprintk("NFSD: nfsd4_unlink_rec_file FILE dentry->d_count %d\n",
+		atomic_read(&dentry->d_count));
+	dput(dentry);	/* drop the lookup_one_len() reference, as the error paths already do */
+out:
+	dprintk("NFSD: nfsd4_unlink_rec_file returns %d\n",status);
+	return status;
+}
+
+void
+nfsd4_remove_clid_file(struct nfs4_client *clp)
+{
+	char fbuf[MAX_FILE_LEN], *fname = fbuf;
+	int status;
+
+	if (!rec_dir_init)	/* no recovery directory: nothing to remove */
+		return;
+
+	dprintk("NFSD: nfsd4_remove_clid_file client %.*s\n",
+		clp->cl_name.len,clp->cl_name.data);
+
+	nfs4_save_set_user();	/* switch fsuid/fsgid to 0 for the unlink */
+
+	dprintk("NFSD: nfsd4_remove_clid_file IN recdir [d:mnt] count %d:%d\n",
+		atomic_read(&nd_rec_init.dentry->d_count),
+		atomic_read(&nd_rec_init.mnt->mnt_count));
+
+	nfs4_make_rec_filename(&fname, clp);	/* hex-encodes cl_name into fbuf */
+	status = nfsd4_unlink_rec_file(fname, strlen(fname));
+	nfs4_reset_user();
+	if (status != nfs_ok)	/* "%.*s" takes an int precision, so strlen() must be cast */
+		printk("NFSD: Failed to remove expired client state file %.*s from %s\n", (int)strlen(fname), fname, recovery_dirname);
+
+	dprintk("NFSD: nfsd4_remove_clid_file OUT recdir [d:mnt] count %d:%d\n",
+		atomic_read(&nd_rec_init.dentry->d_count),
+		atomic_read(&nd_rec_init.mnt->mnt_count));
+	return;
+}
+
+struct rec_dirent {
+	int clear;
+};
+
+/*
+ * on reboot, stuff the reclaim hash with known client id's.
+ *
+ * the filename may not equal the clid. the clid might be the first
+ * (and so far only) line of data in the file.
+ *
+ * i will probably end up writing data such as the setclientid principal
+ * to each clid file. if i do, i will always put the clid as the
+ * first line of data.
+ */
+
+int
+nfsd4_get_recdir_dirent(struct rec_dirent *rdirent, const char *name,
+		int namlen, loff_t offset, ino_t ino, unsigned int d_type)
+{
+	struct dentry *dclid;
+	struct file *filp;
+	mm_segment_t oldfs;
+	int status = nfs_ok;
+
+	dprintk("NFSD: nfsd4_get_recdir_dirent IN recdir [d:mnt] count %d:%d\n",
+		atomic_read(&nd_rec_init.dentry->d_count),
+		atomic_read(&nd_rec_init.mnt->mnt_count));
+
+	dprintk("NFSD: nfsd4_get_recdir_dirent name %.*s, clear %d\n",
+		namlen, name, rdirent->clear);
+
+	if (name && isdotent(name, namlen))
+		goto out;
+
+	dclid = lookup_one_len(name, nd_rec_init.dentry, namlen);
+	status = PTR_ERR(dclid);
+	if(IS_ERR(dclid))
+		goto out;
+
+	if (rdirent->clear){	/* clear != 0: unlink the entry instead of reading it */
+		dprintk("NFSD: nfsd4_get_recdir_dirent REMOVE\n");
+
+		dprintk("NFSD: nfsd4_get_recdir_dirent PRE VFS_UNLINK [%d:%d]\n",
+			atomic_read(&nd_rec_init.dentry->d_count),
+			atomic_read(&nd_rec_init.mnt->mnt_count));
+
+		status = vfs_unlink(nd_rec_init.dentry->d_inode, dclid);
+
+		dprintk("NFSD: nfsd4_get_recdir_dirent POST VFS_UNLINK [%d:%d]\n",
+			atomic_read(&nd_rec_init.dentry->d_count),
+			atomic_read(&nd_rec_init.mnt->mnt_count));
+		dput(dclid);	/* drop the lookup_one_len() reference; the read branch hands it to dentry_open() */
+	} else {
+		char buf[MAX_FILE_LEN];
+
+		dprintk("NFSD: nfsd4_get_recdir_dirent READ\n");
+
+		filp = dentry_open(dclid, mntget(nd_rec_init.mnt), O_RDWR);
+		if (IS_ERR(filp)) {
+			status = PTR_ERR(filp);
+			goto out;
+		}
+
+		memset(buf, 0, MAX_FILE_LEN);
+		oldfs = get_fs(); set_fs(KERNEL_DS);
+		status = vfs_read(filp, buf, MAX_FILE_LEN, &filp->f_pos);
+		set_fs(oldfs);
+
+		dprintk("NFSD: nfsd4_get_recdir_dirent vfs_read returns %d\n",
+			status);
+		if (status > 0)	/* status is the number of bytes read into buf */
+			status = nfs4_client_to_reclaim(buf, status);
+		fput(filp);
+	}
+out:
+	dprintk("NFSD:nfsd4_get_recdir_dirent OUT recdir [d:mnt] count %d:%d\n",
+		atomic_read(&nd_rec_init.dentry->d_count),
+		atomic_read(&nd_rec_init.mnt->mnt_count));
+
+	dprintk("NFSD: nfsd4_get_recdir_dirent returns %d\n",status);
+	return 0;	/* status is only logged; the caller always sees 0 */
+}
+
+int
+nfsd4_list_rec_dir(int clear)
+{
+	struct file *filp;
+	struct 
rec_dirent rdirent;
+	int status;
+
+	if (!rec_dir_init)
+		return -EINVAL;
+
+	nfs4_save_set_user();
+
+	dprintk("NFSD: nfsd4_list_rec_dir IN recdir [d:mnt] count %d:%d\n",
+		atomic_read(&nd_rec_init.dentry->d_count),
+		atomic_read(&nd_rec_init.mnt->mnt_count));
+
+	/* open directory */
+	filp = dentry_open(dget(nd_rec_init.dentry), mntget(nd_rec_init.mnt),
+			O_RDWR);
+	status = PTR_ERR(filp);
+	if (IS_ERR(filp))
+		goto out;
+	rdirent.clear = clear;
+
+	/* read the directory entries into memory */
+	status = vfs_readdir(filp, (filldir_t) nfsd4_get_recdir_dirent,
+			(void*)&rdirent);
+
+	fput(filp);
+out:
+	dprintk("NFSD: nfsd4_list_rec_dir OUT recdir [d:mnt] count %d:%d\n",
+		atomic_read(&nd_rec_init.dentry->d_count),
+		atomic_read(&nd_rec_init.mnt->mnt_count));
+
+	dprintk("NFSD: nfsd4_list_rec_dir DONE status: %d\n", status);
+
+	nfs4_reset_user();
+	return status;
+}
+
+
+/*
+ * Look up rec_dirname into nd_rec_init and hold that reference; sets rec_dir_init on success.
+ */
+
+void
+nfsd4_init_rec_dir(char *rec_dirname)
+{
+	int status;
+
+	printk("NFSD: Using %s as the NFSv4 state recovery directory\n",
+			rec_dirname);
+
+	nfs4_save_set_user();
+
+	status = path_lookup(rec_dirname, LOOKUP_FOLLOW, &nd_rec_init);
+
+	if (!status) {	/* only touch nd_rec_init when the lookup succeeded */
+		rec_dir_init = 1;
+		printk("NFSD: nfsd4_init_rec_dir INITIAL recdir [d:mnt] count %d:%d\n",
+			atomic_read(&nd_rec_init.dentry->d_count),
+			atomic_read(&nd_rec_init.mnt->mnt_count));
+	}
+	nfs4_reset_user();
+	printk("NFSD: nfsd4_init_rec_dir rec_dir_init %d\n", rec_dir_init);
+}
+
+void
+nfsd4_shutdown_rec_dir(void)
+{
+	if (!rec_dir_init)	/* nothing is held unless nfsd4_init_rec_dir() succeeded */
+		return;
+	rec_dir_init = 0;
+	printk("NFSD: nfsd4_shutdown_rec_dir FINAL recdir [d:mnt] count %d:%d\n",	/* log before the release, while still referenced */
+		atomic_read(&nd_rec_init.dentry->d_count), atomic_read(&nd_rec_init.mnt->mnt_count));
+	path_release(&nd_rec_init);
+}
Index: linux-2.6.10/fs/nfsd/Makefile
===================================================================
--- linux-2.6.10.orig/fs/nfsd/Makefile	2004-12-25 05:35:50.000000000 +0800
+++ linux-2.6.10/fs/nfsd/Makefile	2005-04-05 14:49:13.431687392 +0800
@@ 
-8,5 +8,5 @@ export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ - nfs4acl.o + nfs4acl.o nfs4callback.o nfs4recover.o nfsd-objs := $(nfsd-y) Index: linux-2.6.10/fs/nfs/nfs4xdr.c =================================================================== --- linux-2.6.10.orig/fs/nfs/nfs4xdr.c 2004-12-25 05:35:40.000000000 +0800 +++ linux-2.6.10/fs/nfs/nfs4xdr.c 2005-04-05 14:49:13.452684200 +0800 @@ -82,12 +82,16 @@ #define encode_getfh_maxsz (op_encode_hdr_maxsz) #define decode_getfh_maxsz (op_decode_hdr_maxsz + 1 + \ ((3+NFS4_FHSIZE) >> 2)) -#define encode_getattr_maxsz (op_encode_hdr_maxsz + 3) +#define nfs4_fattr_bitmap_maxsz 3 +#define encode_getattr_maxsz (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) #define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2)) #define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) -#define nfs4_fattr_bitmap_maxsz (36 + 2 * nfs4_name_maxsz) -#define decode_getattr_maxsz (op_decode_hdr_maxsz + 3 + \ - nfs4_fattr_bitmap_maxsz) +/* This is based on getfattr, which uses the most attributes: */ +#define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \ + 3 + 3 + 3 + 2 * nfs4_name_maxsz)) +#define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \ + nfs4_fattr_value_maxsz) +#define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz) #define encode_savefh_maxsz (op_encode_hdr_maxsz) #define decode_savefh_maxsz (op_decode_hdr_maxsz) #define encode_fsinfo_maxsz (op_encode_hdr_maxsz + 2) @@ -122,11 +126,11 @@ #define encode_symlink_maxsz (op_encode_hdr_maxsz + \ 1 + nfs4_name_maxsz + \ nfs4_path_maxsz + \ - nfs4_fattr_bitmap_maxsz) + nfs4_fattr_maxsz) #define decode_symlink_maxsz (op_decode_hdr_maxsz + 8) #define encode_create_maxsz (op_encode_hdr_maxsz + \ 2 + nfs4_name_maxsz + \ - nfs4_fattr_bitmap_maxsz) + nfs4_fattr_maxsz) #define decode_create_maxsz (op_decode_hdr_maxsz + 8) #define 
encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4) #define decode_delegreturn_maxsz (op_decode_hdr_maxsz) @@ -205,7 +209,7 @@ #define NFS4_enc_setattr_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ op_encode_hdr_maxsz + 4 + \ - nfs4_fattr_bitmap_maxsz + \ + nfs4_fattr_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_setattr_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ @@ -360,6 +364,20 @@ encode_delegreturn_maxsz) #define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \ decode_delegreturn_maxsz) +#define NFS4_enc_getacl_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_getacl_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + \ + nfs4_fattr_bitmap_maxsz + 1) +#define NFS4_enc_setacl_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + op_encode_hdr_maxsz + 4 + \ + nfs4_fattr_bitmap_maxsz + 1) +#define NFS4_dec_setacl_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) static struct { unsigned int mode; @@ -459,7 +477,7 @@ * In the worst-case, this would be * 12(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime) * = 36 bytes, plus any contribution from variable-length fields - * such as owner/group/acl's. + * such as owner/group. 
*/ len = 16; @@ -1083,6 +1101,27 @@ return 0; } +extern nfs4_stateid zero_stateid; + +static int +encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg) +{ + uint32_t *p; + + RESERVE_SPACE(4+sizeof(zero_stateid.data)); + WRITE32(OP_SETATTR); + WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data)); + RESERVE_SPACE(2*4); + WRITE32(1); + WRITE32(FATTR4_WORD0_ACL); + if (arg->acl_len % 4) + return -EINVAL; + RESERVE_SPACE(4); + WRITE32(arg->acl_len); + xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); + return 0; +} + static int encode_savefh(struct xdr_stream *xdr) { @@ -1627,6 +1666,34 @@ } /* + * Encode a GETACL request + */ +static int +nfs4_xdr_enc_getacl(struct rpc_rqst *req, uint32_t *p, + struct nfs_getaclargs *args) +{ + struct xdr_stream xdr; + struct rpc_auth *auth = req->rq_task->tk_auth; + struct compound_hdr hdr = { + .nops = 2, + }; + int replen, status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0); + /* set up reply buffer: */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_getacl_sz) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, + args->acl_pages, args->acl_pgbase, args->acl_len); +out: + return status; +} + +/* * Encode a WRITE request */ static int nfs4_xdr_enc_write(struct rpc_rqst *req, uint32_t *p, struct nfs_writeargs *args) @@ -3122,6 +3189,46 @@ return decode_op_hdr(xdr, OP_RENEW); } +static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, + ssize_t *acl_len) +{ + uint32_t *savep; + uint32_t attrlen, + bitmap[2] = {0}; + struct kvec *iov = req->rq_rcv_buf.head; + int status; + + if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) + goto out; + if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) + goto out; + if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) + goto out; + + if (unlikely(bitmap[0] & 
(FATTR4_WORD0_ACL - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_ACL)) { + int hdrlen, recvd; + + /* We ignore &savep and don't do consistency checks on + * the attr length. Let userspace figure it out.... */ + hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base; + recvd = req->rq_rcv_buf.len - hdrlen; + if (attrlen > recvd) { + printk(KERN_WARNING "NFS: server cheating in getattr" + " acl reply: attrlen %u > recvd %u\n", + attrlen, recvd); + return -EINVAL; + } + if (attrlen <= *acl_len) + xdr_read_pages(xdr, attrlen); + *acl_len = attrlen; + } + +out: + return status; +} + static int decode_savefh(struct xdr_stream *xdr) { @@ -3413,6 +3520,71 @@ } +/* + * Encode an SETACL request + */ +static int +nfs4_xdr_enc_setacl(struct rpc_rqst *req, uint32_t *p, struct nfs_setaclargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_setacl(&xdr, args); +out: + return status; +} +/* + * Decode SETACL response + */ +static int +nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, uint32_t *p, void *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_setattr(&xdr, res); +out: + return status; +} + +/* + * Decode GETACL response + */ +static int +nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, uint32_t *p, ssize_t *acl_len) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_getacl(&xdr, rqstp, acl_len); + +out: + return status; +} /* * Decode CLOSE 
response @@ -4009,6 +4181,8 @@ PROC(READDIR, enc_readdir, dec_readdir), PROC(SERVER_CAPS, enc_server_caps, dec_server_caps), PROC(DELEGRETURN, enc_delegreturn, dec_delegreturn), + PROC(GETACL, enc_getacl, dec_getacl), + PROC(SETACL, enc_setacl, dec_setacl), }; struct rpc_version nfs_version4 = { Index: linux-2.6.10/fs/nfs/inode.c =================================================================== --- linux-2.6.10.orig/fs/nfs/inode.c 2004-12-25 05:35:24.000000000 +0800 +++ linux-2.6.10/fs/nfs/inode.c 2005-04-05 14:49:13.445685264 +0800 @@ -486,13 +486,27 @@ if (error < 0) goto out_err; - buf->f_frsize = server->wtmult; + /* + * Current versions of glibc do not correctly handle the + * case where f_frsize != f_bsize. Eventually we want to + * report the value of wtmult in this field. + */ + buf->f_frsize = sb->s_blocksize; + + /* + * On most *nix systems, f_blocks, f_bfree, and f_bavail + * are reported in units of f_frsize. Linux hasn't had + * an f_frsize field in its statfs struct until recently, + * thus historically Linux's sys_statfs reports these + * fields in units of f_bsize. 
+ */ buf->f_bsize = sb->s_blocksize; blockbits = sb->s_blocksize_bits; blockres = (1 << blockbits) - 1; buf->f_blocks = (res.tbytes + blockres) >> blockbits; buf->f_bfree = (res.fbytes + blockres) >> blockbits; buf->f_bavail = (res.abytes + blockres) >> blockbits; + buf->f_files = res.tfiles; buf->f_ffree = res.afiles; @@ -565,9 +579,9 @@ memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) - nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS; else - nfsi->flags |= NFS_INO_INVALID_ATTR; + nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS; } /* @@ -605,7 +619,7 @@ return 0; if (nfs_compare_fh(NFS_FH(inode), fh)) return 0; - if (is_bad_inode(inode)) + if (is_bad_inode(inode) || NFS_STALE(inode)) return 0; return 1; } @@ -664,7 +678,7 @@ /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. 
*/ - inode->i_op = &nfs_file_inode_operations; + inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops; if (S_ISREG(inode->i_mode)) { inode->i_fop = &nfs_file_operations; inode->i_data.a_ops = &nfs_file_aops; @@ -766,13 +780,8 @@ vmtruncate(inode, attr->ia_size); } } - if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { - struct rpc_cred **cred = &NFS_I(inode)->cache_access.cred; - if (*cred) { - put_rpccred(*cred); - *cred = NULL; - } - } + if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) + NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS; nfs_end_data_update(inode); unlock_kernel(); return error; @@ -949,14 +958,14 @@ lock_kernel(); if (!inode || is_bad_inode(inode)) goto out_nowait; - if (NFS_STALE(inode) && inode != inode->i_sb->s_root->d_inode) + if (NFS_STALE(inode)) goto out_nowait; while (NFS_REVALIDATING(inode)) { status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING); if (status < 0) goto out_nowait; - if (NFS_SERVER(inode)->flags & NFS_MOUNT_NOAC) + if (NFS_ATTRTIMEO(inode) == 0) continue; if (NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME)) continue; @@ -968,14 +977,14 @@ /* Protect against RPC races by saving the change attribute */ verifier = nfs_save_change_attribute(inode); status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr); - if (status) { + if (status != 0) { dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode), status); if (status == -ESTALE) { - NFS_FLAGS(inode) |= NFS_INO_STALE; - if (inode != inode->i_sb->s_root->d_inode) - remove_inode_hash(inode); + nfs_zap_caches(inode); + if (!S_ISDIR(inode->i_mode)) + NFS_FLAGS(inode) |= NFS_INO_STALE; } goto out; } @@ -1014,7 +1023,6 @@ inode->i_sb->s_id, (long long)NFS_FILEID(inode)); - NFS_FLAGS(inode) &= ~NFS_INO_STALE; out: NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING; wake_up(&nfsi->nfs_i_wait); @@ -1161,7 +1169,7 @@ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & 
S_IALLUGO) || inode->i_uid != fattr->uid || inode->i_gid != fattr->gid) - nfsi->flags |= NFS_INO_INVALID_ATTR; + nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS; /* Has the link count changed? */ if (inode->i_nlink != fattr->nlink) @@ -1270,7 +1278,7 @@ #endif nfsi->change_attr = fattr->change_attr; if (!data_unstable) - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS; } memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); @@ -1278,14 +1286,8 @@ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || inode->i_uid != fattr->uid || - inode->i_gid != fattr->gid) { - struct rpc_cred **cred = &NFS_I(inode)->cache_access.cred; - if (*cred) { - put_rpccred(*cred); - *cred = NULL; - } - invalid |= NFS_INO_INVALID_ATTR; - } + inode->i_gid != fattr->gid) + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS; inode->i_mode = fattr->mode; inode->i_nlink = fattr->nlink; @@ -1335,7 +1337,8 @@ */ nfs_invalidate_inode(inode); out_err: - return -EIO; + NFS_FLAGS(inode) |= NFS_INO_STALE; + return -ESTALE; } /* @@ -1449,8 +1452,6 @@ kill_anon_super(s); - nfs4_renewd_prepare_shutdown(server); - if (server->client != NULL && !IS_ERR(server->client)) rpc_shutdown_client(server->client); if (server->client_sys != NULL && !IS_ERR(server->client_sys)) @@ -1461,8 +1462,6 @@ rpciod_down(); /* release rpciod */ - destroy_nfsv4_state(server); - if (server->hostname != NULL) kfree(server->hostname); kfree(server); @@ -1478,8 +1477,53 @@ #ifdef CONFIG_NFS_V4 -static void nfs4_clear_inode(struct inode *); +#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" + +int +nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf, + size_t buflen, int flags) +{ + struct inode *inode = dentry->d_inode; + + if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0) + return -EINVAL; + + if (!S_ISREG(inode->i_mode) && + (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) + return -EPERM; + + 
return nfs4_proc_set_acl(inode, buf, buflen); +} + +/* The getxattr man page suggests returning -ENODATA for unknown attributes, + * and that's what we'll do for e.g. user attributes that haven't been set. + * But we'll follow ext2/ext3's lead by returning -EOPNOTSUPP for unsupported + * attributes in kernel-managed attribute namespaces. */ +ssize_t +nfs4_getxattr(struct dentry *dentry, const char *key, void *buf, + size_t buflen) +{ + struct inode *inode = dentry->d_inode; + if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0) + return -EOPNOTSUPP; + + return nfs4_proc_get_acl(inode, buf, buflen); +} + +ssize_t +nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen) +{ + ssize_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1; + + if (buf && buflen < len) + return -ERANGE; + if (buf) + memcpy(buf, XATTR_NAME_NFSV4_ACL, len); + return len; +} + +static void nfs4_clear_inode(struct inode *); static struct super_operations nfs4_sops = { .alloc_inode = nfs_alloc_inode, @@ -1543,9 +1587,6 @@ server->wsize = nfs_block_size(data->wsize, NULL); server->flags = data->flags & NFS_MOUNT_FLAGMASK; - /* NFSv4 doesn't use NLM locking */ - server->flags |= NFS_MOUNT_NONLM; - server->acregmin = data->acregmin*HZ; server->acregmax = data->acregmax*HZ; server->acdirmin = data->acdirmin*HZ; @@ -1790,8 +1831,22 @@ static void nfs4_kill_super(struct super_block *sb) { + struct nfs_server *server = NFS_SB(sb); + nfs_return_all_delegations(sb); - nfs_kill_super(sb); + kill_anon_super(sb); + + nfs4_renewd_prepare_shutdown(server); + + if (server->client != NULL && !IS_ERR(server->client)) + rpc_shutdown_client(server->client); + rpciod_down(); /* release rpciod */ + + destroy_nfsv4_state(server); + + if (server->hostname != NULL) + kfree(server->hostname); + kfree(server); } static struct file_system_type nfs4_fs_type = { @@ -1821,9 +1876,13 @@ extern int nfs_init_nfspagecache(void); extern void nfs_destroy_nfspagecache(void); extern int nfs_init_readpagecache(void); -extern int 
nfs_destroy_readpagecache(void); +extern void nfs_destroy_readpagecache(void); extern int nfs_init_writepagecache(void); -extern int nfs_destroy_writepagecache(void); +extern void nfs_destroy_writepagecache(void); +#ifdef CONFIG_NFS_DIRECTIO +extern int nfs_init_directcache(void); +extern void nfs_destroy_directcache(void); +#endif static kmem_cache_t * nfs_inode_cachep; @@ -1904,6 +1963,12 @@ if (err) goto out1; +#ifdef CONFIG_NFS_DIRECTIO + err = nfs_init_directcache(); + if (err) + goto out0; +#endif + #ifdef CONFIG_PROC_FS rpc_proc_register(&nfs_rpcstat); #endif @@ -1914,8 +1979,14 @@ goto out; return 0; out: +#ifdef CONFIG_PROC_FS rpc_proc_unregister("nfs"); +#endif +#ifdef CONFIG_NFS_DIRECTIO + nfs_destroy_directcache(); +out0: /* nfs_init_directcache() failed: unwind from the write cache down */ +#endif nfs_destroy_writepagecache(); out1: nfs_destroy_readpagecache(); out2: @@ -1928,6 +1999,9 @@ static void __exit exit_nfs_fs(void) { +#ifdef CONFIG_NFS_DIRECTIO + nfs_destroy_directcache(); +#endif nfs_destroy_writepagecache(); nfs_destroy_readpagecache(); nfs_destroy_inodecache(); Index: linux-2.6.10/fs/nfs/nfs4state.c =================================================================== --- linux-2.6.10.orig/fs/nfs/nfs4state.c 2004-12-25 05:33:49.000000000 +0800 +++ linux-2.6.10/fs/nfs/nfs4state.c 2005-04-05 14:49:13.446685112 +0800 @@ -445,7 +445,7 @@ state->owner = owner; atomic_inc(&owner->so_count); list_add(&state->inode_states, &nfsi->open_states); - state->inode = inode; + state->inode = igrab(inode); spin_unlock(&inode->i_lock); } else { spin_unlock(&inode->i_lock); @@ -471,6 +471,7 @@ list_del(&state->inode_states); spin_unlock(&inode->i_lock); list_del(&state->open_states); + iput(inode); BUG_ON (state->state != 0); nfs4_free_open_state(state); nfs4_put_state_owner(owner); @@ -486,7 +487,6 @@ struct nfs4_state_owner *owner = state->owner; struct nfs4_client *clp = owner->so_client; int newstate; - int status = 0; atomic_inc(&owner->so_count); down_read(&clp->cl_sem); @@ -508,10 +508,8 @@ newstate |= FMODE_WRITE; if
(state->state == newstate) goto out; - if (newstate != 0) - status = nfs4_do_downgrade(inode, state, newstate); - else - status = nfs4_do_close(inode, state); + if (nfs4_do_close(inode, state, newstate) == -EINPROGRESS) + return; } out: nfs4_put_open_state(state); Index: linux-2.6.10/fs/nfs/idmap.c =================================================================== --- linux-2.6.10.orig/fs/nfs/idmap.c 2004-12-25 05:34:26.000000000 +0800 +++ linux-2.6.10/fs/nfs/idmap.c 2005-04-05 14:49:13.454683896 +0800 @@ -80,6 +80,7 @@ static ssize_t idmap_pipe_downcall(struct file *, const char __user *, size_t); void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); +static void idmap_pipe_release(struct inode *inode); static unsigned int fnvhash32(const void *, size_t); @@ -87,6 +88,7 @@ .upcall = idmap_pipe_upcall, .downcall = idmap_pipe_downcall, .destroy_msg = idmap_pipe_destroy_msg, + .release_pipe = idmap_pipe_release, }; void @@ -448,6 +450,19 @@ up(&idmap->idmap_im_lock); } +static void +idmap_pipe_release(struct inode *inode) +{ + struct rpc_inode *rpci = RPC_I(inode); + struct idmap *idmap = (struct idmap *)rpci->private; + struct idmap_msg *im = &idmap->idmap_im; + + down(&idmap->idmap_im_lock); + im->im_status = IDMAP_STATUS_LOOKUPFAIL; + wake_up(&idmap->idmap_wq); + up(&idmap->idmap_im_lock); +} + /* * Fowler/Noll/Vo hash * http://www.isthe.com/chongo/tech/comp/fnv/ Index: linux-2.6.10/fs/nfs/dir.c =================================================================== --- linux-2.6.10.orig/fs/nfs/dir.c 2005-03-31 15:35:26.000000000 +0800 +++ linux-2.6.10/fs/nfs/dir.c 2005-04-05 14:49:13.439686176 +0800 @@ -40,8 +40,6 @@ static int nfs_opendir(struct inode *, struct file *); static int nfs_readdir(struct file *, void *, filldir_t); static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *); -static int nfs_cached_lookup(struct inode *, struct dentry *, - struct nfs_fh *, struct nfs_fattr *); static int nfs_create(struct inode *, struct dentry *, 
int, struct nameidata *); static int nfs_mkdir(struct inode *, struct dentry *, int); static int nfs_rmdir(struct inode *, struct dentry *); @@ -92,6 +90,9 @@ .permission = nfs_permission, .getattr = nfs_getattr, .setattr = nfs_setattr, + .getxattr = nfs4_getxattr, + .setxattr = nfs4_setxattr, + .listxattr = nfs4_listxattr, }; #endif /* CONFIG_NFS_V4 */ @@ -294,24 +295,13 @@ return res; } -static unsigned int nfs_type2dtype[] = { - DT_UNKNOWN, - DT_REG, - DT_DIR, - DT_BLK, - DT_CHR, - DT_LNK, - DT_SOCK, - DT_UNKNOWN, - DT_FIFO -}; - -static inline -unsigned int nfs_type_to_d_type(enum nfs_ftype type) +static inline unsigned int dt_type(struct inode *inode) { - return nfs_type2dtype[type]; + return (inode->i_mode >> 12) & 15; } +static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc); + /* * Once we've found the start of the dirent within a page: fill 'er up... */ @@ -321,6 +311,7 @@ { struct file *file = desc->file; struct nfs_entry *entry = desc->entry; + struct dentry *dentry = NULL; unsigned long fileid; int loop_count = 0, res; @@ -333,9 +324,16 @@ * retrieving the current dirent on the server */ fileid = nfs_fileid_to_ino_t(entry->ino); + /* Get a dentry if we have one */ + if (dentry != NULL) + dput(dentry); + dentry = nfs_readdir_lookup(desc); + /* Use readdirplus info */ - if (desc->plus && (entry->fattr->valid & NFS_ATTR_FATTR)) - d_type = nfs_type_to_d_type(entry->fattr->type); + if (dentry != NULL && dentry->d_inode != NULL) { + d_type = dt_type(dentry->d_inode); + fileid = dentry->d_inode->i_ino; + } res = filldir(dirent, entry->name, entry->len, entry->prev_cookie, fileid, d_type); @@ -352,7 +350,8 @@ } } dir_page_release(desc); - + if (dentry != NULL) + dput(dentry); dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (long long)desc->target, res); return res; } @@ -615,24 +614,10 @@ goto out_valid; } - /* - * Note: we're not holding inode->i_sem and so may be racing with - * operations that change the 
directory. We therefore save the - * change attribute *before* we do the RPC call. - */ - verifier = nfs_save_change_attribute(dir); - error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr); - if (!error) { - if (nfs_compare_fh(NFS_FH(inode), &fhandle)) - goto out_bad; - if (nfs_lookup_verify_inode(inode, isopen)) - goto out_zap_parent; - goto out_valid_renew; - } - if (NFS_STALE(inode)) goto out_bad; + verifier = nfs_save_change_attribute(dir); error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); if (error) goto out_bad; @@ -641,7 +626,6 @@ if ((error = nfs_refresh_inode(inode, &fattr)) != 0) goto out_bad; - out_valid_renew: nfs_renew_times(dentry); nfs_set_verifier(dentry, verifier); out_valid: @@ -723,6 +707,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) { + struct dentry *res; struct inode *inode = NULL; int error; struct nfs_fh fhandle; @@ -731,11 +716,11 @@ dfprintk(VFS, "NFS: lookup(%s/%s)\n", dentry->d_parent->d_name.name, dentry->d_name.name); - error = -ENAMETOOLONG; + res = ERR_PTR(-ENAMETOOLONG); if (dentry->d_name.len > NFS_SERVER(dir)->namelen) goto out; - error = -ENOMEM; + res = ERR_PTR(-ENOMEM); dentry->d_op = NFS_PROTO(dir)->dentry_ops; lock_kernel(); @@ -746,29 +731,27 @@ if (nfs_is_exclusive_create(dir, nd)) goto no_entry; - error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr); - if (error != 0) { - error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, - &fhandle, &fattr); - if (error == -ENOENT) - goto no_entry; - if (error != 0) - goto out_unlock; + error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); + if (error == -ENOENT) + goto no_entry; + if (error < 0) { + res = ERR_PTR(error); + goto out_unlock; } - error = -EACCES; + res = ERR_PTR(-EACCES); inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr); if (!inode) goto out_unlock; no_entry: - error = 0; - d_add(dentry, inode); + res = d_add_unique(dentry, inode); + if (res != NULL) + dentry = res; 
nfs_renew_times(dentry); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out_unlock: unlock_kernel(); out: - BUG_ON(error > 0); - return ERR_PTR(error); + return res; } #ifdef CONFIG_NFS_V4 @@ -798,15 +781,15 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { + struct dentry *res = NULL; struct inode *inode = NULL; - int error = 0; /* Check that we are indeed trying to open this file */ if (!is_atomic_open(dir, nd)) goto no_open; if (dentry->d_name.len > NFS_SERVER(dir)->namelen) { - error = -ENAMETOOLONG; + res = ERR_PTR(-ENAMETOOLONG); goto out; } dentry->d_op = NFS_PROTO(dir)->dentry_ops; @@ -828,7 +811,7 @@ inode = nfs4_atomic_open(dir, dentry, nd); unlock_kernel(); if (IS_ERR(inode)) { - error = PTR_ERR(inode); + int error = PTR_ERR(inode); switch (error) { /* Make a negative dentry */ case -ENOENT: @@ -841,16 +824,18 @@ /* case -EISDIR: */ /* case -EINVAL: */ default: + res = ERR_PTR(error); goto out; } } no_entry: - d_add(dentry, inode); + res = d_add_unique(dentry, inode); + if (res != NULL) + dentry = res; nfs_renew_times(dentry); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out: - BUG_ON(error > 0); - return ERR_PTR(error); + return res; no_open: return nfs_lookup(dir, dentry, nd); } @@ -906,83 +891,51 @@ } #endif /* CONFIG_NFSV4 */ -static inline -int find_dirent_name(nfs_readdir_descriptor_t *desc, struct page *page, struct dentry *dentry) +static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) { + struct dentry *parent = desc->file->f_dentry; + struct inode *dir = parent->d_inode; struct nfs_entry *entry = desc->entry; - int status; - - while((status = dir_decode(desc)) == 0) { - if (entry->len != dentry->d_name.len) - continue; - if (memcmp(entry->name, dentry->d_name.name, entry->len)) - continue; - if (!(entry->fattr->valid & NFS_ATTR_FATTR)) - continue; - break; - } - return status; -} - -/* - * Use the cached Readdirplus results in order to avoid a 
LOOKUP call - * whenever we believe that the parent directory has not changed. - * - * We assume that any file creation/rename changes the directory mtime. - * As this results in a page cache invalidation whenever it occurs, - * we don't require any other tests for cache coherency. - */ -static -int nfs_cached_lookup(struct inode *dir, struct dentry *dentry, - struct nfs_fh *fh, struct nfs_fattr *fattr) -{ - nfs_readdir_descriptor_t desc; - struct nfs_server *server; - struct nfs_entry entry; - struct page *page; - unsigned long timestamp; - int res; - - if (!NFS_USE_READDIRPLUS(dir)) - return -ENOENT; - server = NFS_SERVER(dir); - /* Don't use readdirplus unless the cache is stable */ - if ((server->flags & NFS_MOUNT_NOAC) != 0 - || nfs_caches_unstable(dir) - || nfs_attribute_timeout(dir)) - return -ENOENT; - if ((NFS_FLAGS(dir) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) != 0) - return -ENOENT; - timestamp = NFS_I(dir)->readdir_timestamp; - - entry.fh = fh; - entry.fattr = fattr; - - desc.decode = NFS_PROTO(dir)->decode_dirent; - desc.entry = &entry; - desc.page_index = 0; - desc.plus = 1; - - for(;(page = find_get_page(dir->i_mapping, desc.page_index)); desc.page_index++) { - - res = -EIO; - if (PageUptodate(page)) { - void * kaddr = kmap_atomic(page, KM_USER0); - desc.ptr = kaddr; - res = find_dirent_name(&desc, page, dentry); - kunmap_atomic(kaddr, KM_USER0); - } - page_cache_release(page); + struct dentry *dentry, *alias; + struct qstr name = { + .name = entry->name, + .len = entry->len, + }; + struct inode *inode; - if (res == 0) - goto out_found; - if (res != -EAGAIN) + switch (name.len) { + case 2: + if (name.name[0] == '.' 
&& name.name[1] == '.') + return dget_parent(parent); break; + case 1: + if (name.name[0] == '.') + return dget(parent); + } + name.hash = full_name_hash(name.name, name.len); + dentry = d_lookup(parent, &name); + if (dentry != NULL) + return dentry; + if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR)) + return NULL; + /* Note: caller is already holding the dir->i_sem! */ + dentry = d_alloc(parent, &name); + if (dentry == NULL) + return NULL; + dentry->d_op = NFS_PROTO(dir)->dentry_ops; + inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); + if (!inode) { + dput(dentry); + return NULL; } - return -ENOENT; - out_found: - fattr->timestamp = timestamp; - return 0; + alias = d_add_unique(dentry, inode); + if (alias != NULL) { + dput(dentry); + dentry = alias; + } + nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + return dentry; } /* @@ -1045,15 +998,9 @@ if (nd && (nd->flags & LOOKUP_CREATE)) open_flags = nd->intent.open.flags; - /* - * The 0 argument passed into the create function should one day - * contain the O_EXCL flag if requested. This allows NFSv3 to - * select the appropriate create strategy. Currently open_namei - * does not pass the create flags. 
- */ lock_kernel(); nfs_begin_data_update(dir); - inode = NFS_PROTO(dir)->create(dir, &dentry->d_name, &attr, open_flags); + inode = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags); nfs_end_data_update(dir); if (!IS_ERR(inode)) { d_instantiate(dentry, inode); @@ -1508,7 +1455,7 @@ if (cache->cred != cred || time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)) - || (NFS_FLAGS(inode) & NFS_INO_INVALID_ATTR)) + || (NFS_FLAGS(inode) & NFS_INO_INVALID_ACCESS)) return -ENOENT; memcpy(res, cache, sizeof(*res)); return 0; @@ -1522,6 +1469,7 @@ if (cache->cred) put_rpccred(cache->cred); cache->cred = get_rpccred(set->cred); + NFS_FLAGS(inode) &= ~NFS_INO_INVALID_ACCESS; } cache->jiffies = set->jiffies; cache->mask = set->mask; Index: linux-2.6.10/fs/nfs/unlink.c =================================================================== --- linux-2.6.10.orig/fs/nfs/unlink.c 2004-12-25 05:35:29.000000000 +0800 +++ linux-2.6.10/fs/nfs/unlink.c 2005-04-05 14:49:13.435686784 +0800 @@ -215,7 +215,6 @@ spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_NFSFS_RENAMED; spin_unlock(&dentry->d_lock); - if (data->task.tk_rpcwait == &nfs_delete_queue) - rpc_wake_up_task(&data->task); + rpc_wake_up_task(&data->task); nfs_put_unlinkdata(data); } Index: linux-2.6.10/fs/nfs/write.c =================================================================== --- linux-2.6.10.orig/fs/nfs/write.c 2004-12-25 05:35:23.000000000 +0800 +++ linux-2.6.10/fs/nfs/write.c 2005-04-05 14:49:13.443685568 +0800 @@ -61,7 +61,6 @@ #include #include #include -#include #include "delegation.h" @@ -83,49 +82,17 @@ static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int); static kmem_cache_t *nfs_wdata_cachep; -static mempool_t *nfs_wdata_mempool; -static mempool_t *nfs_commit_mempool; +mempool_t *nfs_wdata_mempool; +mempool_t *nfs_commit_mempool; static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); -static __inline__ struct nfs_write_data *nfs_writedata_alloc(void) -{ - struct 
nfs_write_data *p; - p = (struct nfs_write_data *)mempool_alloc(nfs_wdata_mempool, SLAB_NOFS); - if (p) { - memset(p, 0, sizeof(*p)); - INIT_LIST_HEAD(&p->pages); - } - return p; -} - -static __inline__ void nfs_writedata_free(struct nfs_write_data *p) -{ - mempool_free(p, nfs_wdata_mempool); -} - -static void nfs_writedata_release(struct rpc_task *task) +void nfs_writedata_release(struct rpc_task *task) { struct nfs_write_data *wdata = (struct nfs_write_data *)task->tk_calldata; nfs_writedata_free(wdata); } -static __inline__ struct nfs_write_data *nfs_commit_alloc(void) -{ - struct nfs_write_data *p; - p = (struct nfs_write_data *)mempool_alloc(nfs_commit_mempool, SLAB_NOFS); - if (p) { - memset(p, 0, sizeof(*p)); - INIT_LIST_HEAD(&p->pages); - } - return p; -} - -static __inline__ void nfs_commit_free(struct nfs_write_data *p) -{ - mempool_free(p, nfs_commit_mempool); -} - /* Adjust the file length if we're writing beyond the end */ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count) { @@ -184,11 +151,10 @@ int result, written = 0; struct nfs_write_data *wdata; - wdata = kmalloc(sizeof(*wdata), GFP_NOFS); + wdata = nfs_writedata_alloc(); if (!wdata) return -ENOMEM; - memset(wdata, 0, sizeof(*wdata)); wdata->flags = how; wdata->cred = ctx->cred; wdata->inode = inode; @@ -238,8 +204,7 @@ io_error: nfs_end_data_update_defer(inode); - - kfree(wdata); + nfs_writedata_free(wdata); return written ? written : result; } @@ -1199,7 +1164,8 @@ } if (time_before(complain, jiffies)) { printk(KERN_WARNING - "NFS: Server wrote less than requested.\n"); + "NFS: Server wrote zero bytes, expected %u.\n", + argp->count); complain = jiffies + 300 * HZ; } /* Can't do anything about it except throw an error. 
*/ Index: linux-2.6.10/fs/nfs/proc.c =================================================================== --- linux-2.6.10.orig/fs/nfs/proc.c 2004-12-25 05:35:28.000000000 +0800 +++ linux-2.6.10/fs/nfs/proc.c 2005-04-05 14:49:13.440686024 +0800 @@ -63,12 +63,12 @@ dprintk("%s: call getattr\n", __FUNCTION__); fattr->valid = 0; status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0); - dprintk("%s: reply getattr %d\n", __FUNCTION__, status); + dprintk("%s: reply getattr: %d\n", __FUNCTION__, status); if (status) return status; dprintk("%s: call statfs\n", __FUNCTION__); status = rpc_call(server->client_sys, NFSPROC_STATFS, fhandle, &fsinfo, 0); - dprintk("%s: reply statfs %d\n", __FUNCTION__, status); + dprintk("%s: reply statfs: %d\n", __FUNCTION__, status); if (status) return status; info->rtmax = NFS_MAXDATA; @@ -96,7 +96,7 @@ fattr->valid = 0; status = rpc_call(server->client, NFSPROC_GETATTR, fhandle, fattr, 0); - dprintk("NFS reply getattr\n"); + dprintk("NFS reply getattr: %d\n", status); return status; } @@ -114,7 +114,7 @@ dprintk("NFS call setattr\n"); fattr->valid = 0; status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0); - dprintk("NFS reply setattr\n"); + dprintk("NFS reply setattr: %d\n", status); return status; } @@ -213,15 +213,15 @@ } static struct inode * -nfs_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr, +nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, int flags) { struct nfs_fh fhandle; struct nfs_fattr fattr; struct nfs_createargs arg = { .fh = NFS_FH(dir), - .name = name->name, - .len = name->len, + .name = dentry->d_name.name, + .len = dentry->d_name.len, .sattr = sattr }; struct nfs_diropok res = { @@ -231,7 +231,7 @@ int status; fattr.valid = 0; - dprintk("NFS call create %s\n", name->name); + dprintk("NFS call create %s\n", dentry->d_name.name); status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0); dprintk("NFS reply create: %d\n", status); 
if (status == 0) { @@ -620,6 +620,7 @@ .version = 2, /* protocol version */ .dentry_ops = &nfs_dentry_operations, .dir_inode_ops = &nfs_dir_inode_operations, + .file_inode_ops = &nfs_file_inode_operations, .getroot = nfs_proc_get_root, .getattr = nfs_proc_getattr, .setattr = nfs_proc_setattr, Index: linux-2.6.10/fs/nfs/callback.c =================================================================== --- linux-2.6.10.orig/fs/nfs/callback.c 2004-12-25 05:34:57.000000000 +0800 +++ linux-2.6.10/fs/nfs/callback.c 2005-04-05 14:49:13.436686632 +0800 @@ -139,133 +139,10 @@ return ret; } -/* - * AUTH_NULL authentication - */ -static int nfs_callback_null_accept(struct svc_rqst *rqstp, u32 *authp) -{ - struct kvec *argv = &rqstp->rq_arg.head[0]; - struct kvec *resv = &rqstp->rq_res.head[0]; - - if (argv->iov_len < 3*4) - return SVC_GARBAGE; - - if (svc_getu32(argv) != 0) { - dprintk("svc: bad null cred\n"); - *authp = rpc_autherr_badcred; - return SVC_DENIED; - } - if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) { - dprintk("svc: bad null verf\n"); - *authp = rpc_autherr_badverf; - return SVC_DENIED; - } - - /* Signal that mapping to nobody uid/gid is required */ - rqstp->rq_cred.cr_uid = (uid_t) -1; - rqstp->rq_cred.cr_gid = (gid_t) -1; - rqstp->rq_cred.cr_group_info = groups_alloc(0); - if (rqstp->rq_cred.cr_group_info == NULL) - return SVC_DROP; /* kmalloc failure - client must retry */ - - /* Put NULL verifier */ - svc_putu32(resv, RPC_AUTH_NULL); - svc_putu32(resv, 0); - dprintk("%s: success, returning %d!\n", __FUNCTION__, SVC_OK); - return SVC_OK; -} - -static int nfs_callback_null_release(struct svc_rqst *rqstp) -{ - if (rqstp->rq_cred.cr_group_info) - put_group_info(rqstp->rq_cred.cr_group_info); - rqstp->rq_cred.cr_group_info = NULL; - return 0; /* don't drop */ -} - -static struct auth_ops nfs_callback_auth_null = { - .name = "null", - .flavour = RPC_AUTH_NULL, - .accept = nfs_callback_null_accept, - .release = nfs_callback_null_release, -}; - -/* - 
* AUTH_SYS authentication - */ -static int nfs_callback_unix_accept(struct svc_rqst *rqstp, u32 *authp) -{ - struct kvec *argv = &rqstp->rq_arg.head[0]; - struct kvec *resv = &rqstp->rq_res.head[0]; - struct svc_cred *cred = &rqstp->rq_cred; - u32 slen, i; - int len = argv->iov_len; - - dprintk("%s: start\n", __FUNCTION__); - cred->cr_group_info = NULL; - rqstp->rq_client = NULL; - if ((len -= 3*4) < 0) - return SVC_GARBAGE; - - /* Get length, time stamp and machine name */ - svc_getu32(argv); - svc_getu32(argv); - slen = XDR_QUADLEN(ntohl(svc_getu32(argv))); - if (slen > 64 || (len -= (slen + 3)*4) < 0) - goto badcred; - argv->iov_base = (void*)((u32*)argv->iov_base + slen); - argv->iov_len -= slen*4; - - cred->cr_uid = ntohl(svc_getu32(argv)); - cred->cr_gid = ntohl(svc_getu32(argv)); - slen = ntohl(svc_getu32(argv)); - if (slen > 16 || (len -= (slen + 2)*4) < 0) - goto badcred; - cred->cr_group_info = groups_alloc(slen); - if (cred->cr_group_info == NULL) - return SVC_DROP; - for (i = 0; i < slen; i++) - GROUP_AT(cred->cr_group_info, i) = ntohl(svc_getu32(argv)); - - if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) { - *authp = rpc_autherr_badverf; - return SVC_DENIED; - } - /* Put NULL verifier */ - svc_putu32(resv, RPC_AUTH_NULL); - svc_putu32(resv, 0); - dprintk("%s: success, returning %d!\n", __FUNCTION__, SVC_OK); - return SVC_OK; -badcred: - *authp = rpc_autherr_badcred; - return SVC_DENIED; -} - -static int nfs_callback_unix_release(struct svc_rqst *rqstp) -{ - if (rqstp->rq_cred.cr_group_info) - put_group_info(rqstp->rq_cred.cr_group_info); - rqstp->rq_cred.cr_group_info = NULL; - return 0; -} - -static struct auth_ops nfs_callback_auth_unix = { - .name = "unix", - .flavour = RPC_AUTH_UNIX, - .accept = nfs_callback_unix_accept, - .release = nfs_callback_unix_release, -}; - -/* - * Hook the authentication protocol - */ -static int nfs_callback_auth(struct svc_rqst *rqstp, u32 *authp) +static int nfs_callback_authenticate(struct svc_rqst 
*rqstp) { struct in_addr *addr = &rqstp->rq_addr.sin_addr; struct nfs4_client *clp; - struct kvec *argv = &rqstp->rq_arg.head[0]; - int flavour; - int retval; /* Don't talk to strangers */ clp = nfs4_find_client(addr); @@ -273,34 +150,19 @@ return SVC_DROP; dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr)); nfs4_put_client(clp); - flavour = ntohl(svc_getu32(argv)); - switch(flavour) { + switch (rqstp->rq_authop->flavour) { case RPC_AUTH_NULL: - if (rqstp->rq_proc != CB_NULL) { - *authp = rpc_autherr_tooweak; - retval = SVC_DENIED; - break; - } - rqstp->rq_authop = &nfs_callback_auth_null; - retval = nfs_callback_null_accept(rqstp, authp); + if (rqstp->rq_proc != CB_NULL) + return SVC_DENIED; break; case RPC_AUTH_UNIX: - /* Eat the authentication flavour */ - rqstp->rq_authop = &nfs_callback_auth_unix; - retval = nfs_callback_unix_accept(rqstp, authp); break; + case RPC_AUTH_GSS: + /* FIXME: RPCSEC_GSS handling? */ default: - /* FIXME: need to add RPCSEC_GSS upcalls */ -#if 0 - svc_ungetu32(argv); - retval = svc_authenticate(rqstp, authp); -#else - *authp = rpc_autherr_rejectedcred; - retval = SVC_DENIED; -#endif + return SVC_DENIED; } - dprintk("%s: flavour %d returning error %d\n", __FUNCTION__, flavour, retval); - return retval; + return SVC_OK; } /* @@ -321,5 +183,5 @@ .pg_name = "NFSv4 callback", /* service name */ .pg_class = "nfs", /* authentication class */ .pg_stats = &nfs4_callback_stats, - .pg_authenticate = nfs_callback_auth, + .pg_authenticate = nfs_callback_authenticate, }; Index: linux-2.6.10/fs/nfs/file.c =================================================================== --- linux-2.6.10.orig/fs/nfs/file.c 2004-12-25 05:35:01.000000000 +0800 +++ linux-2.6.10/fs/nfs/file.c 2005-04-05 14:49:13.453684048 +0800 @@ -67,6 +67,19 @@ .setattr = nfs_setattr, }; +#ifdef CONFIG_NFS_V4 + +struct inode_operations nfs4_file_inode_operations = { + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .getxattr 
= nfs4_getxattr, + .setxattr = nfs4_setxattr, + .listxattr = nfs4_listxattr, +}; + +#endif /* CONFIG_NFS_V4 */ + /* Hack for future NFS swap support */ #ifndef IS_SWAPFILE # define IS_SWAPFILE(inode) (0) @@ -295,10 +308,19 @@ static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) { struct inode *inode = filp->f_mapping->host; - int status; + int status = 0; lock_kernel(); - status = NFS_PROTO(inode)->lock(filp, cmd, fl); + /* Use local locking if mounted with "-onolock" */ + if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) + status = NFS_PROTO(inode)->lock(filp, cmd, fl); + else { + struct file_lock *cfl = posix_test_lock(filp, fl); + /* F_GETLK: report F_UNLCK unless a conflicting lock exists */ + fl->fl_type = F_UNLCK; + if (cfl != NULL) + memcpy(fl, cfl, sizeof(*fl)); + } unlock_kernel(); return status; } @@ -325,7 +347,11 @@ * still need to complete the unlock. */ lock_kernel(); - status = NFS_PROTO(inode)->lock(filp, cmd, fl); + /* Use local locking if mounted with "-onolock" */ + if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) + status = NFS_PROTO(inode)->lock(filp, cmd, fl); + else + status = posix_lock_file_wait(filp, fl); rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset); return status; } @@ -351,15 +377,19 @@ return status; lock_kernel(); - status = NFS_PROTO(inode)->lock(filp, cmd, fl); - /* If we were signalled we still need to ensure that - * we clean up any state on the server. We therefore - * record the lock call as having succeeded in order to - * ensure that locks_remove_posix() cleans it out when - * the process exits. - */ - if (status == -EINTR || status == -ERESTARTSYS) - posix_lock_file(filp, fl); + /* Use local locking if mounted with "-onolock" */ + if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) { + status = NFS_PROTO(inode)->lock(filp, cmd, fl); + /* If we were signalled we still need to ensure that + * we clean up any state on the server. 
We therefore + * record the lock call as having succeeded in order to + * ensure that locks_remove_posix() cleans it out when + * the process exits. + */ + if (status == -EINTR || status == -ERESTARTSYS) + posix_lock_file(filp, fl); + } else + status = posix_lock_file_wait(filp, fl); unlock_kernel(); if (status < 0) return status; @@ -396,15 +426,6 @@ if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) return -ENOLCK; - if (NFS_PROTO(inode)->version != 4) { - /* Fake OK code if mounted without NLM support */ - if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) { - if (IS_GETLK(cmd)) - return LOCK_USE_CLNT; - return 0; - } - } - /* * No BSD flocks over NFS allowed. * Note: we could try to fake a POSIX lock request here by Index: linux-2.6.10/fs/nfs/nfs3proc.c =================================================================== --- linux-2.6.10.orig/fs/nfs/nfs3proc.c 2004-12-25 05:34:45.000000000 +0800 +++ linux-2.6.10/fs/nfs/nfs3proc.c 2005-04-05 14:49:13.441685872 +0800 @@ -80,10 +80,10 @@ dprintk("%s: call fsinfo\n", __FUNCTION__); info->fattr->valid = 0; status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0); - dprintk("%s: reply fsinfo %d\n", __FUNCTION__, status); + dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status); if (!(info->fattr->valid & NFS_ATTR_FATTR)) { status = rpc_call(server->client_sys, NFS3PROC_GETATTR, fhandle, info->fattr, 0); - dprintk("%s: reply getattr %d\n", __FUNCTION__, status); + dprintk("%s: reply getattr: %d\n", __FUNCTION__, status); } return status; } @@ -101,7 +101,7 @@ fattr->valid = 0; status = rpc_call(server->client, NFS3PROC_GETATTR, fhandle, fattr, 0); - dprintk("NFS reply getattr\n"); + dprintk("NFS reply getattr: %d\n", status); return status; } @@ -119,7 +119,7 @@ dprintk("NFS call setattr\n"); fattr->valid = 0; status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0); - dprintk("NFS reply setattr\n"); + dprintk("NFS reply setattr: %d\n", status); return status; } @@ -198,7 +198,7 @@ if 
(res.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE)) entry->mask |= MAY_EXEC; } - dprintk("NFS reply access, status = %d\n", status); + dprintk("NFS reply access: %d\n", status); return status; } @@ -296,7 +296,7 @@ * For now, we don't implement O_EXCL. */ static struct inode * -nfs3_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr, +nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, int flags) { struct nfs_fh fhandle; @@ -304,8 +304,8 @@ struct nfs_fattr dir_attr; struct nfs3_createargs arg = { .fh = NFS_FH(dir), - .name = name->name, - .len = name->len, + .name = dentry->d_name.name, + .len = dentry->d_name.len, .sattr = sattr, }; struct nfs3_diropres res = { @@ -315,7 +315,7 @@ }; int status; - dprintk("NFS call create %s\n", name->name); + dprintk("NFS call create %s\n", dentry->d_name.name); arg.createmode = NFS3_CREATE_UNCHECKED; if (flags & O_EXCL) { arg.createmode = NFS3_CREATE_EXCLUSIVE; @@ -353,7 +353,7 @@ if (status != 0) goto out; if (fhandle.size == 0 || !(fattr.valid & NFS_ATTR_FATTR)) { - status = nfs3_proc_lookup(dir, name, &fhandle, &fattr); + status = nfs3_proc_lookup(dir, &dentry->d_name, &fhandle, &fattr); if (status != 0) goto out; } @@ -838,6 +838,7 @@ .version = 3, /* protocol version */ .dentry_ops = &nfs_dentry_operations, .dir_inode_ops = &nfs_dir_inode_operations, + .file_inode_ops = &nfs_file_inode_operations, .getroot = nfs3_proc_get_root, .getattr = nfs3_proc_getattr, .setattr = nfs3_proc_setattr, Index: linux-2.6.10/fs/nfs/nfs4proc.c =================================================================== --- linux-2.6.10.orig/fs/nfs/nfs4proc.c 2004-12-25 05:35:23.000000000 +0800 +++ linux-2.6.10/fs/nfs/nfs4proc.c 2005-04-05 14:49:13.456683592 +0800 @@ -477,7 +477,7 @@ /* * Returns an nfs4_state + an referenced inode */ -static int _nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) +static int 
_nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) { struct nfs4_state_owner *sp; struct nfs4_state *state = NULL; @@ -491,7 +491,7 @@ struct nfs_openargs o_arg = { .fh = NFS_FH(dir), .open_flags = flags, - .name = name, + .name = &dentry->d_name, .server = server, .bitmask = server->attr_bitmask, .claim = NFS4_OPEN_CLAIM_NULL, @@ -581,14 +581,14 @@ } -struct nfs4_state *nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr, struct rpc_cred *cred) +struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred) { struct nfs4_exception exception = { }; struct nfs4_state *res; int status; do { - status = _nfs4_do_open(dir, name, flags, sattr, cred, &res); + status = _nfs4_do_open(dir, dentry, flags, sattr, cred, &res); if (status == 0) break; /* NOTE: BAD_SEQID means the server and client disagree about the @@ -635,6 +635,8 @@ fattr->valid = 0; + if (state != NULL) + msg.rpc_cred = state->owner->so_cred; if (sattr->ia_valid & ATTR_SIZE) nfs4_copy_stateid(&arg.stateid, state, NULL); else @@ -658,6 +660,61 @@ return err; } +struct nfs4_closedata { + struct inode *inode; + struct nfs4_state *state; + struct nfs_closeargs arg; + struct nfs_closeres res; +}; + +static void nfs4_close_done(struct rpc_task *task) +{ + struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata; + struct nfs4_state *state = calldata->state; + struct nfs4_state_owner *sp = state->owner; + struct nfs_server *server = NFS_SERVER(calldata->inode); + + /* hmm. we are done with the inode, and in the process of freeing + * the state_owner. 
we keep this around to process errors + */ + nfs4_increment_seqid(task->tk_status, sp); + switch (task->tk_status) { + case 0: + state->state = calldata->arg.open_flags; + memcpy(&state->stateid, &calldata->res.stateid, + sizeof(state->stateid)); + break; + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + state->state = calldata->arg.open_flags; + nfs4_schedule_state_recovery(server->nfs4_state); + break; + default: + if (nfs4_async_handle_error(task, server) == -EAGAIN) { + rpc_restart_call(task); + return; + } + } + nfs4_put_open_state(state); + up(&sp->so_sema); + nfs4_put_state_owner(sp); + up_read(&server->nfs4_state->cl_sem); + kfree(calldata); +} + +static inline int nfs4_close_call(struct rpc_clnt *clnt, struct nfs4_closedata *calldata) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE], + .rpc_argp = &calldata->arg, + .rpc_resp = &calldata->res, + .rpc_cred = calldata->state->owner->so_cred, + }; + if (calldata->arg.open_flags != 0) + msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; + return rpc_call_async(clnt, &msg, 0, nfs4_close_done, calldata); +} + /* * It is possible for data to be read/written from a mem-mapped file * after the sys_close call (which hits the vfs layer as a flush). @@ -669,102 +726,34 @@ * * NOTE: Caller must be holding the sp->so_owner semaphore! 
*/ -static int _nfs4_do_close(struct inode *inode, struct nfs4_state *state) +int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode) { - struct nfs4_state_owner *sp = state->owner; - int status = 0; - struct nfs_closeargs arg = { - .fh = NFS_FH(inode), - }; - struct nfs_closeres res; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE], - .rpc_argp = &arg, - .rpc_resp = &res, - }; + struct nfs4_closedata *calldata; + int status; - if (test_bit(NFS_DELEGATED_STATE, &state->flags)) + /* Tell caller we're done */ + if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { + state->state = mode; return 0; - memcpy(&arg.stateid, &state->stateid, sizeof(arg.stateid)); + } + calldata = (struct nfs4_closedata *)kmalloc(sizeof(*calldata), GFP_KERNEL); + if (calldata == NULL) + return -ENOMEM; + calldata->inode = inode; + calldata->state = state; + calldata->arg.fh = NFS_FH(inode); /* Serialization for the sequence id */ - arg.seqid = sp->so_seqid, - status = rpc_call_sync(NFS_SERVER(inode)->client, &msg, RPC_TASK_NOINTR); - - /* hmm. we are done with the inode, and in the process of freeing - * the state_owner. we keep this around to process errors + calldata->arg.seqid = state->owner->so_seqid; + calldata->arg.open_flags = mode; + memcpy(&calldata->arg.stateid, &state->stateid, + sizeof(calldata->arg.stateid)); + status = nfs4_close_call(NFS_SERVER(inode)->client, calldata); + /* + * Return -EINPROGRESS on success in order to indicate to the + * caller that an asynchronous RPC call has been launched, and + * that it will release the semaphores on completion. 
*/ - nfs4_increment_seqid(status, sp); - if (!status) - memcpy(&state->stateid, &res.stateid, sizeof(state->stateid)); - - return status; -} - -int nfs4_do_close(struct inode *inode, struct nfs4_state *state) -{ - struct nfs_server *server = NFS_SERVER(state->inode); - struct nfs4_exception exception = { }; - int err; - do { - err = _nfs4_do_close(inode, state); - switch (err) { - case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_EXPIRED: - nfs4_schedule_state_recovery(server->nfs4_state); - err = 0; - default: - state->state = 0; - } - err = nfs4_handle_exception(server, err, &exception); - } while (exception.retry); - return err; -} - -static int _nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode) -{ - struct nfs4_state_owner *sp = state->owner; - int status = 0; - struct nfs_closeargs arg = { - .fh = NFS_FH(inode), - .seqid = sp->so_seqid, - .open_flags = mode, - }; - struct nfs_closeres res; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE], - .rpc_argp = &arg, - .rpc_resp = &res, - }; - - if (test_bit(NFS_DELEGATED_STATE, &state->flags)) - return 0; - memcpy(&arg.stateid, &state->stateid, sizeof(arg.stateid)); - status = rpc_call_sync(NFS_SERVER(inode)->client, &msg, RPC_TASK_NOINTR); - nfs4_increment_seqid(status, sp); - if (!status) - memcpy(&state->stateid, &res.stateid, sizeof(state->stateid)); - - return status; -} - -int nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode) -{ - struct nfs_server *server = NFS_SERVER(state->inode); - struct nfs4_exception exception = { }; - int err; - do { - err = _nfs4_do_downgrade(inode, state, mode); - switch (err) { - case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_EXPIRED: - nfs4_schedule_state_recovery(server->nfs4_state); - err = 0; - default: - state->state = mode; - } - err = nfs4_handle_exception(server, err, &exception); - } while (exception.retry); - return err; + return (status == 0) ? 
-EINPROGRESS : status; } struct inode * @@ -785,7 +774,7 @@ } cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); - state = nfs4_do_open(dir, &dentry->d_name, nd->intent.open.flags, &attr, cred); + state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred); put_rpccred(cred); if (IS_ERR(state)) return (struct inode *)state; @@ -802,7 +791,7 @@ cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); state = nfs4_open_delegated(dentry->d_inode, openflags, cred); if (IS_ERR(state)) - state = nfs4_do_open(dir, &dentry->d_name, openflags, NULL, cred); + state = nfs4_do_open(dir, dentry, openflags, NULL, cred); put_rpccred(cred); if (state == ERR_PTR(-ENOENT) && dentry->d_inode == 0) return 1; @@ -1026,7 +1015,7 @@ FMODE_WRITE, cred); if (IS_ERR(state)) state = nfs4_do_open(dentry->d_parent->d_inode, - &dentry->d_name, FMODE_WRITE, + dentry, FMODE_WRITE, NULL, cred); need_iput = 1; } @@ -1327,7 +1316,7 @@ */ static struct inode * -nfs4_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr, +nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, int flags) { struct inode *inode; @@ -1335,7 +1324,7 @@ struct rpc_cred *cred; cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); - state = nfs4_do_open(dir, name, flags, sattr, cred); + state = nfs4_do_open(dir, dentry, flags, sattr, cred); put_rpccred(cred); if (!IS_ERR(state)) { inode = state->inode; @@ -2049,6 +2038,86 @@ } static int +nfs4_server_supports_acls(struct nfs_server *server) +{ + return (server->caps & NFS_CAP_ACLS) + && (server->acl_bitmask & ACL4_SUPPORT_ALLOW_ACL) + && (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL); +} + +/* XXX: assuming XATTR_SIZE_MAX is a multiple of PAGE_CACHE_SIZE, + * and that it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_CACHE_SIZE) + * bytes on the stack. (Currently probably both true.) 
+ */ +#define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT) + +static void buf_to_pages(const void *buf, ssize_t buflen, + struct page **pages, unsigned int *pgbase) +{ + const void *p = buf; + + *pgbase = offset_in_page(buf); + p -= *pgbase; + while (p < buf + buflen) { + *(pages++) = virt_to_page(p); + p += PAGE_CACHE_SIZE; + } +} + +ssize_t +nfs4_proc_get_acl(struct inode *inode, void *buf, ssize_t buflen) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct page *pages[NFS4ACL_MAXPAGES]; + struct nfs_getaclargs args = { + .fh = NFS_FH(inode), + .acl_pages = pages, + .acl_len = buflen, + }; + ssize_t acl_len = buflen; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL], + .rpc_argp = &args, + .rpc_resp = &acl_len, + }; + int ret; + + if (!nfs4_server_supports_acls(server)) + return -EOPNOTSUPP; + buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); + ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + if (buflen && acl_len > buflen) + return -ERANGE; + if (ret == 0) + ret = acl_len; + return ret; +} + +int +nfs4_proc_set_acl(struct inode *inode, const void *buf, ssize_t buflen) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct page *pages[NFS4ACL_MAXPAGES]; + struct nfs_setaclargs arg = { + .fh = NFS_FH(inode), + .acl_pages = pages, + .acl_len = buflen, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETACL], + .rpc_argp = &arg, + .rpc_resp = NULL, + }; + int ret; + + if (!nfs4_server_supports_acls(server)) + return -EOPNOTSUPP; + buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); + ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0); + return ret; +} + +static int nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server) { struct nfs4_client *clp = server->nfs4_state; @@ -2589,6 +2658,7 @@ .version = 4, /* protocol version */ .dentry_ops = &nfs4_dentry_operations, .dir_inode_ops = &nfs4_dir_inode_operations, + .file_inode_ops = 
&nfs4_file_inode_operations, .getroot = nfs4_proc_get_root, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, Index: linux-2.6.10/fs/nfs/direct.c =================================================================== --- linux-2.6.10.orig/fs/nfs/direct.c 2005-03-31 15:35:23.000000000 +0800 +++ linux-2.6.10/fs/nfs/direct.c 2005-04-05 14:49:13.448684808 +0800 @@ -33,6 +33,7 @@ * 08 Jul 2002 Version for 2.4.19, with bug fixes --trondmy * 08 Jun 2003 Port to 2.5 APIs --cel * 31 Mar 2004 Handle direct I/O without VFS support --cel + * 15 Sep 2004 Parallel async reads --cel * */ @@ -43,6 +44,7 @@ #include #include #include +#include #include #include @@ -50,11 +52,27 @@ #include #include +#include #define NFSDBG_FACILITY NFSDBG_VFS -#define VERF_SIZE (2 * sizeof(__u32)) #define MAX_DIRECTIO_SIZE (4096UL << PAGE_SHIFT) +static kmem_cache_t *nfs_direct_cachep; + +/* + * This represents a set of asynchronous requests that we're waiting on + */ +struct nfs_direct_req { + struct kref kref; /* release manager */ + struct list_head list; /* nfs_read_data structs */ + wait_queue_head_t wait; /* wait for i/o completion */ + struct page ** pages; /* pages in our buffer */ + unsigned int npages; /* count of pages */ + atomic_t complete, /* i/os we're waiting for */ + count, /* bytes actually processed */ + error; /* any reported error */ +}; + /** * nfs_get_user_pages - find and set up pages underlying user's buffer @@ -71,7 +89,8 @@ unsigned long page_count; size_t array_size; - /* set an arbitrary limit to prevent arithmetic overflow */ + /* set an arbitrary limit to prevent type overflow */ + /* XXX: this can probably be as large as INT_MAX */ if (size > MAX_DIRECTIO_SIZE) { *pages = NULL; return -EFBIG; @@ -95,6 +114,8 @@ /** * nfs_free_user_pages - tear down page struct array * @pages: array of page struct pointers underlying target buffer + * @npages: number of pages in the array + * @do_dirty: dirty the pages as we release them */ static void 
nfs_free_user_pages(struct page **pages, int npages, int do_dirty) @@ -109,77 +130,231 @@ } /** - * nfs_direct_read_seg - Read in one iov segment. Generate separate - * read RPCs for each "rsize" bytes. + * nfs_direct_req_release - release nfs_direct_req structure for direct read + * @kref: kref object embedded in an nfs_direct_req structure + * + */ +static void nfs_direct_req_release(struct kref *kref) +{ + struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); + kmem_cache_free(nfs_direct_cachep, dreq); +} + +/** + * nfs_direct_read_alloc - allocate nfs_read_data structures for direct read + * @count: count of bytes for the read request + * @rsize: local rsize setting + * + * Note we also set the number of requests we have in the dreq when we are + * done. This prevents races with I/O completion so we will always wait + * until all requests have been dispatched and completed. + */ +static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int rsize) +{ + struct list_head *list; + struct nfs_direct_req *dreq; + unsigned int reads = 0; + + dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL); + if (!dreq) + return NULL; + + kref_init(&dreq->kref); + init_waitqueue_head(&dreq->wait); + INIT_LIST_HEAD(&dreq->list); + atomic_set(&dreq->count, 0); + atomic_set(&dreq->error, 0); + + list = &dreq->list; + for(;;) { + struct nfs_read_data *data = nfs_readdata_alloc(); + + if (unlikely(!data)) { + while (!list_empty(list)) { + data = list_entry(list->next, + struct nfs_read_data, pages); + list_del(&data->pages); + nfs_readdata_free(data); + } + kref_put(&dreq->kref, nfs_direct_req_release); + return NULL; + } + + INIT_LIST_HEAD(&data->pages); + list_add(&data->pages, list); + + data->req = (struct nfs_page *) dreq; + reads++; + if (nbytes <= rsize) + break; + nbytes -= rsize; + } + kref_get(&dreq->kref); + atomic_set(&dreq->complete, reads); + return dreq; +} + +/** + * nfs_direct_read_result - handle a read reply for a direct 
read request + * @data: address of NFS READ operation control block + * @status: status of this NFS READ operation + * + * We must hold a reference to all the pages in this direct read request + * until the RPCs complete. This could be long *after* we are woken up in + * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server). + */ +static void nfs_direct_read_result(struct nfs_read_data *data, int status) +{ + struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; + + if (likely(status >= 0)) + atomic_add(data->res.count, &dreq->count); + else + atomic_set(&dreq->error, status); + + if (unlikely(atomic_dec_and_test(&dreq->complete))) { + nfs_free_user_pages(dreq->pages, dreq->npages, 1); + wake_up(&dreq->wait); + kref_put(&dreq->kref, nfs_direct_req_release); + } +} + +/** + * nfs_direct_read_schedule - dispatch NFS READ operations for a direct read + * @dreq: address of nfs_direct_req struct for this request * @inode: target inode * @ctx: target file open context - * user_addr: starting address of this segment of user's buffer - * count: size of this segment - * file_offset: offset in file to begin the operation - * @pages: array of addresses of page structs defining user's buffer - * nr_pages: size of pages array + * @user_addr: starting address of this segment of user's buffer + * @count: size of this segment + * @file_offset: offset in file to begin the operation + * + * For each nfs_read_data struct that was allocated on the list, dispatch + * an NFS READ operation */ -static int -nfs_direct_read_seg(struct inode *inode, struct nfs_open_context *ctx, - unsigned long user_addr, size_t count, loff_t file_offset, - struct page **pages, int nr_pages) -{ - const unsigned int rsize = NFS_SERVER(inode)->rsize; - int tot_bytes = 0; - int curpage = 0; - struct nfs_read_data rdata = { - .inode = inode, - .cred = ctx->cred, - .args = { - .fh = NFS_FH(inode), - .context = ctx, - }, - .res = { - .fattr = &rdata.fattr, - }, - }; +static void 
nfs_direct_read_schedule(struct nfs_direct_req *dreq, + struct inode *inode, struct nfs_open_context *ctx, + unsigned long user_addr, size_t count, loff_t file_offset) +{ + struct list_head *list = &dreq->list; + struct page **pages = dreq->pages; + unsigned int curpage, pgbase; + unsigned int rsize = NFS_SERVER(inode)->rsize; - rdata.args.pgbase = user_addr & ~PAGE_MASK; - rdata.args.offset = file_offset; - do { - int result; - - rdata.args.count = count; - if (rdata.args.count > rsize) - rdata.args.count = rsize; - rdata.args.pages = &pages[curpage]; - - dprintk("NFS: direct read: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n", - rdata.args.count, (long long) rdata.args.offset, - user_addr + tot_bytes, rdata.args.pgbase, curpage); + curpage = 0; + pgbase = user_addr & ~PAGE_MASK; + do { + struct nfs_read_data *data; + unsigned int bytes; + + bytes = rsize; + if (count < rsize) + bytes = count; + + data = list_entry(list->next, struct nfs_read_data, pages); + list_del_init(&data->pages); + + data->inode = inode; + data->cred = ctx->cred; + data->args.fh = NFS_FH(inode); + data->args.context = ctx; + data->args.offset = file_offset; + data->args.pgbase = pgbase; + data->args.pages = &pages[curpage]; + data->args.count = bytes; + data->res.fattr = &data->fattr; + data->res.eof = 0; + data->res.count = bytes; + + NFS_PROTO(inode)->read_setup(data); + + data->task.tk_cookie = (unsigned long) inode; + data->task.tk_calldata = data; + data->task.tk_release = nfs_readdata_release; + data->complete = nfs_direct_read_result; lock_kernel(); - result = NFS_PROTO(inode)->read(&rdata); + rpc_execute(&data->task); unlock_kernel(); - if (result <= 0) { - if (tot_bytes > 0) - break; - if (result == -EISDIR) - result = -EINVAL; - return result; - } + dfprintk(VFS, "NFS: %4d initiated direct read call (req %s/%Ld, %u bytes @ offset %Lu)\n", + data->task.tk_pid, + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + bytes, + (unsigned long long)data->args.offset); + + file_offset += bytes; + 
pgbase += bytes; + curpage += pgbase >> PAGE_SHIFT; + pgbase &= ~PAGE_MASK; - tot_bytes += result; - if (rdata.res.eof) - break; - - rdata.args.offset += result; - rdata.args.pgbase += result; - curpage += rdata.args.pgbase >> PAGE_SHIFT; - rdata.args.pgbase &= ~PAGE_MASK; - count -= result; + count -= bytes; } while (count != 0); +} - /* XXX: should we zero the rest of the user's buffer if we - * hit eof? */ +/** + * nfs_direct_read_wait - wait for I/O completion for direct reads + * @dreq: request on which we are to wait + * @intr: whether or not this wait can be interrupted + * + * Collects and returns the final error value/byte-count. + */ +static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr) +{ + int result = 0; - return tot_bytes; + if (intr) { + result = wait_event_interruptible(dreq->wait, + (atomic_read(&dreq->complete) == 0)); + } else { + wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0)); + } + + if (!result) + result = atomic_read(&dreq->error); + if (!result) + result = atomic_read(&dreq->count); + + kref_put(&dreq->kref, nfs_direct_req_release); + return (ssize_t) result; +} + +/** + * nfs_direct_read_seg - Read in one iov segment. Generate separate + * read RPCs for each "rsize" bytes. 
+ * @inode: target inode + * @ctx: target file open context + * @user_addr: starting address of this segment of user's buffer + * @count: size of this segment + * @file_offset: offset in file to begin the operation + * @pages: array of addresses of page structs defining user's buffer + * @nr_pages: number of pages in the array + * + */ +static ssize_t nfs_direct_read_seg(struct inode *inode, + struct nfs_open_context *ctx, unsigned long user_addr, + size_t count, loff_t file_offset, struct page **pages, + unsigned int nr_pages) +{ + ssize_t result; + sigset_t oldset; + struct rpc_clnt *clnt = NFS_CLIENT(inode); + struct nfs_direct_req *dreq; + + dreq = nfs_direct_read_alloc(count, NFS_SERVER(inode)->rsize); + if (!dreq) + return -ENOMEM; + + dreq->pages = pages; + dreq->npages = nr_pages; + + rpc_clnt_sigmask(clnt, &oldset); + nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count, + file_offset); + result = nfs_direct_read_wait(dreq, clnt->cl_intr); + rpc_clnt_sigunmask(clnt, &oldset); + + return result; } /** @@ -191,9 +366,8 @@ * file_offset: offset in file to begin the operation * nr_segs: size of iovec array * - * generic_file_direct_IO has already pushed out any non-direct - * writes so that this read will see them when we read from the - * server. + * We've already pushed out any non-direct writes so that this read + * will see them when we read from the server. 
*/ static ssize_t nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx, @@ -222,8 +396,6 @@ result = nfs_direct_read_seg(inode, ctx, user_addr, size, file_offset, pages, page_count); - nfs_free_user_pages(pages, page_count, 1); - if (result <= 0) { if (tot_bytes > 0) break; @@ -249,31 +421,31 @@ * @pages: array of addresses of page structs defining user's buffer * nr_pages: size of pages array */ -static int -nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx, - unsigned long user_addr, size_t count, loff_t file_offset, - struct page **pages, int nr_pages) +static ssize_t nfs_direct_write_seg(struct inode *inode, + struct nfs_open_context *ctx, unsigned long user_addr, + size_t count, loff_t file_offset, struct page **pages, + int nr_pages) { const unsigned int wsize = NFS_SERVER(inode)->wsize; size_t request; - int curpage, need_commit, result, tot_bytes; + int curpage, need_commit; + ssize_t result, tot_bytes; struct nfs_writeverf first_verf; - struct nfs_write_data wdata = { - .inode = inode, - .cred = ctx->cred, - .args = { - .fh = NFS_FH(inode), - .context = ctx, - }, - .res = { - .fattr = &wdata.fattr, - .verf = &wdata.verf, - }, - }; + struct nfs_write_data *wdata; - wdata.args.stable = NFS_UNSTABLE; + wdata = nfs_writedata_alloc(); + if (!wdata) + return -ENOMEM; + + wdata->inode = inode; + wdata->cred = ctx->cred; + wdata->args.fh = NFS_FH(inode); + wdata->args.context = ctx; + wdata->args.stable = NFS_UNSTABLE; if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize) - wdata.args.stable = NFS_FILE_SYNC; + wdata->args.stable = NFS_FILE_SYNC; + wdata->res.fattr = &wdata->fattr; + wdata->res.verf = &wdata->verf; nfs_begin_data_update(inode); retry: @@ -281,20 +453,20 @@ tot_bytes = 0; curpage = 0; request = count; - wdata.args.pgbase = user_addr & ~PAGE_MASK; - wdata.args.offset = file_offset; - do { - wdata.args.count = request; - if (wdata.args.count > wsize) - wdata.args.count = wsize; - wdata.args.pages = 
&pages[curpage]; + wdata->args.pgbase = user_addr & ~PAGE_MASK; + wdata->args.offset = file_offset; + do { + wdata->args.count = request; + if (wdata->args.count > wsize) + wdata->args.count = wsize; + wdata->args.pages = &pages[curpage]; dprintk("NFS: direct write: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n", - wdata.args.count, (long long) wdata.args.offset, - user_addr + tot_bytes, wdata.args.pgbase, curpage); + wdata->args.count, (long long) wdata->args.offset, + user_addr + tot_bytes, wdata->args.pgbase, curpage); lock_kernel(); - result = NFS_PROTO(inode)->write(&wdata); + result = NFS_PROTO(inode)->write(wdata); unlock_kernel(); if (result <= 0) { @@ -304,20 +476,25 @@ } if (tot_bytes == 0) - memcpy(&first_verf.verifier, &wdata.verf.verifier, - VERF_SIZE); - if (wdata.verf.committed != NFS_FILE_SYNC) { + memcpy(&first_verf.verifier, &wdata->verf.verifier, + sizeof(first_verf.verifier)); + if (wdata->verf.committed != NFS_FILE_SYNC) { need_commit = 1; - if (memcmp(&first_verf.verifier, - &wdata.verf.verifier, VERF_SIZE)) + if (memcmp(&first_verf.verifier, &wdata->verf.verifier, + sizeof(first_verf.verifier))) goto sync_retry; } - tot_bytes += result; - wdata.args.offset += result; - wdata.args.pgbase += result; - curpage += wdata.args.pgbase >> PAGE_SHIFT; - wdata.args.pgbase &= ~PAGE_MASK; + tot_bytes += result; + + /* in case of a short write: stop now, let the app recover */ + if (result < wdata->args.count) + break; + + wdata->args.offset += result; + wdata->args.pgbase += result; + curpage += wdata->args.pgbase >> PAGE_SHIFT; + wdata->args.pgbase &= ~PAGE_MASK; request -= result; } while (request != 0); @@ -325,27 +502,27 @@ * Commit data written so far, even in the event of an error */ if (need_commit) { - wdata.args.count = tot_bytes; - wdata.args.offset = file_offset; + wdata->args.count = tot_bytes; + wdata->args.offset = file_offset; lock_kernel(); - result = NFS_PROTO(inode)->commit(&wdata); + result = NFS_PROTO(inode)->commit(wdata); unlock_kernel(); if 
(result < 0 || memcmp(&first_verf.verifier, - &wdata.verf.verifier, - VERF_SIZE) != 0) + &wdata->verf.verifier, + sizeof(first_verf.verifier)) != 0) goto sync_retry; } result = tot_bytes; out: nfs_end_data_update_defer(inode); - + nfs_writedata_free(wdata); return result; sync_retry: - wdata.args.stable = NFS_FILE_SYNC; + wdata->args.stable = NFS_FILE_SYNC; goto retry; } @@ -362,9 +539,9 @@ * that non-direct readers might access, so they will pick up these * writes immediately. */ -static int nfs_direct_write(struct inode *inode, struct nfs_open_context *ctx, - const struct iovec *iov, loff_t file_offset, - unsigned long nr_segs) +static ssize_t nfs_direct_write(struct inode *inode, + struct nfs_open_context *ctx, const struct iovec *iov, + loff_t file_offset, unsigned long nr_segs) { ssize_t tot_bytes = 0; unsigned long seg = 0; @@ -504,6 +681,8 @@ if (mapping->nrpages) { retval = filemap_fdatawrite(mapping); if (retval == 0) + retval = nfs_wb_all(inode); + if (retval == 0) retval = filemap_fdatawait(mapping); if (retval) goto out; @@ -593,6 +772,8 @@ if (mapping->nrpages) { retval = filemap_fdatawrite(mapping); if (retval == 0) + retval = nfs_wb_all(inode); + if (retval == 0) retval = filemap_fdatawait(mapping); if (retval) goto out; @@ -607,3 +788,21 @@ out: return retval; } + +int nfs_init_directcache(void) +{ + nfs_direct_cachep = kmem_cache_create("nfs_direct_cache", + sizeof(struct nfs_direct_req), + 0, SLAB_RECLAIM_ACCOUNT, + NULL, NULL); + if (nfs_direct_cachep == NULL) + return -ENOMEM; + + return 0; +} + +void nfs_destroy_directcache(void) +{ + if (kmem_cache_destroy(nfs_direct_cachep)) + printk(KERN_INFO "nfs_direct_cache: not all structures were freed\n"); +} Index: linux-2.6.10/fs/nfs/read.c =================================================================== --- linux-2.6.10.orig/fs/nfs/read.c 2004-12-25 05:33:47.000000000 +0800 +++ linux-2.6.10/fs/nfs/read.c 2005-04-05 14:49:13.437686480 +0800 @@ -24,7 +24,6 @@ #include #include #include -#include 
#include #include #include @@ -39,25 +38,11 @@ static void nfs_readpage_result_full(struct nfs_read_data *, int); static kmem_cache_t *nfs_rdata_cachep; -static mempool_t *nfs_rdata_mempool; +mempool_t *nfs_rdata_mempool; #define MIN_POOL_READ (32) -static struct nfs_read_data *nfs_readdata_alloc(void) -{ - struct nfs_read_data *p; - p = (struct nfs_read_data *)mempool_alloc(nfs_rdata_mempool, SLAB_NOFS); - if (p) - memset(p, 0, sizeof(*p)); - return p; -} - -static __inline__ void nfs_readdata_free(struct nfs_read_data *p) -{ - mempool_free(p, nfs_rdata_mempool); -} - -static void nfs_readdata_release(struct rpc_task *task) +void nfs_readdata_release(struct rpc_task *task) { struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata; nfs_readdata_free(data);