--- linux-2.6.7/Documentation/filesystems/00-INDEX.lsec	2004-06-15 23:20:26.000000000 -0600
+++ linux-2.6.7/Documentation/filesystems/00-INDEX	2005-03-23 14:28:24.576313528 -0700
@@ -28,6 +28,8 @@ jfs.txt
 	- info and mount options for the JFS filesystem.
 ncpfs.txt
 	- info on Novell Netware(tm) filesystem using NCP protocol.
+nfs4.txt
+	- info and mount options for the nfs4 filesystem.
 ntfs.txt
 	- info and mount options for the NTFS filesystem (Windows NT).
 proc.txt
--- linux-2.6.7/Documentation/filesystems/nfs4.txt.lsec	2005-03-23 14:28:24.576313528 -0700
+++ linux-2.6.7/Documentation/filesystems/nfs4.txt	2005-03-23 14:28:24.576313528 -0700
@@ -0,0 +1,20 @@
+NFS version 4
+=============
+
+NFS version 4 is specified by RFC3530. Compared to earlier NFS versions,
+it provides enhanced security and better client caching, among other features.
+
+In addition to basic file operations, the NFS client supports locking, Kerberos
+(basic authentication and integrity), and reboot recovery.
+
+As of this writing (July 2004), patches to nfs-utils and util-linux are required
+for NFSv4 support; see http://www.citi.umich.edu/projects/nfsv4/linux/ for
+patches and instructions.
+
+The kernel treats NFS version 4 as a separate filesystem type, nfs4, so it is
+mounted using "mount -tnfs4 server:/path /mntpoint", not by mounting the nfs
+filesystem with -onfsver=4.
+
+Mount options:
+
+XXX?
--- linux-2.6.7/fs/locks.c.lsec	2004-06-15 23:20:03.000000000 -0600
+++ linux-2.6.7/fs/locks.c	2005-03-23 14:28:22.425640480 -0700
@@ -317,7 +317,7 @@ static int flock_to_posix_lock(struct fi
 	if (l->l_len == 0)
 		fl->fl_end = OFFSET_MAX;
 
-	fl->fl_owner = current->files;
+	fl->fl_owner = 0;
 	fl->fl_pid = current->tgid;
 	fl->fl_file = filp;
 	fl->fl_flags = FL_POSIX;
@@ -357,7 +357,7 @@ static int flock64_to_posix_lock(struct
 	if (l->l_len == 0)
 		fl->fl_end = OFFSET_MAX;
 
-	fl->fl_owner = current->files;
+	fl->fl_owner = 0;
 	fl->fl_pid = current->tgid;
 	fl->fl_file = filp;
 	fl->fl_flags = FL_POSIX;
@@ -920,7 +920,7 @@ int posix_lock_file(struct file *filp, s
  */
 int locks_mandatory_locked(struct inode *inode)
 {
-	fl_owner_t owner = current->files;
+	unsigned int pid = current->tgid;
 	struct file_lock *fl;
 
 	/*
@@ -930,7 +930,9 @@ int locks_mandatory_locked(struct inode
 	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
 		if (!IS_POSIX(fl))
 			continue;
-		if (fl->fl_owner != owner)
+		if (fl->fl_owner != 0)
+			break;
+		if (fl->fl_pid != pid)
 			break;
 	}
 	unlock_kernel();
@@ -958,7 +960,7 @@ int locks_mandatory_area(int read_write,
 	int error;
 
 	locks_init_lock(&fl);
-	fl.fl_owner = current->files;
+	fl.fl_owner = 0;
 	fl.fl_pid = current->tgid;
 	fl.fl_file = filp;
 	fl.fl_flags = FL_POSIX | FL_ACCESS;
@@ -1684,7 +1686,7 @@ void locks_remove_posix(struct file *fil
 	lock_kernel();
 	while (*before != NULL) {
 		struct file_lock *fl = *before;
-		if (IS_POSIX(fl) && (fl->fl_owner == owner)) {
+		if (IS_POSIX(fl) && posix_same_owner(fl, &lock)) {
 			locks_delete_lock(before);
 			continue;
 		}
@@ -1982,18 +1984,6 @@ int lock_may_write(struct inode *inode,
 
 EXPORT_SYMBOL(lock_may_write);
 
-static inline void __steal_locks(struct file *file, fl_owner_t from)
-{
-	struct inode *inode = file->f_dentry->d_inode;
-	struct file_lock *fl = inode->i_flock;
-
-	while (fl) {
-		if (fl->fl_file == file && fl->fl_owner == from)
-			fl->fl_owner = current->files;
-		fl = fl->fl_next;
-	}
-}
-
 /* When getting ready for executing a binary, we make sure that current
  * has a files_struct on its own.
Before dropping the old files_struct, * we take over ownership of all locks for all file descriptors we own. @@ -2002,31 +1992,6 @@ static inline void __steal_locks(struct */ void steal_locks(fl_owner_t from) { - struct files_struct *files = current->files; - int i, j; - - if (from == files) - return; - - lock_kernel(); - j = 0; - for (;;) { - unsigned long set; - i = j * __NFDBITS; - if (i >= files->max_fdset || i >= files->max_fds) - break; - set = files->open_fds->fds_bits[j++]; - while (set) { - if (set & 1) { - struct file *file = files->fd[i]; - if (file) - __steal_locks(file, from); - } - i++; - set >>= 1; - } - } - unlock_kernel(); } EXPORT_SYMBOL(steal_locks); --- linux-2.6.7/fs/hostfs/hostfs_kern.c.lsec 2005-03-23 14:25:58.982447160 -0700 +++ linux-2.6.7/fs/hostfs/hostfs_kern.c 2005-03-23 14:33:11.946626600 -0700 @@ -290,7 +290,6 @@ static void hostfs_delete_inode(struct i { if(HOSTFS_I(inode)->fd != -1) { close_file(&HOSTFS_I(inode)->fd); - printk("Closing host fd in .delete_inode\n"); HOSTFS_I(inode)->fd = -1; } clear_inode(inode); @@ -303,7 +302,6 @@ static void hostfs_destroy_inode(struct if(HOSTFS_I(inode)->fd != -1) { close_file(&HOSTFS_I(inode)->fd); - printk("Closing host fd in .destroy_inode\n"); } kfree(HOSTFS_I(inode)); --- linux-2.6.7/fs/open.c.lsec 2005-03-23 14:26:01.774022776 -0700 +++ linux-2.6.7/fs/open.c 2005-03-23 14:28:23.226518728 -0700 @@ -1025,7 +1025,7 @@ int filp_close(struct file *filp, fl_own } dnotify_flush(filp, id); - locks_remove_posix(filp, id); + locks_remove_posix(filp, 0); fput(filp); return retval; } --- linux-2.6.7/fs/nfsd/export.c.lsec 2004-06-15 23:19:36.000000000 -0600 +++ linux-2.6.7/fs/nfsd/export.c 2005-03-23 14:28:24.686296808 -0700 @@ -255,7 +255,7 @@ static inline void svc_expkey_update(str new->ek_export = item->ek_export; } -static DefineSimpleCacheLookup(svc_expkey,0) /* no inplace updates */ +static DefineSimpleCacheLookup(svc_expkey) #define EXPORT_HASHBITS 8 #define EXPORT_HASHMAX (1<< EXPORT_HASHBITS) @@ -487,8 +487,72 @@ static inline void svc_export_update(str new->ex_fsid = item->ex_fsid; } -static DefineSimpleCacheLookup(svc_export,1) /* allow inplace updates */ +struct svc_export * +svc_export_lookup(struct svc_export *item, int set) +{ + struct svc_export *tmp, *new = NULL; + struct cache_head **hp, **head; + head = &svc_export_cache.hash_table[svc_export_hash(item)]; +retry: + if (set||new) + write_lock(&svc_export_cache.hash_lock); + else + read_lock(&svc_export_cache.hash_lock); + for(hp=head; *hp != NULL; hp = &tmp->h.next) { + tmp = container_of(*hp, struct svc_export, h); + if (svc_export_match(item, tmp)) { /* found a match */ + cache_get(&tmp->h); + if (set) { + if (test_bit(CACHE_NEGATIVE, &item->h.flags)) + set_bit(CACHE_NEGATIVE, &tmp->h.flags); + else { + clear_bit(CACHE_NEGATIVE, &tmp->h.flags); + svc_export_update(tmp, item); + } + } + if (set||new) + write_unlock(&svc_export_cache.hash_lock); + else + read_unlock(&svc_export_cache.hash_lock); + if (set) + cache_fresh(&svc_export_cache, &tmp->h, + item->h.expiry_time); + if (new) + svc_export_put(&new->h, &svc_export_cache); + return tmp; + } + } + /* Didn't find anything */ + if (new) { + svc_export_init(new, item); + new->h.next = *head; + *head = &new->h; + set_bit(CACHE_HASHED, &new->h.flags); + svc_export_cache.entries++; + if (set) { + tmp = new; + if (test_bit(CACHE_NEGATIVE, &item->h.flags)) + set_bit(CACHE_NEGATIVE, &tmp->h.flags); + else + svc_export_update(tmp, item); + } + } + if (set||new) + write_unlock(&svc_export_cache.hash_lock); + else + 
read_unlock(&svc_export_cache.hash_lock); + if (new && set) + cache_fresh(&svc_export_cache, &new->h, item->h.expiry_time); + if (new) + return new; + new = kmalloc(sizeof(*new), GFP_KERNEL); + if (new) { + cache_init(&new->h); + goto retry; + } + return NULL; +} struct svc_expkey * exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp) --- linux-2.6.7/fs/nfsd/nfs4callback.c.lsec 2005-03-23 14:28:24.578313224 -0700 +++ linux-2.6.7/fs/nfsd/nfs4callback.c 2005-03-23 14:28:24.578313224 -0700 @@ -0,0 +1,631 @@ +/* + * linux/fs/nfsd/nfs4callback.c + * + * Copyright (c) 2001 The Regents of the University of Michigan. + * All rights reserved. + * + * Kendrick Smith + * Andy Adamson + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NFSDDBG_FACILITY NFSDDBG_PROC + +#define NFSPROC4_CB_NULL 0 +#define NFSPROC4_CB_COMPOUND 1 + +/* forward declarations */ +static void nfs4_cb_null(struct rpc_task *task); + +/* Index of predefined Linux callback client operations */ + +enum { + NFSPROC4_CLNT_CB_NULL = 0, + NFSPROC4_CLNT_CB_GETATTR, + NFSPROC4_CLNT_CB_RECALL, +}; + +enum nfs_cb_opnum4 { + OP_CB_GETATTR = 3, + OP_CB_RECALL = 4, + OP_CB_ILLEGAL = 10044 +}; + + +#define NFS4_MAXTAGLEN 20 + +#define cb_compound_enc_hdr_sz 4 +#define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2)) +#define op_enc_sz 1 +#define op_dec_sz 2 +#define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2)) +#define enc_stateid_sz 16 + +#define NFS4_enc_cb_getattr_sz (cb_compound_enc_hdr_sz + \ + op_enc_sz + \ + enc_nfs4_fh_sz + 4) + +#define NFS4_dec_cb_getattr_sz (cb_compound_dec_hdr_sz + \ + op_dec_sz + \ + 11) + +#define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \ + 1 + enc_stateid_sz + \ + enc_nfs4_fh_sz) + +#define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ + op_dec_sz) + +/* +* Generic encode routines from fs/nfs/nfs4xdr.c +*/ +static inline u32 * +xdr_writemem(u32 *p, const void *ptr, int nbytes) +{ + int tmp = XDR_QUADLEN(nbytes); + if (!tmp) + return p; + p[tmp-1] = 0; + memcpy(p, ptr, nbytes); + return p + tmp; +} + +#define WRITE32(n) *p++ = htonl(n) +#define WRITEMEM(ptr,nbytes) do { \ + p = xdr_writemem(p, ptr, nbytes); \ +} while (0) +#define RESERVE_SPACE(nbytes) do { \ + p = xdr_reserve_space(xdr, nbytes); \ + if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __FUNCTION__); \ + BUG_ON(!p); \ +} while (0) + +/* + * Generic decode routines from fs/nfs/nfs4xdr.c + */ +#define DECODE_TAIL \ + status = 0; \ +out: \ + return status; \ +xdr_error: \ + dprintk("NFSD: xdr error! 
(%s:%d)\n", __FILE__, __LINE__); \ + status = -EIO; \ + goto out + +#define READ32(x) (x) = ntohl(*p++) +#define READ64(x) do { \ + (x) = (u64)ntohl(*p++) << 32; \ + (x) |= ntohl(*p++); \ +} while (0) +#define READTIME(x) do { \ + p++; \ + (x.tv_sec) = ntohl(*p++); \ + (x.tv_nsec) = ntohl(*p++); \ +} while (0) +#define READ_BUF(nbytes) do { \ + p = xdr_inline_decode(xdr, nbytes); \ + if (!p) { \ + dprintk("NFSD: %s: reply buffer overflowed in line %d.", \ + __FUNCTION__, __LINE__); \ + return -EIO; \ + } \ +} while (0) + +struct nfs4_cb_compound_hdr { + int status; + u32 ident; + u32 nops; + u32 taglen; + char * tag; +}; + +struct nfs4_cb_getattr { + struct nfs_fh fh; + u32 bm0; + u32 bm1; + __u64 change_attr; + __u64 size; + struct timespec mtime; +}; + +struct nfs4_cb_recall { + nfs4_stateid stateid; + int trunc; + struct nfs_fh fh; +}; + +static struct { + int stat; + int errno; +} nfs_cb_errtbl[] = { + { NFS4_OK, 0 }, + { NFS4ERR_PERM, EPERM }, + { NFS4ERR_NOENT, ENOENT }, + { NFS4ERR_IO, EIO }, + { NFS4ERR_NXIO, ENXIO }, + { NFS4ERR_ACCESS, EACCES }, + { NFS4ERR_EXIST, EEXIST }, + { NFS4ERR_XDEV, EXDEV }, + { NFS4ERR_NOTDIR, ENOTDIR }, + { NFS4ERR_ISDIR, EISDIR }, + { NFS4ERR_INVAL, EINVAL }, + { NFS4ERR_FBIG, EFBIG }, + { NFS4ERR_NOSPC, ENOSPC }, + { NFS4ERR_ROFS, EROFS }, + { NFS4ERR_MLINK, EMLINK }, + { NFS4ERR_NAMETOOLONG, ENAMETOOLONG }, + { NFS4ERR_NOTEMPTY, ENOTEMPTY }, + { NFS4ERR_DQUOT, EDQUOT }, + { NFS4ERR_STALE, ESTALE }, + { NFS4ERR_BADHANDLE, EBADHANDLE }, + { NFS4ERR_BAD_COOKIE, EBADCOOKIE }, + { NFS4ERR_NOTSUPP, ENOTSUPP }, + { NFS4ERR_TOOSMALL, ETOOSMALL }, + { NFS4ERR_SERVERFAULT, ESERVERFAULT }, + { NFS4ERR_BADTYPE, EBADTYPE }, + { NFS4ERR_LOCKED, EAGAIN }, + { NFS4ERR_RESOURCE, EREMOTEIO }, + { NFS4ERR_SYMLINK, ELOOP }, + { NFS4ERR_OP_ILLEGAL, EOPNOTSUPP }, + { NFS4ERR_DEADLOCK, EDEADLK }, + { -1, EIO } +}; + +static int +nfs_cb_stat_to_errno(int stat) +{ + int i; + for (i = 0; nfs_cb_errtbl[i].stat != -1; i++) { + if (nfs_cb_errtbl[i].stat == stat) + return nfs_cb_errtbl[i].errno; + } + /* If we cannot translate the error, the recovery routines should + * handle it. + * Note: remaining NFSv4 error codes have values > 10000, so should + * not conflict with native Linux error codes. 
+ */ + return stat; +} + +/* + * XDR encode + */ + +static int +encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr) +{ + u32 * p; + + RESERVE_SPACE(16); + WRITE32(0); /* tag length is always 0 */ + WRITE32(NFS4_MINOR_VERSION); + WRITE32(hdr->ident); + WRITE32(hdr->nops); + return 0; +} + +static int +encode_cb_getattr(struct xdr_stream *xdr, struct nfs4_cb_getattr *cb_get) +{ + u32 *p; + int len = cb_get->fh.size; + + RESERVE_SPACE(20 + len); + WRITE32(OP_CB_GETATTR); + WRITE32(len); + WRITEMEM(cb_get->fh.data, len); + WRITE32(2); + WRITE32(cb_get->bm0); + WRITE32(cb_get->bm1); + return 0; +} + +static int +encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec) +{ + u32 *p; + int len = cb_rec->fh.size; + + RESERVE_SPACE(8+sizeof(cb_rec->stateid.data)); + WRITE32(OP_CB_RECALL); + WRITEMEM(cb_rec->stateid.data, sizeof(cb_rec->stateid.data)); + WRITE32(cb_rec->trunc); + WRITE32(len); + WRITEMEM(cb_rec->fh.data, len); + return 0; +} + +static int +nfs4_xdr_enc_cb_getattr(struct rpc_rqst *req, u32 *p, struct nfs4_cb_getattr *args) +{ + struct xdr_stream xdr; + struct nfs4_cb_compound_hdr hdr = { + .nops = 1, + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_cb_compound_hdr(&xdr, &hdr); + return (encode_cb_getattr(&xdr, args)); +} + +static int +nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, u32 *p, struct nfs4_cb_recall *args) +{ + struct xdr_stream xdr; + struct nfs4_cb_compound_hdr hdr = { + .nops = 1, + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_cb_compound_hdr(&xdr, &hdr); + return (encode_cb_recall(&xdr, args)); +} + + +static int +decode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr){ + u32 *p; + + READ_BUF(8); + READ32(hdr->status); + READ32(hdr->taglen); + READ_BUF(hdr->taglen + 4); + hdr->tag = (char *)p; + p += XDR_QUADLEN(hdr->taglen); + READ32(hdr->nops); + return 0; +} + +static int +decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) +{ + u32 *p; + u32 op; + int32_t nfserr; + + READ_BUF(8); + READ32(op); + if (op != expected) { + dprintk("NFSD: decode_cb_op_hdr: Callback server returned operation" + " %d but we issued a request for %d\n", + op, expected); + return -EIO; + } + READ32(nfserr); + if (nfserr != NFS_OK) + return -nfs_cb_stat_to_errno(nfserr); + return 0; +} + +static int +decode_cb_getattr(struct xdr_stream *xdr, struct nfs4_cb_getattr *cb_get) +{ + int status; + u32 bmlen, + attrlen =0, + bmval0 =0, + bmval1 =0, + len = 0; + u32 *p; + + status = decode_cb_op_hdr(xdr, OP_CB_GETATTR); + if (status) + return status; + READ_BUF(4); + READ32(bmlen); + if( (bmlen < 1) || (bmlen > 2)) + goto xdr_error; + READ_BUF((bmlen << 2) + 4); + READ32(bmval0); + if (bmval0 & ~(FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) + goto out_bad_bitmap; + if (bmlen == 2) { + READ32(bmval1); + if (bmval1 & ~ FATTR4_WORD1_TIME_MODIFY) + goto out_bad_bitmap; + } + READ32(attrlen); + if (bmval0 & FATTR4_WORD0_CHANGE) { + READ_BUF(8); + len += 8; + READ64(cb_get->change_attr); + dprintk("decode_cb_getattr: changeid=%Ld\n", + (long long)cb_get->change_attr); + } + if (bmval0 & FATTR4_WORD0_SIZE) { + READ_BUF(8); + len += 8; + READ64(cb_get->size); + dprintk("decode_cb_getattr: size=%Ld\n", + (long long)cb_get->size); + } + if (bmval1 & FATTR4_WORD1_TIME_MODIFY) { + READ_BUF(12); + len += 12; + READTIME(cb_get->mtime); + dprintk("decode_cb_gatattr: mtime=%ld\n", + (long)cb_get->mtime.tv_sec); + } + if (len != attrlen) + goto xdr_error; + + DECODE_TAIL; + +out_bad_bitmap: + dprintk("NFSD: %s 
Callback server returned bad attribute bitmap\n", + __FUNCTION__); + return -EIO; + +} + +static int +nfs4_xdr_dec_cb_getattr(struct rpc_rqst *rqstp, u32 *p, struct nfs4_cb_getattr *res) +{ + struct xdr_stream xdr; + struct nfs4_cb_compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_cb_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_cb_getattr(&xdr, res); +out: + return status; +} + +static int +nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, u32 *p) +{ + struct xdr_stream xdr; + struct nfs4_cb_compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_cb_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_cb_op_hdr(&xdr, OP_CB_RECALL); +out: + return status; +} + +static int +nfs4_xdr_enc_null(struct rpc_rqst *req, u32 *p) +{ + struct xdr_stream xdrs, *xdr = &xdrs; + + xdr_init_encode(&xdrs, &req->rq_snd_buf, p); + RESERVE_SPACE(0); + return 0; +} + +static int +nfs4_xdr_dec_null(struct rpc_rqst *req, u32 *p) +{ + return 0; +} + +/* + * RPC procedure tables + */ +#ifndef MAX +# define MAX(a, b) (((a) > (b))? (a) : (b)) +#endif + +#define PROC(proc, argtype, restype) \ +[NFSPROC4_CLNT_##proc] = { \ + .p_proc = NFSPROC4_CB_COMPOUND, \ + .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \ + .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \ + .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \ +} + +struct rpc_procinfo nfs4_cb_procedures[] = { + PROC(CB_GETATTR, enc_cb_getattr, dec_cb_getattr), + PROC(CB_RECALL, enc_cb_recall, dec_cb_recall), +}; + +struct rpc_version nfs_cb_version4 = { + .number = 1, + .nrprocs = sizeof(nfs4_cb_procedures)/sizeof(nfs4_cb_procedures[0]), + .procs = nfs4_cb_procedures +}; + +static struct rpc_version * nfs_cb_version[] = { + NULL, + &nfs_cb_version4, +}; + +struct rpc_procinfo nfs4_cb_null_proc= { + .p_proc = NFSPROC4_CB_NULL, + .p_encode = (kxdrproc_t)nfs4_xdr_enc_null, + .p_decode = (kxdrproc_t) nfs4_xdr_dec_null, + .p_bufsiz = 0, +}; + +/* + * Use the SETCLIENTID credential + */ +struct rpc_cred * +nfsd4_lookupcred(struct nfs4_client *clp, int taskflags) +{ + struct auth_cred acred; + struct rpc_clnt *clnt = clp->cl_callback.cb_client; + struct rpc_cred *ret = NULL; + + if (!clnt) + goto out; + get_group_info(clp->cl_cred.cr_group_info); + acred.uid = clp->cl_cred.cr_uid; + acred.gid = clp->cl_cred.cr_gid; + acred.group_info = clp->cl_cred.cr_group_info; + + dprintk("NFSD: looking up %s cred\n", + clnt->cl_auth->au_ops->au_name); + ret = rpcauth_lookup_credcache(clnt->cl_auth, &acred, taskflags); + put_group_info(clp->cl_cred.cr_group_info); +out: + return ret; +} + +/* + * Set up the callback client and put a NFSPROC4_CB_NULL on the wire... + */ +void +nfsd4_probe_callback(struct nfs4_client *clp) +{ + struct sockaddr_in addr; + struct nfs4_callback *cb = &clp->cl_callback; + struct rpc_timeout timeparms; + struct rpc_xprt * xprt; + struct rpc_program * program = &cb->cb_program; + struct rpc_stat * stat = &cb->cb_stat; + struct rpc_clnt * clnt; + struct rpc_message msg = { + .rpc_proc = &nfs4_cb_null_proc, + .rpc_argp = clp, + }; + char hostname[32]; + int status; + + dprintk("NFSD: probe_callback. 
cb_parsed %d cb_set %d 1\n", + cb->cb_parsed, cb->cb_set); + if (!cb->cb_parsed || cb->cb_set) + goto out_err; + + /* Currently, we only support tcp for the callback channel */ + if (cb->cb_netid.len !=3 || memcmp((char *)cb->cb_netid.data, "tcp", 3)) + goto out_err; + + /* Initialize address */ + memset(&addr, 0, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_port = htons(cb->cb_port); + addr.sin_addr.s_addr = htonl(cb->cb_addr); + + /* Initialize timeout */ + timeparms.to_initval = HZ; + timeparms.to_retries = 5; + timeparms.to_maxval = NFSD_LEASE_TIME*HZ; + timeparms.to_exponential = 1; + + /* Create RPC transport */ + if (!(xprt = xprt_create_proto(IPPROTO_TCP, &addr, &timeparms))) { + dprintk("NFSD: couldn't create callback transport!\n"); + goto out_err; + } + + /* Initialize rpc_program */ + program->name = "nfs4_cb"; + program->number = cb->cb_prog; + program->nrvers = sizeof(nfs_cb_version)/sizeof(nfs_cb_version[0]); + program->version = nfs_cb_version; + program->stats = stat; + + /* Initialize rpc_stat */ + memset(stat, 0, sizeof(struct rpc_stat)); + stat->program = program; + + /* Create RPC client + * + * XXX AUTH_UNIX only - need AUTH_GSS.... + */ + sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr.sin_addr.s_addr)); + if (!(clnt = rpc_create_client(xprt, hostname, program, 1, RPC_AUTH_UNIX))) { + dprintk("NFSD: couldn't create callback client\n"); + goto out_xprt; + } + clnt->cl_intr = 1; + clnt->cl_softrtry = 1; + clnt->cl_chatty = 1; + cb->cb_client = clnt; + + /* Kick rpciod, put the call on the wire. */ + + if (rpciod_up() != 0) { + dprintk("nfsd: couldn't start rpciod for callbacks!\n"); + goto out_clnt; + } + + /* the task holds a reference to the nfs4_client struct */ + atomic_inc(&clp->cl_count); + + msg.rpc_cred = nfsd4_lookupcred(clp,0); + status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, nfs4_cb_null, 0); + + if (status != 0) { + dprintk("NFSD: asynchronous NFSPROC4_CB_NULL failed!\n"); + goto out_rpciod; + } + return; + +out_rpciod: + rpciod_down(); +out_clnt: + rpc_shutdown_client(clnt); + goto out_err; +out_xprt: + xprt_destroy(xprt); +out_err: + dprintk("NFSD: warning: no callback path to client %.*s\n", + clp->cl_name.len, clp->cl_name.data); + cb->cb_client = NULL; +} + +static void +nfs4_cb_null(struct rpc_task *task) +{ + struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp; + struct nfs4_callback *cb = &clp->cl_callback; + u32 addr = htonl(cb->cb_addr); + + dprintk("NFSD: nfs4_cb_null task->tk_status %d\n", task->tk_status); + + if (task->tk_status < 0) { + dprintk("NFSD: callback establishment to client %.*s failed\n", + clp->cl_name.len, clp->cl_name.data); + goto out; + } + cb->cb_set = 1; + dprintk("NFSD: callback set to client %u.%u.%u.%u\n", NIPQUAD(addr)); +out: + put_nfs4_client(clp); +} --- linux-2.6.7/fs/nfsd/nfs4xdr.c.lsec 2004-06-15 23:19:52.000000000 -0600 +++ linux-2.6.7/fs/nfsd/nfs4xdr.c 2005-03-23 14:28:23.924412632 -0700 @@ -55,6 +55,8 @@ #include #include #include +#include +#include #define NFSDDBG_FACILITY NFSDDBG_XDR @@ -287,27 +289,40 @@ u32 *read_buf(struct nfsd4_compoundargs return p; } -char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int nbytes) +static int +defer_free(struct nfsd4_compoundargs *argp, + void (*release)(const void *), void *p) { struct tmpbuf *tb; + + tb = kmalloc(sizeof(*tb), GFP_KERNEL); + if (!tb) + return -ENOMEM; + tb->buf = p; + tb->release = release; + tb->next = argp->to_free; + argp->to_free = tb; + return 0; +} + +char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int 
nbytes) +{ + void *new = NULL; if (p == argp->tmp) { - p = kmalloc(nbytes, GFP_KERNEL); - if (!p) return NULL; + new = kmalloc(nbytes, GFP_KERNEL); + if (!new) return NULL; + p = new; memcpy(p, argp->tmp, nbytes); } else { if (p != argp->tmpp) BUG(); argp->tmpp = NULL; } - tb = kmalloc(sizeof(*tb), GFP_KERNEL); - if (!tb) { - kfree(p); + if (defer_free(argp, kfree, p)) { + kfree(new); return NULL; - } - tb->buf = p; - tb->next = argp->to_free; - argp->to_free = tb; - return (char*)p; + } else + return (char *)p; } @@ -335,7 +350,8 @@ nfsd4_decode_bitmap(struct nfsd4_compoun } static int -nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *iattr) +nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *iattr, + struct nfs4_acl **acl) { int expected_len, len = 0; u32 dummy32; @@ -364,6 +380,51 @@ nfsd4_decode_fattr(struct nfsd4_compound READ64(iattr->ia_size); iattr->ia_valid |= ATTR_SIZE; } + if (bmval[0] & FATTR4_WORD0_ACL) { + int nace, i; + struct nfs4_ace ace; + + READ_BUF(4); len += 4; + READ32(nace); + + *acl = nfs4_acl_new(); + if (*acl == NULL) { + status = -ENOMEM; + goto out_nfserr; + } + defer_free(argp, (void (*)(const void *))nfs4_acl_free, *acl); + + for (i = 0; i < nace; i++) { + READ_BUF(16); len += 16; + READ32(ace.type); + READ32(ace.flag); + READ32(ace.access_mask); + READ32(dummy32); + READ_BUF(dummy32); + len += XDR_QUADLEN(dummy32) << 2; + READMEM(buf, dummy32); + if (check_utf8(buf, dummy32)) + return nfserr_inval; + ace.whotype = nfs4_acl_get_whotype(buf, dummy32); + status = 0; + if (ace.whotype != NFS4_ACL_WHO_NAMED) + ace.who = 0; + else if (ace.flag & NFS4_ACE_IDENTIFIER_GROUP) + status = nfsd_map_name_to_gid(argp->rqstp, + buf, dummy32, &ace.who); + else + status = nfsd_map_name_to_uid(argp->rqstp, + buf, dummy32, &ace.who); + if (status) + goto out_nfserr; + if (nfs4_acl_add_ace(*acl, ace.type, ace.flag, + ace.access_mask, ace.whotype, ace.who) != 0) { + status = -ENOMEM; + goto out_nfserr; + } + } + } else + *acl = NULL; if (bmval[1] & FATTR4_WORD1_MODE) { READ_BUF(4); len += 4; @@ -549,7 +610,7 @@ nfsd4_decode_create(struct nfsd4_compoun if ((status = check_filename(create->cr_name, create->cr_namelen, nfserr_inval))) return status; - if ((status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr))) + if ((status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, &create->cr_acl))) goto out; DECODE_TAIL; @@ -698,7 +759,7 @@ nfsd4_decode_open(struct nfsd4_compounda switch (open->op_createmode) { case NFS4_CREATE_UNCHECKED: case NFS4_CREATE_GUARDED: - if ((status = nfsd4_decode_fattr(argp, open->op_bmval, &open->op_iattr))) + if ((status = nfsd4_decode_fattr(argp, open->op_bmval, &open->op_iattr, &open->op_acl))) goto out; break; case NFS4_CREATE_EXCLUSIVE: @@ -875,7 +936,7 @@ nfsd4_decode_setattr(struct nfsd4_compou READ_BUF(sizeof(stateid_t)); READ32(setattr->sa_stateid.si_generation); COPYMEM(&setattr->sa_stateid.si_opaque, sizeof(stateid_opaque_t)); - if ((status = nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr))) + if ((status = nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, &setattr->sa_acl))) goto out; DECODE_TAIL; @@ -1288,32 +1349,24 @@ static u32 nfs4_ftypes[16] = { NF4SOCK, NF4BAD, NF4LNK, NF4BAD, }; -static inline int -xdr_padding(int l) -{ - return 3 - ((l - 1) & 3); /* smallest i>=0 such that (l+i)%4 = 0 */ -} - static int -nfsd4_encode_name(struct svc_rqst *rqstp, int group, uid_t id, +nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, 
uid_t id, int group, u32 **p, int *buflen) { int status; - u32 len; if (*buflen < (XDR_QUADLEN(IDMAP_NAMESZ) << 2) + 4) return nfserr_resource; - if (group) + if (whotype != NFS4_ACL_WHO_NAMED) + status = nfs4_acl_write_who(whotype, (u8 *)(*p + 1)); + else if (group) status = nfsd_map_gid_to_name(rqstp, id, (u8 *)(*p + 1)); else status = nfsd_map_uid_to_name(rqstp, id, (u8 *)(*p + 1)); if (status < 0) return nfserrno(status); - len = (unsigned)status; - *(*p)++ = htonl(len); - memset((u8 *)*p + len, 0, xdr_padding(len)); - *p += XDR_QUADLEN(len); - *buflen -= (XDR_QUADLEN(len) << 2) + 4; + *p = xdr_encode_opaque(*p, NULL, status); + *buflen -= (XDR_QUADLEN(status) << 2) + 4; BUG_ON(*buflen < 0); return 0; } @@ -1321,13 +1374,20 @@ nfsd4_encode_name(struct svc_rqst *rqstp static inline int nfsd4_encode_user(struct svc_rqst *rqstp, uid_t uid, u32 **p, int *buflen) { - return nfsd4_encode_name(rqstp, uid, 0, p, buflen); + return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, uid, 0, p, buflen); } static inline int nfsd4_encode_group(struct svc_rqst *rqstp, uid_t gid, u32 **p, int *buflen) { - return nfsd4_encode_name(rqstp, gid, 1, p, buflen); + return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, gid, 1, p, buflen); +} + +static inline int +nfsd4_encode_aclname(struct svc_rqst *rqstp, int whotype, uid_t id, int group, + u32 **p, int *buflen) +{ + return nfsd4_encode_name(rqstp, whotype, id, group, p, buflen); } @@ -1354,6 +1414,8 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s u64 dummy64; u32 *p = buffer; int status; + int aclsupport = 0; + struct nfs4_acl *acl = NULL; BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1); BUG_ON(bmval0 & ~NFSD_SUPPORTED_ATTRS_WORD0); @@ -1376,6 +1438,17 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s goto out; fhp = &tempfh; } + if (bmval0 & (FATTR4_WORD0_ACL | FATTR4_WORD0_ACLSUPPORT + | FATTR4_WORD0_SUPPORTED_ATTRS)) { + status = nfsd4_get_nfs4_acl(rqstp, dentry, &acl); + aclsupport = (status == 0); + if (bmval0 & FATTR4_WORD0_ACL) { + if (status == -EOPNOTSUPP) + bmval0 &= ~FATTR4_WORD0_ACL; + else if (status != 0) + goto out_nfserr; + } + } if ((buflen -= 16) < 0) goto out_resource; @@ -1388,7 +1461,9 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s if ((buflen -= 12) < 0) goto out_resource; WRITE32(2); - WRITE32(NFSD_SUPPORTED_ATTRS_WORD0); + WRITE32(aclsupport ? + NFSD_SUPPORTED_ATTRS_WORD0 : + NFSD_SUPPORTED_ATTRS_WORD0 & ~FATTR4_WORD0_ACL); WRITE32(NFSD_SUPPORTED_ATTRS_WORD1); } if (bmval0 & FATTR4_WORD0_TYPE) { @@ -1459,10 +1534,44 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s goto out_resource; WRITE32(0); } + if (bmval0 & FATTR4_WORD0_ACL) { + struct nfs4_ace *ace; + struct list_head *h; + + if (acl == NULL) { + if ((buflen -= 4) < 0) + goto out_resource; + + WRITE32(0); + goto out_acl; + } + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(acl->naces); + + list_for_each(h, &acl->ace_head) { + ace = list_entry(h, struct nfs4_ace, l_ace); + + if ((buflen -= 4*3) < 0) + goto out_resource; + WRITE32(ace->type); + WRITE32(ace->flag); + WRITE32(ace->access_mask & NFS4_ACE_MASK_ALL); + status = nfsd4_encode_aclname(rqstp, ace->whotype, + ace->who, ace->flag & NFS4_ACE_IDENTIFIER_GROUP, + &p, &buflen); + if (status == nfserr_resource) + goto out_resource; + if (status) + goto out; + } + } +out_acl: if (bmval0 & FATTR4_WORD0_ACLSUPPORT) { if ((buflen -= 4) < 0) goto out_resource; - WRITE32(0); + WRITE32(aclsupport ? 
+ ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL : 0); } if (bmval0 & FATTR4_WORD0_CANSETTIME) { if ((buflen -= 4) < 0) @@ -1645,6 +1754,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s status = nfs_ok; out: + nfs4_acl_free(acl); if (fhp == &tempfh) fh_put(&tempfh); return status; @@ -2471,6 +2581,24 @@ nfs4svc_encode_voidres(struct svc_rqst * return xdr_ressize_check(rqstp, p); } +void nfsd4_release_compoundargs(struct nfsd4_compoundargs *args) +{ + if (args->ops != args->iops) { + kfree(args->ops); + args->ops = args->iops; + } + if (args->tmpp) { + kfree(args->tmpp); + args->tmpp = NULL; + } + while (args->to_free) { + struct tmpbuf *tb = args->to_free; + args->to_free = tb->next; + tb->release(tb->buf); + kfree(tb); + } +} + int nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, u32 *p, struct nfsd4_compoundargs *args) { @@ -2487,20 +2615,7 @@ nfs4svc_decode_compoundargs(struct svc_r status = nfsd4_decode_compound(args); if (status) { - if (args->ops != args->iops) { - kfree(args->ops); - args->ops = args->iops; - } - if (args->tmpp) { - kfree(args->tmpp); - args->tmpp = NULL; - } - while (args->to_free) { - struct tmpbuf *tb = args->to_free; - args->to_free = tb->next; - kfree(tb->buf); - kfree(tb); - } + nfsd4_release_compoundargs(args); } return !status; } --- linux-2.6.7/fs/nfsd/nfs4proc.c.lsec 2004-06-15 23:20:26.000000000 -0600 +++ linux-2.6.7/fs/nfsd/nfs4proc.c 2005-03-23 14:28:24.080388920 -0700 @@ -52,6 +52,7 @@ #include #include #include +#include #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -135,9 +136,11 @@ do_open_fhandle(struct svc_rqst *rqstp, { int status; - dprintk("NFSD: do_open_fhandle\n"); + /* Only reclaims from previously confirmed clients are valid */ + if ((status = nfs4_check_open_reclaim(&open->op_clientid))) + return status; - /* we don't know the target directory, and therefore can not + /* We don't know the target directory, and therefore can not * set the change info */ @@ -172,8 +175,7 @@ nfsd4_open(struct svc_rqst *rqstp, struc if (nfs4_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) return nfserr_grace; - if (nfs4_in_no_grace() && - open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) + if (!nfs4_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) return nfserr_no_grace; /* This check required by spec. 
*/ @@ -318,7 +320,7 @@ nfsd4_commit(struct svc_rqst *rqstp, str return status; } -static inline int +static int nfsd4_create(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_create *create) { struct svc_fh resfh; @@ -435,7 +437,7 @@ nfsd4_link(struct svc_rqst *rqstp, struc return status; } -static inline int +static int nfsd4_lookupp(struct svc_rqst *rqstp, struct svc_fh *current_fh) { struct svc_fh tmp_fh; @@ -619,7 +621,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, st status = nfserr_bad_stateid; if (ZERO_STATEID(&setattr->sa_stateid) || ONE_STATEID(&setattr->sa_stateid)) { dprintk("NFSD: nfsd4_setattr: magic stateid!\n"); - return status; + goto out; } nfs4_lock_state(); @@ -627,17 +629,25 @@ nfsd4_setattr(struct svc_rqst *rqstp, st &setattr->sa_stateid, CHECK_FH | RDWR_STATE, &stp))) { dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); - goto out; + goto out_unlock; } status = nfserr_openmode; if (!access_bits_permit_write(stp->st_access_bmap)) { dprintk("NFSD: nfsd4_setattr: not opened for write!\n"); - goto out; + goto out_unlock; } nfs4_unlock_state(); } - return (nfsd_setattr(rqstp, current_fh, &setattr->sa_iattr, 0, (time_t)0)); + status = nfs_ok; + if (setattr->sa_acl != NULL) + status = nfsd4_set_nfs4_acl(rqstp, current_fh, setattr->sa_acl); + if (status) + goto out; + status = nfsd_setattr(rqstp, current_fh, &setattr->sa_iattr, + 0, (time_t)0); out: + return status; +out_unlock: nfs4_unlock_state(); return status; } @@ -773,13 +783,20 @@ nfsd4_proc_compound(struct svc_rqst *rqs struct nfsd4_compoundres *resp) { struct nfsd4_op *op; - struct svc_fh current_fh; - struct svc_fh save_fh; + struct svc_fh *current_fh = NULL; + struct svc_fh *save_fh = NULL; int slack_space; /* in words, not bytes! */ int status; - fh_init(¤t_fh, NFS4_FHSIZE); - fh_init(&save_fh, NFS4_FHSIZE); + status = nfserr_resource; + current_fh = kmalloc(sizeof(*current_fh), GFP_KERNEL); + if (current_fh == NULL) + goto out; + fh_init(current_fh, NFS4_FHSIZE); + save_fh = kmalloc(sizeof(*save_fh), GFP_KERNEL); + if (save_fh == NULL) + goto out; + fh_init(save_fh, NFS4_FHSIZE); resp->xbuf = &rqstp->rq_res; resp->p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len; @@ -831,7 +848,7 @@ nfsd4_proc_compound(struct svc_rqst *rqs * SETATTR NOFILEHANDLE error handled in nfsd4_setattr * due to required returned bitmap argument */ - if ((!current_fh.fh_dentry) && + if ((!current_fh->fh_dentry) && !((op->opnum == OP_PUTFH) || (op->opnum == OP_PUTROOTFH) || (op->opnum == OP_SETCLIENTID) || (op->opnum == OP_SETCLIENTID_CONFIRM) || @@ -843,105 +860,105 @@ nfsd4_proc_compound(struct svc_rqst *rqs } switch (op->opnum) { case OP_ACCESS: - op->status = nfsd4_access(rqstp, ¤t_fh, &op->u.access); + op->status = nfsd4_access(rqstp, current_fh, &op->u.access); break; case OP_CLOSE: - op->status = nfsd4_close(rqstp, ¤t_fh, &op->u.close); + op->status = nfsd4_close(rqstp, current_fh, &op->u.close); if (op->u.close.cl_stateowner) op->replay = &op->u.close.cl_stateowner->so_replay; break; case OP_COMMIT: - op->status = nfsd4_commit(rqstp, ¤t_fh, &op->u.commit); + op->status = nfsd4_commit(rqstp, current_fh, &op->u.commit); break; case OP_CREATE: - op->status = nfsd4_create(rqstp, ¤t_fh, &op->u.create); + op->status = nfsd4_create(rqstp, current_fh, &op->u.create); break; case OP_GETATTR: - op->status = nfsd4_getattr(rqstp, ¤t_fh, &op->u.getattr); + op->status = nfsd4_getattr(rqstp, current_fh, &op->u.getattr); break; case OP_GETFH: - op->status = nfsd4_getfh(¤t_fh, &op->u.getfh); + op->status = 
nfsd4_getfh(current_fh, &op->u.getfh); break; case OP_LINK: - op->status = nfsd4_link(rqstp, ¤t_fh, &save_fh, &op->u.link); + op->status = nfsd4_link(rqstp, current_fh, save_fh, &op->u.link); break; case OP_LOCK: - op->status = nfsd4_lock(rqstp, ¤t_fh, &op->u.lock); + op->status = nfsd4_lock(rqstp, current_fh, &op->u.lock); if (op->u.lock.lk_stateowner) op->replay = &op->u.lock.lk_stateowner->so_replay; break; case OP_LOCKT: - op->status = nfsd4_lockt(rqstp, ¤t_fh, &op->u.lockt); + op->status = nfsd4_lockt(rqstp, current_fh, &op->u.lockt); break; case OP_LOCKU: - op->status = nfsd4_locku(rqstp, ¤t_fh, &op->u.locku); + op->status = nfsd4_locku(rqstp, current_fh, &op->u.locku); if (op->u.locku.lu_stateowner) op->replay = &op->u.locku.lu_stateowner->so_replay; break; case OP_LOOKUP: - op->status = nfsd4_lookup(rqstp, ¤t_fh, &op->u.lookup); + op->status = nfsd4_lookup(rqstp, current_fh, &op->u.lookup); break; case OP_LOOKUPP: - op->status = nfsd4_lookupp(rqstp, ¤t_fh); + op->status = nfsd4_lookupp(rqstp, current_fh); break; case OP_NVERIFY: - op->status = nfsd4_verify(rqstp, ¤t_fh, &op->u.nverify); + op->status = nfsd4_verify(rqstp, current_fh, &op->u.nverify); if (op->status == nfserr_not_same) op->status = nfs_ok; break; case OP_OPEN: - op->status = nfsd4_open(rqstp, ¤t_fh, &op->u.open); + op->status = nfsd4_open(rqstp, current_fh, &op->u.open); if (op->u.open.op_stateowner) op->replay = &op->u.open.op_stateowner->so_replay; break; case OP_OPEN_CONFIRM: - op->status = nfsd4_open_confirm(rqstp, ¤t_fh, &op->u.open_confirm); + op->status = nfsd4_open_confirm(rqstp, current_fh, &op->u.open_confirm); if (op->u.open_confirm.oc_stateowner) op->replay = &op->u.open_confirm.oc_stateowner->so_replay; break; case OP_OPEN_DOWNGRADE: - op->status = nfsd4_open_downgrade(rqstp, ¤t_fh, &op->u.open_downgrade); + op->status = nfsd4_open_downgrade(rqstp, current_fh, &op->u.open_downgrade); if (op->u.open_downgrade.od_stateowner) op->replay = &op->u.open_downgrade.od_stateowner->so_replay; break; case OP_PUTFH: - op->status = nfsd4_putfh(rqstp, ¤t_fh, &op->u.putfh); + op->status = nfsd4_putfh(rqstp, current_fh, &op->u.putfh); break; case OP_PUTROOTFH: - op->status = nfsd4_putrootfh(rqstp, ¤t_fh); + op->status = nfsd4_putrootfh(rqstp, current_fh); break; case OP_READ: - op->status = nfsd4_read(rqstp, ¤t_fh, &op->u.read); + op->status = nfsd4_read(rqstp, current_fh, &op->u.read); break; case OP_READDIR: - op->status = nfsd4_readdir(rqstp, ¤t_fh, &op->u.readdir); + op->status = nfsd4_readdir(rqstp, current_fh, &op->u.readdir); break; case OP_READLINK: - op->status = nfsd4_readlink(rqstp, ¤t_fh, &op->u.readlink); + op->status = nfsd4_readlink(rqstp, current_fh, &op->u.readlink); break; case OP_REMOVE: - op->status = nfsd4_remove(rqstp, ¤t_fh, &op->u.remove); + op->status = nfsd4_remove(rqstp, current_fh, &op->u.remove); break; case OP_RENAME: - op->status = nfsd4_rename(rqstp, ¤t_fh, &save_fh, &op->u.rename); + op->status = nfsd4_rename(rqstp, current_fh, save_fh, &op->u.rename); break; case OP_RENEW: op->status = nfsd4_renew(&op->u.renew); break; case OP_RESTOREFH: - op->status = nfsd4_restorefh(¤t_fh, &save_fh); + op->status = nfsd4_restorefh(current_fh, save_fh); break; case OP_SAVEFH: - op->status = nfsd4_savefh(¤t_fh, &save_fh); + op->status = nfsd4_savefh(current_fh, save_fh); break; case OP_SETATTR: - op->status = nfsd4_setattr(rqstp, ¤t_fh, &op->u.setattr); + op->status = nfsd4_setattr(rqstp, current_fh, &op->u.setattr); break; case OP_SETCLIENTID: op->status = nfsd4_setclientid(rqstp, &op->u.setclientid); 
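The hunks above move nfsd4_proc_compound()'s two filehandles, current_fh and save_fh, off the kernel stack and onto the heap, with the single out: label releasing both. The userspace sketch below illustrates only that allocate/initialize/single-exit-cleanup pattern; struct fh, fh_init() and fh_put() here are hypothetical stand-ins for the kernel's struct svc_fh and its helpers, so this is an illustration of the pattern rather than kernel code.

#include <stdlib.h>
#include <string.h>

struct fh { char data[128]; };			/* stand-in for struct svc_fh */

static void fh_init(struct fh *fh) { memset(fh, 0, sizeof(*fh)); }
static void fh_put(struct fh *fh) { (void)fh; /* would drop dentry/export references */ }

static int process_compound(void)
{
	struct fh *current_fh = NULL, *save_fh = NULL;
	int status = -1;			/* plays the role of nfserr_resource */

	current_fh = malloc(sizeof(*current_fh));
	if (current_fh == NULL)
		goto out;
	fh_init(current_fh);

	save_fh = malloc(sizeof(*save_fh));
	if (save_fh == NULL)
		goto out;
	fh_init(save_fh);

	/* ... per-operation processing would happen here ... */
	status = 0;
out:
	/* single cleanup path: put whatever was initialized, then free */
	if (current_fh)
		fh_put(current_fh);
	free(current_fh);
	if (save_fh)
		fh_put(save_fh);
	free(save_fh);
	return status;
}

int main(void)
{
	return process_compound() ? EXIT_FAILURE : EXIT_SUCCESS;
}

As in the patch, each allocation is checked individually and both pointers go through the release helpers on the shared exit path, which is safe because free(NULL) is a no-op and the put helper is only invoked on handles that were actually allocated.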
@@ -950,12 +967,12 @@ nfsd4_proc_compound(struct svc_rqst *rqs op->status = nfsd4_setclientid_confirm(rqstp, &op->u.setclientid_confirm); break; case OP_VERIFY: - op->status = nfsd4_verify(rqstp, ¤t_fh, &op->u.verify); + op->status = nfsd4_verify(rqstp, current_fh, &op->u.verify); if (op->status == nfserr_same) op->status = nfs_ok; break; case OP_WRITE: - op->status = nfsd4_write(rqstp, ¤t_fh, &op->u.write); + op->status = nfsd4_write(rqstp, current_fh, &op->u.write); break; case OP_RELEASE_LOCKOWNER: op->status = nfsd4_release_lockowner(rqstp, &op->u.release_lockowner); @@ -976,22 +993,13 @@ encode_op: } out: - if (args->ops != args->iops) { - kfree(args->ops); - args->ops = args->iops; - } - if (args->tmpp) { - kfree(args->tmpp); - args->tmpp = NULL; - } - while (args->to_free) { - struct tmpbuf *tb = args->to_free; - args->to_free = tb->next; - kfree(tb->buf); - kfree(tb); - } - fh_put(¤t_fh); - fh_put(&save_fh); + nfsd4_release_compoundargs(args); + if (current_fh) + fh_put(current_fh); + kfree(current_fh); + if (save_fh) + fh_put(save_fh); + kfree(save_fh); return status; } --- linux-2.6.7/fs/nfsd/nfs4state.c.lsec 2004-06-15 23:19:43.000000000 -0600 +++ linux-2.6.7/fs/nfsd/nfs4state.c 2005-03-23 14:28:24.028396824 -0700 @@ -51,6 +51,9 @@ #define NFSDDBG_FACILITY NFSDDBG_PROC /* Globals */ +static time_t lease_time = 90; /* default lease time */ +static time_t old_lease_time = 90; /* past incarnation lease time */ +static u32 nfs4_reclaim_init = 0; time_t boot_time; static time_t grace_end = 0; static u32 current_clientid = 1; @@ -82,7 +85,7 @@ struct nfs4_stateid * find_stateid(state * protects clientid_hashtbl[], clientstr_hashtbl[], * unconfstr_hashtbl[], uncofid_hashtbl[]. */ -static struct semaphore client_sema; +static DECLARE_MUTEX(client_sema); void nfs4_lock_state(void) @@ -131,8 +134,11 @@ static void release_file(struct nfs4_fil ((id) & CLIENT_HASH_MASK) #define clientstr_hashval(name, namelen) \ (opaque_hashval((name), (namelen)) & CLIENT_HASH_MASK) - -/* conf_id_hashtbl[], and conf_str_hashtbl[] hold confirmed +/* + * reclaim_str_hashtbl[] holds known client info from previous reset/reboot + * used in reboot/reset lease grace period processing + * + * conf_id_hashtbl[], and conf_str_hashtbl[] hold confirmed * setclientid_confirmed info. * * unconf_str_hastbl[] and unconf_id_hashtbl[] hold unconfirmed @@ -144,6 +150,8 @@ static void release_file(struct nfs4_fil * close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time * for last close replay. 
*/ +static struct list_head reclaim_str_hashtbl[CLIENT_HASH_SIZE]; +static int reclaim_str_hashtbl_size; static struct list_head conf_id_hashtbl[CLIENT_HASH_SIZE]; static struct list_head conf_str_hashtbl[CLIENT_HASH_SIZE]; static struct list_head unconf_str_hashtbl[CLIENT_HASH_SIZE]; @@ -208,12 +216,20 @@ free_client(struct nfs4_client *clp) kfree(clp); } -static void +void +put_nfs4_client(struct nfs4_client *clp) +{ + if (atomic_dec_and_test(&clp->cl_count)) + free_client(clp); +} + +void expire_client(struct nfs4_client *clp) { struct nfs4_stateowner *sop; - dprintk("NFSD: expire_client\n"); + dprintk("NFSD: expire_client cl_count %d\n", + atomic_read(&clp->cl_count)); list_del(&clp->cl_idhash); list_del(&clp->cl_strhash); list_del(&clp->cl_lru); @@ -221,7 +237,7 @@ expire_client(struct nfs4_client *clp) sop = list_entry(clp->cl_perclient.next, struct nfs4_stateowner, so_perclient); release_stateowner(sop); } - free_client(clp); + put_nfs4_client(clp); } static struct nfs4_client * @@ -230,6 +246,7 @@ create_client(struct xdr_netobj name) { if(!(clp = alloc_client(name))) goto out; + atomic_set(&clp->cl_count, 1); INIT_LIST_HEAD(&clp->cl_idhash); INIT_LIST_HEAD(&clp->cl_strhash); INIT_LIST_HEAD(&clp->cl_perclient); @@ -339,6 +356,99 @@ move_to_confirmed(struct nfs4_client *cl renew_client(clp); } + +/* a helper function for parse_callback */ +static int +parse_octet(unsigned int *lenp, char **addrp) +{ + unsigned int len = *lenp; + char *p = *addrp; + int n = -1; + char c; + + for (;;) { + if (!len) + break; + len--; + c = *p++; + if (c == '.') + break; + if ((c < '0') || (c > '9')) { + n = -1; + break; + } + if (n < 0) + n = 0; + n = (n * 10) + (c - '0'); + if (n > 255) { + n = -1; + break; + } + } + *lenp = len; + *addrp = p; + return n; +} + +/* parse and set the setclientid ipv4 callback address */ +int +parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigned short *cbportp) +{ + int temp = 0; + u32 cbaddr = 0; + u16 cbport = 0; + u32 addrlen = addr_len; + char *addr = addr_val; + int i, shift; + + /* ipaddress */ + shift = 24; + for(i = 4; i > 0 ; i--) { + if ((temp = parse_octet(&addrlen, &addr)) < 0) { + return 0; + } + cbaddr |= (temp << shift); + if(shift > 0) + shift -= 8; + } + *cbaddrp = cbaddr; + + /* port */ + shift = 8; + for(i = 2; i > 0 ; i--) { + if ((temp = parse_octet(&addrlen, &addr)) < 0) { + return 0; + } + cbport |= (temp << shift); + if(shift > 0) + shift -= 8; + } + *cbportp = cbport; + return 1; +} + +void +gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) +{ + struct nfs4_callback *cb = &clp->cl_callback; + + if( !(parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val, + &cb->cb_addr, &cb->cb_port))) { + printk(KERN_INFO "NFSD: BAD callback address. 
client will not receive delegations\n"); + printk(KERN_INFO "NFSD: this client (clientid %08x/%08x) " + "will not receive delegations\n", + clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); + + cb->cb_parsed = 0; + return; + } + cb->cb_netid.len = se->se_callback_netid_len; + cb->cb_netid.data = se->se_callback_netid_val; + cb->cb_prog = se->se_callback_prog; + cb->cb_ident = se->se_callback_ident; + cb->cb_parsed = 1; +} + /* * RFC 3010 has a complex implmentation description of processing a * SETCLIENTID request consisting of 5 bullets, labeled as @@ -450,6 +560,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp copy_cred(&new->cl_cred,&rqstp->rq_cred); gen_clid(new); gen_confirm(new); + gen_callback(new, setclid); add_to_unconfirmed(new, strhashval); } else if (cmp_verf(&conf->cl_verifier, &clverifier)) { /* @@ -477,6 +588,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp copy_cred(&new->cl_cred,&rqstp->rq_cred); copy_clid(new, conf); gen_confirm(new); + gen_callback(new, setclid); add_to_unconfirmed(new,strhashval); } else if (!unconf) { /* @@ -494,6 +606,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp copy_cred(&new->cl_cred,&rqstp->rq_cred); gen_clid(new); gen_confirm(new); + gen_callback(new, setclid); add_to_unconfirmed(new, strhashval); } else if (!cmp_verf(&conf->cl_confirm, &unconf->cl_confirm)) { /* @@ -519,6 +632,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp copy_cred(&new->cl_cred,&rqstp->rq_cred); gen_clid(new); gen_confirm(new); + gen_callback(new, setclid); add_to_unconfirmed(new, strhashval); } else { /* No cases hit !!! */ @@ -529,7 +643,6 @@ nfsd4_setclientid(struct svc_rqst *rqstp setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; setclid->se_clientid.cl_id = new->cl_clientid.cl_id; memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data)); - printk(KERN_INFO "NFSD: this client will not receive delegations\n"); status = nfs_ok; out: nfs4_unlock_state(); @@ -575,7 +688,7 @@ nfsd4_setclientid_confirm(struct svc_rqs * not been found. 
*/ if (clp->cl_addr != ip_addr) { - printk("NFSD: setclientid: string in use by client" + dprintk("NFSD: setclientid: string in use by client" "(clientid %08x/%08x)\n", clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); goto out; @@ -588,7 +701,7 @@ nfsd4_setclientid_confirm(struct svc_rqs continue; status = nfserr_inval; if (clp->cl_addr != ip_addr) { - printk("NFSD: setclientid: string in use by client" + dprintk("NFSD: setclientid: string in use by client" "(clientid %08x/%08x)\n", clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); goto out; @@ -610,6 +723,7 @@ nfsd4_setclientid_confirm(struct svc_rqs status = nfserr_clid_inuse; else { expire_client(conf); + clp = unconf; move_to_confirmed(unconf, idhashval); status = nfs_ok; } @@ -627,6 +741,7 @@ nfsd4_setclientid_confirm(struct svc_rqs if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred)) { status = nfserr_clid_inuse; } else { + clp = conf; status = nfs_ok; } goto out; @@ -641,6 +756,7 @@ nfsd4_setclientid_confirm(struct svc_rqs status = nfserr_clid_inuse; } else { status = nfs_ok; + clp = unconf; move_to_confirmed(unconf, idhashval); } goto out; @@ -660,7 +776,9 @@ nfsd4_setclientid_confirm(struct svc_rqs status = nfserr_inval; goto out; out: - /* XXX if status == nfs_ok, probe callback path */ + if (!status) + nfsd4_probe_callback(clp); + nfs4_unlock_state(); return status; } @@ -1510,10 +1628,12 @@ nfs4_preprocess_seqid_op(struct svc_fh * status = nfserr_bad_stateid; - /* for new lock stateowners, check that the lock->v.new.open_stateid - * refers to an open stateowner, and that the lockclid - * (nfs4_lock->v.new.clientid) is the same as the - * open_stateid->st_stateowner->so_client->clientid + /* for new lock stateowners: + * check that the lock->v.new.open_stateid + * refers to an open stateowner + * + * check that the lockclid (nfs4_lock->v.new.clientid) is the same + * as the open_stateid->st_stateowner->so_client->clientid */ if (lockclid) { struct nfs4_stateowner *sop = stp->st_stateowner; @@ -1599,6 +1719,17 @@ check_replay: } /* + * eventually, this will perform an upcall to the 'state daemon' as well as + * set the cl_first_state field. 
+ */ +void +first_state(struct nfs4_client *clp) +{ + if (!clp->cl_first_state) + clp->cl_first_state = get_seconds(); +} + +/* * nfs4_unlock_state(); called in encode */ int @@ -1635,6 +1766,7 @@ nfsd4_open_confirm(struct svc_rqst *rqst stp->st_stateid.si_fileid, stp->st_stateid.si_generation); status = nfs_ok; + first_state(sop->so_client); out: return status; } @@ -1850,6 +1982,21 @@ nfs4_set_lock_denied(struct file_lock *f deny->ld_type = NFS4_WRITE_LT; } +static struct nfs4_stateowner * +find_lockstateowner(struct xdr_netobj *owner, clientid_t *clid) +{ + struct nfs4_stateowner *local = NULL; + int i; + + for (i = 0; i < LOCK_HASH_SIZE; i++) { + list_for_each_entry(local, &lock_ownerid_hashtbl[i], so_idhash) { + if(!cmp_owner_str(local, owner, clid)) + continue; + return local; + } + } + return NULL; +} static int find_lockstateowner_str(unsigned int hashval, struct xdr_netobj *owner, clientid_t *clid, struct nfs4_stateowner **op) { @@ -1969,7 +2116,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struc if (nfs4_in_grace() && !lock->lk_reclaim) return nfserr_grace; - if (nfs4_in_no_grace() && lock->lk_reclaim) + if (!nfs4_in_grace() && lock->lk_reclaim) return nfserr_no_grace; if (check_lock_length(lock->lk_offset, lock->lk_length)) @@ -1992,7 +2139,11 @@ nfsd4_lock(struct svc_rqst *rqstp, struc printk("NFSD: nfsd4_lock: clientid is stale!\n"); goto out; } - /* does the clientid in the lock owner own the open stateid? */ + + /* is the new lock seqid presented by the client zero? */ + status = nfserr_bad_seqid; + if (lock->v.new.lock_seqid != 0) + goto out; /* validate and update open stateid and open seqid */ status = nfs4_preprocess_seqid_op(current_fh, @@ -2011,15 +2162,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struc strhashval = lock_ownerstr_hashval(fp->fi_inode, open_sop->so_client->cl_clientid.cl_id, lock->v.new.owner); - /* * If we already have this lock owner, the client is in * error (or our bookeeping is wrong!) * for asking for a 'new lock'. 
*/ status = nfserr_bad_stateid; - if (find_lockstateowner_str(strhashval, &lock->v.new.owner, - &lock->v.new.clientid, &lock_sop)) + lock_sop = find_lockstateowner(&lock->v.new.owner, + &lock->v.new.clientid); + if (lock_sop) goto out; status = nfserr_resource; if (!(lock->lk_stateowner = alloc_init_lock_stateowner(strhashval, open_sop->so_client, open_stp, lock))) @@ -2315,7 +2466,7 @@ nfsd4_release_lockowner(struct svc_rqst clientid_t *clid = &rlockowner->rl_clientid; struct nfs4_stateowner *local = NULL; struct xdr_netobj *owner = &rlockowner->rl_owner; - int status, i; + int status; dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", clid->cl_boot, clid->cl_id); @@ -2330,34 +2481,136 @@ nfsd4_release_lockowner(struct svc_rqst nfs4_lock_state(); - /* find the lockowner */ status = nfs_ok; - for (i=0; i < LOCK_HASH_SIZE; i++) - list_for_each_entry(local, &lock_ownerstr_hashtbl[i], so_strhash) - if(cmp_owner_str(local, owner, clid)) { - struct nfs4_stateid *stp; - - /* check for any locks held by any stateid - * associated with the (lock) stateowner */ - status = nfserr_locks_held; - list_for_each_entry(stp, &local->so_perfilestate, - st_perfilestate) { - if(stp->st_vfs_set) { - if (check_for_locks(&stp->st_vfs_file, - local)) - goto out; - } - } - /* no locks held by (lock) stateowner */ - status = nfs_ok; - release_stateowner(local); - goto out; + local = find_lockstateowner(owner, clid); + if (local) { + struct nfs4_stateid *stp; + + /* check for any locks held by any stateid + * associated with the (lock) stateowner */ + status = nfserr_locks_held; + list_for_each_entry(stp, &local->so_perfilestate, + st_perfilestate) { + if(stp->st_vfs_set) { + if (check_for_locks(&stp->st_vfs_file, local)) + goto out; } + } + /* no locks held by (lock) stateowner */ + status = nfs_ok; + release_stateowner(local); + } out: nfs4_unlock_state(); return status; } +static inline struct nfs4_client_reclaim * +alloc_reclaim(int namelen) +{ + struct nfs4_client_reclaim *crp = NULL; + + crp = kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL); + if (!crp) + return NULL; + crp->cr_name.data = kmalloc(namelen, GFP_KERNEL); + if (!crp->cr_name.data) { + kfree(crp); + return NULL; + } + return crp; +} + +/* + * failure => all reset bets are off, nfserr_no_grace... + */ +static int +nfs4_client_to_reclaim(struct nfs4_client *clp) +{ + unsigned int strhashval; + struct nfs4_client_reclaim *crp = NULL; + + crp = alloc_reclaim(clp->cl_name.len); + if (!crp) + return 0; + strhashval = clientstr_hashval(clp->cl_name.data, clp->cl_name.len); + INIT_LIST_HEAD(&crp->cr_strhash); + list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]); + memcpy(crp->cr_name.data, clp->cl_name.data, clp->cl_name.len); + crp->cr_name.len = clp->cl_name.len; + crp->cr_first_state = clp->cl_first_state; + crp->cr_expired = 0; + return 1; +} + +static void +nfs4_release_reclaim(void) +{ + struct nfs4_client_reclaim *crp = NULL; + int i; + + BUG_ON(!nfs4_reclaim_init); + for (i = 0; i < CLIENT_HASH_SIZE; i++) { + while (!list_empty(&reclaim_str_hashtbl[i])) { + crp = list_entry(reclaim_str_hashtbl[i].next, + struct nfs4_client_reclaim, cr_strhash); + list_del(&crp->cr_strhash); + kfree(crp->cr_name.data); + kfree(crp); + reclaim_str_hashtbl_size--; + } + } + BUG_ON(reclaim_str_hashtbl_size); +} + +/* + * called from OPEN, CLAIM_PREVIOUS with a new clientid. 
*/ +struct nfs4_client_reclaim * +nfs4_find_reclaim_client(clientid_t *clid) +{ + unsigned int idhashval = clientid_hashval(clid->cl_id); + unsigned int strhashval; + struct nfs4_client *clp, *client = NULL; + struct nfs4_client_reclaim *crp = NULL; + + + /* find clientid in conf_id_hashtbl */ + list_for_each_entry(clp, &conf_id_hashtbl[idhashval], cl_idhash) { + if (cmp_clid(&clp->cl_clientid, clid)) { + client = clp; + break; + } + } + if (!client) + return NULL; + + /* find clp->cl_name in reclaim_str_hashtbl */ + strhashval = clientstr_hashval(client->cl_name.data, + client->cl_name.len); + list_for_each_entry(crp, &reclaim_str_hashtbl[strhashval], cr_strhash) { + if(cmp_name(&crp->cr_name, &client->cl_name)) { + return crp; + } + } + return NULL; +} + +/* +* Called from OPEN. Look for clientid in reclaim list. +*/ +int +nfs4_check_open_reclaim(clientid_t *clid) +{ + struct nfs4_client_reclaim *crp; + + if ((crp = nfs4_find_reclaim_client(clid)) == NULL) + return nfserr_reclaim_bad; + if (crp->cr_expired) + return nfserr_no_grace; + return nfs_ok; +} + + /* * Start and stop routines */ @@ -2366,10 +2619,16 @@ void nfs4_state_init(void) { int i; - time_t start = get_seconds(); + time_t grace_time; if (nfs4_init) return; + if (!nfs4_reclaim_init) { + for (i = 0; i < CLIENT_HASH_SIZE; i++) + INIT_LIST_HEAD(&reclaim_str_hashtbl[i]); + reclaim_str_hashtbl_size = 0; + nfs4_reclaim_init = 1; + } for (i = 0; i < CLIENT_HASH_SIZE; i++) { INIT_LIST_HEAD(&conf_id_hashtbl[i]); INIT_LIST_HEAD(&conf_str_hashtbl[i]); @@ -2396,27 +2655,36 @@ nfs4_state_init(void) INIT_LIST_HEAD(&close_lru); INIT_LIST_HEAD(&client_lru); - init_MUTEX(&client_sema); - boot_time = start; - grace_end = start + NFSD_LEASE_TIME; + boot_time = get_seconds(); + grace_time = max(old_lease_time, lease_time); + if (reclaim_str_hashtbl_size == 0) + grace_time = 0; + if (grace_time) + printk("NFSD: starting %ld-second grace period\n", grace_time); + grace_end = boot_time + grace_time; INIT_WORK(&laundromat_work,laundromat_main, NULL); schedule_delayed_work(&laundromat_work, NFSD_LEASE_TIME*HZ); nfs4_init = 1; - } int nfs4_in_grace(void) { - return time_before(get_seconds(), (unsigned long)grace_end); + return get_seconds() < grace_end; } -int -nfs4_in_no_grace(void) +void +set_no_grace(void) { - return (grace_end < get_seconds()); + printk("NFSD: ERROR in reboot recovery. State reclaims will fail.\n"); + grace_end = get_seconds(); } +time_t +nfs4_lease_time(void) +{ + return lease_time; +} static void __nfs4_state_shutdown(void) @@ -2454,6 +2722,61 @@ void nfs4_state_shutdown(void) { nfs4_lock_state(); + nfs4_release_reclaim(); __nfs4_state_shutdown(); nfs4_unlock_state(); } + +/* + * Called when leasetime is changed. + * + * if nfsd is not started, simply set the global lease. + * + * if nfsd(s) are running, lease change requires nfsv4 state to be reset. + * e.g: boot_time is reset, existing nfs4_client structs are + * used to fill reclaim_str_hashtbl, then all state (except for the + * reclaim_str_hashtbl) is re-initialized. + * + * if the old lease time is greater than the new lease time, the grace + * period needs to be set to the old lease time to allow clients to reclaim + * their state. XXX - we may want to set the grace period == lease time + * after an initial grace period == old lease time + * + * if an error occurs in this process, the new lease is set, but the server + * will not honor OPEN or LOCK reclaims, and will return nfserr_no_grace + * which means OPEN/LOCK/READ/WRITE will fail during grace period. 
+ * + * clients will attempt to reset all state with SETCLIENTID/CONFIRM, and + * OPEN and LOCK reclaims. + */ +void +nfs4_reset_lease(time_t leasetime) +{ + struct nfs4_client *clp; + int i; + + printk("NFSD: New leasetime %ld\n",leasetime); + if (!nfs4_init) + return; + nfs4_lock_state(); + old_lease_time = lease_time; + lease_time = leasetime; + + nfs4_release_reclaim(); + + /* populate reclaim_str_hashtbl with current confirmed nfs4_clientid */ + for (i = 0; i < CLIENT_HASH_SIZE; i++) { + list_for_each_entry(clp, &conf_id_hashtbl[i], cl_idhash) { + if (!nfs4_client_to_reclaim(clp)) { + nfs4_release_reclaim(); + goto init_state; + } + reclaim_str_hashtbl_size++; + } + } +init_state: + __nfs4_state_shutdown(); + nfs4_state_init(); + nfs4_unlock_state(); +} + --- linux-2.6.7/fs/nfsd/vfs.c.lsec 2004-06-15 23:19:13.000000000 -0600 +++ linux-2.6.7/fs/nfsd/vfs.c 2005-03-23 14:28:24.520322040 -0700 @@ -44,6 +44,16 @@ #include #include #include +#ifdef CONFIG_NFSD_V4 +#include +#include +#include +#include +#include +#include +#include +#include +#endif /* CONFIG_NFSD_V4 */ #include @@ -344,6 +354,177 @@ out_nfserr: goto out; } +#if defined(CONFIG_NFSD_V4) + +static int +set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key) +{ + int len; + size_t buflen; + char *buf = NULL; + int error = 0; + struct inode *inode = dentry->d_inode; + + buflen = posix_acl_xattr_size(pacl->a_count); + buf = kmalloc(buflen, GFP_KERNEL); + error = -ENOMEM; + if (buf == NULL) + goto out; + + len = posix_acl_to_xattr(pacl, buf, buflen); + if (len < 0) { + error = len; + goto out; + } + + error = -EOPNOTSUPP; + if (inode->i_op && inode->i_op->setxattr) { + down(&inode->i_sem); + security_inode_setxattr(dentry, key, buf, len, 0); + error = inode->i_op->setxattr(dentry, key, buf, len, 0); + if (!error) + security_inode_post_setxattr(dentry, key, buf, len, 0); + up(&inode->i_sem); + } +out: + kfree(buf); + return (error); +} + +int +nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct nfs4_acl *acl) +{ + int error; + struct dentry *dentry; + struct inode *inode; + struct posix_acl *pacl = NULL, *dpacl = NULL; + unsigned int flags = 0; + + /* Get inode */ + error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, MAY_SATTR); + if (error) + goto out; + + dentry = fhp->fh_dentry; + inode = dentry->d_inode; + if (S_ISDIR(inode->i_mode)) + flags = NFS4_ACL_DIR; + + error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags); + if (error < 0) + goto out_nfserr; + + if (pacl) { + error = set_nfsv4_acl_one(dentry, pacl, XATTR_NAME_ACL_ACCESS); + if (error < 0) + goto out_nfserr; + } + + if (dpacl) { + error = set_nfsv4_acl_one(dentry, dpacl, XATTR_NAME_ACL_DEFAULT); + if (error < 0) + goto out_nfserr; + } + + error = nfs_ok; + +out: + posix_acl_release(pacl); + posix_acl_release(dpacl); + return (error); +out_nfserr: + error = nfserrno(error); + goto out; +} + +static struct posix_acl * +_get_posix_acl(struct dentry *dentry, char *key) +{ + struct inode *inode = dentry->d_inode; + char *buf = NULL; + int buflen, error = 0; + struct posix_acl *pacl = NULL; + + down(&inode->i_sem); + + buflen = inode->i_op->getxattr(dentry, key, NULL, 0); + if (buflen <= 0) { + error = buflen < 0 ? 
buflen : -ENODATA; + goto out_sem; + } + + buf = kmalloc(buflen, GFP_KERNEL); + if (buf == NULL) { + error = -ENOMEM; + goto out_sem; + } + + error = -EOPNOTSUPP; + if (inode->i_op && inode->i_op->getxattr) { + error = security_inode_getxattr(dentry, key); + if (error) + goto out_sem; + error = inode->i_op->getxattr(dentry, key, buf, buflen); + } + if (error < 0) + goto out_sem; + + error = 0; + up(&inode->i_sem); + + pacl = posix_acl_from_xattr(buf, buflen); + out: + kfree(buf); + return pacl; + out_sem: + up(&inode->i_sem); + pacl = ERR_PTR(error); + goto out; +} + +int +nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_acl **acl) +{ + struct inode *inode = dentry->d_inode; + int error = 0; + struct posix_acl *pacl = NULL, *dpacl = NULL; + unsigned int flags = 0; + + pacl = _get_posix_acl(dentry, XATTR_NAME_ACL_ACCESS); + if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA) + pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); + if (IS_ERR(pacl)) { + error = PTR_ERR(pacl); + pacl = NULL; + goto out; + } + + if (S_ISDIR(inode->i_mode)) { + dpacl = _get_posix_acl(dentry, XATTR_NAME_ACL_DEFAULT); + if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA) + dpacl = NULL; + else if (IS_ERR(dpacl)) { + error = PTR_ERR(dpacl); + dpacl = NULL; + goto out; + } + flags = NFS4_ACL_DIR; + } + + *acl = nfs4_acl_posix_to_nfsv4(pacl, dpacl, flags); + if (IS_ERR(*acl)) { + error = PTR_ERR(*acl); + *acl = NULL; + } + out: + posix_acl_release(pacl); + posix_acl_release(dpacl); + return error; +} + +#endif /* defined(CONFIG_NFS_V4) */ + #ifdef CONFIG_NFSD_V3 /* * Check server access rights to a file system object --- linux-2.6.7/fs/nfsd/nfs4idmap.c.lsec 2004-06-15 23:19:43.000000000 -0600 +++ linux-2.6.7/fs/nfsd/nfs4idmap.c 2005-03-23 14:28:24.687296656 -0700 @@ -78,9 +78,9 @@ struct ent { #define DefineSimpleCacheLookupMap(STRUCT, FUNC) \ DefineCacheLookup(struct STRUCT, h, FUNC##_lookup, \ - (struct STRUCT *item, int set), /*no setup */, \ + (struct STRUCT *item, int set), \ & FUNC##_cache, FUNC##_hash(item), FUNC##_match(item, tmp), \ - STRUCT##_init(new, item), STRUCT##_update(tmp, item), 0) + STRUCT##_init(new, item), STRUCT##_update(tmp, item)) /* Common entry handling */ --- linux-2.6.7/fs/nfsd/nfs4acl.c.lsec 2005-03-23 14:28:24.463330704 -0700 +++ linux-2.6.7/fs/nfsd/nfs4acl.c 2005-03-23 14:28:24.463330704 -0700 @@ -0,0 +1,974 @@ +/* + * fs/nfs4acl/acl.c + * + * Common NFSv4 ACL handling code. + * + * Copyright (c) 2002, 2003 The Regents of the University of Michigan. + * All rights reserved. + * + * Marius Aamodt Eriksen + * Jeff Sedlak + * J. Bruce Fields + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* mode bit translations: */ +#define NFS4_READ_MODE (NFS4_ACE_READ_DATA | NFS4_ACE_READ_NAMED_ATTRS) +#define NFS4_WRITE_MODE (NFS4_ACE_WRITE_DATA | NFS4_ACE_WRITE_NAMED_ATTRS | NFS4_ACE_APPEND_DATA) +#define NFS4_EXECUTE_MODE NFS4_ACE_EXECUTE +#define NFS4_ANYONE_MODE (NFS4_ACE_READ_ATTRIBUTES | NFS4_ACE_READ_ACL | NFS4_ACE_SYNCHRONIZE) +#define NFS4_OWNER_MODE (NFS4_ACE_WRITE_ATTRIBUTES | NFS4_ACE_WRITE_ACL) + +/* flags used to simulate posix default ACLs */ +#define NFS4_INHERITANCE_FLAGS (NFS4_ACE_FILE_INHERIT_ACE \ + | NFS4_ACE_DIRECTORY_INHERIT_ACE | NFS4_ACE_INHERIT_ONLY_ACE) + +#define MASK_EQUAL(mask1, mask2) \ + ( ((mask1) & NFS4_ACE_MASK_ALL) == ((mask2) & NFS4_ACE_MASK_ALL) ) + +static u32 +mask_from_posix(unsigned short perm, unsigned int flags) +{ + int mask = NFS4_ANYONE_MODE; + + if (flags & NFS4_ACL_OWNER) + mask |= NFS4_OWNER_MODE; + if (perm & ACL_READ) + mask |= NFS4_READ_MODE; + if (perm & ACL_WRITE) + mask |= NFS4_WRITE_MODE; + if ((perm & ACL_WRITE) && (flags & NFS4_ACL_DIR)) + mask |= NFS4_ACE_DELETE_CHILD; + if (perm & ACL_EXECUTE) + mask |= NFS4_EXECUTE_MODE; + return mask; +} + +static u32 +deny_mask(u32 allow_mask, unsigned int flags) +{ + u32 ret = ~allow_mask & ~NFS4_ACE_DELETE; + if (!(flags & NFS4_ACL_DIR)) + ret &= ~NFS4_ACE_DELETE_CHILD; + return ret; +} + +static int +mode_from_nfs4(u32 perm, unsigned short *mode, unsigned int flags) +{ + u32 ignore = 0; + + if (!(flags & NFS4_ACL_DIR)) + ignore |= NFS4_ACE_DELETE_CHILD; /* ignore it */ + perm |= ignore; + *mode = 0; + if ((perm & NFS4_READ_MODE) == NFS4_READ_MODE) + *mode |= ACL_READ; + if ((perm & NFS4_WRITE_MODE) == NFS4_WRITE_MODE) + *mode |= ACL_WRITE; + if ((perm & NFS4_EXECUTE_MODE) == NFS4_EXECUTE_MODE) + *mode |= ACL_EXECUTE; + if (!MASK_EQUAL(perm, ignore|mask_from_posix(*mode, flags))) + return -EINVAL; + return 0; +} + +struct ace_container { + struct nfs4_ace *ace; + struct list_head ace_l; +}; + +static short ace2type(struct nfs4_ace *); +static int _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *, unsigned int); +static struct posix_acl *_nfsv4_to_posix_one(struct nfs4_acl *, unsigned int); +int nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t); +int nfs4_acl_split(struct nfs4_acl *, struct nfs4_acl *); + +struct nfs4_acl * +nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl, + unsigned int flags) +{ + struct nfs4_acl *acl; + int error = -EINVAL; + + if ((pacl != NULL && + (posix_acl_valid(pacl) < 0 || pacl->a_count == 0)) || + (dpacl != NULL && + (posix_acl_valid(dpacl) < 0 || dpacl->a_count == 0))) + goto out_err; + + acl = nfs4_acl_new(); + if (acl == NULL) { + error = -ENOMEM; + goto out_err; + } + + if (pacl != NULL) { + error = _posix_to_nfsv4_one(pacl, acl, + flags & ~NFS4_ACL_TYPE_DEFAULT); + if (error < 0) + goto out_acl; + } + + if (dpacl != NULL) { + error = _posix_to_nfsv4_one(dpacl, acl, + flags | 
NFS4_ACL_TYPE_DEFAULT); + if (error < 0) + goto out_acl; + } + + return acl; + +out_acl: + nfs4_acl_free(acl); +out_err: + acl = ERR_PTR(error); + + return acl; +} + +static int +nfs4_acl_add_pair(struct nfs4_acl *acl, int eflag, u32 mask, int whotype, + uid_t owner, unsigned int flags) +{ + int error; + + error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE, + eflag, mask, whotype, owner); + if (error < 0) + return error; + error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, + eflag, deny_mask(mask, flags), whotype, owner); + return error; +} + +/* We assume the acl has been verified with posix_acl_valid. */ +static int +_posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, + unsigned int flags) +{ + struct posix_acl_entry *pa, *pe, *group_owner_entry; + int error = -EINVAL; + u32 mask, mask_mask; + int eflag = ((flags & NFS4_ACL_TYPE_DEFAULT) ? + NFS4_INHERITANCE_FLAGS : 0); + + BUG_ON(pacl->a_count < 3); + pe = pacl->a_entries + pacl->a_count; + pa = pe - 2; /* if mask entry exists, it's second from the last. */ + if (pa->e_tag == ACL_MASK) + mask_mask = deny_mask(mask_from_posix(pa->e_perm, flags), flags); + else + mask_mask = 0; + + pa = pacl->a_entries; + BUG_ON(pa->e_tag != ACL_USER_OBJ); + mask = mask_from_posix(pa->e_perm, flags | NFS4_ACL_OWNER); + error = nfs4_acl_add_pair(acl, eflag, mask, NFS4_ACL_WHO_OWNER, 0, flags); + if (error < 0) + goto out; + pa++; + + while (pa->e_tag == ACL_USER) { + mask = mask_from_posix(pa->e_perm, flags); + error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, + eflag, mask_mask, NFS4_ACL_WHO_NAMED, pa->e_id); + if (error < 0) + goto out; + + + error = nfs4_acl_add_pair(acl, eflag, mask, + NFS4_ACL_WHO_NAMED, pa->e_id, flags); + if (error < 0) + goto out; + pa++; + } + + /* In the case of groups, we apply allow ACEs first, then deny ACEs, + * since a user can be in more than one group. 
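+ * For example, with named groups g1 and g2 the emitted order is
+ * (mask ACEs omitted): allow(GROUP@), allow(g1), allow(g2), then
+ * deny(GROUP@), deny(g1), deny(g2); interleaving an allow/deny pair
+ * per group could deny a member a bit that a later group would grant.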
*/ + + /* allow ACEs */ + + if (pacl->a_count > 3) { + BUG_ON(pa->e_tag != ACL_GROUP_OBJ); + error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, + NFS4_ACE_IDENTIFIER_GROUP | eflag, mask_mask, + NFS4_ACL_WHO_GROUP, 0); + if (error < 0) + goto out; + } + group_owner_entry = pa; + mask = mask_from_posix(pa->e_perm, flags); + error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE, + NFS4_ACE_IDENTIFIER_GROUP | eflag, mask, + NFS4_ACL_WHO_GROUP, 0); + if (error < 0) + goto out; + pa++; + + while (pa->e_tag == ACL_GROUP) { + mask = mask_from_posix(pa->e_perm, flags); + error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, + NFS4_ACE_IDENTIFIER_GROUP | eflag, mask_mask, + NFS4_ACL_WHO_NAMED, pa->e_id); + if (error < 0) + goto out; + + error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE, + NFS4_ACE_IDENTIFIER_GROUP | eflag, mask, + NFS4_ACL_WHO_NAMED, pa->e_id); + if (error < 0) + goto out; + pa++; + } + + /* deny ACEs */ + + pa = group_owner_entry; + mask = mask_from_posix(pa->e_perm, flags); + error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, + NFS4_ACE_IDENTIFIER_GROUP | eflag, + deny_mask(mask, flags), NFS4_ACL_WHO_GROUP, 0); + if (error < 0) + goto out; + pa++; + while (pa->e_tag == ACL_GROUP) { + mask = mask_from_posix(pa->e_perm, flags); + error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, + NFS4_ACE_IDENTIFIER_GROUP | eflag, + deny_mask(mask, flags), NFS4_ACL_WHO_NAMED, pa->e_id); + if (error < 0) + goto out; + pa++; + } + + if (pa->e_tag == ACL_MASK) + pa++; + BUG_ON(pa->e_tag != ACL_OTHER); + mask = mask_from_posix(pa->e_perm, flags); + error = nfs4_acl_add_pair(acl, eflag, mask, NFS4_ACL_WHO_EVERYONE, 0, flags); + +out: + return error; +} + +static void +sort_pacl_range(struct posix_acl *pacl, int start, int end) { + int sorted = 0, i; + struct posix_acl_entry tmp; + + /* We just do a bubble sort; easy to do in place, and we're not + * expecting acl's to be long enough to justify anything more. */ + while (!sorted) { + sorted = 1; + for (i = start; i < end; i++) { + if (pacl->a_entries[i].e_id + > pacl->a_entries[i+1].e_id) { + sorted = 0; + tmp = pacl->a_entries[i]; + pacl->a_entries[i] = pacl->a_entries[i+1]; + pacl->a_entries[i+1] = tmp; + } + } + } +} + +static void +sort_pacl(struct posix_acl *pacl) +{ + /* posix_acl_valid requires that users and groups be in order + * by uid/gid. */ + int i, j; + + if (pacl->a_count <= 4) + return; /* no users or groups */ + i = 1; + while (pacl->a_entries[i].e_tag == ACL_USER) + i++; + sort_pacl_range(pacl, 1, i-1); + + BUG_ON(pacl->a_entries[i].e_tag != ACL_GROUP_OBJ); + j = i++; + while (pacl->a_entries[j].e_tag == ACL_GROUP) + j++; + sort_pacl_range(pacl, i, j-1); + return; +} + +static int +write_pace(struct nfs4_ace *ace, struct posix_acl *pacl, + struct posix_acl_entry **pace, short tag, unsigned int flags) +{ + struct posix_acl_entry *this = *pace; + + if (*pace == pacl->a_entries + pacl->a_count) + return -EINVAL; /* fell off the end */ + (*pace)++; + this->e_tag = tag; + if (tag == ACL_USER_OBJ) + flags |= NFS4_ACL_OWNER; + if (mode_from_nfs4(ace->access_mask, &this->e_perm, flags)) + return -EINVAL; + this->e_id = (tag == ACL_USER || tag == ACL_GROUP ? 
+ ace->who : ACL_UNDEFINED_ID); + return 0; +} + +static struct nfs4_ace * +get_next_v4_ace(struct list_head **p, struct list_head *head) +{ + struct nfs4_ace *ace; + + *p = (*p)->next; + if (*p == head) + return NULL; + ace = list_entry(*p, struct nfs4_ace, l_ace); + + return ace; +} + +int +nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl, + struct posix_acl **dpacl, unsigned int flags) +{ + struct nfs4_acl *dacl; + int error = -ENOMEM; + + *pacl = NULL; + *dpacl = NULL; + + dacl = nfs4_acl_new(); + if (dacl == NULL) + goto out; + + error = nfs4_acl_split(acl, dacl); + if (error < 0) + goto out_acl; + + if (pacl != NULL) { + if (acl->naces == 0) { + error = -ENODATA; + goto try_dpacl; + } + + *pacl = _nfsv4_to_posix_one(acl, flags); + if (IS_ERR(*pacl)) { + error = PTR_ERR(*pacl); + *pacl = NULL; + goto out_acl; + } + } + +try_dpacl: + if (dpacl != NULL) { + if (dacl->naces == 0) { + if (pacl == NULL || *pacl == NULL) + error = -ENODATA; + goto out_acl; + } + + error = 0; + *dpacl = _nfsv4_to_posix_one(dacl, flags); + if (IS_ERR(*dpacl)) { + error = PTR_ERR(*dpacl); + *dpacl = NULL; + goto out_acl; + } + } + +out_acl: + if (error && pacl) { + posix_acl_release(*pacl); + *pacl = NULL; + } + nfs4_acl_free(dacl); +out: + return error; +} + +static int +same_who(struct nfs4_ace *a, struct nfs4_ace *b) +{ + return a->whotype == b->whotype && + (a->whotype != NFS4_ACL_WHO_NAMED || a->who == b->who); +} + +static int +complementary_ace_pair(struct nfs4_ace *allow, struct nfs4_ace *deny, + unsigned int flags) +{ + int ignore = 0; + if (!(flags & NFS4_ACL_DIR)) + ignore |= NFS4_ACE_DELETE_CHILD; + return MASK_EQUAL(ignore|deny_mask(allow->access_mask, flags), + ignore|deny->access_mask) && + allow->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE && + deny->type == NFS4_ACE_ACCESS_DENIED_ACE_TYPE && + allow->flag == deny->flag && + same_who(allow, deny); +} + +static inline int +user_obj_from_v4(struct nfs4_acl *n4acl, struct list_head **p, + struct posix_acl *pacl, struct posix_acl_entry **pace, + unsigned int flags) +{ + int error = -EINVAL; + struct nfs4_ace *ace, *ace2; + + ace = get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) + goto out; + if (ace2type(ace) != ACL_USER_OBJ) + goto out; + error = write_pace(ace, pacl, pace, ACL_USER_OBJ, flags); + if (error < 0) + goto out; + error = -EINVAL; + ace2 = get_next_v4_ace(p, &n4acl->ace_head); + if (ace2 == NULL) + goto out; + if (!complementary_ace_pair(ace, ace2, flags)) + goto out; + error = 0; +out: + return error; +} + +static inline int +users_from_v4(struct nfs4_acl *n4acl, struct list_head **p, + struct nfs4_ace **mask_ace, + struct posix_acl *pacl, struct posix_acl_entry **pace, + unsigned int flags) +{ + int error = -EINVAL; + struct nfs4_ace *ace, *ace2; + + ace = get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) + goto out; + while (ace2type(ace) == ACL_USER) { + if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE) + goto out; + if (*mask_ace && + !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask)) + goto out; + *mask_ace = ace; + ace = get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) + goto out; + if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) + goto out; + error = write_pace(ace, pacl, pace, ACL_USER, flags); + if (error < 0) + goto out; + error = -EINVAL; + ace2 = get_next_v4_ace(p, &n4acl->ace_head); + if (ace2 == NULL) + goto out; + if (!complementary_ace_pair(ace, ace2, flags)) + goto out; + if ((*mask_ace)->flag != ace2->flag || + !same_who(*mask_ace, ace2)) + goto out; + ace = 
get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) + goto out; + } + error = 0; +out: + return error; +} + +static inline int +group_obj_and_groups_from_v4(struct nfs4_acl *n4acl, struct list_head **p, + struct nfs4_ace **mask_ace, + struct posix_acl *pacl, struct posix_acl_entry **pace, + unsigned int flags) +{ + int error = -EINVAL; + struct nfs4_ace *ace, *ace2; + struct ace_container *ac; + struct list_head group_l; + + INIT_LIST_HEAD(&group_l); + ace = list_entry(*p, struct nfs4_ace, l_ace); + + /* group owner (mask and allow aces) */ + + if (pacl->a_count != 3) { + /* then the group owner should be preceded by mask */ + if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE) + goto out; + if (*mask_ace && + !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask)) + goto out; + *mask_ace = ace; + ace = get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) + goto out; + + if ((*mask_ace)->flag != ace->flag || !same_who(*mask_ace, ace)) + goto out; + } + + if (ace2type(ace) != ACL_GROUP_OBJ) + goto out; + + ac = kmalloc(sizeof(*ac), GFP_KERNEL); + error = -ENOMEM; + if (ac == NULL) + goto out; + ac->ace = ace; + list_add_tail(&ac->ace_l, &group_l); + + error = -EINVAL; + if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) + goto out; + + error = write_pace(ace, pacl, pace, ACL_GROUP_OBJ, flags); + if (error < 0) + goto out; + + error = -EINVAL; + ace = get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) + goto out; + + /* groups (mask and allow aces) */ + + while (ace2type(ace) == ACL_GROUP) { + if (*mask_ace == NULL) + goto out; + + if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE || + !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask)) + goto out; + *mask_ace = ace; + + ace = get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) + goto out; + ac = kmalloc(sizeof(*ac), GFP_KERNEL); + error = -ENOMEM; + if (ac == NULL) + goto out; + error = -EINVAL; + if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE || + !same_who(ace, *mask_ace)) + goto out; + + ac->ace = ace; + list_add_tail(&ac->ace_l, &group_l); + + error = write_pace(ace, pacl, pace, ACL_GROUP, flags); + if (error < 0) + goto out; + error = -EINVAL; + ace = get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) + goto out; + } + + /* group owner (deny ace) */ + + if (ace2type(ace) != ACL_GROUP_OBJ) + goto out; + ac = list_entry(group_l.next, struct ace_container, ace_l); + ace2 = ac->ace; + if (!complementary_ace_pair(ace2, ace, flags)) + goto out; + list_del(group_l.next); + kfree(ac); + + /* groups (deny aces) */ + + while (!list_empty(&group_l)) { + ace = get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) + goto out; + if (ace2type(ace) != ACL_GROUP) + goto out; + ac = list_entry(group_l.next, struct ace_container, ace_l); + ace2 = ac->ace; + if (!complementary_ace_pair(ace2, ace, flags)) + goto out; + list_del(group_l.next); + kfree(ac); + } + + ace = get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) + goto out; + if (ace2type(ace) != ACL_OTHER) + goto out; + error = 0; +out: + while (!list_empty(&group_l)) { + ac = list_entry(group_l.next, struct ace_container, ace_l); + list_del(group_l.next); + kfree(ac); + } + return error; +} + +static inline int +mask_from_v4(struct nfs4_acl *n4acl, struct list_head **p, + struct nfs4_ace **mask_ace, + struct posix_acl *pacl, struct posix_acl_entry **pace, + unsigned int flags) +{ + int error = -EINVAL; + struct nfs4_ace *ace; + + ace = list_entry(*p, struct nfs4_ace, l_ace); + if (pacl->a_count != 3) { + if (*mask_ace == NULL) + goto out; + 
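/* For ACLs built by _posix_to_nfsv4_one, mask_ace is the deny ACE that
+ * preceded each named allow entry and carries deny_mask(mask); applying
+ * deny_mask() once more recovers the mask permission bits, which
+ * write_pace() then emits as the ACL_MASK entry. */
+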
(*mask_ace)->access_mask = deny_mask((*mask_ace)->access_mask, flags); + write_pace(*mask_ace, pacl, pace, ACL_MASK, flags); + } + error = 0; +out: + return error; +} + +static inline int +other_from_v4(struct nfs4_acl *n4acl, struct list_head **p, + struct posix_acl *pacl, struct posix_acl_entry **pace, + unsigned int flags) +{ + int error = -EINVAL; + struct nfs4_ace *ace, *ace2; + + ace = list_entry(*p, struct nfs4_ace, l_ace); + if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) + goto out; + error = write_pace(ace, pacl, pace, ACL_OTHER, flags); + if (error < 0) + goto out; + error = -EINVAL; + ace2 = get_next_v4_ace(p, &n4acl->ace_head); + if (ace2 == NULL) + goto out; + if (!complementary_ace_pair(ace, ace2, flags)) + goto out; + error = 0; +out: + return error; +} + +static int +calculate_posix_ace_count(struct nfs4_acl *n4acl) +{ + if (n4acl->naces == 6) /* owner, owner group, and other only */ + return 3; + else { /* Otherwise there must be a mask entry. */ + /* Also, the remaining entries are for named users and + * groups, and come in threes (mask, allow, deny): */ + if (n4acl->naces < 7) + return -1; + if ((n4acl->naces - 7) % 3) + return -1; + return 4 + (n4acl->naces - 7)/3; + } +} + + +static struct posix_acl * +_nfsv4_to_posix_one(struct nfs4_acl *n4acl, unsigned int flags) +{ + struct posix_acl *pacl; + int error = -EINVAL, nace = 0; + struct list_head *p; + struct nfs4_ace *mask_ace = NULL; + struct posix_acl_entry *pace; + + nace = calculate_posix_ace_count(n4acl); + if (nace < 0) + goto out_err; + + pacl = posix_acl_alloc(nace, GFP_KERNEL); + error = -ENOMEM; + if (pacl == NULL) + goto out_err; + + pace = &pacl->a_entries[0]; + p = &n4acl->ace_head; + + error = user_obj_from_v4(n4acl, &p, pacl, &pace, flags); + if (error) + goto out_acl; + + error = users_from_v4(n4acl, &p, &mask_ace, pacl, &pace, flags); + if (error) + goto out_acl; + + error = group_obj_and_groups_from_v4(n4acl, &p, &mask_ace, pacl, &pace, + flags); + if (error) + goto out_acl; + + error = mask_from_v4(n4acl, &p, &mask_ace, pacl, &pace, flags); + if (error) + goto out_acl; + error = other_from_v4(n4acl, &p, pacl, &pace, flags); + if (error) + goto out_acl; + + error = -EINVAL; + if (p->next != &n4acl->ace_head) + goto out_acl; + if (pace != pacl->a_entries + pacl->a_count) + goto out_acl; + + sort_pacl(pacl); + + return pacl; +out_acl: + posix_acl_release(pacl); +out_err: + pacl = ERR_PTR(error); + return pacl; +} + +int +nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl) +{ + struct list_head *h, *n; + struct nfs4_ace *ace; + int error = 0; + + list_for_each_safe(h, n, &acl->ace_head) { + ace = list_entry(h, struct nfs4_ace, l_ace); + + if ((ace->flag & NFS4_INHERITANCE_FLAGS) + != NFS4_INHERITANCE_FLAGS) + continue; + + error = nfs4_acl_add_ace(dacl, ace->type, ace->flag, + ace->access_mask, ace->whotype, ace->who) == -1; + if (error < 0) + goto out; + + list_del(h); + kfree(ace); + acl->naces--; + } + +out: + return error; +} + +static short +ace2type(struct nfs4_ace *ace) +{ + switch (ace->whotype) { + case NFS4_ACL_WHO_NAMED: + return (ace->flag & NFS4_ACE_IDENTIFIER_GROUP ? 
+ ACL_GROUP : ACL_USER); + case NFS4_ACL_WHO_OWNER: + return ACL_USER_OBJ; + case NFS4_ACL_WHO_GROUP: + return ACL_GROUP_OBJ; + case NFS4_ACL_WHO_EVERYONE: + return ACL_OTHER; + } + BUG(); + return -1; +} + +EXPORT_SYMBOL(nfs4_acl_posix_to_nfsv4); +EXPORT_SYMBOL(nfs4_acl_nfsv4_to_posix); + +struct nfs4_acl * +nfs4_acl_new(void) +{ + struct nfs4_acl *acl; + + if ((acl = kmalloc(sizeof(*acl), GFP_KERNEL)) == NULL) + return NULL; + + acl->naces = 0; + INIT_LIST_HEAD(&acl->ace_head); + + return acl; +} + +void +nfs4_acl_free(struct nfs4_acl *acl) +{ + struct list_head *h; + struct nfs4_ace *ace; + + if (!acl) + return; + + while (!list_empty(&acl->ace_head)) { + h = acl->ace_head.next; + list_del(h); + ace = list_entry(h, struct nfs4_ace, l_ace); + kfree(ace); + } + + kfree(acl); + + return; +} + +int +nfs4_acl_add_ace(struct nfs4_acl *acl, u32 type, u32 flag, u32 access_mask, + int whotype, uid_t who) +{ + struct nfs4_ace *ace; + + if ((ace = kmalloc(sizeof(*ace), GFP_KERNEL)) == NULL) + return -1; + + ace->type = type; + ace->flag = flag; + ace->access_mask = access_mask; + ace->whotype = whotype; + ace->who = who; + + list_add_tail(&ace->l_ace, &acl->ace_head); + acl->naces++; + + return 0; +} + +static struct { + char *string; + int stringlen; + int type; +} s2t_map[] = { + { + .string = "OWNER@", + .stringlen = sizeof("OWNER@") - 1, + .type = NFS4_ACL_WHO_OWNER, + }, + { + .string = "GROUP@", + .stringlen = sizeof("GROUP@") - 1, + .type = NFS4_ACL_WHO_GROUP, + }, + { + .string = "EVERYONE@", + .stringlen = sizeof("EVERYONE@") - 1, + .type = NFS4_ACL_WHO_EVERYONE, + }, +}; + +int +nfs4_acl_get_whotype(char *p, u32 len) +{ + int i; + + for (i=0; i < sizeof(s2t_map) / sizeof(*s2t_map); i++) { + if (s2t_map[i].stringlen == len && + 0 == memcmp(s2t_map[i].string, p, len)) + return s2t_map[i].type; + } + return NFS4_ACL_WHO_NAMED; +} + +int +nfs4_acl_write_who(int who, char *p) +{ + int i; + + for (i=0; i < sizeof(s2t_map) / sizeof(*s2t_map); i++) { + if (s2t_map[i].type == who) { + memcpy(p, s2t_map[i].string, s2t_map[i].stringlen); + return s2t_map[i].stringlen; + } + } + BUG(); + return -1; +} + +static inline int +match_who(struct nfs4_ace *ace, uid_t owner, gid_t group, uid_t who) +{ + switch (ace->whotype) { + case NFS4_ACL_WHO_NAMED: + return who == ace->who; + case NFS4_ACL_WHO_OWNER: + return who == owner; + case NFS4_ACL_WHO_GROUP: + return who == group; + case NFS4_ACL_WHO_EVERYONE: + return 1; + default: + return 0; + } +} + +/* 0 = granted, -EACCES = denied; mask is an nfsv4 mask, not mode bits */ +int +nfs4_acl_permission(struct nfs4_acl *acl, uid_t owner, gid_t group, + uid_t who, u32 mask) +{ + struct nfs4_ace *ace; + u32 allowed = 0; + + list_for_each_entry(ace, &acl->ace_head, l_ace) { + if (!match_who(ace, group, owner, who)) + continue; + switch (ace->type) { + case NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE: + allowed |= ace->access_mask; + if ((allowed & mask) == mask) + return 0; + break; + case NFS4_ACE_ACCESS_DENIED_ACE_TYPE: + if (ace->access_mask & mask) + return -EACCES; + break; + } + } + return -EACCES; +} + +EXPORT_SYMBOL(nfs4_acl_new); +EXPORT_SYMBOL(nfs4_acl_free); +EXPORT_SYMBOL(nfs4_acl_add_ace); +EXPORT_SYMBOL(nfs4_acl_get_whotype); +EXPORT_SYMBOL(nfs4_acl_write_who); +EXPORT_SYMBOL(nfs4_acl_permission); --- linux-2.6.7/fs/nfsd/Makefile.lsec 2004-06-15 23:19:13.000000000 -0600 +++ linux-2.6.7/fs/nfsd/Makefile 2005-03-23 14:28:24.461331008 -0700 @@ -7,5 +7,6 @@ obj-$(CONFIG_NFSD) += nfsd.o nfsd-y := nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ export.o auth.o lockd.o 
nfscache.o nfsxdr.o stats.o nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o -nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o +nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ + nfs4acl.o nfs4callback.o nfsd-objs := $(nfsd-y) --- linux-2.6.7/fs/nfsd/nfsctl.c.lsec 2004-06-15 23:19:01.000000000 -0600 +++ linux-2.6.7/fs/nfsd/nfsctl.c 2005-03-23 14:28:24.132381016 -0700 @@ -36,7 +36,7 @@ #include /* - * We have a single directory with 8 nodes in it. + * We have a single directory with 9 nodes in it. */ enum { NFSD_Root = 1, @@ -50,6 +50,7 @@ enum { NFSD_List, NFSD_Fh, NFSD_Threads, + NFSD_Leasetime, }; /* @@ -64,6 +65,7 @@ static ssize_t write_getfd(struct file * static ssize_t write_getfs(struct file *file, char *buf, size_t size); static ssize_t write_filehandle(struct file *file, char *buf, size_t size); static ssize_t write_threads(struct file *file, char *buf, size_t size); +static ssize_t write_leasetime(struct file *file, char *buf, size_t size); static ssize_t (*write_op[])(struct file *, char *, size_t) = { [NFSD_Svc] = write_svc, @@ -75,6 +77,7 @@ static ssize_t (*write_op[])(struct file [NFSD_Getfs] = write_getfs, [NFSD_Fh] = write_filehandle, [NFSD_Threads] = write_threads, + [NFSD_Leasetime] = write_leasetime, }; /* an argresp is stored in an allocated page and holds the @@ -393,6 +396,29 @@ static ssize_t write_threads(struct file return strlen(buf); } +extern time_t nfs4_leasetime(void); + +static ssize_t write_leasetime(struct file *file, char *buf, size_t size) +{ + /* if size > 10 seconds, call + * nfs4_reset_lease() then write out the new lease (seconds) as reply + */ + char *mesg = buf; + int rv; + + if (size > 0) { + int lease; + rv = get_int(&mesg, &lease); + if (rv) + return rv; + if (lease < 10 || lease > 3600) + return -EINVAL; + nfs4_reset_lease(lease); + } + sprintf(buf, "%ld\n", nfs4_lease_time()); + return strlen(buf); +} + /*----------------------------------------------------------------------------*/ /* * populating the filesystem. 
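 * One of the nodes below, "nfsv4leasetime", exposes the NFSv4 lease handled
 * by write_leasetime() above: writing a value in seconds (10..3600; anything
 * else returns -EINVAL) calls nfs4_reset_lease(), and the reply is the lease
 * now in effect, e.g. "echo 90 > nfsv4leasetime" with the nfsd filesystem
 * mounted (commonly at /proc/fs/nfsd).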
@@ -411,6 +437,7 @@ static int nfsd_fill_super(struct super_ [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, [NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, + [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, /* last one */ {""} }; return simple_fill_super(sb, 0x6e667364, nfsd_files); --- linux-2.6.7/fs/nfs/callback_proc.c.lsec 2005-03-23 14:28:22.485631360 -0700 +++ linux-2.6.7/fs/nfs/callback_proc.c 2005-03-23 14:28:22.485631360 -0700 @@ -0,0 +1,85 @@ +/* + * linux/fs/nfs/callback_proc.c + * + * Copyright (C) 2004 Trond Myklebust + * + * NFSv4 callback procedures + */ +#include +#include +#include +#include "callback.h" +#include "delegation.h" + +#define NFSDBG_FACILITY NFSDBG_CALLBACK + +unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res) +{ + struct nfs4_client *clp; + struct nfs_delegation *delegation; + struct nfs_inode *nfsi; + struct inode *inode; + + res->bitmap[0] = res->bitmap[1] = 0; + res->status = htonl(NFS4ERR_BADHANDLE); + clp = nfs4_find_client(&args->addr->sin_addr); + if (clp == NULL) + goto out; + inode = nfs_delegation_find_inode(clp, &args->fh); + if (inode == NULL) + goto out_putclient; + nfsi = NFS_I(inode); + down_read(&nfsi->rwsem); + delegation = nfsi->delegation; + if (delegation == NULL || (delegation->type & FMODE_WRITE) == 0) + goto out_iput; + res->size = i_size_read(inode); + res->change_attr = NFS_CHANGE_ATTR(inode); + res->ctime = inode->i_ctime; + res->mtime = inode->i_mtime; + res->bitmap[0] = (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) & + args->bitmap[0]; + res->bitmap[1] = (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) & + args->bitmap[1]; + res->status = 0; +out_iput: + up_read(&nfsi->rwsem); + iput(inode); +out_putclient: + nfs4_put_client(clp); +out: + dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res->status)); + return res->status; +} + +unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy) +{ + struct nfs4_client *clp; + struct inode *inode; + unsigned res; + + res = htonl(NFS4ERR_BADHANDLE); + clp = nfs4_find_client(&args->addr->sin_addr); + if (clp == NULL) + goto out; + inode = nfs_delegation_find_inode(clp, &args->fh); + if (inode == NULL) + goto out_putclient; + /* Set up a helper thread to actually return the delegation */ + switch(nfs_async_inode_return_delegation(inode, &args->stateid)) { + case 0: + res = 0; + break; + case -ENOENT: + res = htonl(NFS4ERR_BAD_STATEID); + break; + default: + res = htonl(NFS4ERR_RESOURCE); + } + iput(inode); +out_putclient: + nfs4_put_client(clp); +out: + dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res)); + return res; +} --- linux-2.6.7/fs/nfs/delegation.c.lsec 2005-03-23 14:28:22.546622088 -0700 +++ linux-2.6.7/fs/nfs/delegation.c 2005-03-23 14:28:22.545622240 -0700 @@ -0,0 +1,320 @@ +/* + * linux/fs/nfs/delegation.c + * + * Copyright (C) 2004 Trond Myklebust + * + * NFS file delegation management + * + */ +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "delegation.h" + +static struct nfs_delegation *nfs_alloc_delegation(void) +{ + return (struct nfs_delegation *)kmalloc(sizeof(struct nfs_delegation), GFP_KERNEL); +} + +static void nfs_free_delegation(struct nfs_delegation *delegation) +{ + if (delegation->cred) + put_rpccred(delegation->cred); + kfree(delegation); +} + +static void nfs_delegation_claim_opens(struct inode *inode) +{ + struct nfs_inode *nfsi = 
NFS_I(inode); + struct nfs_open_context *ctx; + struct nfs4_state *state; + +again: + spin_lock(&inode->i_lock); + list_for_each_entry(ctx, &nfsi->open_files, list) { + state = ctx->state; + if (state == NULL) + continue; + if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) + continue; + get_nfs_open_context(ctx); + spin_unlock(&inode->i_lock); + if (nfs4_open_delegation_recall(ctx->dentry, state) < 0) + return; + put_nfs_open_context(ctx); + goto again; + } + spin_unlock(&inode->i_lock); +} + +/* + * Set up a delegation on an inode + */ +void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) +{ + struct nfs_delegation *delegation = NFS_I(inode)->delegation; + + if (delegation == NULL) + return; + memcpy(delegation->stateid.data, res->delegation.data, + sizeof(delegation->stateid.data)); + delegation->type = res->delegation_type; + delegation->maxsize = res->maxsize; + put_rpccred(cred); + delegation->cred = get_rpccred(cred); + delegation->flags &= ~NFS_DELEGATION_NEED_RECLAIM; + NFS_I(inode)->delegation_state = delegation->type; + wmb(); +} + +/* + * Set up a delegation on an inode + */ +int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) +{ + struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_delegation *delegation; + int status = 0; + + delegation = nfs_alloc_delegation(); + if (delegation == NULL) + return -ENOMEM; + memcpy(delegation->stateid.data, res->delegation.data, + sizeof(delegation->stateid.data)); + delegation->type = res->delegation_type; + delegation->maxsize = res->maxsize; + delegation->cred = get_rpccred(cred); + delegation->inode = inode; + + spin_lock(&clp->cl_lock); + if (nfsi->delegation == NULL) { + list_add(&delegation->super_list, &clp->cl_delegations); + nfsi->delegation = delegation; + nfsi->delegation_state = delegation->type; + delegation = NULL; + } else { + if (memcmp(&delegation->stateid, &nfsi->delegation->stateid, + sizeof(delegation->stateid)) != 0 || + delegation->type != nfsi->delegation->type) { + printk("%s: server %u.%u.%u.%u, handed out a duplicate delegation!\n", + __FUNCTION__, NIPQUAD(clp->cl_addr)); + status = -EIO; + } + } + spin_unlock(&clp->cl_lock); + if (delegation != NULL) + kfree(delegation); + return status; +} + +static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation) +{ + int res = 0; + + __nfs_revalidate_inode(NFS_SERVER(inode), inode); + + res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid); + nfs_free_delegation(delegation); + return res; +} + +/* Sync all data to disk upon delegation return */ +static void nfs_msync_inode(struct inode *inode) +{ + down(&inode->i_sem); + filemap_fdatawrite(inode->i_mapping); + nfs_wb_all(inode); + filemap_fdatawait(inode->i_mapping); + up(&inode->i_sem); +} + +/* + * Basic procedure for returning a delegation to the server + */ +int nfs_inode_return_delegation(struct inode *inode) +{ + struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_delegation *delegation; + int res = 0; + + nfs_msync_inode(inode); + down_read(&clp->cl_sem); + /* Guard against new delegated open calls */ + down_write(&nfsi->rwsem); + spin_lock(&clp->cl_lock); + delegation = nfsi->delegation; + if (delegation != NULL) { + list_del_init(&delegation->super_list); + nfsi->delegation = NULL; + nfsi->delegation_state = 0; + } + spin_unlock(&clp->cl_lock); + 
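/* The delegation is now unhooked from the inode; before returning it to
+ * the server, give any opens that were still relying on it their own
+ * stateids: nfs_delegation_claim_opens() walks the open contexts and
+ * calls nfs4_open_delegation_recall() for each delegated state. */
+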
nfs_delegation_claim_opens(inode); + up_write(&nfsi->rwsem); + up_read(&clp->cl_sem); + nfs_msync_inode(inode); + + if (delegation != NULL) + res = nfs_do_return_delegation(inode, delegation); + return res; +} + +/* + * Return all delegations associated to a super block + */ +void nfs_return_all_delegations(struct super_block *sb) +{ + struct nfs4_client *clp = NFS_SB(sb)->nfs4_state; + struct nfs_delegation *delegation; + struct inode *inode; + + if (clp == NULL) + return; +restart: + spin_lock(&clp->cl_lock); + list_for_each_entry(delegation, &clp->cl_delegations, super_list) { + if (delegation->inode->i_sb != sb) + continue; + inode = igrab(delegation->inode); + if (inode == NULL) + continue; + spin_unlock(&clp->cl_lock); + nfs_inode_return_delegation(inode); + iput(inode); + goto restart; + } + spin_unlock(&clp->cl_lock); +} + +struct recall_threadargs { + struct inode *inode; + struct nfs4_client *clp; + const nfs4_stateid *stateid; + + struct completion started; + int result; +}; + +static int recall_thread(void *data) +{ + struct recall_threadargs *args = (struct recall_threadargs *)data; + struct inode *inode = igrab(args->inode); + struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_delegation *delegation; + + daemonize("nfsv4-delegreturn"); + + nfs_msync_inode(inode); + down_read(&clp->cl_sem); + down_write(&nfsi->rwsem); + spin_lock(&clp->cl_lock); + delegation = nfsi->delegation; + if (delegation != NULL && memcmp(delegation->stateid.data, + args->stateid->data, + sizeof(delegation->stateid.data)) == 0) { + list_del_init(&delegation->super_list); + nfsi->delegation = NULL; + nfsi->delegation_state = 0; + args->result = 0; + } else { + delegation = NULL; + args->result = -ENOENT; + } + spin_unlock(&clp->cl_lock); + complete(&args->started); + nfs_delegation_claim_opens(inode); + up_write(&nfsi->rwsem); + up_read(&clp->cl_sem); + nfs_msync_inode(inode); + + if (delegation != NULL) + nfs_do_return_delegation(inode, delegation); + iput(inode); + module_put_and_exit(0); +} + +/* + * Asynchronous delegation recall! 
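+ * So that the CB_RECALL reply does not have to wait for the data flush
+ * and the DELEGRETURN call, the work is handed to a short-lived kernel
+ * thread ("nfsv4-delegreturn" above).  The caller only waits on the
+ * "started" completion, which fires once the thread has decided whether
+ * the stateid matches (result 0) or not (-ENOENT); the flush and the
+ * actual DELEGRETURN then proceed in the background.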
+ */ +int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid) +{ + struct recall_threadargs data = { + .inode = inode, + .stateid = stateid, + }; + int status; + + init_completion(&data.started); + __module_get(THIS_MODULE); + status = kernel_thread(recall_thread, &data, CLONE_KERNEL); + if (status < 0) + goto out_module_put; + wait_for_completion(&data.started); + return data.result; +out_module_put: + module_put(THIS_MODULE); + return status; +} + +/* + * Retrieve the inode associated with a delegation + */ +struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle) +{ + struct nfs_delegation *delegation; + struct inode *res = NULL; + spin_lock(&clp->cl_lock); + list_for_each_entry(delegation, &clp->cl_delegations, super_list) { + if (nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { + res = igrab(delegation->inode); + break; + } + } + spin_unlock(&clp->cl_lock); + return res; +} + +/* + * Mark all delegations as needing to be reclaimed + */ +void nfs_delegation_mark_reclaim(struct nfs4_client *clp) +{ + struct nfs_delegation *delegation; + spin_lock(&clp->cl_lock); + list_for_each_entry(delegation, &clp->cl_delegations, super_list) + delegation->flags |= NFS_DELEGATION_NEED_RECLAIM; + spin_unlock(&clp->cl_lock); +} + +/* + * Reap all unclaimed delegations after reboot recovery is done + */ +void nfs_delegation_reap_unclaimed(struct nfs4_client *clp) +{ + struct nfs_delegation *delegation, *n; + LIST_HEAD(head); + spin_lock(&clp->cl_lock); + list_for_each_entry_safe(delegation, n, &clp->cl_delegations, super_list) { + if ((delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0) + continue; + list_move(&delegation->super_list, &head); + NFS_I(delegation->inode)->delegation = NULL; + NFS_I(delegation->inode)->delegation_state = 0; + } + spin_unlock(&clp->cl_lock); + while(!list_empty(&head)) { + delegation = list_entry(head.next, struct nfs_delegation, super_list); + list_del(&delegation->super_list); + nfs_free_delegation(delegation); + } +} --- linux-2.6.7/fs/nfs/delegation.h.lsec 2005-03-23 14:28:22.546622088 -0700 +++ linux-2.6.7/fs/nfs/delegation.h 2005-03-23 14:28:22.546622088 -0700 @@ -0,0 +1,56 @@ +/* + * linux/fs/nfs/delegation.h + * + * Copyright (c) Trond Myklebust + * + * Definitions pertaining to NFS delegated files + */ +#ifndef FS_NFS_DELEGATION_H +#define FS_NFS_DELEGATION_H + +#if defined(CONFIG_NFS_V4) +/* + * NFSv4 delegation + */ +struct nfs_delegation { + struct list_head super_list; + struct rpc_cred *cred; + struct inode *inode; + nfs4_stateid stateid; + int type; +#define NFS_DELEGATION_NEED_RECLAIM 1 + long flags; + loff_t maxsize; +}; + +int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); +void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); +int nfs_inode_return_delegation(struct inode *inode); +int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); + +struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle); +void nfs_return_all_delegations(struct super_block *sb); + +void nfs_delegation_mark_reclaim(struct nfs4_client *clp); +void nfs_delegation_reap_unclaimed(struct nfs4_client *clp); + +/* NFSv4 delegation-related procedures */ +int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid); +int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state); + 
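+/*
+ * Lock-free check of the cached delegation rights.  delegation_state is
+ * published by the delegation set-up/reclaim paths (see the wmb() in
+ * nfs_inode_reclaim_delegation); the rmb() here is intended to pair with
+ * that, so callers such as nfs_file_flush() can cheaply skip attribute
+ * revalidation while a read delegation is held, and the write path can
+ * bump change_attr locally under a write delegation.
+ */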
+static inline int nfs_have_delegation(struct inode *inode, int flags) +{ + flags &= FMODE_READ|FMODE_WRITE; + rmb(); + if ((NFS_I(inode)->delegation_state & flags) == flags) + return 1; + return 0; +} +#else +static inline int nfs_have_delegation(struct inode *inode, int flags) +{ + return 0; +} +#endif + +#endif --- linux-2.6.7/fs/nfs/nfs3proc.c.lsec 2004-06-15 23:19:23.000000000 -0600 +++ linux-2.6.7/fs/nfs/nfs3proc.c 2005-03-23 14:28:22.820580440 -0700 @@ -68,18 +68,6 @@ nfs3_async_handle_jukebox(struct rpc_tas return 1; } -static struct rpc_cred * -nfs_cred(struct inode *inode, struct file *filp) -{ - struct rpc_cred *cred = NULL; - - if (filp) - cred = (struct rpc_cred *)filp->private_data; - if (!cred) - cred = NFS_I(inode)->mm_cred; - return cred; -} - /* * Bare-bones access to getattr: this is for nfs_read_super. */ @@ -164,8 +152,7 @@ nfs3_proc_lookup(struct inode *dir, stru return status; } -static int -nfs3_proc_access(struct inode *inode, struct rpc_cred *cred, int mode) +static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry) { struct nfs_fattr fattr; struct nfs3_accessargs arg = { @@ -178,9 +165,10 @@ nfs3_proc_access(struct inode *inode, st .rpc_proc = &nfs3_procedures[NFS3PROC_ACCESS], .rpc_argp = &arg, .rpc_resp = &res, - .rpc_cred = cred + .rpc_cred = entry->cred }; - int status; + int mode = entry->mask; + int status; dprintk("NFS call access\n"); fattr.valid = 0; @@ -200,10 +188,16 @@ nfs3_proc_access(struct inode *inode, st } status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); nfs_refresh_inode(inode, &fattr); - dprintk("NFS reply access\n"); - - if (status == 0 && (arg.access & res.access) != arg.access) - status = -EACCES; + if (status == 0) { + entry->mask = 0; + if (res.access & NFS3_ACCESS_READ) + entry->mask |= MAY_READ; + if (res.access & (NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND | NFS3_ACCESS_DELETE)) + entry->mask |= MAY_WRITE; + if (res.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE)) + entry->mask |= MAY_EXEC; + } + dprintk("NFS reply access, status = %d\n", status); return status; } @@ -227,8 +221,7 @@ nfs3_proc_readlink(struct inode *inode, return status; } -static int -nfs3_proc_read(struct nfs_read_data *rdata, struct file *filp) +static int nfs3_proc_read(struct nfs_read_data *rdata) { int flags = rdata->flags; struct inode * inode = rdata->inode; @@ -237,13 +230,13 @@ nfs3_proc_read(struct nfs_read_data *rda .rpc_proc = &nfs3_procedures[NFS3PROC_READ], .rpc_argp = &rdata->args, .rpc_resp = &rdata->res, + .rpc_cred = rdata->cred, }; int status; dprintk("NFS call read %d @ %Ld\n", rdata->args.count, (long long) rdata->args.offset); fattr->valid = 0; - msg.rpc_cred = nfs_cred(inode, filp); status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags); if (status >= 0) nfs_refresh_inode(inode, fattr); @@ -251,8 +244,7 @@ nfs3_proc_read(struct nfs_read_data *rda return status; } -static int -nfs3_proc_write(struct nfs_write_data *wdata, struct file *filp) +static int nfs3_proc_write(struct nfs_write_data *wdata) { int rpcflags = wdata->flags; struct inode * inode = wdata->inode; @@ -261,13 +253,13 @@ nfs3_proc_write(struct nfs_write_data *w .rpc_proc = &nfs3_procedures[NFS3PROC_WRITE], .rpc_argp = &wdata->args, .rpc_resp = &wdata->res, + .rpc_cred = wdata->cred, }; int status; dprintk("NFS call write %d @ %Ld\n", wdata->args.count, (long long) wdata->args.offset); fattr->valid = 0; - msg.rpc_cred = nfs_cred(inode, filp); status = rpc_call_sync(NFS_CLIENT(inode), &msg, rpcflags); if (status >= 0) nfs_refresh_inode(inode, fattr); @@ -275,8 
+267,7 @@ nfs3_proc_write(struct nfs_write_data *w return status < 0? status : wdata->res.count; } -static int -nfs3_proc_commit(struct nfs_write_data *cdata, struct file *filp) +static int nfs3_proc_commit(struct nfs_write_data *cdata) { struct inode * inode = cdata->inode; struct nfs_fattr * fattr = cdata->res.fattr; @@ -284,13 +275,13 @@ nfs3_proc_commit(struct nfs_write_data * .rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT], .rpc_argp = &cdata->args, .rpc_resp = &cdata->res, + .rpc_cred = cdata->cred, }; int status; dprintk("NFS call commit %d @ %Ld\n", cdata->args.count, (long long) cdata->args.offset); fattr->valid = 0; - msg.rpc_cred = nfs_cred(inode, filp); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status >= 0) nfs_refresh_inode(inode, fattr); @@ -534,6 +525,8 @@ nfs3_proc_symlink(struct inode *dir, str }; int status; + if (path->len > NFS3_MAXPATHLEN) + return -ENAMETOOLONG; dprintk("NFS call symlink %s -> %s\n", name->name, path->name); dir_attr.valid = 0; fattr->valid = 0; @@ -832,27 +825,6 @@ nfs3_proc_commit_setup(struct nfs_write_ rpc_call_setup(task, &msg, 0); } -/* - * Set up the nfspage struct with the right credentials - */ -void -nfs3_request_init(struct nfs_page *req, struct file *filp) -{ - req->wb_cred = get_rpccred(nfs_cred(req->wb_inode, filp)); -} - -static int -nfs3_request_compatible(struct nfs_page *req, struct file *filp, struct page *page) -{ - if (req->wb_file != filp) - return 0; - if (req->wb_page != page) - return 0; - if (req->wb_cred != nfs_file_cred(filp)) - return 0; - return 1; -} - static int nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) { @@ -863,6 +835,7 @@ struct nfs_rpc_ops nfs_v3_clientops = { .version = 3, /* protocol version */ .dentry_ops = &nfs_dentry_operations, .dir_inode_ops = &nfs_dir_inode_operations, + .file_inode_ops = &nfs_file_inode_operations, .getroot = nfs3_proc_get_root, .getattr = nfs3_proc_getattr, .setattr = nfs3_proc_setattr, @@ -892,7 +865,5 @@ struct nfs_rpc_ops nfs_v3_clientops = { .commit_setup = nfs3_proc_commit_setup, .file_open = nfs_open, .file_release = nfs_release, - .request_init = nfs3_request_init, - .request_compatible = nfs3_request_compatible, .lock = nfs3_proc_lock, }; --- linux-2.6.7/fs/nfs/proc.c.lsec 2004-06-15 23:20:03.000000000 -0600 +++ linux-2.6.7/fs/nfs/proc.c 2005-03-23 14:28:23.058544264 -0700 @@ -49,18 +49,6 @@ extern struct rpc_procinfo nfs_procedures[]; -static struct rpc_cred * -nfs_cred(struct inode *inode, struct file *filp) -{ - struct rpc_cred *cred = NULL; - - if (filp) - cred = (struct rpc_cred *)filp->private_data; - if (!cred) - cred = NFS_I(inode)->mm_cred; - return cred; -} - /* * Bare-bones access to getattr: this is for nfs_read_super. 
*/ @@ -167,8 +155,7 @@ nfs_proc_readlink(struct inode *inode, s return status; } -static int -nfs_proc_read(struct nfs_read_data *rdata, struct file *filp) +static int nfs_proc_read(struct nfs_read_data *rdata) { int flags = rdata->flags; struct inode * inode = rdata->inode; @@ -177,15 +164,14 @@ nfs_proc_read(struct nfs_read_data *rdat .rpc_proc = &nfs_procedures[NFSPROC_READ], .rpc_argp = &rdata->args, .rpc_resp = &rdata->res, + .rpc_resp = rdata->cred, }; int status; dprintk("NFS call read %d @ %Ld\n", rdata->args.count, (long long) rdata->args.offset); fattr->valid = 0; - msg.rpc_cred = nfs_cred(inode, filp); status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags); - if (status >= 0) { nfs_refresh_inode(inode, fattr); /* Emulate the eof flag, which isn't normally needed in NFSv2 @@ -198,8 +184,7 @@ nfs_proc_read(struct nfs_read_data *rdat return status; } -static int -nfs_proc_write(struct nfs_write_data *wdata, struct file *filp) +static int nfs_proc_write(struct nfs_write_data *wdata) { int flags = wdata->flags; struct inode * inode = wdata->inode; @@ -208,13 +193,13 @@ nfs_proc_write(struct nfs_write_data *wd .rpc_proc = &nfs_procedures[NFSPROC_WRITE], .rpc_argp = &wdata->args, .rpc_resp = &wdata->res, + .rpc_resp = wdata->cred, }; int status; dprintk("NFS call write %d @ %Ld\n", wdata->args.count, (long long) wdata->args.offset); fattr->valid = 0; - msg.rpc_cred = nfs_cred(inode, filp); status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags); if (status >= 0) { nfs_refresh_inode(inode, fattr); @@ -400,6 +385,8 @@ nfs_proc_symlink(struct inode *dir, stru }; int status; + if (path->len > NFS2_MAXPATHLEN) + return -ENAMETOOLONG; dprintk("NFS call symlink %s -> %s\n", name->name, path->name); fattr->valid = 0; status = rpc_call(NFS_CLIENT(dir), NFSPROC_SYMLINK, &arg, NULL, 0); @@ -619,27 +606,6 @@ nfs_proc_commit_setup(struct nfs_write_d BUG(); } -/* - * Set up the nfspage struct with the right credentials - */ -static void -nfs_request_init(struct nfs_page *req, struct file *filp) -{ - req->wb_cred = get_rpccred(nfs_cred(req->wb_inode, filp)); -} - -static int -nfs_request_compatible(struct nfs_page *req, struct file *filp, struct page *page) -{ - if (req->wb_file != filp) - return 0; - if (req->wb_page != page) - return 0; - if (req->wb_cred != nfs_file_cred(filp)) - return 0; - return 1; -} - static int nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl) { @@ -651,6 +617,7 @@ struct nfs_rpc_ops nfs_v2_clientops = { .version = 2, /* protocol version */ .dentry_ops = &nfs_dentry_operations, .dir_inode_ops = &nfs_dir_inode_operations, + .file_inode_ops = &nfs_file_inode_operations, .getroot = nfs_proc_get_root, .getattr = nfs_proc_getattr, .setattr = nfs_proc_setattr, @@ -680,7 +647,5 @@ struct nfs_rpc_ops nfs_v2_clientops = { .commit_setup = nfs_proc_commit_setup, .file_open = nfs_open, .file_release = nfs_release, - .request_init = nfs_request_init, - .request_compatible = nfs_request_compatible, .lock = nfs_proc_lock, }; --- linux-2.6.7/fs/nfs/file.c.lsec 2004-06-15 23:19:37.000000000 -0600 +++ linux-2.6.7/fs/nfs/file.c 2005-03-23 14:28:22.760589560 -0700 @@ -31,6 +31,8 @@ #include #include +#include "delegation.h" + #define NFSDBG_FACILITY NFSDBG_FILE static long nfs_file_fcntl(int fd, unsigned int cmd, @@ -66,6 +68,19 @@ struct inode_operations nfs_file_inode_o .setattr = nfs_setattr, }; +#ifdef CONFIG_NFS_V4 + +struct inode_operations nfs4_file_inode_operations = { + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .getxattr = 
nfs_getxattr, + .setxattr = nfs_setxattr, + .listxattr = nfs_listxattr, +}; + +#endif /* CONFIG_NFS_V4 */ + /* Hack for future NFS swap support */ #ifndef IS_SWAPFILE # define IS_SWAPFILE(inode) (0) @@ -127,6 +142,7 @@ nfs_file_release(struct inode *inode, st static int nfs_file_flush(struct file *file) { + struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; struct inode *inode = file->f_dentry->d_inode; int status; @@ -138,9 +154,9 @@ nfs_file_flush(struct file *file) /* Ensure that data+attribute caches are up to date after close() */ status = nfs_wb_all(inode); if (!status) { - status = file->f_error; - file->f_error = 0; - if (!status) + status = ctx->error; + ctx->error = 0; + if (!status && !nfs_have_delegation(inode, FMODE_READ)) __nfs_revalidate_inode(NFS_SERVER(inode), inode); } unlock_kernel(); @@ -211,6 +227,7 @@ nfs_file_mmap(struct file * file, struct static int nfs_fsync(struct file *file, struct dentry *dentry, int datasync) { + struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; struct inode *inode = dentry->d_inode; int status; @@ -219,8 +236,8 @@ nfs_fsync(struct file *file, struct dent lock_kernel(); status = nfs_wb_all(inode); if (!status) { - status = file->f_error; - file->f_error = 0; + status = ctx->error; + ctx->error = 0; } unlock_kernel(); return status; @@ -302,6 +319,90 @@ out_swapfile: goto out; } +static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) +{ + struct inode *inode = filp->f_mapping->host; + int status; + + lock_kernel(); + status = NFS_PROTO(inode)->lock(filp, cmd, fl); + unlock_kernel(); + return status; +} + +static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) +{ + struct inode *inode = filp->f_mapping->host; + sigset_t oldset; + int status; + + rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset); + /* + * Flush all pending writes before doing anything + * with locks.. + */ + filemap_fdatawrite(filp->f_mapping); + down(&inode->i_sem); + nfs_wb_all(inode); + up(&inode->i_sem); + filemap_fdatawait(filp->f_mapping); + + /* NOTE: special case + * If we're signalled while cleaning up locks on process exit, we + * still need to complete the unlock. + */ + lock_kernel(); + status = NFS_PROTO(inode)->lock(filp, cmd, fl); + rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset); + return status; +} + +static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) +{ + struct inode *inode = filp->f_mapping->host; + int status; + + /* + * Flush all pending writes before doing anything + * with locks.. + */ + status = filemap_fdatawrite(filp->f_mapping); + if (status == 0) { + down(&inode->i_sem); + status = nfs_wb_all(inode); + up(&inode->i_sem); + if (status == 0) + status = filemap_fdatawait(filp->f_mapping); + } + if (status < 0) + return status; + + lock_kernel(); + status = NFS_PROTO(inode)->lock(filp, cmd, fl); + /* If we were signalled we still need to ensure that + * we clean up any state on the server. We therefore + * record the lock call as having succeeded in order to + * ensure that locks_remove_posix() cleans it out when + * the process exits. + */ + if (status == -EINTR || status == -ERESTARTSYS) + posix_lock_file(filp, fl); + unlock_kernel(); + if (status < 0) + return status; + /* + * Make sure we clear the cache whenever we try to get the lock. + * This makes locking act as a cache coherency point. 
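+ * Data dirtied before the lock was granted is written back and waited on,
+ * and nfs_zap_caches() below invalidates the page and attribute caches,
+ * so reads done while holding the lock go to the server instead of
+ * serving possibly stale cached data.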
+ */ + filemap_fdatawrite(filp->f_mapping); + down(&inode->i_sem); + nfs_wb_all(inode); /* we may have slept */ + up(&inode->i_sem); + filemap_fdatawait(filp->f_mapping); + nfs_zap_caches(inode); + return 0; +} + /* * Lock a (portion of) a file */ @@ -309,8 +410,6 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) { struct inode * inode = filp->f_mapping->host; - int status = 0; - int status2; dprintk("NFS: nfs_lock(f=%s/%ld, t=%x, fl=%x, r=%Ld:%Ld)\n", inode->i_sb->s_id, inode->i_ino, @@ -328,8 +427,8 @@ nfs_lock(struct file *filp, int cmd, str /* Fake OK code if mounted without NLM support */ if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) { if (IS_GETLK(cmd)) - status = LOCK_USE_CLNT; - goto out_ok; + return LOCK_USE_CLNT; + return 0; } } @@ -340,45 +439,12 @@ nfs_lock(struct file *filp, int cmd, str * Not sure whether that would be unique, though, or whether * that would break in other places. */ - if (!fl->fl_owner || !(fl->fl_flags & FL_POSIX)) + if (!(fl->fl_flags & FL_POSIX)) return -ENOLCK; - /* - * Flush all pending writes before doing anything - * with locks.. - */ - status = filemap_fdatawrite(filp->f_mapping); - down(&inode->i_sem); - status2 = nfs_wb_all(inode); - if (!status) - status = status2; - up(&inode->i_sem); - status2 = filemap_fdatawait(filp->f_mapping); - if (!status) - status = status2; - if (status < 0) - return status; - - lock_kernel(); - status = NFS_PROTO(inode)->lock(filp, cmd, fl); - unlock_kernel(); - if (status < 0) - return status; - - status = 0; - - /* - * Make sure we clear the cache whenever we try to get the lock. - * This makes locking act as a cache coherency point. - */ - out_ok: - if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) { - filemap_fdatawrite(filp->f_mapping); - down(&inode->i_sem); - nfs_wb_all(inode); /* we may have slept */ - up(&inode->i_sem); - filemap_fdatawait(filp->f_mapping); - nfs_zap_caches(inode); - } - return status; + if (IS_GETLK(cmd)) + return do_getlk(filp, cmd, fl); + if (fl->fl_type == F_UNLCK) + return do_unlk(filp, cmd, fl); + return do_setlk(filp, cmd, fl); } --- linux-2.6.7/fs/nfs/write.c.lsec 2004-06-15 23:19:43.000000000 -0600 +++ linux-2.6.7/fs/nfs/write.c 2005-03-23 14:28:23.225518880 -0700 @@ -63,6 +63,8 @@ #include #include +#include "delegation.h" + #define NFSDBG_FACILITY NFSDBG_PAGECACHE #define MIN_POOL_WRITE (32) @@ -71,7 +73,8 @@ /* * Local function declarations */ -static struct nfs_page * nfs_update_request(struct file*, struct inode *, +static struct nfs_page * nfs_update_request(struct nfs_open_context*, + struct inode *, struct page *, unsigned int, unsigned int); static void nfs_writeback_done_partial(struct nfs_write_data *, int); @@ -173,7 +176,7 @@ static void nfs_mark_uptodate(struct pag * Write a page synchronously. * Offset is the data offset within the page. 
*/ -static int nfs_writepage_sync(struct file *file, struct inode *inode, +static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode, struct page *page, unsigned int offset, unsigned int count, int how) { @@ -187,9 +190,10 @@ static int nfs_writepage_sync(struct fil memset(wdata, 0, sizeof(*wdata)); wdata->flags = how; + wdata->cred = ctx->cred; wdata->inode = inode; wdata->args.fh = NFS_FH(inode); - wdata->args.lockowner = current->files; + wdata->args.context = ctx; wdata->args.pages = &page; wdata->args.stable = NFS_FILE_SYNC; wdata->args.pgbase = offset; @@ -208,7 +212,7 @@ static int nfs_writepage_sync(struct fil wdata->args.count = count; wdata->args.offset = page_offset(page) + wdata->args.pgbase; - result = NFS_PROTO(inode)->write(wdata, file); + result = NFS_PROTO(inode)->write(wdata); if (result < 0) { /* Must mark the page invalid after I/O error */ @@ -241,13 +245,14 @@ io_error: return written ? written : result; } -static int nfs_writepage_async(struct file *file, struct inode *inode, - struct page *page, unsigned int offset, unsigned int count) +static int nfs_writepage_async(struct nfs_open_context *ctx, + struct inode *inode, struct page *page, + unsigned int offset, unsigned int count) { struct nfs_page *req; int status; - req = nfs_update_request(file, inode, page, offset, count); + req = nfs_update_request(ctx, inode, page, offset, count); status = (IS_ERR(req)) ? PTR_ERR(req) : 0; if (status < 0) goto out; @@ -274,6 +279,7 @@ static int wb_priority(struct writeback_ */ int nfs_writepage(struct page *page, struct writeback_control *wbc) { + struct nfs_open_context *ctx; struct inode *inode = page->mapping->host; unsigned long end_index; unsigned offset = PAGE_CACHE_SIZE; @@ -308,16 +314,21 @@ int nfs_writepage(struct page *page, str if (page->index >= end_index+1 || !offset) goto out; do_it: + ctx = nfs_find_open_context(inode, FMODE_WRITE); + if (ctx == NULL) { + err = -EBADF; + goto out; + } lock_kernel(); if (!IS_SYNC(inode) && inode_referenced) { - err = nfs_writepage_async(NULL, inode, page, 0, offset); + err = nfs_writepage_async(ctx, inode, page, 0, offset); if (err >= 0) { err = 0; if (wbc->for_reclaim) nfs_flush_inode(inode, 0, 0, FLUSH_STABLE); } } else { - err = nfs_writepage_sync(NULL, inode, page, 0, + err = nfs_writepage_sync(ctx, inode, page, 0, offset, priority); if (err >= 0) { if (err != offset) @@ -326,6 +337,7 @@ do_it: } } unlock_kernel(); + put_nfs_open_context(ctx); out: unlock_page(page); if (inode_referenced) @@ -374,8 +386,7 @@ out: /* * Insert a write request into an inode */ -static inline int -nfs_inode_add_request(struct inode *inode, struct nfs_page *req) +static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) { struct nfs_inode *nfsi = NFS_I(inode); int error; @@ -387,6 +398,8 @@ nfs_inode_add_request(struct inode *inod if (!nfsi->npages) { igrab(inode); nfs_begin_data_update(inode); + if (nfs_have_delegation(inode, FMODE_WRITE)) + nfsi->change_attr++; } nfsi->npages++; req->wb_count++; @@ -404,7 +417,7 @@ nfs_inode_remove_request(struct nfs_page BUG_ON (!NFS_WBACK_BUSY(req)); spin_lock(&nfs_wreq_lock); - inode = req->wb_inode; + inode = req->wb_context->dentry->d_inode; nfsi = NFS_I(inode); radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); nfsi->npages--; @@ -450,7 +463,7 @@ nfs_find_request(struct inode *inode, un static void nfs_mark_request_dirty(struct nfs_page *req) { - struct inode *inode = req->wb_inode; + struct inode *inode = req->wb_context->dentry->d_inode; struct nfs_inode 
*nfsi = NFS_I(inode); spin_lock(&nfs_wreq_lock); @@ -467,7 +480,7 @@ nfs_mark_request_dirty(struct nfs_page * static inline int nfs_dirty_request(struct nfs_page *req) { - struct nfs_inode *nfsi = NFS_I(req->wb_inode); + struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); return !list_empty(&req->wb_list) && req->wb_list_head == &nfsi->dirty; } @@ -478,7 +491,7 @@ nfs_dirty_request(struct nfs_page *req) static void nfs_mark_request_commit(struct nfs_page *req) { - struct inode *inode = req->wb_inode; + struct inode *inode = req->wb_context->dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&nfs_wreq_lock); @@ -619,9 +632,9 @@ static int nfs_wait_on_write_congestion( * * Note: Should always be called with the Page Lock held! */ -static struct nfs_page * -nfs_update_request(struct file* file, struct inode *inode, struct page *page, - unsigned int offset, unsigned int bytes) +static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, + struct inode *inode, struct page *page, + unsigned int offset, unsigned int bytes) { struct nfs_server *server = NFS_SERVER(inode); struct nfs_page *req, *new = NULL; @@ -668,13 +681,9 @@ nfs_update_request(struct file* file, st } spin_unlock(&nfs_wreq_lock); - new = nfs_create_request(file, inode, page, offset, bytes); + new = nfs_create_request(ctx, inode, page, offset, bytes); if (IS_ERR(new)) return new; - if (file) { - new->wb_file = file; - get_file(file); - } } /* We have a request for our page. @@ -684,7 +693,7 @@ nfs_update_request(struct file* file, st * request. */ rqend = req->wb_offset + req->wb_bytes; - if (req->wb_file != file + if (req->wb_context != ctx || req->wb_page != page || !nfs_dirty_request(req) || offset > rqend || end < req->wb_offset) { @@ -705,9 +714,9 @@ nfs_update_request(struct file* file, st return req; } -int -nfs_flush_incompatible(struct file *file, struct page *page) +int nfs_flush_incompatible(struct file *file, struct page *page) { + struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; struct inode *inode = page->mapping->host; struct nfs_page *req; int status = 0; @@ -721,7 +730,7 @@ nfs_flush_incompatible(struct file *file */ req = nfs_find_request(inode, page->index); if (req) { - if (!NFS_PROTO(inode)->request_compatible(req, file, page)) + if (req->wb_page != page || ctx != req->wb_context) status = nfs_wb_page(inode, page); nfs_release_request(req); } @@ -737,6 +746,7 @@ nfs_flush_incompatible(struct file *file int nfs_updatepage(struct file *file, struct page *page, unsigned int offset, unsigned int count) { + struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; struct dentry *dentry = file->f_dentry; struct inode *inode = page->mapping->host; struct nfs_page *req; @@ -747,7 +757,7 @@ int nfs_updatepage(struct file *file, st count, (long long)(page_offset(page) +offset)); if (IS_SYNC(inode)) { - status = nfs_writepage_sync(file, inode, page, offset, count, 0); + status = nfs_writepage_sync(ctx, inode, page, offset, count, 0); if (status > 0) { if (offset == 0 && status == PAGE_CACHE_SIZE) SetPageUptodate(page); @@ -784,7 +794,7 @@ int nfs_updatepage(struct file *file, st * it out now. */ do { - req = nfs_update_request(file, inode, page, offset, count); + req = nfs_update_request(ctx, inode, page, offset, count); status = (IS_ERR(req)) ? PTR_ERR(req) : 0; if (status != -EBUSY) break; @@ -860,16 +870,15 @@ static void nfs_write_rpcsetup(struct nf * NB: take care not to mess about with data->commit et al. 
*/ data->req = req; - data->inode = inode = req->wb_inode; - data->cred = req->wb_cred; + data->inode = inode = req->wb_context->dentry->d_inode; + data->cred = req->wb_context->cred; data->args.fh = NFS_FH(inode); data->args.offset = req_offset(req) + offset; data->args.pgbase = req->wb_pgbase + offset; data->args.pages = data->pagevec; data->args.count = count; - data->args.lockowner = req->wb_lockowner; - data->args.state = req->wb_state; + data->args.context = req->wb_context; data->res.fattr = &data->fattr; data->res.count = count; @@ -1029,7 +1038,7 @@ nfs_flush_list(struct list_head *head, i while (!list_empty(head)) { pages += nfs_coalesce_requests(head, &one_request, wpages); req = nfs_list_entry(one_request.next); - error = nfs_flush_one(&one_request, req->wb_inode, how); + error = nfs_flush_one(&one_request, req->wb_context->dentry->d_inode, how); if (error < 0) break; } @@ -1054,16 +1063,15 @@ static void nfs_writeback_done_partial(s struct page *page = req->wb_page; dprintk("NFS: write (%s/%Ld %d@%Ld)", - req->wb_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_inode), + req->wb_context->dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); if (status < 0) { ClearPageUptodate(page); SetPageError(page); - if (req->wb_file) - req->wb_file->f_error = status; + req->wb_context->error = status; dprintk(", error = %d\n", status); } else { #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) @@ -1104,16 +1112,15 @@ static void nfs_writeback_done_full(stru page = req->wb_page; dprintk("NFS: write (%s/%Ld %d@%Ld)", - req->wb_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_inode), + req->wb_context->dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); if (status < 0) { ClearPageUptodate(page); SetPageError(page); - if (req->wb_file) - req->wb_file->f_error = status; + req->wb_context->error = status; end_page_writeback(page); nfs_inode_remove_request(req); dprintk(", error = %d\n", status); @@ -1232,7 +1239,7 @@ static void nfs_commit_rpcsetup(struct l list_splice_init(head, &data->pages); first = nfs_list_entry(data->pages.next); last = nfs_list_entry(data->pages.prev); - inode = first->wb_inode; + inode = first->wb_context->dentry->d_inode; /* * Determine the offset range of requests in the COMMIT call. 
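Throughout these write.c hunks the per-request struct file and cached credential are replaced by a shared open context reached through req->wb_context. The sketch below is inferred purely from the fields this patch dereferences (dentry, cred, state, pid, error, waitq); it is not the authoritative definition from the NFS headers, and the refcount field is an assumption implied by get_nfs_open_context()/put_nfs_open_context().

/* Sketch only: what an nfs_open_context is assumed to carry in this patch. */
struct nfs_open_context_sketch {
	struct dentry		*dentry;	/* lets a request reach its inode without a filp */
	struct rpc_cred		*cred;		/* credential captured at open time */
	struct nfs4_state	*state;		/* NFSv4 open state, NULL for v2/v3 */
	pid_t			pid;		/* POSIX lock owner */
	int			error;		/* latched async write error, was file->f_error */
	wait_queue_head_t	waitq;		/* woken by nfs_unlock_request() */
	atomic_t		count;		/* assumed: shared by the file and its nfs_pages */
};

/* Typical use in the rewritten paths: writeback no longer needs a live
 * struct file, only the context pinned by each nfs_page request. */
static inline struct inode *ctx_inode(struct nfs_open_context_sketch *ctx)
{
	return ctx->dentry->d_inode;
}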
@@ -1246,7 +1253,7 @@ static void nfs_commit_rpcsetup(struct l len = 0; data->inode = inode; - data->cred = first->wb_cred; + data->cred = first->wb_context->cred; data->args.fh = NFS_FH(data->inode); data->args.offset = start; @@ -1313,13 +1320,12 @@ nfs_commit_done(struct rpc_task *task) nfs_list_remove_request(req); dprintk("NFS: commit (%s/%Ld %d@%Ld)", - req->wb_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_inode), + req->wb_context->dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); if (task->tk_status < 0) { - if (req->wb_file) - req->wb_file->f_error = task->tk_status; + req->wb_context->error = task->tk_status; nfs_inode_remove_request(req); dprintk(", error = %d\n", task->tk_status); goto next; --- linux-2.6.7/fs/nfs/nfs4xdr.c.lsec 2004-06-15 23:20:26.000000000 -0600 +++ linux-2.6.7/fs/nfs/nfs4xdr.c 2005-03-23 14:28:23.056544568 -0700 @@ -84,9 +84,13 @@ static int nfs_stat_to_errno(int); ((3+NFS4_FHSIZE) >> 2)) #define encode_getattr_maxsz (op_encode_hdr_maxsz + 3) #define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2)) +#define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) #define nfs4_fattr_bitmap_maxsz (36 + 2 * nfs4_name_maxsz) #define decode_getattr_maxsz (op_decode_hdr_maxsz + 3 + \ nfs4_fattr_bitmap_maxsz) +#define encode_setattr_maxsz (op_decode_hdr_maxsz + 4 + \ + nfs4_fattr_bitmap_maxsz) +#define decode_setattr_maxsz (op_decode_hdr_maxsz + 3) #define encode_savefh_maxsz (op_encode_hdr_maxsz) #define decode_savefh_maxsz (op_decode_hdr_maxsz) #define encode_fsinfo_maxsz (op_encode_hdr_maxsz + 2) @@ -118,10 +122,17 @@ static int nfs_stat_to_errno(int); #define encode_link_maxsz (op_encode_hdr_maxsz + \ nfs4_name_maxsz) #define decode_link_maxsz (op_decode_hdr_maxsz + 5) +#define encode_symlink_maxsz (op_encode_hdr_maxsz + \ + 1 + nfs4_name_maxsz + \ + nfs4_path_maxsz + \ + nfs4_fattr_bitmap_maxsz) +#define decode_symlink_maxsz (op_decode_hdr_maxsz + 8) #define encode_create_maxsz (op_encode_hdr_maxsz + \ - 2 + 2 * nfs4_name_maxsz + \ + 2 + nfs4_name_maxsz + \ nfs4_fattr_bitmap_maxsz) #define decode_create_maxsz (op_decode_hdr_maxsz + 8) +#define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4) +#define decode_delegreturn_maxsz (op_decode_hdr_maxsz) #define NFS4_enc_compound_sz (1024) /* XXX: large enough? */ #define NFS4_dec_compound_sz (1024) /* XXX: large enough? 
*/ #define NFS4_enc_read_sz (compound_encode_hdr_maxsz + \ @@ -172,16 +183,14 @@ static int nfs_stat_to_errno(int); #define NFS4_dec_open_confirm_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ op_decode_hdr_maxsz + 4) -#define NFS4_enc_open_reclaim_sz (compound_encode_hdr_maxsz + \ +#define NFS4_enc_open_noattr_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ op_encode_hdr_maxsz + \ - 11 + \ - encode_getattr_maxsz) -#define NFS4_dec_open_reclaim_sz (compound_decode_hdr_maxsz + \ + 11) +#define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ op_decode_hdr_maxsz + \ - 4 + 5 + 2 + 3 + \ - decode_getattr_maxsz) + 4 + 5 + 2 + 3) #define NFS4_enc_open_downgrade_sz \ (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ @@ -313,6 +322,16 @@ static int nfs_stat_to_errno(int); decode_savefh_maxsz + \ decode_putfh_maxsz + \ decode_link_maxsz) +#define NFS4_enc_symlink_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_symlink_maxsz + \ + encode_getattr_maxsz + \ + encode_getfh_maxsz) +#define NFS4_dec_symlink_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_symlink_maxsz + \ + decode_getattr_maxsz + \ + decode_getfh_maxsz) #define NFS4_enc_create_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_create_maxsz + \ @@ -339,6 +358,33 @@ static int nfs_stat_to_errno(int); encode_getattr_maxsz) #define NFS4_dec_server_caps_sz (compound_decode_hdr_maxsz + \ decode_getattr_maxsz) +#define NFS4_enc_delegreturn_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_delegreturn_maxsz) +#define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \ + decode_delegreturn_maxsz) +#define username_maxsz (1 + ((IDMAP_NAMESZ + 3) >> 2)) +/* XXX: fix ACL bounds */ +#define ace_maxsz (3 + username_maxsz) +#define NFS_ACL_MAX_ENTRIES 32 +#define acl_maxentries ((NFS_ACL_MAX_ENTRIES - 3) * 3 + 6) +#define acl_maxsz (1 + acl_maxentries * ace_maxsz) +#define NFS4_enc_getacl_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz +#define username_maxsz (1 + ((IDMAP_NAMESZ + 3) >> 2)) +#define ace_maxsz (3 + username_maxsz) +#define acl_maxentries ((NFS_ACL_MAX_ENTRIES - 3) * 3 + 6) +#define acl_maxsz (1 + acl_maxentries * ace_maxsz) +#define NFS4_dec_getacl_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + 3 + 1 + acl_maxsz) +#define NFS4_enc_setacl_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + op_encode_hdr_maxsz + 4 + 1 + acl_maxsz) +#define NFS4_dec_setacl_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_setattr_maxsz) static struct { unsigned int mode; @@ -388,6 +434,15 @@ struct compound_hdr { BUG_ON(!p); \ } while (0) +static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) +{ + uint32_t *p; + + p = xdr_reserve_space(xdr, 4 + len); + BUG_ON(p == NULL); + xdr_encode_opaque(p, str, len); +} + static int encode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) { uint32_t *p; @@ -402,6 +457,15 @@ static int encode_compound_hdr(struct xd return 0; } +static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf) +{ + uint32_t *p; + + p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); + BUG_ON(p == NULL); + xdr_encode_opaque_fixed(p, verf->data, NFS4_VERIFIER_SIZE); +} + static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs_server *server) { char owner_name[IDMAP_NAMESZ]; @@ -420,7 +484,7 @@ static int encode_attrs(struct 
xdr_strea * In the worst-case, this would be * 12(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime) * = 36 bytes, plus any contribution from variable-length fields - * such as owner/group/acl's. + * such as owner/group. */ len = 16; @@ -742,19 +806,12 @@ static int encode_lookup(struct xdr_stre return 0; } -static int encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg) +static void encode_share_access(struct xdr_stream *xdr, int open_flags) { - int status; uint32_t *p; - /* - * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4, - * owner 4, opentype 4 = 36 - */ - RESERVE_SPACE(36); - WRITE32(OP_OPEN); - WRITE32(arg->seqid); - switch (arg->share_access) { + RESERVE_SPACE(8); + switch (open_flags & (FMODE_READ|FMODE_WRITE)) { case FMODE_READ: WRITE32(NFS4_SHARE_ACCESS_READ); break; @@ -767,84 +824,135 @@ static int encode_open(struct xdr_stream default: BUG(); } - WRITE32(0); /* for linux, share_deny = 0 always */ + WRITE32(0); /* for linux, share_deny = 0 always */ +} + +static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_openargs *arg) +{ + uint32_t *p; + /* + * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4, + * owner 4 = 32 + */ + RESERVE_SPACE(8); + WRITE32(OP_OPEN); + WRITE32(arg->seqid); + encode_share_access(xdr, arg->open_flags); + RESERVE_SPACE(16); WRITE64(arg->clientid); WRITE32(4); WRITE32(arg->id); - WRITE32(arg->opentype); +} - if (arg->opentype == NFS4_OPEN_CREATE) { - if (arg->createmode == NFS4_CREATE_EXCLUSIVE) { - RESERVE_SPACE(12); - WRITE32(arg->createmode); - WRITEMEM(arg->u.verifier.data, sizeof(arg->u.verifier.data)); - } - else if (arg->u.attrs) { - RESERVE_SPACE(4); - WRITE32(arg->createmode); - if ((status = encode_attrs(xdr, arg->u.attrs, arg->server))) - return status; - } - else { - RESERVE_SPACE(12); - WRITE32(arg->createmode); - WRITE32(0); - WRITE32(0); - } +static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg) +{ + uint32_t *p; + + RESERVE_SPACE(4); + switch(arg->open_flags & O_EXCL) { + case 0: + WRITE32(NFS4_CREATE_UNCHECKED); + encode_attrs(xdr, arg->u.attrs, arg->server); + break; + default: + WRITE32(NFS4_CREATE_EXCLUSIVE); + encode_nfs4_verifier(xdr, &arg->u.verifier); } +} - RESERVE_SPACE(8 + arg->name->len); - WRITE32(NFS4_OPEN_CLAIM_NULL); - WRITE32(arg->name->len); - WRITEMEM(arg->name->name, arg->name->len); +static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *arg) +{ + uint32_t *p; - return 0; + RESERVE_SPACE(4); + switch (arg->open_flags & O_CREAT) { + case 0: + WRITE32(NFS4_OPEN_NOCREATE); + break; + default: + BUG_ON(arg->claim != NFS4_OPEN_CLAIM_NULL); + WRITE32(NFS4_OPEN_CREATE); + encode_createmode(xdr, arg); + } } -static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg) +static inline void encode_delegation_type(struct xdr_stream *xdr, int delegation_type) { uint32_t *p; - RESERVE_SPACE(8+sizeof(arg->stateid.data)); - WRITE32(OP_OPEN_CONFIRM); - WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); - WRITE32(arg->seqid); + RESERVE_SPACE(4); + switch (delegation_type) { + case 0: + WRITE32(NFS4_OPEN_DELEGATE_NONE); + break; + case FMODE_READ: + WRITE32(NFS4_OPEN_DELEGATE_READ); + break; + case FMODE_WRITE|FMODE_READ: + WRITE32(NFS4_OPEN_DELEGATE_WRITE); + break; + default: + BUG(); + } +} - return 0; +static inline void encode_claim_null(struct xdr_stream *xdr, const struct qstr *name) +{ + uint32_t *p; + + RESERVE_SPACE(4); + 
WRITE32(NFS4_OPEN_CLAIM_NULL); + encode_string(xdr, name->len, name->name); } +static inline void encode_claim_previous(struct xdr_stream *xdr, int type) +{ + uint32_t *p; + + RESERVE_SPACE(4); + WRITE32(NFS4_OPEN_CLAIM_PREVIOUS); + encode_delegation_type(xdr, type); +} -static int encode_open_reclaim(struct xdr_stream *xdr, const struct nfs_open_reclaimargs *arg) +static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struct qstr *name, const nfs4_stateid *stateid) { uint32_t *p; - /* - * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4, - * owner 4, opentype 4, claim 4, delegation_type 4 = 44 - */ - RESERVE_SPACE(44); - WRITE32(OP_OPEN); - WRITE32(arg->seqid); - switch (arg->share_access) { - case FMODE_READ: - WRITE32(NFS4_SHARE_ACCESS_READ); + RESERVE_SPACE(4+sizeof(stateid->data)); + WRITE32(NFS4_OPEN_CLAIM_DELEGATE_CUR); + WRITEMEM(stateid->data, sizeof(stateid->data)); + encode_string(xdr, name->len, name->name); +} + +static int encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg) +{ + encode_openhdr(xdr, arg); + encode_opentype(xdr, arg); + switch (arg->claim) { + case NFS4_OPEN_CLAIM_NULL: + encode_claim_null(xdr, arg->name); break; - case FMODE_WRITE: - WRITE32(NFS4_SHARE_ACCESS_WRITE); + case NFS4_OPEN_CLAIM_PREVIOUS: + encode_claim_previous(xdr, arg->u.delegation_type); break; - case FMODE_READ|FMODE_WRITE: - WRITE32(NFS4_SHARE_ACCESS_BOTH); + case NFS4_OPEN_CLAIM_DELEGATE_CUR: + encode_claim_delegate_cur(xdr, arg->name, &arg->u.delegation); break; default: BUG(); } - WRITE32(0); /* for linux, share_deny = 0 always */ - WRITE64(arg->clientid); - WRITE32(4); - WRITE32(arg->id); - WRITE32(NFS4_OPEN_NOCREATE); - WRITE32(NFS4_OPEN_CLAIM_PREVIOUS); - WRITE32(NFS4_OPEN_DELEGATE_NONE); + return 0; +} + +static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg) +{ + uint32_t *p; + + RESERVE_SPACE(8+sizeof(arg->stateid.data)); + WRITE32(OP_OPEN_CONFIRM); + WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); + WRITE32(arg->seqid); + return 0; } @@ -852,14 +960,11 @@ static int encode_open_downgrade(struct { uint32_t *p; - RESERVE_SPACE(16+sizeof(arg->stateid.data)); + RESERVE_SPACE(8+sizeof(arg->stateid.data)); WRITE32(OP_OPEN_DOWNGRADE); WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); WRITE32(arg->seqid); - WRITE32(arg->share_access); - /* No deny modes */ - WRITE32(0); - + encode_share_access(xdr, arg->open_flags); return 0; } @@ -887,15 +992,15 @@ static int encode_putrootfh(struct xdr_s return 0; } -static void encode_stateid(struct xdr_stream *xdr, struct nfs4_state *state, fl_owner_t lockowner) +static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx) { extern nfs4_stateid zero_stateid; nfs4_stateid stateid; uint32_t *p; RESERVE_SPACE(16); - if (state != NULL) { - nfs4_copy_stateid(&stateid, state, lockowner); + if (ctx->state != NULL) { + nfs4_copy_stateid(&stateid, ctx->state, ctx->pid); WRITEMEM(stateid.data, sizeof(stateid.data)); } else WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data)); @@ -908,7 +1013,7 @@ static int encode_read(struct xdr_stream RESERVE_SPACE(4); WRITE32(OP_READ); - encode_stateid(xdr, args->state, args->lockowner); + encode_stateid(xdr, args->context); RESERVE_SPACE(12); WRITE64(args->offset); @@ -1003,6 +1108,45 @@ static int encode_renew(struct xdr_strea return 0; } +extern nfs4_stateid zero_stateid; + +static int +encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg) +{ + uint32_t *p; + uint32_t *q = 
(uint32_t *)arg->acl; + uint32_t *end = (uint32_t *)(arg->acl + arg->acl_len); + uint32_t tmp; + int naces, i; + + RESERVE_SPACE(4+sizeof(zero_stateid.data)); + WRITE32(OP_SETATTR); + WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data)); + RESERVE_SPACE(4*4); + WRITE32(1); + WRITE32(FATTR4_WORD0_ACL); + WRITE32(arg->acl_len); + if (q + 1 > end) + return -EINVAL; + naces = ntohl(*q++); + WRITE32(naces); + for (i = 0; i < naces; i++) { + if (q + 4 > end) + return -EINVAL; + RESERVE_SPACE(3*4); + memcpy(p, q, 3*4); /* type, flag, access_mask, length */ + q += 3; + tmp = ntohl(*q++); /* length */ + if (tmp > XDR_MAX_NETOBJ) + return -EINVAL; + if (q + XDR_QUADLEN(tmp) > end) + return -EINVAL; + RESERVE_SPACE((XDR_QUADLEN(tmp) << 2) + 4); + p = xdr_encode_opaque(p, q, tmp); + } + return 0; +} + static int encode_savefh(struct xdr_stream *xdr) { @@ -1031,26 +1175,18 @@ static int encode_setattr(struct xdr_str static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid) { - uint32_t total_len; - uint32_t len1, len2, len3; uint32_t *p; - len1 = strlen(setclientid->sc_name); - len2 = strlen(setclientid->sc_netid); - len3 = strlen(setclientid->sc_uaddr); - total_len = XDR_QUADLEN(len1) + XDR_QUADLEN(len2) + XDR_QUADLEN(len3); - total_len = (total_len << 2) + 24 + sizeof(setclientid->sc_verifier.data); - - RESERVE_SPACE(total_len); + RESERVE_SPACE(4 + sizeof(setclientid->sc_verifier->data)); WRITE32(OP_SETCLIENTID); - WRITEMEM(setclientid->sc_verifier.data, sizeof(setclientid->sc_verifier.data)); - WRITE32(len1); - WRITEMEM(setclientid->sc_name, len1); + WRITEMEM(setclientid->sc_verifier->data, sizeof(setclientid->sc_verifier->data)); + + encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name); + RESERVE_SPACE(4); WRITE32(setclientid->sc_prog); - WRITE32(len2); - WRITEMEM(setclientid->sc_netid, len2); - WRITE32(len3); - WRITEMEM(setclientid->sc_uaddr, len3); + encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid); + encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr); + RESERVE_SPACE(4); WRITE32(setclientid->sc_cb_ident); return 0; @@ -1075,7 +1211,7 @@ static int encode_write(struct xdr_strea RESERVE_SPACE(4); WRITE32(OP_WRITE); - encode_stateid(xdr, args->state, args->lockowner); + encode_stateid(xdr, args->context); RESERVE_SPACE(16); WRITE64(args->offset); @@ -1086,6 +1222,18 @@ static int encode_write(struct xdr_strea return 0; } + +static int encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid) +{ + uint32_t *p; + + RESERVE_SPACE(20); + + WRITE32(OP_DELEGRETURN); + WRITEMEM(stateid->data, sizeof(stateid->data)); + return 0; + +} /* * END OF "GENERIC" ENCODE ROUTINES. */ @@ -1244,6 +1392,14 @@ out: } /* + * Encode SYMLINK request + */ +static int nfs4_xdr_enc_symlink(struct rpc_rqst *req, uint32_t *p, const struct nfs4_create_arg *args) +{ + return nfs4_xdr_enc_create(req, p, args); +} + +/* * Encode GETATTR request */ static int nfs4_xdr_enc_getattr(struct rpc_rqst *req, uint32_t *p, const struct nfs4_getattr_arg *args) @@ -1331,13 +1487,13 @@ out: } /* - * Encode an OPEN request + * Encode an OPEN request with no attributes. 
*/ -static int nfs4_xdr_enc_open_reclaim(struct rpc_rqst *req, uint32_t *p, struct nfs_open_reclaimargs *args) +static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, uint32_t *p, struct nfs_openargs *args) { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 3, + .nops = 2, }; int status; @@ -1346,10 +1502,7 @@ static int nfs4_xdr_enc_open_reclaim(str status = encode_putfh(&xdr, args->fh); if (status) goto out; - status = encode_open_reclaim(&xdr, args); - if (status) - goto out; - status = encode_getfattr(&xdr, args->bitmask); + status = encode_open(&xdr, args); out: return status; } @@ -1538,6 +1691,52 @@ out: } /* + * Encode an SETACL request + */ +static int +nfs4_xdr_enc_setacl(struct rpc_rqst *req, uint32_t *p, struct nfs_setaclargs *args) + +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_setacl(&xdr, args); +out: + return status; +} + +/* + * Encode a GETACL request + */ +static int +nfs4_xdr_enc_getacl(struct rpc_rqst *req, uint32_t *p,struct nfs_fh *fhandle) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, fhandle); + if (status) + goto out; + status = encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0); +out: + return status; + +} + +/* * Encode a WRITE request */ static int nfs4_xdr_enc_write(struct rpc_rqst *req, uint32_t *p, struct nfs_writeargs *args) @@ -1716,6 +1915,24 @@ static int nfs4_xdr_enc_setclientid_conf } /* + * DELEGRETURN request + */ +static int nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, uint32_t *p, const struct nfs4_delegreturnargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + if ((status = encode_putfh(&xdr, args->fhandle)) == 0) + status = encode_delegreturn(&xdr, args->stateid); + return status; +} + +/* * START OF "GENERIC" DECODE ROUTINES. * These may look a little ugly since they are imported from a "generic" * set of XDR encode/decode routines which are intended to be shared by @@ -1749,6 +1966,17 @@ static int nfs4_xdr_enc_setclientid_conf } \ } while (0) +static int decode_opaque_inline(struct xdr_stream *xdr, uint32_t *len, char **string) +{ + uint32_t *p; + + READ_BUF(4); + READ32(*len); + READ_BUF(*len); + *string = (char *)p; + return 0; +} + static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) { uint32_t *p; @@ -1785,6 +2013,17 @@ static int decode_op_hdr(struct xdr_stre return 0; } +/* Dummy routine */ +static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs4_client *clp) +{ + uint32_t *p; + uint32_t strlen; + char *str; + + READ_BUF(12); + return decode_opaque_inline(xdr, &strlen, &str); +} + static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) { uint32_t bmlen, *p; @@ -2717,10 +2956,56 @@ static int decode_lookup(struct xdr_stre return decode_op_hdr(xdr, OP_LOOKUP); } +/* This is too sick! 
*/ +static int decode_space_limit(struct xdr_stream *xdr, u64 *maxsize) +{ + uint32_t *p; + uint32_t limit_type, nblocks, blocksize; + + READ_BUF(12); + READ32(limit_type); + switch (limit_type) { + case 1: + READ64(*maxsize); + break; + case 2: + READ32(nblocks); + READ32(blocksize); + *maxsize = (uint64_t)nblocks * (uint64_t)blocksize; + } + return 0; +} + +static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) +{ + uint32_t *p; + uint32_t delegation_type; + + READ_BUF(4); + READ32(delegation_type); + if (delegation_type == NFS4_OPEN_DELEGATE_NONE) { + res->delegation_type = 0; + return 0; + } + READ_BUF(20); + COPYMEM(res->delegation.data, sizeof(res->delegation.data)); + READ32(res->do_recall); + switch (delegation_type) { + case NFS4_OPEN_DELEGATE_READ: + res->delegation_type = FMODE_READ; + break; + case NFS4_OPEN_DELEGATE_WRITE: + res->delegation_type = FMODE_WRITE|FMODE_READ; + if (decode_space_limit(xdr, &res->maxsize) < 0) + return -EIO; + } + return decode_ace(xdr, NULL, res->server->nfs4_state); +} + static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) { uint32_t *p; - uint32_t bmlen, delegation_type; + uint32_t bmlen; int status; status = decode_op_hdr(xdr, OP_OPEN); @@ -2737,11 +3022,9 @@ static int decode_open(struct xdr_stream if (bmlen > 10) goto xdr_error; - READ_BUF((bmlen << 2) + 4); + READ_BUF(bmlen << 2); p += bmlen; - READ32(delegation_type); - if (delegation_type == NFS4_OPEN_DELEGATE_NONE) - return 0; + return decode_delegation(xdr, res); xdr_error: printk(KERN_NOTICE "%s: xdr error!\n", __FUNCTION__); return -EIO; @@ -2967,6 +3250,72 @@ static int decode_renew(struct xdr_strea return decode_op_hdr(xdr, OP_RENEW); } +static int decode_attr_acl(struct xdr_stream *xdr, uint32_t *bitmap, + struct nfs_getaclres *res) +{ + uint32_t *p; + + if (unlikely(bitmap[0] & (FATTR4_WORD0_ACL - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_ACL)) { + ssize_t size = res->acl_len; + uint32_t nace, tmp; + u32 *start; + int i; + + res->acl_len = 0; + READ_BUF(4); + start = p; + READ32(nace); + res->acl_len += 4; + + for (i = 0; i < nace; i++) { + READ_BUF(4*4); + res->acl_len += 4*4; + p += 3; + READ32(tmp); /* namelen */ + READ_BUF(tmp); + if (tmp > XDR_MAX_NETOBJ) { + printk(KERN_WARNING "%s: name too long (%u)!\n", + __FUNCTION__, tmp); + return -EIO; + } + res->acl_len += XDR_QUADLEN(tmp) << 2; + } + if (size && res->acl_len > size) + return -ERANGE; + if (size == 0 && res->acl_len <= XATTR_SIZE_MAX) + res->acl = kmalloc(res->acl_len, GFP_KERNEL); + if (res->acl) + memcpy(res->acl, start, res->acl_len); + } + return 0; +} + +static int decode_getacl(struct xdr_stream *xdr, struct nfs_getaclres *res) +{ + uint32_t *savep; + uint32_t attrlen, + bitmap[2] = {0}; + int status; + + if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) + goto xdr_error; + if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) + goto xdr_error; + if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) + goto xdr_error; + + if ((status = decode_attr_acl(xdr, bitmap, res)) != 0) + goto xdr_error; + + status = verify_attr_len(xdr, savep, attrlen); +xdr_error: + if (status != 0) + printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status); + return status; +} + static int decode_savefh(struct xdr_stream *xdr) { @@ -3048,6 +3397,11 @@ static int decode_write(struct xdr_strea return 0; } +static int decode_delegreturn(struct xdr_stream *xdr) +{ + return decode_op_hdr(xdr, OP_DELEGRETURN); +} + /* * Decode OPEN_DOWNGRADE response */ @@ -3222,6 
+3576,14 @@ out: } /* + * Decode SYMLINK response + */ +static int nfs4_xdr_dec_symlink(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_create_res *res) +{ + return nfs4_xdr_dec_create(rqstp, p, res); +} + +/* * Decode GETATTR response */ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_getattr_res *res) @@ -3243,6 +3605,50 @@ out: } +/* + * Decode SETACL response + */ +static int +nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, uint32_t *p, void *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_setattr(&xdr, res); +out: + return status; +} + +/* + * Decode GETACL response + */ +static int +nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_getaclres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_getacl(&xdr, res); + +out: + return status; +} /* * Decode CLOSE response @@ -3314,9 +3720,9 @@ out: } /* - * Decode OPEN_RECLAIM response + * Decode OPEN response */ -static int nfs4_xdr_dec_open_reclaim(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_openres *res) +static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_openres *res) { struct xdr_stream xdr; struct compound_hdr hdr; @@ -3330,9 +3736,6 @@ static int nfs4_xdr_dec_open_reclaim(str if (status) goto out; status = decode_open(&xdr, res); - if (status) - goto out; - status = decode_getfattr(&xdr, res->f_attr, res->server); out: return status; } @@ -3665,6 +4068,25 @@ static int nfs4_xdr_dec_setclientid_conf return status; } +/* + * DELEGRETURN request + */ +static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, uint32_t *p, void *dummy) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status == 0) { + status = decode_putfh(&xdr); + if (status == 0) + status = decode_delegreturn(&xdr); + } + return status; +} + uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus) { uint32_t len; @@ -3756,7 +4178,7 @@ nfs_stat_to_errno(int stat) if (nfs_errtbl[i].stat == stat) return nfs_errtbl[i].errno; } - if (stat < 0) { + if (stat <= 10000 || stat > 10100) { /* The server is looney tunes. 
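The reworked test replaces the old check for negative values only: after the table lookup fails, a status is now also rejected unless it falls in the 10001..10100 window that NFSv4 protocol errors occupy (NFS4ERR_BADHANDLE opens the range at 10001). A sketch of that decision with the table lookup elided; the fall-through for an in-range but untranslated code is outside this hunk, but presumably the value is handed back for the recovery code to interpret, as the pre-patch function did.

/* Sketch: what nfs_stat_to_errno() does once the errno table has no match. */
static int map_unknown_nfs4_status(int stat)
{
	if (stat <= 10000 || stat > 10100)
		return ESERVERFAULT;	/* neither a known errno nor an NFS4ERR_*: bogus server */
	return stat;			/* inside the NFS4ERR_* window: pass it through */
}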
*/ return ESERVERFAULT; } @@ -3786,7 +4208,7 @@ struct rpc_procinfo nfs4_procedures[] = PROC(COMMIT, enc_commit, dec_commit), PROC(OPEN, enc_open, dec_open), PROC(OPEN_CONFIRM, enc_open_confirm, dec_open_confirm), - PROC(OPEN_RECLAIM, enc_open_reclaim, dec_open_reclaim), + PROC(OPEN_NOATTR, enc_open_noattr, dec_open_noattr), PROC(OPEN_DOWNGRADE, enc_open_downgrade, dec_open_downgrade), PROC(CLOSE, enc_close, dec_close), PROC(SETATTR, enc_setattr, dec_setattr), @@ -3804,12 +4226,16 @@ struct rpc_procinfo nfs4_procedures[] = PROC(REMOVE, enc_remove, dec_remove), PROC(RENAME, enc_rename, dec_rename), PROC(LINK, enc_link, dec_link), + PROC(SYMLINK, enc_symlink, dec_symlink), PROC(CREATE, enc_create, dec_create), PROC(PATHCONF, enc_pathconf, dec_pathconf), PROC(STATFS, enc_statfs, dec_statfs), PROC(READLINK, enc_readlink, dec_readlink), PROC(READDIR, enc_readdir, dec_readdir), PROC(SERVER_CAPS, enc_server_caps, dec_server_caps), + PROC(DELEGRETURN, enc_delegreturn, dec_delegreturn), + PROC(GETACL, enc_getacl, dec_getacl), + PROC(SETACL, enc_setacl, dec_setacl), }; struct rpc_version nfs_version4 = { --- linux-2.6.7/fs/nfs/pagelist.c.lsec 2004-06-15 23:20:03.000000000 -0600 +++ linux-2.6.7/fs/nfs/pagelist.c 2005-03-23 14:28:23.057544416 -0700 @@ -36,7 +36,6 @@ nfs_page_alloc(void) if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->wb_list); - init_waitqueue_head(&p->wb_wait); } return p; } @@ -62,7 +61,7 @@ nfs_page_free(struct nfs_page *p) * User should ensure it is safe to sleep in this function. */ struct nfs_page * -nfs_create_request(struct file *file, struct inode *inode, +nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, struct page *page, unsigned int offset, unsigned int count) { @@ -94,33 +93,38 @@ nfs_create_request(struct file *file, st req->wb_offset = offset; req->wb_pgbase = offset; req->wb_bytes = count; - req->wb_inode = inode; req->wb_count = 1; - server->rpc_ops->request_init(req, file); + req->wb_context = get_nfs_open_context(ctx); return req; } /** + * nfs_unlock_request - Unlock request and wake up sleepers. + * @req: + */ +void nfs_unlock_request(struct nfs_page *req) +{ + if (!NFS_WBACK_BUSY(req)) { + printk(KERN_ERR "NFS: Invalid unlock attempted\n"); + BUG(); + } + smp_mb__before_clear_bit(); + clear_bit(PG_BUSY, &req->wb_flags); + smp_mb__after_clear_bit(); + wake_up_all(&req->wb_context->waitq); + nfs_release_request(req); +} + +/** * nfs_clear_request - Free up all resources allocated to the request * @req: * - * Release all resources associated with a write request after it + * Release page resources associated with a write request after it * has completed. 
*/ void nfs_clear_request(struct nfs_page *req) { - if (req->wb_state) - req->wb_state = NULL; - /* Release struct file or cached credential */ - if (req->wb_file) { - fput(req->wb_file); - req->wb_file = NULL; - } - if (req->wb_cred) { - put_rpccred(req->wb_cred); - req->wb_cred = NULL; - } if (req->wb_page) { page_cache_release(req->wb_page); req->wb_page = NULL; @@ -151,6 +155,7 @@ nfs_release_request(struct nfs_page *req /* Release struct file or cached credential */ nfs_clear_request(req); + put_nfs_open_context(req->wb_context); nfs_page_free(req); } @@ -194,12 +199,12 @@ nfs_list_add_request(struct nfs_page *re int nfs_wait_on_request(struct nfs_page *req) { - struct inode *inode = req->wb_inode; + struct inode *inode = req->wb_context->dentry->d_inode; struct rpc_clnt *clnt = NFS_CLIENT(inode); if (!NFS_WBACK_BUSY(req)) return 0; - return nfs_wait_event(clnt, req->wb_wait, !NFS_WBACK_BUSY(req)); + return nfs_wait_event(clnt, req->wb_context->waitq, !NFS_WBACK_BUSY(req)); } /** @@ -224,7 +229,11 @@ nfs_coalesce_requests(struct list_head * req = nfs_list_entry(head->next); if (prev) { - if (req->wb_cred != prev->wb_cred) + if (req->wb_context->cred != prev->wb_context->cred) + break; + if (req->wb_context->pid != prev->wb_context->pid) + break; + if (req->wb_context->state != prev->wb_context->state) break; if (req->wb_index != (prev->wb_index + 1)) break; --- linux-2.6.7/fs/nfs/nfs4proc.c.lsec 2004-06-15 23:19:44.000000000 -0600 +++ linux-2.6.7/fs/nfs/nfs4proc.c 2005-03-23 14:32:35.532162440 -0700 @@ -47,12 +47,16 @@ #include #include +#include "delegation.h" + #define NFSDBG_FACILITY NFSDBG_PROC -#define NFS4_POLL_RETRY_TIME (15*HZ) +#define NFS4_POLL_RETRY_MIN (1*HZ) +#define NFS4_POLL_RETRY_MAX (15*HZ) static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *); +static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); extern struct rpc_procinfo nfs4_procedures[]; @@ -189,53 +193,296 @@ static void update_changeattr(struct ino * reclaim state on the server after a reboot. 
* Assumes caller is holding the sp->so_sem */ -int -nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) +static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) { struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(inode); - struct nfs_fattr fattr = { - .valid = 0, - }; - struct nfs_open_reclaimargs o_arg = { + struct nfs_delegation *delegation = NFS_I(inode)->delegation; + struct nfs_openargs o_arg = { .fh = NFS_FH(inode), .seqid = sp->so_seqid, .id = sp->so_id, - .share_access = state->state, + .open_flags = state->state, .clientid = server->nfs4_state->cl_clientid, .claim = NFS4_OPEN_CLAIM_PREVIOUS, .bitmask = server->attr_bitmask, }; struct nfs_openres o_res = { - .f_attr = &fattr, .server = server, /* Grrr */ }; struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_RECLAIM], + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR], .rpc_argp = &o_arg, .rpc_resp = &o_res, .rpc_cred = sp->so_cred, }; int status; + if (delegation != NULL) { + if (!(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) { + memcpy(&state->stateid, &delegation->stateid, + sizeof(state->stateid)); + set_bit(NFS_DELEGATED_STATE, &state->flags); + return 0; + } + o_arg.u.delegation_type = delegation->type; + } status = rpc_call_sync(server->client, &msg, 0); nfs4_increment_seqid(status, sp); - if (status == 0) + if (status == 0) { memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); - /* Update the inode attributes */ - nfs_refresh_inode(inode, &fattr); + if (o_res.delegation_type != 0) { + nfs_inode_reclaim_delegation(inode, sp->so_cred, &o_res); + /* Did the server issue an immediate delegation recall? */ + if (o_res.do_recall) + nfs_async_inode_return_delegation(inode, &o_res.stateid); + } + } + clear_bit(NFS_DELEGATED_STATE, &state->flags); + /* Ensure we update the inode attributes */ + NFS_CACHEINV(inode); return status; } +int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs4_exception exception = { }; + int err; + do { + err = _nfs4_open_reclaim(sp, state); + switch (err) { + case 0: + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + return err; + } + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) +{ + struct nfs4_state_owner *sp = state->owner; + struct inode *inode = dentry->d_inode; + struct nfs_server *server = NFS_SERVER(inode); + struct dentry *parent = dget_parent(dentry); + struct nfs_openargs arg = { + .fh = NFS_FH(parent->d_inode), + .clientid = server->nfs4_state->cl_clientid, + .name = &dentry->d_name, + .id = sp->so_id, + .server = server, + .bitmask = server->attr_bitmask, + .claim = NFS4_OPEN_CLAIM_DELEGATE_CUR, + }; + struct nfs_openres res = { + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = sp->so_cred, + }; + int status = 0; + + down(&sp->so_sema); + if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) + goto out; + if (state->state == 0) + goto out; + arg.seqid = sp->so_seqid; + arg.open_flags = state->state; + memcpy(arg.u.delegation.data, state->stateid.data, sizeof(arg.u.delegation.data)); + status = rpc_call_sync(server->client, &msg, 0); + nfs4_increment_seqid(status, sp); + if (status 
>= 0) { + memcpy(state->stateid.data, res.stateid.data, + sizeof(state->stateid.data)); + clear_bit(NFS_DELEGATED_STATE, &state->flags); + } +out: + up(&sp->so_sema); + dput(parent); + return status; +} + +int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) +{ + struct nfs4_exception exception = { }; + struct nfs_server *server = NFS_SERVER(dentry->d_inode); + int err; + do { + err = _nfs4_open_delegation_recall(dentry, state); + switch (err) { + case 0: + return err; + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + /* Don't recall a delegation if it was lost */ + nfs4_schedule_state_recovery(server->nfs4_state); + return err; + } + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid) +{ + struct nfs_open_confirmargs arg = { + .fh = fh, + .seqid = sp->so_seqid, + .stateid = *stateid, + }; + struct nfs_open_confirmres res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = sp->so_cred, + }; + int status; + + status = rpc_call_sync(clnt, &msg, 0); + nfs4_increment_seqid(status, sp); + if (status >= 0) + memcpy(stateid, &res.stateid, sizeof(*stateid)); + return status; +} + +static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int mask) +{ + struct nfs_access_entry cache; + int status; + + status = nfs_access_get_cached(inode, cred, &cache); + if (status == 0) + goto out; + + /* Be clever: ask server to check for all possible rights */ + cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ; + cache.cred = cred; + cache.jiffies = jiffies; + status = _nfs4_proc_access(inode, &cache); + if (status != 0) + return status; + nfs_access_add_cache(inode, &cache); +out: + if ((cache.mask & mask) == mask) + return 0; + return -EACCES; +} + +/* + * Returns an nfs4_state + an extra reference to the inode + */ +int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred, struct nfs4_state **res) +{ + struct nfs_delegation *delegation; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs4_client *clp = server->nfs4_state; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs4_state_owner *sp = NULL; + struct nfs4_state *state = NULL; + int open_flags = flags & (FMODE_READ|FMODE_WRITE); + int mask = 0; + int err; + + /* Protect against reboot recovery - NOTE ORDER! 
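The delegated-open path set up just below reduces to one question: does the delegation already held on this inode cover the open mode being requested? If it does, the open is satisfied locally from the delegation stateid and no OPEN goes on the wire. A sketch of only that test, using the same check as _nfs4_open_delegated(); the helper name is invented for illustration.

/* Sketch: can this open be satisfied from an existing delegation? */
static int delegation_covers_open(const struct nfs_delegation *delegation,
				  int open_flags)
{
	int wanted = open_flags & (FMODE_READ | FMODE_WRITE);

	if (delegation == NULL)
		return 0;
	/* A read delegation (type FMODE_READ) covers read-only opens; a write
	 * delegation (type FMODE_READ|FMODE_WRITE) covers both. */
	return (delegation->type & wanted) == wanted;
}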
*/ + down_read(&clp->cl_sem); + /* Protect against delegation recall */ + down_read(&nfsi->rwsem); + delegation = NFS_I(inode)->delegation; + err = -ENOENT; + if (delegation == NULL || (delegation->type & open_flags) != open_flags) + goto out_err; + err = -ENOMEM; + if (!(sp = nfs4_get_state_owner(server, cred))) { + dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__); + goto out_err; + } + down(&sp->so_sema); + state = nfs4_get_open_state(inode, sp); + if (state == NULL) + goto out_err; + + err = -ENOENT; + if ((state->state & open_flags) == open_flags) { + spin_lock(&inode->i_lock); + if (open_flags & FMODE_READ) + state->nreaders++; + if (open_flags & FMODE_WRITE) + state->nwriters++; + spin_unlock(&inode->i_lock); + goto out_ok; + } else if (state->state != 0) + goto out_err; + + lock_kernel(); + err = _nfs4_do_access(inode, cred, mask); + unlock_kernel(); + if (err != 0) + goto out_err; + spin_lock(&inode->i_lock); + memcpy(state->stateid.data, delegation->stateid.data, + sizeof(state->stateid.data)); + state->state |= open_flags; + if (open_flags & FMODE_READ) + state->nreaders++; + if (open_flags & FMODE_WRITE) + state->nwriters++; + set_bit(NFS_DELEGATED_STATE, &state->flags); + spin_unlock(&inode->i_lock); +out_ok: + up(&sp->so_sema); + nfs4_put_state_owner(sp); + up_read(&nfsi->rwsem); + up_read(&clp->cl_sem); + igrab(inode); + *res = state; + return 0; +out_err: + if (sp != NULL) { + if (state != NULL) + nfs4_put_open_state(state); + up(&sp->so_sema); + nfs4_put_state_owner(sp); + } + up_read(&nfsi->rwsem); + up_read(&clp->cl_sem); + return err; +} + +static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred) +{ + struct nfs4_exception exception = { }; + struct nfs4_state *res; + int err; + + do { + err = _nfs4_open_delegated(inode, flags, cred, &res); + if (err == 0) + break; + res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(inode), + err, &exception)); + } while (exception.retry); + return res; +} + /* * Returns an nfs4_state + an referenced inode */ -struct nfs4_state * -nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr, struct rpc_cred *cred) +static int _nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) { struct nfs4_state_owner *sp; struct nfs4_state *state = NULL; struct nfs_server *server = NFS_SERVER(dir); + struct nfs4_client *clp = server->nfs4_state; struct inode *inode = NULL; int status; struct nfs_fattr f_attr = { @@ -243,12 +490,11 @@ nfs4_do_open(struct inode *dir, struct q }; struct nfs_openargs o_arg = { .fh = NFS_FH(dir), - .share_access = flags & (FMODE_READ|FMODE_WRITE), - .opentype = (flags & O_CREAT) ? NFS4_OPEN_CREATE : NFS4_OPEN_NOCREATE, - .createmode = (flags & O_EXCL) ? 
NFS4_CREATE_EXCLUSIVE : NFS4_CREATE_UNCHECKED, + .open_flags = flags, .name = name, .server = server, .bitmask = server->attr_bitmask, + .claim = NFS4_OPEN_CLAIM_NULL, }; struct nfs_openres o_res = { .f_attr = &f_attr, @@ -261,60 +507,44 @@ nfs4_do_open(struct inode *dir, struct q .rpc_cred = cred, }; -retry: + /* Protect against reboot recovery conflicts */ + down_read(&clp->cl_sem); status = -ENOMEM; - if (!(sp = nfs4_get_state_owner(NFS_SERVER(dir), cred))) { + if (!(sp = nfs4_get_state_owner(server, cred))) { dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n"); - goto out; + goto out_err; } - if (o_arg.createmode & NFS4_CREATE_EXCLUSIVE){ + if (flags & O_EXCL) { u32 *p = (u32 *) o_arg.u.verifier.data; p[0] = jiffies; p[1] = current->pid; - } else if (o_arg.createmode == NFS4_CREATE_UNCHECKED) { + } else o_arg.u.attrs = sattr; - } /* Serialization for the sequence id */ down(&sp->so_sema); o_arg.seqid = sp->so_seqid; o_arg.id = sp->so_id; - o_arg.clientid = NFS_SERVER(dir)->nfs4_state->cl_clientid, + o_arg.clientid = clp->cl_clientid; status = rpc_call_sync(server->client, &msg, 0); nfs4_increment_seqid(status, sp); if (status) - goto out_up; + goto out_err; update_changeattr(dir, &o_res.cinfo); + if(o_res.rflags & NFS4_OPEN_RESULT_CONFIRM) { + status = _nfs4_proc_open_confirm(server->client, &o_res.fh, sp, &o_res.stateid); + if (status) + goto out_err; + } status = -ENOMEM; inode = nfs_fhget(dir->i_sb, &o_res.fh, &f_attr); if (!inode) - goto out_up; + goto out_err; state = nfs4_get_open_state(inode, sp); if (!state) - goto out_up; - - if(o_res.rflags & NFS4_OPEN_RESULT_CONFIRM) { - struct nfs_open_confirmargs oc_arg = { - .fh = &o_res.fh, - .seqid = sp->so_seqid, - }; - struct nfs_open_confirmres oc_res; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM], - .rpc_argp = &oc_arg, - .rpc_resp = &oc_res, - .rpc_cred = cred, - }; - - memcpy(&oc_arg.stateid, &o_res.stateid, sizeof(oc_arg.stateid)); - status = rpc_call_sync(server->client, &msg, 0); - nfs4_increment_seqid(status, sp); - if (status) - goto out_up; - memcpy(&state->stateid, &oc_res.stateid, sizeof(state->stateid)); - } else - memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); + goto out_err; + memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); spin_lock(&inode->i_lock); if (flags & FMODE_READ) state->nreaders++; @@ -322,47 +552,62 @@ retry: state->nwriters++; state->state |= flags & (FMODE_READ|FMODE_WRITE); spin_unlock(&inode->i_lock); - + if (o_res.delegation_type != 0) + nfs_inode_set_delegation(inode, cred, &o_res); up(&sp->so_sema); nfs4_put_state_owner(sp); - return state; - -out_up: - up(&sp->so_sema); - nfs4_put_state_owner(sp); - if (state) { - nfs4_put_open_state(state); - state = NULL; - } - if (inode) { + up_read(&clp->cl_sem); + *res = state; + return 0; +out_err: + if (sp != NULL) { + if (state != NULL) + nfs4_put_open_state(state); + up(&sp->so_sema); + nfs4_put_state_owner(sp); + } + /* Note: clp->cl_sem must be released before nfs4_put_open_state()! */ + up_read(&clp->cl_sem); + if (inode != NULL) iput(inode); - inode = NULL; - } - /* NOTE: BAD_SEQID means the server and client disagree about the - * book-keeping w.r.t. state-changing operations - * (OPEN/CLOSE/LOCK/LOCKU...) - * It is actually a sign of a bug on the client or on the server. 
- * - * If we receive a BAD_SEQID error in the particular case of - * doing an OPEN, we assume that nfs4_increment_seqid() will - * have unhashed the old state_owner for us, and that we can - * therefore safely retry using a new one. We should still warn - * the user though... - */ - if (status == -NFS4ERR_BAD_SEQID) { - printk(KERN_WARNING "NFS: v4 server returned a bad sequence-id error!\n"); - goto retry; - } - status = nfs4_handle_error(server, status); - if (!status) - goto retry; - BUG_ON(status < -1000 || status > 0); -out: - return ERR_PTR(status); + *res = NULL; + return status; } -int -nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, + +struct nfs4_state *nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr, struct rpc_cred *cred) +{ + struct nfs4_exception exception = { }; + struct nfs4_state *res; + int status; + + do { + status = _nfs4_do_open(dir, name, flags, sattr, cred, &res); + if (status == 0) + break; + /* NOTE: BAD_SEQID means the server and client disagree about the + * book-keeping w.r.t. state-changing operations + * (OPEN/CLOSE/LOCK/LOCKU...) + * It is actually a sign of a bug on the client or on the server. + * + * If we receive a BAD_SEQID error in the particular case of + * doing an OPEN, we assume that nfs4_increment_seqid() will + * have unhashed the old state_owner for us, and that we can + * therefore safely retry using a new one. We should still warn + * the user though... + */ + if (status == -NFS4ERR_BAD_SEQID) { + printk(KERN_WARNING "NFS: v4 server returned a bad sequence-id error!\n"); + exception.retry = 1; + continue; + } + res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir), + status, &exception)); + } while (exception.retry); + return res; +} + +static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, struct nfs_fh *fhandle, struct iattr *sattr, struct nfs4_state *state) { @@ -381,9 +626,7 @@ nfs4_do_setattr(struct nfs_server *serve .rpc_argp = &arg, .rpc_resp = &res, }; - int status; -retry: fattr->valid = 0; if (sattr->ia_valid & ATTR_SIZE) @@ -391,13 +634,22 @@ retry: else memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); - status = rpc_call_sync(server->client, &msg, 0); - if (status) { - status = nfs4_handle_error(server, status); - if (!status) - goto retry; - } - return status; + return rpc_call_sync(server->client, &msg, 0); +} + +int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, + struct nfs_fh *fhandle, struct iattr *sattr, + struct nfs4_state *state) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(server, + _nfs4_do_setattr(server, fattr, fhandle, sattr, + state), + &exception); + } while (exception.retry); + return err; } /* @@ -411,8 +663,7 @@ retry: * * NOTE: Caller must be holding the sp->so_owner semaphore! 
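One structural change repeats through the rest of this file: each operation is split into a bare _nfs4_* worker that issues the RPC once, plus a thin wrapper that feeds the result to nfs4_handle_exception() and loops while exception.retry is set. The handler itself is not shown in these hunks; state recovery and the NFS4_POLL_RETRY_MIN/NFS4_POLL_RETRY_MAX back-off are assumed to live there. A generic sketch of the wrapper shape, with _nfs4_frob_once() standing in for any worker:

/* Sketch of the retry-wrapper idiom used by nfs4_do_open(), nfs4_do_setattr(),
 * nfs4_proc_getattr() and friends; _nfs4_frob_once() is hypothetical. */
static int nfs4_frob(struct nfs_server *server, void *args)
{
	struct nfs4_exception exception = { };
	int err;

	do {
		err = nfs4_handle_exception(server,
					    _nfs4_frob_once(server, args),
					    &exception);
	} while (exception.retry);
	return err;
}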
*/ -int -nfs4_do_close(struct inode *inode, struct nfs4_state *state) +static int _nfs4_do_close(struct inode *inode, struct nfs4_state *state) { struct nfs4_state_owner *sp = state->owner; int status = 0; @@ -426,6 +677,8 @@ nfs4_do_close(struct inode *inode, struc .rpc_resp = &res, }; + if (test_bit(NFS_DELEGATED_STATE, &state->flags)) + return 0; memcpy(&arg.stateid, &state->stateid, sizeof(arg.stateid)); /* Serialization for the sequence id */ arg.seqid = sp->so_seqid, @@ -441,15 +694,34 @@ nfs4_do_close(struct inode *inode, struc return status; } -int -nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode) +int nfs4_do_close(struct inode *inode, struct nfs4_state *state) +{ + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs4_exception exception = { }; + int err; + do { + err = _nfs4_do_close(inode, state); + switch (err) { + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + nfs4_schedule_state_recovery(server->nfs4_state); + case 0: + state->state = 0; + return 0; + } + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode) { struct nfs4_state_owner *sp = state->owner; int status = 0; struct nfs_closeargs arg = { .fh = NFS_FH(inode), .seqid = sp->so_seqid, - .share_access = mode, + .open_flags = mode, }; struct nfs_closeres res; struct rpc_message msg = { @@ -458,6 +730,8 @@ nfs4_do_downgrade(struct inode *inode, s .rpc_resp = &res, }; + if (test_bit(NFS_DELEGATED_STATE, &state->flags)) + return 0; memcpy(&arg.stateid, &state->stateid, sizeof(arg.stateid)); status = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0); nfs4_increment_seqid(status, sp); @@ -467,6 +741,26 @@ nfs4_do_downgrade(struct inode *inode, s return status; } +int nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode) +{ + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs4_exception exception = { }; + int err; + do { + err = _nfs4_do_downgrade(inode, state, mode); + switch (err) { + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + nfs4_schedule_state_recovery(server->nfs4_state); + case 0: + state->state = mode; + return 0; + } + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; +} + struct inode * nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { @@ -500,7 +794,9 @@ nfs4_open_revalidate(struct inode *dir, struct inode *inode; cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); - state = nfs4_do_open(dir, &dentry->d_name, openflags, NULL, cred); + state = nfs4_open_delegated(dentry->d_inode, openflags, cred); + if (IS_ERR(state)) + state = nfs4_do_open(dir, &dentry->d_name, openflags, NULL, cred); put_rpccred(cred); if (state == ERR_PTR(-ENOENT) && dentry->d_inode == 0) return 1; @@ -518,7 +814,7 @@ nfs4_open_revalidate(struct inode *dir, } -static int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) +static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) { struct nfs4_server_caps_res res = {}; struct rpc_message msg = { @@ -542,7 +838,19 @@ static int nfs4_server_capabilities(stru return status; } -static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, +static int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = 
nfs4_handle_exception(server, + _nfs4_server_capabilities(server, fhandle), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *info) { struct nfs_fattr * fattr = info->fattr; @@ -563,6 +871,19 @@ static int nfs4_lookup_root(struct nfs_s return rpc_call_sync(server->client, &msg, 0); } +static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(server, + _nfs4_lookup_root(server, fhandle, info), + &exception); + } while (exception.retry); + return err; +} + static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *info) { @@ -597,6 +918,8 @@ static int nfs4_proc_get_root(struct nfs p = server->mnt_path; for (;;) { + struct nfs4_exception exception = { }; + while (*p == '/') p++; if (!*p) @@ -606,9 +929,13 @@ static int nfs4_proc_get_root(struct nfs p++; q.len = p - q.name; - fattr->valid = 0; - status = rpc_call_sync(server->client, &msg, 0); - if (!status) + do { + fattr->valid = 0; + status = nfs4_handle_exception(server, + rpc_call_sync(server->client, &msg, 0), + &exception); + } while (exception.retry); + if (status == 0) continue; if (status == -ENOENT) { printk(KERN_NOTICE "NFS: mount path %s does not exist!\n", server->mnt_path); @@ -621,10 +948,10 @@ static int nfs4_proc_get_root(struct nfs if (status == 0) status = nfs4_do_fsinfo(server, fhandle, info); out: - return nfs4_map_errors(status); + return status; } -static int nfs4_proc_getattr(struct inode *inode, struct nfs_fattr *fattr) +static int _nfs4_proc_getattr(struct inode *inode, struct nfs_fattr *fattr) { struct nfs_server *server = NFS_SERVER(inode); struct nfs4_getattr_arg args = { @@ -642,8 +969,19 @@ static int nfs4_proc_getattr(struct inod }; fattr->valid = 0; + return rpc_call_sync(NFS_CLIENT(inode), &msg, 0); +} - return nfs4_map_errors(rpc_call_sync(NFS_CLIENT(inode), &msg, 0)); +static int nfs4_proc_getattr(struct inode *inode, struct nfs_fattr *fattr) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(inode), + _nfs4_proc_getattr(inode, fattr), + &exception); + } while (exception.retry); + return err; } /* @@ -678,9 +1016,13 @@ nfs4_proc_setattr(struct dentry *dentry, if (size_change) { struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); state = nfs4_find_state(inode, cred, FMODE_WRITE); - if (!state) { - state = nfs4_do_open(dentry->d_parent->d_inode, - &dentry->d_name, FMODE_WRITE, NULL, cred); + if (state == NULL) { + state = nfs4_open_delegated(dentry->d_inode, + FMODE_WRITE, cred); + if (IS_ERR(state)) + state = nfs4_do_open(dentry->d_parent->d_inode, + &dentry->d_name, FMODE_WRITE, + NULL, cred); need_iput = 1; } put_rpccred(cred); @@ -705,7 +1047,7 @@ out: return status; } -static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, +static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { int status; @@ -731,12 +1073,23 @@ static int nfs4_proc_lookup(struct inode dprintk("NFS call lookup %s\n", name->name); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); dprintk("NFS reply lookup: %d\n", status); - return nfs4_map_errors(status); + return status; } -static int nfs4_proc_access(struct inode *inode, struct rpc_cred *cred, int mode) +static int nfs4_proc_lookup(struct 
inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(dir), + _nfs4_proc_lookup(dir, name, fhandle, fattr), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) { - int status; struct nfs4_accessargs args = { .fh = NFS_FH(inode), }; @@ -745,8 +1098,10 @@ static int nfs4_proc_access(struct inode .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS], .rpc_argp = &args, .rpc_resp = &res, - .rpc_cred = cred, + .rpc_cred = entry->cred, }; + int mode = entry->mask; + int status; /* * Determine which access bits we want to ask for... @@ -758,8 +1113,7 @@ static int nfs4_proc_access(struct inode args.access |= NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE; if (mode & MAY_EXEC) args.access |= NFS4_ACCESS_LOOKUP; - } - else { + } else { if (mode & MAY_WRITE) args.access |= NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND; if (mode & MAY_EXEC) @@ -767,13 +1121,27 @@ static int nfs4_proc_access(struct inode } status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (!status) { - if (args.access != res.supported) { - printk(KERN_NOTICE "NFS: server didn't support all access bits!\n"); - status = -ENOTSUPP; - } else if ((args.access & res.access) != args.access) - status = -EACCES; + entry->mask = 0; + if (res.access & NFS4_ACCESS_READ) + entry->mask |= MAY_READ; + if (res.access & (NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE)) + entry->mask |= MAY_WRITE; + if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE)) + entry->mask |= MAY_EXEC; } - return nfs4_map_errors(status); + return status; +} + +static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(inode), + _nfs4_proc_access(inode, entry), + &exception); + } while (exception.retry); + return err; } /* @@ -800,7 +1168,7 @@ static int nfs4_proc_access(struct inode * Both of these changes to the XDR layer would in fact be quite * minor, but I decided to leave them for a subsequent patch. 
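
Note the behavioural change in _nfs4_proc_access() above: instead of returning -ENOTSUPP when the server does not support every requested ACCESS bit, the bits the server did grant are folded back into the generic permission mask. The translation is a plain bitmask mapping, sketched here with invented constants in place of NFS4_ACCESS_* and MAY_*:

#include <stdio.h>

/* Illustrative values only; the kernel's NFS4_ACCESS_* and MAY_* differ. */
#define ACC_READ        0x01
#define ACC_LOOKUP      0x02
#define ACC_MODIFY      0x04
#define ACC_EXTEND      0x08
#define ACC_DELETE      0x10
#define ACC_EXECUTE     0x20

#define PERM_READ       0x4
#define PERM_WRITE      0x2
#define PERM_EXEC       0x1

/* Collapse a fine-grained ACCESS reply into a coarse rwx-style mask,
 * the way entry->mask is filled from res.access. */
static int access_to_perm(unsigned int access)
{
        int mask = 0;

        if (access & ACC_READ)
                mask |= PERM_READ;
        if (access & (ACC_MODIFY | ACC_EXTEND | ACC_DELETE))
                mask |= PERM_WRITE;
        if (access & (ACC_LOOKUP | ACC_EXECUTE))
                mask |= PERM_EXEC;
        return mask;
}

int main(void)
{
        /* server granted read and lookup only */
        printf("mask=%#x\n", access_to_perm(ACC_READ | ACC_LOOKUP));
        return 0;
}
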
*/ -static int nfs4_proc_readlink(struct inode *inode, struct page *page) +static int _nfs4_proc_readlink(struct inode *inode, struct page *page) { struct nfs4_readlink args = { .fh = NFS_FH(inode), @@ -813,11 +1181,22 @@ static int nfs4_proc_readlink(struct ino .rpc_resp = NULL, }; - return nfs4_map_errors(rpc_call_sync(NFS_CLIENT(inode), &msg, 0)); + return rpc_call_sync(NFS_CLIENT(inode), &msg, 0); } -static int -nfs4_proc_read(struct nfs_read_data *rdata, struct file *filp) +static int nfs4_proc_readlink(struct inode *inode, struct page *page) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(inode), + _nfs4_proc_readlink(inode, page), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_proc_read(struct nfs_read_data *rdata) { int flags = rdata->flags; struct inode *inode = rdata->inode; @@ -827,6 +1206,7 @@ nfs4_proc_read(struct nfs_read_data *rda .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ], .rpc_argp = &rdata->args, .rpc_resp = &rdata->res, + .rpc_cred = rdata->cred, }; unsigned long timestamp = jiffies; int status; @@ -834,29 +1214,27 @@ nfs4_proc_read(struct nfs_read_data *rda dprintk("NFS call read %d @ %Ld\n", rdata->args.count, (long long) rdata->args.offset); - /* - * Try first to use O_RDONLY, then O_RDWR stateid. - */ - if (filp) { - struct nfs4_state *state; - state = (struct nfs4_state *)filp->private_data; - rdata->args.state = state; - msg.rpc_cred = state->owner->so_cred; - } else { - rdata->args.state = NULL; - msg.rpc_cred = NFS_I(inode)->mm_cred; - } - fattr->valid = 0; status = rpc_call_sync(server->client, &msg, flags); if (!status) renew_lease(server, timestamp); dprintk("NFS reply read: %d\n", status); - return nfs4_map_errors(status); + return status; } -static int -nfs4_proc_write(struct nfs_write_data *wdata, struct file *filp) +static int nfs4_proc_read(struct nfs_read_data *rdata) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(rdata->inode), + _nfs4_proc_read(rdata), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_proc_write(struct nfs_write_data *wdata) { int rpcflags = wdata->flags; struct inode *inode = wdata->inode; @@ -866,33 +1244,32 @@ nfs4_proc_write(struct nfs_write_data *w .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE], .rpc_argp = &wdata->args, .rpc_resp = &wdata->res, + .rpc_cred = wdata->cred, }; int status; dprintk("NFS call write %d @ %Ld\n", wdata->args.count, (long long) wdata->args.offset); - /* - * Try first to use O_WRONLY, then O_RDWR stateid. 
- */ - if (filp) { - struct nfs4_state *state; - state = (struct nfs4_state *)filp->private_data; - wdata->args.state = state; - msg.rpc_cred = state->owner->so_cred; - } else { - wdata->args.state = NULL; - msg.rpc_cred = NFS_I(inode)->mm_cred; - } - fattr->valid = 0; status = rpc_call_sync(server->client, &msg, rpcflags); dprintk("NFS reply write: %d\n", status); - return nfs4_map_errors(status); + return status; } -static int -nfs4_proc_commit(struct nfs_write_data *cdata, struct file *filp) +static int nfs4_proc_write(struct nfs_write_data *wdata) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(wdata->inode), + _nfs4_proc_write(wdata), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_proc_commit(struct nfs_write_data *cdata) { struct inode *inode = cdata->inode; struct nfs_fattr *fattr = cdata->res.fattr; @@ -901,24 +1278,29 @@ nfs4_proc_commit(struct nfs_write_data * .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT], .rpc_argp = &cdata->args, .rpc_resp = &cdata->res, + .rpc_cred = cdata->cred, }; int status; dprintk("NFS call commit %d @ %Ld\n", cdata->args.count, (long long) cdata->args.offset); - /* - * Try first to use O_WRONLY, then O_RDWR stateid. - */ - if (filp) - msg.rpc_cred = ((struct nfs4_state *)filp->private_data)->owner->so_cred; - else - msg.rpc_cred = NFS_I(inode)->mm_cred; - fattr->valid = 0; status = rpc_call_sync(server->client, &msg, 0); dprintk("NFS reply commit: %d\n", status); - return nfs4_map_errors(status); + return status; +} + +static int nfs4_proc_commit(struct nfs_write_data *cdata) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(cdata->inode), + _nfs4_proc_commit(cdata), + &exception); + } while (exception.retry); + return err; } /* @@ -965,7 +1347,7 @@ nfs4_proc_create(struct inode *dir, stru return inode; } -static int nfs4_proc_remove(struct inode *dir, struct qstr *name) +static int _nfs4_proc_remove(struct inode *dir, struct qstr *name) { struct nfs4_remove_arg args = { .fh = NFS_FH(dir), @@ -982,7 +1364,19 @@ static int nfs4_proc_remove(struct inode status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); if (status == 0) update_changeattr(dir, &res); - return nfs4_map_errors(status); + return status; +} + +static int nfs4_proc_remove(struct inode *dir, struct qstr *name) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(dir), + _nfs4_proc_remove(dir, name), + &exception); + } while (exception.retry); + return err; } struct unlink_desc { @@ -1023,7 +1417,7 @@ static int nfs4_proc_unlink_done(struct return 0; } -static int nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, +static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, struct inode *new_dir, struct qstr *new_name) { struct nfs4_rename_arg arg = { @@ -1046,10 +1440,24 @@ static int nfs4_proc_rename(struct inode update_changeattr(old_dir, &res.old_cinfo); update_changeattr(new_dir, &res.new_cinfo); } - return nfs4_map_errors(status); + return status; } -static int nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) +static int nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, + struct inode *new_dir, struct qstr *new_name) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(old_dir), + _nfs4_proc_rename(old_dir, old_name, + new_dir, new_name), + &exception); + } while (exception.retry); + 
return err; +} + +static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) { struct nfs4_link_arg arg = { .fh = NFS_FH(inode), @@ -1068,10 +1476,22 @@ static int nfs4_proc_link(struct inode * if (!status) update_changeattr(dir, &cinfo); - return nfs4_map_errors(status); + return status; +} + +static int nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(inode), + _nfs4_proc_link(inode, dir, name), + &exception); + } while (exception.retry); + return err; } -static int nfs4_proc_symlink(struct inode *dir, struct qstr *name, +static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, struct iattr *sattr, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { @@ -1090,22 +1510,39 @@ static int nfs4_proc_symlink(struct inod .fattr = fattr, }; struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE], + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SYMLINK], .rpc_argp = &arg, .rpc_resp = &res, }; int status; + if (path->len > NFS4_MAXPATHLEN) + return -ENAMETOOLONG; arg.u.symlink = path; fattr->valid = 0; status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); if (!status) update_changeattr(dir, &res.dir_cinfo); - return nfs4_map_errors(status); + return status; } -static int nfs4_proc_mkdir(struct inode *dir, struct qstr *name, +static int nfs4_proc_symlink(struct inode *dir, struct qstr *name, + struct qstr *path, struct iattr *sattr, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(dir), + _nfs4_proc_symlink(dir, name, path, sattr, + fhandle, fattr), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_proc_mkdir(struct inode *dir, struct qstr *name, struct iattr *sattr, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { @@ -1135,10 +1572,25 @@ static int nfs4_proc_mkdir(struct inode status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); if (!status) update_changeattr(dir, &res.dir_cinfo); - return nfs4_map_errors(status); + return status; } -static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, +static int nfs4_proc_mkdir(struct inode *dir, struct qstr *name, + struct iattr *sattr, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(dir), + _nfs4_proc_mkdir(dir, name, sattr, + fhandle, fattr), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, u64 cookie, struct page *page, unsigned int count, int plus) { struct inode *dir = dentry->d_inode; @@ -1164,10 +1616,24 @@ static int nfs4_proc_readdir(struct dent if (status == 0) memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); unlock_kernel(); - return nfs4_map_errors(status); + return status; } -static int nfs4_proc_mknod(struct inode *dir, struct qstr *name, +static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, + u64 cookie, struct page *page, unsigned int count, int plus) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode), + _nfs4_proc_readdir(dentry, cred, cookie, + page, count, plus), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_proc_mknod(struct inode *dir, struct qstr *name, 
struct iattr *sattr, dev_t rdev, struct nfs_fh *fh, struct nfs_fattr *fattr) { @@ -1214,10 +1680,25 @@ static int nfs4_proc_mknod(struct inode status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); if (!status) update_changeattr(dir, &res.dir_cinfo); - return nfs4_map_errors(status); + return status; +} + +static int nfs4_proc_mknod(struct inode *dir, struct qstr *name, + struct iattr *sattr, dev_t rdev, struct nfs_fh *fh, + struct nfs_fattr *fattr) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(dir), + _nfs4_proc_mknod(dir, name, sattr, rdev, + fh, fattr), + &exception); + } while (exception.retry); + return err; } -static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, +static int _nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat) { struct nfs4_statfs_arg args = { @@ -1231,10 +1712,22 @@ static int nfs4_proc_statfs(struct nfs_s }; fsstat->fattr->valid = 0; - return nfs4_map_errors(rpc_call_sync(server->client, &msg, 0)); + return rpc_call_sync(server->client, &msg, 0); } -static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, +static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(server, + _nfs4_proc_statfs(server, fhandle, fsstat), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) { struct nfs4_fsinfo_arg args = { @@ -1247,16 +1740,29 @@ static int nfs4_do_fsinfo(struct nfs_ser .rpc_resp = fsinfo, }; - return nfs4_map_errors(rpc_call_sync(server->client, &msg, 0)); + return rpc_call_sync(server->client, &msg, 0); +} + +static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) +{ + struct nfs4_exception exception = { }; + int err; + + do { + err = nfs4_handle_exception(server, + _nfs4_do_fsinfo(server, fhandle, fsinfo), + &exception); + } while (exception.retry); + return err; } static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) { fsinfo->fattr->valid = 0; - return nfs4_map_errors(nfs4_do_fsinfo(server, fhandle, fsinfo)); + return nfs4_do_fsinfo(server, fhandle, fsinfo); } -static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, +static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_pathconf *pathconf) { struct nfs4_pathconf_arg args = { @@ -1276,7 +1782,21 @@ static int nfs4_proc_pathconf(struct nfs } pathconf->fattr->valid = 0; - return nfs4_map_errors(rpc_call_sync(server->client, &msg, 0)); + return rpc_call_sync(server->client, &msg, 0); +} + +static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_pathconf *pathconf) +{ + struct nfs4_exception exception = { }; + int err; + + do { + err = nfs4_handle_exception(server, + _nfs4_proc_pathconf(server, fhandle, pathconf), + &exception); + } while (exception.retry); + return err; } static void @@ -1467,8 +1987,10 @@ static int nfs4_proc_file_open(struct inode *inode, struct file *filp) { struct dentry *dentry = filp->f_dentry; - struct nfs4_state *state; + struct nfs_open_context *ctx; + struct nfs4_state *state = NULL; struct rpc_cred *cred; + int status = -ENOMEM; dprintk("nfs4_proc_file_open: starting on (%.*s/%.*s)\n", 
(int)dentry->d_parent->d_name.len, @@ -1478,21 +2000,28 @@ nfs4_proc_file_open(struct inode *inode, /* Find our open stateid */ cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); - state = nfs4_find_state(inode, cred, filp->f_mode); + if (unlikely(cred == NULL)) + return -ENOMEM; + ctx = alloc_nfs_open_context(dentry, cred); put_rpccred(cred); - if (state == NULL) { - printk(KERN_WARNING "NFS: v4 raced in function %s\n", __FUNCTION__); - return -EIO; /* ERACE actually */ - } + if (unlikely(ctx == NULL)) + return -ENOMEM; + status = -EIO; /* ERACE actually */ + state = nfs4_find_state(inode, cred, filp->f_mode); + if (unlikely(state == NULL)) + goto no_state; + ctx->state = state; nfs4_close_state(state, filp->f_mode); - if (filp->f_mode & FMODE_WRITE) { - lock_kernel(); - nfs_set_mmcred(inode, state->owner->so_cred); + ctx->mode = filp->f_mode; + nfs_file_set_open_context(filp, ctx); + put_nfs_open_context(ctx); + if (filp->f_mode & FMODE_WRITE) nfs_begin_data_update(inode); - unlock_kernel(); - } - filp->private_data = state; return 0; +no_state: + printk(KERN_WARNING "NFS: v4 raced in function %s\n", __FUNCTION__); + put_nfs_open_context(ctx); + return status; } /* @@ -1501,35 +2030,148 @@ nfs4_proc_file_open(struct inode *inode, static int nfs4_proc_file_release(struct inode *inode, struct file *filp) { - struct nfs4_state *state = (struct nfs4_state *)filp->private_data; - - if (state) - nfs4_close_state(state, filp->f_mode); - if (filp->f_mode & FMODE_WRITE) { - lock_kernel(); + if (filp->f_mode & FMODE_WRITE) nfs_end_data_update(inode); - unlock_kernel(); - } + nfs_file_clear_open_context(filp); return 0; } -/* - * Set up the nfspage struct with the right state info and credentials - */ +static ssize_t +nfs4_read_acl_attr(struct inode *inode, char *buf, ssize_t buflen) +{ + struct nfs_inode *nfsi = NFS_I(inode); + int ret; + + spin_lock(&inode->i_lock); + if (buf == NULL && nfsi->acl_len) + goto out_len; + ret = -ENOENT; + if (nfsi->acl_len == 0) + goto out; + ret = -ERANGE; /* see getxattr(2) man page */ + if (nfsi->acl_len > buflen) + goto out; + memcpy(buf, nfsi->acl, nfsi->acl_len); +out_len: + ret = nfsi->acl_len; +out: + spin_unlock(&inode->i_lock); + return ret; +} + static void -nfs4_request_init(struct nfs_page *req, struct file *filp) +nfs4_set_acl_attr(struct inode *inode, char *buf, ssize_t buflen) { - struct nfs4_state *state; + struct nfs_inode *nfsi = NFS_I(inode); - if (!filp) { - req->wb_cred = get_rpccred(NFS_I(req->wb_inode)->mm_cred); - req->wb_state = NULL; - return; + spin_lock(&inode->i_lock); + kfree(nfsi->acl); + nfsi->acl = buf; + nfsi->acl_len = buflen; + spin_unlock(&inode->i_lock); +} + +static int +nfs4_write_acl_attr(struct inode *inode, const char *buf, ssize_t buflen) +{ + void *abuf = NULL; + + if (buflen > PAGE_SIZE) + goto out_nomem; + abuf = kmalloc(buflen, GFP_KERNEL); + if (abuf == NULL) + goto out_nomem; + memcpy(abuf, buf, buflen); + nfs4_set_acl_attr(inode, abuf, buflen); + return 0; +out_nomem: + nfs4_set_acl_attr(inode, NULL, 0); + return -ENOMEM; +} + +void +nfs4_zap_acl_attr(struct inode *inode) +{ + nfs4_set_acl_attr(inode, NULL, 0); +} + +static int +nfs4_server_supports_acls(struct nfs_server *server) +{ + return (server->caps & NFS_CAP_ACLS) + && (server->acl_bitmask & ACL4_SUPPORT_ALLOW_ACL) + && (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL); +} + +ssize_t +nfs4_proc_get_acl(struct inode *inode, void *buf, ssize_t buflen) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_getaclres res = { + .acl = buf, + 
.acl_len = buflen, + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL], + .rpc_argp = NFS_FH(inode), + .rpc_resp = &res, + }; + int ret; + + if (!nfs4_server_supports_acls(server)) + return -EOPNOTSUPP; + lock_kernel(); + ret = nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (ret < 0) + goto out; + ret = nfs4_read_acl_attr(inode, buf, buflen); + if (ret == -ENOENT) { + ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + if (ret == 0) { + nfs4_write_acl_attr(inode, res.acl, res.acl_len); + ret = res.acl_len; + } + if (res.acl != buf) { + /* xdr decode allocated the memory: */ + kfree(res.acl); + } } - state = (struct nfs4_state *)filp->private_data; - req->wb_state = state; - req->wb_cred = get_rpccred(state->owner->so_cred); - req->wb_lockowner = current->files; +out: + unlock_kernel(); + return ret; +} + +int +nfs4_proc_set_acl(struct inode *inode, const void *buf, ssize_t buflen) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_setaclargs arg = { + .fh = NFS_FH(inode), + .server = server, + .acl = buf, + .acl_len = buflen, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETACL], + .rpc_argp = &arg, + .rpc_resp = NULL, + }; + int ret; + + if (!nfs4_server_supports_acls(server)) + return -EOPNOTSUPP; + + /* XXX: should check for buflen too large? */ + + lock_kernel(); + ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0); + unlock_kernel(); + + if (ret == 0) + nfs4_write_acl_attr(inode, buf, buflen); + + return ret; } static int @@ -1545,11 +2187,13 @@ nfs4_async_handle_error(struct rpc_task case -NFS4ERR_EXPIRED: rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL, NULL); nfs4_schedule_state_recovery(clp); + if (test_bit(NFS4CLNT_OK, &clp->cl_state)) + rpc_wake_up_task(task); task->tk_status = 0; return -EAGAIN; case -NFS4ERR_GRACE: case -NFS4ERR_DELAY: - rpc_delay(task, NFS4_POLL_RETRY_TIME); + rpc_delay(task, NFS4_POLL_RETRY_MAX); task->tk_status = 0; return -EAGAIN; case -NFS4ERR_OLD_STATEID: @@ -1560,12 +2204,11 @@ nfs4_async_handle_error(struct rpc_task return 0; } -int -nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp) +int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp) { DEFINE_WAIT(wait); sigset_t oldset; - int interruptible, res; + int interruptible, res = 0; might_sleep(); @@ -1573,101 +2216,85 @@ nfs4_wait_clnt_recover(struct rpc_clnt * interruptible = TASK_UNINTERRUPTIBLE; if (clnt->cl_intr) interruptible = TASK_INTERRUPTIBLE; - do { - res = 0; - prepare_to_wait(&clp->cl_waitq, &wait, interruptible); - nfs4_schedule_state_recovery(clp); - if (test_bit(NFS4CLNT_OK, &clp->cl_state) && - !test_bit(NFS4CLNT_SETUP_STATE, &clp->cl_state)) - break; - if (clnt->cl_intr && signalled()) { - res = -ERESTARTSYS; - break; - } + prepare_to_wait(&clp->cl_waitq, &wait, interruptible); + nfs4_schedule_state_recovery(clp); + if (clnt->cl_intr && signalled()) + res = -ERESTARTSYS; + else if (!test_bit(NFS4CLNT_OK, &clp->cl_state)) schedule(); - } while(!test_bit(NFS4CLNT_OK, &clp->cl_state)); finish_wait(&clp->cl_waitq, &wait); rpc_clnt_sigunmask(clnt, &oldset); return res; } -static int -nfs4_delay(struct rpc_clnt *clnt) +static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) { sigset_t oldset; int res = 0; might_sleep(); + if (*timeout <= 0) + *timeout = NFS4_POLL_RETRY_MIN; + if (*timeout > NFS4_POLL_RETRY_MAX) + *timeout = NFS4_POLL_RETRY_MAX; rpc_clnt_sigmask(clnt, &oldset); if (clnt->cl_intr) { set_current_state(TASK_INTERRUPTIBLE); - 
schedule_timeout(NFS4_POLL_RETRY_TIME); + schedule_timeout(*timeout); if (signalled()) res = -ERESTARTSYS; } else { set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(NFS4_POLL_RETRY_TIME); + schedule_timeout(*timeout); } rpc_clnt_sigunmask(clnt, &oldset); + *timeout <<= 1; return res; } /* This is the error handling routine for processes that are allowed * to sleep. */ -int -nfs4_handle_error(struct nfs_server *server, int errorcode) +int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception) { struct nfs4_client *clp = server->nfs4_state; int ret = errorcode; + exception->retry = 0; switch(errorcode) { + case 0: + return 0; case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: ret = nfs4_wait_clnt_recover(server->client, clp); + if (ret == 0) + exception->retry = 1; break; case -NFS4ERR_GRACE: case -NFS4ERR_DELAY: - ret = nfs4_delay(server->client); + ret = nfs4_delay(server->client, &exception->timeout); + if (ret == 0) + exception->retry = 1; break; case -NFS4ERR_OLD_STATEID: - ret = 0; + if (ret == 0) + exception->retry = 1; } /* We failed to handle the error */ return nfs4_map_errors(ret); } - -static int -nfs4_request_compatible(struct nfs_page *req, struct file *filp, struct page *page) -{ - struct nfs4_state *state = NULL; - struct rpc_cred *cred = NULL; - - if (req->wb_file != filp) - return 0; - if (req->wb_page != page) - return 0; - state = (struct nfs4_state *)filp->private_data; - if (req->wb_state != state) - return 0; - if (req->wb_lockowner != current->files) - return 0; - cred = state->owner->so_cred; - if (req->wb_cred != cred) - return 0; - return 1; -} - -int -nfs4_proc_setclientid(struct nfs4_client *clp, - u32 program, unsigned short port) +int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short port) { - u32 *p; - struct nfs4_setclientid setclientid; - struct timespec tv; + static nfs4_verifier sc_verifier; + static int initialized; + + struct nfs4_setclientid setclientid = { + .sc_verifier = &sc_verifier, + .sc_prog = program, + }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID], .rpc_argp = &setclientid, @@ -1675,15 +2302,24 @@ nfs4_proc_setclientid(struct nfs4_client .rpc_cred = clp->cl_cred, }; - tv = CURRENT_TIME; - p = (u32*)setclientid.sc_verifier.data; - *p++ = (u32)tv.tv_sec; - *p = (u32)tv.tv_nsec; - setclientid.sc_name = clp->cl_ipaddr; - sprintf(setclientid.sc_netid, "tcp"); - sprintf(setclientid.sc_uaddr, "%s.%d.%d", clp->cl_ipaddr, port >> 8, port & 255); - setclientid.sc_prog = htonl(program); - setclientid.sc_cb_ident = 0; + if (!initialized) { + struct timespec boot_time; + u32 *p; + + initialized = 1; + boot_time = CURRENT_TIME; + p = (u32*)sc_verifier.data; + *p++ = htonl((u32)boot_time.tv_sec); + *p = htonl((u32)boot_time.tv_nsec); + } + setclientid.sc_name_len = scnprintf(setclientid.sc_name, + sizeof(setclientid.sc_name), "%s/%u.%u.%u.%u", + clp->cl_ipaddr, NIPQUAD(clp->cl_addr.s_addr)); + setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, + sizeof(setclientid.sc_netid), "tcp"); + setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, + sizeof(setclientid.sc_uaddr), "%s.%d.%d", + clp->cl_ipaddr, port >> 8, port & 255); return rpc_call_sync(clp->cl_rpcclient, &msg, 0); } @@ -1712,6 +2348,40 @@ nfs4_proc_setclientid_confirm(struct nfs return status; } +static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid) +{ + struct nfs4_delegreturnargs args = { 
+ .fhandle = NFS_FH(inode), + .stateid = stateid, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DELEGRETURN], + .rpc_argp = &args, + .rpc_cred = cred, + }; + + return rpc_call_sync(NFS_CLIENT(inode), &msg, 0); +} + +int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs4_exception exception = { }; + int err; + do { + err = _nfs4_proc_delegreturn(inode, cred, stateid); + switch (err) { + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + nfs4_schedule_state_recovery(server->nfs4_state); + case 0: + return 0; + } + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; +} + #define NFS4_LOCK_MINTIMEOUT (1 * HZ) #define NFS4_LOCK_MAXTIMEOUT (30 * HZ) @@ -1753,8 +2423,7 @@ nfs4_lck_length(struct file_lock *reques return request->fl_end - request->fl_start + 1; } -int -nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request) +static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request) { struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(inode); @@ -1778,9 +2447,10 @@ nfs4_proc_getlk(struct nfs4_state *state struct nfs4_lock_state *lsp; int status; + down_read(&clp->cl_sem); nlo.clientid = clp->cl_clientid; down(&state->lock_sema); - lsp = nfs4_find_lock_state(state, request->fl_owner); + lsp = nfs4_find_lock_state(state, request->fl_pid); if (lsp) nlo.id = lsp->ls_id; else { @@ -1811,14 +2481,28 @@ nfs4_proc_getlk(struct nfs4_state *state if (lsp) nfs4_put_lock_state(lsp); up(&state->lock_sema); - return nfs4_map_errors(status); + up_read(&clp->cl_sem); + return status; } -int -nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) +static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct nfs4_exception exception = { }; + int err; + + do { + err = nfs4_handle_exception(NFS_SERVER(state->inode), + _nfs4_proc_getlk(state, cmd, request), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) { struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(inode); + struct nfs4_client *clp = server->nfs4_state; struct nfs_lockargs arg = { .fh = NFS_FH(inode), .type = nfs4_lck_type(cmd, request), @@ -1838,29 +2522,46 @@ nfs4_proc_unlck(struct nfs4_state *state struct nfs_locku_opargs luargs; int status = 0; + down_read(&clp->cl_sem); down(&state->lock_sema); - lsp = nfs4_find_lock_state(state, request->fl_owner); + lsp = nfs4_find_lock_state(state, request->fl_pid); if (!lsp) goto out; - luargs.seqid = lsp->ls_seqid; - memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid)); - arg.u.locku = &luargs; - status = rpc_call_sync(server->client, &msg, 0); - nfs4_increment_lock_seqid(status, lsp); + /* We might have lost the locks! 
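
nfs4_delay() above supplies the sleep used by nfs4_handle_exception() for NFS4ERR_GRACE and NFS4ERR_DELAY: the wait starts at NFS4_POLL_RETRY_MIN, is clamped to NFS4_POLL_RETRY_MAX, and doubles after each use, so repeated delay replies back off progressively. The same clamped doubling in ordinary user-space C, with invented bounds:

#include <stdio.h>
#include <unistd.h>

#define RETRY_MIN       1       /* seconds; illustrative values */
#define RETRY_MAX       16

/* Sleep for *timeout seconds, clamped to [RETRY_MIN, RETRY_MAX],
 * then double it so the next retry waits longer. */
static void backoff_delay(long *timeout)
{
        if (*timeout <= 0)
                *timeout = RETRY_MIN;
        if (*timeout > RETRY_MAX)
                *timeout = RETRY_MAX;
        printf("backing off for %ld second(s)\n", *timeout);
        sleep((unsigned int)*timeout);
        *timeout <<= 1;
}

int main(void)
{
        long timeout = 0;
        int i;

        for (i = 0; i < 6; i++)         /* waits 1, 2, 4, 8, 16, 16 seconds */
                backoff_delay(&timeout);
        return 0;
}
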
*/ + if ((lsp->flags & NFS_LOCK_INITIALIZED) != 0) { + luargs.seqid = lsp->ls_seqid; + memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid)); + arg.u.locku = &luargs; + status = rpc_call_sync(server->client, &msg, 0); + nfs4_increment_lock_seqid(status, lsp); + } if (status == 0) { memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(lsp->ls_stateid)); - nfs4_notify_unlck(inode, request, lsp); + nfs4_notify_unlck(state, request, lsp); } nfs4_put_lock_state(lsp); out: up(&state->lock_sema); - return nfs4_map_errors(status); + up_read(&clp->cl_sem); + return status; } -static int -nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) +static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct nfs4_exception exception = { }; + int err; + + do { + err = nfs4_handle_exception(NFS_SERVER(state->inode), + _nfs4_proc_unlck(state, cmd, request), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *request, int reclaim) { struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(inode); @@ -1881,23 +2582,22 @@ nfs4_proc_setlk(struct nfs4_state *state .rpc_cred = state->owner->so_cred, }; struct nfs_lock_opargs largs = { + .reclaim = reclaim, .new_lock_owner = 0, }; int status; - down(&state->lock_sema); - lsp = nfs4_find_lock_state(state, request->fl_owner); - if (lsp == NULL) { + lsp = nfs4_get_lock_state(state, request->fl_pid); + if (lsp == NULL) + return -ENOMEM; + if (!(lsp->flags & NFS_LOCK_INITIALIZED)) { struct nfs4_state_owner *owner = state->owner; struct nfs_open_to_lock otl = { .lock_owner = { .clientid = server->nfs4_state->cl_clientid, }, }; - status = -ENOMEM; - lsp = nfs4_alloc_lock_state(state, request->fl_owner); - if (!lsp) - goto out; + otl.lock_seqid = lsp->ls_seqid; otl.lock_owner.id = lsp->ls_id; memcpy(&otl.open_stateid, &state->stateid, sizeof(otl.open_stateid)); @@ -1926,25 +2626,60 @@ nfs4_proc_setlk(struct nfs4_state *state /* save the returned stateid. 
*/ if (status == 0) { memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(nfs4_stateid)); - nfs4_notify_setlk(inode, request, lsp); + if (!reclaim) + nfs4_notify_setlk(state, request, lsp); } else if (status == -NFS4ERR_DENIED) status = -EAGAIN; nfs4_put_lock_state(lsp); -out: + return status; +} + +int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request) +{ +#ifdef F_SETLK64 + return _nfs4_do_setlk(state, F_SETLK64, request, 1); +#else + return _nfs4_do_setlk(state, F_SETLK, request, 1); +#endif +} + +static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct nfs4_client *clp = state->owner->so_client; + int status; + + down_read(&clp->cl_sem); + down(&state->lock_sema); + status = _nfs4_do_setlk(state, cmd, request, 0); up(&state->lock_sema); - return nfs4_map_errors(status); + up_read(&clp->cl_sem); + return status; +} + +static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct nfs4_exception exception = { }; + int err; + + do { + err = nfs4_handle_exception(NFS_SERVER(state->inode), + _nfs4_proc_setlk(state, cmd, request), + &exception); + } while (exception.retry); + return err; } static int nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) { + struct nfs_open_context *ctx; struct nfs4_state *state; unsigned long timeout = NFS4_LOCK_MINTIMEOUT; int status; /* verify open state */ - state = (struct nfs4_state *)filp->private_data; - BUG_ON(!state); + ctx = (struct nfs_open_context *)filp->private_data; + state = ctx->state; if (request->fl_start < 0 || request->fl_end < 0) return -EINVAL; @@ -1975,6 +2710,7 @@ struct nfs_rpc_ops nfs_v4_clientops = { .version = 4, /* protocol version */ .dentry_ops = &nfs4_dentry_operations, .dir_inode_ops = &nfs4_dir_inode_operations, + .file_inode_ops = &nfs4_file_inode_operations, .getroot = nfs4_proc_get_root, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, @@ -2004,8 +2740,6 @@ struct nfs_rpc_ops nfs_v4_clientops = { .commit_setup = nfs4_proc_commit_setup, .file_open = nfs4_proc_file_open, .file_release = nfs4_proc_file_release, - .request_init = nfs4_request_init, - .request_compatible = nfs4_request_compatible, .lock = nfs4_proc_lock, }; --- linux-2.6.7/fs/nfs/callback.h.lsec 2005-03-23 14:28:22.484631512 -0700 +++ linux-2.6.7/fs/nfs/callback.h 2005-03-23 14:28:22.484631512 -0700 @@ -0,0 +1,70 @@ +/* + * linux/fs/nfs/callback.h + * + * Copyright (C) 2004 Trond Myklebust + * + * NFSv4 callback definitions + */ +#ifndef __LINUX_FS_NFS_CALLBACK_H +#define __LINUX_FS_NFS_CALLBACK_H + +#define NFS4_CALLBACK 0x40000000 +#define NFS4_CALLBACK_XDRSIZE 2048 +#define NFS4_CALLBACK_BUFSIZE (1024 + NFS4_CALLBACK_XDRSIZE) + +enum nfs4_callback_procnum { + CB_NULL = 0, + CB_COMPOUND = 1, +}; + +enum nfs4_callback_opnum { + OP_CB_GETATTR = 3, + OP_CB_RECALL = 4, + OP_CB_ILLEGAL = 10044, +}; + +struct cb_compound_hdr_arg { + int taglen; + const char *tag; + unsigned int callback_ident; + unsigned nops; +}; + +struct cb_compound_hdr_res { + uint32_t *status; + int taglen; + const char *tag; + uint32_t *nops; +}; + +struct cb_getattrargs { + struct sockaddr_in *addr; + struct nfs_fh fh; + uint32_t bitmap[2]; +}; + +struct cb_getattrres { + uint32_t status; + uint32_t bitmap[2]; + uint64_t size; + uint64_t change_attr; + struct timespec ctime; + struct timespec mtime; +}; + +struct cb_recallargs { + struct sockaddr_in *addr; + struct nfs_fh fh; + nfs4_stateid stateid; + uint32_t truncate; +}; + +extern unsigned 
nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res); +extern unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy); + +extern int nfs_callback_up(void); +extern int nfs_callback_down(void); + +extern unsigned short nfs_callback_tcpport; + +#endif /* __LINUX_FS_NFS_CALLBACK_H */ --- linux-2.6.7/fs/nfs/direct.c.lsec 2004-06-15 23:19:53.000000000 -0600 +++ linux-2.6.7/fs/nfs/direct.c 2005-03-23 14:28:22.702598376 -0700 @@ -110,7 +110,7 @@ nfs_free_user_pages(struct page **pages, * nfs_direct_read_seg - Read in one iov segment. Generate separate * read RPCs for each "rsize" bytes. * @inode: target inode - * @file: target file (may be NULL) + * @ctx: target file open context * user_addr: starting address of this segment of user's buffer * count: size of this segment * file_offset: offset in file to begin the operation @@ -118,7 +118,7 @@ nfs_free_user_pages(struct page **pages, * nr_pages: size of pages array */ static int -nfs_direct_read_seg(struct inode *inode, struct file *file, +nfs_direct_read_seg(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages) { @@ -127,9 +127,10 @@ nfs_direct_read_seg(struct inode *inode, int curpage = 0; struct nfs_read_data rdata = { .inode = inode, + .cred = ctx->cred, .args = { .fh = NFS_FH(inode), - .lockowner = current->files, + .context = ctx, }, .res = { .fattr = &rdata.fattr, @@ -151,7 +152,7 @@ nfs_direct_read_seg(struct inode *inode, user_addr + tot_bytes, rdata.args.pgbase, curpage); lock_kernel(); - result = NFS_PROTO(inode)->read(&rdata, file); + result = NFS_PROTO(inode)->read(&rdata); unlock_kernel(); if (result <= 0) { @@ -183,7 +184,7 @@ nfs_direct_read_seg(struct inode *inode, * nfs_direct_read - For each iov segment, map the user's buffer * then generate read RPCs. * @inode: target inode - * @file: target file (may be NULL) + * @ctx: target file open context * @iov: array of vectors that define I/O buffer * file_offset: offset in file to begin the operation * nr_segs: size of iovec array @@ -193,7 +194,7 @@ nfs_direct_read_seg(struct inode *inode, * server. */ static ssize_t -nfs_direct_read(struct inode *inode, struct file *file, +nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx, const struct iovec *iov, loff_t file_offset, unsigned long nr_segs) { @@ -216,7 +217,7 @@ nfs_direct_read(struct inode *inode, str return page_count; } - result = nfs_direct_read_seg(inode, file, user_addr, size, + result = nfs_direct_read_seg(inode, ctx, user_addr, size, file_offset, pages, page_count); nfs_free_user_pages(pages, page_count, 1); @@ -239,7 +240,7 @@ nfs_direct_read(struct inode *inode, str * nfs_direct_write_seg - Write out one iov segment. Generate separate * write RPCs for each "wsize" bytes, then commit. 
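
The loop described here, one WRITE per "wsize" bytes of the mapped segment followed by a commit, has the generic shape below. This is a simplified user-space rendition with invented names; the function in this patch additionally manages the pinned page array, the write verifier and the final commit.

#include <stdio.h>
#include <string.h>

/* Hand [buf, buf+count) to 'emit' in chunks of at most wsize bytes,
 * advancing the file offset as we go: the shape of the per-segment
 * loop in nfs_direct_write_seg(). */
static size_t for_each_chunk(const char *buf, size_t count,
                             long long offset, size_t wsize,
                             void (*emit)(const char *, size_t, long long))
{
        size_t done = 0;

        while (done < count) {
                size_t len = count - done;

                if (len > wsize)
                        len = wsize;
                emit(buf + done, len, offset + (long long)done);
                done += len;
        }
        return done;
}

static void print_write(const char *chunk, size_t len, long long offset)
{
        (void)chunk;
        printf("WRITE %zu bytes at offset %lld\n", len, offset);
}

int main(void)
{
        static char data[100000];

        memset(data, 'x', sizeof(data));
        for_each_chunk(data, sizeof(data), 0, 32768, print_write);
        return 0;
}
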
* @inode: target inode - * @file: target file (may be NULL) + * @ctx: target file open context * user_addr: starting address of this segment of user's buffer * count: size of this segment * file_offset: offset in file to begin the operation @@ -247,7 +248,7 @@ nfs_direct_read(struct inode *inode, str * nr_pages: size of pages array */ static int -nfs_direct_write_seg(struct inode *inode, struct file *file, +nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages) { @@ -257,9 +258,10 @@ nfs_direct_write_seg(struct inode *inode struct nfs_writeverf first_verf; struct nfs_write_data wdata = { .inode = inode, + .cred = ctx->cred, .args = { .fh = NFS_FH(inode), - .lockowner = current->files, + .context = ctx, }, .res = { .fattr = &wdata.fattr, @@ -290,7 +292,7 @@ retry: user_addr + tot_bytes, wdata.args.pgbase, curpage); lock_kernel(); - result = NFS_PROTO(inode)->write(&wdata, file); + result = NFS_PROTO(inode)->write(&wdata); unlock_kernel(); if (result <= 0) { @@ -325,7 +327,7 @@ retry: wdata.args.offset = file_offset; lock_kernel(); - result = NFS_PROTO(inode)->commit(&wdata, file); + result = NFS_PROTO(inode)->commit(&wdata); unlock_kernel(); if (result < 0 || memcmp(&first_verf.verifier, @@ -349,7 +351,7 @@ sync_retry: * nfs_direct_write - For each iov segment, map the user's buffer * then generate write and commit RPCs. * @inode: target inode - * @file: target file (may be NULL) + * @ctx: target file open context * @iov: array of vectors that define I/O buffer * file_offset: offset in file to begin the operation * nr_segs: size of iovec array @@ -358,8 +360,7 @@ sync_retry: * that non-direct readers might access, so they will pick up these * writes immediately. 
*/ -static ssize_t -nfs_direct_write(struct inode *inode, struct file *file, +static int nfs_direct_write(struct inode *inode, struct nfs_open_context *ctx, const struct iovec *iov, loff_t file_offset, unsigned long nr_segs) { @@ -382,7 +383,7 @@ nfs_direct_write(struct inode *inode, st return page_count; } - result = nfs_direct_write_seg(inode, file, user_addr, size, + result = nfs_direct_write_seg(inode, ctx, user_addr, size, file_offset, pages, page_count); nfs_free_user_pages(pages, page_count, 0); @@ -414,6 +415,7 @@ nfs_direct_IO(int rw, struct kiocb *iocb { ssize_t result = -EINVAL; struct file *file = iocb->ki_filp; + struct nfs_open_context *ctx; struct dentry *dentry = file->f_dentry; struct inode *inode = dentry->d_inode; @@ -423,19 +425,20 @@ nfs_direct_IO(int rw, struct kiocb *iocb if (!is_sync_kiocb(iocb)) return result; + ctx = (struct nfs_open_context *)file->private_data; switch (rw) { case READ: dprintk("NFS: direct_IO(read) (%s) off/no(%Lu/%lu)\n", dentry->d_name.name, file_offset, nr_segs); - result = nfs_direct_read(inode, file, iov, + result = nfs_direct_read(inode, ctx, iov, file_offset, nr_segs); break; case WRITE: dprintk("NFS: direct_IO(write) (%s) off/no(%Lu/%lu)\n", dentry->d_name.name, file_offset, nr_segs); - result = nfs_direct_write(inode, file, iov, + result = nfs_direct_write(inode, ctx, iov, file_offset, nr_segs); break; default: @@ -471,6 +474,8 @@ nfs_file_direct_read(struct kiocb *iocb, ssize_t retval = -EINVAL; loff_t *ppos = &iocb->ki_pos; struct file *file = iocb->ki_filp; + struct nfs_open_context *ctx = + (struct nfs_open_context *) file->private_data; struct dentry *dentry = file->f_dentry; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; @@ -502,7 +507,7 @@ nfs_file_direct_read(struct kiocb *iocb, goto out; } - retval = nfs_direct_read(inode, file, &iov, pos, 1); + retval = nfs_direct_read(inode, ctx, &iov, pos, 1); if (retval > 0) *ppos = pos + retval; @@ -542,6 +547,8 @@ nfs_file_direct_write(struct kiocb *iocb loff_t *ppos = &iocb->ki_pos; unsigned long limit = current->rlim[RLIMIT_FSIZE].rlim_cur; struct file *file = iocb->ki_filp; + struct nfs_open_context *ctx = + (struct nfs_open_context *) file->private_data; struct dentry *dentry = file->f_dentry; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; @@ -589,7 +596,7 @@ nfs_file_direct_write(struct kiocb *iocb goto out; } - retval = nfs_direct_write(inode, file, &iov, pos, 1); + retval = nfs_direct_write(inode, ctx, &iov, pos, 1); if (mapping->nrpages) invalidate_inode_pages2(mapping); if (retval > 0) --- linux-2.6.7/fs/nfs/nfs4state.c.lsec 2004-06-15 23:18:47.000000000 -0600 +++ linux-2.6.7/fs/nfs/nfs4state.c 2005-03-23 14:28:22.939562352 -0700 @@ -40,11 +40,15 @@ #include #include +#include #include #include #include #include +#include "callback.h" +#include "delegation.h" + #define OPENOWNER_POOL_SIZE 8 static spinlock_t state_spinlock = SPIN_LOCK_UNLOCKED; @@ -93,21 +97,26 @@ nfs4_alloc_client(struct in_addr *addr) { struct nfs4_client *clp; - if ((clp = kmalloc(sizeof(*clp), GFP_KERNEL))) { - memset(clp, 0, sizeof(*clp)); - memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr)); - init_rwsem(&clp->cl_sem); - INIT_LIST_HEAD(&clp->cl_state_owners); - INIT_LIST_HEAD(&clp->cl_unused); - spin_lock_init(&clp->cl_lock); - atomic_set(&clp->cl_count, 1); - INIT_WORK(&clp->cl_recoverd, nfs4_recover_state, clp); - INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp); - INIT_LIST_HEAD(&clp->cl_superblocks); - 
init_waitqueue_head(&clp->cl_waitq); - rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client"); - clp->cl_state = 1 << NFS4CLNT_NEW; + if (nfs_callback_up() < 0) + return NULL; + if ((clp = kmalloc(sizeof(*clp), GFP_KERNEL)) == NULL) { + nfs_callback_down(); + return NULL; } + memset(clp, 0, sizeof(*clp)); + memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr)); + init_rwsem(&clp->cl_sem); + INIT_LIST_HEAD(&clp->cl_delegations); + INIT_LIST_HEAD(&clp->cl_state_owners); + INIT_LIST_HEAD(&clp->cl_unused); + spin_lock_init(&clp->cl_lock); + atomic_set(&clp->cl_count, 1); + INIT_WORK(&clp->cl_recoverd, nfs4_recover_state, clp); + INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp); + INIT_LIST_HEAD(&clp->cl_superblocks); + init_waitqueue_head(&clp->cl_waitq); + rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client"); + clp->cl_state = 1 << NFS4CLNT_OK; return clp; } @@ -130,25 +139,52 @@ nfs4_free_client(struct nfs4_client *clp if (clp->cl_rpcclient) rpc_shutdown_client(clp->cl_rpcclient); kfree(clp); + nfs_callback_down(); +} + +static struct nfs4_client *__nfs4_find_client(struct in_addr *addr) +{ + struct nfs4_client *clp; + list_for_each_entry(clp, &nfs4_clientid_list, cl_servers) { + if (memcmp(&clp->cl_addr, addr, sizeof(clp->cl_addr)) == 0) { + atomic_inc(&clp->cl_count); + return clp; + } + } + return NULL; +} + +struct nfs4_client *nfs4_find_client(struct in_addr *addr) +{ + struct nfs4_client *clp; + spin_lock(&state_spinlock); + clp = __nfs4_find_client(addr); + spin_unlock(&state_spinlock); + return clp; } struct nfs4_client * nfs4_get_client(struct in_addr *addr) { - struct nfs4_client *new, *clp = NULL; + struct nfs4_client *clp, *new = NULL; - new = nfs4_alloc_client(addr); spin_lock(&state_spinlock); - list_for_each_entry(clp, &nfs4_clientid_list, cl_servers) { - if (memcmp(&clp->cl_addr, addr, sizeof(clp->cl_addr)) == 0) - goto found; + for (;;) { + clp = __nfs4_find_client(addr); + if (clp != NULL) + break; + clp = new; + if (clp != NULL) { + list_add(&clp->cl_servers, &nfs4_clientid_list); + new = NULL; + break; + } + spin_unlock(&state_spinlock); + new = nfs4_alloc_client(addr); + spin_lock(&state_spinlock); + if (new == NULL) + break; } - if (new) - list_add(&new->cl_servers, &nfs4_clientid_list); - spin_unlock(&state_spinlock); - return new; -found: - atomic_inc(&clp->cl_count); spin_unlock(&state_spinlock); if (new) nfs4_free_client(new); @@ -169,6 +205,16 @@ nfs4_put_client(struct nfs4_client *clp) nfs4_free_client(clp); } +int nfs4_init_client(struct nfs4_client *clp) +{ + int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, nfs_callback_tcpport); + if (status == 0) + status = nfs4_proc_setclientid_confirm(clp); + if (status == 0) + nfs4_schedule_state_renewal(clp); + return status; +} + u32 nfs4_alloc_lockowner_id(struct nfs4_client *clp) { @@ -185,7 +231,6 @@ nfs4_client_grab_unused(struct nfs4_clie atomic_inc(&sp->so_count); sp->so_cred = cred; list_move(&sp->so_list, &clp->cl_state_owners); - sp->so_generation = clp->cl_generation; clp->cl_nunused--; } return sp; @@ -224,6 +269,7 @@ nfs4_alloc_state_owner(void) init_MUTEX(&sp->so_sema); sp->so_seqid = 0; /* arbitrary */ INIT_LIST_HEAD(&sp->so_states); + INIT_LIST_HEAD(&sp->so_delegations); atomic_set(&sp->so_count, 1); return sp; } @@ -237,8 +283,11 @@ nfs4_unhash_state_owner(struct nfs4_stat spin_unlock(&clp->cl_lock); } -struct nfs4_state_owner * -nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred) +/* + * Note: must be called with clp->cl_sem held in order to prevent races + * with reboot recovery! 
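
The reworked nfs4_get_client() above follows the usual lock, allocate, re-check discipline: search the list under state_spinlock, and only if nothing is found drop the lock for the blocking allocation, retake it and search again before inserting, so a client created by a racing mount is reused and the spare is freed. The same discipline with a pthread mutex and invented types:

#include <pthread.h>
#include <stdlib.h>
#include <string.h>

struct client {
        struct client   *next;
        char            addr[32];
        int             refcount;
};

static pthread_mutex_t  client_lock = PTHREAD_MUTEX_INITIALIZER;
static struct client    *client_list;

/* Caller must hold client_lock. */
static struct client *__find_client(const char *addr)
{
        struct client *clp;

        for (clp = client_list; clp != NULL; clp = clp->next)
                if (strcmp(clp->addr, addr) == 0) {
                        clp->refcount++;
                        return clp;
                }
        return NULL;
}

static struct client *get_client(const char *addr)
{
        struct client *clp, *new = NULL;

        pthread_mutex_lock(&client_lock);
        for (;;) {
                clp = __find_client(addr);
                if (clp != NULL)
                        break;          /* someone else set it up: reuse it */
                if (new != NULL) {      /* our allocation, still unclaimed */
                        new->next = client_list;
                        client_list = new;
                        clp = new;
                        new = NULL;
                        break;
                }
                /* never allocate while holding the lock */
                pthread_mutex_unlock(&client_lock);
                new = calloc(1, sizeof(*new));
                if (new != NULL) {
                        strncpy(new->addr, addr, sizeof(new->addr) - 1);
                        new->refcount = 1;
                }
                pthread_mutex_lock(&client_lock);
                if (new == NULL)
                        break;          /* allocation failed: return NULL */
        }
        pthread_mutex_unlock(&client_lock);
        free(new);      /* non-NULL only if we lost the race to insert */
        return clp;
}

int main(void)
{
        struct client *a = get_client("10.0.0.1");
        struct client *b = get_client("10.0.0.1");

        return (a != NULL && a == b && a->refcount == 2) ? 0 : 1;
}

Allocating outside the lock matters in the kernel case because nfs4_alloc_client() can sleep (it even starts the callback server), which is not permitted under a spinlock.
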
+ */ +struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred) { struct nfs4_client *clp = server->nfs4_state; struct nfs4_state_owner *sp, *new; @@ -254,23 +303,23 @@ nfs4_get_state_owner(struct nfs_server * new->so_client = clp; new->so_id = nfs4_alloc_lockowner_id(clp); new->so_cred = cred; - new->so_generation = clp->cl_generation; sp = new; new = NULL; } spin_unlock(&clp->cl_lock); if (new) kfree(new); - if (sp) { - if (!test_bit(NFS4CLNT_OK, &clp->cl_state)) - nfs4_wait_clnt_recover(server->client, clp); - } else - put_rpccred(cred); - return sp; + if (sp != NULL) + return sp; + put_rpccred(cred); + return NULL; } -void -nfs4_put_state_owner(struct nfs4_state_owner *sp) +/* + * Must be called with clp->cl_sem held in order to avoid races + * with state recovery... + */ +void nfs4_put_state_owner(struct nfs4_state_owner *sp) { struct nfs4_client *clp = sp->so_client; struct rpc_cred *cred = sp->so_cred; @@ -330,8 +379,6 @@ __nfs4_find_state(struct inode *inode, s continue; if ((state->state & mode) != mode) continue; - /* Add the state to the head of the inode's list */ - list_move(&state->inode_states, &nfsi->open_states); atomic_inc(&state->count); if (mode & FMODE_READ) state->nreaders++; @@ -353,8 +400,6 @@ __nfs4_find_state_byowner(struct inode * if (state->nreaders == 0 && state->nwriters == 0) continue; if (state->owner == owner) { - /* Add the state to the head of the inode's list */ - list_move(&state->inode_states, &nfsi->open_states); atomic_inc(&state->count); return state; } @@ -411,51 +456,40 @@ out: return state; } -static void -__nfs4_put_open_state(struct nfs4_state *state) +/* + * Beware! Caller must be holding exactly one + * reference to clp->cl_sem and owner->so_sema! + */ +void nfs4_put_open_state(struct nfs4_state *state) { struct inode *inode = state->inode; struct nfs4_state_owner *owner = state->owner; - int status = 0; - if (!atomic_dec_and_lock(&state->count, &inode->i_lock)) { - up(&owner->so_sema); + if (!atomic_dec_and_lock(&state->count, &inode->i_lock)) return; - } if (!list_empty(&state->inode_states)) list_del(&state->inode_states); spin_unlock(&inode->i_lock); list_del(&state->open_states); - if (state->state != 0) { - do { - status = nfs4_do_close(inode, state); - if (!status) - break; - up(&owner->so_sema); - status = nfs4_handle_error(NFS_SERVER(inode), status); - down(&owner->so_sema); - } while (!status); - } - up(&owner->so_sema); + BUG_ON (state->state != 0); nfs4_free_open_state(state); nfs4_put_state_owner(owner); } -void -nfs4_put_open_state(struct nfs4_state *state) -{ - down(&state->owner->so_sema); - __nfs4_put_open_state(state); -} - -void -nfs4_close_state(struct nfs4_state *state, mode_t mode) +/* + * Beware! Caller must be holding no references to clp->cl_sem! + * of owner->so_sema! 
+ */ +void nfs4_close_state(struct nfs4_state *state, mode_t mode) { struct inode *inode = state->inode; struct nfs4_state_owner *owner = state->owner; + struct nfs4_client *clp = owner->so_client; int newstate; int status = 0; + atomic_inc(&owner->so_count); + down_read(&clp->cl_sem); down(&owner->so_sema); /* Protect against nfs4_find_state() */ spin_lock(&inode->i_lock); @@ -466,29 +500,24 @@ nfs4_close_state(struct nfs4_state *stat if (state->nwriters == 0 && state->nreaders == 0) list_del_init(&state->inode_states); spin_unlock(&inode->i_lock); - do { - newstate = 0; - if (state->state == 0) - break; + newstate = 0; + if (state->state != 0) { if (state->nreaders) newstate |= FMODE_READ; if (state->nwriters) newstate |= FMODE_WRITE; if (state->state == newstate) - break; + goto out; if (newstate != 0) status = nfs4_do_downgrade(inode, state, newstate); else status = nfs4_do_close(inode, state); - if (!status) { - state->state = newstate; - break; - } - up(&owner->so_sema); - status = nfs4_handle_error(NFS_SERVER(inode), status); - down(&owner->so_sema); - } while (!status); - __nfs4_put_open_state(state); + } +out: + nfs4_put_open_state(state); + up(&owner->so_sema); + nfs4_put_state_owner(owner); + up_read(&clp->cl_sem); } /* @@ -496,11 +525,11 @@ nfs4_close_state(struct nfs4_state *stat * that is compatible with current->files */ static struct nfs4_lock_state * -__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) +__nfs4_find_lock_state(struct nfs4_state *state, unsigned int pid) { struct nfs4_lock_state *pos; list_for_each_entry(pos, &state->lock_states, ls_locks) { - if (pos->ls_owner != fl_owner) + if (pos->ls_pid != pid) continue; atomic_inc(&pos->ls_count); return pos; @@ -509,23 +538,16 @@ __nfs4_find_lock_state(struct nfs4_state } struct nfs4_lock_state * -nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) +nfs4_find_lock_state(struct nfs4_state *state, unsigned int pid) { struct nfs4_lock_state *lsp; read_lock(&state->state_lock); - lsp = __nfs4_find_lock_state(state, fl_owner); + lsp = __nfs4_find_lock_state(state, pid); read_unlock(&state->state_lock); return lsp; } -/* - * Return a compatible lock_state. If no initialized lock_state structure - * exists, return an uninitialized one. - * - * The caller must be holding state->lock_sema - */ -struct nfs4_lock_state * -nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) +static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, unsigned int pid) { struct nfs4_lock_state *lsp; struct nfs4_client *clp = state->owner->so_client; @@ -533,12 +555,12 @@ nfs4_alloc_lock_state(struct nfs4_state lsp = kmalloc(sizeof(*lsp), GFP_KERNEL); if (lsp == NULL) return NULL; + lsp->flags = 0; lsp->ls_seqid = 0; /* arbitrary */ lsp->ls_id = -1; memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data)); atomic_set(&lsp->ls_count, 1); - lsp->ls_owner = fl_owner; - lsp->ls_parent = state; + lsp->ls_pid = pid; INIT_LIST_HEAD(&lsp->ls_locks); spin_lock(&clp->cl_lock); lsp->ls_id = nfs4_alloc_lockowner_id(clp); @@ -547,16 +569,32 @@ nfs4_alloc_lock_state(struct nfs4_state } /* + * Return a compatible lock_state. If no initialized lock_state structure + * exists, return an uninitialized one. 
+ * + * The caller must be holding state->lock_sema and clp->cl_sem + */ +struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, unsigned int pid) +{ + struct nfs4_lock_state * lsp; + + lsp = nfs4_find_lock_state(state, pid); + if (lsp == NULL) + lsp = nfs4_alloc_lock_state(state, pid); + return lsp; +} + +/* * Byte-range lock aware utility to initialize the stateid of read/write * requests. */ void -nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner) +nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, unsigned int pid) { if (test_bit(LK_STATE_IN_USE, &state->flags)) { struct nfs4_lock_state *lsp; - lsp = nfs4_find_lock_state(state, fl_owner); + lsp = nfs4_find_lock_state(state, pid); if (lsp) { memcpy(dst, &lsp->ls_stateid, sizeof(*dst)); nfs4_put_lock_state(lsp); @@ -567,13 +605,14 @@ nfs4_copy_stateid(nfs4_stateid *dst, str } /* -* Called with state->lock_sema held. +* Called with state->lock_sema and clp->cl_sem held. */ -void -nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp) +void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp) { - if (status == NFS_OK || seqid_mutating_err(-status)) + if (status == NFS_OK || seqid_mutating_err(-status)) { lsp->ls_seqid++; + lsp->flags |= NFS_LOCK_INITIALIZED; + } } /* @@ -598,12 +637,11 @@ nfs4_check_unlock(struct file_lock *fl, * Post an initialized lock_state on the state->lock_states list. */ void -nfs4_notify_setlk(struct inode *inode, struct file_lock *request, struct nfs4_lock_state *lsp) +nfs4_notify_setlk(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp) { - struct nfs4_state *state = lsp->ls_parent; - if (!list_empty(&lsp->ls_locks)) return; + atomic_inc(&lsp->ls_count); write_lock(&state->state_lock); list_add(&lsp->ls_locks, &state->lock_states); set_bit(LK_STATE_IN_USE, &state->flags); @@ -620,15 +658,15 @@ nfs4_notify_setlk(struct inode *inode, s * */ void -nfs4_notify_unlck(struct inode *inode, struct file_lock *request, struct nfs4_lock_state *lsp) +nfs4_notify_unlck(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp) { - struct nfs4_state *state = lsp->ls_parent; + struct inode *inode = state->inode; struct file_lock *fl; for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { if (!(fl->fl_flags & FL_POSIX)) continue; - if (fl->fl_owner != lsp->ls_owner) + if (fl->fl_pid != lsp->ls_pid) continue; /* Exit if we find at least one lock which is not consumed */ if (nfs4_check_unlock(fl,request) == 0) @@ -640,6 +678,7 @@ nfs4_notify_unlck(struct inode *inode, s if (list_empty(&state->lock_states)) clear_bit(LK_STATE_IN_USE, &state->flags); write_unlock(&state->state_lock); + nfs4_put_lock_state(lsp); } /* @@ -651,20 +690,18 @@ nfs4_put_lock_state(struct nfs4_lock_sta { if (!atomic_dec_and_test(&lsp->ls_count)) return; - if (!list_empty(&lsp->ls_locks)) - return; + BUG_ON (!list_empty(&lsp->ls_locks)); kfree(lsp); } /* -* Called with sp->so_sema held. +* Called with sp->so_sema and clp->cl_sem held. 
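Editorial aside: the lock-state changes above key the client's per-file lock state on the locking process's tgid instead of an fl_owner_t pointer. The userspace sketch below only illustrates the POSIX semantics this has to model — a byte-range lock belongs to the process, not to a particular descriptor — and the path it opens is hypothetical; it is not part of the patch.

/* Minimal illustration of POSIX (fcntl) byte-range lock ownership.
 * Works against any file; the path is made up. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
    struct flock fl;
    int fd = open("/mnt/nfs4/testfile", O_RDWR | O_CREAT, 0644);

    if (fd < 0) {
        perror("open");
        return 1;
    }

    memset(&fl, 0, sizeof(fl));
    fl.l_type = F_WRLCK;        /* exclusive byte-range lock */
    fl.l_whence = SEEK_SET;
    fl.l_start = 0;
    fl.l_len = 100;             /* lock bytes 0..99 */

    if (fcntl(fd, F_SETLKW, &fl) < 0) {
        perror("F_SETLKW");
        return 1;
    }
    printf("pid %d owns the lock on bytes 0-99\n", getpid());

    /* The lock belongs to this process, not to this descriptor:
     * another fd opened by the same process sees no conflict, and
     * closing *any* fd for the file drops this process's locks. */
    close(fd);
    return 0;
}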
* * Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or * failed with a seqid incrementing error - * see comments nfs_fs.h:seqid_mutating_error() */ -void -nfs4_increment_seqid(int status, struct nfs4_state_owner *sp) +void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp) { if (status == NFS_OK || seqid_mutating_err(-status)) sp->so_seqid++; @@ -693,21 +730,14 @@ nfs4_recover_state(void *data) init_completion(&args.complete); - down_read(&clp->cl_sem); - if (test_and_set_bit(NFS4CLNT_SETUP_STATE, &clp->cl_state)) - goto out_failed; if (kernel_thread(reclaimer, &args, CLONE_KERNEL) < 0) goto out_failed_clear; wait_for_completion(&args.complete); return; out_failed_clear: - smp_mb__before_clear_bit(); - clear_bit(NFS4CLNT_SETUP_STATE, &clp->cl_state); - smp_mb__after_clear_bit(); + set_bit(NFS4CLNT_OK, &clp->cl_state); wake_up_all(&clp->cl_waitq); rpc_wake_up(&clp->cl_rpcwaitq); -out_failed: - up_read(&clp->cl_sem); } /* @@ -718,24 +748,66 @@ nfs4_schedule_state_recovery(struct nfs4 { if (!clp) return; - smp_mb__before_clear_bit(); - clear_bit(NFS4CLNT_OK, &clp->cl_state); - smp_mb__after_clear_bit(); - schedule_work(&clp->cl_recoverd); + if (test_and_clear_bit(NFS4CLNT_OK, &clp->cl_state)) + schedule_work(&clp->cl_recoverd); } -static int -nfs4_reclaim_open_state(struct nfs4_state_owner *sp) +static int nfs4_reclaim_locks(struct nfs4_state *state) +{ + struct inode *inode = state->inode; + struct file_lock *fl; + int status = 0; + + for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) { + if (!(fl->fl_flags & FL_POSIX)) + continue; + if (((struct nfs_open_context *)fl->fl_file->private_data)->state != state) + continue; + status = nfs4_lock_reclaim(state, fl); + if (status >= 0) + continue; + switch (status) { + default: + printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", + __FUNCTION__, status); + case -NFS4ERR_EXPIRED: + case -NFS4ERR_NO_GRACE: + case -NFS4ERR_RECLAIM_BAD: + case -NFS4ERR_RECLAIM_CONFLICT: + /* kill_proc(fl->fl_pid, SIGLOST, 1); */ + break; + case -NFS4ERR_STALE_CLIENTID: + goto out_err; + } + } + return 0; +out_err: + return status; +} + +static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp) { struct nfs4_state *state; + struct nfs4_lock_state *lock; int status = 0; list_for_each_entry(state, &sp->so_states, open_states) { if (state->state == 0) continue; status = nfs4_open_reclaim(sp, state); - if (status >= 0) + list_for_each_entry(lock, &state->lock_states, ls_locks) + lock->flags &= ~NFS_LOCK_INITIALIZED; + if (status >= 0) { + status = nfs4_reclaim_locks(state); + if (status < 0) + goto out_err; + list_for_each_entry(lock, &state->lock_states, ls_locks) { + if (!(lock->flags & NFS_LOCK_INITIALIZED)) + printk("%s: Lock reclaim failed!\n", + __FUNCTION__); + } continue; + } switch (status) { default: printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", @@ -762,75 +834,55 @@ out_err: return status; } -static int -reclaimer(void *ptr) +static int reclaimer(void *ptr) { struct reclaimer_args *args = (struct reclaimer_args *)ptr; struct nfs4_client *clp = args->clp; struct nfs4_state_owner *sp; - int generation; int status; daemonize("%u.%u.%u.%u-reclaim", NIPQUAD(clp->cl_addr)); allow_signal(SIGKILL); + atomic_inc(&clp->cl_count); complete(&args->complete); + /* Ensure exclusive access to NFSv4 state */ + lock_kernel(); + down_write(&clp->cl_sem); /* Are there any NFS mounts out there? 
*/ if (list_empty(&clp->cl_superblocks)) goto out; - if (!test_bit(NFS4CLNT_NEW, &clp->cl_state)) { - status = nfs4_proc_renew(clp); - if (status == 0) { - set_bit(NFS4CLNT_OK, &clp->cl_state); - goto out; - } - } - status = nfs4_proc_setclientid(clp, 0, 0); - if (status) - goto out_error; - status = nfs4_proc_setclientid_confirm(clp); +restart_loop: + status = nfs4_proc_renew(clp); + if (status == 0) + goto out; + status = nfs4_init_client(clp); if (status) goto out_error; - generation = ++(clp->cl_generation); - clear_bit(NFS4CLNT_NEW, &clp->cl_state); - set_bit(NFS4CLNT_OK, &clp->cl_state); - up_read(&clp->cl_sem); - nfs4_schedule_state_renewal(clp); -restart_loop: - spin_lock(&clp->cl_lock); + /* Mark all delegations for reclaim */ + nfs_delegation_mark_reclaim(clp); + /* Note: list is protected by exclusive lock on clp->cl_sem */ list_for_each_entry(sp, &clp->cl_state_owners, so_list) { - if (sp->so_generation - generation >= 0) - continue; - atomic_inc(&sp->so_count); - spin_unlock(&clp->cl_lock); - down(&sp->so_sema); - if (sp->so_generation - generation < 0) { - smp_rmb(); - sp->so_generation = clp->cl_generation; - status = nfs4_reclaim_open_state(sp); - } - up(&sp->so_sema); - nfs4_put_state_owner(sp); + status = nfs4_reclaim_open_state(sp); if (status < 0) { if (status == -NFS4ERR_STALE_CLIENTID) - nfs4_schedule_state_recovery(clp); - goto out; + goto restart_loop; + goto out_error; } - goto restart_loop; } - spin_unlock(&clp->cl_lock); + nfs_delegation_reap_unclaimed(clp); out: - smp_mb__before_clear_bit(); - clear_bit(NFS4CLNT_SETUP_STATE, &clp->cl_state); - smp_mb__after_clear_bit(); + set_bit(NFS4CLNT_OK, &clp->cl_state); + up_write(&clp->cl_sem); + unlock_kernel(); wake_up_all(&clp->cl_waitq); rpc_wake_up(&clp->cl_rpcwaitq); + nfs4_put_client(clp); return 0; out_error: - printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u\n", - NIPQUAD(clp->cl_addr.s_addr)); - up_read(&clp->cl_sem); + printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n", + NIPQUAD(clp->cl_addr.s_addr), -status); goto out; } --- linux-2.6.7/fs/nfs/inode.c.lsec 2004-06-15 23:19:44.000000000 -0600 +++ linux-2.6.7/fs/nfs/inode.c 2005-03-23 14:28:22.818580744 -0700 @@ -39,6 +39,8 @@ #include #include +#include "delegation.h" + #define NFSDBG_FACILITY NFSDBG_VFS #define NFS_PARANOIA 1 @@ -123,8 +125,9 @@ nfs_delete_inode(struct inode * inode) { dprintk("NFS: delete_inode(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); + nfs_wb_all(inode); /* - * The following can never actually happen... + * The following should never happen... */ if (nfs_have_writebacks(inode)) { printk(KERN_ERR "nfs_delete_inode: inode %ld has pending RPC requests\n", inode->i_ino); @@ -133,18 +136,15 @@ nfs_delete_inode(struct inode * inode) clear_inode(inode); } -/* - * For the moment, the only task for the NFS clear_inode method is to - * release the mmap credential - */ static void nfs_clear_inode(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); - struct rpc_cred *cred = nfsi->mm_cred; + struct rpc_cred *cred; - if (cred) - put_rpccred(cred); + nfs4_zap_acl_attr(inode); + nfs_wb_all(inode); + BUG_ON (!list_empty(&nfsi->open_files)); cred = nfsi->cache_access.cred; if (cred) put_rpccred(cred); @@ -704,7 +704,7 @@ nfs_fhget(struct super_block *sb, struct /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations.
*/ - inode->i_op = &nfs_file_inode_operations; + inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops; if (S_ISREG(inode->i_mode)) { inode->i_fop = &nfs_file_operations; inode->i_data.a_ops = &nfs_file_aops; @@ -859,53 +859,114 @@ int nfs_getattr(struct vfsmount *mnt, st return err; } +struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred) +{ + struct nfs_open_context *ctx; + + ctx = (struct nfs_open_context *)kmalloc(sizeof(*ctx), GFP_KERNEL); + if (ctx != NULL) { + atomic_set(&ctx->count, 1); + ctx->dentry = dget(dentry); + ctx->cred = get_rpccred(cred); + ctx->state = NULL; + ctx->pid = current->tgid; + ctx->error = 0; + init_waitqueue_head(&ctx->waitq); + } + return ctx; +} + +struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) +{ + if (ctx != NULL) + atomic_inc(&ctx->count); + return ctx; +} + +void put_nfs_open_context(struct nfs_open_context *ctx) +{ + if (atomic_dec_and_test(&ctx->count)) { + if (ctx->state != NULL) + nfs4_close_state(ctx->state, ctx->mode); + if (ctx->cred != NULL) + put_rpccred(ctx->cred); + dput(ctx->dentry); + kfree(ctx); + } +} + /* * Ensure that mmap has a recent RPC credential for use when writing out * shared pages */ -void -nfs_set_mmcred(struct inode *inode, struct rpc_cred *cred) +void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) +{ + struct inode *inode = filp->f_dentry->d_inode; + struct nfs_inode *nfsi = NFS_I(inode); + + filp->private_data = get_nfs_open_context(ctx); + spin_lock(&inode->i_lock); + list_add(&ctx->list, &nfsi->open_files); + spin_unlock(&inode->i_lock); +} + +struct nfs_open_context *nfs_find_open_context(struct inode *inode, int mode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_open_context *pos, *ctx = NULL; + + spin_lock(&inode->i_lock); + list_for_each_entry(pos, &nfsi->open_files, list) { + if ((pos->mode & mode) == mode) { + ctx = get_nfs_open_context(pos); + break; + } + } + spin_unlock(&inode->i_lock); + return ctx; +} + +void nfs_file_clear_open_context(struct file *filp) { - struct rpc_cred **p = &NFS_I(inode)->mm_cred, - *oldcred = *p; + struct inode *inode = filp->f_dentry->d_inode; + struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data; - *p = get_rpccred(cred); - if (oldcred) - put_rpccred(oldcred); + if (ctx) { + filp->private_data = NULL; + spin_lock(&inode->i_lock); + list_del(&ctx->list); + spin_unlock(&inode->i_lock); + put_nfs_open_context(ctx); + } } /* - * These are probably going to contain hooks for - * allocating and releasing RPC credentials for - * the file. I'll have to think about Tronds patch - * a bit more.. + * These allocate and release file read/write context information. 
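Editorial aside: the nfs_open_context introduced above is shared between the struct file, the inode's open_files list and in-flight read/write requests, and lives or dies by its atomic get/put reference count. The fragment below is a minimal userspace sketch of that same get/put pattern using C11 atomics; struct ctx and its helpers are made-up names, not the kernel API.

/* Sketch of the refcounted-context pattern, reduced to plain C11. */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct ctx {
    atomic_int count;
    /* credential, dentry and lock state would live here in the real thing */
};

static struct ctx *ctx_alloc(void)
{
    struct ctx *c = malloc(sizeof(*c));
    if (c)
        atomic_init(&c->count, 1);      /* creator holds one reference */
    return c;
}

static struct ctx *ctx_get(struct ctx *c)
{
    if (c)
        atomic_fetch_add(&c->count, 1);
    return c;
}

static void ctx_put(struct ctx *c)
{
    /* dropping the last reference releases the object */
    if (c && atomic_fetch_sub(&c->count, 1) == 1)
        free(c);
}

int main(void)
{
    struct ctx *c = ctx_alloc();
    ctx_get(c);     /* e.g. a read request takes a reference */
    ctx_put(c);     /* the request completes */
    ctx_put(c);     /* the file is closed: final put frees */
    printf("done\n");
    return 0;
}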
*/ int nfs_open(struct inode *inode, struct file *filp) { - struct rpc_auth *auth; + struct nfs_open_context *ctx; struct rpc_cred *cred; - auth = NFS_CLIENT(inode)->cl_auth; - cred = rpcauth_lookupcred(auth, 0); - filp->private_data = cred; - if ((filp->f_mode & FMODE_WRITE) != 0) { - nfs_set_mmcred(inode, cred); + if ((cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0)) == NULL) + return -ENOMEM; + ctx = alloc_nfs_open_context(filp->f_dentry, cred); + put_rpccred(cred); + if (ctx == NULL) + return -ENOMEM; + ctx->mode = filp->f_mode; + nfs_file_set_open_context(filp, ctx); + put_nfs_open_context(ctx); + if ((filp->f_mode & FMODE_WRITE) != 0) nfs_begin_data_update(inode); - } return 0; } int nfs_release(struct inode *inode, struct file *filp) { - struct rpc_cred *cred; - - lock_kernel(); if ((filp->f_mode & FMODE_WRITE) != 0) nfs_end_data_update(inode); - cred = nfs_file_cred(filp); - if (cred) - put_rpccred(cred); - unlock_kernel(); + nfs_file_clear_open_context(filp); return 0; } @@ -1002,6 +1063,30 @@ out: return status; } +int nfs_attribute_timeout(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + if (nfs_have_delegation(inode, FMODE_READ)) + return 0; + return time_after(jiffies, nfsi->read_cache_jiffies+nfsi->attrtimeo); +} + +/** + * nfs_revalidate_inode - Revalidate the inode attributes + * @server - pointer to nfs_server struct + * @inode - pointer to inode struct + * + * Updates inode attribute information by retrieving the data from the server. + */ +int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) +{ + if (!(NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) + && !nfs_attribute_timeout(inode)) + return NFS_STALE(inode) ? -ESTALE : 0; + return __nfs_revalidate_inode(server, inode); +} + /** * nfs_begin_data_update * @inode - pointer to inode @@ -1023,11 +1108,13 @@ void nfs_end_data_update(struct inode *i { struct nfs_inode *nfsi = NFS_I(inode); - /* Mark the attribute cache for revalidation */ - nfsi->flags |= NFS_INO_INVALID_ATTR; - /* Directories and symlinks: invalidate page cache too */ - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - nfsi->flags |= NFS_INO_INVALID_DATA; + if (!nfs_have_delegation(inode, FMODE_READ)) { + /* Mark the attribute cache for revalidation */ + nfsi->flags |= NFS_INO_INVALID_ATTR; + /* Directories and symlinks: invalidate page cache too */ + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) + nfsi->flags |= NFS_INO_INVALID_DATA; + } nfsi->cache_change_attribute ++; atomic_dec(&nfsi->data_updates); } @@ -1068,6 +1155,10 @@ int nfs_refresh_inode(struct inode *inod loff_t cur_size, new_isize; int data_unstable; + /* Do we hold a delegation? */ + if (nfs_have_delegation(inode, FMODE_READ)) + return 0; + /* Are we in the process of updating data on the server? 
*/ data_unstable = nfs_caches_unstable(inode); @@ -1240,6 +1331,7 @@ static int nfs_update_inode(struct inode inode->i_nlink = fattr->nlink; inode->i_uid = fattr->uid; inode->i_gid = fattr->gid; + nfs4_zap_acl_attr(inode); if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) { /* @@ -1265,7 +1357,8 @@ static int nfs_update_inode(struct inode if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) invalid &= ~NFS_INO_INVALID_DATA; - nfsi->flags |= invalid; + if (!nfs_have_delegation(inode, FMODE_READ)) + nfsi->flags |= invalid; return 0; out_changed: @@ -1400,6 +1493,52 @@ static struct file_system_type nfs_fs_ty #ifdef CONFIG_NFS_V4 +#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" + +int +nfs_setxattr(struct dentry *dentry, const char *key, const void *buf, + size_t buflen, int flags) +{ + struct inode *inode = dentry->d_inode; + + if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0) + return -EINVAL; + + if (!S_ISREG(inode->i_mode) && + (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) + return -EPERM; + + return nfs4_proc_set_acl(inode, buf, buflen); +} + +/* The getxattr man page suggests returning -ENODATA for unknown attributes, + * and that's what we'll do for e.g. user attributes that haven't been set. + * But we'll follow ext2/ext3's lead by returning -EOPNOTSUPP for unsupported + * attributes in kernel-managed attribute namespaces. */ +ssize_t +nfs_getxattr(struct dentry *dentry, const char *key, void *buf, + size_t buflen) +{ + struct inode *inode = dentry->d_inode; + + if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0) + return -EOPNOTSUPP; + + return nfs4_proc_get_acl(inode, buf, buflen); +} + +ssize_t +nfs_listxattr(struct dentry *dentry, char *buf, size_t buflen) +{ + ssize_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1; + + if (buf && buflen < len) + return -ERANGE; + if (buf) + memcpy(buf, XATTR_NAME_NFSV4_ACL, len); + return len; +} + static void nfs4_clear_inode(struct inode *); static struct super_operations nfs4_sops = { @@ -1423,6 +1562,12 @@ static void nfs4_clear_inode(struct inod { struct nfs_inode *nfsi = NFS_I(inode); + /* If we are holding a delegation, return it! 
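Editorial aside: the xattr hooks above expose the NFSv4 ACL as the extended attribute "system.nfs4_acl". Assuming a file on an NFSv4 mount served by a kernel carrying this patch (the path below is hypothetical, and the header name follows current glibc), a userspace program could fetch the raw XDR-encoded ACL roughly like this:

/* Fetch the raw NFSv4 ACL through the xattr interface added above. */
#include <sys/types.h>
#include <sys/xattr.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    const char *path = "/mnt/nfs4/somefile";   /* hypothetical */
    const char *name = "system.nfs4_acl";
    ssize_t len;
    char *buf;

    /* A zero-size query returns the current attribute length. */
    len = getxattr(path, name, NULL, 0);
    if (len < 0) {
        perror("getxattr");
        return 1;
    }
    buf = malloc(len);
    if (buf == NULL)
        return 1;
    len = getxattr(path, name, buf, len);
    if (len < 0) {
        perror("getxattr");
        free(buf);
        return 1;
    }
    printf("%s is %zd bytes of XDR-encoded ACL data\n", name, len);
    free(buf);
    return 0;
}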
*/ + if (nfsi->delegation != NULL) + nfs_inode_return_delegation(inode); + /* First call standard NFS clear_inode() code */ + nfs_clear_inode(inode); + /* Now clear out any remaining state */ while (!list_empty(&nfsi->open_states)) { struct nfs4_state *state; @@ -1437,8 +1582,6 @@ static void nfs4_clear_inode(struct inod BUG_ON(atomic_read(&state->count) != 1); nfs4_close_state(state, state->state); } - /* Now call standard NFS clear_inode() code */ - nfs_clear_inode(inode); } @@ -1536,8 +1679,19 @@ static int nfs4_fill_super(struct super_ memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); nfs_idmap_new(clp); } - if (list_empty(&clp->cl_superblocks)) - clear_bit(NFS4CLNT_OK, &clp->cl_state); + /* Fire up rpciod if not yet running */ + if (rpciod_up() != 0) { + printk(KERN_WARNING "NFS: couldn't start rpciod!\n"); + goto out_fail; + } + + if (list_empty(&clp->cl_superblocks)) { + err = nfs4_init_client(clp); + if (err != 0) { + up_write(&clp->cl_sem); + goto out_rpciod; + } + } list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); clnt = rpc_clone_client(clp->cl_rpcclient); if (!IS_ERR(clnt)) @@ -1567,17 +1721,10 @@ static int nfs4_fill_super(struct super_ } } - /* Fire up rpciod if not yet running */ - if (rpciod_up() != 0) { - printk(KERN_WARNING "NFS: couldn't start rpciod!\n"); - goto out_shutdown; - } - sb->s_op = &nfs4_sops; err = nfs_sb_init(sb, authflavour); if (err == 0) return 0; - rpciod_down(); out_shutdown: rpc_shutdown_client(server->client); out_remove_list: @@ -1585,6 +1732,8 @@ out_remove_list: list_del_init(&server->nfs4_siblings); up_write(&server->nfs4_state->cl_sem); destroy_nfsv4_state(server); +out_rpciod: + rpciod_down(); out_fail: if (clp) nfs4_put_client(clp); @@ -1709,22 +1858,31 @@ out_free: return s; } +static void nfs4_kill_super(struct super_block *sb) +{ + nfs_return_all_delegations(sb); + nfs_kill_super(sb); +} + static struct file_system_type nfs4_fs_type = { .owner = THIS_MODULE, .name = "nfs4", .get_sb = nfs4_get_sb, - .kill_sb = nfs_kill_super, + .kill_sb = nfs4_kill_super, .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; -#define nfs4_zero_state(nfsi) \ +#define nfs4_init_once(nfsi) \ do { \ INIT_LIST_HEAD(&(nfsi)->open_states); \ + nfsi->delegation = NULL; \ + nfsi->delegation_state = 0; \ + init_rwsem(&nfsi->rwsem); \ } while(0) #define register_nfs4fs() register_filesystem(&nfs4_fs_type) #define unregister_nfs4fs() unregister_filesystem(&nfs4_fs_type) #else -#define nfs4_zero_state(nfsi) \ +#define nfs4_init_once(nfsi) \ do { } while (0) #define register_nfs4fs() (0) #define unregister_nfs4fs() @@ -1746,8 +1904,8 @@ static struct inode *nfs_alloc_inode(str if (!nfsi) return NULL; nfsi->flags = 0; - nfsi->mm_cred = NULL; - nfs4_zero_state(nfsi); + nfsi->acl_len = 0; + nfsi->acl = NULL; return &nfsi->vfs_inode; } @@ -1765,12 +1923,14 @@ static void init_once(void * foo, kmem_c inode_init_once(&nfsi->vfs_inode); INIT_LIST_HEAD(&nfsi->dirty); INIT_LIST_HEAD(&nfsi->commit); + INIT_LIST_HEAD(&nfsi->open_files); INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); atomic_set(&nfsi->data_updates, 0); nfsi->ndirty = 0; nfsi->ncommit = 0; nfsi->npages = 0; init_waitqueue_head(&nfsi->nfs_i_wait); + nfs4_init_once(nfsi); } } --- linux-2.6.7/fs/nfs/dir.c.lsec 2004-06-15 23:19:23.000000000 -0600 +++ linux-2.6.7/fs/nfs/dir.c 2005-03-23 14:28:22.701598528 -0700 @@ -32,6 +32,8 @@ #include #include +#include "delegation.h" + #define NFS_PARANOIA 1 /* #define NFS_DEBUG_VERBOSE 1 */ @@ -88,6 +90,9 @@ struct inode_operations 
nfs4_dir_inode_o .permission = nfs_permission, .getattr = nfs_getattr, .setattr = nfs_setattr, + .getxattr = nfs_getxattr, + .setxattr = nfs_setxattr, + .listxattr = nfs_listxattr, }; #endif /* CONFIG_NFS_V4 */ @@ -850,22 +855,22 @@ static int nfs_open_revalidate(struct de unsigned long verifier; int openflags, ret = 0; - /* NFS only supports OPEN for regular files */ - if (inode && !S_ISREG(inode->i_mode)) - goto no_open; parent = dget_parent(dentry); dir = parent->d_inode; if (!is_atomic_open(dir, nd)) goto no_open; + /* We can't create new files in nfs_open_revalidate(), so we + * optimize away revalidation of negative dentries. + */ + if (inode == NULL) + goto out; + /* NFS only supports OPEN on regular files */ + if (!S_ISREG(inode->i_mode)) + goto no_open; openflags = nd->intent.open.flags; - if (openflags & O_CREAT) { - /* If this is a negative dentry, just drop it */ - if (!inode) - goto out; - /* If this is exclusive open, just revalidate */ - if (openflags & O_EXCL) - goto no_open; - } + /* We cannot do exclusive creation on a positive dentry */ + if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) + goto no_open; /* We can't create new files, or truncate existing ones here */ openflags &= ~(O_CREAT|O_TRUNC); @@ -887,6 +892,8 @@ out: return ret; no_open: dput(parent); + if (inode != NULL && nfs_have_delegation(inode, FMODE_READ)) + return 1; return nfs_lookup_revalidate(dentry, nd); } #endif /* CONFIG_NFSV4 */ @@ -1299,19 +1306,6 @@ nfs_symlink(struct inode *dir, struct de dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name, symname); - error = -ENAMETOOLONG; - switch (NFS_PROTO(dir)->version) { - case 2: - if (strlen(symname) > NFS2_MAXPATHLEN) - goto out; - break; - case 3: - if (strlen(symname) > NFS3_MAXPATHLEN) - goto out; - default: - break; - } - #ifdef NFS_PARANOIA if (dentry->d_inode) printk("nfs_proc_symlink: %s/%s not negative!\n", @@ -1341,8 +1335,6 @@ dentry->d_parent->d_name.name, dentry->d d_drop(dentry); } unlock_kernel(); - -out: return error; } @@ -1498,10 +1490,56 @@ out: return error; } -int -nfs_permission(struct inode *inode, int mask, struct nameidata *nd) +int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) +{ + struct nfs_access_entry *cache = &NFS_I(inode)->cache_access; + + if (cache->cred != cred + || time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)) + || (NFS_FLAGS(inode) & NFS_INO_INVALID_ATTR)) + return -ENOENT; + memcpy(res, cache, sizeof(*res)); + return 0; +} + +void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) +{ + struct nfs_access_entry *cache = &NFS_I(inode)->cache_access; + + if (cache->cred != set->cred) { + if (cache->cred) + put_rpccred(cache->cred); + cache->cred = get_rpccred(set->cred); + } + cache->jiffies = set->jiffies; + cache->mask = set->mask; +} + +static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) +{ + struct nfs_access_entry cache; + int status; + + status = nfs_access_get_cached(inode, cred, &cache); + if (status == 0) + goto out; + + /* Be clever: ask server to check for all possible rights */ + cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ; + cache.cred = cred; + cache.jiffies = jiffies; + status = NFS_PROTO(inode)->access(inode, &cache); + if (status != 0) + return status; + nfs_access_add_cache(inode, &cache); +out: + if ((cache.mask & mask) == mask) + return 0; + return -EACCES; +} + +int nfs_permission(struct inode *inode, int mask, struct nameidata *nd) { - 
struct nfs_access_cache *cache = &NFS_I(inode)->cache_access; struct rpc_cred *cred; int mode = inode->i_mode; int res; @@ -1542,24 +1580,7 @@ nfs_permission(struct inode *inode, int goto out_notsup; cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); - if (cache->cred == cred - && time_before(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)) - && !(NFS_FLAGS(inode) & NFS_INO_INVALID_ATTR)) { - if (!(res = cache->err)) { - /* Is the mask a subset of an accepted mask? */ - if ((cache->mask & mask) == mask) - goto out; - } else { - /* ...or is it a superset of a rejected mask? */ - if ((cache->mask & mask) == cache->mask) - goto out; - } - } - - res = NFS_PROTO(inode)->access(inode, cred, mask); - if (!res || res == -EACCES) - goto add_cache; -out: + res = nfs_do_access(inode, cred, mask); put_rpccred(cred); unlock_kernel(); return res; @@ -1568,15 +1589,6 @@ out_notsup: res = vfs_permission(inode, mask); unlock_kernel(); return res; -add_cache: - cache->jiffies = jiffies; - if (cache->cred) - put_rpccred(cache->cred); - cache->cred = cred; - cache->mask = mask; - cache->err = res; - unlock_kernel(); - return res; } /* --- linux-2.6.7/fs/nfs/unlink.c.lsec 2004-06-15 23:20:04.000000000 -0600 +++ linux-2.6.7/fs/nfs/unlink.c 2005-03-23 14:28:23.170527240 -0700 @@ -215,7 +215,6 @@ nfs_complete_unlink(struct dentry *dentr spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_NFSFS_RENAMED; spin_unlock(&dentry->d_lock); - if (data->task.tk_rpcwait == &nfs_delete_queue) - rpc_wake_up_task(&data->task); + rpc_wake_up_task(&data->task); nfs_put_unlinkdata(data); } --- linux-2.6.7/fs/nfs/callback_xdr.c.lsec 2005-03-23 14:28:22.545622240 -0700 +++ linux-2.6.7/fs/nfs/callback_xdr.c 2005-03-23 14:28:22.544622392 -0700 @@ -0,0 +1,481 @@ +/* + * linux/fs/nfs/callback_xdr.c + * + * Copyright (C) 2004 Trond Myklebust + * + * NFSv4 callback encode/decode procedures + */ +#include +#include +#include +#include +#include +#include "callback.h" + +#define CB_OP_TAGLEN_MAXSZ (512) +#define CB_OP_HDR_RES_MAXSZ (2 + CB_OP_TAGLEN_MAXSZ) +#define CB_OP_GETATTR_BITMAP_MAXSZ (4) +#define CB_OP_GETATTR_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ + CB_OP_GETATTR_BITMAP_MAXSZ + \ + 2 + 2 + 3 + 3) +#define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) + +#define NFSDBG_FACILITY NFSDBG_CALLBACK + +typedef unsigned (*callback_process_op_t)(void *, void *); +typedef unsigned (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *); +typedef unsigned (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *); + + +struct callback_op { + callback_process_op_t process_op; + callback_decode_arg_t decode_args; + callback_encode_res_t encode_res; + long res_maxsize; +}; + +static struct callback_op callback_ops[]; + +static int nfs4_callback_null(struct svc_rqst *rqstp, void *argp, void *resp) +{ + return htonl(NFS4_OK); +} + +static int nfs4_decode_void(struct svc_rqst *rqstp, uint32_t *p, void *dummy) +{ + return xdr_argsize_check(rqstp, p); +} + +static int nfs4_encode_void(struct svc_rqst *rqstp, uint32_t *p, void *dummy) +{ + return xdr_ressize_check(rqstp, p); +} + +static uint32_t *read_buf(struct xdr_stream *xdr, int nbytes) +{ + uint32_t *p; + + p = xdr_inline_decode(xdr, nbytes); + if (unlikely(p == NULL)) + printk(KERN_WARNING "NFSv4 callback reply buffer overflowed!\n"); + return p; +} + +static unsigned decode_string(struct xdr_stream *xdr, unsigned int *len, const char **str) +{ + uint32_t *p; + + p = read_buf(xdr, 4); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + *len = 
ntohl(*p); + + if (*len != 0) { + p = read_buf(xdr, *len); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + *str = (const char *)p; + } else + *str = NULL; + + return 0; +} + +static unsigned decode_fh(struct xdr_stream *xdr, struct nfs_fh *fh) +{ + uint32_t *p; + + p = read_buf(xdr, 4); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + fh->size = ntohl(*p); + if (fh->size > NFS4_FHSIZE) + return htonl(NFS4ERR_BADHANDLE); + p = read_buf(xdr, fh->size); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + memcpy(&fh->data[0], p, fh->size); + memset(&fh->data[fh->size], 0, sizeof(fh->data) - fh->size); + return 0; +} + +static unsigned decode_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) +{ + uint32_t *p; + unsigned int attrlen; + + p = read_buf(xdr, 4); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + attrlen = ntohl(*p); + p = read_buf(xdr, attrlen << 2); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + if (likely(attrlen > 0)) + bitmap[0] = ntohl(*p++); + if (attrlen > 1) + bitmap[1] = ntohl(*p); + return 0; +} + +static unsigned decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) +{ + uint32_t *p; + + p = read_buf(xdr, 16); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + memcpy(stateid->data, p, 16); + return 0; +} + +static unsigned decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound_hdr_arg *hdr) +{ + uint32_t *p; + unsigned int minor_version; + unsigned status; + + status = decode_string(xdr, &hdr->taglen, &hdr->tag); + if (unlikely(status != 0)) + return status; + /* We do not like overly long tags! */ + if (hdr->taglen > CB_OP_TAGLEN_MAXSZ-12 || hdr->taglen < 0) { + printk("NFSv4 CALLBACK %s: client sent tag of length %u\n", + __FUNCTION__, hdr->taglen); + return htonl(NFS4ERR_RESOURCE); + } + p = read_buf(xdr, 12); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + minor_version = ntohl(*p++); + /* Check minor version is zero. 
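Editorial aside: decode_string() and decode_fh() above consume standard XDR opaques — a 32-bit big-endian length followed by the data, padded to a four-byte boundary as XDR requires. The standalone sketch below decodes that same wire layout; it shares no code with the patch and omits the overflow hardening a real decoder would need.

/* Decode one XDR opaque: 4-byte big-endian length, data, pad to 4 bytes. */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Returns bytes consumed, or -1 if the buffer is too short. */
static int xdr_decode_opaque(const unsigned char *buf, size_t buflen,
                             const unsigned char **data, uint32_t *len)
{
    uint32_t n, padded;

    if (buflen < 4)
        return -1;
    memcpy(&n, buf, 4);
    n = ntohl(n);
    padded = (n + 3) & ~3u;     /* XDR pads opaques to 4-byte multiples */
    if (buflen < 4 + padded)    /* no overflow check: demo only */
        return -1;
    *data = buf + 4;
    *len = n;
    return 4 + padded;
}

int main(void)
{
    /* "tag" (3 bytes) as an XDR opaque: length 3, data, 1 pad byte */
    const unsigned char wire[] = { 0, 0, 0, 3, 't', 'a', 'g', 0 };
    const unsigned char *data;
    uint32_t len;
    int used = xdr_decode_opaque(wire, sizeof(wire), &data, &len);

    if (used > 0)
        printf("consumed %d bytes, opaque of length %u: %.*s\n",
               used, len, (int)len, (const char *)data);
    return 0;
}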
*/ + if (minor_version != 0) { + printk(KERN_WARNING "%s: NFSv4 server callback with illegal minor version %u!\n", + __FUNCTION__, minor_version); + return htonl(NFS4ERR_MINOR_VERS_MISMATCH); + } + hdr->callback_ident = ntohl(*p++); + hdr->nops = ntohl(*p); + return 0; +} + +static unsigned decode_op_hdr(struct xdr_stream *xdr, unsigned int *op) +{ + uint32_t *p; + p = read_buf(xdr, 4); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + *op = ntohl(*p); + return 0; +} + +static unsigned decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_getattrargs *args) +{ + unsigned status; + + status = decode_fh(xdr, &args->fh); + if (unlikely(status != 0)) + goto out; + args->addr = &rqstp->rq_addr; + status = decode_bitmap(xdr, args->bitmap); +out: + dprintk("%s: exit with status = %d\n", __FUNCTION__, status); + return status; +} + +static unsigned decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_recallargs *args) +{ + uint32_t *p; + unsigned status; + + args->addr = &rqstp->rq_addr; + status = decode_stateid(xdr, &args->stateid); + if (unlikely(status != 0)) + goto out; + p = read_buf(xdr, 4); + if (unlikely(p == NULL)) { + status = htonl(NFS4ERR_RESOURCE); + goto out; + } + args->truncate = ntohl(*p); + status = decode_fh(xdr, &args->fh); +out: + dprintk("%s: exit with status = %d\n", __FUNCTION__, status); + return 0; +} + +static unsigned encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) +{ + uint32_t *p; + + p = xdr_reserve_space(xdr, 4 + len); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + xdr_encode_opaque(p, str, len); + return 0; +} + +#define CB_SUPPORTED_ATTR0 (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) +#define CB_SUPPORTED_ATTR1 (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) +static unsigned encode_attr_bitmap(struct xdr_stream *xdr, const uint32_t *bitmap, uint32_t **savep) +{ + uint32_t bm[2]; + uint32_t *p; + + bm[0] = htonl(bitmap[0] & CB_SUPPORTED_ATTR0); + bm[1] = htonl(bitmap[1] & CB_SUPPORTED_ATTR1); + if (bm[1] != 0) { + p = xdr_reserve_space(xdr, 16); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + *p++ = htonl(2); + *p++ = bm[0]; + *p++ = bm[1]; + } else if (bm[0] != 0) { + p = xdr_reserve_space(xdr, 12); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + *p++ = htonl(1); + *p++ = bm[0]; + } else { + p = xdr_reserve_space(xdr, 8); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + *p++ = htonl(0); + } + *savep = p; + return 0; +} + +static unsigned encode_attr_change(struct xdr_stream *xdr, const uint32_t *bitmap, uint64_t change) +{ + uint32_t *p; + + if (!(bitmap[0] & FATTR4_WORD0_CHANGE)) + return 0; + p = xdr_reserve_space(xdr, 8); + if (unlikely(p == 0)) + return htonl(NFS4ERR_RESOURCE); + p = xdr_encode_hyper(p, change); + return 0; +} + +static unsigned encode_attr_size(struct xdr_stream *xdr, const uint32_t *bitmap, uint64_t size) +{ + uint32_t *p; + + if (!(bitmap[0] & FATTR4_WORD0_SIZE)) + return 0; + p = xdr_reserve_space(xdr, 8); + if (unlikely(p == 0)) + return htonl(NFS4ERR_RESOURCE); + p = xdr_encode_hyper(p, size); + return 0; +} + +static unsigned encode_attr_time(struct xdr_stream *xdr, const struct timespec *time) +{ + uint32_t *p; + + p = xdr_reserve_space(xdr, 12); + if (unlikely(p == 0)) + return htonl(NFS4ERR_RESOURCE); + p = xdr_encode_hyper(p, time->tv_sec); + *p = htonl(time->tv_nsec); + return 0; +} + +static unsigned encode_attr_ctime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec 
*time) +{ + if (!(bitmap[1] & FATTR4_WORD1_TIME_METADATA)) + return 0; + return encode_attr_time(xdr,time); +} + +static unsigned encode_attr_mtime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time) +{ + if (!(bitmap[1] & FATTR4_WORD1_TIME_MODIFY)) + return 0; + return encode_attr_time(xdr,time); +} + +static unsigned encode_compound_hdr_res(struct xdr_stream *xdr, struct cb_compound_hdr_res *hdr) +{ + unsigned status; + + hdr->status = xdr_reserve_space(xdr, 4); + if (unlikely(hdr->status == NULL)) + return htonl(NFS4ERR_RESOURCE); + status = encode_string(xdr, hdr->taglen, hdr->tag); + if (unlikely(status != 0)) + return status; + hdr->nops = xdr_reserve_space(xdr, 4); + if (unlikely(hdr->nops == NULL)) + return htonl(NFS4ERR_RESOURCE); + return 0; +} + +static unsigned encode_op_hdr(struct xdr_stream *xdr, uint32_t op, uint32_t res) +{ + uint32_t *p; + + p = xdr_reserve_space(xdr, 8); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + *p++ = htonl(op); + *p = htonl(res); + return 0; +} + +static unsigned encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res) +{ + uint32_t *savep; + unsigned status = res->status; + + if (unlikely(status != 0)) + goto out; + status = encode_attr_bitmap(xdr, res->bitmap, &savep); + if (unlikely(status != 0)) + goto out; + status = encode_attr_change(xdr, res->bitmap, res->change_attr); + if (unlikely(status != 0)) + goto out; + status = encode_attr_size(xdr, res->bitmap, res->size); + if (unlikely(status != 0)) + goto out; + status = encode_attr_ctime(xdr, res->bitmap, &res->ctime); + if (unlikely(status != 0)) + goto out; + status = encode_attr_mtime(xdr, res->bitmap, &res->mtime); + *savep = htonl((unsigned int)((char *)xdr->p - (char *)(savep+1))); +out: + dprintk("%s: exit with status = %d\n", __FUNCTION__, status); + return status; +} + +static unsigned process_op(struct svc_rqst *rqstp, + struct xdr_stream *xdr_in, void *argp, + struct xdr_stream *xdr_out, void *resp) +{ + struct callback_op *op; + unsigned int op_nr; + unsigned int status = 0; + long maxlen; + unsigned res; + + dprintk("%s: start\n", __FUNCTION__); + status = decode_op_hdr(xdr_in, &op_nr); + if (unlikely(status != 0)) { + op_nr = OP_CB_ILLEGAL; + op = &callback_ops[0]; + } else if (unlikely(op_nr != OP_CB_GETATTR && op_nr != OP_CB_RECALL)) { + op_nr = OP_CB_ILLEGAL; + op = &callback_ops[0]; + status = htonl(NFS4ERR_OP_ILLEGAL); + } else + op = &callback_ops[op_nr]; + + maxlen = xdr_out->end - xdr_out->p; + if (maxlen > 0 && maxlen < PAGE_SIZE) { + if (likely(status == 0 && op->decode_args != NULL)) + status = op->decode_args(rqstp, xdr_in, argp); + if (likely(status == 0 && op->process_op != NULL)) + status = op->process_op(argp, resp); + } else + status = htonl(NFS4ERR_RESOURCE); + + res = encode_op_hdr(xdr_out, op_nr, status); + if (status == 0) + status = res; + if (op->encode_res != NULL && status == 0) + status = op->encode_res(rqstp, xdr_out, resp); + dprintk("%s: done, status = %d\n", __FUNCTION__, status); + return status; +} + +/* + * Decode, process and encode a COMPOUND + */ +static int nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp) +{ + struct cb_compound_hdr_arg hdr_arg; + struct cb_compound_hdr_res hdr_res; + struct xdr_stream xdr_in, xdr_out; + uint32_t *p; + unsigned int status; + unsigned int nops = 1; + + dprintk("%s: start\n", __FUNCTION__); + + xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base); + + p = (uint32_t*)((char 
*)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len); + rqstp->rq_res.head[0].iov_len = PAGE_SIZE; + xdr_init_encode(&xdr_out, &rqstp->rq_res, p); + + decode_compound_hdr_arg(&xdr_in, &hdr_arg); + hdr_res.taglen = hdr_arg.taglen; + hdr_res.tag = hdr_arg.tag; + encode_compound_hdr_res(&xdr_out, &hdr_res); + + for (;;) { + status = process_op(rqstp, &xdr_in, argp, &xdr_out, resp); + if (status != 0) + break; + if (nops == hdr_arg.nops) + break; + nops++; + } + *hdr_res.status = status; + *hdr_res.nops = htonl(nops); + dprintk("%s: done, status = %u\n", __FUNCTION__, status); + return rpc_success; +} + +/* + * Define NFS4 callback COMPOUND ops. + */ +static struct callback_op callback_ops[] = { + [0] = { + .res_maxsize = CB_OP_HDR_RES_MAXSZ, + }, + [OP_CB_GETATTR] = { + .process_op = (callback_process_op_t)nfs4_callback_getattr, + .decode_args = (callback_decode_arg_t)decode_getattr_args, + .encode_res = (callback_encode_res_t)encode_getattr_res, + .res_maxsize = CB_OP_GETATTR_RES_MAXSZ, + }, + [OP_CB_RECALL] = { + .process_op = (callback_process_op_t)nfs4_callback_recall, + .decode_args = (callback_decode_arg_t)decode_recall_args, + .res_maxsize = CB_OP_RECALL_RES_MAXSZ, + } +}; + +/* + * Define NFS4 callback procedures + */ +static struct svc_procedure nfs4_callback_procedures1[] = { + [CB_NULL] = { + .pc_func = nfs4_callback_null, + .pc_decode = (kxdrproc_t)nfs4_decode_void, + .pc_encode = (kxdrproc_t)nfs4_encode_void, + .pc_xdrressize = 1, + }, + [CB_COMPOUND] = { + .pc_func = nfs4_callback_compound, + .pc_encode = (kxdrproc_t)nfs4_encode_void, + .pc_argsize = 256, + .pc_ressize = 256, + .pc_xdrressize = NFS4_CALLBACK_BUFSIZE, + } +}; + +struct svc_version nfs4_callback_version1 = { + .vs_vers = 1, + .vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1), + .vs_proc = nfs4_callback_procedures1, + .vs_xdrsize = NFS4_CALLBACK_XDRSIZE, + .vs_dispatch = NULL, +}; + --- linux-2.6.7/fs/nfs/callback.c.lsec 2005-03-23 14:28:22.484631512 -0700 +++ linux-2.6.7/fs/nfs/callback.c 2005-03-23 14:28:22.483631664 -0700 @@ -0,0 +1,325 @@ +/* + * linux/fs/nfs/callback.c + * + * Copyright (C) 2004 Trond Myklebust + * + * NFSv4 callback handling + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "callback.h" + +#define NFSDBG_FACILITY NFSDBG_CALLBACK + +struct nfs_callback_data { + unsigned int users; + struct svc_serv *serv; + pid_t pid; + struct completion started; + struct completion stopped; +}; + +static struct nfs_callback_data nfs_callback_info; +static DECLARE_MUTEX(nfs_callback_sema); +static struct svc_program nfs4_callback_program; + +unsigned short nfs_callback_tcpport; + +/* + * This is the callback kernel thread. + */ +static void nfs_callback_svc(struct svc_rqst *rqstp) +{ + struct svc_serv *serv = rqstp->rq_server; + int err; + + __module_get(THIS_MODULE); + lock_kernel(); + + nfs_callback_info.pid = current->pid; + daemonize("nfsv4-svc"); + /* Process request with signals blocked, but allow SIGKILL. 
+ */ + allow_signal(SIGKILL); + + complete(&nfs_callback_info.started); + + while (nfs_callback_info.users != 0 || !signalled()) { + /* + * Listen for a request on the socket + */ + err = svc_recv(serv, rqstp, MAX_SCHEDULE_TIMEOUT); + if (err == -EAGAIN || err == -EINTR) + continue; + if (err < 0) { + printk(KERN_WARNING + "%s: terminating on error %d\n", + __FUNCTION__, -err); + break; + } + dprintk("%s: request from %u.%u.%u.%u\n", __FUNCTION__, + NIPQUAD(rqstp->rq_addr.sin_addr.s_addr)); + svc_process(serv, rqstp); + } + + nfs_callback_info.pid = 0; + complete(&nfs_callback_info.stopped); + unlock_kernel(); + module_put_and_exit(0); +} + +/* + * Bring up the server process if it is not already up. + */ +int nfs_callback_up(void) +{ + struct svc_serv *serv; + struct svc_sock *svsk; + int ret = 0; + + lock_kernel(); + down(&nfs_callback_sema); + if (nfs_callback_info.users++ || nfs_callback_info.pid != 0) + goto out; + init_completion(&nfs_callback_info.started); + init_completion(&nfs_callback_info.stopped); + serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE); + ret = -ENOMEM; + if (!serv) + goto out_err; + /* FIXME: We don't want to register this socket with the portmapper */ + ret = svc_makesock(serv, IPPROTO_TCP, 0); + if (ret < 0) + goto out_destroy; + if (!list_empty(&serv->sv_permsocks)) { + svsk = list_entry(serv->sv_permsocks.next, + struct svc_sock, sk_list); + nfs_callback_tcpport = ntohs(inet_sk(svsk->sk_sk)->sport); + dprintk ("Callback port = 0x%x\n", nfs_callback_tcpport); + } else + BUG(); + ret = svc_create_thread(nfs_callback_svc, serv); + if (ret < 0) + goto out_destroy; + nfs_callback_info.serv = serv; + wait_for_completion(&nfs_callback_info.started); +out: + up(&nfs_callback_sema); + unlock_kernel(); + return ret; +out_destroy: + svc_destroy(serv); +out_err: + nfs_callback_info.users--; + goto out; +} + +/* + * Kill the server process if it is not already down.
+ */ +int nfs_callback_down(void) +{ + int ret = 0; + + lock_kernel(); + down(&nfs_callback_sema); + if (--nfs_callback_info.users || nfs_callback_info.pid == 0) + goto out; + kill_proc(nfs_callback_info.pid, SIGKILL, 1); + wait_for_completion(&nfs_callback_info.stopped); +out: + up(&nfs_callback_sema); + unlock_kernel(); + return ret; +} + +/* + * AUTH_NULL authentication + */ +static int nfs_callback_null_accept(struct svc_rqst *rqstp, u32 *authp) +{ + struct iovec *argv = &rqstp->rq_arg.head[0]; + struct iovec *resv = &rqstp->rq_res.head[0]; + + if (argv->iov_len < 3*4) + return SVC_GARBAGE; + + if (svc_getu32(argv) != 0) { + dprintk("svc: bad null cred\n"); + *authp = rpc_autherr_badcred; + return SVC_DENIED; + } + if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) { + dprintk("svc: bad null verf\n"); + *authp = rpc_autherr_badverf; + return SVC_DENIED; + } + + /* Signal that mapping to nobody uid/gid is required */ + rqstp->rq_cred.cr_uid = (uid_t) -1; + rqstp->rq_cred.cr_gid = (gid_t) -1; + rqstp->rq_cred.cr_group_info = groups_alloc(0); + if (rqstp->rq_cred.cr_group_info == NULL) + return SVC_DROP; /* kmalloc failure - client must retry */ + + /* Put NULL verifier */ + svc_putu32(resv, RPC_AUTH_NULL); + svc_putu32(resv, 0); + dprintk("%s: success, returning %d!\n", __FUNCTION__, SVC_OK); + return SVC_OK; +} + +static int nfs_callback_null_release(struct svc_rqst *rqstp) +{ + if (rqstp->rq_cred.cr_group_info) + put_group_info(rqstp->rq_cred.cr_group_info); + rqstp->rq_cred.cr_group_info = NULL; + return 0; /* don't drop */ +} + +static struct auth_ops nfs_callback_auth_null = { + .name = "null", + .flavour = RPC_AUTH_NULL, + .accept = nfs_callback_null_accept, + .release = nfs_callback_null_release, +}; + +/* + * AUTH_SYS authentication + */ +static int nfs_callback_unix_accept(struct svc_rqst *rqstp, u32 *authp) +{ + struct iovec *argv = &rqstp->rq_arg.head[0]; + struct iovec *resv = &rqstp->rq_res.head[0]; + struct svc_cred *cred = &rqstp->rq_cred; + u32 slen, i; + int len = argv->iov_len; + + dprintk("%s: start\n", __FUNCTION__); + cred->cr_group_info = NULL; + rqstp->rq_client = NULL; + if ((len -= 3*4) < 0) + return SVC_GARBAGE; + + /* Get length, time stamp and machine name */ + svc_getu32(argv); + svc_getu32(argv); + slen = XDR_QUADLEN(ntohl(svc_getu32(argv))); + if (slen > 64 || (len -= (slen + 3)*4) < 0) + goto badcred; + argv->iov_base = (void*)((u32*)argv->iov_base + slen); + argv->iov_len -= slen*4; + + cred->cr_uid = ntohl(svc_getu32(argv)); + cred->cr_gid = ntohl(svc_getu32(argv)); + slen = ntohl(svc_getu32(argv)); + if (slen > 16 || (len -= (slen + 2)*4) < 0) + goto badcred; + cred->cr_group_info = groups_alloc(slen); + if (cred->cr_group_info == NULL) + return SVC_DROP; + for (i = 0; i < slen; i++) + GROUP_AT(cred->cr_group_info, i) = ntohl(svc_getu32(argv)); + + if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) { + *authp = rpc_autherr_badverf; + return SVC_DENIED; + } + /* Put NULL verifier */ + svc_putu32(resv, RPC_AUTH_NULL); + svc_putu32(resv, 0); + dprintk("%s: success, returning %d!\n", __FUNCTION__, SVC_OK); + return SVC_OK; +badcred: + *authp = rpc_autherr_badcred; + return SVC_DENIED; +} + +static int nfs_callback_unix_release(struct svc_rqst *rqstp) +{ + if (rqstp->rq_cred.cr_group_info) + put_group_info(rqstp->rq_cred.cr_group_info); + rqstp->rq_cred.cr_group_info = NULL; + return 0; +} + +static struct auth_ops nfs_callback_auth_unix = { + .name = "unix", + .flavour = RPC_AUTH_UNIX, + .accept = nfs_callback_unix_accept, + 
.release = nfs_callback_unix_release, +}; + +/* + * Hook the authentication protocol + */ +static int nfs_callback_auth(struct svc_rqst *rqstp, u32 *authp) +{ + struct in_addr *addr = &rqstp->rq_addr.sin_addr; + struct nfs4_client *clp; + struct iovec *argv = &rqstp->rq_arg.head[0]; + int flavour; + int retval; + + /* Don't talk to strangers */ + clp = nfs4_find_client(addr); + if (clp == NULL) + return SVC_DROP; + dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr)); + nfs4_put_client(clp); + flavour = ntohl(svc_getu32(argv)); + switch(flavour) { + case RPC_AUTH_NULL: + if (rqstp->rq_proc != CB_NULL) { + *authp = rpc_autherr_tooweak; + retval = SVC_DENIED; + break; + } + rqstp->rq_authop = &nfs_callback_auth_null; + retval = nfs_callback_null_accept(rqstp, authp); + break; + case RPC_AUTH_UNIX: + /* Eat the authentication flavour */ + rqstp->rq_authop = &nfs_callback_auth_unix; + retval = nfs_callback_unix_accept(rqstp, authp); + break; + default: + /* FIXME: need to add RPCSEC_GSS upcalls */ +#if 0 + svc_ungetu32(argv); + retval = svc_authenticate(rqstp, authp); +#else + *authp = rpc_autherr_rejectedcred; + retval = SVC_DENIED; +#endif + } + dprintk("%s: flavour %d returning error %d\n", __FUNCTION__, flavour, retval); + return retval; +} + +/* + * Define NFS4 callback program + */ +extern struct svc_version nfs4_callback_version1; + +static struct svc_version *nfs4_callback_version[] = { + [1] = &nfs4_callback_version1, +}; + +static struct svc_stat nfs4_callback_stats; + +static struct svc_program nfs4_callback_program = { + .pg_prog = NFS4_CALLBACK, /* RPC service number */ + .pg_nvers = ARRAY_SIZE(nfs4_callback_version), /* Number of entries */ + .pg_vers = nfs4_callback_version, /* version table */ + .pg_name = "NFSv4 callback", /* service name */ + .pg_class = "nfs", /* authentication class */ + .pg_stats = &nfs4_callback_stats, + .pg_authenticate = nfs_callback_auth, +}; --- linux-2.6.7/fs/nfs/read.c.lsec 2004-06-15 23:18:37.000000000 -0600 +++ linux-2.6.7/fs/nfs/read.c 2005-03-23 14:28:23.114535752 -0700 @@ -91,8 +91,8 @@ int nfs_return_empty_page(struct page *p /* * Read a page synchronously. */ -static int -nfs_readpage_sync(struct file *file, struct inode *inode, struct page *page) +static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, + struct page *page) { unsigned int rsize = NFS_SERVER(inode)->rsize; unsigned int count = PAGE_CACHE_SIZE; @@ -105,10 +105,11 @@ nfs_readpage_sync(struct file *file, str memset(rdata, 0, sizeof(*rdata)); rdata->flags = (IS_SWAPFILE(inode)? 
NFS_RPC_SWAPFLAGS : 0); + rdata->cred = ctx->cred; rdata->inode = inode; INIT_LIST_HEAD(&rdata->pages); rdata->args.fh = NFS_FH(inode); - rdata->args.lockowner = current->files; + rdata->args.context = ctx; rdata->args.pages = &page; rdata->args.pgbase = 0UL; rdata->args.count = rsize; @@ -134,7 +135,7 @@ nfs_readpage_sync(struct file *file, str rdata->args.count); lock_kernel(); - result = NFS_PROTO(inode)->read(rdata, file); + result = NFS_PROTO(inode)->read(rdata); unlock_kernel(); /* @@ -169,8 +170,8 @@ io_error: return result; } -static int -nfs_readpage_async(struct file *file, struct inode *inode, struct page *page) +static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, + struct page *page) { LIST_HEAD(one_request); struct nfs_page *new; @@ -179,7 +180,7 @@ nfs_readpage_async(struct file *file, st len = nfs_page_length(inode, page); if (len == 0) return nfs_return_empty_page(page); - new = nfs_create_request(file, inode, page, 0, len); + new = nfs_create_request(ctx, inode, page, 0, len); if (IS_ERR(new)) { unlock_page(page); return PTR_ERR(new); @@ -202,8 +203,8 @@ static void nfs_readpage_release(struct nfs_unlock_request(req); dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", - req->wb_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_inode), + req->wb_context->dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); } @@ -217,16 +218,15 @@ static void nfs_read_rpcsetup(struct nfs struct inode *inode; data->req = req; - data->inode = inode = req->wb_inode; - data->cred = req->wb_cred; + data->inode = inode = req->wb_context->dentry->d_inode; + data->cred = req->wb_context->cred; data->args.fh = NFS_FH(inode); data->args.offset = req_offset(req) + offset; data->args.pgbase = req->wb_pgbase + offset; data->args.pages = data->pagevec; data->args.count = count; - data->args.lockowner = req->wb_lockowner; - data->args.state = req->wb_state; + data->args.context = req->wb_context; data->res.fattr = &data->fattr; data->res.count = count; @@ -396,7 +396,7 @@ nfs_pagein_list(struct list_head *head, while (!list_empty(head)) { pages += nfs_coalesce_requests(head, &one_request, rpages); req = nfs_list_entry(one_request.next); - error = nfs_pagein_one(&one_request, req->wb_inode); + error = nfs_pagein_one(&one_request, req->wb_context->dentry->d_inode); if (error < 0) break; } @@ -500,9 +500,9 @@ void nfs_readpage_result(struct rpc_task * - The error flag is set for this page. This happens only when a * previous async read operation failed. */ -int -nfs_readpage(struct file *file, struct page *page) +int nfs_readpage(struct file *file, struct page *page) { + struct nfs_open_context *ctx; struct inode *inode = page->mapping->host; int error; @@ -519,25 +519,33 @@ nfs_readpage(struct file *file, struct p if (error) goto out_error; + if (file == NULL) { + ctx = nfs_find_open_context(inode, FMODE_READ); + if (ctx == NULL) + return -EBADF; + } else + ctx = get_nfs_open_context((struct nfs_open_context *) + file->private_data); if (!IS_SYNC(inode)) { - error = nfs_readpage_async(file, inode, page); + error = nfs_readpage_async(ctx, inode, page); goto out; } - error = nfs_readpage_sync(file, inode, page); + error = nfs_readpage_sync(ctx, inode, page); if (error < 0 && IS_SWAPFILE(inode)) printk("Aiee.. 
nfs swap-in of page failed!\n"); out: + put_nfs_open_context(ctx); return error; out_error: unlock_page(page); - goto out; + return error; } struct nfs_readdesc { struct list_head *head; - struct file *filp; + struct nfs_open_context *ctx; }; static int @@ -552,7 +560,7 @@ readpage_async_filler(void *data, struct len = nfs_page_length(inode, page); if (len == 0) return nfs_return_empty_page(page); - new = nfs_create_request(desc->filp, inode, page, 0, len); + new = nfs_create_request(desc->ctx, inode, page, 0, len); if (IS_ERR(new)) { SetPageError(page); unlock_page(page); @@ -565,13 +573,11 @@ readpage_async_filler(void *data, struct return 0; } -int -nfs_readpages(struct file *filp, struct address_space *mapping, +int nfs_readpages(struct file *filp, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { LIST_HEAD(head); struct nfs_readdesc desc = { - .filp = filp, .head = &head, }; struct inode *inode = mapping->host; @@ -583,12 +589,20 @@ nfs_readpages(struct file *filp, struct (long long)NFS_FILEID(inode), nr_pages); + if (filp == NULL) { + desc.ctx = nfs_find_open_context(inode, FMODE_READ); + if (desc.ctx == NULL) + return -EBADF; + } else + desc.ctx = get_nfs_open_context((struct nfs_open_context *) + filp->private_data); ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); if (!list_empty(&head)) { int err = nfs_pagein_list(&head, server->rpages); if (!ret) ret = err; } + put_nfs_open_context(desc.ctx); return ret; } --- linux-2.6.7/fs/nfs/Makefile.lsec 2004-06-15 23:19:01.000000000 -0600 +++ linux-2.6.7/fs/nfs/Makefile 2005-03-23 14:28:22.819580592 -0700 @@ -9,6 +9,7 @@ nfs-y := dir.o file.o inode.o nfs2xdr nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ - idmap.o + delegation.o idmap.o \ + callback.o callback_xdr.o callback_proc.o nfs-$(CONFIG_NFS_DIRECTIO) += direct.o nfs-objs := $(nfs-y) --- linux-2.6.7/fs/Kconfig.lsec 2004-06-15 23:19:36.000000000 -0600 +++ linux-2.6.7/fs/Kconfig 2005-03-23 14:28:23.871420688 -0700 @@ -322,7 +322,7 @@ config FS_POSIX_ACL # Never use this symbol for ifdefs. # bool - depends on EXT2_FS_POSIX_ACL || EXT3_FS_POSIX_ACL || JFS_POSIX_ACL || REISERFS_FS_POSIX_ACL + depends on EXT2_FS_POSIX_ACL || EXT3_FS_POSIX_ACL || JFS_POSIX_ACL || REISERFS_FS_POSIX_ACL || NFS_V4 default y config XFS_FS @@ -1443,6 +1443,7 @@ config NFSD_V3 config NFSD_V4 bool "Provide NFSv4 server support (EXPERIMENTAL)" depends on NFSD_V3 && EXPERIMENTAL + select NFSD_TCP help If you would like to include the NFSv4 server as well as the NFSv2 and NFSv3 servers, say Y here. This feature is experimental, and @@ -1450,11 +1451,13 @@ config NFSD_V4 If unsure, say N. config NFSD_TCP - bool "Provide NFS server over TCP support (EXPERIMENTAL)" - depends on NFSD && EXPERIMENTAL + bool "Provide NFS server over TCP support" + depends on NFSD + default y help - Enable NFS service over TCP connections. This the officially - still experimental, but seems to work well. + If you want your NFS server to support TCP connections, say Y here. + TCP connections usually perform better than the default UDP when + the network is lossy or congested. If unsure, say Y. config ROOT_NFS bool "Root file system on NFS" @@ -1505,6 +1508,22 @@ config RPCSEC_GSS_KRB5 If unsure, say N. 
+config RPCSEC_GSS_SPKM3 + tristate "Secure RPC: SPKM3 mechanism (EXPERIMENTAL)" + depends on SUNRPC && EXPERIMENTAL + select SUNRPC_GSS + select CRYPTO + select CRYPTO_MD5 + select CRYPTO_DES + help + Provides for secure RPC calls by means of a gss-api + mechanism based on the SPKM3 public-key mechanism. + + Note: Requires an auxiliary userspace daemon which may be found on + http://www.citi.umich.edu/projects/nfsv4/ + + If unsure, say N. + config SMB_FS tristate "SMB file system support (to mount Windows shares etc.)" depends on INET --- linux-2.6.7/include/linux/fs.h.lsec 2005-03-23 14:26:03.300790672 -0700 +++ linux-2.6.7/include/linux/fs.h 2005-03-23 14:28:23.280510520 -0700 @@ -632,7 +632,7 @@ struct file_lock { struct file_lock *fl_next; /* singly linked list for this inode */ struct list_head fl_link; /* doubly linked list of all locks */ struct list_head fl_block; /* circular list of blocked processes */ - fl_owner_t fl_owner; + fl_owner_t fl_owner; /* 0 if lock owned by a local process */ unsigned int fl_pid; wait_queue_head_t fl_wait; struct file *fl_file; --- linux-2.6.7/include/linux/nfs4.h.lsec 2004-06-15 23:19:22.000000000 -0600 +++ linux-2.6.7/include/linux/nfs4.h 2005-03-23 14:28:23.335502160 -0700 @@ -13,8 +13,12 @@ #ifndef _LINUX_NFS4_H #define _LINUX_NFS4_H +#include +#include + #define NFS4_VERIFIER_SIZE 8 #define NFS4_FHSIZE 128 +#define NFS4_MAXPATHLEN PATH_MAX #define NFS4_MAXNAMLEN NAME_MAX #define NFS4_ACCESS_READ 0x0001 @@ -52,6 +56,60 @@ #define ACL4_SUPPORT_AUDIT_ACL 0x04 #define ACL4_SUPPORT_ALARM_ACL 0x08 +#define NFS4_ACE_FILE_INHERIT_ACE 0x00000001 +#define NFS4_ACE_DIRECTORY_INHERIT_ACE 0x00000002 +#define NFS4_ACE_NO_PROPAGATE_INHERIT_ACE 0x00000004 +#define NFS4_ACE_INHERIT_ONLY_ACE 0x00000008 +#define NFS4_ACE_SUCCESSFUL_ACCESS_ACE_FLAG 0x00000010 +#define NFS4_ACE_FAILED_ACCESS_ACE_FLAG 0x00000020 +#define NFS4_ACE_IDENTIFIER_GROUP 0x00000040 +#define NFS4_ACE_OWNER 0x00000080 +#define NFS4_ACE_GROUP 0x00000100 +#define NFS4_ACE_EVERYONE 0x00000200 + +#define NFS4_ACE_READ_DATA 0x00000001 +#define NFS4_ACE_LIST_DIRECTORY 0x00000001 +#define NFS4_ACE_WRITE_DATA 0x00000002 +#define NFS4_ACE_ADD_FILE 0x00000002 +#define NFS4_ACE_APPEND_DATA 0x00000004 +#define NFS4_ACE_ADD_SUBDIRECTORY 0x00000004 +#define NFS4_ACE_READ_NAMED_ATTRS 0x00000008 +#define NFS4_ACE_WRITE_NAMED_ATTRS 0x00000010 +#define NFS4_ACE_EXECUTE 0x00000020 +#define NFS4_ACE_DELETE_CHILD 0x00000040 +#define NFS4_ACE_READ_ATTRIBUTES 0x00000080 +#define NFS4_ACE_WRITE_ATTRIBUTES 0x00000100 +#define NFS4_ACE_DELETE 0x00010000 +#define NFS4_ACE_READ_ACL 0x00020000 +#define NFS4_ACE_WRITE_ACL 0x00040000 +#define NFS4_ACE_WRITE_OWNER 0x00080000 +#define NFS4_ACE_SYNCHRONIZE 0x00100000 +#define NFS4_ACE_GENERIC_READ 0x00120081 +#define NFS4_ACE_GENERIC_WRITE 0x00160106 +#define NFS4_ACE_GENERIC_EXECUTE 0x001200A0 +#define NFS4_ACE_MASK_ALL 0x001F01FF + +enum nfs4_acl_whotype { + NFS4_ACL_WHO_NAMED = 0, + NFS4_ACL_WHO_OWNER, + NFS4_ACL_WHO_GROUP, + NFS4_ACL_WHO_EVERYONE, +}; + +struct nfs4_ace { + uint32_t type; + uint32_t flag; + uint32_t access_mask; + int whotype; + uid_t who; + struct list_head l_ace; +}; + +struct nfs4_acl { + uint32_t naces; + struct list_head ace_head; +}; + typedef struct { char data[NFS4_VERIFIER_SIZE]; } nfs4_verifier; typedef struct { char data[16]; } nfs4_stateid; @@ -297,7 +355,7 @@ enum { NFSPROC4_CLNT_COMMIT, NFSPROC4_CLNT_OPEN, NFSPROC4_CLNT_OPEN_CONFIRM, - NFSPROC4_CLNT_OPEN_RECLAIM, + NFSPROC4_CLNT_OPEN_NOATTR, NFSPROC4_CLNT_OPEN_DOWNGRADE, NFSPROC4_CLNT_CLOSE, 
NFSPROC4_CLNT_SETATTR, @@ -315,12 +373,16 @@ enum { NFSPROC4_CLNT_REMOVE, NFSPROC4_CLNT_RENAME, NFSPROC4_CLNT_LINK, + NFSPROC4_CLNT_SYMLINK, NFSPROC4_CLNT_CREATE, NFSPROC4_CLNT_PATHCONF, NFSPROC4_CLNT_STATFS, NFSPROC4_CLNT_READLINK, NFSPROC4_CLNT_READDIR, NFSPROC4_CLNT_SERVER_CAPS, + NFSPROC4_CLNT_DELEGRETURN, + NFSPROC4_CLNT_GETACL, + NFSPROC4_CLNT_SETACL, }; #endif --- linux-2.6.7/include/linux/nfs_page.h.lsec 2004-06-15 23:18:57.000000000 -0600 +++ linux-2.6.7/include/linux/nfs_page.h 2005-03-23 14:28:23.392493496 -0700 @@ -29,14 +29,9 @@ struct nfs_page { struct list_head wb_list, /* Defines state of page: */ *wb_list_head; /* read/write/commit */ - struct file *wb_file; - fl_owner_t wb_lockowner; - struct inode *wb_inode; - struct rpc_cred *wb_cred; - struct nfs4_state *wb_state; struct page *wb_page; /* page to read in/write out */ + struct nfs_open_context *wb_context; /* File state context info */ atomic_t wb_complete; /* i/os we're waiting for */ - wait_queue_head_t wb_wait; /* wait queue */ unsigned long wb_index; /* Offset >> PAGE_CACHE_SHIFT */ unsigned int wb_offset, /* Offset & ~PAGE_CACHE_MASK */ wb_pgbase, /* Start of page data */ @@ -50,9 +45,11 @@ struct nfs_page { #define NFS_NEED_COMMIT(req) (test_bit(PG_NEED_COMMIT,&(req)->wb_flags)) #define NFS_NEED_RESCHED(req) (test_bit(PG_NEED_RESCHED,&(req)->wb_flags)) -extern struct nfs_page *nfs_create_request(struct file *, struct inode *, - struct page *, - unsigned int, unsigned int); +extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, + struct inode *inode, + struct page *page, + unsigned int offset, + unsigned int count); extern void nfs_clear_request(struct nfs_page *req); extern void nfs_release_request(struct nfs_page *req); @@ -64,6 +61,7 @@ extern int nfs_scan_list(struct list_hea extern int nfs_coalesce_requests(struct list_head *, struct list_head *, unsigned int); extern int nfs_wait_on_request(struct nfs_page *); +extern void nfs_unlock_request(struct nfs_page *req); extern spinlock_t nfs_wreq_lock; @@ -90,19 +88,6 @@ nfs_lock_request(struct nfs_page *req) return 1; } -static inline void -nfs_unlock_request(struct nfs_page *req) -{ - if (!NFS_WBACK_BUSY(req)) { - printk(KERN_ERR "NFS: Invalid unlock attempted\n"); - BUG(); - } - smp_mb__before_clear_bit(); - clear_bit(PG_BUSY, &req->wb_flags); - smp_mb__after_clear_bit(); - wake_up_all(&req->wb_wait); - nfs_release_request(req); -} /** * nfs_list_remove_request - Remove a request from its wb_list --- linux-2.6.7/include/linux/sunrpc/svc.h.lsec 2004-06-15 23:19:35.000000000 -0600 +++ linux-2.6.7/include/linux/sunrpc/svc.h 2005-03-23 14:28:23.541470848 -0700 @@ -87,6 +87,14 @@ static inline u32 svc_getu32(struct iove iov->iov_len -= sizeof(u32); return val; } + +static inline void svc_ungetu32(struct iovec *iov) +{ + u32 *vp = (u32 *)iov->iov_base; + iov->iov_base = (void *)(vp - 1); + iov->iov_len += sizeof(*vp); +} + static inline void svc_putu32(struct iovec *iov, u32 val) { u32 *vp = iov->iov_base + iov->iov_len; @@ -243,6 +251,8 @@ struct svc_program { char * pg_name; /* service name */ char * pg_class; /* class name: services sharing authentication */ struct svc_stat * pg_stats; /* rpc statistics */ + /* Override authentication. 
NULL means use default */ + int (*pg_authenticate)(struct svc_rqst *, u32 *); }; /* --- linux-2.6.7/include/linux/sunrpc/gss_spkm3.h.lsec 2005-03-23 14:28:24.186372808 -0700 +++ linux-2.6.7/include/linux/sunrpc/gss_spkm3.h 2005-03-23 14:28:24.185372960 -0700 @@ -0,0 +1,61 @@ +/* + * linux/include/linux/sunrpc/gss_spkm3.h + * + * Copyright (c) 2000 The Regents of the University of Michigan. + * All rights reserved. + * + * Andy Adamson + */ + +#include +#include +#include + +struct spkm3_ctx { + struct xdr_netobj ctx_id; /* per message context id */ + int qop; /* negotiated qop */ + struct xdr_netobj mech_used; + unsigned int ret_flags ; + unsigned int req_flags ; + struct xdr_netobj share_key; + int conf_alg; + struct crypto_tfm* derived_conf_key; + int intg_alg; + struct crypto_tfm* derived_integ_key; + int keyestb_alg; /* alg used to get share_key */ + int owf_alg; /* one way function */ +}; + +/* from openssl/objects.h */ +/* XXX need SEAL_ALG_NONE */ +#define NID_md5 4 +#define NID_dhKeyAgreement 28 +#define NID_des_cbc 31 +#define NID_sha1 64 +#define NID_cast5_cbc 108 + +/* SPKM InnerContext Token types */ + +#define SPKM_ERROR_TOK 3 +#define SPKM_MIC_TOK 4 +#define SPKM_WRAP_TOK 5 +#define SPKM_DEL_TOK 6 + +u32 spkm3_make_token(struct spkm3_ctx *ctx, int qop_req, struct xdr_buf * text, struct xdr_netobj * token, int toktype); + +u32 spkm3_read_token(struct spkm3_ctx *ctx, struct xdr_netobj *read_token, struct xdr_buf *message_buffer, int *qop_state, int toktype); + +#define CKSUMTYPE_RSA_MD5 0x0007 + +s32 make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, + struct xdr_netobj *cksum); +void asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits); +int decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, + int explen); +void spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen, + unsigned char *ctxhdr, int elen, int zbit); +void spkm3_make_mic_token(unsigned char **tokp, int toklen, + struct xdr_netobj *mic_hdr, + struct xdr_netobj *md5cksum, int md5elen, int md5zbit); +u32 spkm3_verify_mic_token(unsigned char **tokp, int *mic_hdrlen, + unsigned char **cksum); --- linux-2.6.7/include/linux/sunrpc/sched.h.lsec 2004-06-15 23:19:42.000000000 -0600 +++ linux-2.6.7/include/linux/sunrpc/sched.h 2005-03-23 14:28:23.540471000 -0700 @@ -11,7 +11,9 @@ #include #include +#include #include +#include #include /* @@ -25,11 +27,18 @@ struct rpc_message { struct rpc_cred * rpc_cred; /* Credentials */ }; +struct rpc_wait_queue; +struct rpc_wait { + struct list_head list; /* wait queue links */ + struct list_head links; /* Links to related tasks */ + wait_queue_head_t waitq; /* sync: sleep on this q */ + struct rpc_wait_queue * rpc_waitq; /* RPC wait queue we're on */ +}; + /* * This is the RPC task struct */ struct rpc_task { - struct list_head tk_list; /* wait queue links */ #ifdef RPC_DEBUG unsigned long tk_magic; /* 0xf00baa */ #endif @@ -37,7 +46,6 @@ struct rpc_task { struct rpc_clnt * tk_client; /* RPC client */ struct rpc_rqst * tk_rqstp; /* RPC request */ int tk_status; /* result of last operation */ - struct rpc_wait_queue * tk_rpcwait; /* RPC wait queue we're on */ /* * RPC call state @@ -70,13 +78,18 @@ struct rpc_task { * you have a pathological interest in kernel oopses. 
*/ struct timer_list tk_timer; /* kernel timer */ - wait_queue_head_t tk_wait; /* sync: sleep on this q */ unsigned long tk_timeout; /* timeout for rpc_sleep() */ unsigned short tk_flags; /* misc flags */ unsigned char tk_active : 1;/* Task has been activated */ unsigned char tk_priority : 2;/* Task priority */ unsigned long tk_runstate; /* Task run status */ - struct list_head tk_links; /* links to related tasks */ + struct workqueue_struct *tk_workqueue; /* Normally rpciod, but could + * be any workqueue + */ + union { + struct work_struct tk_work; /* Async task work queue */ + struct rpc_wait tk_wait; /* RPC wait */ + } u; #ifdef RPC_DEBUG unsigned short tk_pid; /* debugging aid */ #endif @@ -87,11 +100,11 @@ struct rpc_task { /* support walking a list of tasks on a wait queue */ #define task_for_each(task, pos, head) \ list_for_each(pos, head) \ - if ((task=list_entry(pos, struct rpc_task, tk_list)),1) + if ((task=list_entry(pos, struct rpc_task, u.tk_wait.list)),1) #define task_for_first(task, head) \ if (!list_empty(head) && \ - ((task=list_entry((head)->next, struct rpc_task, tk_list)),1)) + ((task=list_entry((head)->next, struct rpc_task, u.tk_wait.list)),1)) /* .. and walking list of all tasks */ #define alltask_for_each(task, pos, head) \ @@ -124,22 +137,24 @@ typedef void (*rpc_action)(struct rpc_ #define RPC_DO_CALLBACK(t) ((t)->tk_callback != NULL) #define RPC_IS_SOFT(t) ((t)->tk_flags & RPC_TASK_SOFT) -#define RPC_TASK_SLEEPING 0 -#define RPC_TASK_RUNNING 1 -#define RPC_IS_SLEEPING(t) (test_bit(RPC_TASK_SLEEPING, &(t)->tk_runstate)) -#define RPC_IS_RUNNING(t) (test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)) +#define RPC_TASK_RUNNING 0 +#define RPC_TASK_QUEUED 1 +#define RPC_IS_RUNNING(t) (test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)) #define rpc_set_running(t) (set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)) -#define rpc_clear_running(t) (clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)) - -#define rpc_set_sleeping(t) (set_bit(RPC_TASK_SLEEPING, &(t)->tk_runstate)) - -#define rpc_clear_sleeping(t) \ +#define rpc_test_and_set_running(t) \ + (test_and_set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)) +#define rpc_clear_running(t) \ do { \ smp_mb__before_clear_bit(); \ - clear_bit(RPC_TASK_SLEEPING, &(t)->tk_runstate); \ + clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate); \ smp_mb__after_clear_bit(); \ - } while(0) + } while (0) + +#define RPC_IS_QUEUED(t) (test_bit(RPC_TASK_QUEUED, &(t)->tk_runstate)) +#define rpc_set_queued(t) (set_bit(RPC_TASK_QUEUED, &(t)->tk_runstate)) +#define rpc_test_and_clear_queued(t) \ + (test_and_clear_bit(RPC_TASK_QUEUED, &(t)->tk_runstate)) /* * Task priorities. 
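The RPC_TASK_RUNNING/RPC_TASK_QUEUED pair above replaces the old RUNNING/SLEEPING bits: QUEUED means the task sits on a wait queue, RUNNING means it has been handed to the scheduler, and both transitions go through atomic test-and-set/test-and-clear so that racing wake-ups and double scheduling fall through harmlessly. The stand-alone C model below is only a sketch of that bit protocol; struct task, model_sleep() and model_wake() are illustrative names, not kernel interfaces.

#include <stdatomic.h>
#include <stdio.h>

#define TASK_RUNNING 0   /* models RPC_TASK_RUNNING */
#define TASK_QUEUED  1   /* models RPC_TASK_QUEUED  */

struct task {
        atomic_ulong runstate;
};

static int test_and_set_bit(int nr, atomic_ulong *word)
{
        unsigned long mask = 1UL << nr;
        return (atomic_fetch_or(word, mask) & mask) != 0;
}

static int test_and_clear_bit(int nr, atomic_ulong *word)
{
        unsigned long mask = 1UL << nr;
        return (atomic_fetch_and(word, ~mask) & mask) != 0;
}

/* Sleep path: the task is marked QUEUED while it sits on a wait queue. */
static void model_sleep(struct task *t)
{
        test_and_set_bit(TASK_QUEUED, &t->runstate);
}

/* Wake path: only the caller that clears QUEUED dequeues the task, and
 * only the caller that sets RUNNING first actually schedules it, so a
 * second, racing wake-up is a no-op. */
static void model_wake(struct task *t)
{
        if (!test_and_clear_bit(TASK_QUEUED, &t->runstate))
                return;
        if (test_and_set_bit(TASK_RUNNING, &t->runstate))
                return;
        printf("task scheduled exactly once\n");
}

int main(void)
{
        struct task t = { .runstate = 0 };

        model_sleep(&t);
        model_wake(&t);
        model_wake(&t);         /* duplicate wake-up falls through */
        return 0;
}

Built with any C11 compiler, the second model_wake() call prints nothing, which is the behaviour the reworked scheduler relies on when several sources try to wake the same task.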
@@ -155,6 +170,7 @@ typedef void (*rpc_action)(struct rpc_ * RPC synchronization objects */ struct rpc_wait_queue { + spinlock_t lock; struct list_head tasks[RPC_NR_PRIORITY]; /* task queue for each priority level */ unsigned long cookie; /* cookie of last task serviced */ unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */ @@ -175,6 +191,7 @@ struct rpc_wait_queue { #ifndef RPC_DEBUG # define RPC_WAITQ_INIT(var,qname) { \ + .lock = SPIN_LOCK_UNLOCKED, \ .tasks = { \ [0] = LIST_HEAD_INIT(var.tasks[0]), \ [1] = LIST_HEAD_INIT(var.tasks[1]), \ @@ -183,6 +200,7 @@ struct rpc_wait_queue { } #else # define RPC_WAITQ_INIT(var,qname) { \ + .lock = SPIN_LOCK_UNLOCKED, \ .tasks = { \ [0] = LIST_HEAD_INIT(var.tasks[0]), \ [1] = LIST_HEAD_INIT(var.tasks[1]), \ @@ -207,13 +225,10 @@ void rpc_killall_tasks(struct rpc_clnt int rpc_execute(struct rpc_task *); void rpc_run_child(struct rpc_task *parent, struct rpc_task *child, rpc_action action); -int rpc_add_wait_queue(struct rpc_wait_queue *, struct rpc_task *); -void rpc_remove_wait_queue(struct rpc_task *); void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *); void rpc_init_wait_queue(struct rpc_wait_queue *, const char *); void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *, rpc_action action, rpc_action timer); -void rpc_add_timer(struct rpc_task *, rpc_action); void rpc_wake_up_task(struct rpc_task *); void rpc_wake_up(struct rpc_wait_queue *); struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *); --- linux-2.6.7/include/linux/sunrpc/gss_api.h.lsec 2004-06-15 23:20:03.000000000 -0600 +++ linux-2.6.7/include/linux/sunrpc/gss_api.h 2005-03-23 14:28:24.688296504 -0700 @@ -47,6 +47,18 @@ u32 gss_verify_mic( struct xdr_buf *message, struct xdr_netobj *mic_token, u32 *qstate); +u32 gss_wrap( + struct gss_ctx *ctx_id, + u32 qop, + int offset, + struct xdr_buf *outbuf, + struct page **inpages); +u32 gss_unwrap( + struct gss_ctx *ctx_id, + u32 *qop, + int offset, + struct xdr_buf *inbuf, + int *out_offset); u32 gss_delete_sec_context( struct gss_ctx **ctx_id); @@ -93,6 +105,18 @@ struct gss_api_ops { struct xdr_buf *message, struct xdr_netobj *mic_token, u32 *qstate); + u32 (*gss_wrap)( + struct gss_ctx *ctx_id, + u32 qop, + int offset, + struct xdr_buf *outbuf, + struct page **inpages); + u32 (*gss_unwrap)( + struct gss_ctx *ctx_id, + u32 *qop, + int offset, + struct xdr_buf *buf, + int *out_offset); void (*gss_delete_sec_context)( void *internal_ctx_id); }; --- linux-2.6.7/include/linux/sunrpc/xprt.h.lsec 2004-06-15 23:19:43.000000000 -0600 +++ linux-2.6.7/include/linux/sunrpc/xprt.h 2005-03-23 14:28:24.783282064 -0700 @@ -95,7 +95,10 @@ struct rpc_rqst { int rq_cong; /* has incremented xprt->cong */ int rq_received; /* receive completed */ u32 rq_seqno; /* gss seq no. used on req. 
*/ - + int rq_enc_pages_num; + struct page **rq_enc_pages; /* scratch pages for use by + gss privacy code */ + void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */ struct list_head rq_list; struct xdr_buf rq_private_buf; /* The receive buffer --- linux-2.6.7/include/linux/sunrpc/gss_krb5.h.lsec 2004-06-15 23:19:29.000000000 -0600 +++ linux-2.6.7/include/linux/sunrpc/gss_krb5.h 2005-03-23 14:28:24.840273400 -0700 @@ -53,6 +53,8 @@ struct krb5_ctx { struct xdr_netobj mech_used; }; +extern spinlock_t krb5_seq_lock; + #define KG_TOK_MIC_MSG 0x0101 #define KG_TOK_WRAP_MSG 0x0201 @@ -116,18 +118,25 @@ enum seal_alg { s32 make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, - struct xdr_netobj *cksum); + int body_offset, struct xdr_netobj *cksum); u32 krb5_make_token(struct krb5_ctx *context_handle, int qop_req, struct xdr_buf *input_message_buffer, - struct xdr_netobj *output_message_buffer, int toktype); + struct xdr_netobj *output_message_buffer); u32 krb5_read_token(struct krb5_ctx *context_handle, struct xdr_netobj *input_token_buffer, - struct xdr_buf *message_buffer, - int *qop_state, int toktype); + struct xdr_buf *message_buffer, int *qop_state); + +u32 +gss_wrap_kerberos(struct gss_ctx *ctx_id, u32 qop, int offset, + struct xdr_buf *outbuf, struct page **pages); + +u32 +gss_unwrap_kerberos(struct gss_ctx *ctx_id, u32 *qop, int offset, + struct xdr_buf *buf, int *out_offset); u32 krb5_encrypt(struct crypto_tfm * key, @@ -137,6 +146,13 @@ u32 krb5_decrypt(struct crypto_tfm * key, void *iv, void *in, void *out, int length); +int +gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *outbuf, int offset, + struct page **pages); + +int +gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *inbuf, int offset); + s32 krb5_make_seq_num(struct crypto_tfm * key, int direction, --- linux-2.6.7/include/linux/sunrpc/gss_asn1.h.lsec 2004-06-15 23:20:04.000000000 -0600 +++ linux-2.6.7/include/linux/sunrpc/gss_asn1.h 2005-03-23 14:28:23.706445768 -0700 @@ -69,7 +69,6 @@ u32 g_verify_token_header( struct xdr_netobj *mech, int *body_size, unsigned char **buf_in, - int tok_type, int toksize); u32 g_get_mech_oid(struct xdr_netobj *mech, struct xdr_netobj * in_buf); --- linux-2.6.7/include/linux/sunrpc/cache.h.lsec 2004-06-15 23:19:28.000000000 -0600 +++ linux-2.6.7/include/linux/sunrpc/cache.h 2005-03-23 14:28:24.349348032 -0700 @@ -128,20 +128,17 @@ struct cache_deferred_req { * just like a template in C++, this macro does cache lookup * for us. * The function is passed some sort of HANDLE from which a cache_detail - * structure can be determined (via SETUP, DETAIL), a template + * structure can be determined (via DETAIL), a template * cache entry (type RTN*), and a "set" flag. Using the HASHFN and the * TEST, the function will try to find a matching cache entry in the cache. * If "set" == 0 : * If an entry is found, it is returned * If no entry is found, a new non-VALID entry is created. - * If "set" == 1 and INPLACE == 0 : + * If "set" == 1: * If no entry is found a new one is inserted with data from "template" * If a non-CACHE_VALID entry is found, it is updated from template using UPDATE * If a CACHE_VALID entry is found, a new entry is swapped in with data * from "template" - * If set == 1, and INPLACE == 1 : - * As above, except that if a CACHE_VALID entry is found, we UPDATE in place - * instead of swapping in a new entry. 
* * If the passed handle has the CACHE_NEGATIVE flag set, then UPDATE is not * run but insteead CACHE_NEGATIVE is set in any new item. @@ -153,21 +150,18 @@ struct cache_deferred_req { * MEMBER is the member of the cache which is cache_head, which must be first * FNAME is the name for the function * ARGS are arguments to function and must contain RTN *item, int set. May - * also contain something to be usedby SETUP or DETAIL to find cache_detail. - * SETUP locates the cache detail and makes it available as... - * DETAIL identifies the cache detail, possibly set up by SETUP + * also contain something to be used by DETAIL to find cache_detail. + * DETAIL identifies the cache detail * HASHFN returns a hash value of the cache entry "item" * TEST tests if "tmp" matches "item" * INIT copies key information from "item" to "new" * UPDATE copies content information from "item" to "tmp" - * INPLACE is true if updates can happen inplace rather than allocating a new structure */ -#define DefineCacheLookup(RTN,MEMBER,FNAME,ARGS,SETUP,DETAIL,HASHFN,TEST,INIT,UPDATE,INPLACE) \ +#define DefineCacheLookup(RTN,MEMBER,FNAME,ARGS,DETAIL,HASHFN,TEST,INIT,UPDATE) \ RTN *FNAME ARGS \ { \ RTN *tmp, *new=NULL; \ struct cache_head **hp, **head; \ - SETUP; \ head = &(DETAIL)->hash_table[HASHFN]; \ retry: \ if (set||new) write_lock(&(DETAIL)->hash_lock); \ @@ -176,14 +170,14 @@ RTN *FNAME ARGS \ tmp = container_of(*hp, RTN, MEMBER); \ if (TEST) { /* found a match */ \ \ - if (set && !INPLACE && test_bit(CACHE_VALID, &tmp->MEMBER.flags) && !new) \ + if (set && test_bit(CACHE_VALID, &tmp->MEMBER.flags) && !new) \ break; \ \ if (new) \ {INIT;} \ cache_get(&tmp->MEMBER); \ if (set) { \ - if (!INPLACE && test_bit(CACHE_VALID, &tmp->MEMBER.flags))\ + if (test_bit(CACHE_VALID, &tmp->MEMBER.flags))\ { /* need to swap in new */ \ RTN *t2; \ \ @@ -205,7 +199,7 @@ RTN *FNAME ARGS \ else read_unlock(&(DETAIL)->hash_lock); \ if (set) \ cache_fresh(DETAIL, &tmp->MEMBER, item->MEMBER.expiry_time); \ - if (set && !INPLACE && new) cache_fresh(DETAIL, &new->MEMBER, 0); \ + if (set && new) cache_fresh(DETAIL, &new->MEMBER, 0); \ if (new) (DETAIL)->cache_put(&new->MEMBER, DETAIL); \ return tmp; \ } \ @@ -233,16 +227,15 @@ RTN *FNAME ARGS \ new = kmalloc(sizeof(*new), GFP_KERNEL); \ if (new) { \ cache_init(&new->MEMBER); \ - cache_get(&new->MEMBER); \ goto retry; \ } \ return NULL; \ } -#define DefineSimpleCacheLookup(STRUCT,INPLACE) \ - DefineCacheLookup(struct STRUCT, h, STRUCT##_lookup, (struct STRUCT *item, int set), /*no setup */, \ +#define DefineSimpleCacheLookup(STRUCT) \ + DefineCacheLookup(struct STRUCT, h, STRUCT##_lookup, (struct STRUCT *item, int set), \ & STRUCT##_cache, STRUCT##_hash(item), STRUCT##_match(item, tmp),\ - STRUCT##_init(new, item), STRUCT##_update(tmp, item),INPLACE) + STRUCT##_init(new, item), STRUCT##_update(tmp, item)) #define cache_for_each(pos, detail, index, member) \ for (({read_lock(&(detail)->hash_lock); index = (detail)->hash_size;}) ; \ --- linux-2.6.7/include/linux/sunrpc/xdr.h.lsec 2004-06-15 23:20:26.000000000 -0600 +++ linux-2.6.7/include/linux/sunrpc/xdr.h 2005-03-23 14:28:24.783282064 -0700 @@ -192,6 +192,7 @@ extern void xdr_write_pages(struct xdr_s extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p); extern uint32_t *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len); +extern void truncate_xdr_buf(struct xdr_buf *xdr, int len); #endif /* __KERNEL__ */ --- 
linux-2.6.7/include/linux/nfsd/state.h.lsec 2004-06-15 23:18:56.000000000 -0600 +++ linux-2.6.7/include/linux/nfsd/state.h 2005-03-23 14:28:24.081388768 -0700 @@ -38,6 +38,7 @@ #define _NFSD4_STATE_H #include +#include #define NFS4_OPAQUE_LIMIT 1024 typedef struct { @@ -65,6 +66,22 @@ extern stateid_t onestateid; #define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t))) #define ONE_STATEID(stateid) (!memcmp((stateid), &onestateid, sizeof(stateid_t))) +/* client delegation callback info */ +struct nfs4_callback { + /* SETCLIENTID info */ + u32 cb_parsed; /* addr parsed */ + u32 cb_addr; + unsigned short cb_port; + u32 cb_prog; + u32 cb_ident; + struct xdr_netobj cb_netid; + /* RPC client info */ + u32 cb_set; /* successful CB_NULL call */ + struct rpc_program cb_program; + struct rpc_stat cb_stat; + struct rpc_clnt * cb_client; +}; + /* * struct nfs4_client - one per client. Clientids live here. * o Each nfs4_client is hashed by clientid. @@ -87,6 +104,21 @@ struct nfs4_client { struct svc_cred cl_cred; /* setclientid principal */ clientid_t cl_clientid; /* generated by server */ nfs4_verifier cl_confirm; /* generated by server */ + struct nfs4_callback cl_callback; /* callback info */ + time_t cl_first_state; /* first state acquisition*/ + atomic_t cl_count; /* ref count */ +}; + +/* struct nfs4_client_reclaim * one per old client. Populates reset_str_hashtbl. Filled from conf_id_hashtbl + * upon lease reset, or from upcall to state_daemon (to read in state + * from non-volatile storage) upon reboot. + */ +struct nfs4_client_reclaim { + struct list_head cr_strhash; /* hash by cr_name */ + struct xdr_netobj cr_name; /* id generated by client */ + time_t cr_first_state; /* first state acquisition */ + u32 cr_expired; /* boolean: lease expired?
*/ }; static inline void @@ -216,5 +248,8 @@ extern int nfs4_share_conflict(struct sv extern void nfs4_lock_state(void); extern void nfs4_unlock_state(void); extern int nfs4_in_grace(void); -extern int nfs4_in_no_grace(void); +extern int nfs4_check_open_reclaim(clientid_t *clid); +extern void nfsd4_probe_callback(struct nfs4_client *clp); +extern void expire_client(struct nfs4_client *clp); +extern void put_nfs4_client(struct nfs4_client *clp); #endif /* NFSD4_STATE_H */ --- linux-2.6.7/include/linux/nfsd/nfsd.h.lsec 2004-06-15 23:20:04.000000000 -0600 +++ linux-2.6.7/include/linux/nfsd/nfsd.h 2005-03-23 14:28:24.133380864 -0700 @@ -76,6 +76,11 @@ int nfsd_lookup(struct svc_rqst *, stru const char *, int, struct svc_fh *); int nfsd_setattr(struct svc_rqst *, struct svc_fh *, struct iattr *, int, time_t); +#ifdef CONFIG_NFSD_V4 +int nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *, + struct nfs4_acl *); +int nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, struct nfs4_acl **); +#endif /* CONFIG_NFSD_V4 */ int nfsd_create(struct svc_rqst *, struct svc_fh *, char *name, int len, struct iattr *attrs, int type, dev_t rdev, struct svc_fh *res); @@ -126,9 +131,13 @@ int nfsd_permission(struct svc_export * #ifdef CONFIG_NFSD_V4 void nfs4_state_init(void); void nfs4_state_shutdown(void); +time_t nfs4_lease_time(void); +void nfs4_reset_lease(time_t leasetime); #else void static inline nfs4_state_init(void){} void static inline nfs4_state_shutdown(void){} +time_t static inline nfs4_lease_time(void){return 0;} +void static inline nfs4_reset_lease(time_t leasetime){} #endif /* @@ -249,12 +258,11 @@ static inline int is_fsid(struct svc_fh #define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */ #define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */ -#define NFSD_LEASE_TIME 60 /* seconds */ +#define NFSD_LEASE_TIME (nfs4_lease_time()) #define NFSD_LAUNDROMAT_MINTIMEOUT 10 /* seconds */ /* * The following attributes are currently not supported by the NFSv4 server: - * ACL (will be supported in a forthcoming patch) * ARCHIVE (deprecated anyway) * FS_LOCATIONS (will be supported eventually) * HIDDEN (unlikely to be supported any time soon) @@ -274,7 +282,7 @@ static inline int is_fsid(struct svc_fh | FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FILEID | FATTR4_WORD0_FILES_AVAIL \ | FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_HOMOGENEOUS \ | FATTR4_WORD0_MAXFILESIZE | FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME \ - | FATTR4_WORD0_MAXREAD | FATTR4_WORD0_MAXWRITE) + | FATTR4_WORD0_MAXREAD | FATTR4_WORD0_MAXWRITE | FATTR4_WORD0_ACL) #define NFSD_SUPPORTED_ATTRS_WORD1 \ (FATTR4_WORD1_MODE | FATTR4_WORD1_NO_TRUNC | FATTR4_WORD1_NUMLINKS \ @@ -289,7 +297,8 @@ static inline int is_fsid(struct svc_fh (FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) /* These are the only attrs allowed in CREATE/OPEN/SETATTR. 
*/ -#define NFSD_WRITEABLE_ATTRS_WORD0 FATTR4_WORD0_SIZE +#define NFSD_WRITEABLE_ATTRS_WORD0 \ +(FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL ) #define NFSD_WRITEABLE_ATTRS_WORD1 \ (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_METADATA | FATTR4_WORD1_TIME_MODIFY_SET) --- linux-2.6.7/include/linux/nfsd/xdr4.h.lsec 2004-06-15 23:18:59.000000000 -0600 +++ linux-2.6.7/include/linux/nfsd/xdr4.h 2005-03-23 14:28:24.082388616 -0700 @@ -39,6 +39,8 @@ #ifndef _LINUX_NFSD_XDR4_H #define _LINUX_NFSD_XDR4_H +#include + #define NFSD4_MAX_TAGLEN 128 #define XDR_LEN(n) (((n) + 3) & ~3) @@ -95,6 +97,7 @@ struct nfsd4_create { u32 cr_bmval[2]; /* request */ struct iattr cr_iattr; /* request */ struct nfsd4_change_info cr_cinfo; /* response */ + struct nfs4_acl *cr_acl; }; #define cr_linklen u.link.namelen #define cr_linkname u.link.name @@ -216,7 +219,7 @@ struct nfsd4_open { u32 op_rflags; /* response */ int op_truncate; /* used during processing */ struct nfs4_stateowner *op_stateowner; /* used during processing */ - + struct nfs4_acl *op_acl; }; #define op_iattr u.iattr #define op_verf u.verf @@ -291,6 +294,7 @@ struct nfsd4_setattr { stateid_t sa_stateid; /* request */ u32 sa_bmval[2]; /* request */ struct iattr sa_iattr; /* request */ + struct nfs4_acl *sa_acl; }; struct nfsd4_setclientid { @@ -378,6 +382,7 @@ struct nfsd4_compoundargs { u32 * tmpp; struct tmpbuf { struct tmpbuf *next; + void (*release)(const void *); void *buf; } *to_free; @@ -449,6 +454,7 @@ extern int nfsd4_locku(struct svc_rqst * extern int nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *rlockowner); +extern void nfsd4_release_compoundargs(struct nfsd4_compoundargs *); #endif /* --- linux-2.6.7/include/linux/nfs_fs.h.lsec 2004-06-15 23:19:13.000000000 -0600 +++ linux-2.6.7/include/linux/nfs_fs.h 2005-03-23 14:28:23.338501704 -0700 @@ -28,6 +28,7 @@ #include #include #include +#include #include /* @@ -75,15 +76,33 @@ #ifdef __KERNEL__ /* - * NFSv3 Access mode cache + * NFSv3/v4 Access mode cache entry */ -struct nfs_access_cache { +struct nfs_access_entry { unsigned long jiffies; struct rpc_cred * cred; int mask; - int err; }; +struct nfs4_state; +struct nfs_open_context { + atomic_t count; + struct dentry *dentry; + struct rpc_cred *cred; + struct nfs4_state *state; + unsigned int pid; + int mode; + int error; + + struct list_head list; + wait_queue_head_t waitq; +}; + +/* + * NFSv4 delegation + */ +struct nfs_delegation; + /* * nfs fs inode data in memory */ @@ -137,7 +156,7 @@ struct nfs_inode { */ atomic_t data_updates; - struct nfs_access_cache cache_access; + struct nfs_access_entry cache_access; /* * This is the cookie verifier used for NFSv3 readdir @@ -156,16 +175,20 @@ struct nfs_inode { ncommit, npages; - /* Credentials for shared mmap */ - struct rpc_cred *mm_cred; + /* Open contexts for shared mmap writes */ + struct list_head open_files; wait_queue_head_t nfs_i_wait; #ifdef CONFIG_NFS_V4 /* NFSv4 state */ struct list_head open_states; + struct nfs_delegation *delegation; + int delegation_state; + struct rw_semaphore rwsem; #endif /* CONFIG_NFS_V4*/ - + void *acl; + ssize_t acl_len; struct inode vfs_inode; }; @@ -259,6 +282,18 @@ static inline int nfs_verify_change_attr && chattr == NFS_I(inode)->cache_change_attribute; } +/** + * nfs_compare_fh - compare two filehandles for equality + * @fh1 - pointer to first filehandle + * @fh2 - pointer to second filehandle + */ +static inline int nfs_compare_fh(const struct nfs_fh 
*fh1, const struct nfs_fh *fh2) +{ + if (fh1->size == fh2->size) + return memcmp(fh1->data, fh2->data, fh1->size); + return (fh1->size > fh2->size) ? 1 : -1; +} + /* * linux/fs/nfs/inode.c */ @@ -268,9 +303,12 @@ extern struct inode *nfs_fhget(struct su extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern int nfs_permission(struct inode *, int, struct nameidata *); -extern void nfs_set_mmcred(struct inode *, struct rpc_cred *); +extern int nfs_access_get_cached(struct inode *, struct rpc_cred *, struct nfs_access_entry *); +extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *); extern int nfs_open(struct inode *, struct file *); extern int nfs_release(struct inode *, struct file *); +extern int nfs_attribute_timeout(struct inode *inode); +extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode); extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); extern int nfs_setattr(struct dentry *, struct iattr *); extern void nfs_begin_attr_update(struct inode *); @@ -278,6 +316,12 @@ extern void nfs_end_attr_update(struct i extern void nfs_begin_data_update(struct inode *); extern void nfs_end_data_update(struct inode *); extern void nfs_end_data_update_defer(struct inode *); +extern struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred); +extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx); +extern void put_nfs_open_context(struct nfs_open_context *ctx); +extern void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx); +extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, int mode); +extern void nfs_file_clear_open_context(struct file *filp); /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */ extern u32 root_nfs_parse_addr(char *name); /*__init*/ @@ -289,16 +333,15 @@ extern struct inode_operations nfs_file_ extern struct file_operations nfs_file_operations; extern struct address_space_operations nfs_file_aops; -static __inline__ struct rpc_cred * -nfs_file_cred(struct file *file) +static inline struct rpc_cred *nfs_file_cred(struct file *file) { - struct rpc_cred *cred = NULL; - if (file) - cred = (struct rpc_cred *)file->private_data; -#ifdef RPC_DEBUG - BUG_ON(cred && cred->cr_magic != RPCAUTH_CRED_MAGIC); -#endif - return cred; + if (file != NULL) { + struct nfs_open_context *ctx; + + ctx = (struct nfs_open_context*)file->private_data; + return ctx->cred; + } + return NULL; } /* @@ -418,28 +461,6 @@ extern int nfsroot_mount(struct sockadd * inline functions */ -static inline int nfs_attribute_timeout(struct inode *inode) -{ - struct nfs_inode *nfsi = NFS_I(inode); - - return time_after(jiffies, nfsi->read_cache_jiffies+nfsi->attrtimeo); -} - -/** - * nfs_revalidate_inode - Revalidate the inode attributes - * @server - pointer to nfs_server struct - * @inode - pointer to inode struct - * - * Updates inode attribute information by retrieving the data from the server. - */ -static inline int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) -{ - if (!(NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) - && !nfs_attribute_timeout(inode)) - return NFS_STALE(inode) ? 
-ESTALE : 0; - return __nfs_revalidate_inode(server, inode); -} - static inline loff_t nfs_size_to_loff_t(__u64 size) { @@ -507,8 +528,6 @@ struct idmap; enum nfs4_client_state { NFS4CLNT_OK = 0, - NFS4CLNT_NEW, - NFS4CLNT_SETUP_STATE, }; /* @@ -520,7 +539,6 @@ struct nfs4_client { u64 cl_clientid; /* constant */ nfs4_verifier cl_confirm; unsigned long cl_state; - long cl_generation; u32 cl_lockowner_id; @@ -530,6 +548,7 @@ struct nfs4_client { */ struct rw_semaphore cl_sem; + struct list_head cl_delegations; struct list_head cl_state_owners; struct list_head cl_unused; int cl_nunused; @@ -573,12 +592,11 @@ struct nfs4_state_owner { u32 so_id; /* 32-bit identifier, unique */ struct semaphore so_sema; u32 so_seqid; /* protected by so_sema */ - unsigned int so_flags; /* protected by so_sema */ atomic_t so_count; - long so_generation; struct rpc_cred *so_cred; /* Associated cred */ struct list_head so_states; + struct list_head so_delegations; }; /* @@ -593,10 +611,13 @@ struct nfs4_state_owner { * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN) */ +/* bits for nfs4_lock_state->flags */ + struct nfs4_lock_state { struct list_head ls_locks; /* Other lock stateids */ - fl_owner_t ls_owner; /* POSIX lock owner */ - struct nfs4_state * ls_parent; /* Parent nfs4_state */ + unsigned int ls_pid; /* pid of owner process */ +#define NFS_LOCK_INITIALIZED 1 + int flags; u32 ls_seqid; u32 ls_id; nfs4_stateid ls_stateid; @@ -606,6 +627,7 @@ struct nfs4_lock_state { /* bits for nfs4_state->flags */ enum { LK_STATE_IN_USE, + NFS_DELEGATED_STATE, }; struct nfs4_state { @@ -629,8 +651,19 @@ struct nfs4_state { }; +struct nfs4_exception { + long timeout; + int retry; +}; + extern struct dentry_operations nfs4_dentry_operations; extern struct inode_operations nfs4_dir_inode_operations; +extern struct inode_operations nfs4_file_inode_operations; + +/* inode.c */ +extern ssize_t nfs_getxattr(struct dentry *, const char *, void *, size_t); +extern int nfs_setxattr(struct dentry *, const char *, const void *, size_t, int); +extern ssize_t nfs_listxattr(struct dentry *, char *, size_t); /* nfs4proc.c */ extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short); @@ -639,10 +672,15 @@ extern int nfs4_open_reclaim(struct nfs4 extern int nfs4_proc_async_renew(struct nfs4_client *); extern int nfs4_proc_renew(struct nfs4_client *); extern int nfs4_do_close(struct inode *, struct nfs4_state *); -int nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode); +extern int nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode); extern int nfs4_wait_clnt_recover(struct rpc_clnt *, struct nfs4_client *); extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); extern int nfs4_open_revalidate(struct inode *, struct dentry *, int); +extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *); +extern int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request); +extern ssize_t nfs4_proc_get_acl(struct inode *, void *buf, ssize_t buflen); +extern int nfs4_proc_set_acl(struct inode *, const void *buf, ssize_t buflen); +extern void nfs4_zap_acl_attr(struct inode *inode); /* nfs4renewd.c */ extern void nfs4_schedule_state_renewal(struct nfs4_client *); @@ -654,6 +692,8 @@ extern void init_nfsv4_state(struct nfs_ extern void destroy_nfsv4_state(struct nfs_server *); extern struct nfs4_client *nfs4_get_client(struct in_addr *); extern void nfs4_put_client(struct nfs4_client *clp); 
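struct nfs4_exception and nfs4_handle_exception() above give the v4 client one place to map NFS4ERR_* results and to decide whether a compound should simply be reissued. The sketch below only illustrates the expected calling convention; nfs4_proc_foo() and _nfs4_do_foo() are made-up names standing in for a real procedure and for the helper that actually sends the compound.

static int nfs4_proc_foo(struct nfs_server *server, struct nfs_fh *fh)
{
        struct nfs4_exception exception = { .timeout = 0, .retry = 0 };
        int err;

        do {
                /* nfs4_handle_exception() turns the raw NFSv4 status into
                 * a normal -Exxx error, may wait (for example on a delay
                 * or grace-period response), and sets exception.retry
                 * when the compound should be sent again. */
                err = nfs4_handle_exception(server,
                                _nfs4_do_foo(server, fh),
                                &exception);
        } while (exception.retry);
        return err;
}

The loop ends as soon as nfs4_handle_exception() leaves exception.retry clear, so a hard error is returned to the caller unchanged.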
+extern int nfs4_init_client(struct nfs4_client *clp); +extern struct nfs4_client *nfs4_find_client(struct in_addr *); extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *); extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); @@ -663,15 +703,14 @@ extern void nfs4_put_open_state(struct n extern void nfs4_close_state(struct nfs4_state *, mode_t); extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode); extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp); -extern int nfs4_handle_error(struct nfs_server *, int); extern void nfs4_schedule_state_recovery(struct nfs4_client *); -extern struct nfs4_lock_state *nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t); -extern struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t); +extern struct nfs4_lock_state *nfs4_find_lock_state(struct nfs4_state *state, unsigned int pid); +extern struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, unsigned int pid); extern void nfs4_put_lock_state(struct nfs4_lock_state *state); extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls); -extern void nfs4_notify_setlk(struct inode *, struct file_lock *, struct nfs4_lock_state *); -extern void nfs4_notify_unlck(struct inode *, struct file_lock *, struct nfs4_lock_state *); -extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); +extern void nfs4_notify_setlk(struct nfs4_state *, struct file_lock *, struct nfs4_lock_state *); +extern void nfs4_notify_unlck(struct nfs4_state *, struct file_lock *, struct nfs4_lock_state *); +extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, unsigned int pid); @@ -681,6 +720,7 @@ struct nfs4_mount_data; #define destroy_nfsv4_state(server) do { } while (0) #define nfs4_put_state_owner(inode, owner) do { } while (0) #define nfs4_put_open_state(state) do { } while (0) +#define nfs4_close_state(a, b) do { } while (0) #define nfs4_renewd_prepare_shutdown(server) do { } while (0) #endif @@ -697,6 +737,7 @@ struct nfs4_mount_data; #define NFSDBG_XDR 0x0020 #define NFSDBG_FILE 0x0040 #define NFSDBG_ROOT 0x0080 +#define NFSDBG_CALLBACK 0x0100 #define NFSDBG_ALL 0xFFFF #ifdef __KERNEL__ --- linux-2.6.7/include/linux/nfs4_acl.h.lsec 2005-03-23 14:28:24.519322192 -0700 +++ linux-2.6.7/include/linux/nfs4_acl.h 2005-03-23 14:28:24.518322344 -0700 @@ -0,0 +1,59 @@ +/* + * include/linux/nfs4_acl.c + * + * Common NFSv4 ACL handling definitions. + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. + * + * Marius Aamodt Eriksen + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef LINUX_NFS4_ACL_H +#define LINUX_NFS4_ACL_H + +#include + +struct nfs4_acl *nfs4_acl_new(void); +void nfs4_acl_free(struct nfs4_acl *); +int nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t); +int nfs4_acl_get_whotype(char *, u32); +int nfs4_acl_write_who(int who, char *p); +int nfs4_acl_permission(struct nfs4_acl *acl, uid_t owner, gid_t group, + uid_t who, u32 mask); + +#define NFS4_ACL_TYPE_DEFAULT 0x01 +#define NFS4_ACL_DIR 0x02 +#define NFS4_ACL_OWNER 0x04 + +struct nfs4_acl *nfs4_acl_posix_to_nfsv4(struct posix_acl *, + struct posix_acl *, unsigned int flags); +int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *, struct posix_acl **, + struct posix_acl **, unsigned int flags); + +#endif /* LINUX_NFS4_ACL_H */ --- linux-2.6.7/include/linux/nfs_xdr.h.lsec 2004-06-15 23:19:52.000000000 -0600 +++ linux-2.6.7/include/linux/nfs_xdr.h 2005-03-23 14:28:23.539471152 -0700 @@ -99,20 +99,21 @@ struct nfs4_change_info { * Arguments to the open call. */ struct nfs_openargs { - struct nfs_fh * fh; + const struct nfs_fh * fh; __u32 seqid; - __u32 share_access; + int open_flags; __u64 clientid; __u32 id; - __u32 opentype; - __u32 createmode; union { struct iattr * attrs; /* UNCHECKED, GUARDED */ nfs4_verifier verifier; /* EXCLUSIVE */ + nfs4_stateid delegation; /* CLAIM_DELEGATE_CUR */ + int delegation_type; /* CLAIM_PREVIOUS */ } u; const struct qstr * name; const struct nfs_server *server; /* Needed for ID mapping */ const u32 * bitmask; + __u32 claim; }; struct nfs_openres { @@ -122,13 +123,17 @@ struct nfs_openres { __u32 rflags; struct nfs_fattr * f_attr; const struct nfs_server *server; + int delegation_type; + nfs4_stateid delegation; + __u32 do_recall; + __u64 maxsize; }; /* * Arguments to the open_confirm call. */ struct nfs_open_confirmargs { - struct nfs_fh * fh; + const struct nfs_fh * fh; nfs4_stateid stateid; __u32 seqid; }; @@ -138,26 +143,13 @@ struct nfs_open_confirmres { }; /* - * Arguments to the open_reclaim call. - */ -struct nfs_open_reclaimargs { - struct nfs_fh * fh; - __u64 clientid; - __u32 seqid; - __u32 id; - __u32 share_access; - __u32 claim; - const __u32 * bitmask; -}; - -/* * Arguments to the close call. */ struct nfs_closeargs { struct nfs_fh * fh; nfs4_stateid stateid; __u32 seqid; - __u32 share_access; + int open_flags; }; struct nfs_closeres { @@ -224,6 +216,11 @@ struct nfs_lockres { const struct nfs_server * server; }; +struct nfs4_delegreturnargs { + const struct nfs_fh *fhandle; + const nfs4_stateid *stateid; +}; + /* * Arguments to the read call. 
*/ @@ -235,8 +232,7 @@ struct nfs_lockres { struct nfs_readargs { struct nfs_fh * fh; - fl_owner_t lockowner; - struct nfs4_state * state; + struct nfs_open_context *context; __u64 offset; __u32 count; unsigned int pgbase; @@ -259,8 +255,7 @@ struct nfs_readres { struct nfs_writeargs { struct nfs_fh * fh; - fl_owner_t lockowner; - struct nfs4_state * state; + struct nfs_open_context *context; __u64 offset; __u32 count; enum nfs3_stable_how stable; @@ -331,6 +326,19 @@ struct nfs_setattrargs { const u32 * bitmask; }; +struct nfs_setaclargs { + struct nfs_fh * fh; + const char * acl; + ssize_t acl_len; + const struct nfs_server * server; /* Needed for name mapping */ +}; + +struct nfs_getaclres { + char * acl; + ssize_t acl_len; + const struct nfs_server * server; /* Needed for name mapping */ +}; + struct nfs_setattrres { struct nfs_fattr * fattr; const struct nfs_server * server; @@ -597,13 +605,15 @@ struct nfs4_rename_res { }; struct nfs4_setclientid { - nfs4_verifier sc_verifier; /* request */ - char * sc_name; /* request */ + const nfs4_verifier * sc_verifier; /* request */ + unsigned int sc_name_len; + char sc_name[32]; /* request */ u32 sc_prog; /* request */ + unsigned int sc_netid_len; char sc_netid[4]; /* request */ + unsigned int sc_uaddr_len; char sc_uaddr[24]; /* request */ u32 sc_cb_ident; /* request */ - struct nfs4_client * sc_state; /* response */ }; struct nfs4_statfs_arg { @@ -657,6 +667,8 @@ struct nfs_write_data { void (*complete) (struct nfs_write_data *, int); }; +struct nfs_access_entry; + /* * RPC procedure vector for NFSv2/NFSv3 demuxing */ @@ -664,6 +676,7 @@ struct nfs_rpc_ops { int version; /* Protocol version */ struct dentry_operations *dentry_ops; struct inode_operations *dir_inode_ops; + struct inode_operations *file_inode_ops; int (*getroot) (struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); @@ -672,11 +685,11 @@ struct nfs_rpc_ops { struct iattr *); int (*lookup) (struct inode *, struct qstr *, struct nfs_fh *, struct nfs_fattr *); - int (*access) (struct inode *, struct rpc_cred *, int); + int (*access) (struct inode *, struct nfs_access_entry *); int (*readlink)(struct inode *, struct page *); - int (*read) (struct nfs_read_data *, struct file *); - int (*write) (struct nfs_write_data *, struct file *); - int (*commit) (struct nfs_write_data *, struct file *); + int (*read) (struct nfs_read_data *); + int (*write) (struct nfs_write_data *); + int (*commit) (struct nfs_write_data *); struct inode * (*create) (struct inode *, struct qstr *, struct iattr *, int); int (*remove) (struct inode *, struct qstr *); @@ -708,8 +721,6 @@ struct nfs_rpc_ops { void (*commit_setup) (struct nfs_write_data *, int how); int (*file_open) (struct inode *, struct file *); int (*file_release) (struct inode *, struct file *); - void (*request_init)(struct nfs_page *, struct file *); - int (*request_compatible)(struct nfs_page *, struct file *, struct page *); int (*lock)(struct file *, int, struct file_lock *); }; --- linux-2.6.7/arch/s390/defconfig.lsec 2004-06-15 23:19:52.000000000 -0600 +++ linux-2.6.7/arch/s390/defconfig 2005-03-23 14:28:23.869420992 -0700 @@ -422,7 +422,7 @@ CONFIG_NFS_V3=y CONFIG_NFSD=y CONFIG_NFSD_V3=y # CONFIG_NFSD_V4 is not set -# CONFIG_NFSD_TCP is not set +CONFIG_NFSD_TCP=y CONFIG_LOCKD=y CONFIG_LOCKD_V4=y CONFIG_EXPORTFS=y --- linux-2.6.7/arch/ia64/defconfig.lsec 2004-06-15 23:18:57.000000000 -0600 +++ linux-2.6.7/arch/ia64/defconfig 2005-03-23 14:28:23.816429048 -0700 @@ -987,7 +987,7 @@ CONFIG_NFS_DIRECTIO=y CONFIG_NFSD=y 
CONFIG_NFSD_V3=y # CONFIG_NFSD_V4 is not set -# CONFIG_NFSD_TCP is not set +CONFIG_NFSD_TCP=y CONFIG_LOCKD=y CONFIG_LOCKD_V4=y CONFIG_EXPORTFS=y --- linux-2.6.7/arch/ppc/defconfig.lsec 2004-06-15 23:19:52.000000000 -0600 +++ linux-2.6.7/arch/ppc/defconfig 2005-03-23 14:28:23.817428896 -0700 @@ -1230,7 +1230,7 @@ CONFIG_NFS_V3=y CONFIG_NFSD=y CONFIG_NFSD_V3=y # CONFIG_NFSD_V4 is not set -# CONFIG_NFSD_TCP is not set +CONFIG_NFSD_TCP=y CONFIG_LOCKD=y CONFIG_LOCKD_V4=y CONFIG_EXPORTFS=y --- linux-2.6.7/arch/i386/defconfig.lsec 2004-06-15 23:19:42.000000000 -0600 +++ linux-2.6.7/arch/i386/defconfig 2005-03-23 14:28:23.763437104 -0700 @@ -1148,7 +1148,7 @@ CONFIG_NFS_FS=y # CONFIG_NFS_DIRECTIO is not set CONFIG_NFSD=y # CONFIG_NFSD_V3 is not set -# CONFIG_NFSD_TCP is not set +CONFIG_NFSD_TCP=y CONFIG_LOCKD=y CONFIG_EXPORTFS=y CONFIG_SUNRPC=y --- linux-2.6.7/arch/alpha/defconfig.lsec 2004-06-15 23:19:23.000000000 -0600 +++ linux-2.6.7/arch/alpha/defconfig 2005-03-23 14:28:23.762437256 -0700 @@ -791,7 +791,7 @@ CONFIG_NFS_V3=y CONFIG_NFSD=m CONFIG_NFSD_V3=y # CONFIG_NFSD_V4 is not set -# CONFIG_NFSD_TCP is not set +CONFIG_NFSD_TCP=y CONFIG_LOCKD=m CONFIG_LOCKD_V4=y CONFIG_EXPORTFS=m --- linux-2.6.7/net/sunrpc/svcauth_unix.c.lsec 2004-06-15 23:19:37.000000000 -0600 +++ linux-2.6.7/net/sunrpc/svcauth_unix.c 2005-03-23 14:28:24.295356240 -0700 @@ -55,12 +55,10 @@ struct auth_domain *unix_domain_find(cha if (new == NULL) return NULL; cache_init(&new->h.h); - atomic_inc(&new->h.h.refcnt); new->h.name = strdup(name); new->h.flavour = RPC_AUTH_UNIX; new->addr_changes = 0; new->h.h.expiry_time = NEVER; - new->h.h.flags = 0; rv = auth_domain_lookup(&new->h, 2); if (rv == &new->h) { @@ -262,7 +260,7 @@ struct cache_detail ip_map_cache = { .cache_show = ip_map_show, }; -static DefineSimpleCacheLookup(ip_map, 0) +static DefineSimpleCacheLookup(ip_map) int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom) @@ -318,7 +316,8 @@ struct auth_domain *auth_unix_lookup(str return NULL; if ((ipm->m_client->addr_changes - ipm->m_add_change) >0) { - set_bit(CACHE_NEGATIVE, &ipm->h.flags); + if (test_and_set_bit(CACHE_NEGATIVE, &ipm->h.flags) == 0) + auth_domain_put(&ipm->m_client->h); rv = NULL; } else { rv = &ipm->m_client->h; @@ -405,6 +404,9 @@ svcauth_null_release(struct svc_rqst *rq if (rqstp->rq_client) auth_domain_put(rqstp->rq_client); rqstp->rq_client = NULL; + if (rqstp->rq_cred.cr_group_info) + put_group_info(rqstp->rq_cred.cr_group_info); + rqstp->rq_cred.cr_group_info = NULL; return 0; /* don't drop */ } --- linux-2.6.7/net/sunrpc/xprt.c.lsec 2004-06-15 23:19:42.000000000 -0600 +++ linux-2.6.7/net/sunrpc/xprt.c 2005-03-23 14:28:23.706445768 -0700 @@ -1099,7 +1099,7 @@ xprt_write_space(struct sock *sk) goto out; spin_lock_bh(&xprt->sock_lock); - if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->pending) + if (xprt->snd_task) rpc_wake_up_task(xprt->snd_task); spin_unlock_bh(&xprt->sock_lock); out: @@ -1357,6 +1357,7 @@ xprt_request_init(struct rpc_task *task, req->rq_task = task; req->rq_xprt = xprt; req->rq_xid = xprt_alloc_xid(xprt); + req->rq_release_snd_buf = NULL; dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, req, req->rq_xid); } @@ -1382,6 +1383,8 @@ xprt_release(struct rpc_task *task) mod_timer(&xprt->timer, xprt->last_used + XPRT_IDLE_TIMEOUT); spin_unlock_bh(&xprt->sock_lock); task->tk_rqstp = NULL; + if (req->rq_release_snd_buf) + req->rq_release_snd_buf(req); memset(req, 0, sizeof(*req)); /* mark unused */ dprintk("RPC: %4d release request %p\n", task->tk_pid, 
req); --- linux-2.6.7/net/sunrpc/sched.c.lsec 2004-06-15 23:19:35.000000000 -0600 +++ linux-2.6.7/net/sunrpc/sched.c 2005-03-23 14:28:23.651454128 -0700 @@ -41,13 +41,7 @@ static mempool_t *rpc_buffer_mempool; static void __rpc_default_timer(struct rpc_task *task); static void rpciod_killall(void); - -/* - * When an asynchronous RPC task is activated within a bottom half - * handler, or while executing another RPC task, it is put on - * schedq, and rpciod is woken up. - */ -static RPC_WAITQ(schedq, "schedq"); +static void rpc_async_schedule(void *); /* * RPC tasks that create another task (e.g. for contacting the portmapper) @@ -68,26 +62,18 @@ static LIST_HEAD(all_tasks); /* * rpciod-related stuff */ -static DECLARE_WAIT_QUEUE_HEAD(rpciod_idle); -static DECLARE_COMPLETION(rpciod_killer); static DECLARE_MUTEX(rpciod_sema); static unsigned int rpciod_users; -static pid_t rpciod_pid; -static int rpc_inhibit; +static struct workqueue_struct *rpciod_workqueue; /* - * Spinlock for wait queues. Access to the latter also has to be - * interrupt-safe in order to allow timers to wake up sleeping tasks. - */ -static spinlock_t rpc_queue_lock = SPIN_LOCK_UNLOCKED; -/* * Spinlock for other critical sections of code. */ static spinlock_t rpc_sched_lock = SPIN_LOCK_UNLOCKED; /* * Disable the timer for a given RPC task. Should be called with - * rpc_queue_lock and bh_disabled in order to avoid races within + * queue->lock and bh_disabled in order to avoid races within * rpc_run_timer(). */ static inline void @@ -105,16 +91,13 @@ __rpc_disable_timer(struct rpc_task *tas * without calling del_timer_sync(). The latter could cause a * deadlock if called while we're holding spinlocks... */ -static void -rpc_run_timer(struct rpc_task *task) +static void rpc_run_timer(struct rpc_task *task) { void (*callback)(struct rpc_task *); - spin_lock_bh(&rpc_queue_lock); callback = task->tk_timeout_fn; task->tk_timeout_fn = NULL; - spin_unlock_bh(&rpc_queue_lock); - if (callback) { + if (callback && RPC_IS_QUEUED(task)) { dprintk("RPC: %4d running timer\n", task->tk_pid); callback(task); } @@ -140,19 +123,8 @@ __rpc_add_timer(struct rpc_task *task, r } /* - * Set up a timer for an already sleeping task. - */ -void rpc_add_timer(struct rpc_task *task, rpc_action timer) -{ - spin_lock_bh(&rpc_queue_lock); - if (!RPC_IS_RUNNING(task)) - __rpc_add_timer(task, timer); - spin_unlock_bh(&rpc_queue_lock); -} - -/* * Delete any timer for the current task. Because we use del_timer_sync(), - * this function should never be called while holding rpc_queue_lock. + * this function should never be called while holding queue->lock. */ static inline void rpc_delete_timer(struct rpc_task *task) @@ -169,16 +141,17 @@ static void __rpc_add_wait_queue_priorit struct list_head *q; struct rpc_task *t; + INIT_LIST_HEAD(&task->u.tk_wait.links); q = &queue->tasks[task->tk_priority]; if (unlikely(task->tk_priority > queue->maxpriority)) q = &queue->tasks[queue->maxpriority]; - list_for_each_entry(t, q, tk_list) { + list_for_each_entry(t, q, u.tk_wait.list) { if (t->tk_cookie == task->tk_cookie) { - list_add_tail(&task->tk_list, &t->tk_links); + list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links); return; } } - list_add_tail(&task->tk_list, q); + list_add_tail(&task->u.tk_wait.list, q); } /* @@ -189,37 +162,21 @@ static void __rpc_add_wait_queue_priorit * improve overall performance. * Everyone else gets appended to the queue to ensure proper FIFO behavior. 
*/ -static int __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task) +static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task) { - if (task->tk_rpcwait == queue) - return 0; + BUG_ON (RPC_IS_QUEUED(task)); - if (task->tk_rpcwait) { - printk(KERN_WARNING "RPC: doubly enqueued task!\n"); - return -EWOULDBLOCK; - } if (RPC_IS_PRIORITY(queue)) __rpc_add_wait_queue_priority(queue, task); else if (RPC_IS_SWAPPER(task)) - list_add(&task->tk_list, &queue->tasks[0]); + list_add(&task->u.tk_wait.list, &queue->tasks[0]); else - list_add_tail(&task->tk_list, &queue->tasks[0]); - task->tk_rpcwait = queue; + list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]); + task->u.tk_wait.rpc_waitq = queue; + rpc_set_queued(task); dprintk("RPC: %4d added to queue %p \"%s\"\n", task->tk_pid, queue, rpc_qname(queue)); - - return 0; -} - -int rpc_add_wait_queue(struct rpc_wait_queue *q, struct rpc_task *task) -{ - int result; - - spin_lock_bh(&rpc_queue_lock); - result = __rpc_add_wait_queue(q, task); - spin_unlock_bh(&rpc_queue_lock); - return result; } /* @@ -229,12 +186,12 @@ static void __rpc_remove_wait_queue_prio { struct rpc_task *t; - if (!list_empty(&task->tk_links)) { - t = list_entry(task->tk_links.next, struct rpc_task, tk_list); - list_move(&t->tk_list, &task->tk_list); - list_splice_init(&task->tk_links, &t->tk_links); + if (!list_empty(&task->u.tk_wait.links)) { + t = list_entry(task->u.tk_wait.links.next, struct rpc_task, u.tk_wait.list); + list_move(&t->u.tk_wait.list, &task->u.tk_wait.list); + list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links); } - list_del(&task->tk_list); + list_del(&task->u.tk_wait.list); } /* @@ -243,31 +200,17 @@ static void __rpc_remove_wait_queue_prio */ static void __rpc_remove_wait_queue(struct rpc_task *task) { - struct rpc_wait_queue *queue = task->tk_rpcwait; - - if (!queue) - return; + struct rpc_wait_queue *queue; + queue = task->u.tk_wait.rpc_waitq; if (RPC_IS_PRIORITY(queue)) __rpc_remove_wait_queue_priority(task); else - list_del(&task->tk_list); - task->tk_rpcwait = NULL; - + list_del(&task->u.tk_wait.list); dprintk("RPC: %4d removed from queue %p \"%s\"\n", task->tk_pid, queue, rpc_qname(queue)); } -void -rpc_remove_wait_queue(struct rpc_task *task) -{ - if (!task->tk_rpcwait) - return; - spin_lock_bh(&rpc_queue_lock); - __rpc_remove_wait_queue(task); - spin_unlock_bh(&rpc_queue_lock); -} - static inline void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority) { queue->priority = priority; @@ -290,6 +233,7 @@ static void __rpc_init_priority_wait_que { int i; + spin_lock_init(&queue->lock); for (i = 0; i < ARRAY_SIZE(queue->tasks); i++) INIT_LIST_HEAD(&queue->tasks[i]); queue->maxpriority = maxprio; @@ -316,34 +260,27 @@ EXPORT_SYMBOL(rpc_init_wait_queue); * Note: If the task is ASYNC, this must be called with * the spinlock held to protect the wait queue operation. 
*/ -static inline void -rpc_make_runnable(struct rpc_task *task) +static void rpc_make_runnable(struct rpc_task *task) { - if (task->tk_timeout_fn) { - printk(KERN_ERR "RPC: task w/ running timer in rpc_make_runnable!!\n"); + if (rpc_test_and_set_running(task)) return; - } - rpc_set_running(task); + BUG_ON(task->tk_timeout_fn); if (RPC_IS_ASYNC(task)) { - if (RPC_IS_SLEEPING(task)) { - int status; - status = __rpc_add_wait_queue(&schedq, task); - if (status < 0) { - printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status); - task->tk_status = status; - return; - } - rpc_clear_sleeping(task); - wake_up(&rpciod_idle); + int status; + + INIT_WORK(&task->u.tk_work, rpc_async_schedule, (void *)task); + status = queue_work(task->tk_workqueue, &task->u.tk_work); + if (status < 0) { + printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status); + task->tk_status = status; + return; } - } else { - rpc_clear_sleeping(task); - wake_up(&task->tk_wait); - } + } else + wake_up(&task->u.tk_wait.waitq); } /* - * Place a newly initialized task on the schedq. + * Place a newly initialized task on the workqueue. */ static inline void rpc_schedule_run(struct rpc_task *task) @@ -352,33 +289,18 @@ rpc_schedule_run(struct rpc_task *task) if (RPC_IS_ACTIVATED(task)) return; task->tk_active = 1; - rpc_set_sleeping(task); rpc_make_runnable(task); } /* - * For other people who may need to wake the I/O daemon - * but should (for now) know nothing about its innards - */ -void rpciod_wake_up(void) -{ - if(rpciod_pid==0) - printk(KERN_ERR "rpciod: wot no daemon?\n"); - wake_up(&rpciod_idle); -} - -/* * Prepare for sleeping on a wait queue. * By always appending tasks to the list we ensure FIFO behavior. * NB: An RPC task will only receive interrupt-driven events as long * as it's on a wait queue. */ -static void -__rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, +static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, rpc_action action, rpc_action timer) { - int status; - dprintk("RPC: %4d sleep_on(queue \"%s\" time %ld)\n", task->tk_pid, rpc_qname(q), jiffies); @@ -388,49 +310,36 @@ __rpc_sleep_on(struct rpc_wait_queue *q, } /* Mark the task as being activated if so needed */ - if (!RPC_IS_ACTIVATED(task)) { + if (!RPC_IS_ACTIVATED(task)) task->tk_active = 1; - rpc_set_sleeping(task); - } - status = __rpc_add_wait_queue(q, task); - if (status) { - printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status); - task->tk_status = status; - } else { - rpc_clear_running(task); - if (task->tk_callback) { - dprintk(KERN_ERR "RPC: %4d overwrites an active callback\n", task->tk_pid); - BUG(); - } - task->tk_callback = action; - __rpc_add_timer(task, timer); - } + __rpc_add_wait_queue(q, task); + + BUG_ON(task->tk_callback != NULL); + task->tk_callback = action; + __rpc_add_timer(task, timer); } -void -rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, +void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, rpc_action action, rpc_action timer) { /* * Protect the queue operations. */ - spin_lock_bh(&rpc_queue_lock); + spin_lock_bh(&q->lock); __rpc_sleep_on(q, task, action, timer); - spin_unlock_bh(&rpc_queue_lock); + spin_unlock_bh(&q->lock); } /** - * __rpc_wake_up_task - wake up a single rpc_task + * __rpc_do_wake_up_task - wake up a single rpc_task * @task: task to be woken up * - * Caller must hold rpc_queue_lock + * Caller must hold queue->lock, and have cleared the task queued flag. 
*/ -static void -__rpc_wake_up_task(struct rpc_task *task) +static void __rpc_do_wake_up_task(struct rpc_task *task) { - dprintk("RPC: %4d __rpc_wake_up_task (now %ld inh %d)\n", - task->tk_pid, jiffies, rpc_inhibit); + dprintk("RPC: %4d __rpc_wake_up_task (now %ld)\n", task->tk_pid, jiffies); #ifdef RPC_DEBUG if (task->tk_magic != 0xf00baa) { @@ -445,12 +354,9 @@ __rpc_wake_up_task(struct rpc_task *task printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task); return; } - if (RPC_IS_RUNNING(task)) - return; __rpc_disable_timer(task); - if (task->tk_rpcwait != &schedq) - __rpc_remove_wait_queue(task); + __rpc_remove_wait_queue(task); rpc_make_runnable(task); @@ -458,6 +364,15 @@ __rpc_wake_up_task(struct rpc_task *task } /* + * Wake up the specified task + */ +static void __rpc_wake_up_task(struct rpc_task *task) +{ + if (rpc_test_and_clear_queued(task)) + __rpc_do_wake_up_task(task); +} + +/* * Default timeout handler if none specified by user */ static void @@ -471,14 +386,15 @@ __rpc_default_timer(struct rpc_task *tas /* * Wake up the specified task */ -void -rpc_wake_up_task(struct rpc_task *task) +void rpc_wake_up_task(struct rpc_task *task) { - if (RPC_IS_RUNNING(task)) - return; - spin_lock_bh(&rpc_queue_lock); - __rpc_wake_up_task(task); - spin_unlock_bh(&rpc_queue_lock); + if (rpc_test_and_clear_queued(task)) { + struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq; + + spin_lock_bh(&queue->lock); + __rpc_do_wake_up_task(task); + spin_unlock_bh(&queue->lock); + } } /* @@ -494,11 +410,11 @@ static struct rpc_task * __rpc_wake_up_n */ q = &queue->tasks[queue->priority]; if (!list_empty(q)) { - task = list_entry(q->next, struct rpc_task, tk_list); + task = list_entry(q->next, struct rpc_task, u.tk_wait.list); if (queue->cookie == task->tk_cookie) { if (--queue->nr) goto out; - list_move_tail(&task->tk_list, q); + list_move_tail(&task->u.tk_wait.list, q); } /* * Check if we need to switch queues. 
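[Editor's note] The sched.c hunks above replace the single global rpc_queue_lock with a per-wait-queue spinlock plus a per-task queued flag (RPC_IS_QUEUED / rpc_test_and_clear_queued): a waker first does a lockless test-and-clear of the flag, and only the winner takes the queue's own lock. Below is a minimal user-space sketch of that wake-up pattern, using C11 atomics and pthreads (build with "cc -pthread"). It is not kernel code; every identifier in it is illustrative and not taken from the patch.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct sketch_waitq {
	pthread_mutex_t lock;		/* stands in for the new per-queue queue->lock */
	const char *name;
};

struct sketch_task {
	atomic_int queued;		/* stands in for the task's queued bit */
	struct sketch_waitq *waitq;	/* queue the task is sleeping on */
	int pid;
};

/* Caller holds waitq->lock and has already cleared the queued bit. */
static void sketch_do_wake_up(struct sketch_task *t)
{
	printf("task %d removed from queue \"%s\"\n", t->pid, t->waitq->name);
	/* ...dequeue the task, disable its timer, make it runnable... */
}

/* Analogue of the reworked rpc_wake_up_task(): lockless fast path first. */
static void sketch_wake_up_task(struct sketch_task *t)
{
	/* test-and-clear: only one waker wins, the others return early */
	if (!atomic_exchange(&t->queued, 0))
		return;
	pthread_mutex_lock(&t->waitq->lock);
	sketch_do_wake_up(t);
	pthread_mutex_unlock(&t->waitq->lock);
}

int main(void)
{
	static struct sketch_waitq q = { PTHREAD_MUTEX_INITIALIZER, "pending" };
	struct sketch_task t = { .waitq = &q, .pid = 42 };

	atomic_store(&t.queued, 1);	/* task has been put to sleep on q */
	sketch_wake_up_task(&t);	/* first waker dequeues it */
	sketch_wake_up_task(&t);	/* second waker is a harmless no-op */
	return 0;
}

The payoff, as in the patch, is that concurrent wakers no longer contend on one global lock just to discover that a task is already running; they bail out on the atomic flag, and only the winning waker touches the queue.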
@@ -516,7 +432,7 @@ static struct rpc_task * __rpc_wake_up_n else q = q - 1; if (!list_empty(q)) { - task = list_entry(q->next, struct rpc_task, tk_list); + task = list_entry(q->next, struct rpc_task, u.tk_wait.list); goto new_queue; } } while (q != &queue->tasks[queue->priority]); @@ -541,14 +457,14 @@ struct rpc_task * rpc_wake_up_next(struc struct rpc_task *task = NULL; dprintk("RPC: wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue)); - spin_lock_bh(&rpc_queue_lock); + spin_lock_bh(&queue->lock); if (RPC_IS_PRIORITY(queue)) task = __rpc_wake_up_next_priority(queue); else { task_for_first(task, &queue->tasks[0]) __rpc_wake_up_task(task); } - spin_unlock_bh(&rpc_queue_lock); + spin_unlock_bh(&queue->lock); return task; } @@ -557,25 +473,25 @@ struct rpc_task * rpc_wake_up_next(struc * rpc_wake_up - wake up all rpc_tasks * @queue: rpc_wait_queue on which the tasks are sleeping * - * Grabs rpc_queue_lock + * Grabs queue->lock */ void rpc_wake_up(struct rpc_wait_queue *queue) { struct rpc_task *task; struct list_head *head; - spin_lock_bh(&rpc_queue_lock); + spin_lock_bh(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { while (!list_empty(head)) { - task = list_entry(head->next, struct rpc_task, tk_list); + task = list_entry(head->next, struct rpc_task, u.tk_wait.list); __rpc_wake_up_task(task); } if (head == &queue->tasks[0]) break; head--; } - spin_unlock_bh(&rpc_queue_lock); + spin_unlock_bh(&queue->lock); } /** @@ -583,18 +499,18 @@ void rpc_wake_up(struct rpc_wait_queue * * @queue: rpc_wait_queue on which the tasks are sleeping * @status: status value to set * - * Grabs rpc_queue_lock + * Grabs queue->lock */ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) { struct list_head *head; struct rpc_task *task; - spin_lock_bh(&rpc_queue_lock); + spin_lock_bh(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { while (!list_empty(head)) { - task = list_entry(head->next, struct rpc_task, tk_list); + task = list_entry(head->next, struct rpc_task, u.tk_wait.list); task->tk_status = status; __rpc_wake_up_task(task); } @@ -602,7 +518,7 @@ void rpc_wake_up_status(struct rpc_wait_ break; head--; } - spin_unlock_bh(&rpc_queue_lock); + spin_unlock_bh(&queue->lock); } /* @@ -626,18 +542,14 @@ __rpc_atrun(struct rpc_task *task) /* * This is the RPC `scheduler' (or rather, the finite state machine). */ -static int -__rpc_execute(struct rpc_task *task) +static int __rpc_execute(struct rpc_task *task) { int status = 0; dprintk("RPC: %4d rpc_execute flgs %x\n", task->tk_pid, task->tk_flags); - if (!RPC_IS_RUNNING(task)) { - printk(KERN_WARNING "RPC: rpc_execute called for sleeping task!!\n"); - return 0; - } + BUG_ON(RPC_IS_QUEUED(task)); restarted: while (1) { @@ -657,7 +569,9 @@ __rpc_execute(struct rpc_task *task) */ save_callback=task->tk_callback; task->tk_callback=NULL; + lock_kernel(); save_callback(task); + unlock_kernel(); } /* @@ -665,43 +579,41 @@ __rpc_execute(struct rpc_task *task) * tk_action may be NULL when the task has been killed * by someone else. */ - if (RPC_IS_RUNNING(task)) { + if (!RPC_IS_QUEUED(task)) { /* * Garbage collection of pending timers... */ rpc_delete_timer(task); if (!task->tk_action) break; + lock_kernel(); task->tk_action(task); - /* micro-optimization to avoid spinlock */ - if (RPC_IS_RUNNING(task)) - continue; + unlock_kernel(); } /* - * Check whether task is sleeping. + * Lockless check for whether task is sleeping or not. 
*/ - spin_lock_bh(&rpc_queue_lock); - if (!RPC_IS_RUNNING(task)) { - rpc_set_sleeping(task); - if (RPC_IS_ASYNC(task)) { - spin_unlock_bh(&rpc_queue_lock); + if (!RPC_IS_QUEUED(task)) + continue; + if (RPC_IS_ASYNC(task)) { + rpc_clear_running(task); + /* Careful! we may have raced... */ + if (RPC_IS_QUEUED(task)) return 0; - } + if (rpc_test_and_set_running(task)) + return 0; + continue; } - spin_unlock_bh(&rpc_queue_lock); - if (!RPC_IS_SLEEPING(task)) - continue; + init_waitqueue_head(&task->u.tk_wait.waitq); + rpc_clear_running(task); /* sync task: sleep here */ dprintk("RPC: %4d sync task going to sleep\n", task->tk_pid); - if (current->pid == rpciod_pid) - printk(KERN_ERR "RPC: rpciod waiting on sync task!\n"); - if (!task->tk_client->cl_intr) { - __wait_event(task->tk_wait, !RPC_IS_SLEEPING(task)); + __wait_event(task->u.tk_wait.waitq, RPC_IS_RUNNING(task)); } else { - __wait_event_interruptible(task->tk_wait, !RPC_IS_SLEEPING(task), status); + __wait_event_interruptible(task->u.tk_wait.waitq, RPC_IS_RUNNING(task), status); /* * When a sync task receives a signal, it exits with * -ERESTARTSYS. In order to catch any callbacks that @@ -719,7 +631,9 @@ __rpc_execute(struct rpc_task *task) } if (task->tk_exit) { + lock_kernel(); task->tk_exit(task); + unlock_kernel(); /* If tk_action is non-null, the user wants us to restart */ if (task->tk_action) { if (!RPC_ASSASSINATED(task)) { @@ -738,7 +652,6 @@ __rpc_execute(struct rpc_task *task) /* Release all resources associated with the task */ rpc_release_task(task); - return status; } @@ -754,57 +667,16 @@ __rpc_execute(struct rpc_task *task) int rpc_execute(struct rpc_task *task) { - int status = -EIO; - if (rpc_inhibit) { - printk(KERN_INFO "RPC: execution inhibited!\n"); - goto out_release; - } - - status = -EWOULDBLOCK; - if (task->tk_active) { - printk(KERN_ERR "RPC: active task was run twice!\n"); - goto out_err; - } + BUG_ON(task->tk_active); task->tk_active = 1; rpc_set_running(task); return __rpc_execute(task); - out_release: - rpc_release_task(task); - out_err: - return status; } -/* - * This is our own little scheduler for async RPC tasks. 
- */ -static void -__rpc_schedule(void) +static void rpc_async_schedule(void *arg) { - struct rpc_task *task; - int count = 0; - - dprintk("RPC: rpc_schedule enter\n"); - while (1) { - - task_for_first(task, &schedq.tasks[0]) { - __rpc_remove_wait_queue(task); - spin_unlock_bh(&rpc_queue_lock); - - __rpc_execute(task); - spin_lock_bh(&rpc_queue_lock); - } else { - break; - } - - if (++count >= 200 || need_resched()) { - count = 0; - spin_unlock_bh(&rpc_queue_lock); - schedule(); - spin_lock_bh(&rpc_queue_lock); - } - } - dprintk("RPC: rpc_schedule leave\n"); + __rpc_execute((struct rpc_task *)arg); } /* @@ -862,7 +734,6 @@ void rpc_init_task(struct rpc_task *task task->tk_client = clnt; task->tk_flags = flags; task->tk_exit = callback; - init_waitqueue_head(&task->tk_wait); if (current->uid != current->fsuid || current->gid != current->fsgid) task->tk_flags |= RPC_TASK_SETUID; @@ -873,7 +744,9 @@ void rpc_init_task(struct rpc_task *task task->tk_priority = RPC_PRIORITY_NORMAL; task->tk_cookie = (unsigned long)current; - INIT_LIST_HEAD(&task->tk_links); + + /* Initialize workqueue for async tasks */ + task->tk_workqueue = rpciod_workqueue; /* Add to global list of all tasks */ spin_lock(&rpc_sched_lock); @@ -942,8 +815,7 @@ cleanup: goto out; } -void -rpc_release_task(struct rpc_task *task) +void rpc_release_task(struct rpc_task *task) { dprintk("RPC: %4d release task\n", task->tk_pid); @@ -961,19 +833,9 @@ rpc_release_task(struct rpc_task *task) list_del(&task->tk_task); spin_unlock(&rpc_sched_lock); - /* Protect the execution below. */ - spin_lock_bh(&rpc_queue_lock); - - /* Disable timer to prevent zombie wakeup */ - __rpc_disable_timer(task); - - /* Remove from any wait queue we're still on */ - __rpc_remove_wait_queue(task); - + BUG_ON (rpc_test_and_clear_queued(task)); task->tk_active = 0; - spin_unlock_bh(&rpc_queue_lock); - /* Synchronously delete any running timer */ rpc_delete_timer(task); @@ -1003,10 +865,9 @@ rpc_release_task(struct rpc_task *task) * queue 'childq'. If so returns a pointer to the parent. * Upon failure returns NULL. * - * Caller must hold rpc_queue_lock + * Caller must hold childq.lock */ -static inline struct rpc_task * -rpc_find_parent(struct rpc_task *child) +static inline struct rpc_task *rpc_find_parent(struct rpc_task *child) { struct rpc_task *task, *parent; struct list_head *le; @@ -1019,17 +880,16 @@ rpc_find_parent(struct rpc_task *child) return NULL; } -static void -rpc_child_exit(struct rpc_task *child) +static void rpc_child_exit(struct rpc_task *child) { struct rpc_task *parent; - spin_lock_bh(&rpc_queue_lock); + spin_lock_bh(&childq.lock); if ((parent = rpc_find_parent(child)) != NULL) { parent->tk_status = child->tk_status; __rpc_wake_up_task(parent); } - spin_unlock_bh(&rpc_queue_lock); + spin_unlock_bh(&childq.lock); } /* @@ -1052,22 +912,20 @@ fail: return NULL; } -void -rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func) +void rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func) { - spin_lock_bh(&rpc_queue_lock); + spin_lock_bh(&childq.lock); /* N.B. Is it possible for the child to have already finished? */ __rpc_sleep_on(&childq, task, func, NULL); rpc_schedule_run(child); - spin_unlock_bh(&rpc_queue_lock); + spin_unlock_bh(&childq.lock); } /* * Kill all tasks for the given client. * XXX: kill their descendants as well? 
*/ -void -rpc_killall_tasks(struct rpc_clnt *clnt) +void rpc_killall_tasks(struct rpc_clnt *clnt) { struct rpc_task *rovr; struct list_head *le; @@ -1089,93 +947,14 @@ rpc_killall_tasks(struct rpc_clnt *clnt) static DECLARE_MUTEX_LOCKED(rpciod_running); -static inline int -rpciod_task_pending(void) -{ - return !list_empty(&schedq.tasks[0]); -} - - -/* - * This is the rpciod kernel thread - */ -static int -rpciod(void *ptr) -{ - int rounds = 0; - - lock_kernel(); - /* - * Let our maker know we're running ... - */ - rpciod_pid = current->pid; - up(&rpciod_running); - - daemonize("rpciod"); - allow_signal(SIGKILL); - - dprintk("RPC: rpciod starting (pid %d)\n", rpciod_pid); - spin_lock_bh(&rpc_queue_lock); - while (rpciod_users) { - DEFINE_WAIT(wait); - if (signalled()) { - spin_unlock_bh(&rpc_queue_lock); - rpciod_killall(); - flush_signals(current); - spin_lock_bh(&rpc_queue_lock); - } - __rpc_schedule(); - if (current->flags & PF_FREEZE) { - spin_unlock_bh(&rpc_queue_lock); - refrigerator(PF_FREEZE); - spin_lock_bh(&rpc_queue_lock); - } - - if (++rounds >= 64) { /* safeguard */ - spin_unlock_bh(&rpc_queue_lock); - schedule(); - rounds = 0; - spin_lock_bh(&rpc_queue_lock); - } - - dprintk("RPC: rpciod back to sleep\n"); - prepare_to_wait(&rpciod_idle, &wait, TASK_INTERRUPTIBLE); - if (!rpciod_task_pending() && !signalled()) { - spin_unlock_bh(&rpc_queue_lock); - schedule(); - rounds = 0; - spin_lock_bh(&rpc_queue_lock); - } - finish_wait(&rpciod_idle, &wait); - dprintk("RPC: switch to rpciod\n"); - } - spin_unlock_bh(&rpc_queue_lock); - - dprintk("RPC: rpciod shutdown commences\n"); - if (!list_empty(&all_tasks)) { - printk(KERN_ERR "rpciod: active tasks at shutdown?!\n"); - rpciod_killall(); - } - - dprintk("RPC: rpciod exiting\n"); - unlock_kernel(); - - rpciod_pid = 0; - complete_and_exit(&rpciod_killer, 0); - return 0; -} - -static void -rpciod_killall(void) +static void rpciod_killall(void) { unsigned long flags; while (!list_empty(&all_tasks)) { clear_thread_flag(TIF_SIGPENDING); rpc_killall_tasks(NULL); - spin_lock_bh(&rpc_queue_lock); - __rpc_schedule(); - spin_unlock_bh(&rpc_queue_lock); + flush_workqueue(rpciod_workqueue); if (!list_empty(&all_tasks)) { dprintk("rpciod_killall: waiting for tasks to exit\n"); yield(); @@ -1193,28 +972,30 @@ rpciod_killall(void) int rpciod_up(void) { + struct workqueue_struct *wq; int error = 0; down(&rpciod_sema); - dprintk("rpciod_up: pid %d, users %d\n", rpciod_pid, rpciod_users); + dprintk("rpciod_up: users %d\n", rpciod_users); rpciod_users++; - if (rpciod_pid) + if (rpciod_workqueue) goto out; /* * If there's no pid, we should be the first user. */ if (rpciod_users > 1) - printk(KERN_WARNING "rpciod_up: no pid, %d users??\n", rpciod_users); + printk(KERN_WARNING "rpciod_up: no workqueue, %d users??\n", rpciod_users); /* * Create the rpciod thread and wait for it to start. 
*/ - error = kernel_thread(rpciod, NULL, 0); - if (error < 0) { - printk(KERN_WARNING "rpciod_up: create thread failed, error=%d\n", error); + error = -ENOMEM; + wq = create_workqueue("rpciod"); + if (wq == NULL) { + printk(KERN_WARNING "rpciod_up: create workqueue failed, error=%d\n", error); rpciod_users--; goto out; } - down(&rpciod_running); + rpciod_workqueue = wq; error = 0; out: up(&rpciod_sema); @@ -1225,20 +1006,21 @@ void rpciod_down(void) { down(&rpciod_sema); - dprintk("rpciod_down pid %d sema %d\n", rpciod_pid, rpciod_users); + dprintk("rpciod_down sema %d\n", rpciod_users); if (rpciod_users) { if (--rpciod_users) goto out; } else - printk(KERN_WARNING "rpciod_down: pid=%d, no users??\n", rpciod_pid); + printk(KERN_WARNING "rpciod_down: no users??\n"); - if (!rpciod_pid) { + if (!rpciod_workqueue) { dprintk("rpciod_down: Nothing to do!\n"); goto out; } + rpciod_killall(); - kill_proc(rpciod_pid, SIGKILL, 1); - wait_for_completion(&rpciod_killer); + destroy_workqueue(rpciod_workqueue); + rpciod_workqueue = NULL; out: up(&rpciod_sema); } @@ -1256,7 +1038,12 @@ void rpc_show_tasks(void) { } printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " "-rpcwait -action- --exit--\n"); - alltask_for_each(t, le, &all_tasks) + alltask_for_each(t, le, &all_tasks) { + const char *rpc_waitq = "none"; + + if (RPC_IS_QUEUED(t)) + rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq); + printk("%05d %04d %04x %06d %8p %6d %8p %08ld %8s %8p %8p\n", t->tk_pid, (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1), @@ -1264,8 +1051,9 @@ void rpc_show_tasks(void) t->tk_client, (t->tk_client ? t->tk_client->cl_prog : 0), t->tk_rqstp, t->tk_timeout, - rpc_qname(t->tk_rpcwait), + rpc_waitq, t->tk_action, t->tk_exit); + } spin_unlock(&rpc_sched_lock); } #endif --- linux-2.6.7/net/sunrpc/svcsock.c.lsec 2004-06-15 23:18:57.000000000 -0600 +++ linux-2.6.7/net/sunrpc/svcsock.c 2005-03-23 14:28:24.029396672 -0700 @@ -414,7 +414,6 @@ svc_sendto(struct svc_rqst *rqstp, struc } /* send tail */ if (xdr->tail[0].iov_len) { - /* The tail *will* be in respages[0]; */ result = sock->ops->sendpage(sock, rqstp->rq_respages[rqstp->rq_restailpage], ((unsigned long)xdr->tail[0].iov_base)& (PAGE_SIZE-1), xdr->tail[0].iov_len, 0); --- linux-2.6.7/net/sunrpc/clnt.c.lsec 2004-06-15 23:19:13.000000000 -0600 +++ linux-2.6.7/net/sunrpc/clnt.c 2005-03-23 14:28:23.595462640 -0700 @@ -351,7 +351,9 @@ int rpc_call_sync(struct rpc_clnt *clnt, rpc_clnt_sigmask(clnt, &oldset); /* Create/initialize a new RPC task */ - rpc_init_task(task, clnt, NULL, flags); + task = rpc_new_task(clnt, NULL, flags); + if (task == NULL) + return -ENOMEM; rpc_call_setup(task, msg, 0); /* Set up the call info struct and execute the task */ @@ -620,8 +622,14 @@ call_encode(struct rpc_task *task) rpc_exit(task, -EIO); return; } - if (encode && (status = rpcauth_wrap_req(task, encode, req, p, - task->tk_msg.rpc_argp)) < 0) { + if (encode == NULL) + return; + + status = rpcauth_wrap_req(task, encode, req, p, task->tk_msg.rpc_argp); + if (status == -EAGAIN) { + printk("XXXJBF: out of memory? 
Should retry here!!!\n"); + } + if (status < 0) { printk(KERN_WARNING "%s: can't encode arguments: %d\n", clnt->cl_protname, -status); rpc_exit(task, status); --- linux-2.6.7/net/sunrpc/sunrpc_syms.c.lsec 2004-06-15 23:19:52.000000000 -0600 +++ linux-2.6.7/net/sunrpc/sunrpc_syms.c 2005-03-23 14:32:35.589153776 -0700 @@ -58,6 +58,8 @@ EXPORT_SYMBOL(rpc_unlink); EXPORT_SYMBOL(rpc_wake_up); EXPORT_SYMBOL(rpc_queue_upcall); EXPORT_SYMBOL(rpc_mkpipe); +EXPORT_SYMBOL(rpc_mkdir); +EXPORT_SYMBOL(rpc_rmdir); /* Client transport */ EXPORT_SYMBOL(xprt_create_proto); @@ -89,6 +91,7 @@ EXPORT_SYMBOL(svc_makesock); EXPORT_SYMBOL(svc_reserve); EXPORT_SYMBOL(svc_auth_register); EXPORT_SYMBOL(auth_domain_lookup); +EXPORT_SYMBOL(svc_authenticate); /* RPC statistics */ #ifdef CONFIG_PROC_FS --- linux-2.6.7/net/sunrpc/pmap_clnt.c.lsec 2004-06-15 23:19:23.000000000 -0600 +++ linux-2.6.7/net/sunrpc/pmap_clnt.c 2005-03-23 14:28:24.134380712 -0700 @@ -183,8 +183,10 @@ rpc_register(u32 prog, u32 vers, int pro map.pm_prot = prot; map.pm_port = port; + rpciod_up(); error = rpc_call(pmap_clnt, port? PMAP_SET : PMAP_UNSET, &map, okay, 0); + rpciod_down(); if (error < 0) { printk(KERN_WARNING --- linux-2.6.7/net/sunrpc/auth_gss/gss_krb5_unseal.c.lsec 2004-06-15 23:19:44.000000000 -0600 +++ linux-2.6.7/net/sunrpc/auth_gss/gss_krb5_unseal.c 2005-03-23 14:28:23.761437408 -0700 @@ -68,20 +68,13 @@ #endif -/* message_buffer is an input if toktype is MIC and an output if it is WRAP: - * If toktype is MIC: read_token is a mic token, and message_buffer is the - * data that the mic was supposedly taken over. - * If toktype is WRAP: read_token is a wrap token, and message_buffer is used - * to return the decrypted data. - */ +/* read_token is a mic token, and message_buffer is the data that the mic was + * supposedly taken over. */ -/* XXX will need to change prototype and/or just split into a separate function - * when we add privacy (because read_token will be in pages too). */ u32 krb5_read_token(struct krb5_ctx *ctx, struct xdr_netobj *read_token, - struct xdr_buf *message_buffer, - int *qop_state, int toktype) + struct xdr_buf *message_buffer, int *qop_state) { int signalg; int sealalg; @@ -96,20 +89,16 @@ krb5_read_token(struct krb5_ctx *ctx, dprintk("RPC: krb5_read_token\n"); - if (g_verify_token_header(&ctx->mech_used, &bodysize, &ptr, toktype, + if (g_verify_token_header(&ctx->mech_used, &bodysize, &ptr, read_token->len)) goto out; - if ((*ptr++ != ((toktype>>8)&0xff)) || (*ptr++ != (toktype&0xff))) + if ((*ptr++ != ((KG_TOK_MIC_MSG>>8)&0xff)) || + (*ptr++ != ( KG_TOK_MIC_MSG &0xff)) ) goto out; /* XXX sanity-check bodysize?? 
*/ - if (toktype == KG_TOK_WRAP_MSG) { - /* XXX gone */ - goto out; - } - /* get the sign and seal algorithms */ signalg = ptr[0] + (ptr[1] << 8); @@ -120,14 +109,7 @@ krb5_read_token(struct krb5_ctx *ctx, if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) goto out; - if (((toktype != KG_TOK_WRAP_MSG) && (sealalg != 0xffff)) || - ((toktype == KG_TOK_WRAP_MSG) && (sealalg == 0xffff))) - goto out; - - /* in the current spec, there is only one valid seal algorithm per - key type, so a simple comparison is ok */ - - if ((toktype == KG_TOK_WRAP_MSG) && !(sealalg == ctx->sealalg)) + if (sealalg != 0xffff) goto out; /* there are several mappings of seal algorithms to sign algorithms, @@ -154,7 +136,7 @@ krb5_read_token(struct krb5_ctx *ctx, switch (signalg) { case SGN_ALG_DES_MAC_MD5: ret = make_checksum(checksum_type, ptr - 2, 8, - message_buffer, &md5cksum); + message_buffer, 0, &md5cksum); if (ret) goto out; --- linux-2.6.7/net/sunrpc/auth_gss/auth_gss.c.lsec 2004-06-15 23:19:22.000000000 -0600 +++ linux-2.6.7/net/sunrpc/auth_gss/auth_gss.c 2005-03-23 14:28:24.185372960 -0700 @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -397,7 +398,7 @@ retry: spin_unlock(&gss_auth->lock); } gss_release_msg(gss_msg); - dprintk("RPC: %4u gss_upcall for uid %u result %d", task->tk_pid, + dprintk("RPC: %4u gss_upcall for uid %u result %d\n", task->tk_pid, uid, res); return res; out_sleep: @@ -740,6 +741,8 @@ gss_marshal(struct rpc_task *task, u32 * maj_stat = gss_get_mic(ctx->gc_gss_ctx, GSS_C_QOP_DEFAULT, &verf_buf, &mic); + if (maj_stat == GSS_S_CONTEXT_EXPIRED) + cred->cr_flags |= RPCAUTH_CRED_DEAD; if(maj_stat != 0){ printk("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat); goto out_put_ctx; @@ -779,6 +782,7 @@ gss_validate(struct rpc_task *task, u32 struct xdr_netobj mic; u32 flav,len; u32 service; + u32 maj_stat; dprintk("RPC: %4u gss_validate\n", task->tk_pid); @@ -794,8 +798,11 @@ gss_validate(struct rpc_task *task, u32 mic.data = (u8 *)p; mic.len = len; - if (gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state)) - goto out_bad; + maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state); + if (maj_stat == GSS_S_CONTEXT_EXPIRED) + cred->cr_flags |= RPCAUTH_CRED_DEAD; + if (maj_stat) + goto out_bad; service = gss_pseudoflavor_to_service(ctx->gc_gss_ctx->mech_type, gss_cred->gc_flavor); switch (service) { @@ -807,6 +814,11 @@ gss_validate(struct rpc_task *task, u32 /* verifier data, flavor, length, length, sequence number: */ task->tk_auth->au_rslack = XDR_QUADLEN(len) + 4; break; + case RPC_GSS_SVC_PRIVACY: + /* XXXJBF: Ugh. Going for a wild overestimate. + * Need some info from krb5 layer? 
*/ + task->tk_auth->au_rslack = XDR_QUADLEN(len) + 32; + break; default: goto out_bad; } @@ -821,11 +833,11 @@ out_bad: } static inline int -gss_wrap_req_integ(struct gss_cl_ctx *ctx, - kxdrproc_t encode, void *rqstp, u32 *p, void *obj) +gss_wrap_req_integ(struct rpc_cred *cred, kxdrproc_t encode, + struct rpc_rqst *rqstp, u32 *p, void *obj) { - struct rpc_rqst *req = (struct rpc_rqst *)rqstp; - struct xdr_buf *snd_buf = &req->rq_snd_buf; + struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); + struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; struct xdr_buf integ_buf; u32 *integ_len = NULL; struct xdr_netobj mic; @@ -836,7 +848,7 @@ gss_wrap_req_integ(struct gss_cl_ctx *ct integ_len = p++; offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; - *p++ = htonl(req->rq_seqno); + *p++ = htonl(rqstp->rq_seqno); status = encode(rqstp, p, obj); if (status) @@ -848,7 +860,7 @@ gss_wrap_req_integ(struct gss_cl_ctx *ct *integ_len = htonl(integ_buf.len); /* guess whether we're in the head or the tail: */ - if (snd_buf->page_len || snd_buf->tail[0].iov_len) + if (snd_buf->page_len || snd_buf->tail[0].iov_len) iov = snd_buf->tail; else iov = snd_buf->head; @@ -857,6 +869,8 @@ gss_wrap_req_integ(struct gss_cl_ctx *ct maj_stat = gss_get_mic(ctx->gc_gss_ctx, GSS_C_QOP_DEFAULT, &integ_buf, &mic); + if (maj_stat == GSS_S_CONTEXT_EXPIRED) + cred->cr_flags |= RPCAUTH_CRED_DEAD; status = -EIO; /* XXX? */ if (maj_stat) return status; @@ -868,6 +882,113 @@ gss_wrap_req_integ(struct gss_cl_ctx *ct return 0; } +static void +priv_release_snd_buf(struct rpc_rqst *rqstp) +{ + int i; + + for (i=0; i < rqstp->rq_enc_pages_num; i++) + __free_page(rqstp->rq_enc_pages[i]); + kfree(rqstp->rq_enc_pages); +} + +static int +alloc_enc_pages(struct rpc_rqst *rqstp) +{ + struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; + int first, last, i; + + if (snd_buf->page_len == 0) { + rqstp->rq_enc_pages_num = 0; + return 0; + } + + first = snd_buf->page_base >> PAGE_CACHE_SHIFT; + last = (snd_buf->page_base + snd_buf->page_len - 1) >> PAGE_CACHE_SHIFT; + rqstp->rq_enc_pages_num = last - first + 1 + 1; + rqstp->rq_enc_pages + = kmalloc(rqstp->rq_enc_pages_num * sizeof(struct page *), + GFP_NOFS); + if (!rqstp->rq_enc_pages) + goto out; + for (i=0; i < rqstp->rq_enc_pages_num; i++) { + rqstp->rq_enc_pages[i] = alloc_page(GFP_NOFS); + if (rqstp->rq_enc_pages[i] == NULL) + goto out_free; + } + rqstp->rq_release_snd_buf = priv_release_snd_buf; + return 0; +out_free: + for (i--; i >= 0; i--) { + __free_page(rqstp->rq_enc_pages[i]); + } +out: + return -EAGAIN; +} + +static inline int +gss_wrap_req_priv(struct rpc_cred *cred, kxdrproc_t encode, + struct rpc_rqst *rqstp, u32 *p, void *obj) +{ + struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); + struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; + u32 offset; + u32 maj_stat; + int status; + u32 *opaque_len; + struct page **inpages; + int first; + int pad; + struct iovec *iov; + char *tmp; + + opaque_len = p++; + offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; + *p++ = htonl(rqstp->rq_seqno); + + status = encode(rqstp, p, obj); + if (status) + return status; + + status = alloc_enc_pages(rqstp); + if (status) + return status; + /* XXXJBF: Oops! Do we need rq_enc_pages really any more?? */ + first = snd_buf->page_base >> PAGE_CACHE_SHIFT; + inpages = snd_buf->pages + first; + snd_buf->pages = rqstp->rq_enc_pages; + snd_buf->page_base -= first << PAGE_CACHE_SHIFT; + /* XXX?: tail needs to be separate if we want to be able to expand + * the head (since it's often put right after the head). 
But is + * expanding the head safe in any case? */ + if (snd_buf->page_len || snd_buf->tail[0].iov_len) { + tmp = page_address(rqstp->rq_enc_pages[rqstp->rq_enc_pages_num - 1]); + memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len); + snd_buf->tail[0].iov_base = tmp; + } + maj_stat = gss_wrap(ctx->gc_gss_ctx, GSS_C_QOP_DEFAULT, offset, + snd_buf, inpages); + if (maj_stat == GSS_S_CONTEXT_EXPIRED) + cred->cr_flags |= RPCAUTH_CRED_DEAD; + status = -EIO; /* XXX? */ + if (maj_stat) + return status; + + *opaque_len = htonl(snd_buf->len - offset); + /* guess whether we're in the head or the tail: */ + if (snd_buf->page_len || snd_buf->tail[0].iov_len) + iov = snd_buf->tail; + else + iov = snd_buf->head; + p = iov->iov_base + iov->iov_len; + pad = 3 - ((snd_buf->len - offset - 1) & 3); + memset(p, 0, pad); + iov->iov_len += pad; + snd_buf->len += pad; + + return 0; +} + static int gss_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, u32 *p, void *obj) @@ -894,9 +1015,11 @@ gss_wrap_req(struct rpc_task *task, status = encode(rqstp, p, obj); goto out; case RPC_GSS_SVC_INTEGRITY: - status = gss_wrap_req_integ(ctx, encode, rqstp, p, obj); + status = gss_wrap_req_integ(cred, encode, rqstp, p, obj); goto out; case RPC_GSS_SVC_PRIVACY: + status = gss_wrap_req_priv(cred, encode, rqstp, p, obj); + goto out; default: goto out; } @@ -907,11 +1030,10 @@ out: } static inline int -gss_unwrap_resp_integ(struct gss_cl_ctx *ctx, - kxdrproc_t decode, void *rqstp, u32 **p, void *obj) +gss_unwrap_resp_integ(struct rpc_cred *cred, struct rpc_rqst *rqstp, u32 **p) { - struct rpc_rqst *req = (struct rpc_rqst *)rqstp; - struct xdr_buf *rcv_buf = &req->rq_rcv_buf; + struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); + struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf; struct xdr_buf integ_buf; struct xdr_netobj mic; u32 data_offset, mic_offset; @@ -926,7 +1048,7 @@ gss_unwrap_resp_integ(struct gss_cl_ctx mic_offset = integ_len + data_offset; if (mic_offset > rcv_buf->len) return status; - if (ntohl(*(*p)++) != req->rq_seqno) + if (ntohl(*(*p)++) != rqstp->rq_seqno) return status; if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset, @@ -938,11 +1060,44 @@ gss_unwrap_resp_integ(struct gss_cl_ctx maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic, NULL); + if (maj_stat == GSS_S_CONTEXT_EXPIRED) + cred->cr_flags |= RPCAUTH_CRED_DEAD; if (maj_stat != GSS_S_COMPLETE) return status; return 0; } +static inline int +gss_unwrap_resp_priv(struct rpc_cred *cred, struct rpc_rqst *rqstp, u32 **p) +{ + struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); + struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf; + u32 offset, out_offset; + u32 opaque_len; + u32 maj_stat; + int status = -EIO; + + opaque_len = ntohl(*(*p)++); + offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base; + if (offset + opaque_len > rcv_buf->len) + return status; + /* remove padding: */ + rcv_buf->len = offset + opaque_len; + + maj_stat = gss_unwrap(ctx->gc_gss_ctx, GSS_C_QOP_DEFAULT, + offset, rcv_buf, &out_offset); + if (maj_stat == GSS_S_CONTEXT_EXPIRED) + cred->cr_flags |= RPCAUTH_CRED_DEAD; + if (maj_stat != GSS_S_COMPLETE) + return status; + *p = (u32 *)(rcv_buf->head[0].iov_base + out_offset); + if (ntohl(*(*p)++) != rqstp->rq_seqno) + return status; + + return 0; +} + + static int gss_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, u32 *p, void *obj) @@ -962,12 +1117,16 @@ gss_unwrap_resp(struct rpc_task *task, case RPC_GSS_SVC_NONE: goto out_decode; case RPC_GSS_SVC_INTEGRITY: - status = gss_unwrap_resp_integ(ctx, 
decode, - rqstp, &p, obj); + status = gss_unwrap_resp_integ(cred, rqstp, &p); if (status) goto out; break; case RPC_GSS_SVC_PRIVACY: + status = gss_unwrap_resp_priv(cred, rqstp, &p); + if (status) + goto out; + break; + default: goto out; } --- linux-2.6.7/net/sunrpc/auth_gss/gss_spkm3_mech.c.lsec 2005-03-23 14:28:24.187372656 -0700 +++ linux-2.6.7/net/sunrpc/auth_gss/gss_spkm3_mech.c 2005-03-23 14:28:24.186372808 -0700 @@ -0,0 +1,296 @@ +/* + * linux/net/sunrpc/gss_spkm3_mech.c + * + * Copyright (c) 2003 The Regents of the University of Michigan. + * All rights reserved. + * + * Andy Adamson + * J. Bruce Fields + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_AUTH +#endif + +struct xdr_netobj gss_mech_spkm3_oid = + {7, "\053\006\001\005\005\001\003"}; + +static inline int +get_bytes(char **ptr, const char *end, void *res, int len) +{ + char *p, *q; + p = *ptr; + q = p + len; + if (q > end || q < p) + return -1; + memcpy(res, p, len); + *ptr = q; + return 0; +} + +static inline int +get_netobj(char **ptr, const char *end, struct xdr_netobj *res) +{ + char *p, *q; + p = *ptr; + if (get_bytes(&p, end, &res->len, sizeof(res->len))) + return -1; + q = p + res->len; + if(res->len == 0) + goto out_nocopy; + if (q > end || q < p) + return -1; + if (!(res->data = kmalloc(res->len, GFP_KERNEL))) + return -1; + memcpy(res->data, p, res->len); +out_nocopy: + *ptr = q; + return 0; +} + +static inline int +get_key(char **p, char *end, struct crypto_tfm **res, int *resalg) +{ + struct xdr_netobj key = { + .len = 0, + .data = NULL, + }; + int alg_mode,setkey = 0; + char *alg_name; + + if (get_bytes(p, end, resalg, sizeof(int))) + goto out_err; + if ((get_netobj(p, end, &key))) + goto out_err; + + switch (*resalg) { + case NID_des_cbc: + alg_name = "des"; + alg_mode = CRYPTO_TFM_MODE_CBC; + setkey = 1; + break; + case NID_md5: + if (key.len == 0) { + dprintk("RPC: SPKM3 get_key: NID_md5 zero Key length\n"); + } + alg_name = "md5"; + alg_mode = 0; + setkey = 0; + break; + case NID_cast5_cbc: + dprintk("RPC: SPKM3 get_key: case cast5_cbc, UNSUPPORTED \n"); + goto out_err; + break; + default: + dprintk("RPC: SPKM3 get_key: unsupported algorithm %d", *resalg); + goto out_err_free_key; + } + if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) + goto out_err_free_key; + if (setkey) { + if (crypto_cipher_setkey(*res, key.data, key.len)) + goto out_err_free_tfm; + } + + if(key.len > 0) + kfree(key.data); + return 0; + +out_err_free_tfm: + crypto_free_tfm(*res); +out_err_free_key: + if(key.len > 0) + kfree(key.data); +out_err: + return -1; +} + +static u32 +gss_import_sec_context_spkm3(struct xdr_netobj *inbuf, + struct gss_ctx *ctx_id) +{ + char *p = inbuf->data; + char *end = inbuf->data + inbuf->len; + struct spkm3_ctx *ctx; + + if (!(ctx = kmalloc(sizeof(*ctx), GFP_KERNEL))) + goto out_err; + memset(ctx, 0, sizeof(*ctx)); + + if (get_netobj(&p, end, &ctx->ctx_id)) + goto out_err_free_ctx; + + if (get_bytes(&p, end, &ctx->qop, sizeof(ctx->qop))) + goto out_err_free_ctx_id; + + if (get_netobj(&p, end, &ctx->mech_used)) + goto out_err_free_mech; + + if (get_bytes(&p, end, &ctx->ret_flags, sizeof(ctx->ret_flags))) + goto out_err_free_mech; + + if (get_bytes(&p, end, &ctx->req_flags, sizeof(ctx->req_flags))) + goto out_err_free_mech; + + if (get_netobj(&p, end, &ctx->share_key)) + goto out_err_free_s_key; + + if (get_key(&p, end, &ctx->derived_conf_key, &ctx->conf_alg)) { + dprintk("RPC: SPKM3 confidentiality key will be NULL\n"); + } + + if (get_key(&p, end, &ctx->derived_integ_key, &ctx->intg_alg)) { + dprintk("RPC: SPKM3 integrity key will be NULL\n"); + } + + if (get_bytes(&p, end, &ctx->owf_alg, sizeof(ctx->owf_alg))) + goto out_err_free_s_key; + + if (get_bytes(&p, end, &ctx->owf_alg, sizeof(ctx->owf_alg))) + goto out_err_free_s_key; + + if (p != end) + goto out_err_free_s_key; + + ctx_id->internal_ctx_id = ctx; + + dprintk("Succesfully imported new spkm context.\n"); + return 0; + +out_err_free_s_key: + kfree(ctx->share_key.data); +out_err_free_mech: + kfree(ctx->mech_used.data); +out_err_free_ctx_id: + 
kfree(ctx->ctx_id.data); +out_err_free_ctx: + kfree(ctx); +out_err: + return GSS_S_FAILURE; +} + +void +gss_delete_sec_context_spkm3(void *internal_ctx) { + struct spkm3_ctx *sctx = internal_ctx; + + if(sctx->derived_integ_key) + crypto_free_tfm(sctx->derived_integ_key); + if(sctx->derived_conf_key) + crypto_free_tfm(sctx->derived_conf_key); + if(sctx->share_key.data) + kfree(sctx->share_key.data); + if(sctx->mech_used.data) + kfree(sctx->mech_used.data); + kfree(sctx); +} + +u32 +gss_verify_mic_spkm3(struct gss_ctx *ctx, + struct xdr_buf *signbuf, + struct xdr_netobj *checksum, + u32 *qstate) { + u32 maj_stat = 0; + int qop_state = 0; + struct spkm3_ctx *sctx = ctx->internal_ctx_id; + + dprintk("RPC: gss_verify_mic_spkm3 calling spkm3_read_token\n"); + maj_stat = spkm3_read_token(sctx, checksum, signbuf, &qop_state, + SPKM_MIC_TOK); + + if (!maj_stat && qop_state) + *qstate = qop_state; + + dprintk("RPC: gss_verify_mic_spkm3 returning %d\n", maj_stat); + return maj_stat; +} + +u32 +gss_get_mic_spkm3(struct gss_ctx *ctx, + u32 qop, + struct xdr_buf *message_buffer, + struct xdr_netobj *message_token) { + u32 err = 0; + struct spkm3_ctx *sctx = ctx->internal_ctx_id; + + dprintk("RPC: gss_get_mic_spkm3\n"); + + err = spkm3_make_token(sctx, qop, message_buffer, + message_token, SPKM_MIC_TOK); + return err; +} + +static struct gss_api_ops gss_spkm3_ops = { + .gss_import_sec_context = gss_import_sec_context_spkm3, + .gss_get_mic = gss_get_mic_spkm3, + .gss_verify_mic = gss_verify_mic_spkm3, + .gss_delete_sec_context = gss_delete_sec_context_spkm3, +}; + +static struct pf_desc gss_spkm3_pfs[] = { + {RPC_AUTH_GSS_SPKM, 0, RPC_GSS_SVC_NONE, "spkm3"}, + {RPC_AUTH_GSS_SPKMI, 0, RPC_GSS_SVC_INTEGRITY, "spkm3i"}, +}; + +static struct gss_api_mech gss_spkm3_mech = { + .gm_name = "spkm3", + .gm_owner = THIS_MODULE, + .gm_ops = &gss_spkm3_ops, + .gm_pf_num = ARRAY_SIZE(gss_spkm3_pfs), + .gm_pfs = gss_spkm3_pfs, +}; + +static int __init init_spkm3_module(void) +{ + int status; + + status = gss_mech_register(&gss_spkm3_mech); + if (status) + printk("Failed to register spkm3 gss mechanism!\n"); + return 0; +} + +static void __exit cleanup_spkm3_module(void) +{ + gss_mech_unregister(&gss_spkm3_mech); +} + +MODULE_LICENSE("GPL"); +module_init(init_spkm3_module); +module_exit(cleanup_spkm3_module); --- linux-2.6.7/net/sunrpc/auth_gss/gss_krb5_crypto.c.lsec 2004-06-15 23:18:55.000000000 -0600 +++ linux-2.6.7/net/sunrpc/auth_gss/gss_krb5_crypto.c 2005-03-23 14:28:24.840273400 -0700 @@ -139,17 +139,91 @@ buf_to_sg(struct scatterlist *sg, char * sg->length = len; } +static int +process_xdr_buf(struct xdr_buf *buf, int offset, int len, + int (*actor)(struct scatterlist *, void *), void *data) +{ + int i, page_len, thislen, page_offset, ret = 0; + struct scatterlist sg[1]; + + if (offset >= buf->head[0].iov_len) { + offset -= buf->head[0].iov_len; + } else { + thislen = buf->head[0].iov_len - offset; + if (thislen > len) + thislen = len; + buf_to_sg(sg, buf->head[0].iov_base + offset, thislen); + ret = actor(sg, data); + if (ret) + goto out; + offset = 0; + len -= thislen; + } + if (len == 0) + goto out; + + if (offset >= buf->page_len) { + offset -= buf->page_len; + } else { + page_len = buf->page_len - offset; + if (page_len > len) + page_len = len; + len -= page_len; + page_offset = (offset + buf->page_base) & (PAGE_CACHE_SIZE - 1); + i = (offset + buf->page_base) >> PAGE_CACHE_SHIFT; + thislen = PAGE_CACHE_SIZE - page_offset; + do { + if (thislen > page_len) + thislen = page_len; + sg->page = buf->pages[i]; + 
sg->offset = page_offset; + sg->length = thislen; + ret = actor(sg, data); + if (ret) + goto out; + page_len -= thislen; + i++; + page_offset = 0; + thislen = PAGE_CACHE_SIZE; + } while (page_len != 0); + offset = 0; + } + if (len == 0) + goto out; + + if (offset < buf->tail[0].iov_len) { + thislen = buf->tail[0].iov_len - offset; + if (thislen > len) + thislen = len; + buf_to_sg(sg, buf->tail[0].iov_base + offset, thislen); + ret = actor(sg, data); + len -= thislen; + } + if (len != 0) + ret = -EINVAL; +out: + return ret; +} + +static int +checksummer(struct scatterlist *sg, void *data) +{ + struct crypto_tfm *tfm = (struct crypto_tfm *)data; + + crypto_digest_update(tfm, sg, 1); + + return 0; +} + /* checksum the plaintext data and hdrlen bytes of the token header */ s32 make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, - struct xdr_netobj *cksum) + int body_offset, struct xdr_netobj *cksum) { char *cksumname; struct crypto_tfm *tfm = NULL; /* XXX add to ctx? */ struct scatterlist sg[1]; u32 code = GSS_S_FAILURE; - int len, thislen, offset; - int i; switch (cksumtype) { case CKSUMTYPE_RSA_MD5: @@ -169,35 +243,8 @@ make_checksum(s32 cksumtype, char *heade crypto_digest_init(tfm); buf_to_sg(sg, header, hdrlen); crypto_digest_update(tfm, sg, 1); - if (body->head[0].iov_len) { - buf_to_sg(sg, body->head[0].iov_base, body->head[0].iov_len); - crypto_digest_update(tfm, sg, 1); - } - - len = body->page_len; - if (len != 0) { - offset = body->page_base & (PAGE_CACHE_SIZE - 1); - i = body->page_base >> PAGE_CACHE_SHIFT; - thislen = PAGE_CACHE_SIZE - offset; - do { - if (thislen > len) - thislen = len; - sg->page = body->pages[i]; - sg->offset = offset; - sg->length = thislen; - kmap(sg->page); /* XXX kmap_atomic? */ - crypto_digest_update(tfm, sg, 1); - kunmap(sg->page); - len -= thislen; - i++; - offset = 0; - thislen = PAGE_CACHE_SIZE; - } while(len != 0); - } - if (body->tail[0].iov_len) { - buf_to_sg(sg, body->tail[0].iov_base, body->tail[0].iov_len); - crypto_digest_update(tfm, sg, 1); - } + process_xdr_buf(body, body_offset, body->len - body_offset, + checksummer, tfm); crypto_digest_final(tfm, cksum->data); code = 0; out: @@ -207,3 +254,154 @@ out: } EXPORT_SYMBOL(make_checksum); + +struct encryptor_desc { + u8 iv[8]; /* XXX hard-coded blocksize */ + struct crypto_tfm *tfm; + int pos; + struct xdr_buf *outbuf; + struct page **pages; + struct scatterlist infrags[4]; + struct scatterlist outfrags[4]; + int fragno; + int fraglen; +}; + +static int +encryptor(struct scatterlist *sg, void *data) +{ + struct encryptor_desc *desc = data; + struct xdr_buf *outbuf = desc->outbuf; + struct page *in_page; + int thislen = desc->fraglen + sg->length; + int fraglen, ret; + int page_pos; + + /* Worst case is 4 fragments: head, end of page 1, start + * of page 2, tail. Anything more is a bug. 
*/ + BUG_ON(desc->fragno > 3); + desc->infrags[desc->fragno] = *sg; + desc->outfrags[desc->fragno] = *sg; + + page_pos = desc->pos - outbuf->head[0].iov_len; + if (page_pos >= 0 && page_pos < outbuf->page_len) { + /* pages are not in place: */ + int i = (page_pos + outbuf->page_base) >> PAGE_CACHE_SHIFT; + in_page = desc->pages[i]; + } else { + in_page = sg->page; + } + desc->infrags[desc->fragno].page = in_page; + desc->fragno++; + desc->fraglen += sg->length; + desc->pos += sg->length; + + fraglen = thislen & 7; /* XXX hardcoded blocksize */ + thislen -= fraglen; + + if (thislen == 0) + return 0; + + ret = crypto_cipher_encrypt_iv(desc->tfm, desc->outfrags, desc->infrags, + thislen, desc->iv); + if (ret) + return ret; + if (fraglen) { + desc->outfrags[0].page = sg->page; + desc->outfrags[0].offset = sg->offset + sg->length - fraglen; + desc->outfrags[0].length = fraglen; + desc->infrags[0] = desc->outfrags[0]; + desc->infrags[0].page = in_page; + desc->fragno = 1; + desc->fraglen = fraglen; + } else { + desc->fragno = 0; + desc->fraglen = 0; + } + return 0; +} + +int +gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset, + struct page **pages) +{ + int ret; + struct encryptor_desc desc; + + BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0); + + memset(desc.iv, 0, sizeof(desc.iv)); + desc.tfm = tfm; + desc.pos = offset; + desc.outbuf = buf; + desc.pages = pages; + desc.fragno = 0; + desc.fraglen = 0; + + ret = process_xdr_buf(buf, offset, buf->len - offset, encryptor, &desc); + return ret; +} + +EXPORT_SYMBOL(gss_encrypt_xdr_buf); + +struct decryptor_desc { + u8 iv[8]; /* XXX hard-coded blocksize */ + struct crypto_tfm *tfm; + struct scatterlist frags[4]; + int fragno; + int fraglen; +}; + +static int +decryptor(struct scatterlist *sg, void *data) +{ + struct decryptor_desc *desc = data; + int thislen = desc->fraglen + sg->length; + int fraglen, ret; + + /* Worst case is 4 fragments: head, end of page 1, start + * of page 2, tail. Anything more is a bug. */ + BUG_ON(desc->fragno > 3); + desc->frags[desc->fragno] = *sg; + desc->fragno++; + desc->fraglen += sg->length; + + fraglen = thislen & 7; /* XXX hardcoded blocksize */ + thislen -= fraglen; + + if (thislen == 0) + return 0; + + ret = crypto_cipher_decrypt_iv(desc->tfm, desc->frags, desc->frags, + thislen, desc->iv); + if (ret) + return ret; + if (fraglen) { + desc->frags[0].page = sg->page; + desc->frags[0].offset = sg->offset + sg->length - fraglen; + desc->frags[0].length = fraglen; + desc->fragno = 1; + desc->fraglen = fraglen; + } else { + desc->fragno = 0; + desc->fraglen = 0; + } + return 0; +} + +int +gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset) +{ + struct decryptor_desc desc; + + /* XXXJBF: */ + BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0); + + memset(desc.iv, 0, sizeof(desc.iv)); + desc.tfm = tfm; + desc.fragno = 0; + desc.fraglen = 0; + return process_xdr_buf(buf, offset, buf->len - offset, decryptor, &desc); +} + +EXPORT_SYMBOL(gss_decrypt_xdr_buf); --- linux-2.6.7/net/sunrpc/auth_gss/gss_spkm3_seal.c.lsec 2005-03-23 14:28:24.239364752 -0700 +++ linux-2.6.7/net/sunrpc/auth_gss/gss_spkm3_seal.c 2005-03-23 14:28:24.238364904 -0700 @@ -0,0 +1,132 @@ +/* + * linux/net/sunrpc/gss_spkm3_seal.c + * + * Copyright (c) 2003 The Regents of the University of Michigan. + * All rights reserved. 
+ * + * Andy Adamson + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include +#include +#include +#include +#include + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_AUTH +#endif + +/* + * spkm3_make_token() + * + * Only SPKM_MIC_TOK with md5 intg-alg is supported + */ + +u32 +spkm3_make_token(struct spkm3_ctx *ctx, int qop_req, + struct xdr_buf * text, struct xdr_netobj * token, + int toktype) +{ + s32 checksum_type; + char tokhdrbuf[25]; + struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; + struct xdr_netobj mic_hdr = {.len = 0, .data = tokhdrbuf}; + int tmsglen, tokenlen = 0; + unsigned char *ptr; + s32 now; + int ctxelen = 0, ctxzbit = 0; + int md5elen = 0, md5zbit = 0; + + dprintk("RPC: spkm3_make_token\n"); + + now = jiffies; + if (qop_req != 0) + goto out_err; + + if (ctx->ctx_id.len != 16) { + dprintk("RPC: spkm3_make_token BAD ctx_id.len %d\n", + ctx->ctx_id.len); + goto out_err; + } + + switch (ctx->intg_alg) { + case NID_md5: + checksum_type = CKSUMTYPE_RSA_MD5; + break; + default: + dprintk("RPC: gss_spkm3_seal: ctx->signalg %d not" + " supported\n", ctx->intg_alg); + goto out_err; + } + /* XXX since we don't support WRAP, perhaps we don't care... 
*/ + if (ctx->conf_alg != NID_cast5_cbc) { + dprintk("RPC: gss_spkm3_seal: ctx->sealalg %d not supported\n", + ctx->conf_alg); + goto out_err; + } + + if (toktype == SPKM_MIC_TOK) { + tmsglen = 0; + /* Calculate checksum over the mic-header */ + asn1_bitstring_len(&ctx->ctx_id, &ctxelen, &ctxzbit); + spkm3_mic_header(&mic_hdr.data, &mic_hdr.len, ctx->ctx_id.data, + ctxelen, ctxzbit); + + if (make_checksum(checksum_type, mic_hdr.data, mic_hdr.len, + text, &md5cksum)) + goto out_err; + + asn1_bitstring_len(&md5cksum, &md5elen, &md5zbit); + tokenlen = 10 + ctxelen + 1 + 2 + md5elen + 1; + + /* Create token header using generic routines */ + token->len = g_token_size(&ctx->mech_used, tokenlen + tmsglen); + + ptr = token->data; + g_make_token_header(&ctx->mech_used, tokenlen + tmsglen, &ptr); + + spkm3_make_mic_token(&ptr, tokenlen, &mic_hdr, &md5cksum, md5elen, md5zbit); + } else if (toktype == SPKM_WRAP_TOK) { /* Not Supported */ + dprintk("RPC: gss_spkm3_seal: SPKM_WRAP_TOK not supported\n"); + goto out_err; + } + kfree(md5cksum.data); + + /* XXX need to implement sequence numbers, and ctx->expired */ + + return GSS_S_COMPLETE; +out_err: + if (md5cksum.data) + kfree(md5cksum.data); + token->data = 0; + token->len = 0; + return GSS_S_FAILURE; +} --- linux-2.6.7/net/sunrpc/auth_gss/svcauth_gss.c.lsec 2004-06-15 23:19:22.000000000 -0600 +++ linux-2.6.7/net/sunrpc/auth_gss/svcauth_gss.c 2005-03-23 14:28:24.405339520 -0700 @@ -37,6 +37,7 @@ * */ +#include #include #include #include @@ -78,7 +79,6 @@ struct rsi { static struct cache_head *rsi_table[RSI_HASHMAX]; static struct cache_detail rsi_cache; -static struct rsi *rsi_lookup(struct rsi *item, int set); static void rsi_free(struct rsi *rsii) { @@ -125,38 +125,6 @@ static inline int dup_netobj(struct xdr_ return dup_to_netobj(dst, src->data, src->len); } -static inline void rsi_init(struct rsi *new, struct rsi *item) -{ - new->out_handle.data = NULL; - new->out_handle.len = 0; - new->out_token.data = NULL; - new->out_token.len = 0; - new->in_handle.len = item->in_handle.len; - item->in_handle.len = 0; - new->in_token.len = item->in_token.len; - item->in_token.len = 0; - new->in_handle.data = item->in_handle.data; - item->in_handle.data = NULL; - new->in_token.data = item->in_token.data; - item->in_token.data = NULL; -} - -static inline void rsi_update(struct rsi *new, struct rsi *item) -{ - BUG_ON(new->out_handle.data || new->out_token.data); - new->out_handle.len = item->out_handle.len; - item->out_handle.len = 0; - new->out_token.len = item->out_token.len; - item->out_token.len = 0; - new->out_handle.data = item->out_handle.data; - item->out_handle.data = NULL; - new->out_token.data = item->out_token.data; - item->out_token.data = NULL; - - new->major_status = item->major_status; - new->minor_status = item->minor_status; -} - static void rsi_request(struct cache_detail *cd, struct cache_head *h, char **bpp, int *blen) @@ -168,6 +136,75 @@ static void rsi_request(struct cache_det (*bpp)[-1] = '\n'; } +static inline int +gssd_reply(struct rsi *item) +{ + struct rsi *tmp; + struct cache_head **hp, **head; + + head = &rsi_cache.hash_table[rsi_hash(item)]; + write_lock(&rsi_cache.hash_lock); + for (hp = head; *hp != NULL; hp = &tmp->h.next) { + tmp = container_of(*hp, struct rsi, h); + if (rsi_match(tmp, item)) { + cache_get(&tmp->h); + clear_bit(CACHE_HASHED, &tmp->h.flags); + *hp = tmp->h.next; + tmp->h.next = NULL; + rsi_cache.entries--; + if (test_bit(CACHE_VALID, &tmp->h.flags)) { + write_unlock(&rsi_cache.hash_lock); + rsi_put(&tmp->h, 
&rsi_cache); + return -EINVAL; + } + set_bit(CACHE_HASHED, &item->h.flags); + item->h.next = *hp; + *hp = &item->h; + rsi_cache.entries++; + set_bit(CACHE_VALID, &item->h.flags); + item->h.last_refresh = get_seconds(); + write_unlock(&rsi_cache.hash_lock); + cache_fresh(&rsi_cache, &tmp->h, 0); + rsi_put(&tmp->h, &rsi_cache); + return 0; + } + } + write_unlock(&rsi_cache.hash_lock); + return -EINVAL; +} + +static inline struct rsi * +gssd_upcall(struct rsi *item, struct svc_rqst *rqstp) +{ + struct rsi *tmp; + struct cache_head **hp, **head; + + head = &rsi_cache.hash_table[rsi_hash(item)]; + read_lock(&rsi_cache.hash_lock); + for (hp = head; *hp != NULL; hp = &tmp->h.next) { + tmp = container_of(*hp, struct rsi, h); + if (rsi_match(tmp, item)) { + if (!test_bit(CACHE_VALID, &tmp->h.flags)) { + read_unlock(&rsi_cache.hash_lock); + return NULL; + } + *hp = tmp->h.next; + tmp->h.next = NULL; + rsi_cache.entries--; + read_unlock(&rsi_cache.hash_lock); + return tmp; + } + } + cache_get(&item->h); + item->h.next = *head; + *head = &item->h; + rsi_cache.entries++; + read_unlock(&rsi_cache.hash_lock); + cache_get(&item->h); + if (cache_check(&rsi_cache, &item->h, &rqstp->rq_chandle)) + return NULL; + return item; +} static int rsi_parse(struct cache_detail *cd, char *mesg, int mlen) @@ -176,17 +213,22 @@ static int rsi_parse(struct cache_detail char *buf = mesg; char *ep; int len; - struct rsi rsii, *rsip = NULL; + struct rsi *rsii; time_t expiry; int status = -EINVAL; - memset(&rsii, 0, sizeof(rsii)); + rsii = kmalloc(sizeof(*rsii), GFP_KERNEL); + if (!rsii) + return -ENOMEM; + memset(rsii, 0, sizeof(*rsii)); + cache_init(&rsii->h); + /* handle */ len = qword_get(&mesg, buf, mlen); if (len < 0) goto out; status = -ENOMEM; - if (dup_to_netobj(&rsii.in_handle, buf, len)) + if (dup_to_netobj(&rsii->in_handle, buf, len)) goto out; /* token */ @@ -195,10 +237,9 @@ static int rsi_parse(struct cache_detail if (len < 0) goto out; status = -ENOMEM; - if (dup_to_netobj(&rsii.in_token, buf, len)) + if (dup_to_netobj(&rsii->in_token, buf, len)) goto out; - rsii.h.flags = 0; /* expiry */ expiry = get_expiry(&mesg); status = -EINVAL; @@ -212,13 +253,13 @@ static int rsi_parse(struct cache_detail if (len == 0) { goto out; } else { - rsii.major_status = simple_strtoul(buf, &ep, 10); + rsii->major_status = simple_strtoul(buf, &ep, 10); if (*ep) goto out; len = qword_get(&mesg, buf, mlen); if (len <= 0) goto out; - rsii.minor_status = simple_strtoul(buf, &ep, 10); + rsii->minor_status = simple_strtoul(buf, &ep, 10); if (*ep) goto out; @@ -227,7 +268,7 @@ static int rsi_parse(struct cache_detail if (len < 0) goto out; status = -ENOMEM; - if (dup_to_netobj(&rsii.out_handle, buf, len)) + if (dup_to_netobj(&rsii->out_handle, buf, len)) goto out; /* out_token */ @@ -236,16 +277,14 @@ static int rsi_parse(struct cache_detail if (len < 0) goto out; status = -ENOMEM; - if (dup_to_netobj(&rsii.out_token, buf, len)) + if (dup_to_netobj(&rsii->out_token, buf, len)) goto out; } - rsii.h.expiry_time = expiry; - rsip = rsi_lookup(&rsii, 1); - status = 0; + rsii->h.expiry_time = expiry; + status = gssd_reply(rsii); out: - rsi_free(&rsii); - if (rsip) - rsi_put(&rsip->h, &rsi_cache); + if (rsii) + rsi_put(&rsii->h, &rsi_cache); return status; } @@ -258,8 +297,6 @@ static struct cache_detail rsi_cache = { .cache_parse = rsi_parse, }; -static DefineSimpleCacheLookup(rsi, 0) - /* * The rpcsec_context cache is used to store a context that is * used in data exchange. 
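[Editor's note] The gssd_reply()/gssd_upcall() pair above and the rsc_lookup() in the following hunks open-code what the removed DefineSimpleCacheLookup() instantiations used to generate: walk a hash chain under a lock, take a reference on a hit, and otherwise optionally hash in the caller's pre-built entry. The sketch below is a minimal user-space rendering of that lookup-or-insert pattern; it assumes a single mutex and a plain integer refcount in place of the kernel's read/write hash lock, cache_get()/cache_put() and CACHE_HASHED/CACHE_VALID bookkeeping, and all identifiers in it are illustrative.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define SK_HASHSZ 16

struct sk_entry {
	struct sk_entry *next;		/* hash-chain link */
	char key[32];			/* stands in for the handle netobj */
	int refcount;			/* stands in for cache_get()/cache_put() */
};

static struct sk_entry *sk_table[SK_HASHSZ];
static pthread_mutex_t sk_lock = PTHREAD_MUTEX_INITIALIZER;

static unsigned int sk_hash(const char *key)
{
	unsigned int h = 0;

	while (*key)
		h = h * 31 + (unsigned char)*key++;
	return h % SK_HASHSZ;
}

/*
 * Walk the chain for a match; on a miss, optionally insert @new.
 * A reference is held on whatever entry is returned.
 */
static struct sk_entry *sk_lookup(struct sk_entry *new, int set)
{
	struct sk_entry **hp, **head = &sk_table[sk_hash(new->key)];
	struct sk_entry *tmp = NULL;

	pthread_mutex_lock(&sk_lock);
	for (hp = head; *hp != NULL; hp = &tmp->next) {
		tmp = *hp;
		if (strcmp(tmp->key, new->key) == 0) {
			tmp->refcount++;	/* hit: hand out a reference */
			pthread_mutex_unlock(&sk_lock);
			return tmp;
		}
	}
	if (set) {				/* miss: hash in the new entry */
		new->next = *head;
		*head = new;
		new->refcount++;
	}
	pthread_mutex_unlock(&sk_lock);
	return set ? new : NULL;
}

int main(void)
{
	struct sk_entry *e = calloc(1, sizeof(*e));

	if (!e)
		return 1;
	strcpy(e->key, "ctx-handle");
	sk_lookup(e, 1);			/* insert */
	printf("hit: %s\n", sk_lookup(e, 0) ? "yes" : "no");
	return 0;
}

Open-coding the lookup is what lets the two caches diverge here: the rsi path validates downcalls in gssd_reply() and defers to an upcall on a miss, while rsc_lookup() drops any old entry and hashes in the caller's new one when called with set.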
@@ -292,7 +329,6 @@ struct rsc { static struct cache_head *rsc_table[RSC_HASHMAX]; static struct cache_detail rsc_cache; -static struct rsc *rsc_lookup(struct rsc *item, int set); static void rsc_free(struct rsc *rsci) { @@ -325,26 +361,44 @@ rsc_match(struct rsc *new, struct rsc *t return netobj_equal(&new->handle, &tmp->handle); } -static inline void -rsc_init(struct rsc *new, struct rsc *tmp) +static struct rsc *rsc_lookup(struct rsc *item, int set) { - new->handle.len = tmp->handle.len; - tmp->handle.len = 0; - new->handle.data = tmp->handle.data; - tmp->handle.data = NULL; - new->mechctx = NULL; - new->cred.cr_group_info = NULL; -} - -static inline void -rsc_update(struct rsc *new, struct rsc *tmp) -{ - new->mechctx = tmp->mechctx; - tmp->mechctx = NULL; - memset(&new->seqdata, 0, sizeof(new->seqdata)); - spin_lock_init(&new->seqdata.sd_lock); - new->cred = tmp->cred; - tmp->cred.cr_group_info = NULL; + struct rsc *tmp = NULL; + struct cache_head **hp, **head; + head = &rsc_cache.hash_table[rsc_hash(item)]; + + if (set) + write_lock(&rsc_cache.hash_lock); + else + read_lock(&rsc_cache.hash_lock); + for (hp = head; *hp != NULL; hp = &tmp->h.next) { + tmp = container_of(*hp, struct rsc, h); + if (!rsc_match(tmp, item)) + continue; + cache_get(&tmp->h); + if (!set) + goto out_noset; + *hp = tmp->h.next; + tmp->h.next = NULL; + clear_bit(CACHE_HASHED, &tmp->h.flags); + rsc_put(&tmp->h, &rsc_cache); + goto out_set; + } + /* Didn't find anything */ + if (!set) + goto out_noset; + rsc_cache.entries++; +out_set: + set_bit(CACHE_HASHED, &item->h.flags); + item->h.next = *head; + *head = &item->h; + write_unlock(&rsc_cache.hash_lock); + cache_fresh(&rsc_cache, &item->h, item->h.expiry_time); + cache_get(&item->h); + return item; +out_noset: + read_unlock(&rsc_cache.hash_lock); + return tmp; } static int rsc_parse(struct cache_detail *cd, @@ -353,19 +407,22 @@ static int rsc_parse(struct cache_detail /* contexthandle expiry [ uid gid N mechname ...mechdata... 
] */ char *buf = mesg; int len, rv; - struct rsc rsci, *rscp = NULL; + struct rsc *rsci, *res = NULL; time_t expiry; int status = -EINVAL; - memset(&rsci, 0, sizeof(rsci)); + rsci = kmalloc(sizeof(*rsci), GFP_KERNEL); + if (!rsci) + return -ENOMEM; + memset(rsci, 0, sizeof(*rsci)); + cache_init(&rsci->h); /* context handle */ len = qword_get(&mesg, buf, mlen); if (len < 0) goto out; status = -ENOMEM; - if (dup_to_netobj(&rsci.handle, buf, len)) + if (dup_to_netobj(&rsci->handle, buf, len)) goto out; - rsci.h.flags = 0; /* expiry */ expiry = get_expiry(&mesg); status = -EINVAL; @@ -373,26 +430,26 @@ static int rsc_parse(struct cache_detail goto out; /* uid, or NEGATIVE */ - rv = get_int(&mesg, &rsci.cred.cr_uid); + rv = get_int(&mesg, &rsci->cred.cr_uid); if (rv == -EINVAL) goto out; if (rv == -ENOENT) - set_bit(CACHE_NEGATIVE, &rsci.h.flags); + set_bit(CACHE_NEGATIVE, &rsci->h.flags); else { int N, i; struct gss_api_mech *gm; struct xdr_netobj tmp_buf; /* gid */ - if (get_int(&mesg, &rsci.cred.cr_gid)) + if (get_int(&mesg, &rsci->cred.cr_gid)) goto out; /* number of additional gid's */ if (get_int(&mesg, &N)) goto out; status = -ENOMEM; - rsci.cred.cr_group_info = groups_alloc(N); - if (rsci.cred.cr_group_info == NULL) + rsci->cred.cr_group_info = groups_alloc(N); + if (rsci->cred.cr_group_info == NULL) goto out; /* gid's */ @@ -401,7 +458,7 @@ static int rsc_parse(struct cache_detail gid_t gid; if (get_int(&mesg, &gid)) goto out; - GROUP_AT(rsci.cred.cr_group_info, i) = gid; + GROUP_AT(rsci->cred.cr_group_info, i) = gid; } /* mech name */ @@ -422,19 +479,21 @@ static int rsc_parse(struct cache_detail } tmp_buf.len = len; tmp_buf.data = buf; - if (gss_import_sec_context(&tmp_buf, gm, &rsci.mechctx)) { + if (gss_import_sec_context(&tmp_buf, gm, &rsci->mechctx)) { gss_mech_put(gm); goto out; } gss_mech_put(gm); } - rsci.h.expiry_time = expiry; - rscp = rsc_lookup(&rsci, 1); + rsci->h.expiry_time = expiry; + spin_lock_init(&rsci->seqdata.sd_lock); + res = rsc_lookup(rsci, 1); + rsc_put(&res->h, &rsc_cache); + rsci = NULL; status = 0; out: - rsc_free(&rsci); - if (rscp) - rsc_put(&rscp->h, &rsc_cache); + if (rsci) + rsc_put(&rsci->h, &rsc_cache); return status; } @@ -446,19 +505,14 @@ static struct cache_detail rsc_cache = { .cache_parse = rsc_parse, }; -static DefineSimpleCacheLookup(rsc, 0); - struct rsc * gss_svc_searchbyctx(struct xdr_netobj *handle) { struct rsc rsci; struct rsc *found; - memset(&rsci, 0, sizeof(rsci)); - if (dup_to_netobj(&rsci.handle, handle->data, handle->len)) - return NULL; + rsci.handle = *handle; found = rsc_lookup(&rsci, 0); - rsc_free(&rsci); if (!found) return NULL; if (cache_check(&rsc_cache, &found->h, NULL)) @@ -643,7 +697,6 @@ svcauth_gss_register_pseudoflavor(u32 ps if (!new) goto out; cache_init(&new->h.h); - atomic_inc(&new->h.h.refcnt); new->h.name = kmalloc(strlen(name) + 1, GFP_KERNEL); if (!new->h.name) goto out_free_dom; @@ -651,7 +704,6 @@ svcauth_gss_register_pseudoflavor(u32 ps new->h.flavour = RPC_AUTH_GSS; new->pseudoflavor = pseudoflavor; new->h.h.expiry_time = NEVER; - new->h.h.flags = 0; test = auth_domain_lookup(&new->h, 1); if (test == &new->h) { @@ -723,6 +775,45 @@ out: return stat; } +static int +unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) +{ + int stat = -EINVAL; + int out_offset; + u32 * lenp; + u32 priv_len, maj_stat; + int saved_len; + + lenp = buf->head[0].iov_base; + priv_len = ntohl(svc_getu32(&buf->head[0])); + if (priv_len > buf->len) /* XXXJBF: wrong check */ + goto out; + /* 
XXXJBF: bizarre hack: to handle revisits (and not decrypt + * twice), the first time through we write an offset + * telling us where to skip to find the already-decrypted data */ + if (rqstp->rq_deferred) { + buf->head[0].iov_base += priv_len; + buf->head[0].iov_len -= priv_len; + return 0; + } + saved_len = buf->len; /* XXX HACK */ + buf->len = priv_len; + maj_stat = gss_unwrap(ctx, GSS_C_QOP_DEFAULT, 0, buf, &out_offset); + buf->len = saved_len; + buf->head[0].iov_base += out_offset; + buf->head[0].iov_len -= out_offset; + BUG_ON(buf->head[0].iov_len <= 0); + if (maj_stat != GSS_S_COMPLETE) + goto out; + if (ntohl(svc_getu32(&buf->head[0])) != seq) + goto out; + /* XXXJBF: see "bizarre hack", above. */ + *lenp = htonl(out_offset + 4); + stat = 0; +out: + return stat; +} + struct gss_svc_data { /* decoded gss client cred: */ struct rpc_gss_wire_cred clcred; @@ -750,7 +841,7 @@ svcauth_gss_accept(struct svc_rqst *rqst struct gss_svc_data *svcdata = rqstp->rq_auth_data; struct rpc_gss_wire_cred *gc; struct rsc *rsci = NULL; - struct rsi *rsip, rsikey; + struct rsi *rsip, *rsikey = NULL; u32 *rpcstart; u32 *reject_stat = resv->iov_base + resv->iov_len; int ret; @@ -843,30 +934,23 @@ svcauth_gss_accept(struct svc_rqst *rqst *authp = rpc_autherr_badcred; if (gc->gc_proc == RPC_GSS_PROC_INIT && gc->gc_ctx.len != 0) goto auth_err; - memset(&rsikey, 0, sizeof(rsikey)); - if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx)) + rsikey = kmalloc(sizeof(*rsikey), GFP_KERNEL); + if (!rsikey) + goto drop; + memset(rsikey, 0, sizeof(*rsikey)); + cache_init(&rsikey->h); + if (dup_netobj(&rsikey->in_handle, &gc->gc_ctx)) goto drop; *authp = rpc_autherr_badverf; - if (svc_safe_getnetobj(argv, &tmpobj)) { - kfree(rsikey.in_handle.data); + if (svc_safe_getnetobj(argv, &tmpobj)) goto auth_err; - } - if (dup_netobj(&rsikey.in_token, &tmpobj)) { - kfree(rsikey.in_handle.data); + if (dup_netobj(&rsikey->in_token, &tmpobj)) goto drop; - } - rsip = rsi_lookup(&rsikey, 0); - rsi_free(&rsikey); - if (!rsip) { - goto drop; - } - switch(cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) { - case -EAGAIN: + rsip = gssd_upcall(rsikey, rqstp); + if (!rsip) goto drop; - case -ENOENT: - goto drop; - case 0: + else { rsci = gss_svc_searchbyctx(&rsip->out_handle); if (!rsci) { goto drop; @@ -921,7 +1005,16 @@ svcauth_gss_accept(struct svc_rqst *rqst svc_putu32(resv, 0); break; case RPC_GSS_SVC_PRIVACY: - /* currently unsupported */ + if (unwrap_priv_data(rqstp, &rqstp->rq_arg, + gc->gc_seq, rsci->mechctx)) + goto auth_err; + svcdata->rsci = rsci; + cache_get(&rsci->h); + /* placeholders for length and seq. number: */ + svcdata->body_start = resv->iov_base + resv->iov_len; + svc_putu32(resv, 0); + svc_putu32(resv, 0); + break; default: goto auth_err; } @@ -939,13 +1032,15 @@ complete: drop: ret = SVC_DROP; out: + if (rsikey) + rsi_put(&rsikey->h, &rsi_cache); if (rsci) rsc_put(&rsci->h, &rsc_cache); return ret; } -static int -svcauth_gss_release(struct svc_rqst *rqstp) +static inline int +svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp) { struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data; struct rpc_gss_wire_cred *gc = &gsd->clcred; @@ -957,6 +1052,156 @@ svcauth_gss_release(struct svc_rqst *rqs int integ_offset, integ_len; int stat = -EINVAL; + p = gsd->body_start; + gsd->body_start = 0; + /* move accept_stat to right place: */ + memcpy(p, p + 2, 4); + /* Don't wrap in failure case: */ + /* Counting on not getting here if call was not even accepted! 
*/ + if (*p != rpc_success) { + resbuf->head[0].iov_len -= 2 * 4; + goto out; + } + p++; + integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base; + integ_len = resbuf->len - integ_offset; + BUG_ON(integ_len % 4); + *p++ = htonl(integ_len); + *p++ = htonl(gc->gc_seq); + if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, + integ_len)) + BUG(); + if (resbuf->page_len == 0 + && resbuf->tail[0].iov_len + RPC_MAX_AUTH_SIZE + < PAGE_SIZE) { + BUG_ON(resbuf->tail[0].iov_len); + /* Use head for everything */ + resv = &resbuf->head[0]; + } else if (resbuf->tail[0].iov_base == NULL) { + /* copied from nfsd4_encode_read */ + svc_take_page(rqstp); + resbuf->tail[0].iov_base = page_address(rqstp + ->rq_respages[rqstp->rq_resused-1]); + rqstp->rq_restailpage = rqstp->rq_resused-1; + resbuf->tail[0].iov_len = 0; + resv = &resbuf->tail[0]; + } else { + resv = &resbuf->tail[0]; + } + mic.data = (u8 *)resv->iov_base + resv->iov_len + 4; + if (gss_get_mic(gsd->rsci->mechctx, 0, &integ_buf, &mic)) + goto out_err; + svc_putu32(resv, htonl(mic.len)); + memset(mic.data + mic.len, 0, + round_up_to_quad(mic.len) - mic.len); + resv->iov_len += XDR_QUADLEN(mic.len) << 2; + /* not strictly required: */ + resbuf->len += XDR_QUADLEN(mic.len) << 2; + BUG_ON(resv->iov_len > PAGE_SIZE); +out: + stat = 0; +out_err: + return stat; +} + +/* XXXJBF: Look for chances to share code with client */ +/* XXXJBF: Do we need to preallocate these pages somehow? E.g. see + * buffer size calculations in svcsock.c */ +/* XXXJBF: how does reference counting on pages work? */ +static struct page ** +svc_alloc_enc_pages(struct xdr_buf *buf) +{ + struct page **ret; + int last, i; + + if (buf->page_len == 0) + return NULL; + BUG_ON(buf->page_base >> PAGE_CACHE_SHIFT); + last = (buf->page_base + buf->page_len - 1) >> PAGE_CACHE_SHIFT; + ret = kmalloc((last + 1) * sizeof(struct page *), GFP_KERNEL); + if (!ret) + goto out; + for (i = 0; i<= last; i++) { + ret[i] = alloc_page(GFP_KERNEL); + if (ret[i] == NULL) + goto out_free; + } +out: + return ret; +out_free: + for (i--; i >= 0; i--) { + __free_page(ret[i]); + } + return NULL; +} + +static inline int +svcauth_gss_wrap_resp_priv(struct svc_rqst *rqstp) +{ + struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data; + struct rpc_gss_wire_cred *gc = &gsd->clcred; + struct xdr_buf *resbuf = &rqstp->rq_res; + struct page **inpages; + u32 *p; + int offset, *len; + int pad; + int stat = -EINVAL; + + p = gsd->body_start; + gsd->body_start = 0; + /* move accept_stat to right place: */ + memcpy(p, p + 2, 4); + /* Don't wrap in failure case: */ + /* Counting on not getting here if call was not even accepted! */ + if (*p != rpc_success) { + resbuf->head[0].iov_len -= 2 * 4; + goto out; + } + p++; + len = p++; + offset = (u8 *)p - (u8 *)resbuf->head[0].iov_base; + *p++ = htonl(gc->gc_seq); + stat = -ENOMEM; + inpages = resbuf->pages; + /* XXXJBF: huge memory leaks here: allocated pages probably aren't + * freed, and neither is memory used to hold page array. */ + resbuf->pages = svc_alloc_enc_pages(resbuf); + if (resbuf->page_len && !resbuf->pages) + goto out_err; /* XXX sleep and retry? Reserve ahead of time + and BUG_ON? 
*/ + if (resbuf->tail[0].iov_len == 0 || resbuf->tail[0].iov_base == NULL) { + /* copied from nfsd4_encode_read */ + {int i = svc_take_page(rqstp); BUG_ON(i); } + resbuf->tail[0].iov_base = page_address(rqstp + ->rq_respages[rqstp->rq_resused-1]); + rqstp->rq_restailpage = rqstp->rq_resused-1; + resbuf->tail[0].iov_len = 0; + } + /* XXX: Will svc code attempt to free stuff in xdr_buf->pages? + * Or can we leave it in any old state on error?? */ + stat = -EINVAL; + if (gss_wrap(gsd->rsci->mechctx, GSS_C_QOP_DEFAULT, offset, + resbuf, inpages)) + goto out_err; + *len = htonl(resbuf->len - offset); + pad = 3 - ((resbuf->len - offset - 1)&3); + p = (u32 *)(resbuf->tail[0].iov_base + resbuf->tail[0].iov_len); + memset(p, 0, pad); + resbuf->tail[0].iov_len += pad; +out: + return 0; +out_err: + return stat; +} + +static int +svcauth_gss_release(struct svc_rqst *rqstp) +{ + struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data; + struct rpc_gss_wire_cred *gc = &gsd->clcred; + struct xdr_buf *resbuf = &rqstp->rq_res; + int stat = -EINVAL; + if (gc->gc_proc != RPC_GSS_PROC_DATA) goto out; /* Release can be called twice, but we only wrap once. */ @@ -969,55 +1214,15 @@ svcauth_gss_release(struct svc_rqst *rqs case RPC_GSS_SVC_NONE: break; case RPC_GSS_SVC_INTEGRITY: - p = gsd->body_start; - gsd->body_start = 0; - /* move accept_stat to right place: */ - memcpy(p, p + 2, 4); - /* don't wrap in failure case: */ - /* Note: counting on not getting here if call was not even - * accepted! */ - if (*p != rpc_success) { - resbuf->head[0].iov_len -= 2 * 4; - goto out; - } - p++; - integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base; - integ_len = resbuf->len - integ_offset; - BUG_ON(integ_len % 4); - *p++ = htonl(integ_len); - *p++ = htonl(gc->gc_seq); - if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, - integ_len)) - BUG(); - if (resbuf->page_len == 0 - && resbuf->tail[0].iov_len + RPC_MAX_AUTH_SIZE - < PAGE_SIZE) { - BUG_ON(resbuf->tail[0].iov_len); - /* Use head for everything */ - resv = &resbuf->head[0]; - } else if (resbuf->tail[0].iov_base == NULL) { - /* copied from nfsd4_encode_read */ - svc_take_page(rqstp); - resbuf->tail[0].iov_base = page_address(rqstp - ->rq_respages[rqstp->rq_resused-1]); - rqstp->rq_restailpage = rqstp->rq_resused-1; - resbuf->tail[0].iov_len = 0; - resv = &resbuf->tail[0]; - } else { - resv = &resbuf->tail[0]; - } - mic.data = (u8 *)resv->iov_base + resv->iov_len + 4; - if (gss_get_mic(gsd->rsci->mechctx, 0, &integ_buf, &mic)) + stat = svcauth_gss_wrap_resp_integ(rqstp); + if (stat) goto out_err; - svc_putu32(resv, htonl(mic.len)); - memset(mic.data + mic.len, 0, - round_up_to_quad(mic.len) - mic.len); - resv->iov_len += XDR_QUADLEN(mic.len) << 2; - /* not strictly required: */ - resbuf->len += XDR_QUADLEN(mic.len) << 2; - BUG_ON(resv->iov_len > PAGE_SIZE); break; case RPC_GSS_SVC_PRIVACY: + stat = svcauth_gss_wrap_resp_priv(rqstp); + if (stat) + goto out_err; + break; default: goto out_err; } --- linux-2.6.7/net/sunrpc/auth_gss/gss_krb5_wrap.c.lsec 2005-03-23 14:28:24.900264280 -0700 +++ linux-2.6.7/net/sunrpc/auth_gss/gss_krb5_wrap.c 2005-03-23 14:28:24.900264280 -0700 @@ -0,0 +1,337 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_AUTH +#endif + +static inline int +gss_krb5_padding(int blocksize, int length) +{ + /* Most of the code is block-size independent but currently we + * use only 8: */ + BUG_ON(blocksize != 8); + return 8 - (length & 7); +} + 
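The padding convention used by gss_krb5_padding() above, and consumed by
gss_krb5_add_padding()/gss_krb5_remove_padding() just below, is the usual one for
an 8-byte cipher block: between 1 and 8 pad bytes are appended, each byte holding
the pad length, so the receiver can recover the plaintext length simply by reading
the final byte. A minimal user-space sketch of the same scheme, operating on a
plain byte array rather than an xdr_buf -- the helper names pad_to_blocksize()
and unpad() are invented for illustration and are not part of this patch:

	#include <assert.h>
	#include <string.h>

	#define GSS_KRB5_BLOCKSIZE 8

	/* Append 1..8 pad bytes, each holding the pad length
	 * (cf. gss_krb5_add_padding above). */
	static size_t pad_to_blocksize(unsigned char *buf, size_t len)
	{
		size_t pad = GSS_KRB5_BLOCKSIZE - (len & (GSS_KRB5_BLOCKSIZE - 1));

		memset(buf + len, (int)pad, pad);
		return len + pad;		/* always a multiple of 8 */
	}

	/* Strip the padding again by inspecting the last byte
	 * (cf. gss_krb5_remove_padding below). */
	static int unpad(const unsigned char *buf, size_t len, size_t *newlen)
	{
		unsigned char pad = buf[len - 1];

		if (pad == 0 || pad > GSS_KRB5_BLOCKSIZE)
			return -1;		/* corrupt or foreign data */
		*newlen = len - pad;
		return 0;
	}

	int main(void)
	{
		unsigned char buf[32] = "hello";	/* 5 payload bytes */
		size_t padded = pad_to_blocksize(buf, 5);
		size_t plain;

		assert(padded == 8 && buf[7] == 3);	/* three pad bytes of value 3 */
		assert(unpad(buf, padded, &plain) == 0 && plain == 5);
		return 0;
	}

Note that an already block-aligned length still gets a full block (8 bytes) of
padding, which is what keeps removal unambiguous.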
+static inline void +gss_krb5_add_padding(struct xdr_buf *buf, int offset, int blocksize) +{ + int padding = gss_krb5_padding(blocksize, buf->len - offset); + char *p; + struct iovec *iov; + + if (buf->page_len || buf->tail[0].iov_len) + iov = &buf->tail[0]; + else + iov = &buf->head[0]; + p = iov->iov_base + iov->iov_len; + iov->iov_len += padding; + buf->len += padding; + memset(p, padding, padding); +} + +static inline int +gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize) +{ + u8 *ptr; + u8 pad; + int len = buf->len; + + if (len <= buf->head[0].iov_len) { + pad = *(u8 *)(buf->head[0].iov_base + len - 1); + goto out; + } else + len -= buf->head[0].iov_len; + if (len <= buf->page_len) { + int last = (buf->page_base + len - 1) + >>PAGE_CACHE_SHIFT; + int offset = (buf->page_base + len - 1) + & (PAGE_CACHE_SIZE - 1); + ptr = kmap_atomic(buf->pages[last], KM_SKB_SUNRPC_DATA); + pad = *(ptr + offset); + kunmap_atomic(ptr, KM_SKB_SUNRPC_DATA); + goto out; + } else + len -= buf->page_len; + BUG_ON(len > buf->tail[0].iov_len); + pad = *(u8 *)(buf->tail[0].iov_base + len - 1); +out: + if (pad > blocksize) + return -EINVAL; + buf->len -= pad; + return 0; +} + +static inline void +make_confounder(char *p, int blocksize) +{ + /* XXX? Is this OK to do on every packet? */ + get_random_bytes(p, blocksize); +} + +/* Assumptions: the head and tail of inbuf are ours to play with. + * The pages, however, may be real pages in the page cache and we replace + * them with scratch pages from **pages before writing to them. */ +/* XXX: obviously the above should be documentation of wrap interface, + * and shouldn't be in this kerberos-specific file. */ + +/* XXX factor out common code with seal/unseal. */ + +u32 +gss_wrap_kerberos(struct gss_ctx *ctx, u32 qop, int offset, + struct xdr_buf *buf, struct page **pages) +{ + struct krb5_ctx *kctx = ctx->internal_ctx_id; + s32 checksum_type; + struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; + int blocksize = 0, plainlen; + unsigned char *ptr, *krb5_hdr, *msg_start; + s32 now; + int headlen; + struct page **tmp_pages; + u32 seq_send; + + dprintk("RPC: gss_wrap_kerberos\n"); + + now = get_seconds(); + + if (qop != 0) + goto out_err; + + switch (kctx->signalg) { + case SGN_ALG_DES_MAC_MD5: + checksum_type = CKSUMTYPE_RSA_MD5; + break; + default: + dprintk("RPC: gss_krb5_seal: kctx->signalg %d not" + " supported\n", kctx->signalg); + goto out_err; + } + if (kctx->sealalg != SEAL_ALG_NONE && kctx->sealalg != SEAL_ALG_DES) { + dprintk("RPC: gss_krb5_seal: kctx->sealalg %d not supported\n", + kctx->sealalg); + goto out_err; + } + + blocksize = crypto_tfm_alg_blocksize(kctx->enc); + gss_krb5_add_padding(buf, offset, blocksize); + BUG_ON((buf->len - offset) % blocksize); + plainlen = blocksize + buf->len - offset; + + headlen = g_token_size(&kctx->mech_used, 22 + plainlen) - + (buf->len - offset); + + ptr = buf->head[0].iov_base + offset; + /* shift data to make room for header. */ + /* XXX Would be cleverer to encrypt while copying. */ + /* XXX bounds checking, slack, etc. 
*/ + memmove(ptr + headlen, ptr, buf->head[0].iov_len - offset); + buf->head[0].iov_len += headlen; + buf->len += headlen; + BUG_ON((buf->len - offset - headlen) % blocksize); + + g_make_token_header(&kctx->mech_used, 22 + plainlen, &ptr); + + + *ptr++ = (unsigned char) ((KG_TOK_WRAP_MSG>>8)&0xff); + *ptr++ = (unsigned char) (KG_TOK_WRAP_MSG&0xff); + + /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */ + krb5_hdr = ptr - 2; + msg_start = krb5_hdr + 24; + /* XXXJBF: */ BUG_ON(buf->head[0].iov_base + offset + headlen != msg_start + blocksize); + + *(u16 *)(krb5_hdr + 2) = htons(kctx->signalg); + memset(krb5_hdr + 4, 0xff, 4); + *(u16 *)(krb5_hdr + 4) = htons(kctx->sealalg); + + make_confounder(msg_start, blocksize); + + /* XXXJBF: UGH!: */ + tmp_pages = buf->pages; + buf->pages = pages; + if (make_checksum(checksum_type, krb5_hdr, 8, buf, + offset + headlen - blocksize, &md5cksum)) + goto out_err; + buf->pages = tmp_pages; + + switch (kctx->signalg) { + case SGN_ALG_DES_MAC_MD5: + if (krb5_encrypt(kctx->seq, NULL, md5cksum.data, + md5cksum.data, md5cksum.len)) + goto out_err; + memcpy(krb5_hdr + 16, + md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH, + KRB5_CKSUM_LENGTH); + + dprintk("RPC: make_seal_token: cksum data: \n"); + print_hexl((u32 *) (krb5_hdr + 16), KRB5_CKSUM_LENGTH, 0); + break; + default: + BUG(); + } + + kfree(md5cksum.data); + + spin_lock(&krb5_seq_lock); + seq_send = kctx->seq_send++; + spin_unlock(&krb5_seq_lock); + + /* XXX would probably be more efficient to compute checksum + * and encrypt at the same time: */ + if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff, + seq_send, krb5_hdr + 16, krb5_hdr + 8))) + goto out_err; + + if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize, + pages)) + goto out_err; + + return ((kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE); +out_err: + if (md5cksum.data) kfree(md5cksum.data); + return GSS_S_FAILURE; +} + +u32 +gss_unwrap_kerberos(struct gss_ctx *ctx, u32 *qop, int offset, + struct xdr_buf *buf, int *out_offset) +{ + struct krb5_ctx *kctx = ctx->internal_ctx_id; + int signalg; + int sealalg; + s32 checksum_type; + struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; + s32 now; + int direction; + s32 seqnum; + unsigned char *ptr; + int bodysize; + u32 ret = GSS_S_DEFECTIVE_TOKEN; + u8 *data_start; + int blocksize; + + dprintk("RPC: gss_unwrap_kerberos\n"); + + ptr = (u8 *)buf->head[0].iov_base + offset; + if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr, + buf->len - offset)) + goto out; + + if ((*ptr++ != ((KG_TOK_WRAP_MSG>>8)&0xff)) || + (*ptr++ != (KG_TOK_WRAP_MSG &0xff)) ) + goto out; + + /* XXX sanity-check bodysize?? */ + + /* get the sign and seal algorithms */ + + signalg = ptr[0] + (ptr[1] << 8); + sealalg = ptr[2] + (ptr[3] << 8); + + /* Sanity checks */ + + if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) + goto out; + + if (sealalg == 0xffff) + goto out; + + /* in the current spec, there is only one valid seal algorithm per + key type, so a simple comparison is ok */ + + if (sealalg != kctx->sealalg) + goto out; + + /* there are several mappings of seal algorithms to sign algorithms, + but few enough that we can try them all. 
*/ + + if ((kctx->sealalg == SEAL_ALG_NONE && signalg > 1) || + (kctx->sealalg == SEAL_ALG_1 && signalg != SGN_ALG_3) || + (kctx->sealalg == SEAL_ALG_DES3KD && + signalg != SGN_ALG_HMAC_SHA1_DES3_KD)) + goto out; + + if (gss_decrypt_xdr_buf(kctx->enc, buf, + ptr + 22 - (unsigned char *)buf->head[0].iov_base)) + goto out; + + /* compute the checksum of the message */ + + /* initialize the the cksum */ + switch (signalg) { + case SGN_ALG_DES_MAC_MD5: + checksum_type = CKSUMTYPE_RSA_MD5; + break; + default: + ret = GSS_S_DEFECTIVE_TOKEN; + goto out; + } + + switch (signalg) { + case SGN_ALG_DES_MAC_MD5: + ret = make_checksum(checksum_type, ptr - 2, 8, buf, + ptr + 22 - (unsigned char *)buf->head[0].iov_base, &md5cksum); + if (ret) + goto out; + + ret = krb5_encrypt(kctx->seq, NULL, md5cksum.data, + md5cksum.data, md5cksum.len); + if (ret) + goto out; + + if (memcmp(md5cksum.data + 8, ptr + 14, 8)) { + ret = GSS_S_BAD_SIG; + goto out; + } + break; + default: + ret = GSS_S_DEFECTIVE_TOKEN; + goto out; + } + + /* it got through unscathed. Make sure the context is unexpired */ + + if (qop) + *qop = GSS_C_QOP_DEFAULT; + + now = get_seconds(); + + ret = GSS_S_CONTEXT_EXPIRED; + if (now > kctx->endtime) + goto out; + + /* do sequencing checks */ + + ret = GSS_S_BAD_SIG; + if ((ret = krb5_get_seq_num(kctx->seq, ptr + 14, ptr + 6, &direction, + &seqnum))) + goto out; + + if ((kctx->initiate && direction != 0xff) || + (!kctx->initiate && direction != 0)) + goto out; + + /* Copy the data back to the right position. XXX: Would probably be + * better to copy and encrypt at the same time. */ + + blocksize = crypto_tfm_alg_blocksize(kctx->enc); + data_start = ptr + 22 + blocksize; + *out_offset = data_start - (u8 *)buf->head[0].iov_base; + + ret = GSS_S_DEFECTIVE_TOKEN; + if (gss_krb5_remove_padding(buf, blocksize)) + goto out; + + ret = GSS_S_COMPLETE; +out: + if (md5cksum.data) kfree(md5cksum.data); + return ret; +} --- linux-2.6.7/net/sunrpc/auth_gss/gss_mech_switch.c.lsec 2004-06-15 23:19:37.000000000 -0600 +++ linux-2.6.7/net/sunrpc/auth_gss/gss_mech_switch.c 2005-03-23 14:28:24.782282216 -0700 @@ -279,6 +279,29 @@ gss_verify_mic(struct gss_ctx *context_ qstate); } +u32 +gss_wrap(struct gss_ctx *ctx_id, + u32 qop, + int offset, + struct xdr_buf *buf, + struct page **inpages) +{ + return ctx_id->mech_type->gm_ops + ->gss_wrap(ctx_id, qop, offset, buf, inpages); +} + +u32 +gss_unwrap(struct gss_ctx *ctx_id, + u32 *qop, + int offset, + struct xdr_buf *buf, + int *out_offset) +{ + return ctx_id->mech_type->gm_ops + ->gss_unwrap(ctx_id, qop, offset, buf, out_offset); +} + + /* gss_delete_sec_context: free all resources associated with context_handle. * Note this differs from the RFC 2744-specified prototype in that we don't * bother returning an output token, since it would never be used anyway. */ --- linux-2.6.7/net/sunrpc/auth_gss/gss_krb5_mech.c.lsec 2004-06-15 23:19:42.000000000 -0600 +++ linux-2.6.7/net/sunrpc/auth_gss/gss_krb5_mech.c 2005-03-23 14:28:24.841273248 -0700 @@ -182,6 +182,7 @@ gss_delete_sec_context_kerberos(void *in kfree(kctx); } +/* XXX the following wrappers have become pointless; kill them. 
*/ static u32 gss_verify_mic_kerberos(struct gss_ctx *ctx, struct xdr_buf *message, @@ -191,8 +192,7 @@ gss_verify_mic_kerberos(struct gss_ctx int qop_state; struct krb5_ctx *kctx = ctx->internal_ctx_id; - maj_stat = krb5_read_token(kctx, mic_token, message, &qop_state, - KG_TOK_MIC_MSG); + maj_stat = krb5_read_token(kctx, mic_token, message, &qop_state); if (!maj_stat && qop_state) *qstate = qop_state; @@ -208,7 +208,7 @@ gss_get_mic_kerberos(struct gss_ctx *ctx u32 err = 0; struct krb5_ctx *kctx = ctx->internal_ctx_id; - err = krb5_make_token(kctx, qop, message, mic_token, KG_TOK_MIC_MSG); + err = krb5_make_token(kctx, qop, message, mic_token); dprintk("RPC: gss_get_mic_kerberos returning %d\n",err); @@ -219,6 +219,8 @@ static struct gss_api_ops gss_kerberos_o .gss_import_sec_context = gss_import_sec_context_kerberos, .gss_get_mic = gss_get_mic_kerberos, .gss_verify_mic = gss_verify_mic_kerberos, + .gss_wrap = gss_wrap_kerberos, + .gss_unwrap = gss_unwrap_kerberos, .gss_delete_sec_context = gss_delete_sec_context_kerberos, }; @@ -233,6 +235,11 @@ static struct pf_desc gss_kerberos_pfs[] .service = RPC_GSS_SVC_INTEGRITY, .name = "krb5i", }, + [2] = { + .pseudoflavor = RPC_AUTH_GSS_KRB5P, + .service = RPC_GSS_SVC_PRIVACY, + .name = "krb5p", + }, }; static struct gss_api_mech gss_kerberos_mech = { --- linux-2.6.7/net/sunrpc/auth_gss/gss_krb5_seal.c.lsec 2004-06-15 23:18:37.000000000 -0600 +++ linux-2.6.7/net/sunrpc/auth_gss/gss_krb5_seal.c 2005-03-23 14:28:24.898264584 -0700 @@ -70,24 +70,17 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif -static inline int -gss_krb5_padding(int blocksize, int length) { - /* Most of the code is block-size independent but in practice we - * use only 8: */ - BUG_ON(blocksize != 8); - return 8 - (length & 7); -} +spinlock_t krb5_seq_lock = SPIN_LOCK_UNLOCKED; u32 krb5_make_token(struct krb5_ctx *ctx, int qop_req, - struct xdr_buf *text, struct xdr_netobj *token, - int toktype) + struct xdr_buf *text, struct xdr_netobj *token) { s32 checksum_type; struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; - int blocksize = 0, tmsglen; unsigned char *ptr, *krb5_hdr, *msg_start; s32 now; + u32 seq_send; dprintk("RPC: gss_krb5_seal\n"); @@ -111,21 +104,13 @@ krb5_make_token(struct krb5_ctx *ctx, in goto out_err; } - if (toktype == KG_TOK_WRAP_MSG) { - blocksize = crypto_tfm_alg_blocksize(ctx->enc); - tmsglen = blocksize + text->len - + gss_krb5_padding(blocksize, blocksize + text->len); - } else { - tmsglen = 0; - } - - token->len = g_token_size(&ctx->mech_used, 22 + tmsglen); + token->len = g_token_size(&ctx->mech_used, 22); ptr = token->data; - g_make_token_header(&ctx->mech_used, 22 + tmsglen, &ptr); + g_make_token_header(&ctx->mech_used, 22, &ptr); - *ptr++ = (unsigned char) ((toktype>>8)&0xff); - *ptr++ = (unsigned char) (toktype&0xff); + *ptr++ = (unsigned char) ((KG_TOK_MIC_MSG>>8)&0xff); + *ptr++ = (unsigned char) (KG_TOK_MIC_MSG&0xff); /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */ krb5_hdr = ptr - 2; @@ -133,17 +118,9 @@ krb5_make_token(struct krb5_ctx *ctx, in *(u16 *)(krb5_hdr + 2) = htons(ctx->signalg); memset(krb5_hdr + 4, 0xff, 4); - if (toktype == KG_TOK_WRAP_MSG) - *(u16 *)(krb5_hdr + 4) = htons(ctx->sealalg); - if (toktype == KG_TOK_WRAP_MSG) { - /* XXX removing support for now */ - goto out_err; - } else { /* Sign only. 
*/ - if (make_checksum(checksum_type, krb5_hdr, 8, text, - &md5cksum)) + if (make_checksum(checksum_type, krb5_hdr, 8, text, 0, &md5cksum)) goto out_err; - } switch (ctx->signalg) { case SGN_ALG_DES_MAC_MD5: @@ -163,12 +140,14 @@ krb5_make_token(struct krb5_ctx *ctx, in kfree(md5cksum.data); + spin_lock(&krb5_seq_lock); + seq_send = ctx->seq_send++; + spin_unlock(&krb5_seq_lock); + if ((krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff, - ctx->seq_send, krb5_hdr + 16, krb5_hdr + 8))) + seq_send, krb5_hdr + 16, krb5_hdr + 8))) goto out_err; - ctx->seq_send++; - return ((ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE); out_err: if (md5cksum.data) kfree(md5cksum.data); --- linux-2.6.7/net/sunrpc/auth_gss/gss_spkm3_token.c.lsec 2005-03-23 14:28:24.240364600 -0700 +++ linux-2.6.7/net/sunrpc/auth_gss/gss_spkm3_token.c 2005-03-23 14:28:24.239364752 -0700 @@ -0,0 +1,266 @@ +/* + * linux/net/sunrpc/gss_spkm3_token.c + * + * Copyright (c) 2003 The Regents of the University of Michigan. + * All rights reserved. + * + * Andy Adamson + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include +#include +#include +#include +#include + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_AUTH +#endif + +/* + * asn1_bitstring_len() + * + * calculate the asn1 bitstring length of the xdr_netobject + */ +void +asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits) +{ + int i, zbit = 0,elen = in->len; + char *ptr; + + ptr = &in->data[in->len -1]; + + /* count trailing 0's */ + for(i = in->len; i > 0; i--) { + if (*ptr == 0) { + ptr--; + elen--; + } else + break; + } + + /* count number of 0 bits in final octet */ + ptr = &in->data[elen - 1]; + for(i = 0; i < 8; i++) { + short mask = 0x01; + + if (!((mask << i) & *ptr)) + zbit++; + else + break; + } + *enclen = elen; + *zerobits = zbit; +} + +/* + * decode_asn1_bitstring() + * + * decode a bitstring into a buffer of the expected length. 
+ * enclen = bit string length + * explen = expected length (define in rfc) + */ +int +decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, int explen) +{ + if (!(out->data = kmalloc(explen,GFP_KERNEL))) + return 0; + out->len = explen; + memset(out->data, 0, explen); + memcpy(out->data, in, enclen); + return 1; +} + +/* + * SPKMInnerContextToken choice SPKM_MIC asn1 token layout + * + * contextid is always 16 bytes plain data. max asn1 bitstring len = 17. + * + * tokenlen = pos[0] to end of token (max pos[45] with MD5 cksum) + * + * pos value + * ---------- + * [0] a4 SPKM-MIC tag + * [1] ?? innertoken length (max 44) + * + * + * tok_hdr piece of checksum data starts here + * + * the maximum mic-header len = 9 + 17 = 26 + * mic-header + * ---------- + * [2] 30 SEQUENCE tag + * [3] ?? mic-header length: (max 23) = TokenID + ContextID + * + * TokenID - all fields constant and can be hardcoded + * ------- + * [4] 02 Type 2 + * [5] 02 Length 2 + * [6][7] 01 01 TokenID (SPKM_MIC_TOK) + * + * ContextID - encoded length not constant, calculated + * --------- + * [8] 03 Type 3 + * [9] ?? encoded length + * [10] ?? ctxzbit + * [11] contextid + * + * mic_header piece of checksum data ends here. + * + * int-cksum - encoded length not constant, calculated + * --------- + * [??] 03 Type 3 + * [??] ?? encoded length + * [??] ?? md5zbit + * [??] int-cksum (NID_md5 = 16) + * + * maximum SPKM-MIC innercontext token length = + * 10 + encoded contextid_size(17 max) + 2 + encoded + * cksum_size (17 maxfor NID_md5) = 46 + */ + +/* + * spkm3_mic_header() + * + * Prepare the SPKM_MIC_TOK mic-header for check-sum calculation + * elen: 16 byte context id asn1 bitstring encoded length + */ +void +spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen, unsigned char *ctxdata, int elen, int zbit) +{ + char *hptr = *hdrbuf; + char *top = *hdrbuf; + + *(u8 *)hptr++ = 0x30; + *(u8 *)hptr++ = elen + 7; /* on the wire header length */ + + /* tokenid */ + *(u8 *)hptr++ = 0x02; + *(u8 *)hptr++ = 0x02; + *(u8 *)hptr++ = 0x01; + *(u8 *)hptr++ = 0x01; + + /* coniextid */ + *(u8 *)hptr++ = 0x03; + *(u8 *)hptr++ = elen + 1; /* add 1 to include zbit */ + *(u8 *)hptr++ = zbit; + memcpy(hptr, ctxdata, elen); + hptr += elen; + *hdrlen = hptr - top; +} + +/* + * spkm3_mic_innercontext_token() + * + * *tokp points to the beginning of the SPKM_MIC token described + * in rfc 2025, section 3.2.1: + * + */ +void +spkm3_make_mic_token(unsigned char **tokp, int toklen, struct xdr_netobj *mic_hdr, struct xdr_netobj *md5cksum, int md5elen, int md5zbit) +{ + unsigned char *ict = *tokp; + + *(u8 *)ict++ = 0xa4; + *(u8 *)ict++ = toklen - 2; + memcpy(ict, mic_hdr->data, mic_hdr->len); + ict += mic_hdr->len; + + *(u8 *)ict++ = 0x03; + *(u8 *)ict++ = md5elen + 1; /* add 1 to include zbit */ + *(u8 *)ict++ = md5zbit; + memcpy(ict, md5cksum->data, md5elen); +} + +u32 +spkm3_verify_mic_token(unsigned char **tokp, int *mic_hdrlen, unsigned char **cksum) +{ + struct xdr_netobj spkm3_ctx_id = {.len =0, .data = NULL}; + unsigned char *ptr = *tokp; + int ctxelen; + u32 ret = GSS_S_DEFECTIVE_TOKEN; + + /* spkm3 innercontext token preamble */ + if ((ptr[0] != 0xa4) || (ptr[2] != 0x30)) { + dprintk("RPC: BAD SPKM ictoken preamble\n"); + goto out; + } + + *mic_hdrlen = ptr[3]; + + /* token type */ + if ((ptr[4] != 0x02) || (ptr[5] != 0x02)) { + dprintk("RPC: BAD asn1 SPKM3 token type\n"); + goto out; + } + + /* only support SPKM_MIC_TOK */ + if((ptr[6] != 0x01) || (ptr[7] != 0x01)) { + dprintk("RPC: ERROR unsupported SPKM3 token \n"); + 
goto out; + } + + /* contextid */ + if (ptr[8] != 0x03) { + dprintk("RPC: BAD SPKM3 asn1 context-id type\n"); + goto out; + } + + ctxelen = ptr[9]; + if (ctxelen > 17) { /* length includes asn1 zbit octet */ + dprintk("RPC: BAD SPKM3 contextid len %d\n", ctxelen); + goto out; + } + + /* ignore ptr[10] */ + + if(!decode_asn1_bitstring(&spkm3_ctx_id, &ptr[11], ctxelen - 1, 16)) + goto out; + + /* + * in the current implementation: the optional int-alg is not present + * so the default int-alg (md5) is used the optional snd-seq field is + * also not present + */ + + if (*mic_hdrlen != 6 + ctxelen) { + dprintk("RPC: BAD SPKM_ MIC_TOK header len %d: we only support default int-alg (should be absent) and do not support snd-seq\n", *mic_hdrlen); + goto out; + } + /* checksum */ + *cksum = (&ptr[10] + ctxelen); /* ctxelen includes ptr[10] */ + + ret = GSS_S_COMPLETE; +out: + if (spkm3_ctx_id.data) + kfree(spkm3_ctx_id.data); + return ret; +} + --- linux-2.6.7/net/sunrpc/auth_gss/gss_generic_token.c.lsec 2004-06-15 23:19:10.000000000 -0600 +++ linux-2.6.7/net/sunrpc/auth_gss/gss_generic_token.c 2005-03-23 14:28:23.707445616 -0700 @@ -179,7 +179,7 @@ EXPORT_SYMBOL(g_make_token_header); */ u32 g_verify_token_header(struct xdr_netobj *mech, int *body_size, - unsigned char **buf_in, int tok_type, int toksize) + unsigned char **buf_in, int toksize) { unsigned char *buf = *buf_in; int seqsize; --- linux-2.6.7/net/sunrpc/auth_gss/gss_spkm3_unseal.c.lsec 2005-03-23 14:28:24.240364600 -0700 +++ linux-2.6.7/net/sunrpc/auth_gss/gss_spkm3_unseal.c 2005-03-23 14:28:24.240364600 -0700 @@ -0,0 +1,128 @@ +/* + * linux/net/sunrpc/gss_spkm3_unseal.c + * + * Copyright (c) 2003 The Regents of the University of Michigan. + * All rights reserved. + * + * Andy Adamson + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include +#include +#include +#include +#include + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_AUTH +#endif + +/* + * spkm3_read_token() + * + * only SPKM_MIC_TOK with md5 intg-alg is supported + */ +u32 +spkm3_read_token(struct spkm3_ctx *ctx, + struct xdr_netobj *read_token, /* checksum */ + struct xdr_buf *message_buffer, /* signbuf */ + int *qop_state, int toktype) +{ + s32 code; + struct xdr_netobj wire_cksum = {.len =0, .data = NULL}; + struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; + unsigned char *ptr = (unsigned char *)read_token->data; + unsigned char *cksum; + int bodysize, md5elen; + int mic_hdrlen; + u32 ret = GSS_S_DEFECTIVE_TOKEN; + + dprintk("RPC: spkm3_read_token read_token->len %d\n", read_token->len); + + if (g_verify_token_header((struct xdr_netobj *) &ctx->mech_used, + &bodysize, &ptr, read_token->len)) + goto out; + + /* decode the token */ + + if (toktype == SPKM_MIC_TOK) { + + if ((ret = spkm3_verify_mic_token(&ptr, &mic_hdrlen, &cksum))) + goto out; + + if (*cksum++ != 0x03) { + dprintk("RPC: spkm3_read_token BAD checksum type\n"); + goto out; + } + md5elen = *cksum++; + cksum++; /* move past the zbit */ + + if(!decode_asn1_bitstring(&wire_cksum, cksum, md5elen - 1, 16)) + goto out; + + /* HARD CODED FOR MD5 */ + + /* compute the checksum of the message. + * ptr + 2 = start of header piece of checksum + * mic_hdrlen + 2 = length of header piece of checksum + */ + ret = GSS_S_DEFECTIVE_TOKEN; + code = make_checksum(CKSUMTYPE_RSA_MD5, ptr + 2, + mic_hdrlen + 2, + message_buffer, &md5cksum); + + if (code) + goto out; + + dprintk("RPC: spkm3_read_token: digest wire_cksum.len %d:\n", + wire_cksum.len); + dprintk(" md5cksum.data\n"); + print_hexl((u32 *) md5cksum.data, 16, 0); + dprintk(" cksum.data:\n"); + print_hexl((u32 *) wire_cksum.data, wire_cksum.len, 0); + + ret = GSS_S_BAD_SIG; + code = memcmp(md5cksum.data, wire_cksum.data, wire_cksum.len); + if (code) + goto out; + + } else { + dprintk("RPC: BAD or UNSUPPORTED SPKM3 token type: %d\n",toktype); + goto out; + } + + /* XXX: need to add expiration and sequencing */ + ret = GSS_S_COMPLETE; +out: + if (md5cksum.data) + kfree(md5cksum.data); + if (wire_cksum.data) + kfree(wire_cksum.data); + return ret; +} --- linux-2.6.7/net/sunrpc/auth_gss/Makefile.lsec 2004-06-15 23:19:22.000000000 -0600 +++ linux-2.6.7/net/sunrpc/auth_gss/Makefile 2005-03-23 14:28:24.294356392 -0700 @@ -10,5 +10,9 @@ auth_rpcgss-objs := auth_gss.o gss_gener obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \ - gss_krb5_seqnum.o + gss_krb5_seqnum.o gss_krb5_wrap.o +obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o + +rpcsec_gss_spkm3-objs := gss_spkm3_mech.o gss_spkm3_seal.o gss_spkm3_unseal.o \ + gss_spkm3_token.o --- linux-2.6.7/net/sunrpc/cache.c.lsec 2004-06-15 23:19:36.000000000 -0600 +++ linux-2.6.7/net/sunrpc/cache.c 2005-03-23 14:28:24.406339368 -0700 @@ -38,7 +38,7 @@ void cache_init(struct cache_head *h) time_t now = get_seconds(); h->next = NULL; h->flags = 0; - atomic_set(&h->refcnt, 0); + atomic_set(&h->refcnt, 1); h->expiry_time = now + CACHE_NEW_EXPIRY; h->last_refresh = now; } --- linux-2.6.7/net/sunrpc/svc.c.lsec 2004-06-15 23:20:03.000000000 -0600 +++ linux-2.6.7/net/sunrpc/svc.c 2005-03-23 14:28:23.652453976 -0700 @@ -263,6 +263,7 @@ svc_process(struct svc_serv *serv, struc u32 *statp; u32 dir, prog, vers, proc, auth_stat, rpc_stat; + int auth_res; rpc_stat = rpc_success; @@ -304,12 +305,17 @@ svc_process(struct svc_serv 
*serv, struc rqstp->rq_vers = vers = ntohl(svc_getu32(argv)); /* version number */ rqstp->rq_proc = proc = ntohl(svc_getu32(argv)); /* procedure number */ + progp = serv->sv_program; /* * Decode auth data, and add verifier to reply buffer. * We do this before anything else in order to get a decent * auth verifier. */ - switch (svc_authenticate(rqstp, &auth_stat)) { + if (progp->pg_authenticate != NULL) + auth_res = progp->pg_authenticate(rqstp, &auth_stat); + else + auth_res = svc_authenticate(rqstp, &auth_stat); + switch (auth_res) { case SVC_OK: break; case SVC_GARBAGE: @@ -326,7 +332,6 @@ svc_process(struct svc_serv *serv, struc goto sendit; } - progp = serv->sv_program; if (prog != progp->pg_prog) goto err_bad_prog; --- linux-2.6.7/net/sunrpc/svcauth.c.lsec 2004-06-15 23:19:44.000000000 -0600 +++ linux-2.6.7/net/sunrpc/svcauth.c 2005-03-23 14:28:24.407339216 -0700 @@ -156,25 +156,47 @@ static inline int auth_domain_match(stru { return strcmp(tmp->name, item->name) == 0; } -DefineCacheLookup(struct auth_domain, - h, - auth_domain_lookup, - (struct auth_domain *item, int set), - /* no setup */, - &auth_domain_cache, - auth_domain_hash(item), - auth_domain_match(tmp, item), - kfree(new); if(!set) { - if (new) - write_unlock(&auth_domain_cache.hash_lock); - else - read_unlock(&auth_domain_cache.hash_lock); - return NULL; - } - new=item; atomic_inc(&new->h.refcnt), - /* no update */, - 0 /* no inplace updates */ - ) + +struct auth_domain * +auth_domain_lookup(struct auth_domain *item, int set) +{ + struct auth_domain *tmp = NULL; + struct cache_head **hp, **head; + head = &auth_domain_cache.hash_table[auth_domain_hash(item)]; + + if (set) + write_lock(&auth_domain_cache.hash_lock); + else + read_lock(&auth_domain_cache.hash_lock); + for (hp=head; *hp != NULL; hp = &tmp->h.next) { + tmp = container_of(*hp, struct auth_domain, h); + if (!auth_domain_match(tmp, item)) + continue; + cache_get(&tmp->h); + if (!set) + goto out_noset; + *hp = tmp->h.next; + tmp->h.next = NULL; + clear_bit(CACHE_HASHED, &tmp->h.flags); + auth_domain_drop(&tmp->h, &auth_domain_cache); + goto out_set; + } + /* Didn't find anything */ + if (!set) + goto out_noset; + auth_domain_cache.entries++; +out_set: + set_bit(CACHE_HASHED, &item->h.flags); + item->h.next = *head; + *head = &item->h; + write_unlock(&auth_domain_cache.hash_lock); + cache_fresh(&auth_domain_cache, &item->h, item->h.expiry_time); + cache_get(&item->h); + return item; +out_noset: + read_unlock(&auth_domain_cache.hash_lock); + return tmp; +} struct auth_domain *auth_domain_find(char *name) {