The complete series of citi nfsv4 patches in a single patch Makefile | 2 fs/Kconfig | 49 fs/Makefile | 1 fs/inode.c | 2 fs/nfs/dir.c | 181 ++ fs/nfs/direct.c | 3 fs/nfs/file.c | 23 fs/nfs/inode.c | 586 +++++---- fs/nfs/nfs3proc.c | 43 fs/nfs/nfs4proc.c | 988 +++++++--------- fs/nfs/nfs4xdr.c | 1931 ++++++++++++++++++++++++-------- fs/nfs/pagelist.c | 5 fs/nfs/proc.c | 51 fs/nfs/read.c | 2 fs/nfs/unlink.c | 3 fs/nfs/write.c | 207 +-- fs/nfs4acl/Makefile | 3 fs/nfs4acl/acl.c | 921 +++++++++++++++ fs/nfs4acl/acl_syms.c | 51 fs/nfsd/Makefile | 2 fs/nfsd/nfs3xdr.c | 2 fs/nfsd/nfs4idmap.c | 569 +++++++++ fs/nfsd/nfs4proc.c | 229 ++- fs/nfsd/nfs4state.c | 440 +++++-- fs/nfsd/nfs4xdr.c | 495 +++++--- fs/nfsd/nfsctl.c | 7 fs/nfsd/nfsproc.c | 1 fs/nfsd/nfsxdr.c | 2 fs/nfsd/stats.c | 67 - fs/nfsd/vfs.c | 218 +++ include/linux/fs.h | 2 include/linux/nfs.h | 2 include/linux/nfs4.h | 80 + include/linux/nfs4_acl.h | 68 + include/linux/nfs_fs.h | 138 +- include/linux/nfs_page.h | 2 include/linux/nfs_xdr.h | 256 +--- include/linux/nfsd/nfsd.h | 16 include/linux/nfsd/nfsfh.h | 8 include/linux/nfsd/state.h | 21 include/linux/nfsd/xdr4.h | 37 include/linux/nfsd_idmap.h | 54 include/linux/sunrpc/auth_gss.h | 2 include/linux/sunrpc/cache.h | 13 include/linux/sunrpc/gss_api.h | 3 include/linux/sunrpc/stats.h | 20 include/linux/sunrpc/svc.h | 1 include/linux/sunrpc/svcauth.h | 5 include/linux/sunrpc/svcauth_gss.h | 35 include/linux/sunrpc/xdr.h | 3 include/linux/sunrpc/xprt.h | 15 net/sunrpc/Makefile | 2 net/sunrpc/auth_gss/Makefile | 2 net/sunrpc/auth_gss/auth_gss.c | 119 + net/sunrpc/auth_gss/gss_krb5_crypto.c | 18 net/sunrpc/auth_gss/gss_krb5_mech.c | 14 net/sunrpc/auth_gss/gss_krb5_seal.c | 9 net/sunrpc/auth_gss/gss_krb5_seqnum.c | 2 net/sunrpc/auth_gss/gss_mech_switch.c | 32 net/sunrpc/auth_gss/gss_pseudoflavors.c | 21 net/sunrpc/auth_gss/sunrpcgss_syms.c | 2 net/sunrpc/auth_gss/svcauth_gss.c | 1018 ++++++++++++++++ net/sunrpc/cache.c | 13 net/sunrpc/stats.c | 106 - 
net/sunrpc/sunrpc_syms.c | 5 net/sunrpc/svc.c | 4 net/sunrpc/svcauth.c | 5 net/sunrpc/svcauth_unix.c | 13 net/sunrpc/xdr.c | 4 net/sunrpc/xprt.c | 210 +-- include/linux/sunrpc/name_lookup.h | 38 71 files changed, 7194 insertions(+), 2308 deletions(-) diff -puN Makefile~CITI_NFS4_ALL Makefile --- linux-2.6.3/Makefile~CITI_NFS4_ALL 2004-02-19 16:47:02.000000000 -0500 +++ linux-2.6.3-bfields/Makefile 2004-02-19 16:47:16.000000000 -0500 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 3 -EXTRAVERSION = +EXTRAVERSION = -CITI_NFS4_ALL-1 NAME=Feisty Dunnart # *DOCUMENTATION* diff -puN fs/inode.c~CITI_NFS4_ALL fs/inode.c --- linux-2.6.3/fs/inode.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 +++ linux-2.6.3-bfields/fs/inode.c 2004-02-19 16:47:03.000000000 -0500 @@ -1178,6 +1178,8 @@ void inode_update_time(struct inode *ino struct timespec now; int sync_it = 0; + if (IS_NOCMTIME(inode)) + return; if (IS_RDONLY(inode)) return; diff -puN fs/Kconfig~CITI_NFS4_ALL fs/Kconfig --- linux-2.6.3/fs/Kconfig~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 +++ linux-2.6.3-bfields/fs/Kconfig 2004-02-19 16:47:07.000000000 -0500 @@ -288,7 +288,7 @@ config FS_POSIX_ACL # Never use this symbol for ifdefs. # bool - depends on EXT2_FS_POSIX_ACL || EXT3_FS_POSIX_ACL || JFS_POSIX_ACL + depends on EXT2_FS_POSIX_ACL || EXT3_FS_POSIX_ACL || JFS_POSIX_ACL || NFS_V4_ACL default y config XFS_FS @@ -1314,21 +1314,25 @@ config NFS_V3 Say Y here if you want your NFS client to be able to speak the newer version 3 of the NFS protocol. - If unsure, say N. + If unsure, say Y. config NFS_V4 bool "Provide NFSv4 client support (EXPERIMENTAL)" depends on NFS_FS && EXPERIMENTAL + select RPCSEC_GSS_KRB5 help Say Y here if you want your NFS client to be able to speak the newer - version 4 of the NFS protocol. This feature is experimental, and - should only be used if you are interested in helping to test NFSv4. + version 4 of the NFS protocol. 
+ + Note: Requires auxiliary userspace daemons which may be found on + http://www.citi.umich.edu/projects/nfsv4/ If unsure, say N. config NFS_DIRECTIO bool "Allow direct I/O on NFS files (EXPERIMENTAL)" depends on NFS_FS && EXPERIMENTAL + select NFS_V4_ACL help This option enables applications to perform uncached I/O on files in NFS file systems using the O_DIRECT open() flag. When O_DIRECT @@ -1388,6 +1392,7 @@ config NFSD_V3 config NFSD_V4 bool "Provide NFSv4 server support (EXPERIMENTAL)" depends on NFSD_V3 && EXPERIMENTAL + select NFS_V4_ACL help If you would like to include the NFSv4 server as well as the NFSv2 and NFSv3 servers, say Y here. This feature is experimental, and @@ -1423,6 +1428,12 @@ config LOCKD_V4 depends on NFSD_V3 || NFS_V3 default y +config NFS_V4_ACL + bool "Provide NFSv4 ACL support" + depends on NFSD_V4 || NFS_V4 + help + This allows you to use POSIX ACLs with NFSv4. + config EXPORTFS tristate default NFSD @@ -1431,28 +1442,24 @@ config SUNRPC tristate config SUNRPC_GSS - tristate "Provide RPCSEC_GSS authentication (EXPERIMENTAL)" + tristate + +config RPCSEC_GSS_KRB5 + tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" depends on SUNRPC && EXPERIMENTAL - default SUNRPC if NFS_V4=y + select SUNRPC_GSS + select CRYPTO + select CRYPTO_MD5 + select CRYPTO_DES help - Provides cryptographic authentication for NFS rpc requests. To - make this useful, you must also select at least one rpcsec_gss - mechanism. - Note: You should always select this option if you wish to use + Provides for secure RPC calls by means of a gss-api + mechanism based on Kerberos V5. This is required for NFSv4. -config RPCSEC_GSS_KRB5 - tristate "Kerberos V mechanism for RPCSEC_GSS (EXPERIMENTAL)" - depends on SUNRPC_GSS && CRYPTO_DES && CRYPTO_MD5 - default SUNRPC_GSS if NFS_V4=y - help - Provides a gss-api mechanism based on Kerberos V5 (this is - mandatory for RFC3010-compliant NFSv4 implementations). 
- Requires a userspace daemon; - see http://www.citi.umich.edu/projects/nfsv4/. + Note: Requires an auxiliary userspace daemon which may be found on + http://www.citi.umich.edu/projects/nfsv4/ - Note: If you select this option, please ensure that you also - enable the MD5 and DES crypto ciphers. + If unsure, say N. config SMB_FS tristate "SMB file system support (to mount Windows shares etc.)" diff -puN fs/nfs/dir.c~CITI_NFS4_ALL fs/nfs/dir.c --- linux-2.6.3/fs/nfs/dir.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 +++ linux-2.6.3-bfields/fs/nfs/dir.c 2004-02-19 16:47:07.000000000 -0500 @@ -88,6 +88,10 @@ struct inode_operations nfs4_dir_inode_o .permission = nfs_permission, .getattr = nfs_getattr, .setattr = nfs_setattr, +#ifdef CONFIG_NFS_V4_ACL + .getxattr = nfs_getxattr, + .setxattr = nfs_setxattr, +#endif /* CONFIG_NFS_V4_ACL */ }; #endif /* CONFIG_NFS_V4 */ @@ -139,11 +143,13 @@ int nfs_readdir_filler(nfs_readdir_descr struct file *file = desc->file; struct inode *inode = file->f_dentry->d_inode; struct rpc_cred *cred = nfs_file_cred(file); + unsigned long timestamp; int error; dfprintk(VFS, "NFS: nfs_readdir_filler() reading cookie %Lu into page %lu.\n", (long long)desc->entry->cookie, page->index); again: + timestamp = jiffies; error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, desc->entry->cookie, page, NFS_SERVER(inode)->dtsize, desc->plus); if (error < 0) { @@ -157,18 +163,21 @@ int nfs_readdir_filler(nfs_readdir_descr goto error; } SetPageUptodate(page); + NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; /* Ensure consistent page alignment of the data. * Note: assumes we have exclusive access to this mapping either * throught inode->i_sem or some other mechanism. 
*/ - if (page->index == 0) + if (page->index == 0) { invalidate_inode_pages(inode->i_mapping); + NFS_I(inode)->readdir_timestamp = timestamp; + } unlock_page(page); return 0; error: SetPageError(page); unlock_page(page); - invalidate_inode_pages(inode->i_mapping); + nfs_zap_caches(inode); desc->error = error; return -EIO; } @@ -381,6 +390,7 @@ int uncached_readdir(nfs_readdir_descrip page, NFS_SERVER(inode)->dtsize, desc->plus); + NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; desc->page = page; desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ if (desc->error >= 0) { @@ -459,7 +469,15 @@ static int nfs_readdir(struct file *filp } res = 0; break; - } else if (res < 0) + } + if (res == -ETOOSMALL && desc->plus) { + NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; + nfs_zap_caches(inode); + desc->plus = 0; + desc->entry->eof = 0; + continue; + } + if (res < 0) break; res = nfs_do_filldir(desc, dirent, filldir); @@ -481,14 +499,19 @@ static int nfs_readdir(struct file *filp * In the case it has, we assume that the dentries are untrustworthy * and may need to be looked up again. 
*/ -static inline -int nfs_check_verifier(struct inode *dir, struct dentry *dentry) +static inline int nfs_check_verifier(struct inode *dir, struct dentry *dentry) { if (IS_ROOT(dentry)) return 1; - if (nfs_revalidate_inode(NFS_SERVER(dir), dir)) + if ((NFS_FLAGS(dir) & NFS_INO_INVALID_ATTR) != 0 + || nfs_attribute_timeout(dir)) return 0; - return time_after(dentry->d_time, NFS_MTIME_UPDATE(dir)); + return nfs_verify_change_attribute(dir, (unsigned long)dentry->d_fsdata); +} + +static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf) +{ + dentry->d_fsdata = (void *)verf; } /* @@ -528,9 +551,7 @@ int nfs_neg_need_reval(struct inode *dir /* Don't revalidate a negative dentry if we're creating a new file */ if ((ndflags & LOOKUP_CREATE) && !(ndflags & LOOKUP_CONTINUE)) return 0; - if (!nfs_check_verifier(dir, dentry)) - return 1; - return time_after(jiffies, dentry->d_time + NFS_ATTRTIMEO(dir)); + return !nfs_check_verifier(dir, dentry); } /* @@ -552,6 +573,7 @@ static int nfs_lookup_revalidate(struct int error; struct nfs_fh fhandle; struct nfs_fattr fattr; + unsigned long verifier; int isopen = 0; parent = dget_parent(dentry); @@ -574,6 +596,9 @@ static int nfs_lookup_revalidate(struct goto out_bad; } + /* Revalidate parent directory attribute cache */ + nfs_revalidate_inode(NFS_SERVER(dir), dir); + /* Force a full look up iff the parent directory has changed */ if (nfs_check_verifier(dir, dentry)) { if (nfs_lookup_verify_inode(inode, isopen)) @@ -581,6 +606,12 @@ static int nfs_lookup_revalidate(struct goto out_valid; } + /* + * Note: we're not holding inode->i_sem and so may be racing with + * operations that change the directory. We therefore save the + * change attribute *before* we do the RPC call. 
+ */ + verifier = nfs_save_change_attribute(dir); error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr); if (!error) { if (memcmp(NFS_FH(inode), &fhandle, sizeof(struct nfs_fh))!= 0) @@ -603,6 +634,7 @@ static int nfs_lookup_revalidate(struct out_valid_renew: nfs_renew_times(dentry); + nfs_set_verifier(dentry, verifier); out_valid: unlock_kernel(); dput(parent); @@ -638,6 +670,11 @@ static int nfs_dentry_delete(struct dent /* Unhash it, so that ->d_iput() would be called */ return 1; } + if (!(dentry->d_sb->s_flags & MS_ACTIVE)) { + /* Unhash it, so that ancestors of killed async unlink + * files will be cleaned up during umount */ + return 1; + } return 0; } @@ -693,6 +730,8 @@ static struct dentry *nfs_lookup(struct dentry->d_op = NFS_PROTO(dir)->dentry_ops; lock_kernel(); + /* Revalidate parent directory attribute cache */ + nfs_revalidate_inode(NFS_SERVER(dir), dir); /* If we're doing an exclusive create, optimize away the lookup */ if (nfs_is_exclusive_create(dir, nd)) @@ -715,6 +754,7 @@ no_entry: error = 0; d_add(dentry, inode); nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out_unlock: unlock_kernel(); out: @@ -768,7 +808,15 @@ static struct dentry *nfs_atomic_lookup( /* Open the file on the server */ lock_kernel(); - inode = nfs4_atomic_open(dir, dentry, nd); + /* Revalidate parent directory attribute cache */ + nfs_revalidate_inode(NFS_SERVER(dir), dir); + + if (nd->intent.open.flags & O_CREAT) { + nfs_begin_data_update(dir); + inode = nfs4_atomic_open(dir, dentry, nd); + nfs_end_data_update(dir); + } else + inode = nfs4_atomic_open(dir, dentry, nd); unlock_kernel(); if (IS_ERR(inode)) { error = PTR_ERR(inode); @@ -790,6 +838,7 @@ static struct dentry *nfs_atomic_lookup( no_entry: d_add(dentry, inode); nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out: BUG_ON(error > 0); return ERR_PTR(error); @@ -801,13 +850,16 @@ static int nfs_open_revalidate(struct de { struct dentry 
*parent = NULL; struct inode *inode = dentry->d_inode; + struct inode *dir; + unsigned long verifier; int openflags, ret = 0; /* NFS only supports OPEN for regular files */ if (inode && !S_ISREG(inode->i_mode)) goto no_open; parent = dget_parent(dentry); - if (!is_atomic_open(parent->d_inode, nd)) + dir = parent->d_inode; + if (!is_atomic_open(dir, nd)) goto no_open; openflags = nd->intent.open.flags; if (openflags & O_CREAT) { @@ -821,8 +873,16 @@ static int nfs_open_revalidate(struct de /* We can't create new files, or truncate existing ones here */ openflags &= ~(O_CREAT|O_TRUNC); + /* + * Note: we're not holding inode->i_sem and so may be racing with + * operations that change the directory. We therefore save the + * change attribute *before* we do the RPC call. + */ lock_kernel(); - ret = nfs4_open_revalidate(parent->d_inode, dentry, openflags); + verifier = nfs_save_change_attribute(dir); + ret = nfs4_open_revalidate(dir, dentry, openflags); + if (!ret) + nfs_set_verifier(dentry, verifier); unlock_kernel(); out: dput(parent); @@ -869,15 +929,20 @@ int nfs_cached_lookup(struct inode *dir, struct nfs_server *server; struct nfs_entry entry; struct page *page; - unsigned long timestamp = NFS_MTIME_UPDATE(dir); + unsigned long timestamp; int res; if (!NFS_USE_READDIRPLUS(dir)) return -ENOENT; server = NFS_SERVER(dir); - if (server->flags & NFS_MOUNT_NOAC) + /* Don't use readdirplus unless the cache is stable */ + if ((server->flags & NFS_MOUNT_NOAC) != 0 + || nfs_caches_unstable(dir) + || nfs_attribute_timeout(dir)) return -ENOENT; - nfs_revalidate_inode(server, dir); + if ((NFS_FLAGS(dir) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) != 0) + return -ENOENT; + timestamp = NFS_I(dir)->readdir_timestamp; entry.fh = fh; entry.fattr = fattr; @@ -931,6 +996,7 @@ static int nfs_instantiate(struct dentry if (inode) { d_instantiate(dentry, inode); nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dentry->d_parent->d_inode)); error = 0; } 
return error; @@ -969,11 +1035,13 @@ static int nfs_create(struct inode *dir, * does not pass the create flags. */ lock_kernel(); - nfs_zap_caches(dir); + nfs_begin_data_update(dir); inode = NFS_PROTO(dir)->create(dir, &dentry->d_name, &attr, open_flags); + nfs_end_data_update(dir); if (!IS_ERR(inode)) { d_instantiate(dentry, inode); nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); error = 0; } else { error = PTR_ERR(inode); @@ -1004,9 +1072,10 @@ nfs_mknod(struct inode *dir, struct dent attr.ia_valid = ATTR_MODE; lock_kernel(); - nfs_zap_caches(dir); + nfs_begin_data_update(dir); error = NFS_PROTO(dir)->mknod(dir, &dentry->d_name, &attr, rdev, &fhandle, &fattr); + nfs_end_data_update(dir); if (!error) error = nfs_instantiate(dentry, &fhandle, &fattr); else @@ -1041,9 +1110,10 @@ static int nfs_mkdir(struct inode *dir, */ d_drop(dentry); #endif - nfs_zap_caches(dir); + nfs_begin_data_update(dir); error = NFS_PROTO(dir)->mkdir(dir, &dentry->d_name, &attr, &fhandle, &fattr); + nfs_end_data_update(dir); if (!error) error = nfs_instantiate(dentry, &fhandle, &fattr); else @@ -1060,10 +1130,12 @@ static int nfs_rmdir(struct inode *dir, dir->i_ino, dentry->d_name.name); lock_kernel(); - nfs_zap_caches(dir); + nfs_begin_data_update(dir); error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); - if (!error) + /* Ensure the VFS deletes this inode */ + if (error == 0 && dentry->d_inode != NULL) dentry->d_inode->i_nlink = 0; + nfs_end_data_update(dir); unlock_kernel(); return error; @@ -1119,12 +1191,21 @@ dentry->d_parent->d_name.name, dentry->d goto out; } while(sdentry->d_inode != NULL); /* need negative lookup */ - nfs_zap_caches(dir); qsilly.name = silly; qsilly.len = strlen(silly); - error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, dir, &qsilly); + nfs_begin_data_update(dir); + if (dentry->d_inode) { + nfs_begin_data_update(dentry->d_inode); + error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, + dir, &qsilly); + 
nfs_end_data_update(dentry->d_inode); + } else + error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, + dir, &qsilly); + nfs_end_data_update(dir); if (!error) { nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); d_move(dentry, sdentry); error = nfs_async_unlink(dentry); /* If we return 0 we don't unlink */ @@ -1156,14 +1237,17 @@ static int nfs_safe_remove(struct dentry goto out; } - nfs_zap_caches(dir); - if (inode) - NFS_CACHEINV(inode); - error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); - if (error < 0) - goto out; - if (inode) - inode->i_nlink--; + nfs_begin_data_update(dir); + if (inode != NULL) { + nfs_begin_data_update(inode); + error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); + /* The VFS may want to delete this inode */ + if (error == 0) + inode->i_nlink--; + nfs_end_data_update(inode); + } else + error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); + nfs_end_data_update(dir); out: return error; } @@ -1198,9 +1282,10 @@ static int nfs_unlink(struct inode *dir, spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); error = nfs_safe_remove(dentry); - if (!error) + if (!error) { nfs_renew_times(dentry); - else if (need_rehash) + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + } else if (need_rehash) d_rehash(dentry); unlock_kernel(); return error; @@ -1247,9 +1332,10 @@ dentry->d_parent->d_name.name, dentry->d qsymname.len = strlen(symname); lock_kernel(); - nfs_zap_caches(dir); + nfs_begin_data_update(dir); error = NFS_PROTO(dir)->symlink(dir, &dentry->d_name, &qsymname, &attr, &sym_fh, &sym_attr); + nfs_end_data_update(dir); if (!error) { error = nfs_instantiate(dentry, &sym_fh, &sym_attr); } else { @@ -1281,9 +1367,12 @@ nfs_link(struct dentry *old_dentry, stru */ lock_kernel(); d_drop(dentry); - nfs_zap_caches(dir); - NFS_CACHEINV(inode); + + nfs_begin_data_update(dir); + nfs_begin_data_update(inode); error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); + nfs_end_data_update(inode); + 
nfs_end_data_update(dir); unlock_kernel(); return error; } @@ -1388,16 +1477,23 @@ go_ahead: if (new_inode) d_delete(new_dentry); - nfs_zap_caches(new_dir); - nfs_zap_caches(old_dir); + nfs_begin_data_update(old_dir); + nfs_begin_data_update(new_dir); + nfs_begin_data_update(old_inode); error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name, new_dir, &new_dentry->d_name); + nfs_end_data_update(old_inode); + nfs_end_data_update(new_dir); + nfs_end_data_update(old_dir); out: if (rehash) d_rehash(rehash); - if (!error && !S_ISDIR(old_inode->i_mode)) - d_move(old_dentry, new_dentry); - nfs_renew_times(new_dentry); + if (!error) { + if (!S_ISDIR(old_inode->i_mode)) + d_move(old_dentry, new_dentry); + nfs_renew_times(new_dentry); + nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir)); + } /* new dentry created? */ if (dentry) @@ -1451,7 +1547,8 @@ nfs_permission(struct inode *inode, int cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); if (cache->cred == cred - && time_before(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))) { + && time_before(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)) + && !(NFS_FLAGS(inode) & NFS_INO_INVALID_ATTR)) { if (!(res = cache->err)) { /* Is the mask a subset of an accepted mask? 
*/ if ((cache->mask & mask) == mask) diff -puN fs/nfs/direct.c~CITI_NFS4_ALL fs/nfs/direct.c --- linux-2.6.3/fs/nfs/direct.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 +++ linux-2.6.3-bfields/fs/nfs/direct.c 2004-02-19 16:47:03.000000000 -0500 @@ -269,6 +269,7 @@ nfs_direct_write_seg(struct inode *inode if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize) wdata.args.stable = NFS_FILE_SYNC; + nfs_begin_data_update(inode); retry: need_commit = 0; tot_bytes = 0; @@ -334,6 +335,8 @@ retry: VERF_SIZE) != 0) goto sync_retry; } + nfs_end_data_update(inode); + NFS_FLAGS(inode) |= NFS_INO_INVALID_DATA; return tot_bytes; diff -puN fs/nfs/file.c~CITI_NFS4_ALL fs/nfs/file.c --- linux-2.6.3/fs/nfs/file.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 +++ linux-2.6.3-bfields/fs/nfs/file.c 2004-02-19 16:47:07.000000000 -0500 @@ -63,6 +63,20 @@ struct inode_operations nfs_file_inode_o .setattr = nfs_setattr, }; +#ifdef CONFIG_NFS_V4 + +struct inode_operations nfs4_file_inode_operations = { + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, +#ifdef CONFIG_NFS_V4_ACL + .getxattr = nfs_getxattr, + .setxattr = nfs_setxattr, +#endif /* CONFIG_NFS_V4_ACL */ +}; + +#endif /* CONFIG_NFS_V4 */ + /* Hack for future NFS swap support */ #ifndef IS_SWAPFILE # define IS_SWAPFILE(inode) (0) @@ -104,11 +118,16 @@ nfs_file_flush(struct file *file) dfprintk(VFS, "nfs: flush(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); + if ((file->f_mode & FMODE_WRITE) == 0) + return 0; lock_kernel(); - status = nfs_wb_file(inode, file); + /* Ensure that data+attribute caches are up to date after close() */ + status = nfs_wb_all(inode); if (!status) { status = file->f_error; file->f_error = 0; + if (!status) + __nfs_revalidate_inode(NFS_SERVER(inode), inode); } unlock_kernel(); return status; @@ -179,7 +198,7 @@ nfs_fsync(struct file *file, struct dent dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); lock_kernel(); - status = 
nfs_wb_file(inode, file); + status = nfs_wb_all(inode); if (!status) { status = file->f_error; file->f_error = 0; diff -puN fs/nfs/inode.c~CITI_NFS4_ALL fs/nfs/inode.c --- linux-2.6.3/fs/nfs/inode.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 +++ linux-2.6.3-bfields/fs/nfs/inode.c 2004-02-19 16:47:15.000000000 -0500 @@ -53,8 +53,8 @@ */ #define NFS_MAX_READAHEAD RPC_MAXREQS -void nfs_zap_caches(struct inode *); static void nfs_invalidate_inode(struct inode *); +static int nfs_update_inode(struct inode *, struct nfs_fattr *, unsigned long); static struct inode *nfs_alloc_inode(struct super_block *sb); static void nfs_destroy_inode(struct inode *); @@ -118,7 +118,7 @@ nfs_write_inode(struct inode *inode, int { int flags = sync ? FLUSH_WAIT : 0; - nfs_commit_file(inode, NULL, 0, 0, flags); + nfs_commit_inode(inode, 0, 0, flags); } static void @@ -136,21 +136,24 @@ nfs_delete_inode(struct inode * inode) clear_inode(inode); } -/* - * For the moment, the only task for the NFS clear_inode method is to - * release the mmap credential - */ static void nfs_clear_inode(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); struct rpc_cred *cred = nfsi->mm_cred; +#ifdef CONFIG_NFS_V4_ACL + if (nfsi->acl != NFS4_ACL_NOT_CACHED) + posix_acl_release(nfsi->acl); + if (nfsi->default_acl != NFS4_ACL_NOT_CACHED) + posix_acl_release(nfsi->default_acl); +#endif /* CONFIG_NFS_V4_ACL */ if (cred) put_rpccred(cred); cred = nfsi->cache_access.cred; if (cred) put_rpccred(cred); + BUG_ON(atomic_read(&nfsi->data_updates) != 0); } void @@ -230,50 +233,23 @@ nfs_block_size(unsigned long bsize, unsi /* * Obtain the root inode of the file system. 
*/ -static int -nfs_get_root(struct inode **rooti, rpc_authflavor_t authflavor, struct super_block *sb, struct nfs_fh *rootfh) +static struct inode * +nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo) { struct nfs_server *server = NFS_SB(sb); - struct nfs_fattr fattr = { }; + struct inode *rooti; int error; - error = server->rpc_ops->getroot(server, rootfh, &fattr); - if (error == -EACCES && authflavor > RPC_AUTH_MAXFLAVOR) { - /* - * Some authentication types (gss/krb5, most notably) - * are such that root won't be able to present a - * credential for GETATTR (ie, getroot()). - * - * We still want the mount to succeed. - * - * So we fake the attr values and mark the inode as such. - * On the first succesful traversal, we fix everything. - * The auth type test isn't quite correct, but whatever. - */ - dfprintk(VFS, "NFS: faking root inode\n"); - - fattr.fileid = 1; - fattr.nlink = 2; /* minimum for a dir */ - fattr.type = NFDIR; - fattr.mode = S_IFDIR|S_IRUGO|S_IXUGO; - fattr.size = 4096; - fattr.du.nfs3.used = 1; - fattr.valid = NFS_ATTR_FATTR|NFS_ATTR_FATTR_V3; - } else if (error < 0) { + error = server->rpc_ops->getroot(server, rootfh, fsinfo); + if (error < 0) { printk(KERN_NOTICE "nfs_get_root: getattr error = %d\n", -error); - *rooti = NULL; /* superfluous ... 
but safe */ - return error; + return ERR_PTR(error); } - *rooti = nfs_fhget(sb, rootfh, &fattr); - if (error == -EACCES && authflavor > RPC_AUTH_MAXFLAVOR) { - if (*rooti) { - NFS_FLAGS(*rooti) |= NFS_INO_FAKE_ROOT; - NFS_CACHEINV((*rooti)); - error = 0; - } - } - return error; + rooti = nfs_fhget(sb, rootfh, fsinfo->fattr); + if (!rooti) + return ERR_PTR(-ENOMEM); + return rooti; } /* @@ -283,7 +259,7 @@ static int nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) { struct nfs_server *server; - struct inode *root_inode = NULL; + struct inode *root_inode; struct nfs_fattr fattr; struct nfs_fsinfo fsinfo = { .fattr = &fattr, @@ -299,8 +275,9 @@ nfs_sb_init(struct super_block *sb, rpc_ sb->s_magic = NFS_SUPER_MAGIC; + root_inode = nfs_get_root(sb, &server->fh, &fsinfo); /* Did getting the root inode fail? */ - if (nfs_get_root(&root_inode, authflavor, sb, &server->fh) < 0) + if (IS_ERR(root_inode)) goto out_no_root; sb->s_root = d_alloc_root(root_inode); if (!sb->s_root) @@ -309,10 +286,6 @@ nfs_sb_init(struct super_block *sb, rpc_ sb->s_root->d_op = server->rpc_ops->dentry_ops; /* Get some general file system info */ - if (server->rpc_ops->fsinfo(server, &server->fh, &fsinfo) < 0) { - printk(KERN_NOTICE "NFS: cannot retrieve file system info.\n"); - goto out_no_root; - } if (server->namelen == 0 && server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) server->namelen = pathinfo.max_namelen; @@ -368,13 +341,11 @@ nfs_sb_init(struct super_block *sb, rpc_ rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); return 0; /* Yargs. It didn't work out. 
*/ -out_free_all: - if (root_inode) - iput(root_inode); - return -EINVAL; out_no_root: printk("nfs_read_super: get root inode failed\n"); - goto out_free_all; + if (!IS_ERR(root_inode)) + iput(root_inode); + return -EINVAL; } /* @@ -627,13 +598,17 @@ static int nfs_show_options(struct seq_f void nfs_zap_caches(struct inode *inode) { + struct nfs_inode *nfsi = NFS_I(inode); + int mode = inode->i_mode; + NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); NFS_ATTRTIMEO_UPDATE(inode) = jiffies; - invalidate_remote_inode(inode); - memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); - NFS_CACHEINV(inode); + if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) + nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + else + nfsi->flags |= NFS_INO_INVALID_ATTR; } /* @@ -673,9 +648,6 @@ nfs_find_actor(struct inode *inode, void return 0; if (is_bad_inode(inode)) return 0; - /* Force an attribute cache update if inode->i_count == 0 */ - if (!atomic_read(&inode->i_count)) - NFS_CACHEINV(inode); return 1; } @@ -729,12 +701,12 @@ nfs_fhget(struct super_block *sb, struct inode->i_ino = hash; /* We can't support update_atime(), since the server will reset it */ - inode->i_flags |= S_NOATIME; + inode->i_flags |= S_NOATIME|S_NOCMTIME; inode->i_mode = fattr->mode; /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. 
*/ - inode->i_op = &nfs_file_inode_operations; + inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops; if (S_ISREG(inode->i_mode)) { inode->i_fop = &nfs_file_operations; inode->i_data.a_ops = &nfs_file_aops; @@ -754,10 +726,6 @@ nfs_fhget(struct super_block *sb, struct inode->i_atime = fattr->atime; inode->i_mtime = fattr->mtime; inode->i_ctime = fattr->ctime; - nfsi->read_cache_ctime = fattr->ctime; - nfsi->read_cache_mtime = fattr->mtime; - nfsi->cache_mtime_jiffies = fattr->timestamp; - nfsi->read_cache_isize = fattr->size; if (fattr->valid & NFS_ATTR_FATTR_V4) nfsi->change_attr = fattr->change_attr; inode->i_size = nfs_size_to_loff_t(fattr->size); @@ -778,7 +746,6 @@ nfs_fhget(struct super_block *sb, struct nfsi->attrtimeo_timestamp = jiffies; memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); nfsi->cache_access.cred = NULL; - unlock_new_inode(inode); } else nfs_refresh_inode(inode, fattr); @@ -804,70 +771,50 @@ nfs_setattr(struct dentry *dentry, struc struct nfs_fattr fattr; int error; + if (attr->ia_valid & ATTR_SIZE) { + if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode)) + attr->ia_valid &= ~ATTR_SIZE; + } + /* Optimization: if the end result is no change, don't RPC */ attr->ia_valid &= NFS_VALID_ATTRS; if (attr->ia_valid == 0) return 0; lock_kernel(); - - /* - * Make sure the inode is up-to-date. 
- */ - error = nfs_revalidate_inode(NFS_SERVER(inode),inode); - if (error) { -#ifdef NFS_PARANOIA -printk("nfs_setattr: revalidate failed, error=%d\n", error); -#endif - goto out; - } - - if (!S_ISREG(inode->i_mode)) { - attr->ia_valid &= ~ATTR_SIZE; - if (attr->ia_valid == 0) - goto out; - } else { - filemap_fdatawrite(inode->i_mapping); - error = nfs_wb_all(inode); - filemap_fdatawait(inode->i_mapping); - if (error) - goto out; - /* Optimize away unnecessary truncates */ - if ((attr->ia_valid & ATTR_SIZE) && i_size_read(inode) == attr->ia_size) - attr->ia_valid &= ~ATTR_SIZE; + nfs_begin_data_update(inode); + /* Write all dirty data if we're changing file permissions or size */ + if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) { + if (filemap_fdatawrite(inode->i_mapping) == 0) + filemap_fdatawait(inode->i_mapping); + nfs_wb_all(inode); } - if (!attr->ia_valid) - goto out; - error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); - if (error) - goto out; - /* - * If we changed the size or mtime, update the inode - * now to avoid invalidating the page cache. - */ - if (attr->ia_valid & ATTR_SIZE) { - if (attr->ia_size != fattr.size) - printk("nfs_setattr: attr=%Ld, fattr=%Ld??\n", - (long long) attr->ia_size, (long long)fattr.size); - vmtruncate(inode, attr->ia_size); + if (error == 0) { + nfs_refresh_inode(inode, &fattr); + if ((attr->ia_valid & ATTR_MODE) != 0) { + int mode; + mode = inode->i_mode & ~S_IALLUGO; + mode |= attr->ia_mode & S_IALLUGO; + inode->i_mode = mode; + } + if ((attr->ia_valid & ATTR_UID) != 0) + inode->i_uid = attr->ia_uid; + if ((attr->ia_valid & ATTR_GID) != 0) + inode->i_gid = attr->ia_gid; + if ((attr->ia_valid & ATTR_SIZE) != 0) { + i_size_write(inode, attr->ia_size); + vmtruncate(inode, attr->ia_size); + } } - - /* - * If we changed the size or mtime, update the inode - * now to avoid invalidating the page cache. 
- */ - if (!(fattr.valid & NFS_ATTR_WCC)) { - struct nfs_inode *nfsi = NFS_I(inode); - fattr.pre_size = nfsi->read_cache_isize; - fattr.pre_mtime = nfsi->read_cache_mtime; - fattr.pre_ctime = nfsi->read_cache_ctime; - fattr.valid |= NFS_ATTR_WCC; - } - /* Force an attribute cache update */ - NFS_CACHEINV(inode); - error = nfs_refresh_inode(inode, &fattr); -out: + if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { + struct rpc_cred **cred = &NFS_I(inode)->cache_access.cred; + if (*cred) { + put_rpccred(*cred); + *cred = NULL; + } + } + nfs_end_data_update(inode); unlock_kernel(); return error; } @@ -895,7 +842,19 @@ nfs_wait_on_inode(struct inode *inode, i int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; - int err = nfs_revalidate_inode(NFS_SERVER(inode), inode); + struct nfs_inode *nfsi = NFS_I(inode); + int need_atime = nfsi->flags & NFS_INO_INVALID_ATIME; + int err; + + if (__IS_FLG(inode, MS_NOATIME)) + need_atime = 0; + else if (__IS_FLG(inode, MS_NODIRATIME) && S_ISDIR(inode->i_mode)) + need_atime = 0; + /* We may force a getattr if the user cares about atime */ + if (need_atime) + err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); + else + err = nfs_revalidate_inode(NFS_SERVER(inode), inode); if (!err) generic_fillattr(inode, stat); return err; @@ -930,8 +889,10 @@ int nfs_open(struct inode *inode, struct auth = NFS_CLIENT(inode)->cl_auth; cred = rpcauth_lookupcred(auth, 0); filp->private_data = cred; - if (filp->f_mode & FMODE_WRITE) + if ((filp->f_mode & FMODE_WRITE) != 0) { nfs_set_mmcred(inode, cred); + nfs_begin_data_update(inode); + } return 0; } @@ -940,6 +901,8 @@ int nfs_release(struct inode *inode, str struct rpc_cred *cred; lock_kernel(); + if ((filp->f_mode & FMODE_WRITE) != 0) + nfs_end_data_update(inode); cred = nfs_file_cred(filp); if (cred) put_rpccred(cred); @@ -956,6 +919,9 @@ __nfs_revalidate_inode(struct nfs_server { int status = -ESTALE; struct 
nfs_fattr fattr; + struct nfs_inode *nfsi = NFS_I(inode); + unsigned long verifier; + unsigned int flags; dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); @@ -965,23 +931,22 @@ __nfs_revalidate_inode(struct nfs_server goto out_nowait; if (NFS_STALE(inode) && inode != inode->i_sb->s_root->d_inode) goto out_nowait; - if (NFS_FAKE_ROOT(inode)) { - dfprintk(VFS, "NFS: not revalidating fake root\n"); - status = 0; - goto out_nowait; - } while (NFS_REVALIDATING(inode)) { status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING); if (status < 0) goto out_nowait; - if (time_before(jiffies,NFS_READTIME(inode)+NFS_ATTRTIMEO(inode))) { - status = NFS_STALE(inode) ? -ESTALE : 0; - goto out_nowait; - } + if (NFS_SERVER(inode)->flags & NFS_MOUNT_NOAC) + continue; + if (NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME)) + continue; + status = NFS_STALE(inode) ? -ESTALE : 0; + goto out_nowait; } NFS_FLAGS(inode) |= NFS_INO_REVALIDATING; + /* Protect against RPC races by saving the change attribute */ + verifier = nfs_save_change_attribute(inode); status = NFS_PROTO(inode)->getattr(inode, &fattr); if (status) { dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", @@ -995,13 +960,36 @@ __nfs_revalidate_inode(struct nfs_server goto out; } - status = nfs_refresh_inode(inode, &fattr); + status = nfs_update_inode(inode, &fattr, verifier); if (status) { dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode), status); goto out; } + flags = nfsi->flags; + /* + * We may need to keep the attributes marked as invalid if + * we raced with nfs_end_attr_update(). 
+ */ + if (verifier == nfsi->cache_change_attribute) + nfsi->flags &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); + /* Do the page cache invalidation */ + if (flags & NFS_INO_INVALID_DATA) { + if (S_ISREG(inode->i_mode)) { + if (filemap_fdatawrite(inode->i_mapping) == 0) + filemap_fdatawait(inode->i_mapping); + nfs_wb_all(inode); + } + nfsi->flags &= ~NFS_INO_INVALID_DATA; + invalidate_inode_pages2(inode->i_mapping); + memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); + dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", + inode->i_sb->s_id, + (long long)NFS_FILEID(inode)); + /* This ensures we revalidate dentries */ + nfsi->cache_change_attribute++; + } dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); @@ -1009,41 +997,104 @@ __nfs_revalidate_inode(struct nfs_server NFS_FLAGS(inode) &= ~NFS_INO_STALE; out: NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING; - wake_up(&NFS_I(inode)->nfs_i_wait); + wake_up(&nfsi->nfs_i_wait); out_nowait: unlock_kernel(); return status; } -/* - * nfs_fattr_obsolete - Test if attribute data is newer than cached data - * @inode: inode - * @fattr: attributes to test +/** + * nfs_begin_data_update + * @inode - pointer to inode + * Declare that a set of operations will update file data on the server + */ +void nfs_begin_data_update(struct inode *inode) +{ + atomic_inc(&NFS_I(inode)->data_updates); +} + +/** + * nfs_end_data_update + * @inode - pointer to inode + * Declare end of the operations that will update file data + */ +void nfs_end_data_update(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + if (atomic_dec_and_test(&nfsi->data_updates)) { + nfsi->cache_change_attribute ++; + /* Mark the attribute cache for revalidation */ + nfsi->flags |= NFS_INO_INVALID_ATTR; + /* Directories and symlinks: invalidate page cache too */ + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) + nfsi->flags |= NFS_INO_INVALID_DATA; + } +} + +/** + * 
nfs_refresh_inode - verify consistency of the inode attribute cache + * @inode - pointer to inode + * @fattr - updated attributes * - * Avoid stuffing the attribute cache with obsolete information. - * We always accept updates if the attribute cache timed out, or if - * fattr->ctime is newer than our cached value. - * If fattr->ctime matches the cached value, we still accept the update - * if it increases the file size. + * Verifies the attribute cache. If we have just changed the attributes, + * so that fattr carries weak cache consistency data, then it may + * also update the ctime/mtime/change_attribute. */ -static inline -int nfs_fattr_obsolete(struct inode *inode, struct nfs_fattr *fattr) +int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) { struct nfs_inode *nfsi = NFS_I(inode); - long cdif; + loff_t cur_size, new_isize; + int data_unstable; + + /* Are we in the process of updating data on the server? */ + data_unstable = nfs_caches_unstable(inode); + + if (fattr->valid & NFS_ATTR_FATTR_V4) { + if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0 + && nfsi->change_attr == fattr->pre_change_attr) + nfsi->change_attr = fattr->change_attr; + if (!data_unstable && nfsi->change_attr != fattr->change_attr) + nfsi->flags |= NFS_INO_INVALID_ATTR; + } + + if ((fattr->valid & NFS_ATTR_FATTR) == 0) + return 0; + + /* Has the inode gone and changed behind our back? */ + if (nfsi->fileid != fattr->fileid + || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) + return -EIO; - if (time_after(jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo)) - goto out_valid; - cdif = fattr->ctime.tv_sec - nfsi->read_cache_ctime.tv_sec; - if (cdif == 0) - cdif = fattr->ctime.tv_nsec - nfsi->read_cache_ctime.tv_nsec; - if (cdif > 0) - goto out_valid; - /* Ugh... 
*/ - if (cdif == 0 && fattr->size > nfsi->read_cache_isize) - goto out_valid; - return -1; - out_valid: + cur_size = i_size_read(inode); + new_isize = nfs_size_to_loff_t(fattr->size); + + /* If we have atomic WCC data, we may update some attributes */ + if ((fattr->valid & NFS_ATTR_WCC) != 0) { + if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) + memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) + memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); + } + + /* Verify a few of the more important attributes */ + if (!data_unstable) { + if (!timespec_equal(&inode->i_mtime, &fattr->mtime) + || cur_size != new_isize) + nfsi->flags |= NFS_INO_INVALID_ATTR; + } else if (S_ISREG(inode->i_mode) && new_isize > cur_size) + nfsi->flags |= NFS_INO_INVALID_ATTR; + + /* Have any file permissions changed? */ + if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) + || inode->i_uid != fattr->uid + || inode->i_gid != fattr->gid) + nfsi->flags |= NFS_INO_INVALID_ATTR; + + if (!timespec_equal(&inode->i_atime, &fattr->atime)) + nfsi->flags |= NFS_INO_INVALID_ATIME; + + nfsi->read_cache_jiffies = fattr->timestamp; return 0; } @@ -1059,20 +1110,22 @@ int nfs_fattr_obsolete(struct inode *ino * * A very similar scenario holds for the dir cache. 
*/ -int -__nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) +static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsigned long verifier) { struct nfs_inode *nfsi = NFS_I(inode); __u64 new_size; loff_t new_isize; - int invalid = 0; - int mtime_update = 0; + unsigned int invalid = 0; loff_t cur_isize; + int data_unstable; - dfprintk(VFS, "NFS: refresh_inode(%s/%ld ct=%d info=0x%x)\n", - inode->i_sb->s_id, inode->i_ino, + dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", + __FUNCTION__, inode->i_sb->s_id, inode->i_ino, atomic_read(&inode->i_count), fattr->valid); + if ((fattr->valid & NFS_ATTR_FATTR) == 0) + return 0; + /* First successful call after mount, fill real data. */ if (NFS_FAKE_ROOT(inode)) { dfprintk(VFS, "NFS: updating fake root\n"); @@ -1081,43 +1134,49 @@ __nfs_refresh_inode(struct inode *inode, } if (nfsi->fileid != fattr->fileid) { - printk(KERN_ERR "nfs_refresh_inode: inode number mismatch\n" + printk(KERN_ERR "%s: inode number mismatch\n" "expected (%s/0x%Lx), got (%s/0x%Lx)\n", + __FUNCTION__, inode->i_sb->s_id, (long long)nfsi->fileid, inode->i_sb->s_id, (long long)fattr->fileid); goto out_err; } - /* Throw out obsolete READDIRPLUS attributes */ - if (time_before(fattr->timestamp, NFS_READTIME(inode))) - return 0; /* * Make sure the inode's type hasn't changed. */ if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) goto out_changed; - new_size = fattr->size; - new_isize = nfs_size_to_loff_t(fattr->size); - - /* Avoid races */ - if (nfs_fattr_obsolete(inode, fattr)) - goto out_nochange; - /* * Update the read time so we don't revalidate too often. */ nfsi->read_cache_jiffies = fattr->timestamp; - /* - * Note: NFS_CACHE_ISIZE(inode) reflects the state of the cache. - * NOT inode->i_size!!! - */ - if (nfsi->read_cache_isize != new_size) { + /* Are we racing with known updates of the metadata on the server? */ + data_unstable = ! 
nfs_verify_change_attribute(inode, verifier); + + /* Check if the file size agrees */ + new_size = fattr->size; + new_isize = nfs_size_to_loff_t(fattr->size); + cur_isize = i_size_read(inode); + if (cur_isize != new_size) { #ifdef NFS_DEBUG_VERBOSE printk(KERN_DEBUG "NFS: isize change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino); #endif - invalid = 1; + /* + * If we have pending writebacks, things can get + * messy. + */ + if (S_ISREG(inode->i_mode) && data_unstable) { + if (new_isize > cur_isize) { + i_size_write(inode, new_isize); + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + } + } else { + i_size_write(inode, new_isize); + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + } } /* @@ -1125,12 +1184,13 @@ __nfs_refresh_inode(struct inode *inode, * can change this value in VFS without requiring a * cache revalidation. */ - if (!timespec_equal(&nfsi->read_cache_mtime, &fattr->mtime)) { + if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { + memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); #ifdef NFS_DEBUG_VERBOSE printk(KERN_DEBUG "NFS: mtime change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino); #endif - invalid = 1; - mtime_update = 1; + if (!data_unstable) + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } if ((fattr->valid & NFS_ATTR_FATTR_V4) @@ -1139,47 +1199,15 @@ __nfs_refresh_inode(struct inode *inode, printk(KERN_DEBUG "NFS: change_attr change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino); #endif - invalid = 1; - } - - /* Check Weak Cache Consistency data. - * If size and mtime match the pre-operation values, we can - * assume that any attribute changes were caused by our NFS - * operation, so there's no need to invalidate the caches. 
- */ - if ((fattr->valid & NFS_ATTR_PRE_CHANGE) - && nfsi->change_attr == fattr->pre_change_attr) { - invalid = 0; - } - else if ((fattr->valid & NFS_ATTR_WCC) - && nfsi->read_cache_isize == fattr->pre_size - && timespec_equal(&nfsi->read_cache_mtime, &fattr->pre_mtime)) { - invalid = 0; - } - - /* - * If we have pending writebacks, things can get - * messy. - */ - cur_isize = i_size_read(inode); - if (nfs_have_writebacks(inode) && new_isize < cur_isize) - new_isize = cur_isize; - - nfsi->read_cache_ctime = fattr->ctime; - inode->i_ctime = fattr->ctime; - inode->i_atime = fattr->atime; - - if (mtime_update) { - if (invalid) - nfsi->cache_mtime_jiffies = fattr->timestamp; - nfsi->read_cache_mtime = fattr->mtime; - inode->i_mtime = fattr->mtime; + nfsi->change_attr = fattr->change_attr; + if (!data_unstable) + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } - nfsi->read_cache_isize = new_size; - i_size_write(inode, new_isize); + memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); - if (inode->i_mode != fattr->mode || + if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || inode->i_uid != fattr->uid || inode->i_gid != fattr->gid) { struct rpc_cred **cred = &NFS_I(inode)->cache_access.cred; @@ -1187,15 +1215,17 @@ __nfs_refresh_inode(struct inode *inode, put_rpccred(*cred); *cred = NULL; } + invalid |= NFS_INO_INVALID_ATTR; } - if (fattr->valid & NFS_ATTR_FATTR_V4) - nfsi->change_attr = fattr->change_attr; - inode->i_mode = fattr->mode; inode->i_nlink = fattr->nlink; inode->i_uid = fattr->uid; inode->i_gid = fattr->gid; +#ifdef CONFIG_NFS_V4_ACL + nfs4_izap_acl(inode, &nfsi->acl); + nfs4_izap_acl(inode, &nfsi->default_acl); +#endif /* CONFIG_NFS_V4_ACL */ if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) { /* @@ -1207,31 +1237,30 @@ __nfs_refresh_inode(struct inode *inode, inode->i_blocks = fattr->du.nfs2.blocks; inode->i_blksize = fattr->du.nfs2.blocksize; } - - 
/* Update attrtimeo value */ - if (invalid) { + + /* Update attrtimeo value if we're out of the unstable period */ + if (invalid & NFS_INO_INVALID_ATTR) { nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = jiffies; - invalidate_remote_inode(inode); - memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); } else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) { if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); nfsi->attrtimeo_timestamp = jiffies; } + /* Don't invalidate the data if we were to blame */ + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) + || S_ISLNK(inode->i_mode))) + invalid &= ~NFS_INO_INVALID_DATA; + nfsi->flags |= invalid; return 0; - out_nochange: - if (!timespec_equal(&fattr->atime, &inode->i_atime)) - inode->i_atime = fattr->atime; - return 0; out_changed: /* * Big trouble! The inode has become a different object. */ #ifdef NFS_PARANOIA - printk(KERN_DEBUG "nfs_refresh_inode: inode %ld mode changed, %07o to %07o\n", - inode->i_ino, inode->i_mode, fattr->mode); + printk(KERN_DEBUG "%s: inode %ld mode changed, %07o to %07o\n", + __FUNCTION__, inode->i_ino, inode->i_mode, fattr->mode); #endif /* * No need to worry about unhashing the dentry, as the @@ -1355,6 +1384,82 @@ static struct file_system_type nfs_fs_ty .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT, }; +#ifdef CONFIG_NFS_V4_ACL + +int +nfs_setxattr(struct dentry *dentry, const char *key, const void *buf, + size_t buflen, int flags) +{ + struct posix_acl *acl; + int type, error; + struct inode *inode = dentry->d_inode; + + if (strlen(key) == sizeof(XATTR_NAME_ACL_ACCESS) - 1 && + memcmp(key, XATTR_NAME_ACL_ACCESS, + sizeof(XATTR_NAME_ACL_ACCESS) - 1) == 0) + type = ACL_TYPE_ACCESS; + else if (strlen(key) == sizeof(XATTR_NAME_ACL_DEFAULT) - 1 && + memcmp(key, XATTR_NAME_ACL_DEFAULT, + sizeof(XATTR_NAME_ACL_ACCESS) - 1) == 0) + type = ACL_TYPE_DEFAULT; + else + return (-EINVAL); + + if 
(!S_ISREG(inode->i_mode) && + (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) + return (-EPERM); + + if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) + return -EACCES; + + acl = posix_acl_from_xattr(buf, buflen); + if (IS_ERR(acl)) + return (PTR_ERR(acl)); + if (acl == NULL) + return (-ENODATA); + + error = posix_acl_valid(acl); + if (error) + goto out_free; + + error = nfs4_proc_set_posix_acl(inode, type, acl); +out_free: + posix_acl_release(acl); + return error; +} + +ssize_t +nfs_getxattr(struct dentry *dentry, const char *key, void *buf, + size_t buflen) +{ + int type = 0; + struct inode *inode = dentry->d_inode; + struct posix_acl *acl; + ssize_t ret; + + if (strlen(key) == sizeof(XATTR_NAME_ACL_ACCESS) - 1 && + memcmp(key, XATTR_NAME_ACL_ACCESS, + sizeof(XATTR_NAME_ACL_ACCESS) - 1) == 0) + type = ACL_TYPE_ACCESS; + else if (strlen(key) == sizeof(XATTR_NAME_ACL_DEFAULT) - 1 && + memcmp(key, XATTR_NAME_ACL_DEFAULT, + sizeof(XATTR_NAME_ACL_ACCESS) - 1) == 0) + type = ACL_TYPE_DEFAULT; + else + return (-EINVAL); + + acl = nfs4_proc_get_posix_acl(inode, type); + if (IS_ERR(acl)) + return (PTR_ERR(acl)); + + ret = posix_acl_to_xattr(acl, buf, buflen); + + posix_acl_release(acl); + return ret; +} + +#endif /* CONFIG_NFS_V4_ACL */ + #ifdef CONFIG_NFS_V4 static void nfs4_clear_inode(struct inode *); @@ -1601,7 +1706,7 @@ static struct super_block *nfs4_get_sb(s if (data->version != NFS4_MOUNT_VERSION) { printk("nfs warning: mount version %s than kernel\n", - data->version < NFS_MOUNT_VERSION ? "older" : "newer"); + data->version < NFS4_MOUNT_VERSION ? 
"older" : "newer"); } p = nfs_copy_user_string(NULL, &data->hostname, 256); @@ -1699,6 +1804,10 @@ static struct inode *nfs_alloc_inode(str return NULL; nfsi->flags = 0; nfsi->mm_cred = NULL; +#ifdef CONFIG_NFS_V4_ACL + nfsi->acl = NFS4_ACL_NOT_CACHED; + nfsi->default_acl = NFS4_ACL_NOT_CACHED; +#endif /* CONFIG_NFS_V4_ACL */ nfs4_zero_state(nfsi); return &nfsi->vfs_inode; } @@ -1718,6 +1827,7 @@ static void init_once(void * foo, kmem_c INIT_LIST_HEAD(&nfsi->dirty); INIT_LIST_HEAD(&nfsi->commit); INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); + atomic_set(&nfsi->data_updates, 0); nfsi->ndirty = 0; nfsi->ncommit = 0; nfsi->npages = 0; diff -puN fs/nfs/nfs3proc.c~CITI_NFS4_ALL fs/nfs/nfs3proc.c --- linux-2.6.3/fs/nfs/nfs3proc.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 +++ linux-2.6.3-bfields/fs/nfs/nfs3proc.c 2004-02-19 16:47:07.000000000 -0500 @@ -68,20 +68,6 @@ nfs3_async_handle_jukebox(struct rpc_tas return 1; } -static void -nfs3_write_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) -{ - if (fattr->valid & NFS_ATTR_FATTR) { - if (!(fattr->valid & NFS_ATTR_WCC)) { - fattr->pre_size = NFS_CACHE_ISIZE(inode); - fattr->pre_mtime = NFS_CACHE_MTIME(inode); - fattr->pre_ctime = NFS_CACHE_CTIME(inode); - fattr->valid |= NFS_ATTR_WCC; - } - nfs_refresh_inode(inode, fattr); - } -} - static struct rpc_cred * nfs_cred(struct inode *inode, struct file *filp) { @@ -99,14 +85,18 @@ nfs_cred(struct inode *inode, struct fil */ static int nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) + struct nfs_fsinfo *info) { int status; - dprintk("NFS call getroot\n"); - fattr->valid = 0; - status = rpc_call(server->client, NFS3PROC_GETATTR, fhandle, fattr, 0); - dprintk("NFS reply getroot\n"); + dprintk("%s: call fsinfo\n", __FUNCTION__); + info->fattr->valid = 0; + status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0); + dprintk("%s: reply fsinfo %d\n", __FUNCTION__, status); + if 
(!(info->fattr->valid & NFS_ATTR_FATTR)) { + status = rpc_call(server->client_sys, NFS3PROC_GETATTR, fhandle, info->fattr, 0); + dprintk("%s: reply getattr %d\n", __FUNCTION__, status); + } return status; } @@ -280,7 +270,7 @@ nfs3_proc_write(struct nfs_write_data *w msg.rpc_cred = nfs_cred(inode, filp); status = rpc_call_sync(NFS_CLIENT(inode), &msg, rpcflags); if (status >= 0) - nfs3_write_refresh_inode(inode, fattr); + nfs_refresh_inode(inode, fattr); dprintk("NFS reply write: %d\n", status); return status < 0? status : wdata->res.count; } @@ -303,7 +293,7 @@ nfs3_proc_commit(struct nfs_write_data * msg.rpc_cred = nfs_cred(inode, filp); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status >= 0) - nfs3_write_refresh_inode(inode, fattr); + nfs_refresh_inode(inode, fattr); dprintk("NFS reply commit: %d\n", status); return status; } @@ -777,12 +767,13 @@ nfs3_proc_read_setup(struct nfs_read_dat static void nfs3_write_done(struct rpc_task *task) { - struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; + struct nfs_write_data *data; if (nfs3_async_handle_jukebox(task)) return; + data = (struct nfs_write_data *)task->tk_calldata; if (task->tk_status >= 0) - nfs3_write_refresh_inode(data->inode, data->res.fattr); + nfs_refresh_inode(data->inode, data->res.fattr); nfs_writeback_done(task); } @@ -835,12 +826,13 @@ nfs3_proc_write_setup(struct nfs_write_d static void nfs3_commit_done(struct rpc_task *task) { - struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; + struct nfs_write_data *data; if (nfs3_async_handle_jukebox(task)) return; + data = (struct nfs_write_data *)task->tk_calldata; if (task->tk_status >= 0) - nfs3_write_refresh_inode(data->inode, data->res.fattr); + nfs_refresh_inode(data->inode, data->res.fattr); nfs_commit_done(task); } @@ -907,6 +899,7 @@ struct nfs_rpc_ops nfs_v3_clientops = { .version = 3, /* protocol version */ .dentry_ops = &nfs_dentry_operations, .dir_inode_ops = 
&nfs_dir_inode_operations, + .file_inode_ops = &nfs_file_inode_operations, .getroot = nfs3_proc_get_root, .getattr = nfs3_proc_getattr, .setattr = nfs3_proc_setattr, diff -puN fs/nfs/nfs4proc.c~CITI_NFS4_ALL fs/nfs/nfs4proc.c --- linux-2.6.3/fs/nfs/nfs4proc.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 +++ linux-2.6.3-bfields/fs/nfs/nfs4proc.c 2004-02-19 16:47:15.000000000 -0500 @@ -46,112 +46,20 @@ #include #include #include +#include +#include #define NFSDBG_FACILITY NFSDBG_PROC #define NFS4_POLL_RETRY_TIME (15*HZ) -#define GET_OP(cp,name) &cp->ops[cp->req_nops].u.name -#define OPNUM(cp) cp->ops[cp->req_nops].opnum - +static int nfs4_proc_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *); extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); extern struct rpc_procinfo nfs4_procedures[]; extern nfs4_stateid zero_stateid; -static void -nfs4_setup_compound(struct nfs4_compound *cp, struct nfs4_op *ops, - struct nfs_server *server, char *tag) -{ - memset(cp, 0, sizeof(*cp)); - cp->ops = ops; - cp->server = server; -} - -static void -nfs4_setup_access(struct nfs4_compound *cp, u32 req_access, u32 *resp_supported, u32 *resp_access) -{ - struct nfs4_access *access = GET_OP(cp, access); - - access->ac_req_access = req_access; - access->ac_resp_supported = resp_supported; - access->ac_resp_access = resp_access; - - OPNUM(cp) = OP_ACCESS; - cp->req_nops++; -} - -static void -nfs4_setup_create_dir(struct nfs4_compound *cp, struct qstr *name, - struct iattr *sattr, struct nfs4_change_info *info) -{ - struct nfs4_create *create = GET_OP(cp, create); - - create->cr_ftype = NF4DIR; - create->cr_namelen = name->len; - create->cr_name = name->name; - create->cr_attrs = sattr; - create->cr_cinfo = info; - - OPNUM(cp) = OP_CREATE; - cp->req_nops++; -} - -static void -nfs4_setup_create_symlink(struct nfs4_compound *cp, struct qstr *name, - struct qstr *linktext, 
struct iattr *sattr, - struct nfs4_change_info *info) -{ - struct nfs4_create *create = GET_OP(cp, create); - - create->cr_ftype = NF4LNK; - create->cr_textlen = linktext->len; - create->cr_text = linktext->name; - create->cr_namelen = name->len; - create->cr_name = name->name; - create->cr_attrs = sattr; - create->cr_cinfo = info; - - OPNUM(cp) = OP_CREATE; - cp->req_nops++; -} - -static void -nfs4_setup_create_special(struct nfs4_compound *cp, struct qstr *name, - dev_t dev, struct iattr *sattr, - struct nfs4_change_info *info) -{ - int mode = sattr->ia_mode; - struct nfs4_create *create = GET_OP(cp, create); - - BUG_ON(!(sattr->ia_valid & ATTR_MODE)); - BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode)); - - if (S_ISFIFO(mode)) - create->cr_ftype = NF4FIFO; - else if (S_ISBLK(mode)) { - create->cr_ftype = NF4BLK; - create->cr_specdata1 = MAJOR(dev); - create->cr_specdata2 = MINOR(dev); - } - else if (S_ISCHR(mode)) { - create->cr_ftype = NF4CHR; - create->cr_specdata1 = MAJOR(dev); - create->cr_specdata2 = MINOR(dev); - } - else - create->cr_ftype = NF4SOCK; - - create->cr_namelen = name->len; - create->cr_name = name->name; - create->cr_attrs = sattr; - create->cr_cinfo = info; - - OPNUM(cp) = OP_CREATE; - cp->req_nops++; -} - /* * This is our standard bitmap for GETATTR requests. 
*/ @@ -181,126 +89,15 @@ u32 nfs4_statfs_bitmap[2] = { | FATTR4_WORD1_SPACE_TOTAL }; -u32 nfs4_pathconf_bitmap[2] = { - FATTR4_WORD0_MAXLINK - | FATTR4_WORD0_MAXNAME, - 0 -}; - -static inline void -__nfs4_setup_getattr(struct nfs4_compound *cp, u32 *bitmap, - struct nfs_fattr *fattr, - struct nfs_fsstat *fsstat, - struct nfs_pathconf *pathconf) -{ - struct nfs4_getattr *getattr = GET_OP(cp, getattr); - - getattr->gt_bmval = bitmap; - getattr->gt_attrs = fattr; - getattr->gt_fsstat = fsstat; - getattr->gt_pathconf = pathconf; - - OPNUM(cp) = OP_GETATTR; - cp->req_nops++; -} - -static void -nfs4_setup_getattr(struct nfs4_compound *cp, - struct nfs_fattr *fattr) -{ - __nfs4_setup_getattr(cp, nfs4_fattr_bitmap, fattr, - NULL, NULL); -} - -static void -nfs4_setup_statfs(struct nfs4_compound *cp, - struct nfs_fsstat *fsstat) -{ - __nfs4_setup_getattr(cp, nfs4_statfs_bitmap, - NULL, fsstat, NULL); -} - -static void -nfs4_setup_pathconf(struct nfs4_compound *cp, - struct nfs_pathconf *pathconf) -{ - __nfs4_setup_getattr(cp, nfs4_pathconf_bitmap, - NULL, NULL, pathconf); -} - -static void -nfs4_setup_getfh(struct nfs4_compound *cp, struct nfs_fh *fhandle) -{ - struct nfs4_getfh *getfh = GET_OP(cp, getfh); - - getfh->gf_fhandle = fhandle; - - OPNUM(cp) = OP_GETFH; - cp->req_nops++; -} - -static void -nfs4_setup_link(struct nfs4_compound *cp, struct qstr *name, - struct nfs4_change_info *info) -{ - struct nfs4_link *link = GET_OP(cp, link); - - link->ln_namelen = name->len; - link->ln_name = name->name; - link->ln_cinfo = info; - - OPNUM(cp) = OP_LINK; - cp->req_nops++; -} - static void -nfs4_setup_lookup(struct nfs4_compound *cp, struct qstr *q) -{ - struct nfs4_lookup *lookup = GET_OP(cp, lookup); - - lookup->lo_name = q; - - OPNUM(cp) = OP_LOOKUP; - cp->req_nops++; -} - -static void -nfs4_setup_putfh(struct nfs4_compound *cp, struct nfs_fh *fhandle) -{ - struct nfs4_putfh *putfh = GET_OP(cp, putfh); - - putfh->pf_fhandle = fhandle; - - OPNUM(cp) = OP_PUTFH; - 
cp->req_nops++; -} - -static void -nfs4_setup_putrootfh(struct nfs4_compound *cp) -{ - OPNUM(cp) = OP_PUTROOTFH; - cp->req_nops++; -} - -static void -nfs4_setup_readdir(struct nfs4_compound *cp, u64 cookie, u32 *verifier, - struct page **pages, unsigned int bufsize, struct dentry *dentry) +nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry, struct nfs4_readdir_arg *readdir) { u32 *start, *p; - struct nfs4_readdir *readdir = GET_OP(cp, readdir); - BUG_ON(bufsize < 80); - readdir->rd_cookie = (cookie > 2) ? cookie : 0; - memcpy(&readdir->rd_req_verifier, verifier, sizeof(readdir->rd_req_verifier)); - readdir->rd_count = bufsize; - readdir->rd_bmval[0] = FATTR4_WORD0_FILEID; - readdir->rd_bmval[1] = 0; - readdir->rd_pages = pages; - readdir->rd_pgbase = 0; + BUG_ON(readdir->count < 80); + readdir->cookie = (cookie > 2) ? cookie : 0; + memcpy(&readdir->req_verifier, verifier, sizeof(readdir->req_verifier)); - OPNUM(cp) = OP_READDIR; - cp->req_nops++; - if (cookie >= 2) return; @@ -311,7 +108,7 @@ nfs4_setup_readdir(struct nfs4_compound * when talking to the server, we always send cookie 0 * instead of 1 or 2. 
*/ - start = p = (u32 *)kmap_atomic(*pages, KM_USER0); + start = p = (u32 *)kmap_atomic(*readdir->pages, KM_USER0); if (cookie == 0) { *p++ = xdr_one; /* next */ @@ -337,68 +134,12 @@ nfs4_setup_readdir(struct nfs4_compound *p++ = htonl(8); /* attribute buffer length */ p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_parent->d_inode)); - readdir->rd_pgbase = (char *)p - (char *)start; - readdir->rd_count -= readdir->rd_pgbase; + readdir->pgbase = (char *)p - (char *)start; + readdir->count -= readdir->pgbase; kunmap_atomic(start, KM_USER0); } static void -nfs4_setup_readlink(struct nfs4_compound *cp, int count, struct page **pages) -{ - struct nfs4_readlink *readlink = GET_OP(cp, readlink); - - readlink->rl_count = count; - readlink->rl_pages = pages; - - OPNUM(cp) = OP_READLINK; - cp->req_nops++; -} - -static void -nfs4_setup_remove(struct nfs4_compound *cp, struct qstr *name, struct nfs4_change_info *cinfo) -{ - struct nfs4_remove *remove = GET_OP(cp, remove); - - remove->rm_namelen = name->len; - remove->rm_name = name->name; - remove->rm_cinfo = cinfo; - - OPNUM(cp) = OP_REMOVE; - cp->req_nops++; -} - -static void -nfs4_setup_rename(struct nfs4_compound *cp, struct qstr *old, struct qstr *new, - struct nfs4_change_info *old_cinfo, struct nfs4_change_info *new_cinfo) -{ - struct nfs4_rename *rename = GET_OP(cp, rename); - - rename->rn_oldnamelen = old->len; - rename->rn_oldname = old->name; - rename->rn_newnamelen = new->len; - rename->rn_newname = new->name; - rename->rn_src_cinfo = old_cinfo; - rename->rn_dst_cinfo = new_cinfo; - - OPNUM(cp) = OP_RENAME; - cp->req_nops++; -} - -static void -nfs4_setup_restorefh(struct nfs4_compound *cp) -{ - OPNUM(cp) = OP_RESTOREFH; - cp->req_nops++; -} - -static void -nfs4_setup_savefh(struct nfs4_compound *cp) -{ - OPNUM(cp) = OP_SAVEFH; - cp->req_nops++; -} - -static void renew_lease(struct nfs_server *server, unsigned long timestamp) { struct nfs4_client *clp = server->nfs4_state; @@ -409,47 +150,6 @@ renew_lease(struct 
nfs_server *server, u } static inline void -process_lease(struct nfs4_compound *cp) -{ - /* - * Generic lease processing: If this operation contains a - * lease-renewing operation, and it succeeded, update the RENEW time - * in the superblock. Instead of the current time, we use the time - * when the request was sent out. (All we know is that the lease was - * renewed sometime between then and now, and we have to assume the - * worst case.) - * - * Notes: - * (1) renewd doesn't acquire the spinlock when messing with - * server->last_renewal; this is OK since rpciod always runs - * under the BKL. - * (2) cp->timestamp was set at the end of XDR encode. - */ - if (!cp->renew_index) - return; - if (!cp->toplevel_status || cp->resp_nops > cp->renew_index) - renew_lease(cp->server, cp->timestamp); -} - -static int -nfs4_call_compound(struct nfs4_compound *cp, struct rpc_cred *cred, int flags) -{ - int status; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMPOUND], - .rpc_argp = cp, - .rpc_resp = cp, - .rpc_cred = cred, - }; - - status = rpc_call_sync(cp->server->client, &msg, flags); - if (!status) - process_lease(cp); - - return status; -} - -static inline void process_cinfo(struct nfs4_change_info *info, struct nfs_fattr *fattr) { BUG_ON((fattr->valid & NFS_ATTR_FATTR) == 0); @@ -476,11 +176,6 @@ nfs4_open_reclaim(struct nfs4_state_owne .valid = 0, }; struct nfs4_change_info d_cinfo; - struct nfs4_getattr f_getattr = { - .gt_bmval = nfs4_fattr_bitmap, - .gt_attrs = &fattr, - }; - struct nfs_open_reclaimargs o_arg = { .fh = NFS_FH(inode), .seqid = sp->so_seqid, @@ -488,11 +183,10 @@ nfs4_open_reclaim(struct nfs4_state_owne .share_access = state->state, .clientid = server->nfs4_state->cl_clientid, .claim = NFS4_OPEN_CLAIM_PREVIOUS, - .f_getattr = &f_getattr, }; struct nfs_openres o_res = { - .cinfo = &d_cinfo, - .f_getattr = &f_getattr, + .cinfo = &d_cinfo, + .f_attr = &fattr, .server = server, /* Grrr */ }; struct rpc_message msg = { @@ 
-528,28 +222,18 @@ nfs4_do_open(struct inode *dir, struct q struct nfs_fattr f_attr = { .valid = 0, }; - struct nfs4_getattr f_getattr = { - .gt_bmval = nfs4_fattr_bitmap, - .gt_attrs = &f_attr, - }; - struct nfs4_getattr d_getattr = { - .gt_bmval = nfs4_fattr_bitmap, - .gt_attrs = &d_attr, - }; struct nfs_openargs o_arg = { .fh = NFS_FH(dir), .share_access = flags & (FMODE_READ|FMODE_WRITE), .opentype = (flags & O_CREAT) ? NFS4_OPEN_CREATE : NFS4_OPEN_NOCREATE, .createmode = (flags & O_EXCL) ? NFS4_CREATE_EXCLUSIVE : NFS4_CREATE_UNCHECKED, .name = name, - .f_getattr = &f_getattr, - .d_getattr = &d_getattr, .server = server, }; struct nfs_openres o_res = { .cinfo = &d_cinfo, - .f_getattr = &f_getattr, - .d_getattr = &d_getattr, + .f_attr = &f_attr, + .d_attr = &d_attr, .server = server, }; struct rpc_message msg = { @@ -665,18 +349,14 @@ nfs4_do_setattr(struct nfs_server *serve struct nfs_fh *fhandle, struct iattr *sattr, struct nfs4_state *state) { - struct nfs4_getattr getattr = { - .gt_bmval = nfs4_fattr_bitmap, - .gt_attrs = fattr, - }; struct nfs_setattrargs arg = { .fh = fhandle, .iap = sattr, - .attr = &getattr, + .fattr = fattr, .server = server, }; struct nfs_setattrres res = { - .attr = &getattr, + .fattr = fattr, .server = server, }; struct rpc_message msg = { @@ -822,27 +502,43 @@ nfs4_open_revalidate(struct inode *dir, static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) + struct nfs_fsinfo *info) { - struct nfs4_compound compound; - struct nfs4_op ops[4]; + struct nfs_fattr * fattr = info->fattr; unsigned char * p; struct qstr q; int status; + struct nfs4_getroot_arg args = { + .fhandle = fhandle, + .name = &q, + }; + struct nfs4_getroot_res res = { + .server = server, + .fattr = fattr, + .fhandle = fhandle, + }; + struct rpc_message msg_head = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETROOT_HEAD], + .rpc_argp = NULL, + .rpc_resp = &res, + }; + struct rpc_message msg_path = { + .rpc_proc = 
&nfs4_procedures[NFSPROC4_CLNT_GETROOT_PATH], + .rpc_argp = &args, + .rpc_resp = &res, + }; /* * Now we do a separate LOOKUP for each component of the mount path. * The LOOKUPs are done separately so that we can conveniently * catch an ERR_WRONGSEC if it occurs along the way... */ - p = server->mnt_path; fattr->valid = 0; - nfs4_setup_compound(&compound, ops, server, "getrootfh"); - nfs4_setup_putrootfh(&compound); - nfs4_setup_getattr(&compound, fattr); - nfs4_setup_getfh(&compound, fhandle); - if ((status = nfs4_call_compound(&compound, NULL, 0))) + status = rpc_call_sync(server->client, &msg_head, 0); + if (status) goto out; + + p = server->mnt_path; for (;;) { while (*p == '/') p++; @@ -854,12 +550,7 @@ nfs4_proc_get_root(struct nfs_server *se q.len = p - q.name; fattr->valid = 0; - nfs4_setup_compound(&compound, ops, server, "mount"); - nfs4_setup_putfh(&compound, fhandle); - nfs4_setup_lookup(&compound, &q); - nfs4_setup_getattr(&compound, fattr); - nfs4_setup_getfh(&compound, fhandle); - status = nfs4_call_compound(&compound, NULL, 0); + status = rpc_call_sync(server->client,&msg_path,0); if (!status) continue; if (status == -ENOENT) { @@ -869,21 +560,27 @@ nfs4_proc_get_root(struct nfs_server *se break; } out: - return status; + if (status) + return status; + return nfs4_proc_fsinfo(server, fhandle, info); } static int nfs4_proc_getattr(struct inode *inode, struct nfs_fattr *fattr) { - struct nfs4_compound compound; - struct nfs4_op ops[2]; - + struct nfs4_getattr_res res = { + .fattr = fattr, + .server = NFS_SERVER(inode), + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETATTR], + .rpc_argp = NFS_FH(inode), + .rpc_resp = &res, + }; + fattr->valid = 0; - nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "getattr"); - nfs4_setup_putfh(&compound, NFS_FH(inode)); - nfs4_setup_getattr(&compound, fattr); - return nfs4_call_compound(&compound, NULL, 0); + return rpc_call_sync(NFS_CLIENT(inode), &msg, 0); } /* @@ -945,26 
+642,218 @@ out: return status; } +#ifdef CONFIG_NFS_V4_ACL + +static inline int +nfs_name_to_uid_wrapper(void *arg, const char *name, size_t len, __u32 *id) +{ + return nfs_map_name_to_uid((struct nfs4_client *)arg, name, len, id); +} + +static inline int +nfs_name_to_gid_wrapper(void *arg, const char *name, size_t len, __u32 *id) +{ + return nfs_map_group_to_gid((struct nfs4_client*)arg, name, len, id); +} + +static inline int +nfs_uid_to_name_wrapper(void *arg, __u32 id, char *name) +{ + return nfs_map_uid_to_name((struct nfs4_client *)arg, id, name); +} + +static inline int +nfs_gid_to_name_wrapper(void *arg, __u32 id, char *name) +{ + return nfs_map_gid_to_group((struct nfs4_client *)arg, id, name); +} + +static struct nfs4_acl_idmapper nfs4_idmapper = { + .name2uid = nfs_name_to_uid_wrapper, + .name2gid = nfs_name_to_gid_wrapper, + .uid2name = nfs_uid_to_name_wrapper, + .gid2name = nfs_gid_to_name_wrapper, +}; + +/* From fs/ext2/acl.c: */ + +static inline struct posix_acl * +nfs4_iget_acl(struct inode *inode, struct posix_acl **i_acl) +{ + struct posix_acl *acl = NFS4_ACL_NOT_CACHED; + + spin_lock(&inode->i_lock); + if (*i_acl != NFS4_ACL_NOT_CACHED) + acl = posix_acl_dup(*i_acl); + spin_unlock(&inode->i_lock); + return acl; +} + +void +nfs4_iset_acl(struct inode *inode, struct posix_acl **i_acl, + struct posix_acl *acl) +{ + spin_lock(&inode->i_lock); + if (*i_acl != NFS4_ACL_NOT_CACHED) + posix_acl_release(*i_acl); + *i_acl = posix_acl_dup(acl); + spin_unlock(&inode->i_lock); +} + +void +nfs4_izap_acl(struct inode *inode, struct posix_acl **i_acl) +{ + spin_lock(&inode->i_lock); + if (*i_acl != NFS4_ACL_NOT_CACHED) + posix_acl_release(*i_acl); + *i_acl = NFS4_ACL_NOT_CACHED; + spin_unlock(&inode->i_lock); +} + +struct posix_acl * +nfs4_proc_get_posix_acl(struct inode *inode, int type) +{ + struct nfs4_acl *acl = NULL; + int error; + struct posix_acl *pacl, *dpacl, *ret = NULL; + struct rpc_message msg = { + .rpc_proc = 
&nfs4_procedures[NFSPROC4_CLNT_GETACL], + .rpc_argp = NFS_FH(inode), + .rpc_resp = &acl, + }; + + lock_kernel(); + error = nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (error < 0) { + unlock_kernel(); + return ERR_PTR(error); + } + if (type == ACL_TYPE_ACCESS) + ret = nfs4_iget_acl(inode, &NFS_I(inode)->acl); + else + ret = nfs4_iget_acl(inode, &NFS_I(inode)->default_acl); + + if (ret != NFS4_ACL_NOT_CACHED) { + if (ret == NULL) + ret = ERR_PTR(-ENODATA); + unlock_kernel(); + return ret; + } + + error = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + unlock_kernel(); + + if (error < 0) + goto out_free; + + error = -ENODATA; + if (acl == NULL) + goto out_free; + + error = nfs4_acl_nfsv4_to_posix(&nfs4_idmapper, NFS_SERVER(inode)->nfs4_state, acl, &pacl, &dpacl); + if (error < 0) + goto out_free; + + error = -ERANGE; + if (pacl && pacl->a_count > NFS_ACL_MAX_ENTRIES) + goto out_free; + if (dpacl && dpacl->a_count > NFS_ACL_MAX_ENTRIES) + goto out_free; + + nfs4_iset_acl(inode, &NFS_I(inode)->acl, pacl); + nfs4_iset_acl(inode, &NFS_I(inode)->default_acl, dpacl); + + ret = (type == ACL_TYPE_ACCESS) ? 
pacl : dpacl; + error = -ENODATA; + if (ret == NULL) + goto out_free; + error = 0; +out_free: + if (error < 0) + ret = ERR_PTR(error); + nfs4_acl_free(acl); + return ret; +} + +int +nfs4_proc_set_posix_acl(struct inode *inode, int type, struct posix_acl *pacl) +{ + struct iattr ia; + struct nfs4_acl *acl; + struct nfs_fattr fattr; + int error; + struct nfs_setaclargs arg = { + .fh = NFS_FH(inode), + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETACL], + .rpc_argp = &arg, + .rpc_resp = NULL, + }; + + ia.ia_valid = 0; + fattr.valid = 0; + + if (pacl && pacl->a_count > NFS_ACL_MAX_ENTRIES) + return -ERANGE; + + if (type == ACL_TYPE_ACCESS) + acl = nfs4_acl_posix_to_nfsv4(&nfs4_idmapper, NFS_SERVER(inode)->nfs4_state, pacl, NULL); + else + acl = nfs4_acl_posix_to_nfsv4(&nfs4_idmapper, NFS_SERVER(inode)->nfs4_state, NULL, pacl); + if (IS_ERR(acl)) + return PTR_ERR(acl); + arg.acl = acl; + + lock_kernel(); + error = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0); + unlock_kernel(); + + nfs4_acl_free(acl); + + if (error) + return error; + + if (type == ACL_TYPE_ACCESS) + nfs4_iset_acl(inode, &NFS_I(inode)->acl, pacl); + else + nfs4_iset_acl(inode, &NFS_I(inode)->default_acl, pacl); + + if (type == ACL_TYPE_ACCESS) + posix_acl_equiv_mode(pacl, &inode->i_mode); + + return error; +} + +#endif /* CONFIG_NFS_V4_ACL */ + static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { - struct nfs4_compound compound; - struct nfs4_op ops[5]; - struct nfs_fattr dir_attr; - int status; - + struct nfs_fattr dir_attr; + int status; + struct nfs4_lookupargs args = { + .dir_fh = NFS_FH(dir), + .name = name, + }; + struct nfs4_lookupres res = { + .server = NFS_SERVER(dir), + .dirattr = &dir_attr, + .fattr = fattr, + .fhandle = fhandle, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP], + .rpc_argp = &args, + .rpc_resp = &res, + }; + dir_attr.valid = 0; 
fattr->valid = 0; dprintk("NFS call lookup %s\n", name->name); - nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "lookup"); - nfs4_setup_putfh(&compound, NFS_FH(dir)); - nfs4_setup_getattr(&compound, &dir_attr); - nfs4_setup_lookup(&compound, name); - nfs4_setup_getattr(&compound, fattr); - nfs4_setup_getfh(&compound, fhandle); - status = nfs4_call_compound(&compound, NULL, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); dprintk("NFS reply lookup: %d\n", status); if (status >= 0) @@ -975,11 +864,24 @@ nfs4_proc_lookup(struct inode *dir, stru static int nfs4_proc_access(struct inode *inode, struct rpc_cred *cred, int mode) { - struct nfs4_compound compound; - struct nfs4_op ops[3]; struct nfs_fattr fattr; u32 req_access = 0, resp_supported, resp_access; int status; + struct nfs4_accessargs args = { + .fhandle = NFS_FH(inode), + }; + struct nfs4_accessres res = { + .server = NFS_SERVER(inode), + .fattr = &fattr, + .resp_supported = &resp_supported, + .resp_access = &resp_access, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS], + .rpc_argp = &args, + .rpc_resp = &res, + .rpc_cred = cred, + }; fattr.valid = 0; @@ -1000,12 +902,9 @@ nfs4_proc_access(struct inode *inode, st if (mode & MAY_EXEC) req_access |= NFS4_ACCESS_EXECUTE; } + res.req_access = args.req_access = req_access; - nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "access"); - nfs4_setup_putfh(&compound, NFS_FH(inode)); - nfs4_setup_getattr(&compound, &fattr); - nfs4_setup_access(&compound, req_access, &resp_supported, &resp_access); - status = nfs4_call_compound(&compound, cred, 0); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); nfs_refresh_inode(inode, &fattr); if (!status) { @@ -1046,13 +945,18 @@ nfs4_proc_access(struct inode *inode, st static int nfs4_proc_readlink(struct inode *inode, struct page *page) { - struct nfs4_compound compound; - struct nfs4_op ops[2]; + struct nfs4_readlink args = { + .fh = NFS_FH(inode), + .count = 
PAGE_CACHE_SIZE, + .pages = &page, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READLINK], + .rpc_argp = &args, + .rpc_resp = NULL, + }; - nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "readlink"); - nfs4_setup_putfh(&compound, NFS_FH(inode)); - nfs4_setup_readlink(&compound, PAGE_CACHE_SIZE, &page); - return nfs4_call_compound(&compound, NULL, 0); + return rpc_call_sync(NFS_CLIENT(inode), &msg, 0); } static int @@ -1088,12 +992,8 @@ nfs4_proc_read(struct nfs_read_data *rda fattr->valid = 0; status = rpc_call_sync(server->client, &msg, flags); - if (!status) { + if (!status) renew_lease(server, timestamp); - /* Check cache consistency */ - if (fattr->change_attr != NFS_CHANGE_ATTR(inode)) - nfs_zap_caches(inode); - } dprintk("NFS reply read: %d\n", status); return status; } @@ -1130,7 +1030,6 @@ nfs4_proc_write(struct nfs_write_data *w fattr->valid = 0; status = rpc_call_sync(server->client, &msg, rpcflags); - NFS_CACHEINV(inode); dprintk("NFS reply write: %d\n", status); return status; } @@ -1217,18 +1116,26 @@ nfs4_proc_create(struct inode *dir, stru static int nfs4_proc_remove(struct inode *dir, struct qstr *name) { - struct nfs4_compound compound; - struct nfs4_op ops[3]; struct nfs4_change_info dir_cinfo; struct nfs_fattr dir_attr; int status; + struct nfs4_remove_arg args = { + .fhandle = NFS_FH(dir), + .name = name, + }; + struct nfs4_remove_res res = { + .server = NFS_SERVER(dir), + .dir_cinfo = &dir_cinfo, + .dir_attr = &dir_attr, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE], + .rpc_argp = &args, + .rpc_resp = &res, + }; dir_attr.valid = 0; - nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "remove"); - nfs4_setup_putfh(&compound, NFS_FH(dir)); - nfs4_setup_remove(&compound, name, &dir_cinfo); - nfs4_setup_getattr(&compound, &dir_attr); - status = nfs4_call_compound(&compound, NULL, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); if (!status) { 
process_cinfo(&dir_cinfo, &dir_attr); @@ -1237,32 +1144,22 @@ nfs4_proc_remove(struct inode *dir, stru return status; } -struct unlink_desc { - struct nfs4_compound compound; - struct nfs4_op ops[3]; - struct nfs4_change_info cinfo; - struct nfs_fattr attrs; -}; - static int nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, struct qstr *name) { - struct unlink_desc * up; - struct nfs4_compound * cp; + struct nfs4_unlink *up; - up = (struct unlink_desc *) kmalloc(sizeof(*up), GFP_KERNEL); + up = (struct nfs4_unlink *) kmalloc(sizeof(*up), GFP_KERNEL); if (!up) return -ENOMEM; - cp = &up->compound; - nfs4_setup_compound(cp, up->ops, NFS_SERVER(dir->d_inode), "unlink_setup"); - nfs4_setup_putfh(cp, NFS_FH(dir->d_inode)); - nfs4_setup_remove(cp, name, &up->cinfo); - nfs4_setup_getattr(cp, &up->attrs); - - msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMPOUND]; - msg->rpc_argp = cp; - msg->rpc_resp = cp; + up->server = NFS_SERVER(dir->d_inode); + up->fh = NFS_FH(dir->d_inode); + up->name = name; + + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_UNLINK]; + msg->rpc_argp = up; + msg->rpc_resp = up; return 0; } @@ -1270,11 +1167,10 @@ static int nfs4_proc_unlink_done(struct dentry *dir, struct rpc_task *task) { struct rpc_message *msg = &task->tk_msg; - struct unlink_desc *up; + struct nfs4_unlink *up; if (msg->rpc_argp) { - up = (struct unlink_desc *) msg->rpc_argp; - process_lease(&up->compound); + up = (struct nfs4_unlink *) msg->rpc_argp; process_cinfo(&up->cinfo, &up->attrs); nfs_refresh_inode(dir->d_inode, &up->attrs); kfree(up); @@ -1287,24 +1183,32 @@ static int nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, struct inode *new_dir, struct qstr *new_name) { - struct nfs4_compound compound; - struct nfs4_op ops[7]; struct nfs4_change_info old_cinfo, new_cinfo; struct nfs_fattr old_dir_attr, new_dir_attr; int status; - + struct nfs4_rename_arg arg = { + .old_dir = NFS_FH(old_dir), + .new_dir = NFS_FH(new_dir), + .old_name = 
old_name, + .new_name = new_name, + }; + struct nfs4_rename_res res = { + .server = NFS_SERVER(old_dir), + .old_cinfo = &old_cinfo, + .new_cinfo = &new_cinfo, + .old_fattr = &old_dir_attr, + .new_fattr = &new_dir_attr, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + old_dir_attr.valid = 0; new_dir_attr.valid = 0; - nfs4_setup_compound(&compound, ops, NFS_SERVER(old_dir), "rename"); - nfs4_setup_putfh(&compound, NFS_FH(old_dir)); - nfs4_setup_savefh(&compound); - nfs4_setup_putfh(&compound, NFS_FH(new_dir)); - nfs4_setup_rename(&compound, old_name, new_name, &old_cinfo, &new_cinfo); - nfs4_setup_getattr(&compound, &new_dir_attr); - nfs4_setup_restorefh(&compound); - nfs4_setup_getattr(&compound, &old_dir_attr); - status = nfs4_call_compound(&compound, NULL, 0); + status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); if (!status) { process_cinfo(&old_cinfo, &old_dir_attr); @@ -1318,24 +1222,30 @@ nfs4_proc_rename(struct inode *old_dir, static int nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) { - struct nfs4_compound compound; - struct nfs4_op ops[7]; struct nfs4_change_info dir_cinfo; struct nfs_fattr dir_attr, fattr; int status; - + struct nfs4_link_arg arg = { + .fh = NFS_FH(inode), + .dir_fh = NFS_FH(dir), + .name = name, + }; + struct nfs4_link_res res = { + .server = NFS_SERVER(inode), + .fattr = &fattr, + .dir_attr = &dir_attr, + .dir_cinfo = &dir_cinfo, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + dir_attr.valid = 0; fattr.valid = 0; - nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "link"); - nfs4_setup_putfh(&compound, NFS_FH(inode)); - nfs4_setup_savefh(&compound); - nfs4_setup_putfh(&compound, NFS_FH(dir)); - nfs4_setup_link(&compound, name, &dir_cinfo); - nfs4_setup_getattr(&compound, &dir_attr); - nfs4_setup_restorefh(&compound); - 
nfs4_setup_getattr(&compound, &fattr); - status = nfs4_call_compound(&compound, NULL, 0); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (!status) { process_cinfo(&dir_cinfo, &dir_attr); @@ -1350,24 +1260,34 @@ nfs4_proc_symlink(struct inode *dir, str struct iattr *sattr, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { - struct nfs4_compound compound; - struct nfs4_op ops[7]; struct nfs_fattr dir_attr; struct nfs4_change_info dir_cinfo; int status; + struct nfs4_create_arg arg = { + .dir_fh = NFS_FH(dir), + .server = NFS_SERVER(dir), + .name = name, + .u.symlink = path, + .attrs = sattr, + .ftype = NF4LNK, + }; + struct nfs4_create_res res = { + .server = NFS_SERVER(dir), + .fhandle = fhandle, + .fattr = fattr, + .dir_attr = &dir_attr, + .dir_cinfo = &dir_cinfo, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE], + .rpc_argp = &arg, + .rpc_resp = &res, + }; dir_attr.valid = 0; fattr->valid = 0; - nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "symlink"); - nfs4_setup_putfh(&compound, NFS_FH(dir)); - nfs4_setup_savefh(&compound); - nfs4_setup_create_symlink(&compound, name, path, sattr, &dir_cinfo); - nfs4_setup_getattr(&compound, fattr); - nfs4_setup_getfh(&compound, fhandle); - nfs4_setup_restorefh(&compound); - nfs4_setup_getattr(&compound, &dir_attr); - status = nfs4_call_compound(&compound, NULL, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); if (!status) { process_cinfo(&dir_cinfo, &dir_attr); @@ -1380,24 +1300,33 @@ static int nfs4_proc_mkdir(struct inode *dir, struct qstr *name, struct iattr *sattr, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { - struct nfs4_compound compound; - struct nfs4_op ops[7]; struct nfs_fattr dir_attr; struct nfs4_change_info dir_cinfo; int status; + struct nfs4_create_arg arg = { + .dir_fh = NFS_FH(dir), + .server = NFS_SERVER(dir), + .name = name, + .attrs = sattr, + .ftype = NF4DIR, + }; + struct nfs4_create_res res = { + .server = NFS_SERVER(dir), + .fhandle = 
fhandle, + .fattr = fattr, + .dir_attr = &dir_attr, + .dir_cinfo = &dir_cinfo, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE], + .rpc_argp = &arg, + .rpc_resp = &res, + }; dir_attr.valid = 0; fattr->valid = 0; - nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "mkdir"); - nfs4_setup_putfh(&compound, NFS_FH(dir)); - nfs4_setup_savefh(&compound); - nfs4_setup_create_dir(&compound, name, sattr, &dir_cinfo); - nfs4_setup_getattr(&compound, fattr); - nfs4_setup_getfh(&compound, fhandle); - nfs4_setup_restorefh(&compound); - nfs4_setup_getattr(&compound, &dir_attr); - status = nfs4_call_compound(&compound, NULL, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); if (!status) { process_cinfo(&dir_cinfo, &dir_attr); @@ -1411,17 +1340,25 @@ nfs4_proc_readdir(struct dentry *dentry, u64 cookie, struct page *page, unsigned int count, int plus) { struct inode *dir = dentry->d_inode; - struct nfs4_compound compound; - struct nfs4_op ops[2]; int status; + struct nfs4_readdir_arg args = { + .fh = NFS_FH(dir), + .pages = &page, + .pgbase = 0, + .count = count, + }; + struct nfs4_readdir_res res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READDIR], + .rpc_argp = &args, + .rpc_resp = &res, + .rpc_cred = cred, + }; lock_kernel(); - - nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "readdir"); - nfs4_setup_putfh(&compound, NFS_FH(dir)); - nfs4_setup_readdir(&compound, cookie, NFS_COOKIEVERF(dir), &page, count, dentry); - status = nfs4_call_compound(&compound, cred, 0); - + nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); + res.pgbase = args.pgbase; + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); unlock_kernel(); return status; } @@ -1430,24 +1367,50 @@ static int nfs4_proc_mknod(struct inode *dir, struct qstr *name, struct iattr *sattr, dev_t rdev, struct nfs_fh *fh, struct nfs_fattr *fattr) { - struct nfs4_compound compound; - struct nfs4_op ops[7]; struct nfs_fattr dir_attr; struct 
nfs4_change_info dir_cinfo; int status; + int mode = sattr->ia_mode; + struct nfs4_create_arg arg = { + .dir_fh = NFS_FH(dir), + .server = NFS_SERVER(dir), + .name = name, + .attrs = sattr, + }; + struct nfs4_create_res res = { + .server = NFS_SERVER(dir), + .fhandle = fh, + .fattr = fattr, + .dir_attr = &dir_attr, + .dir_cinfo = &dir_cinfo, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE], + .rpc_argp = &arg, + .rpc_resp = &res, + }; dir_attr.valid = 0; fattr->valid = 0; + + BUG_ON(!(sattr->ia_valid & ATTR_MODE)); + BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode)); + if (S_ISFIFO(mode)) + arg.ftype = NF4FIFO; + else if (S_ISBLK(mode)) { + arg.ftype = NF4BLK; + arg.u.device.specdata1 = MAJOR(rdev); + arg.u.device.specdata2 = MINOR(rdev); + } + else if (S_ISCHR(mode)) { + arg.ftype = NF4CHR; + arg.u.device.specdata1 = MAJOR(rdev); + arg.u.device.specdata2 = MINOR(rdev); + } + else + arg.ftype = NF4SOCK; - nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "mknod"); - nfs4_setup_putfh(&compound, NFS_FH(dir)); - nfs4_setup_savefh(&compound); - nfs4_setup_create_special(&compound, name, rdev,sattr, &dir_cinfo); - nfs4_setup_getattr(&compound, fattr); - nfs4_setup_getfh(&compound, fh); - nfs4_setup_restorefh(&compound); - nfs4_setup_getattr(&compound, &dir_attr); - status = nfs4_call_compound(&compound, NULL, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); if (!status) { process_cinfo(&dir_cinfo, &dir_attr); @@ -1460,14 +1423,13 @@ static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat) { - struct nfs4_compound compound; - struct nfs4_op ops[2]; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_STATFS], + .rpc_argp = fhandle, + .rpc_resp = fsstat, + }; - memset(fsstat, 0, sizeof(*fsstat)); - nfs4_setup_compound(&compound, ops, server, "statfs"); - nfs4_setup_putfh(&compound, fhandle); - nfs4_setup_statfs(&compound, 
fsstat); - return nfs4_call_compound(&compound, NULL, 0); + return rpc_call_sync(server->client, &msg, 0); } static int @@ -1480,7 +1442,6 @@ nfs4_proc_fsinfo(struct nfs_server *serv .rpc_resp = fsinfo, }; - memset(fsinfo, 0, sizeof(*fsinfo)); return rpc_call_sync(server->client, &msg, 0); } @@ -1488,14 +1449,13 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_pathconf *pathconf) { - struct nfs4_compound compound; - struct nfs4_op ops[2]; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PATHCONF], + .rpc_argp = fhandle, + .rpc_resp = pathconf, + }; - memset(pathconf, 0, sizeof(*pathconf)); - nfs4_setup_compound(&compound, ops, server, "statfs"); - nfs4_setup_putfh(&compound, fhandle); - nfs4_setup_pathconf(&compound, pathconf); - return nfs4_call_compound(&compound, NULL, 0); + return rpc_call_sync(server->client, &msg, 0); } static void @@ -1517,7 +1477,6 @@ nfs4_read_done(struct rpc_task *task) { struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata; struct inode *inode = data->inode; - struct nfs_fattr *fattr = data->res.fattr; if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { task->tk_action = nfs4_restart_read; @@ -1525,11 +1484,6 @@ nfs4_read_done(struct rpc_task *task) } if (task->tk_status > 0) renew_lease(NFS_SERVER(inode), data->timestamp); - /* Check cache consistency */ - if (fattr->change_attr != NFS_CHANGE_ATTR(inode)) - nfs_zap_caches(inode); - if (fattr->bitmap[1] & FATTR4_WORD1_TIME_ACCESS) - inode->i_atime = fattr->atime; /* Call back common NFS readpage processing */ nfs_readpage_result(task); } @@ -1577,21 +1531,6 @@ nfs4_proc_read_setup(struct nfs_read_dat } static void -nfs4_write_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) -{ - /* Check cache consistency */ - if (fattr->pre_change_attr != NFS_CHANGE_ATTR(inode)) - nfs_zap_caches(inode); - NFS_CHANGE_ATTR(inode) = fattr->change_attr; - if (fattr->bitmap[1] & 
FATTR4_WORD1_SPACE_USED) - inode->i_blocks = (fattr->du.nfs3.used + 511) >> 9; - if (fattr->bitmap[1] & FATTR4_WORD1_TIME_METADATA) - inode->i_ctime = fattr->ctime; - if (fattr->bitmap[1] & FATTR4_WORD1_TIME_MODIFY) - inode->i_mtime = fattr->mtime; -} - -static void nfs4_restart_write(struct rpc_task *task) { struct nfs_write_data *data = (struct nfs_write_data *)task->tk_calldata; @@ -1617,7 +1556,6 @@ nfs4_write_done(struct rpc_task *task) } if (task->tk_status >= 0) renew_lease(NFS_SERVER(inode), data->timestamp); - nfs4_write_refresh_inode(inode, data->res.fattr); /* Call back common NFS writeback processing */ nfs_writeback_done(task); } @@ -1684,7 +1622,6 @@ nfs4_commit_done(struct rpc_task *task) task->tk_action = nfs4_restart_write; return; } - nfs4_write_refresh_inode(inode, data->res.fattr); /* Call back common NFS writeback processing */ nfs_commit_done(task); } @@ -1807,6 +1744,7 @@ nfs4_proc_file_open(struct inode *inode, if (filp->f_mode & FMODE_WRITE) { lock_kernel(); nfs_set_mmcred(inode, state->owner->so_cred); + nfs_begin_data_update(inode); unlock_kernel(); } filp->private_data = state; @@ -1823,6 +1761,11 @@ nfs4_proc_file_release(struct inode *ino if (state) nfs4_close_state(state, filp->f_mode); + if (filp->f_mode & FMODE_WRITE) { + lock_kernel(); + nfs_end_data_update(inode); + unlock_kernel(); + } return 0; } @@ -2294,6 +2237,7 @@ struct nfs_rpc_ops nfs_v4_clientops = { .version = 4, /* protocol version */ .dentry_ops = &nfs4_dentry_operations, .dir_inode_ops = &nfs4_dir_inode_operations, + .file_inode_ops = &nfs4_file_inode_operations, .getroot = nfs4_proc_get_root, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, diff -puN fs/nfs/pagelist.c~CITI_NFS4_ALL fs/nfs/pagelist.c --- linux-2.6.3/fs/nfs/pagelist.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 +++ linux-2.6.3-bfields/fs/nfs/pagelist.c 2004-02-19 16:47:03.000000000 -0500 @@ -246,7 +246,6 @@ nfs_coalesce_requests(struct list_head * * nfs_scan_list - Scan a list for 
matching requests * @head: One of the NFS inode request lists * @dst: Destination list - * @file: if set, ensure we match requests from this file * @idx_start: lower bound of page->index to scan * @npages: idx_start + npages sets the upper bound to scan. * @@ -258,7 +257,6 @@ nfs_coalesce_requests(struct list_head * */ int nfs_scan_list(struct list_head *head, struct list_head *dst, - struct file *file, unsigned long idx_start, unsigned int npages) { struct list_head *pos, *tmp; @@ -276,9 +274,6 @@ nfs_scan_list(struct list_head *head, st req = nfs_list_entry(pos); - if (file && req->wb_file != file) - continue; - if (req->wb_index < idx_start) continue; if (req->wb_index > idx_end) diff -puN fs/nfs/proc.c~CITI_NFS4_ALL fs/nfs/proc.c --- linux-2.6.3/fs/nfs/proc.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 +++ linux-2.6.3-bfields/fs/nfs/proc.c 2004-02-19 16:47:07.000000000 -0500 @@ -49,18 +49,6 @@ extern struct rpc_procinfo nfs_procedures[]; -static void -nfs_write_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) -{ - if (!(fattr->valid & NFS_ATTR_WCC)) { - fattr->pre_size = NFS_CACHE_ISIZE(inode); - fattr->pre_mtime = NFS_CACHE_MTIME(inode); - fattr->pre_ctime = NFS_CACHE_CTIME(inode); - fattr->valid |= NFS_ATTR_WCC; - } - nfs_refresh_inode(inode, fattr); -} - static struct rpc_cred * nfs_cred(struct inode *inode, struct file *filp) { @@ -78,15 +66,33 @@ nfs_cred(struct inode *inode, struct fil */ static int nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) + struct nfs_fsinfo *info) { - int status; + struct nfs_fattr *fattr = info->fattr; + struct nfs2_fsstat fsinfo; + int status; - dprintk("NFS call getroot\n"); + dprintk("%s: call getattr\n", __FUNCTION__); fattr->valid = 0; - status = rpc_call(server->client, NFSPROC_GETATTR, fhandle, fattr, 0); - dprintk("NFS reply getroot\n"); - return status; + status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0); + dprintk("%s: reply 
getattr %d\n", __FUNCTION__, status); + if (status) + return status; + dprintk("%s: call statfs\n", __FUNCTION__); + status = rpc_call(server->client_sys, NFSPROC_STATFS, fhandle, &fsinfo, 0); + dprintk("%s: reply statfs %d\n", __FUNCTION__, status); + if (status) + return status; + info->rtmax = NFS_MAXDATA; + info->rtpref = fsinfo.tsize; + info->rtmult = fsinfo.bsize; + info->wtmax = NFS_MAXDATA; + info->wtpref = fsinfo.tsize; + info->wtmult = fsinfo.bsize; + info->dtpref = fsinfo.tsize; + info->maxfilesize = 0x7FFFFFFF; + info->lease_time = 0; + return 0; } /* @@ -205,7 +211,7 @@ nfs_proc_write(struct nfs_write_data *wd msg.rpc_cred = nfs_cred(inode, filp); status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags); if (status >= 0) { - nfs_write_refresh_inode(inode, fattr); + nfs_refresh_inode(inode, fattr); wdata->res.count = wdata->args.count; wdata->verf.committed = NFS_FILE_SYNC; } @@ -331,10 +337,8 @@ nfs_proc_unlink_done(struct dentry *dir, { struct rpc_message *msg = &task->tk_msg; - if (msg->rpc_argp) { - NFS_CACHEINV(dir->d_inode); + if (msg->rpc_argp) kfree(msg->rpc_argp); - } return 0; } @@ -584,7 +588,7 @@ nfs_write_done(struct rpc_task *task) struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; if (task->tk_status >= 0) - nfs_write_refresh_inode(data->inode, data->res.fattr); + nfs_refresh_inode(data->inode, data->res.fattr); nfs_writeback_done(task); } @@ -665,6 +669,7 @@ struct nfs_rpc_ops nfs_v2_clientops = { .version = 2, /* protocol version */ .dentry_ops = &nfs_dentry_operations, .dir_inode_ops = &nfs_dir_inode_operations, + .file_inode_ops = &nfs_file_inode_operations, .getroot = nfs_proc_get_root, .getattr = nfs_proc_getattr, .setattr = nfs_proc_setattr, diff -puN fs/nfs/read.c~CITI_NFS4_ALL fs/nfs/read.c --- linux-2.6.3/fs/nfs/read.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 +++ linux-2.6.3-bfields/fs/nfs/read.c 2004-02-19 16:47:03.000000000 -0500 @@ -124,6 +124,7 @@ nfs_readpage_sync(struct file *file, str if 
(result < rdata.args.count) /* NFSv2ism */ break; } while (count); + NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; if (count) memclear_highpage_flush(page, rdata.args.pgbase, count); @@ -266,6 +267,7 @@ nfs_readpage_result(struct rpc_task *tas dprintk("NFS: %4d nfs_readpage_result, (status %d)\n", task->tk_pid, task->tk_status); + NFS_FLAGS(data->inode) |= NFS_INO_INVALID_ATIME; while (!list_empty(&data->pages)) { struct nfs_page *req = nfs_list_entry(data->pages.next); struct page *page = req->wb_page; diff -puN fs/nfs/unlink.c~CITI_NFS4_ALL fs/nfs/unlink.c --- linux-2.6.3/fs/nfs/unlink.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 +++ linux-2.6.3-bfields/fs/nfs/unlink.c 2004-02-19 16:47:03.000000000 -0500 @@ -104,6 +104,7 @@ nfs_async_unlink_init(struct rpc_task *t status = NFS_PROTO(dir->d_inode)->unlink_setup(&msg, dir, &data->name); if (status < 0) goto out_err; + nfs_begin_data_update(dir->d_inode); rpc_call_setup(task, &msg, 0); return; out_err: @@ -126,7 +127,7 @@ nfs_async_unlink_done(struct rpc_task *t if (!dir) return; dir_i = dir->d_inode; - nfs_zap_caches(dir_i); + nfs_end_data_update(dir_i); if (NFS_PROTO(dir_i)->unlink_done(dir, task)) return; put_rpccred(data->cred); diff -puN fs/nfs/write.c~CITI_NFS4_ALL fs/nfs/write.c --- linux-2.6.3/fs/nfs/write.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 +++ linux-2.6.3-bfields/fs/nfs/write.c 2004-02-19 16:47:03.000000000 -0500 @@ -74,7 +74,6 @@ static struct nfs_page * nfs_update_request(struct file*, struct inode *, struct page *, unsigned int, unsigned int); -static void nfs_strategy(struct inode *inode); static kmem_cache_t *nfs_wdata_cachep; static mempool_t *nfs_wdata_mempool; @@ -124,6 +123,52 @@ void nfs_commit_release(struct rpc_task nfs_commit_free(wdata); } +/* Adjust the file length if we're writing beyond the end */ +static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count) +{ + struct inode *inode = page->mapping->host; + loff_t end, i_size = 
i_size_read(inode); + unsigned long end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; + + if (i_size > 0 && page->index < end_index) + return; + end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count); + if (i_size >= end) + return; + i_size_write(inode, end); +} + +/* We can set the PG_uptodate flag if we see that a write request + * covers the full page. + */ +static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count) +{ + loff_t end_offs; + + if (PageUptodate(page)) + return; + if (base != 0) + return; + if (count == PAGE_CACHE_SIZE) { + SetPageUptodate(page); + return; + } + + end_offs = i_size_read(page->mapping->host) - 1; + if (end_offs < 0) + return; + /* Is this the last page? */ + if (page->index != (unsigned long)(end_offs >> PAGE_CACHE_SHIFT)) + return; + /* This is the last page: set PG_uptodate if we cover the entire + * extent of the data, then zero the rest of the page. + */ + if (count == (unsigned int)(end_offs & (PAGE_CACHE_SIZE - 1)) + 1) { + memclear_highpage_flush(page, count, PAGE_CACHE_SIZE - count); + SetPageUptodate(page); + } +} + /* * Write a page synchronously. * Offset is the data offset within the page. @@ -157,6 +202,7 @@ nfs_writepage_sync(struct file *file, st (long long)NFS_FILEID(inode), count, (long long)(page_offset(page) + offset)); + nfs_begin_data_update(inode); do { if (count < wsize && !swapfile) wdata.args.count = count; @@ -177,14 +223,12 @@ nfs_writepage_sync(struct file *file, st wdata.args.pgbase += result; written += result; count -= result; - - /* - * If we've extended the file, update the inode - * now so we don't invalidate the cache. - */ - if (wdata.args.offset > i_size_read(inode)) - i_size_write(inode, wdata.args.offset); } while (count); + /* Update file length */ + nfs_grow_file(page, offset, written); + /* Set the PG_uptodate flag? 
*/ + nfs_mark_uptodate(page, offset, written); + nfs_end_data_update(inode); if (PageError(page)) ClearPageError(page); @@ -201,18 +245,19 @@ nfs_writepage_async(struct file *file, s unsigned int offset, unsigned int count) { struct nfs_page *req; - loff_t end; int status; + nfs_begin_data_update(inode); req = nfs_update_request(file, inode, page, offset, count); status = (IS_ERR(req)) ? PTR_ERR(req) : 0; if (status < 0) goto out; + /* Update file length */ + nfs_grow_file(page, offset, count); + /* Set the PG_uptodate flag? */ + nfs_mark_uptodate(page, offset, count); nfs_unlock_request(req); - nfs_strategy(inode); - end = ((loff_t)page->index<sync_mode == WB_SYNC_HOLD) @@ -294,7 +339,7 @@ nfs_writepages(struct address_space *map if (is_sync && wbc->sync_mode == WB_SYNC_ALL) { err = nfs_wb_all(inode); } else - nfs_commit_file(inode, NULL, 0, 0, 0); + nfs_commit_inode(inode, 0, 0, 0); out: return err; } @@ -312,8 +357,10 @@ nfs_inode_add_request(struct inode *inod BUG_ON(error == -EEXIST); if (error) return error; - if (!nfsi->npages) + if (!nfsi->npages) { igrab(inode); + nfs_begin_data_update(inode); + } nfsi->npages++; req->wb_count++; return 0; @@ -336,6 +383,7 @@ nfs_inode_remove_request(struct nfs_page nfsi->npages--; if (!nfsi->npages) { spin_unlock(&nfs_wreq_lock); + nfs_end_data_update(inode); iput(inode); } else spin_unlock(&nfs_wreq_lock); @@ -421,7 +469,7 @@ nfs_mark_request_commit(struct nfs_page * Interruptible by signals only if mounted with intr flag. 
*/ static int -nfs_wait_on_requests(struct inode *inode, struct file *file, unsigned long idx_start, unsigned int npages) +nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_page *req; @@ -441,8 +489,6 @@ nfs_wait_on_requests(struct inode *inode break; next = req->wb_index + 1; - if (file && req->wb_file != file) - continue; if (!NFS_WBACK_BUSY(req)) continue; @@ -453,7 +499,6 @@ nfs_wait_on_requests(struct inode *inode if (error < 0) return error; spin_lock(&nfs_wreq_lock); - next = idx_start; res++; } spin_unlock(&nfs_wreq_lock); @@ -464,7 +509,6 @@ nfs_wait_on_requests(struct inode *inode * nfs_scan_dirty - Scan an inode for dirty requests * @inode: NFS inode to scan * @dst: destination list - * @file: if set, ensure we match requests from this file * @idx_start: lower bound of page->index to scan. * @npages: idx_start + npages sets the upper bound to scan. * @@ -472,11 +516,11 @@ nfs_wait_on_requests(struct inode *inode * The requests are *not* checked to ensure that they form a contiguous set. */ static int -nfs_scan_dirty(struct inode *inode, struct list_head *dst, struct file *file, unsigned long idx_start, unsigned int npages) +nfs_scan_dirty(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); int res; - res = nfs_scan_list(&nfsi->dirty, dst, file, idx_start, npages); + res = nfs_scan_list(&nfsi->dirty, dst, idx_start, npages); nfsi->ndirty -= res; sub_page_state(nr_dirty,res); if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty)) @@ -489,7 +533,6 @@ nfs_scan_dirty(struct inode *inode, stru * nfs_scan_commit - Scan an inode for commit requests * @inode: NFS inode to scan * @dst: destination list - * @file: if set, ensure we collect requests from this file only. * @idx_start: lower bound of page->index to scan. * @npages: idx_start + npages sets the upper bound to scan. 
* @@ -497,11 +540,11 @@ nfs_scan_dirty(struct inode *inode, stru * The requests are *not* checked to ensure that they form a contiguous set. */ static int -nfs_scan_commit(struct inode *inode, struct list_head *dst, struct file *file, unsigned long idx_start, unsigned int npages) +nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); int res; - res = nfs_scan_list(&nfsi->commit, dst, file, idx_start, npages); + res = nfs_scan_list(&nfsi->commit, dst, idx_start, npages); nfsi->ncommit -= res; if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit)) printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); @@ -600,46 +643,6 @@ nfs_update_request(struct file* file, st return req; } -/* - * This is the strategy routine for NFS. - * It is called by nfs_updatepage whenever the user wrote up to the end - * of a page. - * - * We always try to submit a set of requests in parallel so that the - * server's write code can gather writes. This is mainly for the benefit - * of NFSv2. - * - * We never submit more requests than we think the remote can handle. - * For UDP sockets, we make sure we don't exceed the congestion window; - * for TCP, we limit the number of requests to 8. - * - * NFS_STRATEGY_PAGES gives the minimum number of requests for NFSv2 that - * should be sent out in one go. This is for the benefit of NFSv2 servers - * that perform write gathering. - * - * FIXME: Different servers may have different sweet spots. - * Record the average congestion window in server struct? 
- */ -#define NFS_STRATEGY_PAGES 8 -static void -nfs_strategy(struct inode *inode) -{ - unsigned int dirty, wpages; - - dirty = NFS_I(inode)->ndirty; - wpages = NFS_SERVER(inode)->wpages; -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) - if (NFS_PROTO(inode)->version == 2) { - if (dirty >= NFS_STRATEGY_PAGES * wpages) - nfs_flush_file(inode, NULL, 0, 0, 0); - } else if (dirty >= wpages) - nfs_flush_file(inode, NULL, 0, 0, 0); -#else - if (dirty >= NFS_STRATEGY_PAGES * wpages) - nfs_flush_file(inode, NULL, 0, 0, 0); -#endif -} - int nfs_flush_incompatible(struct file *file, struct page *page) { @@ -675,7 +678,6 @@ nfs_updatepage(struct file *file, struct struct dentry *dentry = file->f_dentry; struct inode *inode = page->mapping->host; struct nfs_page *req; - loff_t end; int status = 0; dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n", @@ -696,6 +698,30 @@ nfs_updatepage(struct file *file, struct return status; } + nfs_begin_data_update(inode); + + + /* If we're not using byte range locks, and we know the page + * is entirely in cache, it may be more efficient to avoid + * fragmenting write requests. + */ + if (PageUptodate(page) && inode->i_flock == NULL) { + loff_t end_offs = i_size_read(inode) - 1; + unsigned long end_index = end_offs >> PAGE_CACHE_SHIFT; + + count += offset; + offset = 0; + if (end_offs < 0) { + /* Do nothing */ + } else if (page->index == end_index) { + unsigned int pglen; + pglen = (unsigned int)(end_offs & (PAGE_CACHE_SIZE-1)) + 1; + if (count < pglen) + count = pglen; + } else if (page->index < end_index) + count = PAGE_CACHE_SIZE; + } + /* * Try to find an NFS request corresponding to this page * and update it. 
@@ -714,21 +740,14 @@ nfs_updatepage(struct file *file, struct goto done; status = 0; - end = ((loff_t)page->index<wb_pgbase == 0 && req->wb_bytes == PAGE_CACHE_SIZE) { - SetPageUptodate(page); - nfs_unlock_request(req); - nfs_strategy(inode); - } else - nfs_unlock_request(req); + + /* Update file length */ + nfs_grow_file(page, offset, count); + /* Set the PG_uptodate flag? */ + nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); + nfs_unlock_request(req); done: + nfs_end_data_update(inode); dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n", status, (long long)i_size_read(inode)); if (status < 0) @@ -891,10 +910,7 @@ nfs_writeback_done(struct rpc_task *task #endif /* - * Update attributes as result of writeback. - * FIXME: There is an inherent race with invalidate_inode_pages and - * writebacks since the page->count is kept > 1 for as long - * as the page has a write request pending. + * Process the nfs_page list */ while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); @@ -1061,7 +1077,7 @@ nfs_commit_done(struct rpc_task *task) } #endif -int nfs_flush_file(struct inode *inode, struct file *file, unsigned long idx_start, +int nfs_flush_inode(struct inode *inode, unsigned long idx_start, unsigned int npages, int how) { LIST_HEAD(head); @@ -1069,7 +1085,7 @@ int nfs_flush_file(struct inode *inode, error = 0; spin_lock(&nfs_wreq_lock); - res = nfs_scan_dirty(inode, &head, file, idx_start, npages); + res = nfs_scan_dirty(inode, &head, idx_start, npages); spin_unlock(&nfs_wreq_lock); if (res) error = nfs_flush_list(&head, NFS_SERVER(inode)->wpages, how); @@ -1079,7 +1095,7 @@ int nfs_flush_file(struct inode *inode, } #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -int nfs_commit_file(struct inode *inode, struct file *file, unsigned long idx_start, +int nfs_commit_inode(struct inode *inode, unsigned long idx_start, unsigned int npages, int how) { LIST_HEAD(head); @@ -1087,9 +1103,9 @@ int nfs_commit_file(struct inode *inode, error = 
0; spin_lock(&nfs_wreq_lock); - res = nfs_scan_commit(inode, &head, file, idx_start, npages); + res = nfs_scan_commit(inode, &head, idx_start, npages); if (res) { - res += nfs_scan_commit(inode, &head, NULL, 0, 0); + res += nfs_scan_commit(inode, &head, 0, 0); spin_unlock(&nfs_wreq_lock); error = nfs_commit_list(&head, how); } else @@ -1100,7 +1116,7 @@ int nfs_commit_file(struct inode *inode, } #endif -int nfs_sync_file(struct inode *inode, struct file *file, unsigned long idx_start, +int nfs_sync_inode(struct inode *inode, unsigned long idx_start, unsigned int npages, int how) { int error, @@ -1109,18 +1125,15 @@ int nfs_sync_file(struct inode *inode, s wait = how & FLUSH_WAIT; how &= ~FLUSH_WAIT; - if (!inode && file) - inode = file->f_dentry->d_inode; - do { error = 0; if (wait) - error = nfs_wait_on_requests(inode, file, idx_start, npages); + error = nfs_wait_on_requests(inode, idx_start, npages); if (error == 0) - error = nfs_flush_file(inode, file, idx_start, npages, how); + error = nfs_flush_inode(inode, idx_start, npages, how); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) if (error == 0) - error = nfs_commit_file(inode, file, idx_start, npages, how); + error = nfs_commit_inode(inode, idx_start, npages, how); #endif } while (error > 0); return error; diff -puN include/linux/fs.h~CITI_NFS4_ALL include/linux/fs.h --- linux-2.6.3/include/linux/fs.h~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 +++ linux-2.6.3-bfields/include/linux/fs.h 2004-02-19 16:47:03.000000000 -0500 @@ -137,6 +137,7 @@ extern int leases_enable, dir_notify_ena #define S_DEAD 32 /* removed, but still open directory */ #define S_NOQUOTA 64 /* Inode is not counted to quota */ #define S_DIRSYNC 128 /* Directory modifications are synchronous */ +#define S_NOCMTIME 256 /* Do not update file c/mtime */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -170,6 +171,7 @@ extern int leases_enable, dir_notify_ena #define IS_ONE_SECOND(inode) __IS_FLG(inode, 
MS_ONE_SECOND) #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) +#define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) /* the read-only stuff doesn't really belong here, but any other place is probably as bad and I don't want to create yet another include file. */ diff -puN include/linux/nfs_fs.h~CITI_NFS4_ALL include/linux/nfs_fs.h --- linux-2.6.3/include/linux/nfs_fs.h~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 +++ linux-2.6.3-bfields/include/linux/nfs_fs.h 2004-02-19 16:47:07.000000000 -0500 @@ -23,6 +23,10 @@ #include #include +#ifdef CONFIG_NFS_V4 +#include +#endif /* CONFIG_NFS_V4 */ + #include #include #include @@ -99,7 +103,7 @@ struct nfs_inode { /* * Various flags */ - unsigned short flags; + unsigned int flags; /* * read_cache_jiffies is when we started read-caching this inode, @@ -118,19 +122,22 @@ struct nfs_inode { * * mtime != read_cache_mtime */ + unsigned long readdir_timestamp; unsigned long read_cache_jiffies; - struct timespec read_cache_ctime; - struct timespec read_cache_mtime; - __u64 read_cache_isize; unsigned long attrtimeo; unsigned long attrtimeo_timestamp; __u64 change_attr; /* v4 only */ + /* "Generation counter" for the attribute cache. This is + * bumped whenever we update the metadata on the + * server. + */ + unsigned long cache_change_attribute; /* - * Timestamp that dates the change made to read_cache_mtime. - * This is of use for dentry revalidation + * Counter indicating the number of outstanding requests that + * will cause a file data update. 
*/ - unsigned long cache_mtime_jiffies; + atomic_t data_updates; struct nfs_access_cache cache_access; @@ -160,7 +167,10 @@ struct nfs_inode { /* NFSv4 state */ struct list_head open_states; #endif /* CONFIG_NFS_V4*/ - +#ifdef CONFIG_NFS_V4_ACL + struct posix_acl *acl; + struct posix_acl *default_acl; +#endif /* CONFIG_NFS_V4_ACL */ struct inode vfs_inode; }; @@ -170,7 +180,9 @@ struct nfs_inode { #define NFS_INO_STALE 0x0001 /* possible stale inode */ #define NFS_INO_ADVISE_RDPLUS 0x0002 /* advise readdirplus */ #define NFS_INO_REVALIDATING 0x0004 /* revalidating attrs */ -#define NFS_INO_FLUSH 0x0008 /* inode is due for flushing */ +#define NFS_INO_INVALID_ATTR 0x0008 /* cached attrs are invalid */ +#define NFS_INO_INVALID_DATA 0x0010 /* cached data is invalid */ +#define NFS_INO_INVALID_ATIME 0x0020 /* cached atime is invalid */ #define NFS_INO_FAKE_ROOT 0x0080 /* root inode placeholder */ static inline struct nfs_inode *NFS_I(struct inode *inode) @@ -186,15 +198,7 @@ static inline struct nfs_inode *NFS_I(st #define NFS_ADDR(inode) (RPC_PEERADDR(NFS_CLIENT(inode))) #define NFS_COOKIEVERF(inode) (NFS_I(inode)->cookieverf) #define NFS_READTIME(inode) (NFS_I(inode)->read_cache_jiffies) -#define NFS_MTIME_UPDATE(inode) (NFS_I(inode)->cache_mtime_jiffies) -#define NFS_CACHE_CTIME(inode) (NFS_I(inode)->read_cache_ctime) -#define NFS_CACHE_MTIME(inode) (NFS_I(inode)->read_cache_mtime) -#define NFS_CACHE_ISIZE(inode) (NFS_I(inode)->read_cache_isize) #define NFS_CHANGE_ATTR(inode) (NFS_I(inode)->change_attr) -#define NFS_CACHEINV(inode) \ -do { \ - NFS_READTIME(inode) = jiffies - NFS_MAXATTRTIMEO(inode) - 1; \ -} while (0) #define NFS_ATTRTIMEO(inode) (NFS_I(inode)->attrtimeo) #define NFS_MINATTRTIMEO(inode) \ (S_ISDIR(inode->i_mode)? 
NFS_SERVER(inode)->acdirmin \ @@ -211,6 +215,17 @@ do { \ #define NFS_FILEID(inode) (NFS_I(inode)->fileid) +static inline int nfs_caches_unstable(struct inode *inode) +{ + return atomic_read(&NFS_I(inode)->data_updates) != 0; +} + +static inline void NFS_CACHEINV(struct inode *inode) +{ + if (!nfs_caches_unstable(inode)) + NFS_FLAGS(inode) |= NFS_INO_INVALID_ATTR; +} + static inline int nfs_server_capable(struct inode *inode, int cap) { return NFS_SERVER(inode)->caps & cap; @@ -227,13 +242,37 @@ loff_t page_offset(struct page *page) return ((loff_t)page->index) << PAGE_CACHE_SHIFT; } +/** + * nfs_save_change_attribute - Returns the inode attribute change cookie + * @inode - pointer to inode + * The "change attribute" is updated every time we finish an operation + * that will result in a metadata change on the server. + */ +static inline long nfs_save_change_attribute(struct inode *inode) +{ + return NFS_I(inode)->cache_change_attribute; +} + +/** + * nfs_verify_change_attribute - Detects NFS inode cache updates + * @inode - pointer to inode + * @chattr - previously saved change attribute + * Return "false" if metadata has been updated (or is in the process of + * being updated) since the change attribute was saved. 
+ */ +static inline int nfs_verify_change_attribute(struct inode *inode, unsigned long chattr) +{ + return !nfs_caches_unstable(inode) + && chattr == NFS_I(inode)->cache_change_attribute; +} + /* * linux/fs/nfs/inode.c */ extern void nfs_zap_caches(struct inode *); extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *, struct nfs_fattr *); -extern int __nfs_refresh_inode(struct inode *, struct nfs_fattr *); +extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern int nfs_permission(struct inode *, int, struct nameidata *); extern void nfs_set_mmcred(struct inode *, struct rpc_cred *); @@ -241,6 +280,10 @@ extern int nfs_open(struct inode *, stru extern int nfs_release(struct inode *, struct file *); extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); extern int nfs_setattr(struct dentry *, struct iattr *); +extern void nfs_begin_attr_update(struct inode *); +extern void nfs_end_attr_update(struct inode *); +extern void nfs_begin_data_update(struct inode *); +extern void nfs_end_data_update(struct inode *); /* * linux/fs/nfs/file.c @@ -309,11 +352,11 @@ extern void nfs_commit_done(struct rpc_t * Try to write back everything synchronously (but check the * return value!) 
*/ -extern int nfs_sync_file(struct inode *, struct file *, unsigned long, unsigned int, int); -extern int nfs_flush_file(struct inode *, struct file *, unsigned long, unsigned int, int); +extern int nfs_sync_inode(struct inode *, unsigned long, unsigned int, int); +extern int nfs_flush_inode(struct inode *, unsigned long, unsigned int, int); extern int nfs_flush_list(struct list_head *, int, int); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -extern int nfs_commit_file(struct inode *, struct file *, unsigned long, unsigned int, int); +extern int nfs_commit_inode(struct inode *, unsigned long, unsigned int, int); extern int nfs_commit_list(struct list_head *, int); #else static inline int @@ -333,7 +376,7 @@ nfs_have_writebacks(struct inode *inode) static inline int nfs_wb_all(struct inode *inode) { - int error = nfs_sync_file(inode, 0, 0, 0, FLUSH_WAIT); + int error = nfs_sync_inode(inode, 0, 0, FLUSH_WAIT); return (error < 0) ? error : 0; } @@ -343,21 +386,11 @@ nfs_wb_all(struct inode *inode) static inline int nfs_wb_page(struct inode *inode, struct page* page) { - int error = nfs_sync_file(inode, 0, page->index, 1, + int error = nfs_sync_inode(inode, page->index, 1, FLUSH_WAIT | FLUSH_STABLE); return (error < 0) ? error : 0; } -/* - * Write back all pending writes for one user.. - */ -static inline int -nfs_wb_file(struct inode *inode, struct file *file) -{ - int error = nfs_sync_file(inode, file, 0, 0, FLUSH_WAIT); - return (error < 0) ? error : 0; -} - /* Hack for future NFS swap support */ #ifndef IS_SWAPFILE # define IS_SWAPFILE(inode) (0) @@ -383,20 +416,27 @@ extern int nfsroot_mount(struct sockadd /* * inline functions */ -static inline int -nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) + +static inline int nfs_attribute_timeout(struct inode *inode) { - if (time_before(jiffies, NFS_READTIME(inode)+NFS_ATTRTIMEO(inode))) - return NFS_STALE(inode) ? 
-ESTALE : 0; - return __nfs_revalidate_inode(server, inode); + struct nfs_inode *nfsi = NFS_I(inode); + + return time_after(jiffies, nfsi->read_cache_jiffies+nfsi->attrtimeo); } -static inline int -nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) +/** + * nfs_revalidate_inode - Revalidate the inode attributes + * @server - pointer to nfs_server struct + * @inode - pointer to inode struct + * + * Updates inode attribute information by retrieving the data from the server. + */ +static inline int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) { - if ((fattr->valid & NFS_ATTR_FATTR) == 0) - return 0; - return __nfs_refresh_inode(inode,fattr); + if (!(NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) + && !nfs_attribute_timeout(inode)) + return NFS_STALE(inode) ? -ESTALE : 0; + return __nfs_revalidate_inode(server, inode); } static inline loff_t @@ -590,6 +630,15 @@ struct nfs4_state { extern struct dentry_operations nfs4_dentry_operations; extern struct inode_operations nfs4_dir_inode_operations; +extern struct inode_operations nfs4_file_inode_operations; + +#define NFS_ACL_MAX_ENTRIES 32 + +/* inode.c */ +extern ssize_t nfs_getxattr(struct dentry *, const char *, void *, size_t); +extern int nfs_setxattr(struct dentry *, const char *, const void *, size_t, int); + +#define NFS4_ACL_NOT_CACHED ((void *)-1) /* nfs4proc.c */ extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short); @@ -602,6 +651,9 @@ int nfs4_do_downgrade(struct inode *inod extern int nfs4_wait_clnt_recover(struct rpc_clnt *, struct nfs4_client *); extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); extern int nfs4_open_revalidate(struct inode *, struct dentry *, int); +struct posix_acl * nfs4_proc_get_posix_acl(struct inode *, int); +extern int nfs4_proc_set_posix_acl(struct inode *, int, struct posix_acl *); +void nfs4_izap_acl(struct inode *inode, struct posix_acl **i_acl); /* nfs4renewd.c */ 
extern void nfs4_schedule_state_renewal(struct nfs4_client *); diff -puN include/linux/nfs_page.h~CITI_NFS4_ALL include/linux/nfs_page.h --- linux-2.6.3/include/linux/nfs_page.h~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 +++ linux-2.6.3-bfields/include/linux/nfs_page.h 2004-02-19 16:47:03.000000000 -0500 @@ -53,7 +53,7 @@ extern void nfs_release_request(struct n extern void nfs_list_add_request(struct nfs_page *, struct list_head *); extern int nfs_scan_list(struct list_head *, struct list_head *, - struct file *, unsigned long, unsigned int); + unsigned long, unsigned int); extern int nfs_coalesce_requests(struct list_head *, struct list_head *, unsigned int); extern int nfs_wait_on_request(struct nfs_page *); diff -puN include/linux/nfs_xdr.h~CITI_NFS4_ALL include/linux/nfs_xdr.h --- linux-2.6.3/include/linux/nfs_xdr.h~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 +++ linux-2.6.3-bfields/include/linux/nfs_xdr.h 2004-02-19 16:47:15.000000000 -0500 @@ -39,6 +39,9 @@ struct nfs_fattr { __u64 change_attr; /* NFSv4 change attribute */ __u64 pre_change_attr;/* pre-op NFSv4 change attribute */ unsigned long timestamp; +#ifdef CONFIG_NFS_V4 + struct nfs4_acl *acl; /* NFSv4 ACL */ +#endif /* CONFIG_NFS_V4 */ }; #define NFS_ATTR_WCC 0x0001 /* pre-op WCC data */ @@ -103,8 +106,6 @@ struct nfs_openargs { nfs4_verifier verifier; /* EXCLUSIVE */ } u; struct qstr * name; - struct nfs4_getattr * f_getattr; - struct nfs4_getattr * d_getattr; struct nfs_server * server; /* Needed for ID mapping */ }; @@ -113,8 +114,8 @@ struct nfs_openres { struct nfs_fh fh; struct nfs4_change_info * cinfo; __u32 rflags; - struct nfs4_getattr * f_getattr; - struct nfs4_getattr * d_getattr; + struct nfs_fattr * f_attr; + struct nfs_fattr * d_attr; struct nfs_server * server; }; @@ -141,7 +142,6 @@ struct nfs_open_reclaimargs { __u32 id; __u32 share_access; __u32 claim; - struct nfs4_getattr * f_getattr; }; /* @@ -319,12 +319,22 @@ struct nfs_setattrargs { struct nfs_fh * fh; nfs4_stateid 
stateid; struct iattr * iap; - struct nfs4_getattr * attr; + struct nfs_fattr * fattr; struct nfs_server * server; /* Needed for name mapping */ +#ifdef CONFIG_NFS_V4 + struct nfs4_acl * acl; +#endif /* CONFIG_NFS_V4 */ }; +#ifdef CONFIG_NFS_V4 +struct nfs_setaclargs { + struct nfs_fh * fh; + struct nfs4_acl * acl; +}; +#endif /* CONFIG_NFS_V4 */ + struct nfs_setattrres { - struct nfs4_getattr * attr; + struct nfs_fattr * fattr; struct nfs_server * server; }; @@ -482,118 +492,127 @@ struct nfs4_change_info { u64 after; }; -struct nfs4_access { - u32 ac_req_access; /* request */ - u32 * ac_resp_supported; /* response */ - u32 * ac_resp_access; /* response */ +struct nfs4_accessargs { + struct nfs_fh * fhandle; + u32 req_access; }; -struct nfs4_close { - char * cl_stateid; /* request */ - u32 cl_seqid; /* request */ +struct nfs4_accessres { + struct nfs_server * server; + struct nfs_fattr * fattr; + u32 req_access; + u32 * resp_supported; + u32 * resp_access; }; -struct nfs4_create { - u32 cr_ftype; /* request */ - union { /* request */ - struct { - u32 textlen; - const char * text; - } symlink; /* NF4LNK */ +struct nfs4_create_arg { + u32 ftype; + union { + struct qstr * symlink; /* NF4LNK */ struct { u32 specdata1; u32 specdata2; } device; /* NF4BLK, NF4CHR */ } u; - u32 cr_namelen; /* request */ - const char * cr_name; /* request */ - struct iattr * cr_attrs; /* request */ - struct nfs4_change_info * cr_cinfo; /* response */ + struct qstr * name; + struct nfs_server * server; + struct iattr * attrs; + struct nfs_fh * dir_fh; }; -#define cr_textlen u.symlink.textlen -#define cr_text u.symlink.text -#define cr_specdata1 u.device.specdata1 -#define cr_specdata2 u.device.specdata2 -struct nfs4_getattr { - u32 * gt_bmval; /* request */ - struct nfs_fattr * gt_attrs; /* response */ - struct nfs_fsstat * gt_fsstat; /* response */ - struct nfs_pathconf * gt_pathconf; /* response */ +struct nfs4_create_res { + struct nfs_server * server; + struct nfs_fh * fhandle; + struct 
nfs_fattr * fattr; + struct nfs_fattr * dir_attr; + struct nfs4_change_info * dir_cinfo; }; -struct nfs4_getfh { - struct nfs_fh * gf_fhandle; /* response */ +struct nfs4_getattr_res { + struct nfs_server * server; + struct nfs_fattr * fattr; }; -struct nfs4_link { - u32 ln_namelen; /* request */ - const char * ln_name; /* request */ - struct nfs4_change_info * ln_cinfo; /* response */ +struct nfs4_getroot_res { + struct nfs_server * server; + struct nfs_fattr * fattr; + struct nfs_fh * fhandle; }; -struct nfs4_lookup { - struct qstr * lo_name; /* request */ +struct nfs4_getroot_arg { + struct nfs_fh * fhandle; + struct qstr * name; }; -struct nfs4_open { - struct nfs4_client * op_client_state; /* request */ - u32 op_share_access; /* request */ - u32 op_opentype; /* request */ - u32 op_createmode; /* request */ - union { /* request */ - struct iattr * attrs; /* UNCHECKED, GUARDED */ - nfs4_verifier verifier; /* EXCLUSIVE */ - } u; - struct qstr * op_name; /* request */ - char * op_stateid; /* response */ - struct nfs4_change_info * op_cinfo; /* response */ - u32 * op_rflags; /* response */ -}; -#define op_attrs u.attrs -#define op_verifier u.verifier - -struct nfs4_open_confirm { - char * oc_stateid; /* request */ -}; - -struct nfs4_putfh { - struct nfs_fh * pf_fhandle; /* request */ -}; - -struct nfs4_readdir { - u64 rd_cookie; /* request */ - nfs4_verifier rd_req_verifier; /* request */ - u32 rd_count; /* request */ - u32 rd_bmval[2]; /* request */ - nfs4_verifier rd_resp_verifier; /* response */ - struct page ** rd_pages; /* zero-copy data */ - unsigned int rd_pgbase; /* zero-copy data */ +struct nfs4_link_arg { + struct nfs_fh * fh; + struct nfs_fh * dir_fh; + struct qstr * name; +}; + +struct nfs4_link_res { + struct nfs_server * server; + struct nfs_fattr * fattr; + struct nfs_fattr * dir_attr; + struct nfs4_change_info * dir_cinfo; +}; + +struct nfs4_lookupargs { + struct nfs_fh * dir_fh; + struct qstr * name; +}; + +struct nfs4_lookupres { + struct 
nfs_server * server; + struct nfs_fattr * dirattr; + struct nfs_fattr * fattr; + struct nfs_fh * fhandle; +}; + +struct nfs4_readdir_arg { + struct nfs_fh * fh; + u64 cookie; /* request */ + nfs4_verifier req_verifier; /* request */ + u32 count; /* request */ + struct page ** pages; /* zero-copy data */ + unsigned int pgbase; /* zero-copy data */ +}; + +struct nfs4_readdir_res { + nfs4_verifier resp_verifier; + unsigned int pgbase; }; struct nfs4_readlink { - u32 rl_count; /* zero-copy data */ - struct page ** rl_pages; /* zero-copy data */ + struct nfs_fh * fh; + u32 count; /* zero-copy data */ + struct page ** pages; /* zero-copy data */ }; -struct nfs4_remove { - u32 rm_namelen; /* request */ - const char * rm_name; /* request */ - struct nfs4_change_info * rm_cinfo; /* response */ +struct nfs4_remove_arg { + struct nfs_fh * fhandle; + struct qstr * name; }; -struct nfs4_rename { - u32 rn_oldnamelen; /* request */ - const char * rn_oldname; /* request */ - u32 rn_newnamelen; /* request */ - const char * rn_newname; /* request */ - struct nfs4_change_info * rn_src_cinfo; /* response */ - struct nfs4_change_info * rn_dst_cinfo; /* response */ +struct nfs4_remove_res { + struct nfs_server * server; + struct nfs4_change_info * dir_cinfo; + struct nfs_fattr * dir_attr; }; -struct nfs4_setattr { - char * st_stateid; /* request */ - struct iattr * st_iap; /* request */ +struct nfs4_rename_arg { + struct nfs_fh * old_dir; + struct nfs_fh * new_dir; + struct qstr * old_name; + struct qstr * new_name; +}; + +struct nfs4_rename_res { + struct nfs_server * server; + struct nfs4_change_info * old_cinfo; + struct nfs4_change_info * new_cinfo; + struct nfs_fattr * old_fattr; + struct nfs_fattr * new_fattr; }; struct nfs4_setclientid { @@ -606,52 +625,12 @@ struct nfs4_setclientid { struct nfs4_client * sc_state; /* response */ }; -struct nfs4_op { - u32 opnum; - union { - struct nfs4_access access; - struct nfs4_close close; - struct nfs4_create create; - struct nfs4_getattr 
getattr; - struct nfs4_getfh getfh; - struct nfs4_link link; - struct nfs4_lookup lookup; - struct nfs4_open open; - struct nfs4_open_confirm open_confirm; - struct nfs4_putfh putfh; - struct nfs4_readdir readdir; - struct nfs4_readlink readlink; - struct nfs4_remove remove; - struct nfs4_rename rename; - struct nfs4_client * renew; - struct nfs4_setattr setattr; - } u; -}; - -struct nfs4_compound { - unsigned int flags; /* defined below */ - struct nfs_server * server; - - /* RENEW information */ - int renew_index; - unsigned long timestamp; - - /* scratch variables for XDR encode/decode */ - int nops; - u32 * p; - u32 * end; - - /* the individual COMPOUND operations */ - struct nfs4_op *ops; - - /* request */ - int req_nops; - u32 taglen; - char * tag; - - /* response */ - int resp_nops; - int toplevel_status; +struct nfs4_unlink { + struct nfs_server * server; + struct nfs_fh * fh; + struct qstr * name; + struct nfs4_change_info cinfo; /* NOT a pointer */ + struct nfs_fattr attrs; /* NOT a pointer */ }; #endif /* CONFIG_NFS_V4 */ @@ -698,9 +677,10 @@ struct nfs_rpc_ops { int version; /* Protocol version */ struct dentry_operations *dentry_ops; struct inode_operations *dir_inode_ops; + struct inode_operations *file_inode_ops; int (*getroot) (struct nfs_server *, struct nfs_fh *, - struct nfs_fattr *); + struct nfs_fsinfo *); int (*getattr) (struct inode *, struct nfs_fattr *); int (*setattr) (struct dentry *, struct nfs_fattr *, struct iattr *); diff -puN include/linux/sunrpc/xprt.h~CITI_NFS4_ALL include/linux/sunrpc/xprt.h --- linux-2.6.3/include/linux/sunrpc/xprt.h~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 +++ linux-2.6.3-bfields/include/linux/sunrpc/xprt.h 2004-02-19 16:47:05.000000000 -0500 @@ -95,14 +95,15 @@ struct rpc_rqst { struct rpc_rqst * rq_next; /* free list */ int rq_cong; /* has incremented xprt->cong */ int rq_received; /* receive completed */ - u32 rq_seqno; /* gss seq no. used on req. 
*/ +#define GSS_SEQNO_CACHE 4 + u32 rq_seqnos[GSS_SEQNO_CACHE]; + /* gss seq no.s used on req. */ struct list_head rq_list; struct xdr_buf rq_private_buf; /* The receive buffer * used in the softirq. */ - /* * For authentication (e.g. auth_des) */ @@ -155,6 +156,11 @@ struct rpc_xprt { stream : 1; /* TCP */ /* + * XID + */ + __u32 xid; /* Next XID value to use */ + + /* * State of TCP reply receive stuff */ u32 tcp_recm, /* Fragment header */ @@ -164,6 +170,11 @@ struct rpc_xprt { unsigned long tcp_copied, /* copied to request */ tcp_flags; /* + * Connection of sockets + */ + struct work_struct sock_connect; + unsigned short port; + /* * Disconnection of idle sockets */ struct work_struct task_cleanup; diff -puN net/sunrpc/xprt.c~CITI_NFS4_ALL net/sunrpc/xprt.c --- linux-2.6.3/net/sunrpc/xprt.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 +++ linux-2.6.3-bfields/net/sunrpc/xprt.c 2004-02-19 16:47:05.000000000 -0500 @@ -60,6 +60,7 @@ #include #include #include +#include #include #include @@ -77,6 +78,7 @@ #define XPRT_MAX_BACKOFF (8) #define XPRT_IDLE_TIMEOUT (5*60*HZ) +#define XPRT_MAX_RESVPORT (800) /* * Local functions @@ -87,7 +89,7 @@ static void xprt_disconnect(struct rpc_x static void xprt_connect_status(struct rpc_task *task); static struct rpc_xprt * xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to); -static struct socket *xprt_create_socket(int, struct rpc_timeout *, int); +static struct socket *xprt_create_socket(struct rpc_xprt *, int, int); static void xprt_bind_socket(struct rpc_xprt *, struct socket *); static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); @@ -455,6 +457,68 @@ out_abort: spin_unlock(&xprt->sock_lock); } +static void +xprt_socket_connect(void *args) +{ + struct rpc_xprt *xprt = (struct rpc_xprt *)args; + struct socket *sock = xprt->sock; + int status = -EIO; + + if (xprt->shutdown) { + rpc_wake_up_status(&xprt->pending, -EIO); + return; + } + if (!xprt->addr.sin_port) + goto out_err; + + /* + * 
Start by resetting any existing state + */ + xprt_close(xprt); + sock = xprt_create_socket(xprt, xprt->prot, xprt->resvport); + if (sock == NULL) { + /* couldn't create socket or bind to reserved port; + * this is likely a permanent error, so cause an abort */ + goto out_err; + return; + } + xprt_bind_socket(xprt, sock); + xprt_sock_setbufsize(xprt); + + if (!xprt->stream) + goto out; + + /* + * Tell the socket layer to start connecting... + */ + status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, + sizeof(xprt->addr), O_NONBLOCK); + dprintk("RPC: %p connect status %d connected %d sock state %d\n", + xprt, -status, xprt_connected(xprt), sock->sk->sk_state); + if (status >= 0) + goto out; + switch (status) { + case -EINPROGRESS: + case -EALREADY: + break; + default: + goto out_err; + } +out: + spin_lock_bh(&xprt->sock_lock); + if (xprt->snd_task) + rpc_wake_up_task(xprt->snd_task); + spin_unlock_bh(&xprt->sock_lock); + return; +out_err: + spin_lock_bh(&xprt->sock_lock); + if (xprt->snd_task) { + xprt->snd_task->tk_status = status; + rpc_wake_up_task(xprt->snd_task); + } + spin_unlock_bh(&xprt->sock_lock); +} + /* * Attempt to connect a TCP socket. * @@ -463,9 +527,6 @@ void xprt_connect(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; - struct socket *sock = xprt->sock; - struct sock *inet; - int status; dprintk("RPC: %4d xprt_connect xprt %p %s connected\n", task->tk_pid, xprt, (xprt_connected(xprt) ? "is" : "is not")); @@ -486,79 +547,9 @@ xprt_connect(struct rpc_task *task) if (task->tk_rqstp) task->tk_rqstp->rq_bytes_sent = 0; - /* - * We're here because the xprt was marked disconnected. - * Start by resetting any existing state. 
- */ - xprt_close(xprt); - if (!(sock = xprt_create_socket(xprt->prot, &xprt->timeout, xprt->resvport))) { - /* couldn't create socket or bind to reserved port; - * this is likely a permanent error, so cause an abort */ - task->tk_status = -EIO; - goto out_write; - } - xprt_bind_socket(xprt, sock); - xprt_sock_setbufsize(xprt); - - if (!xprt->stream) - goto out_write; - - inet = sock->sk; - - /* - * Tell the socket layer to start connecting... - */ - status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, - sizeof(xprt->addr), O_NONBLOCK); - dprintk("RPC: %4d connect status %d connected %d sock state %d\n", - task->tk_pid, -status, xprt_connected(xprt), inet->sk_state); - - if (status >= 0) - return; - - switch (status) { - case -EINPROGRESS: - case -EALREADY: - /* Protect against TCP socket state changes */ - lock_sock(inet); - if (inet->sk_state != TCP_ESTABLISHED) { - dprintk("RPC: %4d waiting for connection\n", - task->tk_pid); - task->tk_timeout = RPC_CONNECT_TIMEOUT; - /* if the socket is already closing, delay briefly */ - if ((1 << inet->sk_state) & - ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) - task->tk_timeout = RPC_REESTABLISH_TIMEOUT; - rpc_sleep_on(&xprt->pending, task, xprt_connect_status, - NULL); - } - release_sock(inet); - break; - case -ECONNREFUSED: - case -ECONNRESET: - case -ENOTCONN: - if (!RPC_IS_SOFT(task)) { - rpc_delay(task, RPC_REESTABLISH_TIMEOUT); - task->tk_status = -ENOTCONN; - break; - } - default: - /* Report myriad other possible returns. If this file - * system is soft mounted, just error out, like Solaris. 
*/ - if (RPC_IS_SOFT(task)) { - printk(KERN_WARNING - "RPC: error %d connecting to server %s, exiting\n", - -status, task->tk_client->cl_server); - task->tk_status = -EIO; - goto out_write; - } - printk(KERN_WARNING "RPC: error %d connecting to server %s\n", - -status, task->tk_client->cl_server); - /* This will prevent anybody else from reconnecting */ - rpc_delay(task, RPC_REESTABLISH_TIMEOUT); - task->tk_status = status; - break; - } + task->tk_timeout = RPC_CONNECT_TIMEOUT; + rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL); + schedule_work(&xprt->sock_connect); return; out_write: xprt_release_write(xprt, task); @@ -583,6 +574,8 @@ xprt_connect_status(struct rpc_task *tas task->tk_status = -EIO; switch (task->tk_status) { + case -ECONNREFUSED: + case -ECONNRESET: case -ENOTCONN: rpc_delay(task, RPC_REESTABLISH_TIMEOUT); return; @@ -1333,22 +1326,14 @@ do_xprt_reserve(struct rpc_task *task) /* * Allocate a 'unique' XID */ -static u32 -xprt_alloc_xid(void) +static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt) +{ + return xprt->xid++; +} + +static inline void xprt_init_xid(struct rpc_xprt *xprt) { - static spinlock_t xid_lock = SPIN_LOCK_UNLOCKED; - static int need_init = 1; - static u32 xid; - u32 ret; - - spin_lock(&xid_lock); - if (unlikely(need_init)) { - xid = get_seconds() << 12; - need_init = 0; - } - ret = xid++; - spin_unlock(&xid_lock); - return ret; + get_random_bytes(&xprt->xid, sizeof(xprt->xid)); } /* @@ -1362,7 +1347,8 @@ xprt_request_init(struct rpc_task *task, req->rq_timeout = xprt->timeout; req->rq_task = task; req->rq_xprt = xprt; - req->rq_xid = xprt_alloc_xid(); + req->rq_xid = xprt_alloc_xid(xprt); + memset(req->rq_seqnos, 0, sizeof(req->rq_seqnos)); INIT_LIST_HEAD(&req->rq_list); dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, req, req->rq_xid); @@ -1457,11 +1443,13 @@ xprt_setup(int proto, struct sockaddr_in init_waitqueue_head(&xprt->cong_wait); INIT_LIST_HEAD(&xprt->recv); + INIT_WORK(&xprt->sock_connect, 
xprt_socket_connect, xprt); INIT_WORK(&xprt->task_cleanup, xprt_socket_autoclose, xprt); init_timer(&xprt->timer); xprt->timer.function = xprt_init_autodisconnect; xprt->timer.data = (unsigned long) xprt; xprt->last_used = jiffies; + xprt->port = XPRT_MAX_RESVPORT; /* Set timeout parameters */ if (to) { @@ -1481,6 +1469,8 @@ xprt_setup(int proto, struct sockaddr_in req->rq_next = NULL; xprt->free = xprt->slot; + xprt_init_xid(xprt); + /* Check whether we want to use a reserved port */ xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; @@ -1493,30 +1483,28 @@ xprt_setup(int proto, struct sockaddr_in * Bind to a reserved port */ static inline int -xprt_bindresvport(struct socket *sock) +xprt_bindresvport(struct rpc_xprt *xprt, struct socket *sock) { - struct sockaddr_in myaddr; + struct sockaddr_in myaddr = { + .sin_family = AF_INET, + }; int err, port; - kernel_cap_t saved_cap = current->cap_effective; - /* Override capabilities. - * They were checked in xprt_create_proto i.e. at mount time - */ - cap_raise(current->cap_effective, CAP_NET_BIND_SERVICE); - - memset(&myaddr, 0, sizeof(myaddr)); - myaddr.sin_family = AF_INET; - port = 800; + /* Were we already bound to a given port? Try to reuse it */ + port = xprt->port; do { myaddr.sin_port = htons(port); err = sock->ops->bind(sock, (struct sockaddr *) &myaddr, sizeof(myaddr)); - } while (err == -EADDRINUSE && --port > 0); - current->cap_effective = saved_cap; - - if (err < 0) - printk("RPC: Can't bind to reserved port (%d).\n", -err); + if (err == 0) { + xprt->port = port; + return 0; + } + if (--port == 0) + port = XPRT_MAX_RESVPORT; + } while (err == -EADDRINUSE && port != xprt->port); + printk("RPC: Can't bind to reserved port (%d).\n", -err); return err; } @@ -1580,7 +1568,7 @@ xprt_sock_setbufsize(struct rpc_xprt *xp * and connect stream sockets. 
*/ static struct socket * -xprt_create_socket(int proto, struct rpc_timeout *to, int resvport) +xprt_create_socket(struct rpc_xprt *xprt, int proto, int resvport) { struct socket *sock; int type, err; @@ -1596,7 +1584,7 @@ xprt_create_socket(int proto, struct rpc } /* If the caller has the capability, bind to a reserved port */ - if (resvport && xprt_bindresvport(sock) < 0) { + if (resvport && xprt_bindresvport(xprt, sock) < 0) { printk("RPC: can't bind to reserved port.\n"); goto failed; } diff -puN net/sunrpc/cache.c~CITI_NFS4_ALL net/sunrpc/cache.c --- linux-2.6.3/net/sunrpc/cache.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 +++ linux-2.6.3-bfields/net/sunrpc/cache.c 2004-02-19 16:47:03.000000000 -0500 @@ -325,6 +325,7 @@ int cache_clean(void) if (current_detail && current_index < current_detail->hash_size) { struct cache_head *ch, **cp; + struct cache_detail *d; write_lock(¤t_detail->hash_lock); @@ -354,12 +355,14 @@ int cache_clean(void) rv = 1; } write_unlock(¤t_detail->hash_lock); - if (ch) - current_detail->cache_put(ch, current_detail); - else + d = current_detail; + if (!ch) current_index ++; - } - spin_unlock(&cache_list_lock); + spin_unlock(&cache_list_lock); + if (ch) + d->cache_put(ch, d); + } else + spin_unlock(&cache_list_lock); return rv; } diff -puN include/linux/sunrpc/cache.h~CITI_NFS4_ALL include/linux/sunrpc/cache.h --- linux-2.6.3/include/linux/sunrpc/cache.h~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 +++ linux-2.6.3-bfields/include/linux/sunrpc/cache.h 2004-02-19 16:47:03.000000000 -0500 @@ -132,12 +132,14 @@ struct cache_deferred_req { * If "set" == 0 : * If an entry is found, it is returned * If no entry is found, a new non-VALID entry is created. 
- * If "set" == 1 : + * If "set" == 1 and INPLACE == 0 : * If no entry is found a new one is inserted with data from "template" * If a non-CACHE_VALID entry is found, it is updated from template using UPDATE * If a CACHE_VALID entry is found, a new entry is swapped in with data * from "template" - * If set == 2, we UPDATE, but don't swap. i.e. update in place + * If set == 1, and INPLACE == 1 : + * As above, except that if a CACHE_VALID entry is found, we UPDATE in place + * instead of swapping in a new entry. * * If the passed handle has the CACHE_NEGATIVE flag set, then UPDATE is not * run but insteead CACHE_NEGATIVE is set in any new item. @@ -164,8 +166,8 @@ RTN *FNAME ARGS \ RTN *tmp, *new=NULL; \ struct cache_head **hp, **head; \ SETUP; \ - retry: \ head = &(DETAIL)->hash_table[HASHFN]; \ + retry: \ if (set||new) write_lock(&(DETAIL)->hash_lock); \ else read_lock(&(DETAIL)->hash_lock); \ for(hp=head; *hp != NULL; hp = &tmp->MEMBER.next) { \ @@ -175,6 +177,8 @@ RTN *FNAME ARGS \ if (set && !INPLACE && test_bit(CACHE_VALID, &tmp->MEMBER.flags) && !new) \ break; \ \ + if (new) \ + {INIT;} \ cache_get(&tmp->MEMBER); \ if (set) { \ if (!INPLACE && test_bit(CACHE_VALID, &tmp->MEMBER.flags))\ @@ -203,6 +207,7 @@ RTN *FNAME ARGS \ } \ /* Didn't find anything */ \ if (new) { \ + INIT; \ new->MEMBER.next = *head; \ *head = &new->MEMBER; \ (DETAIL)->entries ++; \ @@ -224,8 +229,6 @@ RTN *FNAME ARGS \ if (new) { \ cache_init(&new->MEMBER); \ cache_get(&new->MEMBER); \ - INIT; \ - tmp = new; \ goto retry; \ } \ return NULL; \ diff -puN net/sunrpc/svcauth.c~CITI_NFS4_ALL net/sunrpc/svcauth.c --- linux-2.6.3/net/sunrpc/svcauth.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 +++ linux-2.6.3-bfields/net/sunrpc/svcauth.c 2004-02-19 16:47:04.000000000 -0500 @@ -150,7 +150,10 @@ DefineCacheLookup(struct auth_domain, &auth_domain_cache, auth_domain_hash(item), auth_domain_match(tmp, item), - kfree(new); if(!set) return NULL; + kfree(new); if(!set) { + 
write_unlock(&auth_domain_cache.hash_lock); + return NULL; + } new=item; atomic_inc(&new->h.refcnt), /* no update */, 0 /* no inplace updates */ diff -puN net/sunrpc/svcauth_unix.c~CITI_NFS4_ALL net/sunrpc/svcauth_unix.c --- linux-2.6.3/net/sunrpc/svcauth_unix.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 +++ linux-2.6.3-bfields/net/sunrpc/svcauth_unix.c 2004-02-19 16:47:03.000000000 -0500 @@ -119,7 +119,8 @@ static inline int ip_map_match(struct ip } static inline void ip_map_init(struct ip_map *new, struct ip_map *item) { - new->m_class = strdup(item->m_class); + new->m_class = item->m_class; + item->m_class = NULL; new->m_addr.s_addr = item->m_addr.s_addr; } static inline void ip_map_update(struct ip_map *new, struct ip_map *item) @@ -191,7 +192,9 @@ static int ip_map_parse(struct cache_det } else dom = NULL; - ipm.m_class = class; + ipm.m_class = strdup(class); + if (ipm.m_class == NULL) + return -ENOMEM; ipm.m_addr.s_addr = htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4); ipm.h.flags = 0; @@ -207,6 +210,7 @@ static int ip_map_parse(struct cache_det ip_map_put(&ipmp->h, &ip_map_cache); if (dom) auth_domain_put(dom); + if (ipm.m_class) kfree(ipm.m_class); if (!ipmp) return -ENOMEM; cache_flush(); @@ -266,7 +270,9 @@ int auth_unix_add_addr(struct in_addr ad if (dom->flavour != RPC_AUTH_UNIX) return -EINVAL; udom = container_of(dom, struct unix_domain, h); - ip.m_class = "nfsd"; + ip.m_class = strdup("nfsd"); + if (!ip.m_class) + return -ENOMEM; ip.m_addr = addr; ip.m_client = udom; ip.m_add_change = udom->addr_changes+1; @@ -274,6 +280,7 @@ int auth_unix_add_addr(struct in_addr ad ip.h.expiry_time = NEVER; ipmp = ip_map_lookup(&ip, 1); + if (ip.m_class) kfree(ip.m_class); if (ipmp) { ip_map_put(&ipmp->h, &ip_map_cache); return 0; diff -puN fs/nfsd/stats.c~CITI_NFS4_ALL fs/nfsd/stats.c --- linux-2.6.3/fs/nfsd/stats.c~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 +++ linux-2.6.3-bfields/fs/nfsd/stats.c 2004-02-19 16:47:04.000000000 -0500 @@ -26,6 +26,7 @@ #include 
#include #include +#include #include #include @@ -39,14 +40,11 @@ struct svc_stat nfsd_svcstats = { .program = &nfsd_program, }; -static int -nfsd_proc_read(char *buffer, char **start, off_t offset, int count, - int *eof, void *data) +static int nfsd_proc_show(struct seq_file *seq, void *v) { - int len; - int i; + int i; - len = sprintf(buffer, "rc %u %u %u\nfh %u %u %u %u %u\nio %u %u\n", + seq_printf(seq, "rc %u %u %u\nfh %u %u %u %u %u\nio %u %u\n", nfsdstats.rchits, nfsdstats.rcmisses, nfsdstats.rcnocache, @@ -58,57 +56,42 @@ nfsd_proc_read(char *buffer, char **star nfsdstats.io_read, nfsdstats.io_write); /* thread usage: */ - len += sprintf(buffer+len, "th %u %u", nfsdstats.th_cnt, nfsdstats.th_fullcnt); + seq_printf(seq, "th %u %u", nfsdstats.th_cnt, nfsdstats.th_fullcnt); for (i=0; i<10; i++) { unsigned int jifs = nfsdstats.th_usage[i]; unsigned int sec = jifs / HZ, msec = (jifs % HZ)*1000/HZ; - len += sprintf(buffer+len, " %u.%03u", sec, msec); + seq_printf(seq, " %u.%03u", sec, msec); } /* newline and ra-cache */ - len += sprintf(buffer+len, "\nra %u", nfsdstats.ra_size); + seq_printf(seq, "\nra %u", nfsdstats.ra_size); for (i=0; i<11; i++) - len += sprintf(buffer+len, " %u", nfsdstats.ra_depth[i]); - len += sprintf(buffer+len, "\n"); + seq_printf(seq, " %u", nfsdstats.ra_depth[i]); + seq_putc(seq, '\n'); + /* show my rpc info */ + svc_seq_show(seq, &nfsd_svcstats); - /* Assume we haven't hit EOF yet. Will be set by svc_proc_read. */ - *eof = 0; - - /* - * Append generic nfsd RPC statistics if there's room for it. 
- */ - if (len <= offset) { - len = svc_proc_read(buffer, start, offset - len, count, - eof, data); - return len; - } - - if (len < count) { - len += svc_proc_read(buffer + len, start, 0, count - len, - eof, data); - } - - if (offset >= len) { - *start = buffer; - return 0; - } + return 0; +} - *start = buffer + offset; - if ((len -= offset) > count) - return count; - return len; +static int nfsd_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, nfsd_proc_show, NULL); } +static struct file_operations nfsd_proc_fops = { + .owner = THIS_MODULE, + .open = nfsd_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + void nfsd_stat_init(void) { - struct proc_dir_entry *ent; - - if ((ent = svc_proc_register(&nfsd_svcstats)) != 0) { - ent->read_proc = nfsd_proc_read; - ent->owner = THIS_MODULE; - } + svc_proc_register(&nfsd_svcstats, &nfsd_proc_fops); } void diff -puN include/linux/sunrpc/stats.h~CITI_NFS4_ALL include/linux/sunrpc/stats.h --- linux-2.6.3/include/linux/sunrpc/stats.h~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 +++ linux-2.6.3-bfields/include/linux/sunrpc/stats.h 2004-02-19 16:47:04.000000000 -0500 @@ -48,14 +48,13 @@ void rpc_modcount(struct inode *, int) #ifdef CONFIG_PROC_FS struct proc_dir_entry * rpc_proc_register(struct rpc_stat *); void rpc_proc_unregister(const char *); -int rpc_proc_read(char *, char **, off_t, int, - int *, void *); void rpc_proc_zero(struct rpc_program *); -struct proc_dir_entry * svc_proc_register(struct svc_stat *); +struct proc_dir_entry * svc_proc_register(struct svc_stat *, + struct file_operations *); void svc_proc_unregister(const char *); -int svc_proc_read(char *, char **, off_t, int, - int *, void *); -void svc_proc_zero(struct svc_program *); + +void svc_seq_show(struct seq_file *, + const struct svc_stat *); extern struct proc_dir_entry *proc_net_rpc; @@ -63,13 +62,14 @@ extern struct proc_dir_entry *proc_net_r static inline struct proc_dir_entry 
*rpc_proc_register(struct rpc_stat *s) { return NULL; } static inline void rpc_proc_unregister(const char *p) {} -static inline int rpc_proc_read(char *a, char **b, off_t c, int d, int *e, void *f) { return 0; } static inline void rpc_proc_zero(struct rpc_program *p) {} -static inline struct proc_dir_entry *svc_proc_register(struct svc_stat *s) { return NULL; } +static inline struct proc_dir_entry *svc_proc_register(struct svc_stat *s, + struct file_operations *f) { return NULL; } static inline void svc_proc_unregister(const char *p) {} -static inline int svc_proc_read(char *a, char **b, off_t c, int d, int *e, void *f) { return 0; } -static inline void svc_proc_zero(struct svc_program *p) {} + +static inline void svc_seq_show(struct seq_file *seq, + const struct svc_stat *st) {} #define proc_net_rpc NULL diff -puN net/sunrpc/stats.c~CITI_NFS4_ALL net/sunrpc/stats.c --- linux-2.6.3/net/sunrpc/stats.c~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 +++ linux-2.6.3-bfields/net/sunrpc/stats.c 2004-02-19 16:47:04.000000000 -0500 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -28,70 +29,66 @@ struct proc_dir_entry *proc_net_rpc = NU /* * Get RPC client stats */ -int -rpc_proc_read(char *buffer, char **start, off_t offset, int count, - int *eof, void *data) -{ - struct rpc_stat *statp = (struct rpc_stat *) data; - struct rpc_program *prog = statp->program; - struct rpc_version *vers; - int len, i, j; +static int rpc_proc_show(struct seq_file *seq, void *v) { + const struct rpc_stat *statp = seq->private; + const struct rpc_program *prog = statp->program; + int i, j; - len = sprintf(buffer, + seq_printf(seq, "net %d %d %d %d\n", statp->netcnt, statp->netudpcnt, statp->nettcpcnt, statp->nettcpconn); - len += sprintf(buffer + len, + seq_printf(seq, "rpc %d %d %d\n", statp->rpccnt, statp->rpcretrans, statp->rpcauthrefresh); for (i = 0; i < prog->nrvers; i++) { - if (!(vers = prog->version[i])) + const struct rpc_version *vers = 
prog->version[i]; + if (!vers) continue; - len += sprintf(buffer + len, "proc%d %d", + seq_printf(seq, "proc%d %d", vers->number, vers->nrprocs); for (j = 0; j < vers->nrprocs; j++) - len += sprintf(buffer + len, " %d", + seq_printf(seq, " %d", vers->procs[j].p_count); - buffer[len++] = '\n'; + seq_putc(seq, '\n'); } + return 0; +} - if (offset >= len) { - *start = buffer; - *eof = 1; - return 0; - } - *start = buffer + offset; - if ((len -= offset) > count) - return count; - *eof = 1; - return len; +static int rpc_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, rpc_proc_show, PDE(inode)->data); } +static struct file_operations rpc_proc_fops = { + .owner = THIS_MODULE, + .open = rpc_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /* * Get RPC server stats */ -int -svc_proc_read(char *buffer, char **start, off_t offset, int count, - int *eof, void *data) -{ - struct svc_stat *statp = (struct svc_stat *) data; - struct svc_program *prog = statp->program; - struct svc_procedure *proc; - struct svc_version *vers; - int len, i, j; +void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) { + const struct svc_program *prog = statp->program; + const struct svc_procedure *proc; + const struct svc_version *vers; + int i, j; - len = sprintf(buffer, + seq_printf(seq, "net %d %d %d %d\n", statp->netcnt, statp->netudpcnt, statp->nettcpcnt, statp->nettcpconn); - len += sprintf(buffer + len, + seq_printf(seq, "rpc %d %d %d %d %d\n", statp->rpccnt, statp->rpcbadfmt+statp->rpcbadauth+statp->rpcbadclnt, @@ -102,41 +99,36 @@ svc_proc_read(char *buffer, char **start for (i = 0; i < prog->pg_nvers; i++) { if (!(vers = prog->pg_vers[i]) || !(proc = vers->vs_proc)) continue; - len += sprintf(buffer + len, "proc%d %d", i, vers->vs_nproc); + seq_printf(seq, "proc%d %d", i, vers->vs_nproc); for (j = 0; j < vers->vs_nproc; j++, proc++) - len += sprintf(buffer + len, " %d", proc->pc_count); - buffer[len++] = 
'\n'; + seq_printf(seq, " %d", proc->pc_count); + seq_putc(seq, '\n'); } - - if (offset >= len) { - *start = buffer; - *eof = 1; - return 0; - } - *start = buffer + offset; - if ((len -= offset) > count) - return count; - *eof = 1; - return len; } /* * Register/unregister RPC proc files */ static inline struct proc_dir_entry * -do_register(const char *name, void *data, int issvc) +do_register(const char *name, void *data, struct file_operations *fops) { + struct proc_dir_entry *ent; + rpc_proc_init(); dprintk("RPC: registering /proc/net/rpc/%s\n", name); - return create_proc_read_entry(name, 0, proc_net_rpc, - issvc? svc_proc_read : rpc_proc_read, - data); + + ent = create_proc_entry(name, 0, proc_net_rpc); + if (ent) { + ent->proc_fops = fops; + ent->data = data; + } + return ent; } struct proc_dir_entry * rpc_proc_register(struct rpc_stat *statp) { - return do_register(statp->program->name, statp, 0); + return do_register(statp->program->name, statp, &rpc_proc_fops); } void @@ -146,9 +138,9 @@ rpc_proc_unregister(const char *name) } struct proc_dir_entry * -svc_proc_register(struct svc_stat *statp) +svc_proc_register(struct svc_stat *statp, struct file_operations *fops) { - return do_register(statp->program->pg_name, statp, 1); + return do_register(statp->program->pg_name, statp, fops); } void @@ -163,7 +155,7 @@ rpc_proc_init(void) dprintk("RPC: registering /proc/net/rpc\n"); if (!proc_net_rpc) { struct proc_dir_entry *ent; - ent = proc_mkdir("net/rpc", 0); + ent = proc_mkdir("rpc", proc_net); if (ent) { ent->owner = THIS_MODULE; proc_net_rpc = ent; diff -puN net/sunrpc/sunrpc_syms.c~CITI_NFS4_ALL net/sunrpc/sunrpc_syms.c --- linux-2.6.3/net/sunrpc/sunrpc_syms.c~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 +++ linux-2.6.3-bfields/net/sunrpc/sunrpc_syms.c 2004-02-19 16:47:04.000000000 -0500 @@ -85,15 +85,16 @@ EXPORT_SYMBOL(svc_recv); EXPORT_SYMBOL(svc_wake_up); EXPORT_SYMBOL(svc_makesock); EXPORT_SYMBOL(svc_reserve); +EXPORT_SYMBOL(svc_auth_register); 
+EXPORT_SYMBOL(auth_domain_lookup); /* RPC statistics */ #ifdef CONFIG_PROC_FS EXPORT_SYMBOL(rpc_proc_register); EXPORT_SYMBOL(rpc_proc_unregister); -EXPORT_SYMBOL(rpc_proc_read); EXPORT_SYMBOL(svc_proc_register); EXPORT_SYMBOL(svc_proc_unregister); -EXPORT_SYMBOL(svc_proc_read); +EXPORT_SYMBOL(svc_seq_show); #endif /* caching... */ diff -puN net/sunrpc/auth_gss/gss_krb5_seal.c~CITI_NFS4_ALL net/sunrpc/auth_gss/gss_krb5_seal.c --- linux-2.6.3/net/sunrpc/auth_gss/gss_krb5_seal.c~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 +++ linux-2.6.3-bfields/net/sunrpc/auth_gss/gss_krb5_seal.c 2004-02-19 16:47:07.000000000 -0500 @@ -101,12 +101,12 @@ krb5_make_token(struct krb5_ctx *ctx, in checksum_type = CKSUMTYPE_RSA_MD5; break; default: - dprintk("RPC: gss_krb5_seal: ctx->signalg %d not" + dprintk("RPC: gss_krb5_seal: ctx->signalg %d not" " supported\n", ctx->signalg); goto out_err; } if (ctx->sealalg != SEAL_ALG_NONE && ctx->sealalg != SEAL_ALG_DES) { - dprintk("RPC: gss_krb5_seal: ctx->sealalg %d not supported\n", + dprintk("RPC: gss_krb5_seal: ctx->sealalg %d not supported\n", ctx->sealalg); goto out_err; } @@ -151,7 +151,7 @@ krb5_make_token(struct krb5_ctx *ctx, in md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH, KRB5_CKSUM_LENGTH); - dprintk("make_seal_token: cksum data: \n"); + dprintk("RPC: make_seal_token: cksum data: \n"); print_hexl((u32 *) (krb5_hdr + 16), KRB5_CKSUM_LENGTH, 0); break; default: @@ -169,8 +169,5 @@ krb5_make_token(struct krb5_ctx *ctx, in return ((ctx->endtime < now) ? 
GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE); out_err: if (md5cksum.data) kfree(md5cksum.data); - if (token->data) kfree(token->data); - token->data = 0; - token->len = 0; return GSS_S_FAILURE; } diff -puN include/linux/sunrpc/auth_gss.h~CITI_NFS4_ALL include/linux/sunrpc/auth_gss.h --- linux-2.6.3/include/linux/sunrpc/auth_gss.h~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 +++ linux-2.6.3-bfields/include/linux/sunrpc/auth_gss.h 2004-02-19 16:47:04.000000000 -0500 @@ -62,8 +62,6 @@ struct rpc_gss_init_res { struct xdr_netobj gr_token; /* token */ }; -#define GSS_SEQ_WIN 5 - /* The gss_cl_ctx struct holds all the information the rpcsec_gss client * code needs to know about a single security context. In particular, * gc_gss_ctx is the context handle that is used to do gss-api calls, while diff -puN include/linux/sunrpc/gss_api.h~CITI_NFS4_ALL include/linux/sunrpc/gss_api.h --- linux-2.6.3/include/linux/sunrpc/gss_api.h~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 +++ linux-2.6.3-bfields/include/linux/sunrpc/gss_api.h 2004-02-19 16:47:04.000000000 -0500 @@ -120,6 +120,9 @@ int gss_mech_unregister_all(void); * reference count. */ struct gss_api_mech * gss_mech_get_by_OID(struct xdr_netobj *); +/* Similar, but get by name like "krb5", "spkm", etc., instead of OID. 
*/ +struct gss_api_mech *gss_mech_get_by_name(char *); + /* Just increments the mechanism's reference count and returns its input: */ struct gss_api_mech * gss_mech_get(struct gss_api_mech *); diff -puN /dev/null include/linux/sunrpc/svcauth_gss.h --- /dev/null 2004-01-26 19:20:21.000000000 -0500 +++ linux-2.6.3-bfields/include/linux/sunrpc/svcauth_gss.h 2004-02-19 16:47:04.000000000 -0500 @@ -0,0 +1,35 @@ +/* + * linux/include/linux/sunrpc/svcauth_gss.h + * + * Bruce Fields + * Copyright (c) 2002 The Regents of the University of Michigan + * + * Id: linux-2.6.3-CITI_NFS4_ALL.patch,v 1.2.4.1 2004/03/17 23:55:23 adilger Exp $ + * + */ + +#ifndef _LINUX_SUNRPC_SVCAUTH_GSS_H +#define _LINUX_SUNRPC_SVCAUTH_GSS_H + +#ifdef __KERNEL__ +#include +#include +#include +#include +#include +#include + +int gss_svc_init(void); +int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name); + + +struct gss_svc_data { + /* decoded gss client cred: */ + struct rpc_gss_wire_cred clcred; + /* pointer to the beginning of the procedure-specific results, which + * may be encrypted/checksummed in svcauth_gss_release: */ + u32 *body_start; +}; + +#endif /* __KERNEL__ */ +#endif /* _LINUX_SUNRPC_SVCAUTH_GSS_H */ diff -puN include/linux/sunrpc/svcauth.h~CITI_NFS4_ALL include/linux/sunrpc/svcauth.h --- linux-2.6.3/include/linux/sunrpc/svcauth.h~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 +++ linux-2.6.3-bfields/include/linux/sunrpc/svcauth.h 2004-02-19 16:47:04.000000000 -0500 @@ -65,6 +65,10 @@ struct auth_domain { * GARBAGE - rpc garbage_args error * SYSERR - rpc system_err error * DENIED - authp holds reason for denial. + * COMPLETE - the reply is encoded already and ready to be sent; no + * further processing is necessary. (This is used for processing + * null procedure calls which are used to set up encryption + * contexts.) * * accept is passed the proc number so that it can accept NULL rpc requests * even if it cannot authenticate the client (as is sometimes appropriate).
@@ -97,6 +101,7 @@ extern struct auth_ops *authtab[RPC_AUTH #define SVC_DROP 6 #define SVC_DENIED 7 #define SVC_PENDING 8 +#define SVC_COMPLETE 9 extern int svc_authenticate(struct svc_rqst *rqstp, u32 *authp); diff -puN include/linux/sunrpc/svc.h~CITI_NFS4_ALL include/linux/sunrpc/svc.h --- linux-2.6.3/include/linux/sunrpc/svc.h~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 +++ linux-2.6.3-bfields/include/linux/sunrpc/svc.h 2004-02-19 16:47:04.000000000 -0500 @@ -135,6 +135,7 @@ struct svc_rqst { void * rq_argp; /* decoded arguments */ void * rq_resp; /* xdr'd results */ + void * rq_auth_data; /* flavor-specific data */ int rq_reserved; /* space on socket outq * reserved for this request diff -puN net/sunrpc/auth_gss/auth_gss.c~CITI_NFS4_ALL net/sunrpc/auth_gss/auth_gss.c --- linux-2.6.3/net/sunrpc/auth_gss/auth_gss.c~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 +++ linux-2.6.3-bfields/net/sunrpc/auth_gss/auth_gss.c 2004-02-19 16:47:07.000000000 -0500 @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include @@ -279,7 +280,7 @@ err_free_ctx: kfree(ctx); err: *gc = NULL; - dprintk("RPC: gss_parse_init_downcall returning %d\n", err); + dprintk("RPC: gss_parse_init_downcall returning %d\n", err); return err; } @@ -310,8 +311,10 @@ __gss_find_upcall(struct gss_auth *gss_a if (pos->uid != uid) continue; atomic_inc(&pos->count); + dprintk("RPC: gss_find_upcall found msg %p\n", pos); return pos; } + dprintk("RPC: gss_find_upcall found nothing\n"); return NULL; } @@ -349,6 +352,8 @@ gss_upcall(struct rpc_clnt *clnt, struct uid_t uid = cred->cr_uid; int res = 0; + dprintk("RPC: %4u gss_upcall for uid %u\n", task->tk_pid, uid); + retry: spin_lock(&gss_auth->lock); gss_msg = __gss_find_upcall(gss_auth, uid); @@ -357,8 +362,10 @@ retry: if (gss_new == NULL) { spin_unlock(&gss_auth->lock); gss_new = kmalloc(sizeof(*gss_new), GFP_KERNEL); - if (!gss_new) + if (!gss_new) { + dprintk("RPC: %4u gss_upcall -ENOMEM\n", task->tk_pid); return -ENOMEM; 
+ } goto retry; } gss_msg = gss_new; @@ -388,10 +395,12 @@ retry: spin_unlock(&gss_auth->lock); } gss_release_msg(gss_msg); + dprintk("RPC: %4u gss_upcall for uid %u result %d\n", task->tk_pid, + uid, res); return res; out_sleep: - /* Sleep forever */ - task->tk_timeout = 0; + dprintk("RPC: %4u gss_upcall sleeping\n", task->tk_pid); + task->tk_timeout = 0; /* Sleep forever */ rpc_sleep_on(&gss_msg->waitq, task, NULL, NULL); spin_unlock(&gss_auth->lock); if (gss_new) @@ -476,12 +485,13 @@ gss_pipe_downcall(struct file *filp, con } else spin_unlock(&gss_auth->lock); rpc_release_client(clnt); + dprintk("RPC: gss_pipe_downcall returning length %u\n", mlen); return mlen; err: if (ctx) gss_destroy_ctx(ctx); rpc_release_client(clnt); - dprintk("RPC: gss_pipe_downcall returning %d\n", err); + dprintk("RPC: gss_pipe_downcall returning %d\n", err); return err; } @@ -519,6 +529,8 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg static unsigned long ratelimit; if (msg->errno < 0) { + dprintk("RPC: gss_pipe_destroy_msg releasing msg %p\n", + gss_msg); atomic_inc(&gss_msg->count); gss_unhash_msg(gss_msg); if (msg->errno == -ETIMEDOUT || msg->errno == -EPIPE) { @@ -543,7 +555,8 @@ gss_create(struct rpc_clnt *clnt, rpc_au struct gss_auth *gss_auth; struct rpc_auth * auth; - dprintk("RPC: creating GSS authenticator for client %p\n",clnt); + dprintk("RPC: creating GSS authenticator for client %p\n",clnt); + if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL))) goto out_dec; gss_auth->mech = gss_pseudoflavor_to_mech(flavor); @@ -581,7 +594,8 @@ static void gss_destroy(struct rpc_auth *auth) { struct gss_auth *gss_auth; - dprintk("RPC: destroying GSS authenticator %p flavor %d\n", + + dprintk("RPC: destroying GSS authenticator %p flavor %d\n", auth, auth->au_flavor); gss_auth = container_of(auth, struct gss_auth, rpc_auth); @@ -596,8 +610,7 @@ gss_destroy(struct rpc_auth *auth) static void gss_destroy_ctx(struct gss_cl_ctx *ctx) { - - dprintk("RPC:
gss_destroy_ctx\n"); if (ctx->gc_gss_ctx) gss_delete_sec_context(&ctx->gc_gss_ctx); @@ -616,7 +629,7 @@ gss_destroy_cred(struct rpc_cred *rc) { struct gss_cred *cred = (struct gss_cred *)rc; - dprintk("RPC: gss_destroy_cred \n"); + dprintk("RPC: gss_destroy_cred \n"); if (cred->gc_ctx) gss_put_ctx(cred->gc_ctx); @@ -628,7 +641,7 @@ gss_create_cred(struct rpc_auth *auth, s { struct gss_cred *cred = NULL; - dprintk("RPC: gss_create_cred for uid %d, flavor %d\n", + dprintk("RPC: gss_create_cred for uid %d, flavor %d\n", acred->uid, auth->au_flavor); if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL))) @@ -648,7 +661,7 @@ gss_create_cred(struct rpc_auth *auth, s return (struct rpc_cred *) cred; out_err: - dprintk("RPC: gss_create_cred failed\n"); + dprintk("RPC: gss_create_cred failed\n"); if (cred) gss_destroy_cred((struct rpc_cred *)cred); return NULL; } @@ -659,6 +672,15 @@ gss_match(struct auth_cred *acred, struc return (rc->cr_uid == acred->uid); } +static void +shift_seqnos(u32 *seqnos) +{ + int i; + + for (i = GSS_SEQNO_CACHE - 1; i > 0; i--) /* walk backwards so older seqnos are shifted, not overwritten by seqnos[0] */ + seqnos[i] = seqnos[i-1]; +} + /* * Marshal credentials. * Maybe we should keep a cached credential for performance reasons.
@@ -678,24 +700,25 @@ gss_marshal(struct rpc_task *task, u32 * struct xdr_buf verf_buf; u32 service; - dprintk("RPC: gss_marshal\n"); + dprintk("RPC: %4u gss_marshal\n", task->tk_pid); *p++ = htonl(RPC_AUTH_GSS); cred_len = p++; service = gss_pseudoflavor_to_service(gss_cred->gc_flavor); if (service == 0) { - dprintk("Bad pseudoflavor %d in gss_marshal\n", - gss_cred->gc_flavor); + dprintk("RPC: %4u Bad pseudoflavor %d in gss_marshal\n", + task->tk_pid, gss_cred->gc_flavor); goto out_put_ctx; } + shift_seqnos(req->rq_seqnos); spin_lock(&ctx->gc_seq_lock); - req->rq_seqno = ctx->gc_seq++; + req->rq_seqnos[0] = ctx->gc_seq++; spin_unlock(&ctx->gc_seq_lock); *p++ = htonl((u32) RPC_GSS_VERSION); *p++ = htonl((u32) ctx->gc_proc); - *p++ = htonl((u32) req->rq_seqno); + *p++ = htonl((u32) req->rq_seqnos[0]); *p++ = htonl((u32) service); p = xdr_encode_netobj(p, &ctx->gc_wire_ctx); *cred_len = htonl((p - (cred_len + 1)) << 2); @@ -745,6 +768,32 @@ gss_refresh(struct rpc_task *task) return 0; } +static int +verify_checksum(struct gss_ctx *ctx, struct xdr_netobj *mic, u32 *seqnos) +{ + u32 seq, qop_state; + struct xdr_buf verf_buf; + struct iovec iov; + int i; + + for (i=0; i < GSS_SEQNO_CACHE; i++) { + if (i && !seqnos[i]) + goto fail; + seq = htonl(seqnos[i]); + iov.iov_base = &seq; + iov.iov_len = sizeof(seq); + xdr_buf_from_iov(&iov, &verf_buf); + if (!gss_verify_mic(ctx, &verf_buf, mic, &qop_state)) + goto success; + } +fail: + return -1; +success: + /* So unwrap knows which seqno we used: */ + seqnos[0] = seqnos[i]; + return 0; +} + static u32 * gss_validate(struct rpc_task *task, u32 *p) { @@ -752,28 +801,21 @@ gss_validate(struct rpc_task *task, u32 struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); - u32 seq, qop_state; - struct iovec iov; - struct xdr_buf verf_buf; struct xdr_netobj mic; u32 flav,len; u32 service; - dprintk("RPC: gss_validate\n"); + dprintk("RPC: %4u gss_validate\n", 
task->tk_pid); flav = ntohl(*p++); if ((len = ntohl(*p++)) > RPC_MAX_AUTH_SIZE) goto out_bad; if (flav != RPC_AUTH_GSS) goto out_bad; - seq = htonl(task->tk_rqstp->rq_seqno); - iov.iov_base = &seq; - iov.iov_len = sizeof(seq); - xdr_buf_from_iov(&iov, &verf_buf); + mic.data = (u8 *)p; mic.len = len; - - if (gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state)) + if (verify_checksum(ctx->gc_gss_ctx, &mic, task->tk_rqstp->rq_seqnos)) goto out_bad; service = gss_pseudoflavor_to_service(gss_cred->gc_flavor); switch (service) { @@ -789,9 +831,12 @@ gss_validate(struct rpc_task *task, u32 goto out_bad; } gss_put_ctx(ctx); + dprintk("RPC: %4u GSS gss_validate: gss_verify_mic succeeded.\n", + task->tk_pid); return p + XDR_QUADLEN(len); out_bad: gss_put_ctx(ctx); + dprintk("RPC: %4u gss_validate failed.\n", task->tk_pid); return NULL; } @@ -814,7 +859,7 @@ gss_wrap_req(struct rpc_task *task, u32 offset, *q; struct iovec *iov; - dprintk("RPC: gss_wrap_body\n"); + dprintk("RPC: %4u gss_wrap_req\n", task->tk_pid); BUG_ON(!ctx); if (ctx->gc_proc != RPC_GSS_PROC_DATA) { /* The spec seems a little ambiguous here, but I think that not @@ -832,7 +877,7 @@ gss_wrap_req(struct rpc_task *task, integ_len = p++; offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; - *p++ = htonl(req->rq_seqno); + *p++ = htonl(req->rq_seqnos[0]); status = encode(rqstp, p, obj); if (status) @@ -871,7 +916,7 @@ gss_wrap_req(struct rpc_task *task, status = 0; out: gss_put_ctx(ctx); - dprintk("RPC: gss_wrap_req returning %d\n", status); + dprintk("RPC: %4u gss_wrap_req returning %d\n", task->tk_pid, status); return status; } @@ -909,7 +954,7 @@ gss_unwrap_resp(struct rpc_task *task, mic_offset = integ_len + data_offset; if (mic_offset > rcv_buf->len) goto out; - if (ntohl(*p++) != req->rq_seqno) + if (ntohl(*p++) != req->rq_seqnos[0]) goto out; if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset, @@ -932,7 +977,8 @@ out_decode: status = decode(rqstp, p, obj); out: gss_put_ctx(ctx); - dprintk("RPC: 
gss_unwrap_resp returning %d\n", status); + dprintk("RPC: %4u gss_unwrap_resp returning %d\n", task->tk_pid, + status); return status; } @@ -972,6 +1018,15 @@ static int __init init_rpcsec_gss(void) int err = 0; err = rpcauth_register(&authgss_ops); + if (err) + goto out; + err = gss_svc_init(); + if (err) + goto out_unregister; + return 0; +out_unregister: + rpcauth_unregister(&authgss_ops); +out: return err; } diff -puN net/sunrpc/auth_gss/gss_krb5_mech.c~CITI_NFS4_ALL net/sunrpc/auth_gss/gss_krb5_mech.c --- linux-2.6.3/net/sunrpc/auth_gss/gss_krb5_mech.c~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 +++ linux-2.6.3-bfields/net/sunrpc/auth_gss/gss_krb5_mech.c 2004-02-19 16:47:15.000000000 -0500 @@ -39,6 +39,8 @@ #include #include #include +#include +#include #include #include #include @@ -98,7 +100,7 @@ get_key(char **p, char *end, struct cryp alg_mode = CRYPTO_TFM_MODE_CBC; break; default: - dprintk("RPC: get_key: unsupported algorithm %d\n", alg); + dprintk("RPC: get_key: unsupported algorithm %d\n", alg); goto out_err_free_key; } if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) @@ -153,7 +155,7 @@ gss_import_sec_context_kerberos(struct x goto out_err_free_key2; ctx_id->internal_ctx_id = ctx; - dprintk("Succesfully imported new context.\n"); + dprintk("RPC: Succesfully imported new context.\n"); return 0; out_err_free_key2: @@ -195,7 +197,7 @@ gss_verify_mic_kerberos(struct gss_ctx if (!maj_stat && qop_state) *qstate = qop_state; - dprintk("RPC: gss_verify_mic_kerberos returning %d\n", maj_stat); + dprintk("RPC: gss_verify_mic_kerberos returning %d\n", maj_stat); return maj_stat; } @@ -209,7 +211,7 @@ gss_get_mic_kerberos(struct gss_ctx *ctx err = krb5_make_token(kctx, qop, message, mic_token, KG_TOK_MIC_MSG); - dprintk("RPC: gss_get_mic_kerberos returning %d\n",err); + dprintk("RPC: gss_get_mic_kerberos returning %d\n",err); return err; } @@ -232,6 +234,10 @@ static int __init init_kerberos_module(v gm = gss_mech_get_by_OID(&gss_mech_krb5_oid); 
gss_register_triple(RPC_AUTH_GSS_KRB5 , gm, 0, RPC_GSS_SVC_NONE); gss_register_triple(RPC_AUTH_GSS_KRB5I, gm, 0, RPC_GSS_SVC_INTEGRITY); + if (svcauth_gss_register_pseudoflavor(RPC_AUTH_GSS_KRB5, "krb5")) + printk("Failed to register %s with server!\n", "krb5"); + if (svcauth_gss_register_pseudoflavor(RPC_AUTH_GSS_KRB5I, "krb5i")) + printk("Failed to register %s with server!\n", "krb5i"); gss_mech_put(gm); return 0; } diff -puN net/sunrpc/auth_gss/gss_mech_switch.c~CITI_NFS4_ALL net/sunrpc/auth_gss/gss_mech_switch.c --- linux-2.6.3/net/sunrpc/auth_gss/gss_mech_switch.c~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 +++ linux-2.6.3-bfields/net/sunrpc/auth_gss/gss_mech_switch.c 2004-02-19 16:47:07.000000000 -0500 @@ -43,7 +43,6 @@ #include #include #include -#include #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH @@ -82,7 +81,7 @@ gss_mech_register(struct xdr_netobj * me spin_lock(&registered_mechs_lock); list_add(&gm->gm_list, &registered_mechs); spin_unlock(&registered_mechs_lock); - dprintk("RPC: gss_mech_register: registered mechanism with oid:\n"); + dprintk("RPC: gss_mech_register: registered mechanism with oid:\n"); print_hexl((u32 *)mech_type->data, mech_type->len, 0); return 0; } @@ -94,11 +93,10 @@ do_gss_mech_unregister(struct gss_api_me list_del(&gm->gm_list); - dprintk("RPC: unregistered mechanism with oid:\n"); + dprintk("RPC: unregistered mechanism with oid:\n"); print_hexl((u32 *)gm->gm_oid.data, gm->gm_oid.len, 0); if (!gss_mech_put(gm)) { - dprintk("RPC: We just unregistered a gss_mechanism which" - " someone is still using.\n"); + dprintk("RPC: We just unregistered a gss_mechanism which someone is still using.\n"); return -1; } else { return 0; @@ -146,7 +144,7 @@ gss_mech_get_by_OID(struct xdr_netobj *m { struct gss_api_mech *pos, *gm = NULL; - dprintk("RPC: gss_mech_get_by_OID searching for mechanism with OID:\n"); + dprintk("RPC: gss_mech_get_by_OID searching for mechanism with OID:\n"); print_hexl((u32 *)mech_type->data, mech_type->len, 0); 
spin_lock(&registered_mechs_lock); list_for_each_entry(pos, &registered_mechs, gm_list) { @@ -158,10 +156,27 @@ gss_mech_get_by_OID(struct xdr_netobj *m } } spin_unlock(&registered_mechs_lock); - dprintk("RPC: gss_mech_get_by_OID %s it\n", gm ? "found" : "didn't find"); + dprintk("RPC: gss_mech_get_by_OID %s it\n", gm ? "found" : "didn't find"); return gm; } +struct gss_api_mech * +gss_mech_get_by_name(char *name) +{ + struct gss_api_mech *pos, *gm = NULL; + + spin_lock(&registered_mechs_lock); + list_for_each_entry(pos, &registered_mechs, gm_list) { + if (0 == strcmp(name, pos->gm_ops->name)) { + gm = gss_mech_get(pos); + break; + } + } + spin_unlock(&registered_mechs_lock); + return gm; + +} + int gss_mech_put(struct gss_api_mech * gm) { @@ -228,7 +243,8 @@ gss_verify_mic(struct gss_ctx *context_ u32 gss_delete_sec_context(struct gss_ctx **context_handle) { - dprintk("gss_delete_sec_context deleting %p\n",*context_handle); + dprintk("RPC: gss_delete_sec_context deleting %p\n", + *context_handle); if (!*context_handle) return(GSS_S_NO_CONTEXT); diff -puN net/sunrpc/auth_gss/Makefile~CITI_NFS4_ALL net/sunrpc/auth_gss/Makefile --- linux-2.6.3/net/sunrpc/auth_gss/Makefile~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 +++ linux-2.6.3-bfields/net/sunrpc/auth_gss/Makefile 2004-02-19 16:47:04.000000000 -0500 @@ -5,7 +5,7 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o auth_rpcgss-objs := auth_gss.o gss_pseudoflavors.o gss_generic_token.o \ - sunrpcgss_syms.o gss_mech_switch.o + sunrpcgss_syms.o gss_mech_switch.o svcauth_gss.o obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o diff -puN net/sunrpc/auth_gss/sunrpcgss_syms.c~CITI_NFS4_ALL net/sunrpc/auth_gss/sunrpcgss_syms.c --- linux-2.6.3/net/sunrpc/auth_gss/sunrpcgss_syms.c~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 +++ linux-2.6.3-bfields/net/sunrpc/auth_gss/sunrpcgss_syms.c 2004-02-19 16:47:04.000000000 -0500 @@ -8,6 +8,7 @@ #include #include +#include #include /* sec_triples: */ @@ -17,6 +18,7 @@ EXPORT_SYMBOL(gss_cmp_triples); 
EXPORT_SYMBOL(gss_pseudoflavor_to_mechOID); EXPORT_SYMBOL(gss_pseudoflavor_supported); EXPORT_SYMBOL(gss_pseudoflavor_to_service); +EXPORT_SYMBOL(svcauth_gss_register_pseudoflavor); /* registering gss mechanisms to the mech switching code: */ EXPORT_SYMBOL(gss_mech_register); diff -puN /dev/null net/sunrpc/auth_gss/svcauth_gss.c --- /dev/null 2004-01-26 19:20:21.000000000 -0500 +++ linux-2.6.3-bfields/net/sunrpc/auth_gss/svcauth_gss.c 2004-02-19 16:47:15.000000000 -0500 @@ -0,0 +1,1018 @@ +/* + * Neil Brown + * J. Bruce Fields + * Andy Adamson + * Dug Song + * + * RPCSEC_GSS server authentication. + * This implements RPCSEC_GSS as defined in rfc2203 (rpcsec_gss) and rfc2078 + * (gssapi) + * + * The RPCSEC_GSS involves three stages: + * 1/ context creation + * 2/ data exchange + * 3/ context destruction + * + * Context creation is handled largely by upcalls to user-space. + * In particular, GSS_Accept_sec_context is handled by an upcall + * Data exchange is handled entirely within the kernel + * In particular, GSS_GetMIC, GSS_VerifyMIC, GSS_Seal, GSS_Unseal are in-kernel. + * Context destruction is handled in-kernel + * GSS_Delete_sec_context is in-kernel + * + * Context creation is initiated by a RPCSEC_GSS_INIT request arriving. + * The context handle and gss_token are used as a key into the rpcsec_init cache. + * The content of this cache includes some of the outputs of GSS_Accept_sec_context, + * being major_status, minor_status, context_handle, reply_token. + * These are sent back to the client. + * Sequence window management is handled by the kernel. The window size is currently + * a compile time constant. + * + * When user-space is happy that a context is established, it places an entry + * in the rpcsec_context cache. The key for this cache is the context_handle. 
+ * The content includes: + * uid/gidlist - for determining access rights + * mechanism type + * mechanism specific information, such as a key + * + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_AUTH +#endif + +/* The rpcsec_init cache is used for mapping RPCSEC_GSS_{,CONT_}INIT requests + * into replies. + * + * Key is context handle (\x if empty) and gss_token. + * Content is major_status minor_status (integers) context_handle, reply_token. + * + */ + +static int netobj_equal(struct xdr_netobj *a, struct xdr_netobj *b) +{ + return a->len == b->len && 0 == memcmp(a->data, b->data, a->len); +} + +#define RSI_HASHBITS 6 +#define RSI_HASHMAX (1<in_handle.data); + kfree(rsii->in_token.data); + kfree(rsii->out_handle.data); + kfree(rsii->out_token.data); +} + +static void rsi_put(struct cache_head *item, struct cache_detail *cd) +{ + struct rsi *rsii = container_of(item, struct rsi, h); + if (cache_put(item, cd)) { + rsi_free(rsii); + kfree(rsii); + } +} + +static inline int rsi_hash(struct rsi *item) +{ + return hash_mem(item->in_handle.data, item->in_handle.len, RSI_HASHBITS) + ^ hash_mem(item->in_token.data, item->in_token.len, RSI_HASHBITS); +} + +static inline int rsi_match(struct rsi *item, struct rsi *tmp) +{ + return netobj_equal(&item->in_handle, &tmp->in_handle) + && netobj_equal(&item->in_token, &tmp->in_token); +} + +static int dup_to_netobj(struct xdr_netobj *dst, char *src, int len) +{ + dst->len = len; + dst->data = (len ? 
kmalloc(len, GFP_KERNEL) : NULL); + if (dst->data) + memcpy(dst->data, src, len); + if (len && !dst->data) + return -ENOMEM; + return 0; +} + +static inline int dup_netobj(struct xdr_netobj *dst, struct xdr_netobj *src) +{ + return dup_to_netobj(dst, src->data, src->len); +} + +static inline void rsi_init(struct rsi *new, struct rsi *item) +{ + new->out_handle.data = NULL; + new->out_handle.len = 0; + new->out_token.data = NULL; + new->out_token.len = 0; + new->in_handle.len = item->in_handle.len; + new->in_handle.data = item->in_handle.data; + item->in_handle.len = 0; + item->in_handle.data = NULL; + new->in_token.len = item->in_token.len; + new->in_token.data = item->in_token.data; + item->in_token.len = 0; + item->in_token.data = NULL; + return; +} + +static inline void rsi_update(struct rsi *new, struct rsi *item) +{ + BUG_ON(new->out_handle.data || new->out_token.data); + new->out_handle.len = item->out_handle.len; + item->out_handle.len = 0; + new->out_token.len = item->out_token.len; + item->out_token.len = 0; + new->out_handle.data = item->out_handle.data; + item->out_handle.data = NULL; + new->out_token.data = item->out_token.data; + item->out_token.data = NULL; + + new->major_status = item->major_status; + new->minor_status = item->minor_status; +} + +static void rsi_request(struct cache_detail *cd, + struct cache_head *h, + char **bpp, int *blen) +{ + struct rsi *rsii = container_of(h, struct rsi, h); + + qword_addhex(bpp, blen, rsii->in_handle.data, rsii->in_handle.len); + qword_addhex(bpp, blen, rsii->in_token.data, rsii->in_token.len); + (*bpp)[-1] = '\n'; +} + + +static int rsi_parse(struct cache_detail *cd, + char *mesg, int mlen) +{ + /* context token expiry major minor context token */ + char *buf = mesg; + char *ep; + int len; + struct rsi rsii, *rsip = NULL; + time_t expiry; + int status = -EINVAL; + + memset(&rsii, 0, sizeof(rsii)); + /* handle */ + len = qword_get(&mesg, buf, mlen); + if (len < 0) + goto out; + status = -ENOMEM; + if 
(dup_to_netobj(&rsii.in_handle, buf, len)) + goto out; + + /* token */ + len = qword_get(&mesg, buf, mlen); + status = -EINVAL; + if (len < 0) + goto out;; + status = -ENOMEM; + if (dup_to_netobj(&rsii.in_token, buf, len)) + goto out; + + rsii.h.flags = 0; + /* expiry */ + expiry = get_expiry(&mesg); + status = -EINVAL; + if (expiry == 0) + goto out; + + /* major/minor */ + len = qword_get(&mesg, buf, mlen); + if (len < 0) + goto out; + if (len == 0) { + goto out; + } else { + rsii.major_status = simple_strtoul(buf, &ep, 10); + if (*ep) + goto out; + len = qword_get(&mesg, buf, mlen); + if (len <= 0) + goto out; + rsii.minor_status = simple_strtoul(buf, &ep, 10); + if (*ep) + goto out; + + /* out_handle */ + len = qword_get(&mesg, buf, mlen); + if (len < 0) + goto out; + status = -ENOMEM; + if (dup_to_netobj(&rsii.out_handle, buf, len)) + goto out; + + /* out_token */ + len = qword_get(&mesg, buf, mlen); + status = -EINVAL; + if (len < 0) + goto out; + status = -ENOMEM; + if (dup_to_netobj(&rsii.out_token, buf, len)) + goto out; + } + rsii.h.expiry_time = expiry; + rsip = rsi_lookup(&rsii, 1); + status = 0; +out: + rsi_free(&rsii); + if (rsip) + rsi_put(&rsip->h, &rsi_cache); + return status; +} + +static struct cache_detail rsi_cache = { + .hash_size = RSI_HASHMAX, + .hash_table = rsi_table, + .name = "auth.rpcsec.init", + .cache_put = rsi_put, + .cache_request = rsi_request, + .cache_parse = rsi_parse, +}; + +static DefineSimpleCacheLookup(rsi, 0) + +/* + * The rpcsec_context cache is used to store a context that is + * used in data exchange. + * The key is a context handle. 
The content is: + * uid, gidlist, mechanism, service-set, mech-specific-data + */ + +#define RSC_HASHBITS 10 +#define RSC_HASHMAX (1<handle.data); + if (rsci->mechctx) + gss_delete_sec_context(&rsci->mechctx); +} + +static void rsc_put(struct cache_head *item, struct cache_detail *cd) +{ + struct rsc *rsci = container_of(item, struct rsc, h); + + if (cache_put(item, cd)) { + rsc_free(rsci); + kfree(rsci); + } +} + +static inline int +rsc_hash(struct rsc *rsci) +{ + return hash_mem(rsci->handle.data, rsci->handle.len, RSC_HASHBITS); +} + +static inline int +rsc_match(struct rsc *new, struct rsc *tmp) +{ + return netobj_equal(&new->handle, &tmp->handle); +} + +static inline void +rsc_init(struct rsc *new, struct rsc *tmp) +{ + new->mechctx = NULL; + new->handle.len = tmp->handle.len; + new->handle.data = tmp->handle.data; + tmp->handle.len = 0; + tmp->handle.data = NULL; +} + +static inline void +rsc_update(struct rsc *new, struct rsc *tmp) +{ + new->mechctx = tmp->mechctx; + tmp->mechctx = NULL; + memset(&new->seqdata, 0, sizeof(new->seqdata)); + spin_lock_init(&new->seqdata.sd_lock); + new->cred = tmp->cred; +} + +static int rsc_parse(struct cache_detail *cd, + char *mesg, int mlen) +{ + /* contexthandle expiry [ uid gid N mechname ...mechdata... 
] */ + char *buf = mesg; + int len, rv; + struct rsc rsci, *rscp = NULL; + time_t expiry; + int status = -EINVAL; + + memset(&rsci, 0, sizeof(rsci)); + /* context handle */ + len = qword_get(&mesg, buf, mlen); + if (len < 0) goto out; + status = -ENOMEM; + if (dup_to_netobj(&rsci.handle, buf, len)) + goto out; + + rsci.h.flags = 0; + /* expiry */ + expiry = get_expiry(&mesg); + status = -EINVAL; + if (expiry == 0) + goto out; + + /* uid, or NEGATIVE */ + rv = get_int(&mesg, &rsci.cred.cr_uid); + if (rv == -EINVAL) + goto out; + if (rv == -ENOENT) + set_bit(CACHE_NEGATIVE, &rsci.h.flags); + else { + int N, i; + struct gss_api_mech *gm; + struct xdr_netobj tmp_buf; + + /* gid */ + if (get_int(&mesg, &rsci.cred.cr_gid)) + goto out; + + /* number of additional gid's */ + if (get_int(&mesg, &N)) + goto out; + if (N > NGROUPS) + goto out; + + /* gid's */ + for (i=0; ih, &rsc_cache); + return status; +} + +static struct cache_detail rsc_cache = { + .hash_size = RSC_HASHMAX, + .hash_table = rsc_table, + .name = "auth.rpcsec.context", + .cache_put = rsc_put, + .cache_parse = rsc_parse, +}; + +static DefineSimpleCacheLookup(rsc, 0); + +struct rsc * +gss_svc_searchbyctx(struct xdr_netobj *handle) +{ + struct rsc rsci; + struct rsc *found; + + rsci.handle = *handle; + found = rsc_lookup(&rsci, 0); + if (!found) + return NULL; + if (cache_check(&rsc_cache, &found->h, NULL)) + return NULL; + return found; +} + +/* Implements sequence number algorithm as specified in RFC 2203. 
*/ +static int +gss_check_seq_num(struct rsc *rsci, int seq_num) +{ + struct gss_svc_seq_data *sd = &rsci->seqdata; + + spin_lock(&sd->sd_lock); + if (seq_num > sd->sd_max) { + if (seq_num >= sd->sd_max + GSS_SEQ_WIN) { + memset(sd->sd_win,0,sizeof(sd->sd_win)); + sd->sd_max = seq_num; + } else while (sd->sd_max < seq_num) { + sd->sd_max++; + __clear_bit(sd->sd_max % GSS_SEQ_WIN, sd->sd_win); + } + __set_bit(seq_num % GSS_SEQ_WIN, sd->sd_win); + goto ok; + } else if (seq_num <= sd->sd_max - GSS_SEQ_WIN) { + goto drop; + } + /* sd_max - GSS_SEQ_WIN < seq_num <= sd_max */ + if (__test_and_set_bit(seq_num % GSS_SEQ_WIN, sd->sd_win)) + goto drop; +ok: + spin_unlock(&sd->sd_lock); + return 1; +drop: + spin_unlock(&sd->sd_lock); + return 0; +} + +static inline u32 round_up_to_quad(u32 i) +{ + return (i + 3 ) & ~3; +} + +static inline int +svc_safe_getnetobj(struct iovec *argv, struct xdr_netobj *o) +{ + int l; + + if (argv->iov_len < 4) + return -1; + o->len = ntohl(svc_getu32(argv)); + l = round_up_to_quad(o->len); + if (argv->iov_len < l) + return -1; + o->data = argv->iov_base; + argv->iov_base += l; + argv->iov_len -= l; + return 0; +} + +static inline int +svc_safe_putnetobj(struct iovec *resv, struct xdr_netobj *o) +{ + u32 *p; + + if (resv->iov_len + 4 > PAGE_SIZE) + return -1; + svc_putu32(resv, htonl(o->len)); + p = resv->iov_base + resv->iov_len; + resv->iov_len += round_up_to_quad(o->len); + if (resv->iov_len > PAGE_SIZE) + return -1; + memcpy(p, o->data, o->len); + memset((u8 *)p + o->len, 0, round_up_to_quad(o->len) - o->len); + return 0; +} + +/* Verify the checksum on the header and return SVC_OK on success. + * Otherwise, return SVC_DROP (in the case of a bad sequence number) + * or return SVC_DENIED and indicate error in authp. 
+ */ +static int +gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci, + u32 *rpcstart, struct rpc_gss_wire_cred *gc, u32 *authp) +{ + struct gss_ctx *ctx_id = rsci->mechctx; + struct xdr_buf rpchdr; + struct xdr_netobj checksum; + u32 flavor = 0; + struct iovec *argv = &rqstp->rq_arg.head[0]; + struct iovec iov; + + /* data to compute the checksum over: */ + iov.iov_base = rpcstart; + iov.iov_len = (u8 *)argv->iov_base - (u8 *)rpcstart; + xdr_buf_from_iov(&iov, &rpchdr); + + *authp = rpc_autherr_badverf; + if (argv->iov_len < 4) + return SVC_DENIED; + flavor = ntohl(svc_getu32(argv)); + if (flavor != RPC_AUTH_GSS) + return SVC_DENIED; + if (svc_safe_getnetobj(argv, &checksum)) + return SVC_DENIED; + + if (rqstp->rq_deferred) /* skip verification of revisited request */ + return SVC_OK; + if (gss_verify_mic(ctx_id, &rpchdr, &checksum, NULL) + != GSS_S_COMPLETE) { + *authp = rpcsec_gsserr_credproblem; + return SVC_DENIED; + } + + if (gc->gc_seq > MAXSEQ) { + dprintk("RPC: svcauth_gss: discarding request with large sequence number %d\n", + gc->gc_seq); + *authp = rpcsec_gsserr_ctxproblem; + return SVC_DENIED; + } + if (!gss_check_seq_num(rsci, gc->gc_seq)) { + dprintk("RPC: svcauth_gss: discarding request with old sequence number %d\n", + gc->gc_seq); + return SVC_DROP; + } + return SVC_OK; +} + +static int +gss_write_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq) +{ + u32 xdr_seq; + u32 maj_stat; + struct xdr_buf verf_data; + struct xdr_netobj mic; + u32 *p; + struct iovec iov; + + svc_putu32(rqstp->rq_res.head, htonl(RPC_AUTH_GSS)); + xdr_seq = htonl(seq); + + iov.iov_base = &xdr_seq; + iov.iov_len = sizeof(xdr_seq); + xdr_buf_from_iov(&iov, &verf_data); + p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len; + mic.data = (u8 *)(p + 1); + maj_stat = gss_get_mic(ctx_id, 0, &verf_data, &mic); + if (maj_stat != GSS_S_COMPLETE) + return -1; + *p++ = htonl(mic.len); + memset((u8 *)p + mic.len, 0, round_up_to_quad(mic.len) - mic.len); + p 
+= XDR_QUADLEN(mic.len); + if (!xdr_ressize_check(rqstp, p)) + return -1; + return 0; +} + +struct gss_domain { + struct auth_domain h; + u32 pseudoflavor; +}; + +/* XXX this should be done in gss_pseudoflavors, and shouldn't be hardcoded: */ +static struct auth_domain * +find_gss_auth_domain(struct gss_ctx *ctx, u32 svc) +{ + switch(gss_get_pseudoflavor(ctx, 0, svc)) { + case RPC_AUTH_GSS_KRB5: + return auth_domain_find("gss/krb5"); + case RPC_AUTH_GSS_KRB5I: + return auth_domain_find("gss/krb5i"); + case RPC_AUTH_GSS_KRB5P: + return auth_domain_find("gss/krb5p"); + } + return NULL; +} + +int +svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name) +{ + struct gss_domain *new; + struct auth_domain *test; + static char *prefix = "gss/"; + int stat = -1; + + new = kmalloc(sizeof(*new), GFP_KERNEL); + if (!new) + goto out; + cache_init(&new->h.h); + atomic_inc(&new->h.h.refcnt); + new->h.name = kmalloc(strlen(name) + strlen(prefix) + 1, GFP_KERNEL); + if (!new->h.name) + goto out_free_dom; + strcpy(new->h.name, prefix); + strcat(new->h.name, name); + new->h.flavour = RPC_AUTH_GSS; + new->pseudoflavor = pseudoflavor; + new->h.h.expiry_time = NEVER; + new->h.h.flags = 0; + + test = auth_domain_lookup(&new->h, 1); + if (test == &new->h) { + BUG_ON(atomic_dec_and_test(&new->h.h.refcnt)); + } else { /* XXX Duplicate registration? */ + auth_domain_put(&new->h); + goto out; + } + return 0; + +out_free_dom: + kfree(new); +out: + return stat; +} + +/* It would be nice if this bit of code could be shared with the client. + * Obstacles: + * The client shouldn't malloc(), would have to pass in own memory. + * The server uses base of head iovec as read pointer, while the + * client uses separate pointer. 
*/ +static int +unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) +{ + /* XXX audit u32/int uses, sign/overflow issues */ + int stat = -EINVAL; + u32 integ_len, maj_stat; + struct xdr_netobj mic; + struct xdr_buf integ_buf; + + integ_len = ntohl(svc_getu32(&buf->head[0])); + if (integ_len & 3) + goto out; + if (integ_len > buf->len) + goto out; + if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) + goto out; + /* copy out mic... */ + if (read_u32_from_xdr_buf(buf, integ_len, &mic.len)) + goto out; + if (mic.len > 256) /* XXX: maximum mic length? */ + goto out; + mic.data = kmalloc(mic.len, GFP_KERNEL); + if (read_bytes_from_xdr_buf(buf, integ_len + 4, mic.data, mic.len)) + goto out; + maj_stat = gss_verify_mic(ctx, &integ_buf, &mic, NULL); + if (maj_stat != GSS_S_COMPLETE) + goto out; + if (ntohl(svc_getu32(&buf->head[0])) != seq) + goto out; + stat = 0; +out: + return stat; +} + +/* + * Accept an rpcsec packet. + * If context establishment, punt to user space + * If data exchange, verify/decrypt + * If context destruction, handle here + * In the context establishment and destruction case we encode + * response here and return SVC_COMPLETE. 
+ */ +static int +svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp) +{ + struct iovec *argv = &rqstp->rq_arg.head[0]; + struct iovec *resv = &rqstp->rq_res.head[0]; + u32 crlen; + struct xdr_netobj tmpobj; + struct gss_svc_data *svcdata = rqstp->rq_auth_data; + struct rpc_gss_wire_cred *gc; + struct rsc *rsci = NULL; + struct rsi *rsip, rsikey; + u32 *rpcstart; + u32 *reject_stat = resv->iov_base; + int ret; + + dprintk("RPC: svcauth_gss: argv->iov_len = %d\n", argv->iov_len); + + *authp = rpc_autherr_badcred; + if (!svcdata) + svcdata = kmalloc(sizeof(*svcdata), GFP_KERNEL); + if (!svcdata) + goto auth_err; + rqstp->rq_auth_data = svcdata; + gc = &svcdata->clcred; + + /* start of rpc packet is 7 u32's back from here: + * xid direction rpcversion prog vers proc flavour + */ + rpcstart = argv->iov_base; + rpcstart -= 7; + + /* credential is: + * version(==1), proc(0,1,2,3), seq, service (1,2,3), handle + * at least 5 u32s, and is preceded by length, so that makes 6. + */ + + if (argv->iov_len < 5 * 4) + goto auth_err; + crlen = ntohl(svc_getu32(argv)); + if (ntohl(svc_getu32(argv)) != RPC_GSS_VERSION) + goto auth_err; + gc->gc_proc = ntohl(svc_getu32(argv)); + gc->gc_seq = ntohl(svc_getu32(argv)); + gc->gc_svc = ntohl(svc_getu32(argv)); + if (svc_safe_getnetobj(argv, &gc->gc_ctx)) + goto auth_err; + if (crlen != round_up_to_quad(gc->gc_ctx.len) + 5 * 4) + goto auth_err; + + if ((gc->gc_proc != RPC_GSS_PROC_DATA) && (rqstp->rq_proc != 0)) + goto auth_err; + + /* + * We've successfully parsed the credential. Let's check out the + * verifier. An AUTH_NULL verifier is allowed (and required) for + * INIT and CONTINUE_INIT requests. AUTH_RPCSEC_GSS is required for + * PROC_DATA and PROC_DESTROY. + * + * AUTH_NULL verifier is 0 (AUTH_NULL), 0 (length). + * AUTH_RPCSEC_GSS verifier is: + * 6 (AUTH_RPCSEC_GSS), length, checksum. + * checksum is calculated over rpcheader from xid up to here. 
+ */ + *authp = rpc_autherr_badverf; + switch (gc->gc_proc) { + case RPC_GSS_PROC_INIT: + case RPC_GSS_PROC_CONTINUE_INIT: + if (argv->iov_len < 2 * 4) + goto auth_err; + if (ntohl(svc_getu32(argv)) != RPC_AUTH_NULL) + goto auth_err; + if (ntohl(svc_getu32(argv)) != 0) + goto auth_err; + break; + case RPC_GSS_PROC_DATA: + case RPC_GSS_PROC_DESTROY: + *authp = rpcsec_gsserr_credproblem; + rsci = gss_svc_searchbyctx(&gc->gc_ctx); + if (!rsci) + goto auth_err; + switch (gss_verify_header(rqstp, rsci, rpcstart, gc, authp)) { + case SVC_OK: + break; + case SVC_DENIED: + goto auth_err; + case SVC_DROP: + goto drop; + } + break; + default: + *authp = rpc_autherr_rejectedcred; + goto auth_err; + } + + /* now act upon the command: */ + switch (gc->gc_proc) { + case RPC_GSS_PROC_INIT: + case RPC_GSS_PROC_CONTINUE_INIT: + *authp = rpc_autherr_badcred; + if (gc->gc_proc == RPC_GSS_PROC_INIT && gc->gc_ctx.len != 0) + goto auth_err; + memset(&rsikey, 0, sizeof(rsikey)); + if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx)) + goto drop; + *authp = rpc_autherr_badverf; + if (svc_safe_getnetobj(argv, &tmpobj)) { + kfree(rsikey.in_handle.data); + goto auth_err; + } + if (dup_netobj(&rsikey.in_token, &tmpobj)) { + kfree(rsikey.in_handle.data); + goto drop; + } + + rsip = rsi_lookup(&rsikey, 0); + rsi_free(&rsikey); + if (!rsip) { + goto drop; + } + switch(cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) { + case -EAGAIN: + goto drop; + case -ENOENT: + goto drop; + case 0: + rsci = gss_svc_searchbyctx(&rsip->out_handle); + if (!rsci) { + goto drop; + } + if (gss_write_verf(rqstp, rsci->mechctx, GSS_SEQ_WIN)) + goto drop; + if (resv->iov_len + 4 > PAGE_SIZE) + goto drop; + svc_putu32(resv, rpc_success); + if (svc_safe_putnetobj(resv, &rsip->out_handle)) + goto drop; + if (resv->iov_len + 3 * 4 > PAGE_SIZE) + goto drop; + svc_putu32(resv, htonl(rsip->major_status)); + svc_putu32(resv, htonl(rsip->minor_status)); + svc_putu32(resv, htonl(GSS_SEQ_WIN)); + if (svc_safe_putnetobj(resv, 
&rsip->out_token)) + goto drop; + rqstp->rq_client = NULL; + } + goto complete; + case RPC_GSS_PROC_DESTROY: + set_bit(CACHE_NEGATIVE, &rsci->h.flags); + if (resv->iov_len + 4 > PAGE_SIZE) + goto drop; + svc_putu32(resv, rpc_success); + goto complete; + case RPC_GSS_PROC_DATA: + rqstp->rq_client = + find_gss_auth_domain(rsci->mechctx, gc->gc_svc); + if (rqstp->rq_client == NULL) + goto auth_err; + *authp = rpcsec_gsserr_ctxproblem; + if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq)) + goto auth_err; + + rqstp->rq_cred = rsci->cred; + + *authp = rpc_autherr_badcred; + switch (gc->gc_svc) { + case RPC_GSS_SVC_NONE: + break; + case RPC_GSS_SVC_INTEGRITY: + if (unwrap_integ_data(&rqstp->rq_arg, + gc->gc_seq, rsci->mechctx)) + goto auth_err; + /* placeholders for length and seq. number: */ + svcdata->body_start = resv->iov_base + resv->iov_len; + svc_putu32(resv, 0); + svc_putu32(resv, 0); + break; + case RPC_GSS_SVC_PRIVACY: + /* currently unsupported */ + default: + goto auth_err; + } + ret = SVC_OK; + goto out; + } +auth_err: + /* Restore write pointer to original value: */ + xdr_ressize_check(rqstp, reject_stat); + ret = SVC_DENIED; + goto out; +complete: + ret = SVC_COMPLETE; + goto out; +drop: + ret = SVC_DROP; +out: + if (rsci) + rsc_put(&rsci->h, &rsc_cache); + return ret; +} + +static int +svcauth_gss_release(struct svc_rqst *rqstp) +{ + struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data; + struct rpc_gss_wire_cred *gc = &gsd->clcred; + struct xdr_buf *resbuf = &rqstp->rq_res; + struct xdr_buf integ_buf; + struct xdr_netobj mic; + struct iovec *resv; + u32 *p; + int integ_offset, integ_len; + struct rsc *rsci; + int stat = -EINVAL; + + /* normally not set till svc_send, but we need it here: */ + resbuf->len = resbuf->head[0].iov_len + + resbuf->page_len + resbuf->tail[0].iov_len; + switch (gc->gc_svc) { + case RPC_GSS_SVC_NONE: + break; + case RPC_GSS_SVC_INTEGRITY: + p = gsd->body_start; + /* move accept_stat to right place: */ + 
memcpy(p, p + 2, 4); + p++; + integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base; + integ_len = resbuf->len - integ_offset; + BUG_ON(integ_len % 4); + *p++ = htonl(integ_len); + *p++ = htonl(gc->gc_seq); + if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, + integ_len)) + goto out; + if (resbuf->page_len == 0) { + BUG_ON(resbuf->tail[0].iov_len); + /* Use head for everything */ + resv = &resbuf->head[0]; + } else if (resbuf->tail[0].iov_base == NULL) { + /* copied from nfsd4_encode_read */ + svc_take_page(rqstp); + resbuf->tail[0].iov_base = page_address(rqstp + ->rq_respages[rqstp->rq_resused-1]); + rqstp->rq_restailpage = rqstp->rq_resused-1; + resbuf->tail[0].iov_len = 0; + resv = &resbuf->tail[0]; + } else { + resv = &resbuf->tail[0]; + } + /* XXX bounds checking!: */ + mic.data = (u8 *)resv->iov_base + resv->iov_len + 4; + rsci = gss_svc_searchbyctx(&gc->gc_ctx); + /* Better error return? Hold count on ctx through + * processing instead of looking up again? */ + if (!rsci) + goto out; + /* XXX Whoops, we might overflow here: */ + if (gss_get_mic(rsci->mechctx, 0, &integ_buf, &mic)) + goto out; + svc_putu32(resv, htonl(mic.len)); + resv->iov_len += mic.len; + resbuf->len += mic.len; /* not strictly necessary */ + /* XXX too late, alas: */ + if (resbuf->len > PAGE_SIZE) + goto out; + break; + case RPC_GSS_SVC_PRIVACY: + default: + goto out; + } + + stat = 0; +out: + if (rqstp->rq_client) + auth_domain_put(rqstp->rq_client); + rqstp->rq_client = NULL; + + return stat; +} + +static void +svcauth_gss_domain_release(struct auth_domain *dom) +{ + struct gss_domain *gd = container_of(dom, struct gss_domain, h); + + kfree(dom->name); + kfree(gd); +} + +struct auth_ops svcauthops_gss = { + .name = "rpcsec_gss", + .flavour = RPC_AUTH_GSS, + .accept = svcauth_gss_accept, + .release = svcauth_gss_release, + .domain_release = svcauth_gss_domain_release, +}; + +int +gss_svc_init(void) +{ + cache_register(&rsc_cache); + cache_register(&rsi_cache); + 
svc_auth_register(RPC_AUTH_GSS, &svcauthops_gss); + return 0; +} diff -puN net/sunrpc/svc.c~CITI_NFS4_ALL net/sunrpc/svc.c --- linux-2.6.3/net/sunrpc/svc.c~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 +++ linux-2.6.3-bfields/net/sunrpc/svc.c 2004-02-19 16:47:04.000000000 -0500 @@ -200,6 +200,8 @@ svc_exit_thread(struct svc_rqst *rqstp) kfree(rqstp->rq_resp); if (rqstp->rq_argp) kfree(rqstp->rq_argp); + if (rqstp->rq_auth_data) + kfree(rqstp->rq_auth_data); kfree(rqstp); /* Release the server */ @@ -322,6 +324,8 @@ svc_process(struct svc_serv *serv, struc goto err_bad_auth; case SVC_DROP: goto dropit; + case SVC_COMPLETE: + goto sendit; } progp = serv->sv_program; diff -puN net/sunrpc/Makefile~CITI_NFS4_ALL net/sunrpc/Makefile --- linux-2.6.3/net/sunrpc/Makefile~CITI_NFS4_ALL 2004-02-19 16:47:05.000000000 -0500 +++ linux-2.6.3-bfields/net/sunrpc/Makefile 2004-02-19 16:47:05.000000000 -0500 @@ -2,9 +2,9 @@ # Makefile for Linux kernel SUN RPC # -obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ obj-$(CONFIG_SUNRPC) += sunrpc.o +obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ sunrpc-y := clnt.o xprt.o sched.o \ auth.o auth_null.o auth_unix.o \ diff -puN fs/nfsd/nfs4proc.c~CITI_NFS4_ALL fs/nfsd/nfs4proc.c --- linux-2.6.3/fs/nfsd/nfs4proc.c~CITI_NFS4_ALL 2004-02-19 16:47:05.000000000 -0500 +++ linux-2.6.3-bfields/fs/nfsd/nfs4proc.c 2004-02-19 16:47:15.000000000 -0500 @@ -52,15 +52,22 @@ #include #include #include +#ifdef CONFIG_NFS_V4_ACL +#include +#endif #define NFSDDBG_FACILITY NFSDDBG_PROC -/* Note: The organization of the OPEN code seems a little strange; it - * has been superfluously split into three routines, one of which is named - * nfsd4_process_open2() even though there is no nfsd4_process_open1()! - * This is because the code has been organized in anticipation of a - * subsequent patch which will implement more of the NFSv4 state model. 
- */ +static inline void +fh_dup2(struct svc_fh *dst, struct svc_fh *src) +{ + fh_put(dst); + dget(src->fh_dentry); + if (src->fh_export) + cache_get(&src->fh_export->h); + *dst = *src; +} + static int do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) { @@ -89,12 +96,19 @@ do_open_lookup(struct svc_rqst *rqstp, s if (!status) { set_change_info(&open->op_cinfo, current_fh); fh_dup2(current_fh, &resfh); + /* XXXJBF: keep a saved svc_fh struct instead?? */ + open->op_stateowner->so_replay.rp_openfh_len = + resfh.fh_handle.fh_size; + memcpy(open->op_stateowner->so_replay.rp_openfh, + &resfh.fh_handle.fh_base, + resfh.fh_handle.fh_size); accmode = MAY_NOP; if (open->op_share_access & NFS4_SHARE_ACCESS_READ) accmode = MAY_READ; if (open->op_share_deny & NFS4_SHARE_ACCESS_WRITE) accmode |= (MAY_WRITE | MAY_TRUNC); + accmode |= MAY_OWNER_OVERRIDE; status = fh_verify(rqstp, current_fh, S_IFREG, accmode); } @@ -102,19 +116,39 @@ do_open_lookup(struct svc_rqst *rqstp, s return status; } +/* + * nfs4_unlock_state() called in encode + */ static inline int nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) { int status; - dprintk("NFSD: nfsd4_open filename %.*s\n", - (int)open->op_fname.len, open->op_fname.data); + dprintk("NFSD: nfsd4_open filename %.*s op_stateowner %p\n", + (int)open->op_fname.len, open->op_fname.data, + open->op_stateowner); /* This check required by spec. */ if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) return nfserr_inval; + open->op_stateowner = NULL; + nfs4_lock_state(); + /* check seqid for replay. 
set nfs4_owner */ status = nfsd4_process_open1(open); + if (status == NFSERR_REPLAY_ME) { + struct nfs4_replay *rp = &open->op_stateowner->so_replay; + fh_put(current_fh); + current_fh->fh_handle.fh_size = rp->rp_openfh_len; + memcpy(¤t_fh->fh_handle.fh_base, rp->rp_openfh, + rp->rp_openfh_len); + status = fh_verify(rqstp, current_fh, 0, MAY_NOP); + if (status) + dprintk("nfsd4_open: replay failed" + " restoring previous filehandle\n"); + else + status = NFSERR_REPLAY_ME; + } if (status) return status; /* @@ -172,7 +206,7 @@ static inline int nfsd4_restorefh(struct svc_fh *current_fh, struct svc_fh *save_fh) { if (!save_fh->fh_dentry) - return nfserr_nofilehandle; + return nfserr_restorefh; fh_dup2(current_fh, save_fh); return nfs_ok; @@ -204,11 +238,16 @@ nfsd4_access(struct svc_rqst *rqstp, str static inline int nfsd4_commit(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_commit *commit) { + int status; + u32 *p = (u32 *)commit->co_verf.data; *p++ = nfssvc_boot.tv_sec; *p++ = nfssvc_boot.tv_usec; - return nfsd_commit(rqstp, current_fh, commit->co_offset, commit->co_count); + status = nfsd_commit(rqstp, current_fh, commit->co_offset, commit->co_count); + if (status == nfserr_symlink) + status = nfserr_inval; + return status; } static inline int @@ -221,6 +260,8 @@ nfsd4_create(struct svc_rqst *rqstp, str fh_init(&resfh, NFS4_FHSIZE); status = fh_verify(rqstp, current_fh, S_IFDIR, MAY_CREATE); + if (status == nfserr_symlink) + status = nfserr_notdir; if (status) return status; @@ -316,8 +357,10 @@ static inline int nfsd4_link(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct svc_fh *save_fh, struct nfsd4_link *link) { - int status; + int status = nfserr_nofilehandle; + if (!save_fh->fh_dentry) + return status; status = nfsd_link(rqstp, current_fh, link->li_name, link->li_namelen, save_fh); if (!status) set_change_info(&link->li_cinfo, current_fh); @@ -327,14 +370,18 @@ nfsd4_link(struct svc_rqst *rqstp, struc static inline int 
nfsd4_lookupp(struct svc_rqst *rqstp, struct svc_fh *current_fh) { - /* - * XXX: We currently violate the spec in one small respect - * here. If LOOKUPP is done at the root of the pseudofs, - * the spec requires us to return NFSERR_NOENT. Personally, - * I think that leaving the filehandle unchanged is more - * logical, but this is an academic question anyway, since - * no clients actually use LOOKUPP. - */ + struct svc_fh tmp_fh; + int ret; + + fh_init(&tmp_fh, NFS4_FHSIZE); + if((ret = exp_pseudoroot(rqstp->rq_client, &tmp_fh, + &rqstp->rq_chandle)) != 0) + return ret; + if (tmp_fh.fh_dentry == current_fh->fh_dentry) { + fh_put(&tmp_fh); + return nfserr_noent; + } + fh_put(&tmp_fh); return nfsd_lookup(rqstp, current_fh, "..", 2, current_fh); } @@ -345,6 +392,20 @@ nfsd4_lookup(struct svc_rqst *rqstp, str } static inline int +access_bits_permit_read(unsigned long access_bmap) +{ + return test_bit(NFS4_SHARE_ACCESS_READ, &access_bmap) || + test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap); +} + +static inline int +access_bits_permit_write(unsigned long access_bmap) +{ + return test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap) || + test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap); +} + +static inline int nfsd4_read(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_read *read) { struct nfs4_stateid *stp; @@ -382,7 +443,7 @@ nfsd4_read(struct svc_rqst *rqstp, struc goto out; } status = nfserr_openmode; - if (!(stp->st_share_access & NFS4_SHARE_ACCESS_READ)) { + if (!access_bits_permit_read(stp->st_access_bmap)) { dprintk("NFSD: nfsd4_read: file not opened for read!\n"); goto out; } @@ -397,6 +458,11 @@ out: static inline int nfsd4_readdir(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_readdir *readdir) { + u64 cookie = readdir->rd_cookie; + static const nfs4_verifier zeroverf = { + .data[0] = 0, + }; + /* no need to check permission - this will be done in nfsd_readdir() */ if (readdir->rd_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1) @@ -405,7 +471,8 
@@ nfsd4_readdir(struct svc_rqst *rqstp, st readdir->rd_bmval[0] &= NFSD_SUPPORTED_ATTRS_WORD0; readdir->rd_bmval[1] &= NFSD_SUPPORTED_ATTRS_WORD1; - if (readdir->rd_cookie > ~(u32)0) + if ((cookie > ~(u32)0) || (cookie == 1) || (cookie == 2) || + (cookie == 0 && memcmp(readdir->rd_verf.data, zeroverf.data, NFS4_VERIFIER_SIZE))) return nfserr_bad_cookie; readdir->rd_rqstp = rqstp; @@ -427,6 +494,8 @@ nfsd4_remove(struct svc_rqst *rqstp, str int status; status = nfsd_unlink(rqstp, current_fh, 0, remove->rm_name, remove->rm_namelen); + if (status == nfserr_symlink) + return nfserr_notdir; if (!status) { fh_unlock(current_fh); set_change_info(&remove->rm_cinfo, current_fh); @@ -438,11 +507,25 @@ static inline int nfsd4_rename(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct svc_fh *save_fh, struct nfsd4_rename *rename) { - int status; + int status = nfserr_nofilehandle; + if (!save_fh->fh_dentry) + return status; status = nfsd_rename(rqstp, save_fh, rename->rn_sname, rename->rn_snamelen, current_fh, rename->rn_tname, rename->rn_tnamelen); + + /* the underlying filesystem returns different error's than required + * by NFSv4. both save_fh and current_fh have been verified.. 
*/ + if (status == nfserr_isdir) + status = nfserr_exist; + else if ((status == nfserr_notdir) && + (S_ISDIR(save_fh->fh_dentry->d_inode->i_mode) && + S_ISDIR(current_fh->fh_dentry->d_inode->i_mode))) + status = nfserr_exist; + else if (status == nfserr_symlink) + status = nfserr_notdir; + if (!status) { set_change_info(&rename->rn_sinfo, current_fh); set_change_info(&rename->rn_tinfo, save_fh); @@ -454,14 +537,18 @@ static inline int nfsd4_setattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_setattr *setattr) { struct nfs4_stateid *stp; - int status = nfs_ok; + int status = nfserr_nofilehandle; + + if (!current_fh->fh_dentry) + goto out; + status = nfs_ok; if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { status = nfserr_bad_stateid; if (ZERO_STATEID(&setattr->sa_stateid) || ONE_STATEID(&setattr->sa_stateid)) { dprintk("NFSD: nfsd4_setattr: magic stateid!\n"); - return status; + goto out; } nfs4_lock_state(); @@ -469,17 +556,27 @@ nfsd4_setattr(struct svc_rqst *rqstp, st &setattr->sa_stateid, CHECK_FH | RDWR_STATE, &stp))) { dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); - goto out; + goto out_unlock; } status = nfserr_openmode; - if (!(stp->st_share_access & NFS4_SHARE_ACCESS_WRITE)) { + if (!access_bits_permit_write(stp->st_access_bmap)) { dprintk("NFSD: nfsd4_setattr: not opened for write!\n"); - goto out; + goto out_unlock; } nfs4_unlock_state(); } - return (nfsd_setattr(rqstp, current_fh, &setattr->sa_iattr, 0, (time_t)0)); +#ifdef CONFIG_NFS_V4_ACL + status = nfs_ok; + if (setattr->sa_acl != NULL) + status = nfsd4_set_nfs4_acl(rqstp, current_fh, setattr->sa_acl); + if (status) + goto out; +#endif /* CONFIG_NFS_V4_ACL */ + status = nfsd_setattr(rqstp, current_fh, &setattr->sa_iattr, + 0, (time_t)0); out: + return status; +out_unlock: nfs4_unlock_state(); return status; } @@ -513,7 +610,7 @@ nfsd4_write(struct svc_rqst *rqstp, stru } status = nfserr_openmode; - if (!(stp->st_share_access & NFS4_SHARE_ACCESS_WRITE)) { + if 
(!access_bits_permit_write(stp->st_access_bmap)) { dprintk("NFSD: nfsd4_write: file not open for write!\n"); goto out; } @@ -526,9 +623,12 @@ zero_stateid: *p++ = nfssvc_boot.tv_sec; *p++ = nfssvc_boot.tv_usec; - return (nfsd_write(rqstp, current_fh, write->wr_offset, + status = nfsd_write(rqstp, current_fh, write->wr_offset, write->wr_vec, write->wr_vlen, write->wr_buflen, - &write->wr_how_written)); + &write->wr_how_written); + if (status == nfserr_symlink) + status = nfserr_inval; + return status; out: nfs4_unlock_state(); return status; @@ -552,8 +652,9 @@ nfsd4_verify(struct svc_rqst *rqstp, str if ((verify->ve_bmval[0] & ~NFSD_SUPPORTED_ATTRS_WORD0) || (verify->ve_bmval[1] & ~NFSD_SUPPORTED_ATTRS_WORD1)) - return nfserr_notsupp; - if (verify->ve_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1) + return nfserr_attrnotsupp; + if ((verify->ve_bmval[0] & FATTR4_WORD0_RDATTR_ERROR) + || (verify->ve_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1)) return nfserr_inval; if (verify->ve_attrlen & 3) return nfserr_inval; @@ -568,7 +669,8 @@ nfsd4_verify(struct svc_rqst *rqstp, str status = nfsd4_encode_fattr(current_fh, current_fh->fh_export, current_fh->fh_dentry, buf, - &count, verify->ve_bmval); + &count, verify->ve_bmval, + rqstp); /* this means that nfsd4_encode_fattr() ran out of space */ if (status == nfserr_resource && count == 0) @@ -658,13 +760,32 @@ nfsd4_proc_compound(struct svc_rqst *rqs goto encode_op; } + /* All operations except RENEW, SETCLIENTID, RESTOREFH + * SETCLIENTID_CONFIRM, PUTFH and PUTROOTFH + * require a valid current filehandle + * + * SETATTR NOFILEHANDLE error handled in nfsd4_setattr + * due to required returned bitmap argument + */ + if ((!current_fh.fh_dentry) && + !((op->opnum == OP_PUTFH) || (op->opnum == OP_PUTROOTFH) || + (op->opnum == OP_SETCLIENTID) || + (op->opnum == OP_SETCLIENTID_CONFIRM) || + (op->opnum == OP_RENEW) || (op->opnum == OP_RESTOREFH) || + (op->opnum == OP_RELEASE_LOCKOWNER) || + (op->opnum == OP_SETATTR))) { + op->status = 
nfserr_nofilehandle; + goto encode_op; + } switch (op->opnum) { case OP_ACCESS: op->status = nfsd4_access(rqstp, ¤t_fh, &op->u.access); break; case OP_CLOSE: op->status = nfsd4_close(rqstp, ¤t_fh, &op->u.close); - op->replay = &op->u.close.cl_stateowner->so_replay; + if (op->u.close.cl_stateowner) + op->replay = + &op->u.close.cl_stateowner->so_replay; break; case OP_COMMIT: op->status = nfsd4_commit(rqstp, ¤t_fh, &op->u.commit); @@ -683,12 +804,18 @@ nfsd4_proc_compound(struct svc_rqst *rqs break; case OP_LOCK: op->status = nfsd4_lock(rqstp, ¤t_fh, &op->u.lock); + if (op->u.lock.lk_stateowner) + op->replay = + &op->u.lock.lk_stateowner->so_replay; break; case OP_LOCKT: op->status = nfsd4_lockt(rqstp, ¤t_fh, &op->u.lockt); break; case OP_LOCKU: op->status = nfsd4_locku(rqstp, ¤t_fh, &op->u.locku); + if (op->u.locku.lu_stateowner) + op->replay = + &op->u.locku.lu_stateowner->so_replay; break; case OP_LOOKUP: op->status = nfsd4_lookup(rqstp, ¤t_fh, &op->u.lookup); @@ -703,15 +830,21 @@ nfsd4_proc_compound(struct svc_rqst *rqs break; case OP_OPEN: op->status = nfsd4_open(rqstp, ¤t_fh, &op->u.open); - op->replay = &op->u.open.op_stateowner->so_replay; + if (op->u.open.op_stateowner) + op->replay = + &op->u.open.op_stateowner->so_replay; break; case OP_OPEN_CONFIRM: op->status = nfsd4_open_confirm(rqstp, ¤t_fh, &op->u.open_confirm); - op->replay = &op->u.open_confirm.oc_stateowner->so_replay; + if (op->u.open_confirm.oc_stateowner) + op->replay = + &op->u.open_confirm.oc_stateowner->so_replay; break; case OP_OPEN_DOWNGRADE: op->status = nfsd4_open_downgrade(rqstp, ¤t_fh, &op->u.open_downgrade); - op->replay = &op->u.open_downgrade.od_stateowner->so_replay; + if (op->u.open_downgrade.od_stateowner) + op->replay = + &op->u.open_downgrade.od_stateowner->so_replay; break; case OP_PUTFH: op->status = nfsd4_putfh(rqstp, ¤t_fh, &op->u.putfh); @@ -760,6 +893,9 @@ nfsd4_proc_compound(struct svc_rqst *rqs case OP_WRITE: op->status = nfsd4_write(rqstp, ¤t_fh, &op->u.write); break; 
+ case OP_RELEASE_LOCKOWNER: + op->status = nfsd4_release_lockowner(rqstp, &op->u.release_lockowner); + break; default: BUG_ON(op->status == nfs_ok); break; @@ -768,7 +904,7 @@ nfsd4_proc_compound(struct svc_rqst *rqs encode_op: if (op->status == NFSERR_REPLAY_ME) { nfsd4_encode_replay(resp, op); - status = op->status = NFS_OK; + status = op->status = op->replay->rp_status; } else { nfsd4_encode_operation(resp, op); status = op->status; @@ -776,20 +912,7 @@ encode_op: } out: - if (args->ops != args->iops) { - kfree(args->ops); - args->ops = args->iops; - } - if (args->tmpp) { - kfree(args->tmpp); - args->tmpp = NULL; - } - while (args->to_free) { - struct tmpbuf *tb = args->to_free; - args->to_free = tb->next; - kfree(tb->buf); - kfree(tb); - } + nfsd4_release_compoundargs(args); fh_put(¤t_fh); fh_put(&save_fh); return status; diff -puN fs/nfsd/nfs4xdr.c~CITI_NFS4_ALL fs/nfsd/nfs4xdr.c --- linux-2.6.3/fs/nfsd/nfs4xdr.c~CITI_NFS4_ALL 2004-02-19 16:47:05.000000000 -0500 +++ linux-2.6.3-bfields/fs/nfsd/nfs4xdr.c 2004-02-19 16:47:15.000000000 -0500 @@ -51,100 +51,103 @@ #include #include #include -#include #include #include #include +#include +#include +#include #define NFSDDBG_FACILITY NFSDDBG_XDR -/* - * From Peter Astrand : The following routines check - * whether a filename supplied by the client is valid. 
/*
 * is_legal_utf8_sequence - structural check of one UTF-8 encoded character.
 * @source: first (lead) byte of the sequence
 * @length: number of bytes in the sequence, as implied by the lead byte
 *
 * A one-byte sequence is always accepted.  For longer sequences every byte
 * after the lead must have the form 10xxxxxx, and the second byte must be
 * large enough that the encoding is the shortest possible one for leads
 * 0xE0, 0xF0, 0xF8 and 0xFC.
 *
 * Returns 1 if the sequence is well formed, 0 otherwise.
 */
static inline int
is_legal_utf8_sequence(unsigned char *source, int length)
{
	unsigned char second;
	int i;

	if (length == 1)
		return 1;

	/*
	 * The second byte must be a continuation byte whatever the lead is;
	 * checking only a lower bound would let e.g. E0 C0 80 through.
	 */
	second = source[1];
	if ((second & 0xC0) != 0x80)
		return 0;

	/* Reject overlong (non-shortest) encodings */
	switch (source[0]) {
	case 0xE0:	/* 3 bytes: smallest is E0 A0 80 */
		if (second < 0xA0)
			return 0;
		break;
	case 0xF0:	/* 4 bytes: smallest is F0 90 80 80 */
		if (second < 0x90)
			return 0;
		break;
	case 0xF8:	/* 5 bytes: smallest is F8 88 80 80 80 */
		if (second < 0x88)
			return 0;
		break;
	case 0xFC:	/* 6 bytes: smallest is FC 84 80 80 80 80 */
		if (second < 0x84)
			return 0;
		break;
	default:
		break;
	}

	/* Check that the remaining trailing bytes look like 10xxxxxx */
	for (i = 2; i < length; i++)
		if ((source[i] & 0xC0) != 0x80)
			return 0;
	return 1;
}
/*
 * is_allowed_utf8_char - screen out some disallowed Unicode code points.
 * @source: first byte of a UTF-8 sequence already known to be well formed
 * @length: number of bytes in that sequence
 *
 * This screening is NOT comprehensive.  It rejects:
 *   - any lead byte above 0xF3, and F3 B0 80 80 (U+F0000) and up;
 *   - U+D800..U+F8FF (ED A0 80 .. EF A3 BF);
 *   - U+FFF9..U+FFFF (EF BF B9 .. EF BF BF).
 *
 * Returns 1 if the character is allowed, 0 if it is rejected.
 */
static int
is_allowed_utf8_char(unsigned char *source, int length)
{
	/* We assume length and source point to a valid utf8 sequence */
	unsigned char c;

	/* Disallow F0000 and up (in utf8, F3B08080) */
	if (*source > 0xF3)
		return 0;

	/*
	 * A single-byte character has no second byte: source[1] may lie past
	 * the end of the caller's buffer, so do not read it.  None of the
	 * lead bytes examined below can start a one-byte sequence.
	 */
	if (length < 2)
		return 1;

	c = source[1];
	switch (*source) {
	case 0xF3:
		if (c >= 0xB0)
			return 0;
		break;
	/* Disallow D800-F8FF (in utf8, EDA080-EFA3BF) */
	case 0xED:
		if (c >= 0xA0)
			return 0;
		break;
	case 0xEE:
		return 0;
	case 0xEF:
		if (c <= 0xA3)
			return 0;
		/* Disallow FFF9-FFFF (EFBFB9-EFBFBF) */
		/* No upper bound needed on the third byte: valid utf8 keeps
		 * it <= 0xBF.  The length guard keeps the read in bounds. */
		if (c == 0xBF && length > 2 && source[2] >= 0xB9)
			return 0;
		break;
	default:
		break;
	}
	return 1;
}
+ */ static int check_utf8(char *str, int len) { @@ -155,11 +158,17 @@ check_utf8(char *str, int len) sourceend = str + len; while (chunk < sourceend) { - chunklen = trailing_bytes_for_utf8[*chunk]+1; + chunklen = utf8_byte_len[*chunk]; + if (!chunklen) + return nfserr_inval; if (chunk + chunklen > sourceend) return nfserr_inval; - if (!is_legal_iso_utf8_sequence(chunk, chunklen)) + if (!is_legal_utf8_sequence(chunk, chunklen)) + return nfserr_inval; + if (!is_allowed_utf8_char(chunk, chunklen)) return nfserr_inval; + if ( (chunklen==1) && (!*chunk) ) + return nfserr_inval; /* Disallow embedded nulls */ chunk += chunklen; } @@ -280,27 +289,40 @@ u32 *read_buf(struct nfsd4_compoundargs return p; } -char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int nbytes) +static int +defer_free(struct nfsd4_compoundargs *argp, + void (*release)(const void *), void *p) { struct tmpbuf *tb; + + tb = kmalloc(sizeof(*tb), GFP_KERNEL); + if (!tb) + return -ENOMEM; + tb->buf = p; + tb->release = release; + tb->next = argp->to_free; + argp->to_free = tb; + return 0; +} + +char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int nbytes) +{ + void *new = NULL; if (p == argp->tmp) { - p = kmalloc(nbytes, GFP_KERNEL); - if (!p) return NULL; + new = kmalloc(nbytes, GFP_KERNEL); + if (!new) return NULL; + p = new; memcpy(p, argp->tmp, nbytes); } else { if (p != argp->tmpp) BUG(); argp->tmpp = NULL; } - tb = kmalloc(sizeof(*tb), GFP_KERNEL); - if (!tb) { - kfree(p); + if (defer_free(argp, kfree, p)) { + kfree(new); return NULL; - } - tb->buf = p; - tb->next = argp->to_free; - argp->to_free = tb; - return (char*)p; + } else + return (char *)p; } @@ -328,7 +350,8 @@ nfsd4_decode_bitmap(struct nfsd4_compoun } static int -nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *iattr) +nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *iattr, + struct nfs4_acl **acl) { int expected_len, len = 0; u32 dummy32; @@ -344,7 +367,7 @@ 
nfsd4_decode_fattr(struct nfsd4_compound * read-only attributes return ERR_INVAL. */ if ((bmval[0] & ~NFSD_SUPPORTED_ATTRS_WORD0) || (bmval[1] & ~NFSD_SUPPORTED_ATTRS_WORD1)) - return nfserr_notsupp; + return nfserr_attrnotsupp; if ((bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0) || (bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1)) return nfserr_inval; @@ -357,6 +380,39 @@ nfsd4_decode_fattr(struct nfsd4_compound READ64(iattr->ia_size); iattr->ia_valid |= ATTR_SIZE; } +#ifdef CONFIG_NFS_V4_ACL + if (bmval[0] & FATTR4_WORD0_ACL) { + int nace, i; + struct nfs4_ace ace; + + READ_BUF(4); len += 4; + READ32(nace); + + *acl = nfs4_acl_new(); + if (*acl == NULL) { + status = -ENOMEM; + goto out_nfserr; + } + defer_free(argp, (void (*)(const void *))nfs4_acl_free, *acl); + + for (i = 0; i < nace; i++) { + READ_BUF(16); len += 16; + READ32(ace.type); + READ32(ace.flag); + READ32(ace.access_mask); + READ32(ace.wholen); + READ_BUF(ace.wholen); + len += XDR_QUADLEN(ace.wholen) << 2; + if (nfs4_acl_add_ace(*acl, ace.type, ace.flag, + ace.access_mask, (char *)p, ace.wholen) < 0) { + status = -ENOMEM; + goto out_nfserr; + } + p += XDR_QUADLEN(ace.wholen); + } + } else + *acl = NULL; +#endif /* CONFIG_NFS_V4_ACL */ if (bmval[1] & FATTR4_WORD1_MODE) { READ_BUF(4); len += 4; @@ -373,7 +429,7 @@ nfsd4_decode_fattr(struct nfsd4_compound READMEM(buf, dummy32); if (check_utf8(buf, dummy32)) return nfserr_inval; - if ((status = name_get_uid(buf, dummy32, &iattr->ia_uid))) + if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid))) goto out_nfserr; iattr->ia_valid |= ATTR_UID; } @@ -386,7 +442,7 @@ nfsd4_decode_fattr(struct nfsd4_compound READMEM(buf, dummy32); if (check_utf8(buf, dummy32)) return nfserr_inval; - if ((status = name_get_gid(buf, dummy32, &iattr->ia_gid))) + if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid))) goto out_nfserr; iattr->ia_valid |= ATTR_GID; } @@ -482,6 +538,7 @@ nfsd4_decode_close(struct nfsd4_compound { DECODE_HEAD; + 
(int)close->cl_stateowner = -1; READ_BUF(4 + sizeof(stateid_t)); READ32(close->cl_seqid); READ32(close->cl_stateid.si_generation); @@ -540,7 +597,7 @@ nfsd4_decode_create(struct nfsd4_compoun if ((status = check_filename(create->cr_name, create->cr_namelen, nfserr_inval))) return status; - if ((status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr))) + if ((status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, &create->cr_acl))) goto out; DECODE_TAIL; @@ -572,6 +629,7 @@ nfsd4_decode_lock(struct nfsd4_compounda { DECODE_HEAD; + (int)lock->lk_stateowner = -1; /* * type, reclaim(boolean), offset, length, new_lock_owner(boolean) */ @@ -629,6 +687,7 @@ nfsd4_decode_locku(struct nfsd4_compound { DECODE_HEAD; + (int)locku->lu_stateowner = -1; READ_BUF(24 + sizeof(stateid_t)); READ32(locku->lu_type); if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT)) @@ -664,6 +723,7 @@ nfsd4_decode_open(struct nfsd4_compounda memset(open->op_bmval, 0, sizeof(open->op_bmval)); open->op_iattr.ia_valid = 0; + (int)open->op_stateowner = -1; /* seqid, share_access, share_deny, clientid, ownerlen */ READ_BUF(16 + sizeof(clientid_t)); @@ -686,7 +746,7 @@ nfsd4_decode_open(struct nfsd4_compounda switch (open->op_createmode) { case NFS4_CREATE_UNCHECKED: case NFS4_CREATE_GUARDED: - if ((status = nfsd4_decode_fattr(argp, open->op_bmval, &open->op_iattr))) + if ((status = nfsd4_decode_fattr(argp, open->op_bmval, &open->op_iattr, &open->op_acl))) goto out; break; case NFS4_CREATE_EXCLUSIVE: @@ -739,6 +799,7 @@ nfsd4_decode_open_confirm(struct nfsd4_c { DECODE_HEAD; + (int)open_conf->oc_stateowner = -1; READ_BUF(4 + sizeof(stateid_t)); READ32(open_conf->oc_req_stateid.si_generation); COPYMEM(&open_conf->oc_req_stateid.si_opaque, sizeof(stateid_opaque_t)); @@ -752,6 +813,7 @@ nfsd4_decode_open_downgrade(struct nfsd4 { DECODE_HEAD; + (int)open_down->od_stateowner = -1; READ_BUF(4 + sizeof(stateid_t)); READ32(open_down->od_stateid.si_generation); 
COPYMEM(&open_down->od_stateid.si_opaque, sizeof(stateid_opaque_t)); @@ -861,7 +923,7 @@ nfsd4_decode_setattr(struct nfsd4_compou READ_BUF(sizeof(stateid_t)); READ32(setattr->sa_stateid.si_generation); COPYMEM(&setattr->sa_stateid.si_opaque, sizeof(stateid_opaque_t)); - if ((status = nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr))) + if ((status = nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, &setattr->sa_acl))) goto out; DECODE_TAIL; @@ -928,7 +990,7 @@ nfsd4_decode_write(struct nfsd4_compound int len; DECODE_HEAD; - READ_BUF(sizeof(stateid_t) + 16); + READ_BUF(sizeof(stateid_opaque_t) + 20); READ32(write->wr_stateid.si_generation); COPYMEM(&write->wr_stateid.si_opaque, sizeof(stateid_opaque_t)); READ64(write->wr_offset); @@ -972,6 +1034,20 @@ nfsd4_decode_write(struct nfsd4_compound } static int +nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_release_lockowner *rlockowner) +{ + DECODE_HEAD; + + READ_BUF(12); + COPYMEM(&rlockowner->rl_clientid, sizeof(clientid_t)); + READ32(rlockowner->rl_owner.len); + READ_BUF(rlockowner->rl_owner.len); + READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len); + + DECODE_TAIL; +} + +static int nfsd4_decode_compound(struct nfsd4_compoundargs *argp) { DECODE_HEAD; @@ -1043,6 +1119,13 @@ nfsd4_decode_compound(struct nfsd4_compo op->opnum = ntohl(*argp->p++); switch (op->opnum) { + case 2: /* Reserved operation */ + op->opnum = OP_ILLEGAL; + if (argp->minorversion == 0) + op->status = nfserr_op_illegal; + else + op->status = nfserr_minor_vers_mismatch; + break; case OP_ACCESS: op->status = nfsd4_decode_access(argp, &op->u.access); break; @@ -1136,14 +1219,12 @@ nfsd4_decode_compound(struct nfsd4_compo case OP_WRITE: op->status = nfsd4_decode_write(argp, &op->u.write); break; + case OP_RELEASE_LOCKOWNER: + op->status = nfsd4_decode_release_lockowner(argp, &op->u.release_lockowner); + break; default: - /* - * According to spec, anything greater than OP_WRITE - * is 
treated as OP_WRITE+1 in the response. - */ - if (op->opnum > OP_WRITE) - op->opnum = OP_WRITE + 1; - op->status = nfserr_notsupp; + op->opnum = OP_ILLEGAL; + op->status = nfserr_op_illegal; break; } @@ -1183,10 +1264,10 @@ nfsd4_decode_compound(struct nfsd4_compo } while (0) #define WRITECINFO(c) do { \ *p++ = htonl(c.atomic); \ - *p++ = htonl(c.before_size); \ - *p++ = htonl(c.before_ctime); \ - *p++ = htonl(c.after_size); \ - *p++ = htonl(c.after_ctime); \ + *p++ = htonl(c.before_ctime_sec); \ + *p++ = htonl(c.before_ctime_nsec); \ + *p++ = htonl(c.after_ctime_sec); \ + *p++ = htonl(c.after_ctime_nsec); \ } while (0) #define RESERVE_SPACE(nbytes) do { \ @@ -1209,10 +1290,13 @@ nfsd4_decode_compound(struct nfsd4_compo * "seqid-mutating" NFSv4 operation. This is * where seqids are incremented, and the * replay cache is filled. + * + * if stateowner != -1 then called with nfs4_lock_state() held */ #define ENCODE_SEQID_OP_TAIL(stateowner) do { \ - if (seqid_mutating_err(nfserr) && stateowner) { \ + if (seqid_mutating_err(nfserr) && stateowner \ + && ((int)stateowner != -1)) { \ if (stateowner->so_confirmed) \ stateowner->so_seqid++; \ stateowner->so_replay.rp_status = nfserr; \ @@ -1220,7 +1304,8 @@ nfsd4_decode_compound(struct nfsd4_compo (((char *)(resp)->p - (char *)save)); \ memcpy(stateowner->so_replay.rp_buf, save, \ stateowner->so_replay.rp_buflen); \ - } } while(0) + } } while(0); \ + if ((int)stateowner != -1) nfs4_unlock_state(); static u32 nfs4_ftypes[16] = { @@ -1239,13 +1324,16 @@ static u32 nfs4_ftypes[16] = { */ int nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, - struct dentry *dentry, u32 *buffer, int *countp, u32 *bmval) + struct dentry *dentry, u32 *buffer, int *countp, u32 *bmval, + struct svc_rqst *rqstp) { u32 bmval0 = bmval[0]; u32 bmval1 = bmval[1]; struct kstat stat; - struct name_ent *owner = NULL; - struct name_ent *group = NULL; + char owner[IDMAP_NAMESZ]; + u32 ownerlen = 0; + char group[IDMAP_NAMESZ]; + u32 grouplen = 0; 
struct svc_fh tempfh; struct kstatfs statfs; int buflen = *countp << 2; @@ -1254,6 +1342,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s u64 dummy64; u32 *p = buffer; int status; + struct nfs4_acl *acl = NULL; BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1); BUG_ON(bmval0 & ~NFSD_SUPPORTED_ATTRS_WORD0); @@ -1277,15 +1366,30 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s fhp = &tempfh; } if (bmval1 & FATTR4_WORD1_OWNER) { - status = name_get_user(stat.uid, &owner); - if (status) + int temp = nfsd_map_uid_to_name(rqstp, stat.uid, owner); + if (temp < 0) { + status = temp; goto out_nfserr; + } + ownerlen = (unsigned) temp; } if (bmval1 & FATTR4_WORD1_OWNER_GROUP) { - status = name_get_group(stat.gid, &group); - if (status) + int temp = nfsd_map_gid_to_name(rqstp, stat.gid, group); + if (temp < 0) { + status = temp; + goto out_nfserr; + } + grouplen = (unsigned) temp; + } +#ifdef CONFIG_NFS_V4_ACL + if (bmval0 & FATTR4_WORD0_ACL) { + status = nfsd4_get_nfs4_acl(rqstp, dentry, &acl); + if (status == -EOPNOTSUPP) + bmval0 &= ~FATTR4_WORD0_ACL; + else if (status < 0) goto out_nfserr; } +#endif /* CONFIG_NFS_V4_ACL */ if ((buflen -= 16) < 0) goto out_resource; @@ -1317,32 +1421,15 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s } if (bmval0 & FATTR4_WORD0_CHANGE) { /* - * XXX: We currently use the inode ctime as the nfsv4 "changeid" - * attribute. This violates the spec, which says - * - * The server may return the object's time_modify attribute - * for this attribute, but only if the file system object - * can not be updated more frequently than the resolution - * of time_modify. - * - * Since we only have 1-second ctime resolution, this is a pretty - * serious violation. Indeed, 1-second ctime resolution is known - * to be a problem in practice in the NFSv3 world. - * - * The real solution to this problem is probably to work on - * adding high-resolution mtimes to the VFS layer. - * - * Note: Started using i_size for the high 32 bits of the changeid. 
- * - * Note 2: This _must_ be consistent with the scheme for writing + * Note: This _must_ be consistent with the scheme for writing * change_info, so any changes made here must be reflected there * as well. (See xdr4.h:set_change_info() and the WRITECINFO() * macro above.) */ if ((buflen -= 8) < 0) goto out_resource; - WRITE32(stat.size); - WRITE32(stat.mtime.tv_sec); /* AK: nsec dropped? */ + WRITE32(stat.ctime.tv_sec); + WRITE32(stat.ctime.tv_nsec); /* AK: nsec dropped? */ } if (bmval0 & FATTR4_WORD0_SIZE) { if ((buflen -= 8) < 0) @@ -1387,10 +1474,48 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s goto out_resource; WRITE32(0); } +#ifdef CONFIG_NFS_V4_ACL + if (bmval0 & FATTR4_WORD0_ACL) { + struct nfs4_ace *ace; + struct list_head *h; + int alen; + + if (acl == NULL) { + if ((buflen -= 4) < 0) + goto out_resource; + + WRITE32(0); + goto out_acl; + } + + alen = acl->naces * 16 + 4; + + list_for_each(h, &acl->ace_head) { + ace = list_entry(h, struct nfs4_ace, l_ace); + alen += XDR_QUADLEN(ace->wholen) << 2; + } + + if ((buflen -= alen) < 0) + goto out_resource; + + WRITE32(acl->naces); + + list_for_each(h, &acl->ace_head) { + ace = list_entry(h, struct nfs4_ace, l_ace); + + WRITE32(ace->type); + WRITE32(ace->flag); + WRITE32(ace->access_mask); + WRITE32(ace->wholen); + WRITEMEM(ace->who, ace->wholen); + } + } +out_acl: +#endif /* CONFIG_NFS_V4_ACL */ if (bmval0 & FATTR4_WORD0_ACLSUPPORT) { if ((buflen -= 4) < 0) goto out_resource; - WRITE32(0); + WRITE32(1); } if (bmval0 & FATTR4_WORD0_CANSETTIME) { if ((buflen -= 4) < 0) @@ -1485,20 +1610,18 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s WRITE32(stat.nlink); } if (bmval1 & FATTR4_WORD1_OWNER) { - int namelen = strlen(owner->name); - buflen -= (XDR_QUADLEN(namelen) << 2) + 4; + buflen -= (XDR_QUADLEN(ownerlen) << 2) + 4; if (buflen < 0) goto out_resource; - WRITE32(namelen); - WRITEMEM(owner->name, namelen); + WRITE32(ownerlen); + WRITEMEM(owner, ownerlen); } if (bmval1 & FATTR4_WORD1_OWNER_GROUP) { - int namelen = 
strlen(group->name); - buflen -= (XDR_QUADLEN(namelen) << 2) + 4; + buflen -= (XDR_QUADLEN(grouplen) << 2) + 4; if (buflen < 0) goto out_resource; - WRITE32(namelen); - WRITEMEM(group->name, namelen); + WRITE32(grouplen); + WRITEMEM(group, grouplen); } if (bmval1 & FATTR4_WORD1_RAWDEV) { if ((buflen -= 8) < 0) @@ -1564,12 +1687,11 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s status = nfs_ok; out: +#ifdef CONFIG_NFS_V4_ACL + nfs4_acl_free(acl); +#endif if (fhp == &tempfh) fh_put(&tempfh); - if (owner) - name_put(owner); - if (group) - name_put(group); return status; out_nfserr: status = nfserrno(status); @@ -1648,7 +1770,8 @@ nfsd4_encode_dirent(struct readdir_cd *c } nfserr = nfsd4_encode_fattr(NULL, exp, - dentry, p, &buflen, cd->rd_bmval); + dentry, p, &buflen, cd->rd_bmval, + cd->rd_rqstp); if (!nfserr) { p += buflen; goto out; @@ -1701,7 +1824,7 @@ out: return 0; nospc: - cd->common.err = nfserr_readdir_nospc; + cd->common.err = nfserr_toosmall; return -EINVAL; } @@ -1771,7 +1894,8 @@ nfsd4_encode_getattr(struct nfsd4_compou buflen = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2); nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry, - resp->p, &buflen, getattr->ga_bmval); + resp->p, &buflen, getattr->ga_bmval, + resp->rqstp); if (!nfserr) resp->p += buflen; @@ -1871,7 +1995,7 @@ nfsd4_encode_open(struct nfsd4_compoundr ENCODE_SEQID_OP_HEAD; if (nfserr) - return; + goto out; RESERVE_SPACE(36 + sizeof(stateid_t)); WRITE32(open->op_stateid.si_generation); @@ -1925,7 +2049,8 @@ nfsd4_encode_open(struct nfsd4_compoundr default: BUG(); } - + /* XXX save filehandle here */ +out: ENCODE_SEQID_OP_TAIL(open->op_stateowner); } @@ -1995,6 +2120,8 @@ nfsd4_encode_read(struct nfsd4_compoundr read->rd_offset, read->rd_iov, read->rd_vlen, &maxcount); + if (nfserr == nfserr_symlink) + nfserr = nfserr_inval; if (nfserr) return nfserr; eof = (read->rd_offset + maxcount >= read->rd_fhp->fh_dentry->d_inode->i_size); @@ -2052,6 +2179,8 @@ 
nfsd4_encode_readlink(struct nfsd4_compo * assume that truncation occurred, and return NFS4ERR_RESOURCE. */ nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp, page, &maxcount); + if (nfserr == nfserr_isdir) + return nfserr_inval; if (nfserr) return nfserr; @@ -2081,7 +2210,7 @@ nfsd4_encode_readdir(struct nfsd4_compou { int maxcount; loff_t offset; - u32 *page; + u32 *page, *savep; ENCODE_HEAD; if (nfserr) @@ -2090,6 +2219,7 @@ nfsd4_encode_readdir(struct nfsd4_compou return nfserr_resource; RESERVE_SPACE(8); /* verifier */ + savep = p; /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ WRITE32(0); @@ -2107,8 +2237,10 @@ nfsd4_encode_readdir(struct nfsd4_compou * pointer and eof field. */ maxcount = (maxcount >> 2) - 4; - if (maxcount < 0) - return nfserr_readdir_nospc; + if (maxcount < 0) { + nfserr = nfserr_toosmall; + goto err_no_verf; + } svc_take_page(resp->rqstp); page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); @@ -2122,11 +2254,13 @@ nfsd4_encode_readdir(struct nfsd4_compou &offset, &readdir->common, nfsd4_encode_dirent); if (nfserr == nfs_ok && - readdir->common.err == nfserr_readdir_nospc && + readdir->common.err == nfserr_toosmall && readdir->buffer == page) - nfserr = nfserr_readdir_nospc; + nfserr = nfserr_toosmall; + if (nfserr == nfserr_symlink) + nfserr = nfserr_notdir; if (nfserr) - return nfserr; + goto err_no_verf; if (readdir->offset) xdr_encode_hyper(readdir->offset, offset); @@ -2146,6 +2280,10 @@ nfsd4_encode_readdir(struct nfsd4_compou resp->end = resp->p + PAGE_SIZE/4; return 0; +err_no_verf: + p = savep; + ADJUST_ARGS(); + return nfserr; } static void @@ -2237,7 +2375,7 @@ nfsd4_encode_operation(struct nfsd4_comp RESERVE_SPACE(8); WRITE32(op->opnum); - statp = p++; /* to be backfilled at the end */ + statp = p++; /* to be backfilled at the end */ ADJUST_ARGS(); switch (op->opnum) { @@ -2324,6 +2462,8 @@ nfsd4_encode_operation(struct nfsd4_comp case OP_WRITE: nfsd4_encode_write(resp, 
op->status, &op->u.write); break; + case OP_RELEASE_LOCKOWNER: + break; default: break; } @@ -2340,6 +2480,8 @@ nfsd4_encode_operation(struct nfsd4_comp * * XDR note: do not encode rp->rp_buflen: the buffer contains the * previously sent already encoded operation. + * + * called with nfs4_lock_state() held */ void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op) @@ -2351,12 +2493,13 @@ nfsd4_encode_replay(struct nfsd4_compoun RESERVE_SPACE(8); WRITE32(op->opnum); - WRITE32(NFS_OK); + *p++ = rp->rp_status; /* already xdr'ed */ ADJUST_ARGS(); RESERVE_SPACE(rp->rp_buflen); WRITEMEM(rp->rp_buf, rp->rp_buflen); ADJUST_ARGS(); + nfs4_unlock_state(); } /* @@ -2369,6 +2512,24 @@ nfs4svc_encode_voidres(struct svc_rqst * return xdr_ressize_check(rqstp, p); } +void nfsd4_release_compoundargs(struct nfsd4_compoundargs *args) +{ + if (args->ops != args->iops) { + kfree(args->ops); + args->ops = args->iops; + } + if (args->tmpp) { + kfree(args->tmpp); + args->tmpp = NULL; + } + while (args->to_free) { + struct tmpbuf *tb = args->to_free; + args->to_free = tb->next; + tb->release(tb->buf); + kfree(tb); + } +} + int nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, u32 *p, struct nfsd4_compoundargs *args) { @@ -2381,23 +2542,11 @@ nfs4svc_decode_compoundargs(struct svc_r args->tmpp = NULL; args->to_free = NULL; args->ops = args->iops; + args->rqstp = rqstp; status = nfsd4_decode_compound(args); if (status) { - if (args->ops != args->iops) { - kfree(args->ops); - args->ops = args->iops; - } - if (args->tmpp) { - kfree(args->tmpp); - args->tmpp = NULL; - } - while (args->to_free) { - struct tmpbuf *tb = args->to_free; - args->to_free = tb->next; - kfree(tb->buf); - kfree(tb); - } + nfsd4_release_compoundargs(args); } return !status; } diff -puN include/linux/nfsd/state.h~CITI_NFS4_ALL include/linux/nfsd/state.h --- linux-2.6.3/include/linux/nfsd/state.h~CITI_NFS4_ALL 2004-02-19 16:47:05.000000000 -0500 +++ linux-2.6.3-bfields/include/linux/nfsd/state.h 
2004-02-19 16:47:15.000000000 -0500 @@ -113,6 +113,8 @@ struct nfs4_replay { unsigned int rp_buflen; char *rp_buf; unsigned intrp_allocated; + int rp_openfh_len; + char rp_openfh[NFS4_FHSIZE]; char rp_ibuf[NFSD4_REPLAY_ISIZE]; }; @@ -128,12 +130,20 @@ struct nfs4_replay { * so_perfilestate: heads the list of nfs4_stateid (either open or lock) * and is used to ensure no dangling nfs4_stateid references when we * release a stateowner. +* so_perlockowner: (open) nfs4_stateid->st_perlockowner entry - used when +* close is called to reap associated byte-range locks +* so_close_lru: (open) stateowner is placed on this list instead of being +* reaped (when so_perfilestate is empty) to hold the last close replay. +* reaped by laundromat thread after lease period. */ struct nfs4_stateowner { struct list_head so_idhash; /* hash by so_id */ struct list_head so_strhash; /* hash by op_name */ struct list_head so_perclient; /* nfs4_client->cl_perclient */ struct list_head so_perfilestate; /* list: nfs4_stateid */ + struct list_head so_perlockowner; /* nfs4_stateid->st_perlockowner */ + struct list_head so_close_lru; /* tail queue */ + time_t so_time; /* time of placement on so_close_lru */ int so_is_open_owner; /* 1=openowner,0=lockowner */ u32 so_id; struct nfs4_client * so_client; @@ -164,21 +174,23 @@ struct nfs4_file { * st_hash: stateid_hashtbl[] entry or lockstateid_hashtbl entry * st_perfile: file_hashtbl[] entry.
* st_perfile_state: nfs4_stateowner->so_perfilestate -* st_share_access: used only for open stateid -* st_share_deny: used only for open stateid +* st_perlockowner: (open stateid) list of lock nfs4_stateowners +* st_access_bmap: used only for open stateid +* st_deny_bmap: used only for open stateid */ struct nfs4_stateid { struct list_head st_hash; struct list_head st_perfile; struct list_head st_perfilestate; + struct list_head st_perlockowner; struct nfs4_stateowner * st_stateowner; struct nfs4_file * st_file; stateid_t st_stateid; struct file st_vfs_file; int st_vfs_set; - unsigned int st_share_access; - unsigned int st_share_deny; + unsigned long st_access_bmap; + unsigned long st_deny_bmap; }; /* flags for preprocess_seqid_op() */ @@ -187,6 +199,7 @@ struct nfs4_stateid { #define OPEN_STATE 0x00000004 #define LOCK_STATE 0x00000008 #define RDWR_STATE 0x00000010 +#define CLOSE_STATE 0x00000020 #define seqid_mutating_err(err) \ (((err) != nfserr_stale_clientid) && \ diff -puN fs/nfsd/Makefile~CITI_NFS4_ALL fs/nfsd/Makefile --- linux-2.6.3/fs/nfsd/Makefile~CITI_NFS4_ALL 2004-02-19 16:47:06.000000000 -0500 +++ linux-2.6.3-bfields/fs/nfsd/Makefile 2004-02-19 16:47:06.000000000 -0500 @@ -7,5 +7,5 @@ obj-$(CONFIG_NFSD) += nfsd.o nfsd-y := nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o -nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o +nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o nfsd-objs := $(nfsd-y) diff -puN /dev/null fs/nfsd/nfs4idmap.c --- /dev/null 2004-01-26 19:20:21.000000000 -0500 +++ linux-2.6.3-bfields/fs/nfsd/nfs4idmap.c 2004-02-19 16:47:06.000000000 -0500 @@ -0,0 +1,569 @@ +/* + * fs/nfsd/nfs4idmap.c + * + * Mapping of UID/GIDs to name and vice versa. + * + * Copyright (c) 2002, 2003 The Regents of the University of + * Michigan. All rights reserved. 
+ * + * Marius Aamodt Eriksen + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Cache entry + */ + +/* + * XXX we know that IDMAP_NAMESZ < PAGE_SIZE, but it's ugly to rely on + * that. 
+ */ + +#define IDMAP_TYPE_USER 0 +#define IDMAP_TYPE_GROUP 1 + +struct ent { + struct cache_head h; + int type; /* User / Group */ + uid_t id; + char name[IDMAP_NAMESZ]; + char authname[IDMAP_NAMESZ]; +}; + +#define DefineSimpleCacheLookupMap(STRUCT, FUNC) \ + DefineCacheLookup(struct STRUCT, h, FUNC##_lookup, \ + (struct STRUCT *item, int set), /*no setup */, \ + & FUNC##_cache, FUNC##_hash(item), FUNC##_match(item, tmp), \ + STRUCT##_init(new, item), STRUCT##_update(tmp, item), 0) + +/* Common entry handling */ + +#define ENT_HASHBITS 8 +#define ENT_HASHMAX (1 << ENT_HASHBITS) +#define ENT_HASHMASK (ENT_HASHMAX - 1) + +static inline void +ent_init(struct ent *new, struct ent *itm) +{ + new->id = itm->id; + new->type = itm->type; + + strlcpy(new->name, itm->name, sizeof(new->name)); + strlcpy(new->authname, itm->authname, sizeof(new->authname)); /* bound by the destination field itself */ +} + +static inline void +ent_update(struct ent *new, struct ent *itm) +{ + ent_init(new, itm); +} + +void +ent_put(struct cache_head *ch, struct cache_detail *cd) +{ + if (cache_put(ch, cd)) { + struct ent *map = container_of(ch, struct ent, h); + kfree(map); + } +} + +/* + * ID -> Name cache + */ + +static struct cache_head *idtoname_table[ENT_HASHMAX]; + +static uint32_t +idtoname_hash(struct ent *ent) +{ + uint32_t hash; + + hash = hash_str(ent->authname, ENT_HASHBITS); + hash = hash_long(hash ^ ent->id, ENT_HASHBITS); + + /* Flip LSB for user/group */ + if (ent->type == IDMAP_TYPE_GROUP) + hash ^= 1; + + return hash; +} + +static void +idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, + int *blen) +{ + struct ent *ent = container_of(ch, struct ent, h); + char idstr[11]; + + qword_add(bpp, blen, ent->authname); + snprintf(idstr, sizeof(idstr), "%u", ent->id); /* ids are unsigned 32-bit: at most 10 digits, so idstr[11] never truncates */ + qword_add(bpp, blen, ent->type == IDMAP_TYPE_GROUP ?
"group" : "user"); + qword_add(bpp, blen, idstr); + + (*bpp)[-1] = '\n'; +} + +static inline int +idtoname_match(struct ent *a, struct ent *b) +{ + return (a->id == b->id && a->type == b->type && + strcmp(a->authname, b->authname) == 0); +} + +static int +idtoname_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h) +{ + struct ent *ent; + + if (h == NULL) { + seq_puts(m, "#domain type id [name]\n"); + return 0; + } + ent = container_of(h, struct ent, h); + seq_printf(m, "%s %s %d", ent->authname, + ent->type == IDMAP_TYPE_GROUP ? "group" : "user", + ent->id); + if (test_bit(CACHE_VALID, &h->flags)) + seq_printf(m, " %s", ent->name); + seq_printf(m, "\n"); + return 0; +} + +static int idtoname_parse(struct cache_detail *, char *, int); +static struct ent *idtoname_lookup(struct ent *, int); + +struct cache_detail idtoname_cache = { + .hash_size = ENT_HASHMAX, + .hash_table = idtoname_table, + .name = "nfs4.idtoname", + .cache_put = ent_put, + .cache_request = idtoname_request, + .cache_parse = idtoname_parse, + .cache_show = idtoname_show, +}; + +int +idtoname_parse(struct cache_detail *cd, char *buf, int buflen) +{ + struct ent ent, *res; + char *buf1, *bp; + int error = -EINVAL; + + if (buf[buflen - 1] != '\n') + return (-EINVAL); + buf[buflen - 1]= '\0'; + + buf1 = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (buf1 == NULL) + return (-ENOMEM); + + memset(&ent, 0, sizeof(ent)); + + /* Authentication name */ + if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) + goto out; + memcpy(ent.authname, buf1, sizeof(ent.authname)); + + /* Type */ + if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) + goto out; + ent.type = strcmp(buf1, "user") == 0 ? 
+ IDMAP_TYPE_USER : IDMAP_TYPE_GROUP; + + /* ID */ + if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) + goto out; + ent.id = simple_strtoul(buf1, &bp, 10); + if (bp == buf1) + goto out; + + /* expiry */ + ent.h.expiry_time = get_expiry(&buf); + if (ent.h.expiry_time == 0) + goto out; + + /* Name */ + error = qword_get(&buf, buf1, PAGE_SIZE); + if (error == -EINVAL) + goto out; + if (error == -ENOENT) + set_bit(CACHE_NEGATIVE, &ent.h.flags); + else { + if (error >= IDMAP_NAMESZ) { + error = -EINVAL; + goto out; + } + memcpy(ent.name, buf1, sizeof(ent.name)); + } + error = -ENOMEM; + if ((res = idtoname_lookup(&ent, 1)) == NULL) + goto out; + + ent_put(&res->h, &idtoname_cache); + + error = 0; +out: + kfree(buf1); + + return error; +} + +static DefineSimpleCacheLookupMap(ent, idtoname); + +/* + * Name -> ID cache + */ + +static struct cache_head *nametoid_table[ENT_HASHMAX]; + +static inline int +nametoid_hash(struct ent *ent) +{ + return hash_str(ent->name, ENT_HASHBITS); +} + +void +nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, + int *blen) +{ + struct ent *ent = container_of(ch, struct ent, h); + + qword_add(bpp, blen, ent->authname); + qword_add(bpp, blen, ent->type == IDMAP_TYPE_GROUP ? "group" : "user"); + qword_add(bpp, blen, ent->name); + + (*bpp)[-1] = '\n'; +} + +static inline int +nametoid_match(struct ent *a, struct ent *b) +{ + return (a->type == b->type && strcmp(a->name, b->name) == 0 && + strcmp(a->authname, b->authname) == 0); +} + +static int +nametoid_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h) +{ + struct ent *ent; + + if (h == NULL) { + seq_puts(m, "#domain type name [id]\n"); + return 0; + } + ent = container_of(h, struct ent, h); + seq_printf(m, "%s %s %s", ent->authname, + ent->type == IDMAP_TYPE_GROUP ? 
"group" : "user", + ent->name); + if (test_bit(CACHE_VALID, &h->flags)) + seq_printf(m, " %d", ent->id); + seq_printf(m, "\n"); + return 0; +} + +static struct ent *nametoid_lookup(struct ent *, int); +int nametoid_parse(struct cache_detail *, char *, int); + +struct cache_detail nametoid_cache = { + .hash_size = ENT_HASHMAX, + .hash_table = nametoid_table, + .name = "nfs4.nametoid", + .cache_put = ent_put, + .cache_request = nametoid_request, + .cache_parse = nametoid_parse, + .cache_show = nametoid_show, +}; + +int +nametoid_parse(struct cache_detail *cd, char *buf, int buflen) +{ + struct ent ent, *res; + char *buf1; + int error = -EINVAL; + + if (buf[buflen - 1] != '\n') + return (-EINVAL); + buf[buflen - 1]= '\0'; + + buf1 = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (buf1 == NULL) + return (-ENOMEM); + + memset(&ent, 0, sizeof(ent)); + + /* Authentication name */ + if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) + goto out; + memcpy(ent.authname, buf1, sizeof(ent.authname)); + + /* Type */ + if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) + goto out; + ent.type = strcmp(buf1, "user") == 0 ? 
+ IDMAP_TYPE_USER : IDMAP_TYPE_GROUP; + + /* Name */ + error = qword_get(&buf, buf1, PAGE_SIZE); + if (error <= 0 || error >= IDMAP_NAMESZ) + goto out; + memcpy(ent.name, buf1, sizeof(ent.name)); + + /* expiry */ + ent.h.expiry_time = get_expiry(&buf); + if (ent.h.expiry_time == 0) + goto out; + + /* ID */ + error = get_int(&buf, &ent.id); + if (error == -EINVAL) + goto out; + if (error == -ENOENT) + set_bit(CACHE_NEGATIVE, &ent.h.flags); + + error = -ENOMEM; + if ((res = nametoid_lookup(&ent, 1)) == NULL) + goto out; + + ent_put(&res->h, &nametoid_cache); + error = 0; +out: + kfree(buf1); + + return (error); +} + +static DefineSimpleCacheLookupMap(ent, nametoid); + +/* + * Exported API + */ + +void +nfsd_idmap_init(void) +{ + cache_register(&idtoname_cache); + cache_register(&nametoid_cache); +} + +void +nfsd_idmap_shutdown(void) +{ + cache_unregister(&idtoname_cache); + cache_unregister(&nametoid_cache); +} + +/* + * Deferred request handling + */ + +struct idmap_defer_req { + struct cache_req req; + struct cache_deferred_req deferred_req; + wait_queue_head_t waitq; + atomic_t count; +}; + +static void +put_mdr(struct idmap_defer_req *mdr) +{ + if (atomic_dec_and_test(&mdr->count)) + kfree(mdr); +} + +static void +idmap_revisit(struct cache_deferred_req *dreq, int toomany) +{ + struct idmap_defer_req *mdr = + container_of(dreq, struct idmap_defer_req, deferred_req); + + wake_up(&mdr->waitq); + put_mdr(mdr); +} + +static struct cache_deferred_req * +idmap_defer(struct cache_req *req) +{ + struct idmap_defer_req *mdr = + container_of(req, struct idmap_defer_req, req); + + mdr->deferred_req.revisit = idmap_revisit; + return (&mdr->deferred_req); +} + +static int threads_waiting = 0; + +static inline int +idmap_lookup_wait(struct idmap_defer_req *mdr, wait_queue_t waitq, struct + svc_rqst *rqstp) { + int ret = -ETIMEDOUT; + + set_task_state(current, TASK_INTERRUPTIBLE); + lock_kernel(); + /* XXX: Does it matter that threads_waiting isn't per-server? 
*/ + /* Note: BKL prevents races with nfsd_svc and other lookups */ + if (2 * threads_waiting > rqstp->rq_server->sv_nrthreads) + goto out; + threads_waiting++; + schedule_timeout(10 * HZ); + threads_waiting--; + ret = 0; +out: + unlock_kernel(); + remove_wait_queue(&mdr->waitq, &waitq); + set_task_state(current, TASK_RUNNING); + put_mdr(mdr); + return ret; +} + +static int +idmap_lookup(struct svc_rqst *rqstp, + struct ent *(*lookup_fn)(struct ent *, int), struct ent *key, + struct cache_detail *detail, struct ent **item) +{ + struct idmap_defer_req *mdr; + DECLARE_WAITQUEUE(waitq, current); + int ret; + + *item = lookup_fn(key, 0); + if (!*item) + return -ENOMEM; + mdr = kmalloc(sizeof(*mdr), GFP_KERNEL); + memset(mdr, 0, sizeof(*mdr)); + init_waitqueue_head(&mdr->waitq); + add_wait_queue(&mdr->waitq, &waitq); + atomic_set(&mdr->count, 2); + mdr->req.defer = idmap_defer; + ret = cache_check(detail, &(*item)->h, &mdr->req); + if (ret == -EAGAIN) { + ret = idmap_lookup_wait(mdr, waitq, rqstp); + if (ret) + goto out; + /* Try again, but don't wait. 
*/ + *item = lookup_fn(key, 0); + ret = -ENOMEM; + if (!*item) + goto out; + ret = -ETIMEDOUT; + if (!test_bit(CACHE_VALID, &(*item)->h.flags)) { + ent_put(&(*item)->h, detail); + goto out; + } + ret = cache_check(detail, &(*item)->h, NULL); + } +out: + return ret; +} + +static int +idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, + uid_t *id) +{ + struct ent *item, key = { + .type = type, + }; + int ret; + + if (namelen + 1 > sizeof(key.name)) + return -EINVAL; + memcpy(key.name, name, namelen); + key.name[namelen] = '\0'; + strlcpy(key.authname, rqstp->rq_client->name, sizeof(key.authname)); + ret = idmap_lookup(rqstp, nametoid_lookup, &key, &nametoid_cache, &item); + if (ret) + return ret; + *id = item->id; + ent_put(&item->h, &nametoid_cache); + return 0; +} + +static int +idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name) +{ + struct ent *item, key = { + .id = id, + .type = type, + }; + int ret; + + strlcpy(key.authname, rqstp->rq_client->name, sizeof(key.authname)); + ret = idmap_lookup(rqstp, idtoname_lookup, &key, &idtoname_cache, &item); + if (ret) + return ret; + ret = strlen(item->name); + BUG_ON(ret > IDMAP_NAMESZ); + memcpy(name, item->name, ret); + ent_put(&item->h, &idtoname_cache); + return ret; +} + +int +nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen, + __u32 *id) +{ + return idmap_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, id); +} + +int +nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen, + __u32 *id) +{ + return idmap_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, id); +} + +int +nfsd_map_uid_to_name(struct svc_rqst *rqstp, __u32 id, char *name) +{ + return idmap_id_to_name(rqstp, IDMAP_TYPE_USER, id, name); +} + +int +nfsd_map_gid_to_name(struct svc_rqst *rqstp, __u32 id, char *name) +{ + return idmap_id_to_name(rqstp, IDMAP_TYPE_GROUP, id, name); +} diff -puN fs/nfsd/nfsctl.c~CITI_NFS4_ALL fs/nfsd/nfsctl.c --- 
linux-2.6.3/fs/nfsd/nfsctl.c~CITI_NFS4_ALL 2004-02-19 16:47:06.000000000 -0500 +++ linux-2.6.3-bfields/fs/nfsd/nfsctl.c 2004-02-19 16:47:06.000000000 -0500 @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -436,6 +437,9 @@ static int __init init_nfsd(void) nfsd_cache_init(); /* RPC reply cache */ nfsd_export_init(); /* Exports table */ nfsd_lockd_init(); /* lockd->nfsd callbacks */ +#ifdef CONFIG_NFSD_V4 + nfsd_idmap_init(); /* Name to ID mapping */ +#endif /* CONFIG_NFSD_V4 */ if (proc_mkdir("fs/nfs", 0)) { struct proc_dir_entry *entry; entry = create_proc_entry("fs/nfs/exports", 0, NULL); @@ -462,6 +466,9 @@ static void __exit exit_nfsd(void) remove_proc_entry("fs/nfs", NULL); nfsd_stat_shutdown(); nfsd_lockd_shutdown(); +#ifdef CONFIG_NFSD_V4 + nfsd_idmap_shutdown(); +#endif /* CONFIG_NFSD_V4 */ unregister_filesystem(&nfsd_fs_type); } diff -puN fs/nfsd/nfsproc.c~CITI_NFS4_ALL fs/nfsd/nfsproc.c --- linux-2.6.3/fs/nfsd/nfsproc.c~CITI_NFS4_ALL 2004-02-19 16:47:06.000000000 -0500 +++ linux-2.6.3-bfields/fs/nfsd/nfsproc.c 2004-02-19 16:47:06.000000000 -0500 @@ -585,6 +585,7 @@ nfserrno (int errno) { nfserr_dquot, -EDQUOT }, #endif { nfserr_stale, -ESTALE }, + { nfserr_jukebox, -ETIMEDOUT }, { nfserr_dropit, -EAGAIN }, { nfserr_dropit, -ENOMEM }, { -1, -EIO } diff -puN /dev/null include/linux/nfsd_idmap.h --- /dev/null 2004-01-26 19:20:21.000000000 -0500 +++ linux-2.6.3-bfields/include/linux/nfsd_idmap.h 2004-02-19 16:47:06.000000000 -0500 @@ -0,0 +1,54 @@ +/* + * include/linux/nfsd_idmap.h + * + * Mapping of UID to name and vice versa. + * + * Copyright (c) 2002, 2003 The Regents of the University of + * Michigan. All rights reserved. + * + * Marius Aamodt Eriksen + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef LINUX_NFSD_IDMAP_H +#define LINUX_NFSD_IDMAP_H + +#include +#include + +/* XXX from linux/nfs_idmap.h */ +#define IDMAP_NAMESZ 128 + +void nfsd_idmap_init(void); +void nfsd_idmap_shutdown(void); + +int nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, __u32 *); +int nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, __u32 *); +int nfsd_map_uid_to_name(struct svc_rqst *, __u32, char *); +int nfsd_map_gid_to_name(struct svc_rqst *, __u32, char *); + +#endif /* LINUX_NFSD_IDMAP_H */ diff -puN include/linux/nfsd/xdr4.h~CITI_NFS4_ALL include/linux/nfsd/xdr4.h --- linux-2.6.3/include/linux/nfsd/xdr4.h~CITI_NFS4_ALL 2004-02-19 16:47:06.000000000 -0500 +++ linux-2.6.3-bfields/include/linux/nfsd/xdr4.h 2004-02-19 16:47:10.000000000 -0500 @@ -39,6 +39,8 @@ #ifndef _LINUX_NFSD_XDR4_H #define _LINUX_NFSD_XDR4_H +#include + #define NFSD4_MAX_TAGLEN 128 #define XDR_LEN(n) (((n) + 3) & ~3) @@ -54,10 +56,10 @@ typedef struct { struct nfsd4_change_info { u32 atomic; - u32 before_size; - u32 before_ctime; - u32 after_size; - u32 after_ctime; + u32 before_ctime_sec; + u32 before_ctime_nsec; + u32 after_ctime_sec; + u32 after_ctime_nsec; }; struct nfsd4_access { @@ -95,6 +97,7 @@ struct nfsd4_create { u32 cr_bmval[2]; /* request */ struct iattr cr_iattr; /* request */ struct nfsd4_change_info cr_cinfo; /* response */ + struct nfs4_acl *cr_acl; }; #define cr_linklen u.link.namelen #define cr_linkname u.link.name @@ -216,7 +219,7 @@ struct nfsd4_open { u32 op_rflags; /* response */ int op_truncate; /* used during processing */ struct nfs4_stateowner *op_stateowner; /* used during processing */ - + struct nfs4_acl *op_acl; }; #define op_iattr u.iattr #define op_verf u.verf @@ -263,6 +266,10 @@ struct nfsd4_readdir { u32 * offset; }; +struct nfsd4_release_lockowner { + clientid_t rl_clientid; + struct xdr_netobj rl_owner; +}; struct nfsd4_readlink { struct svc_rqst *rl_rqstp; /* request */ struct svc_fh * rl_fhp; /* request */ @@ -287,6 +294,7 @@ 
struct nfsd4_setattr { stateid_t sa_stateid; /* request */ u32 sa_bmval[2]; /* request */ struct iattr sa_iattr; /* request */ + struct nfs4_acl *sa_acl; }; struct nfsd4_setclientid { @@ -359,6 +367,7 @@ struct nfsd4_op { struct nfsd4_setclientid_confirm setclientid_confirm; struct nfsd4_verify verify; struct nfsd4_write write; + struct nfsd4_release_lockowner release_lockowner; } u; struct nfs4_replay * replay; }; @@ -373,9 +382,12 @@ struct nfsd4_compoundargs { u32 * tmpp; struct tmpbuf { struct tmpbuf *next; + void (*release)(const void *); void *buf; } *to_free; - + + struct svc_rqst *rqstp; + u32 taglen; char * tag; u32 minorversion; @@ -404,10 +416,10 @@ set_change_info(struct nfsd4_change_info { BUG_ON(!fhp->fh_pre_saved || !fhp->fh_post_saved); cinfo->atomic = 1; - cinfo->before_size = fhp->fh_pre_size; - cinfo->before_ctime = fhp->fh_pre_ctime.tv_sec; - cinfo->after_size = fhp->fh_post_size; - cinfo->after_ctime = fhp->fh_post_ctime.tv_sec; + cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec; + cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec; + cinfo->after_ctime_sec = fhp->fh_post_ctime.tv_sec; + cinfo->after_ctime_nsec = fhp->fh_post_ctime.tv_nsec; } int nfs4svc_encode_voidres(struct svc_rqst *, u32 *, void *); @@ -419,7 +431,7 @@ void nfsd4_encode_operation(struct nfsd4 void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); int nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, u32 *buffer, int *countp, - u32 *bmval); + u32 *bmval, struct svc_rqst *); extern int nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid); extern int nfsd4_setclientid_confirm(struct svc_rqst *rqstp, @@ -439,6 +451,9 @@ extern int nfsd4_lockt(struct svc_rqst * struct nfsd4_lockt *lockt); extern int nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_locku *locku); +extern int nfsd4_release_lockowner(struct svc_rqst *rqstp, + struct nfsd4_release_lockowner 
*rlockowner); +extern void nfsd4_release_compoundargs(struct nfsd4_compoundargs *); #endif /* diff -puN -L include/linux/sunrpc/name_lookup.h include/linux/sunrpc/name_lookup.h~CITI_NFS4_ALL /dev/null --- linux-2.6.3/include/linux/sunrpc/name_lookup.h +++ /dev/null 2004-01-26 19:20:21.000000000 -0500 @@ -1,38 +0,0 @@ - -/* - * map between user/group name and id for a given 'client' - */ - -struct name_ent { - char name[20]; -}; -static inline int name_get_user(int uid, struct name_ent **namep) -{ - struct name_ent *n = kmalloc(sizeof(*n),GFP_KERNEL); - if (n) sprintf(n->name, "%d",uid); - *namep = n; - return n ? 0 : -ENOMEM; -} -static inline int name_get_group(int uid, struct name_ent **namep) -{ - struct name_ent *n = kmalloc(sizeof(*n),GFP_KERNEL); - if (n) sprintf(n->name, "%d",uid); - *namep = n; - return n ? 0 : -ENOMEM; -} -static inline int name_get_uid(char *name, int name_len, int *uidp) -{ - *uidp = simple_strtoul(name, NULL, 0); - return 0; -} - -static inline int name_get_gid(char *name, int name_len, int *gidp) -{ - *gidp = simple_strtoul(name, NULL, 0); - return 0; -} - -static inline void name_put(struct name_ent *ent) -{ - kfree(ent); -} diff -puN fs/Makefile~CITI_NFS4_ALL fs/Makefile --- linux-2.6.3/fs/Makefile~CITI_NFS4_ALL 2004-02-19 16:47:06.000000000 -0500 +++ linux-2.6.3-bfields/fs/Makefile 2004-02-19 16:47:06.000000000 -0500 @@ -68,6 +68,7 @@ obj-$(CONFIG_NFS_FS) += nfs/ obj-$(CONFIG_EXPORTFS) += exportfs/ obj-$(CONFIG_NFSD) += nfsd/ obj-$(CONFIG_LOCKD) += lockd/ +obj-$(CONFIG_NFS_V4_ACL) += nfs4acl/ obj-$(CONFIG_NLS) += nls/ obj-$(CONFIG_SYSV_FS) += sysv/ obj-$(CONFIG_SMB_FS) += smbfs/ diff -puN /dev/null fs/nfs4acl/acl.c --- /dev/null 2004-01-26 19:20:21.000000000 -0500 +++ linux-2.6.3-bfields/fs/nfs4acl/acl.c 2004-02-19 16:47:06.000000000 -0500 @@ -0,0 +1,921 @@ +/* + * fs/nfs4acl/acl.c + * + * Common NFSv4 ACL handling code. + * + * Copyright (c) 2002, 2003 The Regents of the University of Michigan. + * All rights reserved. 
+ * + * Marius Aamodt Eriksen + * Jeff Sedlak + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NFS4_READ_MODE (NFS4_ACE_READ_DATA | NFS4_ACE_READ_NAMED_ATTRS) +#define NFS4_WRITE_MODE (NFS4_ACE_WRITE_DATA | NFS4_ACE_WRITE_NAMED_ATTRS | NFS4_ACE_APPEND_DATA | NFS4_ACE_DELETE_CHILD) +#define NFS4_EXECUTE_MODE NFS4_ACE_EXECUTE +#define NFS4_ANYONE_MODE (NFS4_ACE_READ_ATTRIBUTES | NFS4_ACE_READ_ACL) +#define NFS4_OWNER_MODE (NFS4_ACE_WRITE_ATTRIBUTES | NFS4_ACE_WRITE_ACL) + +#define MASK_EQUAL(mask1, mask2) \ + ( ((mask1) & NFS4_ACE_MASK_ALL) == ((mask2) & NFS4_ACE_MASK_ALL) ) + +static u32 +mask_from_posix(unsigned short perm, int owner) +{ + int mask = NFS4_ANYONE_MODE; + if (owner) + mask |= NFS4_OWNER_MODE; + if (perm & ACL_READ) + mask |= NFS4_READ_MODE; + if (perm & ACL_WRITE) + mask |= NFS4_WRITE_MODE; + if (perm & ACL_EXECUTE) + mask |= NFS4_EXECUTE_MODE; + return mask; +} + +static int +mode_from_nfs4(u32 perm, unsigned short *mode, int owner) +{ + /* XXX we might also want to ignore DELETE_CHILD on non-directories */ + /* XXX also add special interpretation to EXECUTE on directories */ + u32 ignore = NFS4_ACE_SYNCHRONIZE; + + *mode = 0; + if ((perm & NFS4_READ_MODE) == NFS4_READ_MODE) + *mode |= ACL_READ; + if ((perm & NFS4_WRITE_MODE) == NFS4_WRITE_MODE) + *mode |= ACL_WRITE; + if ((perm & NFS4_EXECUTE_MODE) == NFS4_EXECUTE_MODE) + *mode |= ACL_EXECUTE; + if (!MASK_EQUAL(ignore|perm, ignore|mask_from_posix(*mode, owner))) + return -EINVAL; + return 0; +} + +struct ace_container { + struct nfs4_ace *ace; + struct list_head ace_l; +}; + +static short ace2type(struct nfs4_ace *); +static int _posix_to_nfsv4_one(struct nfs4_acl_idmapper *, void *idarg, struct posix_acl *, struct nfs4_acl *, int); +static struct posix_acl *_nfsv4_to_posix_one(struct nfs4_acl_idmapper *, void *idarg, struct nfs4_acl *); + +struct nfs4_acl * +nfs4_acl_posix_to_nfsv4(struct nfs4_acl_idmapper *idmapper, void *idarg, + struct posix_acl *pacl, struct posix_acl *dpacl) +{ + 
struct nfs4_acl *acl; + int error = -EINVAL; + + if ((pacl != NULL && + (posix_acl_valid(pacl) < 0 || pacl->a_count == 0)) || + (dpacl != NULL && + (posix_acl_valid(dpacl) < 0 || dpacl->a_count == 0))) + goto out_err; + + acl = nfs4_acl_new(); + if (acl == NULL) { + error = -ENOMEM; + goto out_err; + } + + if (pacl != NULL) { + error = _posix_to_nfsv4_one(idmapper, idarg, pacl, acl, 0); + if (error < 0) + goto out_acl; + } + + if (dpacl != NULL) { + error = _posix_to_nfsv4_one(idmapper, idarg, dpacl, acl, + NFS4_ACE_FILE_INHERIT_ACE | + NFS4_ACE_DIRECTORY_INHERIT_ACE | + NFS4_ACE_INHERIT_ONLY_ACE); + if (error < 0) + goto out_acl; + } + + return acl; + +out_acl: + nfs4_acl_free(acl); +out_err: + acl = ERR_PTR(error); + + return acl; +} + +static int +nfs4_acl_add_pair(struct nfs4_acl *acl, int eflag, u32 mask, char *owner, + int owner_len) +{ + int error; + + error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE, + eflag, mask, owner, owner_len); + if (error < 0) + return error; + error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, + eflag, ~mask, owner, owner_len); + return error; +} + +/* We assume the acl has been verified with posix_acl_valid. */ +static int +_posix_to_nfsv4_one(struct nfs4_acl_idmapper *idmapper, void *idarg, + struct posix_acl *pacl, struct nfs4_acl *acl, int eflag) +{ + struct posix_acl_entry *pa, *pe, *group_owner_entry; + int error = -EINVAL; + u32 mask, mask_mask; + char xname[IDMAP_NAMESZ]; + int xnamelen; + + BUG_ON(pacl->a_count < 3); + pe = pacl->a_entries + pacl->a_count; + pa = pe - 2; /* if mask entry exists, it's second from the last. 
*/ + if (pa->e_tag == ACL_MASK) + mask_mask = ~mask_from_posix(pa->e_perm, 0); + else + mask_mask = 0; + + pa = pacl->a_entries; + BUG_ON(pa->e_tag != ACL_USER_OBJ); + mask = mask_from_posix(pa->e_perm, 1); + error = nfs4_acl_add_pair(acl, eflag, mask, "OWNER@", + sizeof("OWNER@") - 1); + if (error < 0) + goto out; + pa++; + + while (pa->e_tag == ACL_USER) { + mask = mask_from_posix(pa->e_perm, 0); + error = idmapper->uid2name(idarg, pa->e_id, xname); + if (error < 0) + goto out; + xnamelen = error; + + error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, + eflag, mask_mask, xname, xnamelen); + if (error < 0) + goto out; + + + error = nfs4_acl_add_pair(acl, eflag, mask, xname, xnamelen); + if (error < 0) + goto out; + pa++; + } + + /* In the case of groups, we apply allow ACEs first, then deny ACEs, + * since a user can be in more than one group. */ + + /* allow ACEs */ + + if (pacl->a_count > 3) { + BUG_ON(pa->e_tag != ACL_GROUP_OBJ); + error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, + NFS4_ACE_IDENTIFIER_GROUP | eflag, mask_mask, + "GROUP@", sizeof("GROUP@") - 1); + if (error < 0) + goto out; + } + group_owner_entry = pa; + mask = mask_from_posix(pa->e_perm, 0); + error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE, + NFS4_ACE_IDENTIFIER_GROUP | eflag, mask, + "GROUP@", sizeof("GROUP@") - 1); + if (error < 0) + goto out; + pa++; + + while (pa->e_tag == ACL_GROUP) { + mask = mask_from_posix(pa->e_perm, 0); + error = idmapper->gid2name(idarg, pa->e_id, xname); + if (error < 0) + goto out; + xnamelen = error; + + error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, + NFS4_ACE_IDENTIFIER_GROUP | eflag, + mask_mask, xname, xnamelen); + if (error < 0) + goto out; + + error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE, + NFS4_ACE_IDENTIFIER_GROUP | eflag, mask, xname, xnamelen); + if (error < 0) + goto out; + pa++; + } + + /* deny ACEs */ + + pa = group_owner_entry; + mask = mask_from_posix(pa->e_perm, 0); + 
error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, + NFS4_ACE_IDENTIFIER_GROUP | eflag, + ~mask, "GROUP@", sizeof("GROUP@") - 1); + if (error < 0) + goto out; + pa++; + while (pa->e_tag == ACL_GROUP) { + mask = mask_from_posix(pa->e_perm, 0); + error = idmapper->gid2name(idarg, pa->e_id, xname); + if (error < 0) + goto out; + xnamelen = error; + + error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, + NFS4_ACE_IDENTIFIER_GROUP | eflag, ~mask, xname, xnamelen); + if (error < 0) + goto out; + pa++; + } + + if (pa->e_tag == ACL_MASK) + pa++; + BUG_ON(pa->e_tag != ACL_OTHER); + mask = mask_from_posix(pa->e_perm, 0); + error = nfs4_acl_add_pair(acl, eflag, mask, "EVERYONE@", + sizeof("EVERYONE@") - 1); + +out: + return error; +} + +static void +sort_pacl_range(struct posix_acl *pacl, int start, int end) { + int sorted = 0, i; + struct posix_acl_entry tmp; + + /* We just do a bubble sort; easy to do in place, and we're not + * expecting acl's to be long enough to justify anything more. */ + while (!sorted) { + sorted = 1; + for (i = start; i < end; i++) { + if (pacl->a_entries[i].e_id + > pacl->a_entries[i+1].e_id) { + sorted = 0; + tmp = pacl->a_entries[i]; + pacl->a_entries[i] = pacl->a_entries[i+1]; + pacl->a_entries[i+1] = tmp; + } + } + } +} + +static void +sort_pacl(struct posix_acl *pacl) +{ + /* posix_acl_valid requires that users and groups be in order + * by uid/gid. 
*/ + int i, j; + + if (pacl->a_count <= 4) + return; /* no users or groups */ + i = 1; + while (pacl->a_entries[i].e_tag == ACL_USER) + i++; + sort_pacl_range(pacl, 1, i-1); + + BUG_ON(pacl->a_entries[i].e_tag != ACL_GROUP_OBJ); + j = ++i; /* step past ACL_GROUP_OBJ: i and j both start at the first named group */ + while (pacl->a_entries[j].e_tag == ACL_GROUP) + j++; + sort_pacl_range(pacl, i, j-1); + return; +} + +static int +write_pace(struct nfs4_ace *ace, struct posix_acl *pacl, + struct posix_acl_entry **pace, short tag, + struct nfs4_acl_idmapper *idmapper, void *idarg) +{ + struct posix_acl_entry *this = *pace; + + if (*pace == pacl->a_entries + pacl->a_count) + return -EINVAL; /* fell off the end */ + (*pace)++; + this->e_tag = tag; + if (mode_from_nfs4(ace->access_mask, &this->e_perm, + tag == ACL_USER_OBJ)) + return -EINVAL; + switch (tag) { + case ACL_USER: + return idmapper->name2uid(idarg, ace->who, ace->wholen, + &this->e_id); + case ACL_GROUP: + return idmapper->name2gid(idarg, ace->who, ace->wholen, + &this->e_id); + default: + this->e_id = ACL_UNDEFINED_ID; + return 0; + } +} + +static struct nfs4_ace * +get_next_v4_ace(struct list_head **p, struct list_head *head) +{ + struct nfs4_ace *ace; + + *p = (*p)->next; + if (*p == head) + return NULL; + ace = list_entry(*p, struct nfs4_ace, l_ace); + + return ace; +} + +int +nfs4_acl_nfsv4_to_posix(struct nfs4_acl_idmapper *idmapper, void *idarg, + struct nfs4_acl *acl, struct posix_acl **pacl, + struct posix_acl **dpacl) +{ + struct nfs4_acl *dacl; + int error = -ENOMEM; + + *pacl = NULL; + *dpacl = NULL; + + dacl = nfs4_acl_new(); + if (dacl == NULL) + goto out; + + error = nfs4_acl_split(acl, dacl); + if (error < 0) + goto out_acl; + + if (pacl != NULL) { + if (acl->naces == 0) { + error = -ENODATA; + goto try_dpacl; + } + + *pacl = _nfsv4_to_posix_one(idmapper, idarg, acl); + if (IS_ERR(*pacl)) { + error = PTR_ERR(*pacl); + *pacl = NULL; + goto out_acl; + } + } + +try_dpacl: + if (dpacl != NULL) { + if (dacl->naces == 0) { + if (pacl == NULL || *pacl == NULL) + error = 
-ENODATA; + goto out_acl; + } + + error = 0; + *dpacl = _nfsv4_to_posix_one(idmapper, idarg, dacl); + if (IS_ERR(*dpacl)) { + error = PTR_ERR(*dpacl); + *dpacl = NULL; + goto out_acl; + } + } + +out_acl: + if (error && pacl) { + posix_acl_release(*pacl); + *pacl = NULL; + } + nfs4_acl_free(dacl); +out: + return error; +} + +static int +complementary_ace_pair(struct nfs4_ace *allow, struct nfs4_ace *deny) +{ + return MASK_EQUAL(allow->access_mask, ~deny->access_mask) && + allow->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE && + deny->type == NFS4_ACE_ACCESS_DENIED_ACE_TYPE && + allow->flag == deny->flag && + allow->wholen == deny->wholen && + memcmp(allow->who, deny->who, allow->wholen) == 0; +} + +static inline int +user_obj_from_v4(struct nfs4_acl *n4acl, struct list_head **p, + struct posix_acl *pacl, struct posix_acl_entry **pace, + struct nfs4_acl_idmapper *idmapper, void *idarg) +{ + int error = -EINVAL; + struct nfs4_ace *ace, *ace2; + + ace = get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) + goto out; + if (ace2type(ace) != ACL_USER_OBJ) + goto out; + error = write_pace(ace, pacl, pace, ACL_USER_OBJ, idmapper, idarg); + if (error < 0) + goto out; + error = -EINVAL; + ace2 = get_next_v4_ace(p, &n4acl->ace_head); + if (ace2 == NULL) + goto out; + if (!complementary_ace_pair(ace, ace2)) + goto out; + error = 0; +out: + return error; +} + +static inline int +users_from_v4(struct nfs4_acl *n4acl, struct list_head **p, + struct nfs4_ace **mask_ace, + struct posix_acl *pacl, struct posix_acl_entry **pace, + struct nfs4_acl_idmapper *idmapper, void *idarg) +{ + int error = -EINVAL; + struct nfs4_ace *ace, *ace2; + + ace = get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) + goto out; + while (ace2type(ace) == ACL_USER) { + if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE) + goto out; + if (*mask_ace && + !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask)) + goto out; + *mask_ace = ace; + ace = get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) 
+ goto out; + if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) + goto out; + error = write_pace(ace, pacl, pace, ACL_USER, idmapper, idarg); + if (error < 0) + goto out; + error = -EINVAL; + ace2 = get_next_v4_ace(p, &n4acl->ace_head); + if (ace2 == NULL) + goto out; + if (!complementary_ace_pair(ace, ace2)) + goto out; + if ((*mask_ace)->flag != ace2->flag || + ace2->wholen != (*mask_ace)->wholen || + memcmp(ace2->who, (*mask_ace)->who, + (*mask_ace)->wholen) != 0) + goto out; + ace = get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) + goto out; + } + error = 0; +out: + return error; +} + +static inline int +group_obj_and_groups_from_v4(struct nfs4_acl *n4acl, struct list_head **p, + struct nfs4_ace **mask_ace, + struct posix_acl *pacl, struct posix_acl_entry **pace, + struct nfs4_acl_idmapper *idmapper, void *idarg) +{ + int error = -EINVAL; + struct nfs4_ace *ace, *ace2; + struct ace_container *ac; + struct list_head group_l; + + INIT_LIST_HEAD(&group_l); + ace = list_entry(*p, struct nfs4_ace, l_ace); + + /* group owner (mask and allow aces) */ + + if (pacl->a_count != 3) { + /* then the group owner should be preceded by mask */ + if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE) + goto out; + if (*mask_ace && + !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask)) + goto out; + *mask_ace = ace; + ace = get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) + goto out; + + if ((*mask_ace)->flag != ace->flag || + ace->wholen != (*mask_ace)->wholen || + memcmp(ace->who, (*mask_ace)->who, + (*mask_ace)->wholen) != 0) + goto out; + } + + if (ace2type(ace) != ACL_GROUP_OBJ) + goto out; + + ac = kmalloc(sizeof(*ac), GFP_KERNEL); + error = -ENOMEM; + if (ac == NULL) + goto out; + ac->ace = ace; + list_add_tail(&ac->ace_l, &group_l); + + error = -EINVAL; + if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) + goto out; + + error = write_pace(ace, pacl, pace, ACL_GROUP_OBJ, idmapper, idarg); + if (error < 0) + goto out; + + error = -EINVAL; + ace = 
get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) + goto out; + + /* groups (mask and allow aces) */ + + while (ace2type(ace) == ACL_GROUP) { + if (*mask_ace == NULL) + goto out; + + if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE || + !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask)) + goto out; + *mask_ace = ace; + + ace = get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) + goto out; + if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE || + ace->wholen != (*mask_ace)->wholen || + memcmp(ace->who, (*mask_ace)->who, (*mask_ace)->wholen) != 0) + goto out; /* validate before allocating: a container not yet on group_l would leak */ + ac = kmalloc(sizeof(*ac), GFP_KERNEL); + error = -ENOMEM; + if (ac == NULL) + goto out; + error = -EINVAL; + + ac->ace = ace; + list_add_tail(&ac->ace_l, &group_l); + + error = write_pace(ace, pacl, pace, ACL_GROUP, idmapper, idarg); + if (error < 0) + goto out; + error = -EINVAL; + ace = get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) + goto out; + } + + /* group owner (deny ace) */ + + if (ace2type(ace) != ACL_GROUP_OBJ) + goto out; + ac = list_entry(group_l.next, struct ace_container, ace_l); + ace2 = ac->ace; + if (!complementary_ace_pair(ace2, ace)) + goto out; + list_del(group_l.next); + kfree(ac); + + /* groups (deny aces) */ + + while (!list_empty(&group_l)) { + ace = get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) + goto out; + if (ace2type(ace) != ACL_GROUP) + goto out; + ac = list_entry(group_l.next, struct ace_container, ace_l); + ace2 = ac->ace; + if (!complementary_ace_pair(ace2, ace)) + goto out; + list_del(group_l.next); + kfree(ac); + } + + ace = get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) + goto out; + if (ace2type(ace) != ACL_OTHER) + goto out; + error = 0; +out: + while (!list_empty(&group_l)) { + ac = list_entry(group_l.next, struct ace_container, ace_l); + list_del(group_l.next); + kfree(ac); + } + return error; +} + +static inline int +mask_from_v4(struct nfs4_acl *n4acl, struct list_head **p, + struct nfs4_ace **mask_ace, + struct 
posix_acl *pacl, struct posix_acl_entry **pace, + struct nfs4_acl_idmapper *idmapper, void *idarg) +{ + int error = -EINVAL; + struct nfs4_ace *ace; + + ace = list_entry(*p, struct nfs4_ace, l_ace); + if (pacl->a_count != 3) { + if (*mask_ace == NULL) + goto out; + (*mask_ace)->access_mask = ~(*mask_ace)->access_mask; + error = write_pace(*mask_ace, pacl, pace, ACL_MASK, idmapper, idarg); /* propagate a rejected mask entry instead of ignoring it */ + } else + error = 0; /* three-entry ACL: no mask entry to emit */ +out: + return error; +} + +static inline int +other_from_v4(struct nfs4_acl *n4acl, struct list_head **p, + struct posix_acl *pacl, struct posix_acl_entry **pace, + struct nfs4_acl_idmapper *idmapper, void *idarg) +{ + int error = -EINVAL; + struct nfs4_ace *ace, *ace2; + + ace = list_entry(*p, struct nfs4_ace, l_ace); + if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) + goto out; + error = write_pace(ace, pacl, pace, ACL_OTHER, idmapper, idarg); + if (error < 0) + goto out; + error = -EINVAL; + ace2 = get_next_v4_ace(p, &n4acl->ace_head); + if (ace2 == NULL) + goto out; + if (!complementary_ace_pair(ace, ace2)) + goto out; + error = 0; +out: + return error; +} + +static int +calculate_posix_ace_count(struct nfs4_acl *n4acl) +{ + if (n4acl->naces == 6) /* owner, owner group, and other only */ + return 3; + else { /* Otherwise there must be a mask entry. 
*/ + /* Also, the remaining entries are for named users and + * groups, and come in threes (mask, allow, deny): */ + if (n4acl->naces < 7 || (n4acl->naces - 7) % 3) /* naces is u32: guard the subtraction against wrap-around */ + return -1; + return 4 + (n4acl->naces - 7)/3; + } +} + +static struct posix_acl * +_nfsv4_to_posix_one(struct nfs4_acl_idmapper *idmapper, void *idarg, struct nfs4_acl *n4acl) +{ + struct posix_acl *pacl; + int error = -EINVAL, nace = 0; + struct list_head *p; + struct nfs4_ace *mask_ace = NULL; + struct posix_acl_entry *pace; + + nace = calculate_posix_ace_count(n4acl); + if (nace < 0) /* ACE count cannot map to a posix acl: fail with -EINVAL */ + goto out_err; + pacl = posix_acl_alloc(nace, GFP_KERNEL); + error = -ENOMEM; + if (pacl == NULL) + goto out_err; + + pace = &pacl->a_entries[0]; + p = &n4acl->ace_head; + + error = user_obj_from_v4(n4acl, &p, pacl, &pace, idmapper, idarg); + if (error) + goto out_acl; + + error = users_from_v4(n4acl, &p, &mask_ace, pacl, &pace, idmapper, + idarg); + if (error) + goto out_acl; + + error = group_obj_and_groups_from_v4(n4acl, &p, &mask_ace, pacl, &pace, + idmapper, idarg); + if (error) + goto out_acl; + + error = mask_from_v4(n4acl, &p, &mask_ace, pacl, &pace, idmapper, idarg); + if (error) + goto out_acl; + error = other_from_v4(n4acl, &p, pacl, &pace, idmapper, idarg); + if (error) + goto out_acl; + + error = -EINVAL; + if (p->next != &n4acl->ace_head) + goto out_acl; + if (pace != pacl->a_entries + pacl->a_count) + goto out_acl; + + sort_pacl(pacl); + + return pacl; +out_acl: + posix_acl_release(pacl); +out_err: + pacl = ERR_PTR(error); + return pacl; +} + + +struct nfs4_acl * +nfs4_acl_new(void) +{ + struct nfs4_acl *acl; + + if ((acl = kmalloc(sizeof(*acl), GFP_KERNEL)) == NULL) + return NULL; + + acl->naces = 0; + INIT_LIST_HEAD(&acl->ace_head); + + return acl; +} + +void +nfs4_acl_free(struct nfs4_acl *acl) +{ + struct list_head *h; + struct nfs4_ace *ace; + + if (!acl) + return; + + while (!list_empty(&acl->ace_head)) { + h = acl->ace_head.next; + list_del(h); + ace = list_entry(h, struct nfs4_ace, l_ace); + if (ace->who != NULL) + kfree(ace->who); + 
kfree(ace); + } + + kfree(acl); + + return; +} + +int +nfs4_acl_add_ace(struct nfs4_acl *acl, u32 type, u32 flag, u32 access_mask, + char *who, u32 wholen) +{ + struct nfs4_ace *ace; + + if ((ace = kmalloc(sizeof(*ace), GFP_KERNEL)) == NULL) + return -1; + + ace->type = type; + ace->flag = flag; + ace->access_mask = access_mask; + ace->who = NULL; /* nfs4_acl_free() tests ->who, so it must be defined even when wholen == 0 */ + if (wholen > 0) { + if ((ace->who = kmalloc(wholen, GFP_KERNEL)) == NULL) + goto fail; + memcpy(ace->who, who, wholen); + } + ace->wholen = wholen; + + list_add_tail(&ace->l_ace, &acl->ace_head); + + return ++acl->naces; /* XXXJBF: why? */ + +fail: + kfree(ace); + return -1; +} + +int +nfs4_acl_merge(struct nfs4_acl *fromacl, struct nfs4_acl *withacl) +{ + struct nfs4_ace *ace; + struct list_head *h; + + if (fromacl == NULL || withacl == NULL) + return 0; + + while (!list_empty(&fromacl->ace_head)) { + h = fromacl->ace_head.next; + list_del(h); + ace = list_entry(h, struct nfs4_ace, l_ace); + /* XXX */ + ace->flag |= NFS4_ACE_FILE_INHERIT_ACE | + NFS4_ACE_DIRECTORY_INHERIT_ACE | NFS4_ACE_INHERIT_ONLY_ACE; + list_add_tail(&ace->l_ace, &withacl->ace_head); + withacl->naces++; + } + + return 0; +} + +int +nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl) +{ + struct list_head *h, *n; + struct nfs4_ace *ace; + int error = 0; + + list_for_each_safe(h, n, &acl->ace_head) { + ace = list_entry(h, struct nfs4_ace, l_ace); + + if (!(ace->flag & NFS4_ACE_DIRECTORY_INHERIT_ACE && + ace->flag & NFS4_ACE_FILE_INHERIT_ACE && + ace->flag & NFS4_ACE_INHERIT_ONLY_ACE)) + continue; + + error = nfs4_acl_add_ace(dacl, ace->type, ace->flag, + ace->access_mask, ace->who, ace->wholen) < 0 ? -ENOMEM : 0; + if (error < 0) /* copy failed: keep the source ACE and report the error */ + goto out; + + list_del(h); + if (ace->who != NULL) + kfree(ace->who); + kfree(ace); + acl->naces--; + } + +out: + return error; +} + +static struct { + char *string; + int stringlen; + short type; +} s2t_map[] = { + { + .string = "OWNER@", + .stringlen = sizeof("OWNER@") - 1, + .type = ACL_USER_OBJ + }, + { + .string = "GROUP@", + .stringlen = 
sizeof("GROUP@") - 1, + .type = ACL_GROUP_OBJ + }, + { + .string = "EVERYONE@", + .stringlen = sizeof("EVERYONE@") - 1, + .type = ACL_OTHER + }, +}; + +static short +ace2type(struct nfs4_ace *ace) +{ + int i; + + if (ace->who == NULL || ace->wholen <= 0) + return (0); + + for (i = 0; i < sizeof(s2t_map) / sizeof(*s2t_map); i++) + if (s2t_map[i].stringlen == ace->wholen && + strncmp(s2t_map[i].string, ace->who, ace->wholen) == 0) + return (s2t_map[i].type); + + return (ace->flag & NFS4_ACE_IDENTIFIER_GROUP ? ACL_GROUP : ACL_USER); +} diff -puN /dev/null fs/nfs4acl/acl_syms.c --- /dev/null 2004-01-26 19:20:21.000000000 -0500 +++ linux-2.6.3-bfields/fs/nfs4acl/acl_syms.c 2004-02-19 16:47:06.000000000 -0500 @@ -0,0 +1,51 @@ +/* + * fs/nfs4acl/acl_syms.c + * + * Common NFSv4 ACL handling symbol exports. + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. + * + * Marius Aamodt Eriksen + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#include +#include + +#include +#include +#include +#include +#include + +EXPORT_SYMBOL(nfs4_acl_new); +EXPORT_SYMBOL(nfs4_acl_free); +EXPORT_SYMBOL(nfs4_acl_merge); +EXPORT_SYMBOL(nfs4_acl_split); +EXPORT_SYMBOL(nfs4_acl_add_ace); diff -puN /dev/null fs/nfs4acl/Makefile --- /dev/null 2004-01-26 19:20:21.000000000 -0500 +++ linux-2.6.3-bfields/fs/nfs4acl/Makefile 2004-02-19 16:47:06.000000000 -0500 @@ -0,0 +1,3 @@ +obj-$(CONFIG_NFS_V4_ACL) += nfs4acl.o + +nfs4acl-objs := acl.o acl_syms.o diff -puN /dev/null include/linux/nfs4_acl.h --- /dev/null 2004-01-26 19:20:21.000000000 -0500 +++ linux-2.6.3-bfields/include/linux/nfs4_acl.h 2004-02-19 16:47:06.000000000 -0500 @@ -0,0 +1,68 @@ +/* + * include/linux/nfs4_acl.h + * + * Common NFSv4 ACL handling definitions. + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. + * + * Marius Aamodt Eriksen + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef LINUX_NFS4_ACL_H +#define LINUX_NFS4_ACL_H + +#include + +#define NFS4_ACL_TYPE_ACCESS 0 +#define NFS4_ACL_TYPE_DEFAULT 1 + +/* XXX from include/linux/nfs_idmap.h: */ +#define IDMAP_NAMESZ 128 + +struct nfs4_acl_idmapper { + int (*name2uid)(void *, const char *, size_t len, __u32 *); + int (*name2gid)(void *, const char *, size_t len, __u32 *); + int (*uid2name)(void *, __u32, char *); + int (*gid2name)(void *, __u32, char *); +}; + +struct nfs4_acl *nfs4_acl_new(void); +void nfs4_acl_free(struct nfs4_acl *); +int nfs4_acl_merge(struct nfs4_acl *, struct nfs4_acl *); +int nfs4_acl_split(struct nfs4_acl *, struct nfs4_acl *); +int nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, + u32, char *, u32); +void nfs4_acl_print(struct nfs4_acl *); +struct nfs4_acl *nfs4_acl_posix_to_nfsv4(struct nfs4_acl_idmapper *, void *, + struct posix_acl *, struct posix_acl *); +int nfs4_acl_nfsv4_to_posix(struct nfs4_acl_idmapper *, void *, + struct nfs4_acl *, struct posix_acl **, + struct posix_acl **); + +#endif /* LINUX_NFS4_ACL_H */ diff 
-puN include/linux/nfs4.h~CITI_NFS4_ALL include/linux/nfs4.h --- linux-2.6.3/include/linux/nfs4.h~CITI_NFS4_ALL 2004-02-19 16:47:06.000000000 -0500 +++ linux-2.6.3-bfields/include/linux/nfs4.h 2004-02-19 16:47:14.000000000 -0500 @@ -37,14 +37,67 @@ #define NFS4_SHARE_ACCESS_BOTH 0x0003 #define NFS4_SHARE_DENY_READ 0x0001 #define NFS4_SHARE_DENY_WRITE 0x0002 +#define NFS4_SHARE_DENY_BOTH 0x0003 #define NFS4_SET_TO_SERVER_TIME 0 #define NFS4_SET_TO_CLIENT_TIME 1 -#define NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE 0 -#define NFS4_ACE_ACCESS_DENIED_ACE_TYPE 1 -#define NFS4_ACE_SYSTEM_AUDIT_ACE_TYPE 2 -#define NFS4_ACE_SYSTEM_ALARM_ACE_TYPE 3 +#define ACL4_SUPPORT_ALLOW_ACL 0x00000001 +#define ACL4_SUPPORT_DENY_ACL 0x00000002 +#define ACL4_SUPPORT_AUDIT_ACL 0x00000004 +#define ACL4_SUPPORT_ALARM_ACL 0x00000008 + +#define NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE 0x00000000 +#define NFS4_ACE_ACCESS_DENIED_ACE_TYPE 0x00000001 +#define NFS4_ACE_SYSTEM_AUDIT_ACE_TYPE 0x00000002 +#define NFS4_ACE_SYSTEM_ALARM_ACE_TYPE 0x00000003 + +#define NFS4_ACE_FILE_INHERIT_ACE 0x00000001 +#define NFS4_ACE_DIRECTORY_INHERIT_ACE 0x00000002 +#define NFS4_ACE_NO_PROPAGATE_INHERIT_ACE 0x00000004 +#define NFS4_ACE_INHERIT_ONLY_ACE 0x00000008 +#define NFS4_ACE_SUCCESSFUL_ACCESS_ACE_FLAG 0x00000010 +#define NFS4_ACE_FAILED_ACCESS_ACE_FLAG 0x00000020 +#define NFS4_ACE_IDENTIFIER_GROUP 0x00000040 +#define NFS4_ACE_OWNER 0x00000080 +#define NFS4_ACE_GROUP 0x00000100 +#define NFS4_ACE_EVERYONE 0x00000200 + +#define NFS4_ACE_READ_DATA 0x00000001 +#define NFS4_ACE_LIST_DIRECTORY 0x00000001 +#define NFS4_ACE_WRITE_DATA 0x00000002 +#define NFS4_ACE_ADD_FILE 0x00000002 +#define NFS4_ACE_APPEND_DATA 0x00000004 +#define NFS4_ACE_ADD_SUBDIRECTORY 0x00000004 +#define NFS4_ACE_READ_NAMED_ATTRS 0x00000008 +#define NFS4_ACE_WRITE_NAMED_ATTRS 0x00000010 +#define NFS4_ACE_EXECUTE 0x00000020 +#define NFS4_ACE_DELETE_CHILD 0x00000040 +#define NFS4_ACE_READ_ATTRIBUTES 0x00000080 +#define NFS4_ACE_WRITE_ATTRIBUTES 0x00000100 +#define 
NFS4_ACE_DELETE 0x00010000 +#define NFS4_ACE_READ_ACL 0x00020000 +#define NFS4_ACE_WRITE_ACL 0x00040000 +#define NFS4_ACE_WRITE_OWNER 0x00080000 +#define NFS4_ACE_SYNCHRONIZE 0x00100000 +#define NFS4_ACE_GENERIC_READ 0x00120081 +#define NFS4_ACE_GENERIC_WRITE 0x00160106 +#define NFS4_ACE_GENERIC_EXECUTE 0x001200A0 +#define NFS4_ACE_MASK_ALL 0x001F01FF + +struct nfs4_ace { + u32 type; + u32 flag; + u32 access_mask; + char *who; + u32 wholen; + struct list_head l_ace; +}; + +struct nfs4_acl { + u32 naces; + struct list_head ace_head; +}; typedef struct { char data[NFS4_VERIFIER_SIZE]; } nfs4_verifier; typedef struct { char data[16]; } nfs4_stateid; @@ -86,6 +139,8 @@ enum nfs_opnum4 { OP_SETCLIENTID_CONFIRM = 36, OP_VERIFY = 37, OP_WRITE = 38, + OP_RELEASE_LOCKOWNER = 39, + OP_ILLEGAL = 10044, }; enum nfsstat4 { @@ -283,7 +338,6 @@ enum lock_type4 { enum { NFSPROC4_CLNT_NULL = 0, /* Unused */ - NFSPROC4_CLNT_COMPOUND, /* Soon to be unused */ NFSPROC4_CLNT_READ, NFSPROC4_CLNT_WRITE, NFSPROC4_CLNT_COMMIT, @@ -300,6 +354,22 @@ enum { NFSPROC4_CLNT_LOCK, NFSPROC4_CLNT_LOCKT, NFSPROC4_CLNT_LOCKU, + NFSPROC4_CLNT_GETACL, + NFSPROC4_CLNT_SETACL, + NFSPROC4_CLNT_ACCESS, + NFSPROC4_CLNT_GETATTR, + NFSPROC4_CLNT_LOOKUP, + NFSPROC4_CLNT_GETROOT_HEAD, + NFSPROC4_CLNT_GETROOT_PATH, + NFSPROC4_CLNT_REMOVE, + NFSPROC4_CLNT_RENAME, + NFSPROC4_CLNT_LINK, + NFSPROC4_CLNT_CREATE, + NFSPROC4_CLNT_PATHCONF, + NFSPROC4_CLNT_STATFS, + NFSPROC4_CLNT_UNLINK, + NFSPROC4_CLNT_READLINK, + NFSPROC4_CLNT_READDIR, }; #endif diff -puN fs/nfs/nfs4xdr.c~CITI_NFS4_ALL fs/nfs/nfs4xdr.c --- linux-2.6.3/fs/nfs/nfs4xdr.c~CITI_NFS4_ALL 2004-02-19 16:47:07.000000000 -0500 +++ linux-2.6.3-bfields/fs/nfs/nfs4xdr.c 2004-02-19 16:47:15.000000000 -0500 @@ -51,6 +51,7 @@ #include #include #include +#include #define NFSDBG_FACILITY NFSDBG_XDR @@ -81,11 +82,15 @@ static int nfs_stat_to_errno(int); #define decode_putrootfh_maxsz op_decode_hdr_maxsz #define encode_getfh_maxsz op_encode_hdr_maxsz #define 
decode_getfh_maxsz op_decode_hdr_maxsz + 1 + \ - (NFS4_FHSIZE >> 2) + ((3+NFS4_FHSIZE) >> 2) #define encode_getattr_maxsz op_encode_hdr_maxsz + 3 -#define nfs4_fattr_bitmap_maxsz 26 + 2 * ((NFS4_MAXNAMLEN +1) >> 2) +#define nfs4_name_maxsz ( 1 + ((3+NFS4_MAXNAMLEN) >> 2) ) +#define nfs4_fattr_bitmap_maxsz 36 + 2 * nfs4_name_maxsz #define decode_getattr_maxsz op_decode_hdr_maxsz + 3 + \ nfs4_fattr_bitmap_maxsz +#define encode_setattr_maxsz op_decode_hdr_maxsz + 4 + \ + nfs4_fattr_bitmap_maxsz +#define decode_setattr_maxsz op_decode_hdr_maxsz + 3 #define encode_savefh_maxsz op_encode_hdr_maxsz #define decode_savefh_maxsz op_decode_hdr_maxsz #define encode_restorefh_maxsz op_encode_hdr_maxsz @@ -115,6 +120,18 @@ static int nfs_stat_to_errno(int); 3 + (NFS4_VERIFIER_SIZE >> 2) #define decode_setclientid_confirm_maxsz \ op_decode_hdr_maxsz +#define encode_lookup_maxsz op_encode_hdr_maxsz + \ + 1 + ((3 + NFS_MAXFHSIZE) >> 2) +#define encode_remove_maxsz op_encode_hdr_maxsz + \ + nfs4_name_maxsz +#define encode_rename_maxsz op_encode_hdr_maxsz + \ + 2 * nfs4_name_maxsz +#define encode_link_maxsz op_encode_hdr_maxsz + \ + nfs4_name_maxsz +#define encode_create_maxsz op_encode_hdr_maxsz + \ + 2 + 2 * nfs4_name_maxsz + \ + nfs4_fattr_bitmap_maxsz +#define decode_create_maxsz op_decode_hdr_maxsz + 8 #define NFS4_enc_compound_sz 1024 /* XXX: large enough? */ #define NFS4_dec_compound_sz 1024 /* XXX: large enough? 
*/ @@ -126,6 +143,18 @@ static int nfs_stat_to_errno(int); decode_putfh_maxsz + \ decode_read_getattr_maxsz + \ op_decode_hdr_maxsz + 2 +#define NFS4_enc_readlink_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + op_encode_hdr_maxsz +#define NFS4_dec_readlink_sz compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz +#define NFS4_enc_readdir_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + op_encode_hdr_maxsz + 9 +#define NFS4_dec_readdir_sz compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + 2 #define NFS4_enc_write_sz compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_pre_write_getattr_maxsz + \ @@ -255,8 +284,136 @@ static int nfs_stat_to_errno(int); decode_putfh_maxsz + \ decode_getattr_maxsz + \ op_decode_hdr_maxsz + 4 - - +#define NFS4_enc_getacl_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz +#define username_maxsz 1 + ((IDMAP_NAMESZ + 3) >> 2) +#define ace_maxsz 3 + username_maxsz +#define acl_maxentries (NFS_ACL_MAX_ENTRIES - 3) * 3 + 6 +#define acl_maxsz 1 + (acl_maxentries) * (ace_maxsz) +#define NFS4_dec_getacl_sz compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + 3 + 1 + acl_maxsz +#define NFS4_enc_setacl_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + op_encode_hdr_maxsz + 4 + 1 + acl_maxsz +#define NFS4_dec_setacl_sz compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_setattr_maxsz +#define NFS4_enc_access_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz + \ + op_encode_hdr_maxsz + 1 +#define NFS4_dec_access_sz compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_getattr_maxsz + \ + op_decode_hdr_maxsz + 2 +#define NFS4_enc_getattr_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz +#define NFS4_dec_getattr_sz compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_getattr_maxsz +#define NFS4_enc_lookup_sz 
compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz + \ + encode_lookup_maxsz + \ + encode_getattr_maxsz + \ + encode_getfh_maxsz +#define NFS4_dec_lookup_sz compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_getattr_maxsz + \ + op_decode_hdr_maxsz + \ + decode_getattr_maxsz + \ + decode_getfh_maxsz +#define NFS4_enc_getroot_head_sz compound_encode_hdr_maxsz + \ + op_encode_hdr_maxsz + 1 + \ + encode_getattr_maxsz + \ + encode_getfh_maxsz +#define NFS4_dec_getroot_head_sz compound_decode_hdr_maxsz + \ + op_decode_hdr_maxsz + \ + decode_getattr_maxsz + \ + decode_getfh_maxsz +#define NFS4_enc_getroot_path_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_lookup_maxsz + \ + encode_getattr_maxsz + \ + encode_getfh_maxsz +#define NFS4_dec_getroot_path_sz compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + \ + decode_getattr_maxsz + \ + decode_getfh_maxsz +#define NFS4_enc_remove_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_remove_maxsz + \ + encode_getattr_maxsz +#define NFS4_dec_remove_sz compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + 5 + \ + decode_getattr_maxsz +#define NFS4_enc_unlink_sz NFS4_enc_remove_sz +#define NFS4_dec_unlink_sz NFS4_dec_remove_sz +#define NFS4_enc_rename_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_savefh_maxsz + \ + encode_putfh_maxsz + \ + encode_rename_maxsz + \ + encode_getattr_maxsz + \ + encode_restorefh_maxsz + \ + encode_getattr_maxsz +#define NFS4_dec_rename_sz compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_savefh_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + 5 + 5 + \ + decode_getattr_maxsz + \ + decode_restorefh_maxsz + \ + decode_getattr_maxsz +#define NFS4_enc_link_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_savefh_maxsz + \ + encode_putfh_maxsz + \ + encode_link_maxsz + \ + encode_getattr_maxsz + \ + 
encode_restorefh_maxsz + \ + encode_getattr_maxsz +#define NFS4_dec_link_sz compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_savefh_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + 5 + \ + decode_getattr_maxsz + \ + decode_restorefh_maxsz + \ + decode_getattr_maxsz +#define NFS4_enc_create_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_savefh_maxsz + \ + encode_create_maxsz + \ + encode_getattr_maxsz + \ + encode_getfh_maxsz + \ + encode_restorefh_maxsz + \ + encode_getattr_maxsz +#define NFS4_dec_create_sz compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + \ + decode_create_maxsz + \ + decode_getattr_maxsz + \ + decode_getfh_maxsz + \ + op_decode_hdr_maxsz + \ + decode_getattr_maxsz +#define NFS4_enc_pathconf_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz +#define NFS4_dec_pathconf_sz compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + 6 +#define NFS4_enc_statfs_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz +#define NFS4_dec_statfs_sz compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + 12 static struct { unsigned int mode; @@ -333,8 +490,7 @@ encode_compound_hdr(struct xdr_stream *x } static int -encode_attrs(struct xdr_stream *xdr, struct iattr *iap, - struct nfs_server *server) +encode_attrs(struct xdr_stream *xdr, struct iattr *iap, struct nfs_server *server) { char owner_name[IDMAP_NAMESZ]; char owner_group[IDMAP_NAMESZ]; @@ -352,7 +508,7 @@ encode_attrs(struct xdr_stream *xdr, str * In the worst-case, this would be * 12(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime) * = 36 bytes, plus any contribution from variable-length fields - * such as owner/group/acl's. + * such as owner/group. 
*/ len = 16; @@ -392,6 +548,7 @@ encode_attrs(struct xdr_stream *xdr, str len += 16; else if (iap->ia_valid & ATTR_MTIME) len += 4; + RESERVE_SPACE(len); /* @@ -462,13 +619,13 @@ encode_attrs(struct xdr_stream *xdr, str } static int -encode_access(struct xdr_stream *xdr, struct nfs4_access *access) +encode_access(struct xdr_stream *xdr, u32 access) { uint32_t *p; RESERVE_SPACE(8); WRITE32(OP_ACCESS); - WRITE32(access->ac_req_access); + WRITE32(access); return 0; } @@ -500,37 +657,36 @@ encode_commit(struct xdr_stream *xdr, st } static int -encode_create(struct xdr_stream *xdr, struct nfs4_create *create, - struct nfs_server *server) +encode_create(struct xdr_stream *xdr, struct nfs4_create_arg *create) { uint32_t *p; RESERVE_SPACE(8); WRITE32(OP_CREATE); - WRITE32(create->cr_ftype); + WRITE32(create->ftype); - switch (create->cr_ftype) { + switch (create->ftype) { case NF4LNK: - RESERVE_SPACE(4 + create->cr_textlen); - WRITE32(create->cr_textlen); - WRITEMEM(create->cr_text, create->cr_textlen); + RESERVE_SPACE(4 + create->u.symlink->len); + WRITE32(create->u.symlink->len); + WRITEMEM(create->u.symlink->name, create->u.symlink->len); break; case NF4BLK: case NF4CHR: RESERVE_SPACE(8); - WRITE32(create->cr_specdata1); - WRITE32(create->cr_specdata2); + WRITE32(create->u.device.specdata1); + WRITE32(create->u.device.specdata2); break; default: break; } - RESERVE_SPACE(4 + create->cr_namelen); - WRITE32(create->cr_namelen); - WRITEMEM(create->cr_name, create->cr_namelen); + RESERVE_SPACE(4 + create->name->len); + WRITE32(create->name->len); + WRITEMEM(create->name->name, create->name->len); - return encode_attrs(xdr, create->cr_attrs, server); + return encode_attrs(xdr, create->attrs, create->server); } static int @@ -558,11 +714,14 @@ encode_getattr_two(struct xdr_stream *xd return 0; } +extern u32 nfs4_fattr_bitmap[]; +extern u32 nfs4_statfs_bitmap[]; + static inline int -encode_getattr(struct xdr_stream *xdr, struct nfs4_getattr *getattr) +encode_getfattr(struct 
xdr_stream *xdr) { - return encode_getattr_two(xdr, getattr->gt_bmval[0], - getattr->gt_bmval[1]); + return encode_getattr_two(xdr, nfs4_fattr_bitmap[0], + nfs4_fattr_bitmap[1]); } /* @@ -618,14 +777,14 @@ encode_getfh(struct xdr_stream *xdr) } static int -encode_link(struct xdr_stream *xdr, struct nfs4_link *link) +encode_link(struct xdr_stream *xdr, struct qstr *name) { uint32_t *p; - RESERVE_SPACE(8 + link->ln_namelen); + RESERVE_SPACE(8 + name->len); WRITE32(OP_LINK); - WRITE32(link->ln_namelen); - WRITEMEM(link->ln_name, link->ln_namelen); + WRITE32(name->len); + WRITEMEM(name->name, name->len); return 0; } @@ -705,15 +864,15 @@ encode_locku(struct xdr_stream *xdr, str } static int -encode_lookup(struct xdr_stream *xdr, struct nfs4_lookup *lookup) +encode_lookup(struct xdr_stream *xdr, struct qstr *name) { - int len = lookup->lo_name->len; + int len = name->len; uint32_t *p; RESERVE_SPACE(8 + len); WRITE32(OP_LOOKUP); WRITE32(len); - WRITEMEM(lookup->lo_name->name, len); + WRITEMEM(name->name, len); return 0; } @@ -883,7 +1042,7 @@ encode_read(struct xdr_stream *xdr, stru } static int -encode_readdir(struct xdr_stream *xdr, struct nfs4_readdir *readdir, struct rpc_rqst *req) +encode_readdir(struct xdr_stream *xdr, struct nfs4_readdir_arg *readdir, struct rpc_rqst *req) { struct rpc_auth *auth = req->rq_task->tk_auth; int replen; @@ -891,21 +1050,21 @@ encode_readdir(struct xdr_stream *xdr, s RESERVE_SPACE(32+sizeof(nfs4_verifier)); WRITE32(OP_READDIR); - WRITE64(readdir->rd_cookie); - WRITEMEM(readdir->rd_req_verifier.data, sizeof(readdir->rd_req_verifier.data)); - WRITE32(readdir->rd_count >> 5); /* meaningless "dircount" field */ - WRITE32(readdir->rd_count); + WRITE64(readdir->cookie); + WRITEMEM(readdir->req_verifier.data, sizeof(readdir->req_verifier.data)); + WRITE32(readdir->count >> 5); /* meaningless "dircount" field */ + WRITE32(readdir->count); WRITE32(2); - WRITE32(readdir->rd_bmval[0]); - WRITE32(readdir->rd_bmval[1]); + 
WRITE32(FATTR4_WORD0_FILEID); + WRITE32(0); /* set up reply iovec * toplevel_status + taglen + rescount + OP_PUTFH + status * + OP_READDIR + status + verifer(2) = 9 */ replen = (RPC_REPHDRSIZE + auth->au_rslack + 9) << 2; - xdr_inline_pages(&req->rq_rcv_buf, replen, readdir->rd_pages, - readdir->rd_pgbase, readdir->rd_count); + xdr_inline_pages(&req->rq_rcv_buf, replen, readdir->pages, + readdir->pgbase, readdir->count); return 0; } @@ -925,37 +1084,37 @@ encode_readlink(struct xdr_stream *xdr, * + OP_READLINK + status = 7 */ replen = (RPC_REPHDRSIZE + auth->au_rslack + 7) << 2; - xdr_inline_pages(&req->rq_rcv_buf, replen, readlink->rl_pages, 0, readlink->rl_count); + xdr_inline_pages(&req->rq_rcv_buf, replen, readlink->pages, 0, readlink->count); return 0; } static int -encode_remove(struct xdr_stream *xdr, struct nfs4_remove *remove) +encode_remove(struct xdr_stream *xdr, struct qstr *name) { uint32_t *p; - RESERVE_SPACE(8 + remove->rm_namelen); + RESERVE_SPACE(8 + name->len); WRITE32(OP_REMOVE); - WRITE32(remove->rm_namelen); - WRITEMEM(remove->rm_name, remove->rm_namelen); + WRITE32(name->len); + WRITEMEM(name->name, name->len); return 0; } static int -encode_rename(struct xdr_stream *xdr, struct nfs4_rename *rename) +encode_rename(struct xdr_stream *xdr, struct qstr *oldname, struct qstr *newname) { uint32_t *p; - RESERVE_SPACE(8 + rename->rn_oldnamelen); + RESERVE_SPACE(8 + oldname->len); WRITE32(OP_RENAME); - WRITE32(rename->rn_oldnamelen); - WRITEMEM(rename->rn_oldname, rename->rn_oldnamelen); + WRITE32(oldname->len); + WRITEMEM(oldname->name, oldname->len); - RESERVE_SPACE(4 + rename->rn_newnamelen); - WRITE32(rename->rn_newnamelen); - WRITEMEM(rename->rn_newname, rename->rn_newnamelen); + RESERVE_SPACE(4 + newname->len); + WRITE32(newname->len); + WRITEMEM(newname->name, newname->len); return 0; } @@ -1011,6 +1170,39 @@ encode_setattr(struct xdr_stream *xdr, s return 0; } +extern nfs4_stateid zero_stateid; + +#ifdef CONFIG_NFS_V4_ACL + +static int 
+encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg) +{ + uint32_t *p, *attrbuflen; + struct nfs4_ace *ace; + struct nfs4_acl *acl = arg->acl; + + RESERVE_SPACE(4+sizeof(zero_stateid.data)); + WRITE32(OP_SETATTR); + WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data)); + RESERVE_SPACE(4*4); + WRITE32(1); + WRITE32(FATTR4_WORD0_ACL); + attrbuflen = p++; + WRITE32(acl->naces); + list_for_each_entry(ace, &acl->ace_head, l_ace) { + RESERVE_SPACE(4*4 + (XDR_QUADLEN(ace->wholen) << 2)); + WRITE32(ace->type); + WRITE32(ace->flag); + WRITE32(ace->access_mask & NFS4_ACE_MASK_ALL); + WRITE32(ace->wholen); + WRITEMEM(ace->who, ace->wholen); + } + *attrbuflen = htonl((char *)p - (char *)attrbuflen - 4); + return 0; +} + +#endif /* CONFIG_NFS_V4_ACL */ + static int encode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid *setclientid) { @@ -1068,312 +1260,566 @@ encode_write(struct xdr_stream *xdr, str return 0; } - -/* FIXME: this sucks */ -static int -encode_compound(struct xdr_stream *xdr, struct nfs4_compound *cp, struct rpc_rqst *req) -{ - struct compound_hdr hdr = { - .taglen = cp->taglen, - .tag = cp->tag, - .nops = cp->req_nops, - }; - int i, status = 0; - - encode_compound_hdr(xdr, &hdr); - - for (i = 0; i < cp->req_nops; i++) { - switch (cp->ops[i].opnum) { - case OP_ACCESS: - status = encode_access(xdr, &cp->ops[i].u.access); - break; - case OP_CREATE: - status = encode_create(xdr, &cp->ops[i].u.create, cp->server); - break; - case OP_GETATTR: - status = encode_getattr(xdr, &cp->ops[i].u.getattr); - break; - case OP_GETFH: - status = encode_getfh(xdr); - break; - case OP_LINK: - status = encode_link(xdr, &cp->ops[i].u.link); - break; - case OP_LOOKUP: - status = encode_lookup(xdr, &cp->ops[i].u.lookup); - break; - case OP_PUTFH: - status = encode_putfh(xdr, cp->ops[i].u.putfh.pf_fhandle); - break; - case OP_PUTROOTFH: - status = encode_putrootfh(xdr); - break; - case OP_READDIR: - status = encode_readdir(xdr, &cp->ops[i].u.readdir, req); - 
break; - case OP_READLINK: - status = encode_readlink(xdr, &cp->ops[i].u.readlink, req); - break; - case OP_REMOVE: - status = encode_remove(xdr, &cp->ops[i].u.remove); - break; - case OP_RENAME: - status = encode_rename(xdr, &cp->ops[i].u.rename); - break; - case OP_RESTOREFH: - status = encode_restorefh(xdr); - break; - case OP_SAVEFH: - status = encode_savefh(xdr); - break; - default: - BUG(); - } - if (status) - return status; - } - - return 0; -} /* * END OF "GENERIC" ENCODE ROUTINES. */ - /* - * Encode COMPOUND argument + * Encode ACCESS request */ static int -nfs4_xdr_enc_compound(struct rpc_rqst *req, uint32_t *p, struct nfs4_compound *cp) +nfs4_xdr_enc_access(struct rpc_rqst *req, uint32_t *p, struct nfs4_accessargs *args) { struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 3, + }; int status; - - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - status = encode_compound(&xdr, cp, req); - cp->timestamp = jiffies; - return status; -} -/* - * Encode a CLOSE request - */ -static int -nfs4_xdr_enc_close(struct rpc_rqst *req, uint32_t *p, struct nfs_closeargs *args) -{ - struct xdr_stream xdr; - struct compound_hdr hdr = { - .nops = 2, - }; - int status; - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); - if(status) - goto out; - status = encode_close(&xdr, args); + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fhandle); + if (status) + goto out; + status = encode_getfattr(&xdr); + if (status) + goto out; + status = encode_access(&xdr, args->req_access); out: - return status; + return status; } /* - * Encode an OPEN request + * Encode LOOKUP request */ static int -nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_openargs *args) +nfs4_xdr_enc_lookup(struct rpc_rqst *req, uint32_t *p, struct nfs4_lookupargs *args) { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 7, + .nops = 5, }; int 
status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); + status = encode_putfh(&xdr, args->dir_fh); if (status) goto out; - status = encode_savefh(&xdr); + status = encode_getfattr(&xdr); if (status) goto out; - status = encode_open(&xdr, args); + status = encode_lookup(&xdr, args->name); if (status) goto out; - status = encode_getattr(&xdr, args->f_getattr); + status = encode_getfattr(&xdr); if (status) goto out; status = encode_getfh(&xdr); - if (status) - goto out; - status = encode_restorefh(&xdr); - if (status) - goto out; - status = encode_getattr(&xdr, args->d_getattr); out: return status; } /* - * Encode an OPEN_CONFIRM request + * Encode GETROOT_HEAD request */ static int -nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_open_confirmargs *args) +nfs4_xdr_enc_getroot_head(struct rpc_rqst *req, uint32_t *p, void *args) { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 3, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); - if(status) + status = encode_putrootfh(&xdr); + if (status) goto out; - status = encode_open_confirm(&xdr, args); + status = encode_getfattr(&xdr); + if (status) + goto out; + status = encode_getfh(&xdr); out: return status; } /* - * Encode an OPEN request + * Encode GETROOT_PATH request */ static int -nfs4_xdr_enc_open_reclaim(struct rpc_rqst *req, uint32_t *p, - struct nfs_open_reclaimargs *args) +nfs4_xdr_enc_getroot_path(struct rpc_rqst *req, uint32_t *p, struct nfs4_getroot_arg *args) { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 3, + .nops = 4, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); + status = encode_putfh(&xdr, args->fhandle); if (status) goto out; - status = encode_open_reclaim(&xdr, args); + status = encode_lookup(&xdr, 
args->name); + if (status) + goto out; + status = encode_getfattr(&xdr); if (status) goto out; - status = encode_getattr(&xdr, args->f_getattr); + status = encode_getfh(&xdr); out: return status; } /* - * Encode an OPEN_DOWNGRADE request + * Encode REMOVE request */ static int -nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct nfs_closeargs *args) +nfs4_xdr_enc_remove(struct rpc_rqst *req, uint32_t *p, struct nfs4_remove_arg *args) { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 3, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); + status = encode_putfh(&xdr, args->fhandle); if (status) goto out; - status = encode_open_downgrade(&xdr, args); + status = encode_remove(&xdr, args->name); + if (status) + goto out; + status = encode_getfattr(&xdr); out: return status; } /* - * Encode a LOCK request + * Encode UNLINK request */ static int -nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) +nfs4_xdr_enc_unlink(struct rpc_rqst *req, uint32_t *p, struct nfs4_unlink *args) { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 3, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); - if(status) + if (status) goto out; - status = encode_lock(&xdr, args); + status = encode_remove(&xdr, args->name); + if (status) + goto out; + status = encode_getfattr(&xdr); out: return status; } /* - * Encode a LOCKT request + * Encode RENAME request */ static int -nfs4_xdr_enc_lockt(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) +nfs4_xdr_enc_rename(struct rpc_rqst *req, uint32_t *p, struct nfs4_rename_arg *args) { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 7, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); 
- if(status) + status = encode_putfh(&xdr, args->old_dir); + if (status) goto out; - status = encode_lockt(&xdr, args); + status = encode_savefh(&xdr); + if (status) + goto out; + status = encode_putfh(&xdr, args->new_dir); + if (status) + goto out; + status = encode_rename(&xdr, args->old_name, args->new_name); + if (status) + goto out; + status = encode_getfattr(&xdr); + if (status) + goto out; + status = encode_restorefh(&xdr); + if (status) + goto out; + status = encode_getfattr(&xdr); out: return status; } /* - * Encode a LOCKU request + * Encode LINK request */ static int -nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) +nfs4_xdr_enc_link(struct rpc_rqst *req, uint32_t *p, struct nfs4_link_arg *args) { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 7, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); - if(status) + if (status) goto out; - status = encode_locku(&xdr, args); + status = encode_savefh(&xdr); + if (status) + goto out; + status = encode_putfh(&xdr, args->dir_fh); + if (status) + goto out; + status = encode_link(&xdr, args->name); + if (status) + goto out; + status = encode_getfattr(&xdr); + if (status) + goto out; + status = encode_restorefh(&xdr); + if (status) + goto out; + status = encode_getfattr(&xdr); out: return status; } /* - * Encode a READ request + * Encode CREATE request */ static int -nfs4_xdr_enc_read(struct rpc_rqst *req, uint32_t *p, struct nfs_readargs *args) +nfs4_xdr_enc_create(struct rpc_rqst *req, uint32_t *p, struct nfs4_create_arg *args) { - struct rpc_auth *auth = req->rq_task->tk_auth; struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 3, + .nops = 7, }; - int replen, status; + int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); + status = encode_putfh(&xdr, args->dir_fh); if (status) goto out; - 
status = encode_read(&xdr, args); + status = encode_savefh(&xdr); if (status) goto out; - status = encode_read_getattr(&xdr); - - /* set up reply iovec - * toplevel status + taglen=0 + rescount + OP_PUTFH + status + status = encode_create(&xdr, args); + if (status) + goto out; + status = encode_getfattr(&xdr); + if (status) + goto out; + status = encode_getfh(&xdr); + if (status) + goto out; + status = encode_restorefh(&xdr); + if (status) + goto out; + status = encode_getfattr(&xdr); +out: + return status; +} + +/* + * Encode GETATTR request + */ +static int +nfs4_xdr_enc_getattr(struct rpc_rqst *req, uint32_t *p, struct nfs_fh *fh) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, fh); + if (status) + goto out; + status = encode_getfattr(&xdr); + out: + return status; +} + +/* + * Encode a CLOSE request + */ +static int +nfs4_xdr_enc_close(struct rpc_rqst *req, uint32_t *p, struct nfs_closeargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_close(&xdr, args); +out: + return status; +} + +/* + * Encode an OPEN request + */ +static int +nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_openargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 7, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_savefh(&xdr); + if (status) + goto out; + status = encode_open(&xdr, args); + if (status) + goto out; + status = encode_getfattr(&xdr); + if (status) + goto out; + status = encode_getfh(&xdr); + if (status) + goto out; + status = 
encode_restorefh(&xdr); + if (status) + goto out; + status = encode_getfattr(&xdr); +out: + return status; +} + +/* + * Encode an OPEN_CONFIRM request + */ +static int +nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_open_confirmargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_open_confirm(&xdr, args); +out: + return status; +} + +/* + * Encode an OPEN reclaim request + */ +static int +nfs4_xdr_enc_open_reclaim(struct rpc_rqst *req, uint32_t *p, + struct nfs_open_reclaimargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 3, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_open_reclaim(&xdr, args); + if (status) + goto out; + status = encode_getfattr(&xdr); +out: + return status; +} + +/* + * Encode an OPEN_DOWNGRADE request + */ +static int +nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct nfs_closeargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_open_downgrade(&xdr, args); +out: + return status; +} + +/* + * Encode a LOCK request + */ +static int +nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_lock(&xdr, args); +out: + return status; +} + +/* + *
Encode a LOCKT request + */ +static int +nfs4_xdr_enc_lockt(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_lockt(&xdr, args); +out: + return status; +} + +/* + * Encode a LOCKU request + */ +static int +nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_locku(&xdr, args); +out: + return status; +} + +/* + * Encode a READLINK request + */ +static int +nfs4_xdr_enc_readlink(struct rpc_rqst *req, uint32_t *p, struct nfs4_readlink *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_readlink(&xdr, args, req); +out: + return status; +} + +/* + * Encode a READDIR request + */ +static int +nfs4_xdr_enc_readdir(struct rpc_rqst *req, uint32_t *p, struct nfs4_readdir_arg *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_readdir(&xdr, args, req); +out: + return status; +} + +/* + * Encode a READ request + */ +static int +nfs4_xdr_enc_read(struct rpc_rqst *req, uint32_t *p, struct nfs_readargs *args) +{ + struct rpc_auth *auth = req->rq_task->tk_auth; + struct xdr_stream xdr; + struct compound_hdr hdr = { + 
.nops = 3, + }; + int replen, status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_read(&xdr, args); + if (status) + goto out; + status = encode_read_getattr(&xdr); + + /* set up reply iovec + * toplevel status + taglen=0 + rescount + OP_PUTFH + status * + OP_READ + status + eof + datalen = 9 */ replen = (RPC_REPHDRSIZE + auth->au_rslack + @@ -1405,12 +1851,62 @@ nfs4_xdr_enc_setattr(struct rpc_rqst *re status = encode_setattr(&xdr, args, args->server); if(status) goto out; - status = encode_getattr(&xdr, args->attr); + status = encode_getfattr(&xdr); +out: + return status; +} + +#ifdef CONFIG_NFS_V4_ACL + +/* + * Encode a SETACL request + */ +static int +nfs4_xdr_enc_setacl(struct rpc_rqst *req, uint32_t *p, struct nfs_setaclargs *args) + +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_setacl(&xdr, args); out: return status; } /* + * Encode a GETACL request + */ +static int +nfs4_xdr_enc_getacl(struct rpc_rqst *req, uint32_t *p,struct nfs_fh *fhandle) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, fhandle); + if (status) + goto out; + status = encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0); +out: + return status; + +} + +#endif /* CONFIG_NFS_V4_ACL */ + +/* + * Encode a WRITE request */ static int @@ -1487,6 +1983,48 @@ nfs4_xdr_enc_fsinfo(struct rpc_rqst *req } /* + * a PATHCONF request + */ +static int +nfs4_xdr_enc_pathconf(struct rpc_rqst *req, uint32_t *p, struct nfs_fh *fhandle) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + +
xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, fhandle); + if (!status) + status = encode_getattr_one(&xdr,FATTR4_WORD0_MAXLINK | + FATTR4_WORD0_MAXNAME ); + return status; +} + +/* + * a STATFS request + */ +static int +nfs4_xdr_enc_statfs(struct rpc_rqst *req, uint32_t *p, struct nfs_fh *fhandle) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, fhandle); + if (!status) + status = encode_getattr_two(&xdr,nfs4_statfs_bitmap[0], + nfs4_statfs_bitmap[1]); + return status; +} + +/* * a RENEW request */ static int @@ -1636,7 +2174,7 @@ decode_change_info(struct xdr_stream *xd } static int -decode_access(struct xdr_stream *xdr, struct nfs4_access *access) +decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access) { uint32_t *p; uint32_t supp, acc; @@ -1648,12 +2186,12 @@ decode_access(struct xdr_stream *xdr, st READ_BUF(8); READ32(supp); READ32(acc); - if ((supp & ~access->ac_req_access) || (acc & ~supp)) { + if ((supp & ~access->req_access) || (acc & ~supp)) { printk(KERN_NOTICE "NFS: server returned bad bits in access call!\n"); return -EIO; } - *access->ac_resp_supported = supp; - *access->ac_resp_access = acc; + *access->resp_supported = supp; + *access->resp_access = acc; return 0; } @@ -1686,7 +2224,7 @@ decode_commit(struct xdr_stream *xdr, st } static int -decode_create(struct xdr_stream *xdr, struct nfs4_create *create) +decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) { uint32_t *p; uint32_t bmlen; @@ -1695,7 +2233,7 @@ decode_create(struct xdr_stream *xdr, st status = decode_op_hdr(xdr, OP_CREATE); if (status) return status; - if ((status = decode_change_info(xdr, create->cr_cinfo))) + if ((status = decode_change_info(xdr, cinfo))) return status; READ_BUF(4); READ32(bmlen); @@ -1703,17 +2241,144 @@ 
decode_create(struct xdr_stream *xdr, st return 0; } -extern uint32_t nfs4_fattr_bitmap[2]; extern uint32_t nfs4_fsstat_bitmap[2]; -extern uint32_t nfs4_pathconf_bitmap[2]; static int -decode_getattr(struct xdr_stream *xdr, struct nfs4_getattr *getattr, +decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat) +{ + uint32_t attrlen, bmlen, + bmval0 = 0, + bmval1 = 0, + len = 0; + uint32_t *p; + int status; + + status = decode_op_hdr(xdr, OP_GETATTR); + if (status) + return status; + + READ_BUF(4); + READ32(bmlen); + if (bmlen > 2) + goto xdr_error; + + READ_BUF((bmlen << 2) + 4); + if (bmlen > 0) + READ32(bmval0); + if (bmlen > 1) + READ32(bmval1); + READ32(attrlen); + + if ((bmval0 & ~nfs4_statfs_bitmap[0]) || + (bmval1 & ~nfs4_statfs_bitmap[1])) { + dprintk("read_attrs: server returned bad attributes!\n"); + goto xdr_error; + } + + if (bmval0 & FATTR4_WORD0_FILES_AVAIL) { + READ_BUF(8); + len += 8; + READ64(fsstat->afiles); + dprintk("read_attrs: files_avail=0x%Lx\n", (long long) fsstat->afiles); + } + if (bmval0 & FATTR4_WORD0_FILES_FREE) { + READ_BUF(8); + len += 8; + READ64(fsstat->ffiles); + dprintk("read_attrs: files_free=0x%Lx\n", (long long) fsstat->ffiles); + } + if (bmval0 & FATTR4_WORD0_FILES_TOTAL) { + READ_BUF(8); + len += 8; + READ64(fsstat->tfiles); + dprintk("read_attrs: files_tot=0x%Lx\n", (long long) fsstat->tfiles); + } + + if (bmval1 & FATTR4_WORD1_SPACE_AVAIL) { + READ_BUF(8); + len += 8; + READ64(fsstat->abytes); + dprintk("read_attrs: savail=0x%Lx\n", (long long) fsstat->abytes); + } + if (bmval1 & FATTR4_WORD1_SPACE_FREE) { + READ_BUF(8); + len += 8; + READ64(fsstat->fbytes); + dprintk("read_attrs: sfree=0x%Lx\n", (long long) fsstat->fbytes); + } + if (bmval1 & FATTR4_WORD1_SPACE_TOTAL) { + READ_BUF(8); + len += 8; + READ64(fsstat->tbytes); + dprintk("read_attrs: stotal=0x%Lx\n", (long long) fsstat->tbytes); + } + if (len != attrlen) + goto xdr_error; + + DECODE_TAIL; +} + +static int +decode_pathconf(struct xdr_stream *xdr, struct 
nfs_pathconf *pathconf) +{ + uint32_t bmlen, + attrlen = 0, + bmval0 = 0, + bmval1 = 0, + len = 0; + uint32_t *p; + int status; + + status = decode_op_hdr(xdr, OP_GETATTR); + if (status) + return status; + + READ_BUF(4); + READ32(bmlen); + if ( (bmlen < 1) || (bmlen >2) ) + goto xdr_error; + READ_BUF((bmlen << 2) + 4); + READ32(bmval0); + if (bmval0 & ~(FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME)) { + goto out_bad_bitmap; + } + if (bmlen == 2) { + READ32(bmval1); + if (bmval1 != 0) + goto out_bad_bitmap; + } + + READ32(attrlen); + if (bmval0 & FATTR4_WORD0_MAXLINK) { + READ_BUF(4); + len += 4; + READ32(pathconf->max_link); + dprintk("read_attrs: maxlink=%d\n", pathconf->max_link); + } + if (bmval0 & FATTR4_WORD0_MAXNAME) { + READ_BUF(4); + len += 4; + READ32(pathconf->max_namelen); + dprintk("read_attrs: maxname=%d\n", pathconf->max_namelen); + } + + if (len != attrlen) + goto xdr_error; + return 0; + +out_bad_bitmap: + printk(KERN_NOTICE "%s: server returned bad attribute bitmap\n",__FUNCTION__); + return -EIO; + +xdr_error: + printk(KERN_NOTICE "xdr error! 
(%s:%d)\n", __FILE__, __LINE__); + return -EIO; +} + +static int +decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *nfp, struct nfs_server *server) { - struct nfs_fattr *nfp = getattr->gt_attrs; - struct nfs_fsstat *fsstat = getattr->gt_fsstat; - struct nfs_pathconf *pathconf = getattr->gt_pathconf; uint32_t attrlen, dummy32, bmlen, bmval0 = 0, bmval1 = 0, @@ -1739,25 +2404,25 @@ decode_getattr(struct xdr_stream *xdr, s READ32(bmval1); READ32(attrlen); - if ((bmval0 & ~getattr->gt_bmval[0]) || - (bmval1 & ~getattr->gt_bmval[1])) { + if ((bmval0 & ~nfs4_fattr_bitmap[0]) || + (bmval1 & ~nfs4_fattr_bitmap[1])) { dprintk("read_attrs: server returned bad attributes!\n"); goto xdr_error; } - if (nfp) { - nfp->bitmap[0] = bmval0; - nfp->bitmap[1] = bmval1; - } + + BUG_ON(!nfp); + + nfp->bitmap[0] = bmval0; + nfp->bitmap[1] = bmval1; /* * In case the server doesn't return some attributes, * we initialize them here to some nominal values.. */ - if (nfp) { - nfp->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4; - nfp->nlink = 1; - nfp->timestamp = jiffies; - } + nfp->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4; + nfp->nlink = 1; + nfp->timestamp = jiffies; + if (bmval0 & FATTR4_WORD0_TYPE) { READ_BUF(4); len += 4; @@ -1797,37 +2462,6 @@ decode_getattr(struct xdr_stream *xdr, s READ64(nfp->fileid); dprintk("read_attrs: fileid=%Ld\n", (long long) nfp->fileid); } - if (bmval0 & FATTR4_WORD0_FILES_AVAIL) { - READ_BUF(8); - len += 8; - READ64(fsstat->afiles); - dprintk("read_attrs: files_avail=0x%Lx\n", (long long) fsstat->afiles); - } - if (bmval0 & FATTR4_WORD0_FILES_FREE) { - READ_BUF(8); - len += 8; - READ64(fsstat->ffiles); - dprintk("read_attrs: files_free=0x%Lx\n", (long long) fsstat->ffiles); - } - if (bmval0 & FATTR4_WORD0_FILES_TOTAL) { - READ_BUF(8); - len += 8; - READ64(fsstat->tfiles); - dprintk("read_attrs: files_tot=0x%Lx\n", (long long) fsstat->tfiles); - } - if (bmval0 & FATTR4_WORD0_MAXLINK) { - READ_BUF(4); - len += 4; 
- READ32(pathconf->max_link); - dprintk("read_attrs: maxlink=%d\n", pathconf->max_link); - } - if (bmval0 & FATTR4_WORD0_MAXNAME) { - READ_BUF(4); - len += 4; - READ32(pathconf->max_namelen); - dprintk("read_attrs: maxname=%d\n", pathconf->max_namelen); - } - if (bmval1 & FATTR4_WORD1_MODE) { READ_BUF(4); len += 4; @@ -1851,9 +2485,11 @@ decode_getattr(struct xdr_stream *xdr, s } READ_BUF(dummy32); len += (XDR_QUADLEN(dummy32) << 2); - if ((status = nfs_map_name_to_uid(server->nfs4_state, (char *)p, dummy32, - &nfp->uid)) < 0) { - dprintk("read_attrs: name-to-uid mapping failed!\n"); + status = nfs_map_name_to_uid(server->nfs4_state, (char *)p, + dummy32, &nfp->uid); + if (status) { + dprintk("read_attrs: nfs_map_name_to_uid failed!\n"); + /* goto out; */ nfp->uid = -2; } dprintk("read_attrs: uid=%d\n", (int)nfp->uid); @@ -1868,10 +2504,12 @@ decode_getattr(struct xdr_stream *xdr, s } READ_BUF(dummy32); len += (XDR_QUADLEN(dummy32) << 2); - if ((status = nfs_map_group_to_gid(server->nfs4_state, (char *)p, dummy32, - &nfp->gid)) < 0) { - dprintk("read_attrs: group-to-gid mapping failed!\n"); + status = nfs_map_group_to_gid(server->nfs4_state, (char *)p, + dummy32, &nfp->gid); + if (status) { + dprintk("read_attrs: nfs_map_group_to_gid failed!\n"); nfp->gid = -2; + /* goto out; */ } dprintk("read_attrs: gid=%d\n", (int)nfp->gid); } @@ -1882,28 +2520,10 @@ decode_getattr(struct xdr_stream *xdr, s len += 8; READ32(major); READ32(minor); - nfp->rdev = MKDEV(major, minor); - if (MAJOR(nfp->rdev) != major || MINOR(nfp->rdev) != minor) - nfp->rdev = 0; - dprintk("read_attrs: rdev=%u:%u\n", major, minor); - } - if (bmval1 & FATTR4_WORD1_SPACE_AVAIL) { - READ_BUF(8); - len += 8; - READ64(fsstat->abytes); - dprintk("read_attrs: savail=0x%Lx\n", (long long) fsstat->abytes); - } - if (bmval1 & FATTR4_WORD1_SPACE_FREE) { - READ_BUF(8); - len += 8; - READ64(fsstat->fbytes); - dprintk("read_attrs: sfree=0x%Lx\n", (long long) fsstat->fbytes); - } - if (bmval1 & FATTR4_WORD1_SPACE_TOTAL) { - 
READ_BUF(8); - len += 8; - READ64(fsstat->tbytes); - dprintk("read_attrs: stotal=0x%Lx\n", (long long) fsstat->tbytes); + nfp->rdev = MKDEV(major, minor); + if (MAJOR(nfp->rdev) != major || MINOR(nfp->rdev) != minor) + nfp->rdev = 0; + dprintk("read_attrs: rdev=%u:%u\n", major, minor); } if (bmval1 & FATTR4_WORD1_SPACE_USED) { READ_BUF(8); @@ -1935,6 +2555,88 @@ decode_getattr(struct xdr_stream *xdr, s DECODE_TAIL; } +#ifdef CONFIG_NFS_V4_ACL + +static int +decode_getacl(struct xdr_stream *xdr, struct nfs4_acl **aclp) +{ + uint32_t attrlen, bmlen, + bmval0 = 0, + bmval1 = 0, + len = 0; + uint32_t *p; + int status; + + status = decode_op_hdr(xdr, OP_GETATTR); + if (status) + return status; + + READ_BUF(4); + READ32(bmlen); + if (bmlen > 2) + goto xdr_error; + + READ_BUF((bmlen << 2) + 4); + if (bmlen > 0) + READ32(bmval0); + if (bmlen > 1) + READ32(bmval1); + READ32(attrlen); + + if ((bmval0 & ~FATTR4_WORD0_ACL) || (bmval1)) { + dprintk("read_attrs: server returned bad attributes!\n"); + goto xdr_error; + } + if (bmval0 & FATTR4_WORD0_ACL) { + struct nfs4_acl *acl; + struct nfs4_ace ace; + int i; + u_int nace; + + if (aclp == NULL) + goto xdr_error; /* XXX MARIUS */ + + READ_BUF(4); len += 4; + READ32(nace); + + if (nace == 0) { + *aclp = NULL; + goto out_acl; + } + + acl = *aclp = nfs4_acl_new(); + if (acl == NULL) { + status = -ENOMEM; + goto out; + } + + for (i = 0; i < nace; i++) { + READ_BUF(16); len += 16; + READ32(ace.type); + READ32(ace.flag); + READ32(ace.access_mask); + ace.access_mask &= NFS4_ACE_MASK_ALL; + READ32(ace.wholen); + READ_BUF(ace.wholen); + len += XDR_QUADLEN(ace.wholen) << 2; + status = nfs4_acl_add_ace(acl, ace.type, ace.flag, + ace.access_mask, (char *)p, ace.wholen); + if (status < 0) + goto out; + p += XDR_QUADLEN(ace.wholen); + } + } else if (aclp != NULL) + *aclp = NULL; +out_acl: + + if (len != attrlen) + goto xdr_error; + + DECODE_TAIL; +} + +#endif /* CONFIG_NFS_V4_ACL */ + static int decode_change_attr(struct xdr_stream *xdr, 
uint64_t *change_attr) { @@ -2067,6 +2769,77 @@ out_bad_bitmap: return -EIO; } +static int +decode_putfh(struct xdr_stream *xdr) +{ + return decode_op_hdr(xdr, OP_PUTFH); +} + +static int +decode_setattr(struct xdr_stream *xdr) +{ + uint32_t *p; + uint32_t bmlen; + int status; + + + status = decode_op_hdr(xdr, OP_SETATTR); + if (status) + return status; + READ_BUF(4); + READ32(bmlen); + READ_BUF(bmlen << 2); + return 0; +} + +#ifdef CONFIG_NFS_V4_ACL + +/* + * Decode SETACL response + */ +static int +nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, uint32_t *p, void *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_setattr(&xdr); +out: + return status; +} + +/* + * Decode GETACL response + */ +static int +nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_acl **res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_getacl(&xdr, res); + +out: + return status; +} + +#endif /* CONFIG_NFS_V4_ACL */ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) @@ -2137,9 +2910,8 @@ out_bad_bitmap: } static int -decode_getfh(struct xdr_stream *xdr, struct nfs4_getfh *getfh) +decode_getfh(struct xdr_stream *xdr, struct nfs_fh *fh) { - struct nfs_fh *fh = getfh->gf_fhandle; uint32_t *p; uint32_t len; int status; @@ -2161,14 +2933,14 @@ decode_getfh(struct xdr_stream *xdr, str } static int -decode_link(struct xdr_stream *xdr, struct nfs4_link *link) +decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) { int status; status = decode_op_hdr(xdr, OP_LINK); if (status) return status; - return 
decode_change_info(xdr, link->ln_cinfo); + return decode_change_info(xdr, cinfo); } /* @@ -2296,12 +3068,6 @@ decode_open_downgrade(struct xdr_stream } static int -decode_putfh(struct xdr_stream *xdr) -{ - return decode_op_hdr(xdr, OP_PUTFH); -} - -static int decode_putrootfh(struct xdr_stream *xdr) { return decode_op_hdr(xdr, OP_PUTROOTFH); @@ -2336,7 +3102,7 @@ decode_read(struct xdr_stream *xdr, stru } static int -decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir *readdir) +decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir) { struct xdr_buf *rcvbuf = &req->rq_rcv_buf; struct page *page = *rcvbuf->pages; @@ -2350,7 +3116,7 @@ decode_readdir(struct xdr_stream *xdr, s if (status) return status; READ_BUF(8); - COPYMEM(readdir->rd_resp_verifier.data, 8); + COPYMEM(readdir->resp_verifier.data, 8); hdrlen = (char *) p - (char *) iov->iov_base; recvd = req->rq_received - hdrlen; @@ -2358,9 +3124,9 @@ decode_readdir(struct xdr_stream *xdr, s pglen = recvd; xdr_read_pages(xdr, pglen); - BUG_ON(pglen + readdir->rd_pgbase > PAGE_CACHE_SIZE); + BUG_ON(pglen + readdir->pgbase > PAGE_CACHE_SIZE); kaddr = p = (uint32_t *) kmap_atomic(page, KM_USER0); - end = (uint32_t *) ((char *)p + pglen + readdir->rd_pgbase); + end = (uint32_t *) ((char *)p + pglen + readdir->pgbase); entry = p; for (nr = 0; *p++; nr++) { if (p + 3 > end) @@ -2421,7 +3187,7 @@ err_unmap: } static int -decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readlink *readlink) +decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) { struct xdr_buf *rcvbuf = &req->rq_rcv_buf; struct iovec *iov = rcvbuf->head; @@ -2469,30 +3235,30 @@ decode_restorefh(struct xdr_stream *xdr) } static int -decode_remove(struct xdr_stream *xdr, struct nfs4_remove *remove) +decode_remove(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) { int status; status = decode_op_hdr(xdr, OP_REMOVE); if (status) goto out; - status = 
decode_change_info(xdr, remove->rm_cinfo); + status = decode_change_info(xdr, cinfo); out: return status; } static int -decode_rename(struct xdr_stream *xdr, struct nfs4_rename *rename) +decode_rename(struct xdr_stream *xdr, struct nfs4_change_info *old_cinfo, + struct nfs4_change_info *new_cinfo) { int status; status = decode_op_hdr(xdr, OP_RENAME); if (status) goto out; - if ((status = decode_change_info(xdr, rename->rn_src_cinfo))) - goto out; - if ((status = decode_change_info(xdr, rename->rn_dst_cinfo))) + if ((status = decode_change_info(xdr, old_cinfo))) goto out; + status = decode_change_info(xdr, new_cinfo); out: return status; } @@ -2510,23 +3276,6 @@ decode_savefh(struct xdr_stream *xdr) } static int -decode_setattr(struct xdr_stream *xdr, struct nfs_setattrres *res) -{ - uint32_t *p; - uint32_t bmlen; - int status; - - - status = decode_op_hdr(xdr, OP_SETATTR); - if (status) - return status; - READ_BUF(4); - READ32(bmlen); - READ_BUF(bmlen << 2); - return 0; -} - -static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_client *clp) { uint32_t *p; @@ -2566,158 +3315,348 @@ decode_setclientid(struct xdr_stream *xd } static int -decode_setclientid_confirm(struct xdr_stream *xdr) +decode_setclientid_confirm(struct xdr_stream *xdr) +{ + return decode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM); +} + +static int +decode_write(struct xdr_stream *xdr, struct nfs_writeres *res) +{ + uint32_t *p; + int status; + + status = decode_op_hdr(xdr, OP_WRITE); + if (status) + return status; + + READ_BUF(16); + READ32(res->count); + READ32(res->verf->committed); + COPYMEM(res->verf->verifier, 8); + return 0; +} + +/* + * Decode OPEN_DOWNGRADE response + */ +static int +nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_closeres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = 
decode_putfh(&xdr); + if (status) + goto out; + status = decode_open_downgrade(&xdr, res); +out: + return status; +} + +/* + * END OF "GENERIC" DECODE ROUTINES. + */ + +/* + * Decode ACCESS response + */ +static int +nfs4_xdr_dec_access(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_accessres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_getfattr(&xdr, res->fattr, res->server); + if (status) + goto out; + status = decode_access(&xdr, res); +out: + return status; +} + +/* + * Decode LOOKUP response + */ +static int +nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_lookupres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_getfattr(&xdr, res->dirattr, res->server); + if (status) + goto out; + status = decode_lookup(&xdr); + if (status) + goto out; + status = decode_getfattr(&xdr, res->fattr, res->server); + if (status) + goto out; + status = decode_getfh(&xdr, res->fhandle); +out: + return status; +} + +/* + * Decode GETROOT_HEAD response + */ +static int +nfs4_xdr_dec_getroot_head(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_getroot_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putrootfh(&xdr); + if (status) + goto out; + status = decode_getfattr(&xdr, res->fattr, res->server); + if (status) + goto out; + status = decode_getfh(&xdr, res->fhandle); +out: + return status; +} + +/* + * Decode GETROOT_PATH response + */ +static int 
+nfs4_xdr_dec_getroot_path(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_getroot_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_lookup(&xdr); + if (status) + goto out; + status = decode_getfattr(&xdr, res->fattr, res->server); + if (status) + goto out; + status = decode_getfh(&xdr, res->fhandle); +out: + return status; +} + +/* + * Decode REMOVE response + */ +static int +nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_remove_res *res) { - return decode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM); + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_remove(&xdr, res->dir_cinfo); + if (status) + goto out; + status = decode_getfattr(&xdr, res->dir_attr, res->server); +out: + return status; } +/* + * Decode UNLINK response + */ static int -decode_write(struct xdr_stream *xdr, struct nfs_writeres *res) +nfs4_xdr_dec_unlink(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_unlink *res) { - uint32_t *p; + struct xdr_stream xdr; + struct compound_hdr hdr; int status; - - status = decode_op_hdr(xdr, OP_WRITE); + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); if (status) - return status; - - READ_BUF(16); - READ32(res->count); - READ32(res->verf->committed); - COPYMEM(res->verf->verifier, 8); - return 0; + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_remove(&xdr, &res->cinfo); + if (status) + goto out; + status = decode_getfattr(&xdr, &res->attrs, res->server); +out: + return status; } -/* FIXME: this sucks */ +/* + * Decode RENAME response + */ 
static int -decode_compound(struct xdr_stream *xdr, struct nfs4_compound *cp, struct rpc_rqst *req) +nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_rename_res *res) { + struct xdr_stream xdr; struct compound_hdr hdr; - struct nfs4_op *op; int status; - - status = decode_compound_hdr(xdr, &hdr); + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; - - cp->toplevel_status = hdr.status; - - /* - * We need this if our zero-copy I/O is going to work. Rumor has - * it that the spec will soon mandate it... - */ - if (hdr.taglen != cp->taglen) - dprintk("nfs4: non-conforming server returns tag length mismatch!\n"); - - cp->resp_nops = hdr.nops; - if (hdr.nops > cp->req_nops) { - dprintk("nfs4: resp_nops > req_nops!\n"); - goto xdr_error; - } - - op = &cp->ops[0]; - for (cp->nops = 0; cp->nops < cp->resp_nops; cp->nops++, op++) { - switch (op->opnum) { - case OP_ACCESS: - status = decode_access(xdr, &op->u.access); - break; - case OP_CREATE: - status = decode_create(xdr, &op->u.create); - break; - case OP_GETATTR: - status = decode_getattr(xdr, &op->u.getattr, cp->server); - break; - case OP_GETFH: - status = decode_getfh(xdr, &op->u.getfh); - break; - case OP_LINK: - status = decode_link(xdr, &op->u.link); - break; - case OP_LOOKUP: - status = decode_lookup(xdr); - break; - case OP_PUTFH: - status = decode_putfh(xdr); - break; - case OP_PUTROOTFH: - status = decode_putrootfh(xdr); - break; - case OP_READDIR: - status = decode_readdir(xdr, req, &op->u.readdir); - break; - case OP_READLINK: - status = decode_readlink(xdr, req, &op->u.readlink); - break; - case OP_RESTOREFH: - status = decode_restorefh(xdr); - break; - case OP_REMOVE: - status = decode_remove(xdr, &op->u.remove); - break; - case OP_RENAME: - status = decode_rename(xdr, &op->u.rename); - break; - case OP_SAVEFH: - status = decode_savefh(xdr); - break; - default: - BUG(); - return -EIO; - } - if (status) - break; - } - - 
DECODE_TAIL; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_savefh(&xdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_rename(&xdr, res->old_cinfo, res->new_cinfo); + if (status) + goto out; + status = decode_getfattr(&xdr, res->new_fattr, res->server); + if (status) + goto out; + status = decode_restorefh(&xdr); + if (status) + goto out; + status = decode_getfattr(&xdr, res->old_fattr, res->server); +out: + return status; } /* - * Decode OPEN_DOWNGRADE response + * Decode LINK response */ static int -nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_closeres *res) +nfs4_xdr_dec_link(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_link_res *res) { - struct xdr_stream xdr; - struct compound_hdr hdr; - int status; - - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); - if (status) - goto out; - status = decode_putfh(&xdr); - if (status) - goto out; - status = decode_open_downgrade(&xdr, res); + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_savefh(&xdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_link(&xdr, res->dir_cinfo); + if (status) + goto out; + status = decode_getfattr(&xdr, res->dir_attr, res->server); + if (status) + goto out; + status = decode_restorefh(&xdr); + if (status) + goto out; + status = decode_getfattr(&xdr, res->fattr, res->server); out: - return status; + return status; } /* - * END OF "GENERIC" DECODE ROUTINES. 
+ * Decode CREATE response */ +static int +nfs4_xdr_dec_create(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_create_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_savefh(&xdr); + if (status) + goto out; + status = decode_create(&xdr,res->dir_cinfo); + if (status) + goto out; + status = decode_getfattr(&xdr, res->fattr, res->server); + if (status) + goto out; + status = decode_getfh(&xdr, res->fhandle); + if (status) + goto out; + status = decode_restorefh(&xdr); + if (status) + goto out; + status = decode_getfattr(&xdr, res->dir_attr, res->server); +out: + return status; +} /* - * Decode COMPOUND response + * Decode GETATTR response */ static int -nfs4_xdr_dec_compound(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_compound *cp) +nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_getattr_res *res) { struct xdr_stream xdr; + struct compound_hdr hdr; int status; xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - if ((status = decode_compound(&xdr, cp, rqstp))) + status = decode_compound_hdr(&xdr, &hdr); + if (status) goto out; - - status = 0; - if (cp->toplevel_status) - status = -nfs_stat_to_errno(cp->toplevel_status); - + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_getfattr(&xdr, res->fattr, res->server); out: return status; + } + /* * Decode CLOSE response */ @@ -2748,9 +3687,6 @@ nfs4_xdr_dec_open(struct rpc_rqst *rqstp { struct xdr_stream xdr; struct compound_hdr hdr; - struct nfs4_getfh gfh = { - .gf_fhandle = &res->fh, - }; int status; xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); @@ -2766,16 +3702,16 @@ nfs4_xdr_dec_open(struct rpc_rqst *rqstp status = decode_open(&xdr, res); if (status) goto out; - status = decode_getattr(&xdr, res->f_getattr, res->server); + status = 
decode_getfattr(&xdr, res->f_attr, res->server); if (status) goto out; - status = decode_getfh(&xdr, &gfh); + status = decode_getfh(&xdr, &res->fh); if (status) goto out; status = decode_restorefh(&xdr); if (status) goto out; - status = decode_getattr(&xdr, res->d_getattr, res->server); + status = decode_getfattr(&xdr, res->d_attr, res->server); if (status) goto out; out: @@ -2824,7 +3760,7 @@ nfs4_xdr_dec_open_reclaim(struct rpc_rqs status = decode_open(&xdr, res); if (status) goto out; - status = decode_getattr(&xdr, res->f_getattr, res->server); + status = decode_getfattr(&xdr, res->f_attr, res->server); out: return status; } @@ -2846,10 +3782,10 @@ nfs4_xdr_dec_setattr(struct rpc_rqst *rq status = decode_putfh(&xdr); if (status) goto out; - status = decode_setattr(&xdr, res); + status = decode_setattr(&xdr); if (status) goto out; - status = decode_getattr(&xdr, res->attr, res->server); + status = decode_getfattr(&xdr, res->fattr, res->server); out: return status; } @@ -2921,6 +3857,50 @@ out: } /* + * Decode READLINK response + */ +static int +nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, uint32_t *p, void *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_readlink(&xdr, rqstp); +out: + return status; +} + +/* + * Decode READDIR response + */ +static int +nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_readdir_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_readdir(&xdr, rqstp, res); +out: + return status; +} + +/* * Decode Read response */ static int @@ -3033,6 +4013,44 @@ nfs4_xdr_dec_fsinfo(struct 
rpc_rqst *req } /* + * PATHCONF request + */ +static int +nfs4_xdr_dec_pathconf(struct rpc_rqst *req, uint32_t *p, struct nfs_pathconf *pathconf) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_putfh(&xdr); + if (!status) + status = decode_pathconf(&xdr, pathconf); + return status; +} + +/* + * STATFS request + */ +static int +nfs4_xdr_dec_statfs(struct rpc_rqst *req, uint32_t *p, struct nfs_fsstat *fsstat) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_putfh(&xdr); + if (!status) + status = decode_statfs(&xdr, fsstat); + return status; +} + +/* * Decode RENEW response */ static int @@ -3201,7 +4219,6 @@ nfs_stat_to_errno(int stat) } struct rpc_procinfo nfs4_procedures[] = { - PROC(COMPOUND, enc_compound, dec_compound), PROC(READ, enc_read, dec_read), PROC(WRITE, enc_write, dec_write), PROC(COMMIT, enc_commit, dec_commit), @@ -3218,6 +4235,24 @@ struct rpc_procinfo nfs4_procedures[] = PROC(LOCK, enc_lock, dec_lock), PROC(LOCKT, enc_lockt, dec_lockt), PROC(LOCKU, enc_locku, dec_locku), +#ifdef CONFIG_NFS_V4_ACL + PROC(GETACL, enc_getacl, dec_getacl), + PROC(SETACL, enc_setacl, dec_setacl), +#endif /* CONFIG_NFS_V4_ACL */ + PROC(ACCESS, enc_access, dec_access), + PROC(GETATTR, enc_getattr, dec_getattr), + PROC(LOOKUP, enc_lookup, dec_lookup), + PROC(GETROOT_HEAD, enc_getroot_head, dec_getroot_head), + PROC(GETROOT_PATH, enc_getroot_path, dec_getroot_path), + PROC(REMOVE, enc_remove, dec_remove), + PROC(RENAME, enc_rename, dec_rename), + PROC(LINK, enc_link, dec_link), + PROC(CREATE, enc_create, dec_create), + PROC(PATHCONF, enc_pathconf, dec_pathconf), + PROC(STATFS, enc_statfs, dec_statfs), + PROC(UNLINK, enc_unlink, dec_unlink), + PROC(READLINK, enc_readlink, dec_readlink), + 
PROC(READDIR, enc_readdir, dec_readdir), }; struct rpc_version nfs_version4 = { diff -puN fs/nfsd/vfs.c~CITI_NFS4_ALL fs/nfsd/vfs.c --- linux-2.6.3/fs/nfsd/vfs.c~CITI_NFS4_ALL 2004-02-19 16:47:07.000000000 -0500 +++ linux-2.6.3-bfields/fs/nfsd/vfs.c 2004-02-19 16:47:12.000000000 -0500 @@ -44,6 +44,16 @@ #include #include #include +#ifdef CONFIG_NFSD_V4 +#include +#include +#include +#include +#include +#include +#include +#include +#endif /* CONFIG_NFSD_V4 */ #include @@ -341,6 +351,237 @@ out_nfserr: goto out; } +#ifdef CONFIG_NFS_V4_ACL + +/* + * Convert one POSIX ACL with posix_acl_to_xattr() and store it as the + * xattr "key" of the dentry's inode. Returns 0 on success, -ENOMEM, + * -EOPNOTSUPP without a setxattr operation, or the error from the + * conversion, the security hook or ->setxattr(). + */ +static int +set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key) +{ + int len; + size_t buflen; + char *buf = NULL; + int error = 0; + struct inode *inode = dentry->d_inode; + + buflen = posix_acl_xattr_size(pacl->a_count); + buf = kmalloc(buflen, GFP_KERNEL); + error = -ENOMEM; + if (buf == NULL) + goto out; + + len = posix_acl_to_xattr(pacl, buf, buflen); + if (len < 0) { + error = len; + goto out; + } + + error = -EOPNOTSUPP; + if (inode->i_op && inode->i_op->setxattr) { + down(&inode->i_sem); + /* A refusal by the security hook must stop the update. */ + error = security_inode_setxattr(dentry, key, buf, len, 0); + if (!error) + error = inode->i_op->setxattr(dentry, key, buf, len, 0); + if (!error) + security_inode_post_setxattr(dentry, key, buf, len, 0); + up(&inode->i_sem); + } +out: + kfree(buf); + return (error); +} + +/* + * Adapters from the nfs4_acl_idmapper callbacks to the nfsd id-mapping + * calls; "arg" is cast to the struct svc_rqst they expect. + */ +static inline int +nfsd_name_to_uid_wrapper(void *arg, const char *name, size_t len, __u32 *id) +{ + return nfsd_map_name_to_uid((struct svc_rqst *)arg, name, len, id); +} + +static inline int +nfsd_name_to_gid_wrapper(void *arg, const char *name, size_t len, __u32 *id) +{ + return nfsd_map_name_to_gid((struct svc_rqst *)arg, name, len, id); +} + +static inline int +nfsd_uid_to_name_wrapper(void *arg, __u32 id, char *name) +{ + return nfsd_map_uid_to_name((struct svc_rqst *)arg, id, name); +} + +static inline int +nfsd_gid_to_name_wrapper(void *arg, __u32 id, char *name) +{ + return nfsd_map_gid_to_name((struct svc_rqst *)arg, id, name); +} + +static struct
nfs4_acl_idmapper nfsd_idmapper = { + .name2uid = nfsd_name_to_uid_wrapper, + .name2gid = nfsd_name_to_gid_wrapper, + .uid2name = nfsd_uid_to_name_wrapper, + .gid2name = nfsd_gid_to_name_wrapper, +}; + + +/* + * Verify the file handle, translate the NFSv4 ACL with + * nfs4_acl_nfsv4_to_posix() and store the resulting access and default + * POSIX ACLs. Returns nfs_ok, the fh_verify() error, or nfserrno() of + * the error from the step that failed. + */ +int +nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct nfs4_acl *acl) +{ + int error; + struct dentry *dentry; + struct inode *inode; + struct posix_acl *pacl = NULL, *dpacl = NULL; + + /* Get inode */ + error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, MAY_SATTR); + if (error) + goto out; + + dentry = fhp->fh_dentry; + inode = dentry->d_inode; + + error = nfs4_acl_nfsv4_to_posix(&nfsd_idmapper, rqstp, acl, &pacl, &dpacl); + if (error < 0) + goto out_nfserr; + + if (pacl) { + error = set_nfsv4_acl_one(dentry, pacl, XATTR_NAME_ACL_ACCESS); + if (error < 0) + goto out_nfserr; + } + + if (dpacl) { + error = set_nfsv4_acl_one(dentry, dpacl, XATTR_NAME_ACL_DEFAULT); + if (error < 0) + goto out_nfserr; + } + + error = nfs_ok; + +out: + posix_acl_release(pacl); + posix_acl_release(dpacl); + return (error); +out_nfserr: + error = nfserrno(error); + goto out; +} + +/* + * Read the xattr "key" of the dentry's inode and convert it with + * posix_acl_from_xattr(). Returns that result, or an ERR_PTR() when + * the xattr cannot be read: -EOPNOTSUPP without a getxattr operation, + * -ENODATA for an empty value, -ENOMEM, or the error from the + * security hook or ->getxattr(). + */ +static struct posix_acl * +_get_posix_acl(struct dentry *dentry, char *key) +{ + struct inode *inode = dentry->d_inode; + char *buf = NULL; + int buflen, error = 0; + struct posix_acl *pacl = NULL; + + down(&inode->i_sem); + + /* Check for ->getxattr before the first call through it. */ + error = -EOPNOTSUPP; + if (!inode->i_op || !inode->i_op->getxattr) + goto out_sem; + + buflen = inode->i_op->getxattr(dentry, key, NULL, 0); + if (buflen <= 0) { + error = buflen < 0 ? 
buflen : -ENODATA; + goto out_sem; + } + + buf = kmalloc(buflen, GFP_KERNEL); + if (buf == NULL) { + error = -ENOMEM; + goto out_sem; + } + + error = security_inode_getxattr(dentry, key); + if (error) + goto out_sem; + error = inode->i_op->getxattr(dentry, key, buf, buflen); + if (error < 0) + goto out_sem; + + error = 0; + up(&inode->i_sem); + + pacl = posix_acl_from_xattr(buf, buflen); + out: + kfree(buf); + return pacl; + out_sem: + up(&inode->i_sem); + pacl = ERR_PTR(error); + goto out; +} + +/* + * Build an NFSv4 ACL for the dentry with nfs4_acl_posix_to_nfsv4() + * from its access ACL (derived from i_mode when that xattr reads as + * -ENODATA) and, for directories, its default ACL. Returns 0 with + * the result in *acl, or the error of the step that failed. + */ +int +nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_acl **acl) +{ + struct inode *inode = dentry->d_inode; + int error = 0; + struct posix_acl *pacl = NULL, *dpacl = NULL; + + pacl = _get_posix_acl(dentry, XATTR_NAME_ACL_ACCESS); + if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA) + pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); + if (IS_ERR(pacl)) { + error = PTR_ERR(pacl); + pacl = NULL; + goto out; + } + + if (S_ISDIR(inode->i_mode)) { + dpacl = _get_posix_acl(dentry, XATTR_NAME_ACL_DEFAULT); + if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA) + dpacl = NULL; + else if (IS_ERR(dpacl)) { + error = PTR_ERR(dpacl); + dpacl = NULL; + goto out; + } + } + + *acl = nfs4_acl_posix_to_nfsv4(&nfsd_idmapper, rqstp, pacl, dpacl); + if (IS_ERR(*acl)) { + error = PTR_ERR(*acl); + *acl = NULL; + } + out: + posix_acl_release(pacl); + posix_acl_release(dpacl); + return error; +} + +#endif /* CONFIG_NFS_V4_ACL */ + #ifdef CONFIG_NFSD_V3 /* * Check server access rights to a file system object @@ -458,11 +699,15 @@ nfsd_open(struct svc_rqst *rqstp, struct int flags = O_RDONLY|O_LARGEFILE, err; /* - * If we get here, then the client has already done an "open", + * If we get here, then for regular files, + * the client has already done an "open", * and (hopefully) checked permission - so allow OWNER_OVERRIDE * in case a chmod has now revoked permission.
*/ - err = fh_verify(rqstp, fhp, type, access | MAY_OWNER_OVERRIDE); + if (type == S_IFDIR) + err = fh_verify(rqstp, fhp, type, access); + else + err = fh_verify(rqstp, fhp, type, access | MAY_OWNER_OVERRIDE); if (err) goto out; @@ -1494,7 +1706,7 @@ nfsd_readdir(struct svc_rqst *rqstp, str err = cdp->err; *offsetp = file.f_pos; - if (err == nfserr_eof || err == nfserr_readdir_nospc) + if (err == nfserr_eof || err == nfserr_toosmall) err = nfs_ok; /* can still be found in ->err */ out_close: nfsd_close(&file); diff -puN include/linux/nfsd/nfsd.h~CITI_NFS4_ALL include/linux/nfsd/nfsd.h --- linux-2.6.3/include/linux/nfsd/nfsd.h~CITI_NFS4_ALL 2004-02-19 16:47:07.000000000 -0500 +++ linux-2.6.3-bfields/include/linux/nfsd/nfsd.h 2004-02-19 16:47:11.000000000 -0500 @@ -76,6 +76,11 @@ int nfsd_lookup(struct svc_rqst *, stru const char *, int, struct svc_fh *); int nfsd_setattr(struct svc_rqst *, struct svc_fh *, struct iattr *, int, time_t); +#ifdef CONFIG_NFSD_V4 +int nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *, + struct nfs4_acl *); +int nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, struct nfs4_acl **); +#endif /* CONFIG_NFSD_V4 */ int nfsd_create(struct svc_rqst *, struct svc_fh *, char *name, int len, struct iattr *attrs, int type, dev_t rdev, struct svc_fh *res); @@ -190,9 +195,12 @@ void nfsd_lockd_shutdown(void); #define nfserr_bad_seqid __constant_htonl(NFSERR_BAD_SEQID) #define nfserr_symlink __constant_htonl(NFSERR_SYMLINK) #define nfserr_not_same __constant_htonl(NFSERR_NOT_SAME) -#define nfserr_readdir_nospc __constant_htonl(NFSERR_READDIR_NOSPC) +#define nfserr_restorefh __constant_htonl(NFSERR_RESTOREFH) +#define nfserr_attrnotsupp __constant_htonl(NFSERR_ATTRNOTSUPP) #define nfserr_bad_xdr __constant_htonl(NFSERR_BAD_XDR) #define nfserr_openmode __constant_htonl(NFSERR_OPENMODE) +#define nfserr_locks_held __constant_htonl(NFSERR_LOCKS_HELD) +#define nfserr_op_illegal __constant_htonl(NFSERR_OP_ILLEGAL) /* error codes for internal use */ 
/* if a request fails due to kmalloc failure, it gets dropped. @@ -247,7 +255,6 @@ static inline int is_fsid(struct svc_fh /* * The following attributes are currently not supported by the NFSv4 server: - * ACL (will be supported in a forthcoming patch) * ARCHIVE (deprecated anyway) * FS_LOCATIONS (will be supported eventually) * HIDDEN (unlikely to be supported any time soon) @@ -267,7 +274,7 @@ static inline int is_fsid(struct svc_fh | FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FILEID | FATTR4_WORD0_FILES_AVAIL \ | FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_HOMOGENEOUS \ | FATTR4_WORD0_MAXFILESIZE | FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME \ - | FATTR4_WORD0_MAXREAD | FATTR4_WORD0_MAXWRITE) + | FATTR4_WORD0_MAXREAD | FATTR4_WORD0_MAXWRITE | FATTR4_WORD0_ACL) #define NFSD_SUPPORTED_ATTRS_WORD1 \ (FATTR4_WORD1_MODE | FATTR4_WORD1_NO_TRUNC | FATTR4_WORD1_NUMLINKS \ @@ -282,7 +289,8 @@ static inline int is_fsid(struct svc_fh (FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) /* These are the only attrs allowed in CREATE/OPEN/SETATTR. 
*/ -#define NFSD_WRITEABLE_ATTRS_WORD0 FATTR4_WORD0_SIZE +#define NFSD_WRITEABLE_ATTRS_WORD0 \ +(FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL ) #define NFSD_WRITEABLE_ATTRS_WORD1 \ (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_METADATA | FATTR4_WORD1_TIME_MODIFY_SET) diff -puN net/sunrpc/auth_gss/gss_krb5_crypto.c~CITI_NFS4_ALL net/sunrpc/auth_gss/gss_krb5_crypto.c --- linux-2.6.3/net/sunrpc/auth_gss/gss_krb5_crypto.c~CITI_NFS4_ALL 2004-02-19 16:47:07.000000000 -0500 +++ linux-2.6.3-bfields/net/sunrpc/auth_gss/gss_krb5_crypto.c 2004-02-19 16:47:07.000000000 -0500 @@ -58,14 +58,14 @@ krb5_encrypt( struct scatterlist sg[1]; u8 local_iv[16] = {0}; - dprintk("RPC: krb5_encrypt: input data:\n"); + dprintk("RPC: krb5_encrypt: input data:\n"); print_hexl((u32 *)in, length, 0); if (length % crypto_tfm_alg_blocksize(tfm) != 0) goto out; if (crypto_tfm_alg_ivsize(tfm) > 16) { - dprintk("RPC: gss_k5encrypt: tfm iv size to large %d\n", + dprintk("RPC: gss_k5encrypt: tfm iv size to large %d\n", crypto_tfm_alg_ivsize(tfm)); goto out; } @@ -80,10 +80,10 @@ krb5_encrypt( ret = crypto_cipher_encrypt_iv(tfm, sg, sg, length, local_iv); - dprintk("RPC: krb5_encrypt: output data:\n"); + dprintk("RPC: krb5_encrypt: output data:\n"); print_hexl((u32 *)out, length, 0); out: - dprintk("krb5_encrypt returns %d\n",ret); + dprintk("RPC: krb5_encrypt returns %d\n",ret); return(ret); } @@ -99,14 +99,14 @@ krb5_decrypt( struct scatterlist sg[1]; u8 local_iv[16] = {0}; - dprintk("RPC: krb5_decrypt: input data:\n"); + dprintk("RPC: krb5_decrypt: input data:\n"); print_hexl((u32 *)in, length, 0); if (length % crypto_tfm_alg_blocksize(tfm) != 0) goto out; if (crypto_tfm_alg_ivsize(tfm) > 16) { - dprintk("RPC: gss_k5decrypt: tfm iv size to large %d\n", + dprintk("RPC: gss_k5decrypt: tfm iv size to large %d\n", crypto_tfm_alg_ivsize(tfm)); goto out; } @@ -120,10 +120,10 @@ krb5_decrypt( ret = crypto_cipher_decrypt_iv(tfm, sg, sg, 
length, local_iv); - dprintk("RPC: krb5_decrypt: output_data:\n"); + dprintk("RPC: krb5_decrypt: output_data:\n"); print_hexl((u32 *)out, length, 0); out: - dprintk("gss_k5decrypt returns %d\n",ret); + dprintk("RPC: gss_k5decrypt returns %d\n",ret); return(ret); } @@ -152,7 +152,7 @@ krb5_make_checksum(s32 cksumtype, char * cksumname = "md5"; break; default: - dprintk("RPC: krb5_make_checksum:" + dprintk("RPC: krb5_make_checksum:" " unsupported checksum %d", cksumtype); goto out; } diff -puN net/sunrpc/auth_gss/gss_krb5_seqnum.c~CITI_NFS4_ALL net/sunrpc/auth_gss/gss_krb5_seqnum.c --- linux-2.6.3/net/sunrpc/auth_gss/gss_krb5_seqnum.c~CITI_NFS4_ALL 2004-02-19 16:47:07.000000000 -0500 +++ linux-2.6.3-bfields/net/sunrpc/auth_gss/gss_krb5_seqnum.c 2004-02-19 16:47:07.000000000 -0500 @@ -70,7 +70,7 @@ krb5_get_seq_num(struct crypto_tfm *key, s32 code; unsigned char plain[8]; - dprintk("krb5_get_seq_num: \n"); + dprintk("RPC: krb5_get_seq_num:\n"); if ((code = krb5_decrypt(key, cksum, buf, plain, 8))) return code; diff -puN net/sunrpc/auth_gss/gss_pseudoflavors.c~CITI_NFS4_ALL net/sunrpc/auth_gss/gss_pseudoflavors.c --- linux-2.6.3/net/sunrpc/auth_gss/gss_pseudoflavors.c~CITI_NFS4_ALL 2004-02-19 16:47:07.000000000 -0500 +++ linux-2.6.3-bfields/net/sunrpc/auth_gss/gss_pseudoflavors.c 2004-02-19 16:47:07.000000000 -0500 @@ -82,12 +82,13 @@ gss_register_triple(u32 pseudoflavor, st spin_lock(&registered_triples_lock); if (do_lookup_triple_by_pseudoflavor(pseudoflavor)) { - printk("Registered pseudoflavor %d again\n", pseudoflavor); + printk(KERN_WARNING "RPC: Registered pseudoflavor %d again\n", + pseudoflavor); goto err_unlock; } list_add(&triple->triples, &registered_triples); spin_unlock(&registered_triples_lock); - dprintk("RPC: registered pseudoflavor %d\n", pseudoflavor); + dprintk("RPC: registered pseudoflavor %d\n", pseudoflavor); return 0; @@ -145,7 +146,7 @@ gss_cmp_triples(u32 oid_len, char *oid_d oid.len = oid_len; oid.data = oid_data; - dprintk("RPC: gss_cmp_triples \n");
+ dprintk("RPC: gss_cmp_triples\n"); print_sec_triple(&oid,qop,service); spin_lock(&registered_triples_lock); @@ -158,7 +159,7 @@ gss_cmp_triples(u32 oid_len, char *oid_d } } spin_unlock(&registered_triples_lock); - dprintk("RPC: gss_cmp_triples return %d\n", pseudoflavor); + dprintk("RPC: gss_cmp_triples return %d\n", pseudoflavor); return pseudoflavor; } @@ -193,8 +194,8 @@ gss_pseudoflavor_to_service(u32 pseudofl triple = do_lookup_triple_by_pseudoflavor(pseudoflavor); spin_unlock(&registered_triples_lock); if (!triple) { - dprintk("RPC: gss_pseudoflavor_to_service called with" - " unsupported pseudoflavor %d\n", pseudoflavor); + dprintk("RPC: gss_pseudoflavor_to_service called with unsupported pseudoflavor %d\n", + pseudoflavor); return 0; } return triple->service; @@ -211,8 +212,8 @@ gss_pseudoflavor_to_mech(u32 pseudoflavo if (triple) mech = gss_mech_get(triple->mech); else - dprintk("RPC: gss_pseudoflavor_to_mech called with" - " unsupported pseudoflavor %d\n", pseudoflavor); + dprintk("RPC: gss_pseudoflavor_to_mech called with unsupported pseudoflavor %d\n", + pseudoflavor); return mech; } @@ -223,8 +224,8 @@ gss_pseudoflavor_to_mechOID(u32 pseudofl mech = gss_pseudoflavor_to_mech(pseudoflavor); if (!mech) { - dprintk("RPC: gss_pseudoflavor_to_mechOID called with" - " unsupported pseudoflavor %d\n", pseudoflavor); + dprintk("RPC: gss_pseudoflavor_to_mechOID called with unsupported pseudoflavor %d\n", + pseudoflavor); return -1; } oid->len = mech->gm_oid.len; diff -puN fs/nfsd/nfs4state.c~CITI_NFS4_ALL fs/nfsd/nfs4state.c --- linux-2.6.3/fs/nfsd/nfs4state.c~CITI_NFS4_ALL 2004-02-19 16:47:08.000000000 -0500 +++ linux-2.6.3-bfields/fs/nfsd/nfs4state.c 2004-02-19 16:47:15.000000000 -0500 @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -135,12 +136,16 @@ static void release_file(struct nfs4_fil * * client_lru holds client queue ordered by nfs4_client.cl_time * for lease renewal.
+ * + * close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time + * for last close replay. */ static struct list_head conf_id_hashtbl[CLIENT_HASH_SIZE]; static struct list_head conf_str_hashtbl[CLIENT_HASH_SIZE]; static struct list_head unconf_str_hashtbl[CLIENT_HASH_SIZE]; static struct list_head unconf_id_hashtbl[CLIENT_HASH_SIZE]; static struct list_head client_lru; +static struct list_head close_lru; static inline void renew_client(struct nfs4_client *clp) @@ -269,8 +274,7 @@ cmp_clid(clientid_t * cl1, clientid_t * /* XXX what about NGROUP */ static int cmp_creds(struct svc_cred *cr1, struct svc_cred *cr2){ - return((cr1->cr_uid == cr2->cr_uid) && - (cr1->cr_gid == cr2->cr_gid)); + return(cr1->cr_uid == cr2->cr_uid); } @@ -772,6 +776,9 @@ alloc_init_open_stateowner(unsigned int INIT_LIST_HEAD(&sop->so_strhash); INIT_LIST_HEAD(&sop->so_perclient); INIT_LIST_HEAD(&sop->so_perfilestate); + INIT_LIST_HEAD(&sop->so_perlockowner); /* not used */ + INIT_LIST_HEAD(&sop->so_close_lru); + sop->so_time = 0; list_add(&sop->so_idhash, &ownerid_hashtbl[idhashval]); list_add(&sop->so_strhash, &ownerstr_hashtbl[strhashval]); list_add(&sop->so_perclient, &clp->cl_perclient); @@ -790,13 +797,29 @@ alloc_init_open_stateowner(unsigned int } static void +release_stateid_lockowner(struct nfs4_stateid *open_stp) +{ + struct nfs4_stateowner *lock_sop; + + while (!list_empty(&open_stp->st_perlockowner)) { + lock_sop = list_entry(open_stp->st_perlockowner.next, + struct nfs4_stateowner, so_perlockowner); + /* list_del(&open_stp->st_perlockowner); */ + BUG_ON(lock_sop->so_is_open_owner); + release_stateowner(lock_sop); + } +} + +static void release_stateowner(struct nfs4_stateowner *sop) { struct nfs4_stateid *stp; - list_del_init(&sop->so_idhash); - list_del_init(&sop->so_strhash); - list_del_init(&sop->so_perclient); + list_del(&sop->so_idhash); + list_del(&sop->so_strhash); + list_del(&sop->so_perclient); + list_del(&sop->so_perlockowner); + 
list_del(&sop->so_close_lru); del_perclient++; while (!list_empty(&sop->so_perfilestate)) { stp = list_entry(sop->so_perfilestate.next, @@ -815,6 +838,7 @@ init_stateid(struct nfs4_stateid *stp, s INIT_LIST_HEAD(&stp->st_hash); INIT_LIST_HEAD(&stp->st_perfilestate); + INIT_LIST_HEAD(&stp->st_perlockowner); INIT_LIST_HEAD(&stp->st_perfile); list_add(&stp->st_hash, &stateid_hashtbl[hashval]); list_add(&stp->st_perfilestate, &sop->so_perfilestate); @@ -826,24 +850,30 @@ init_stateid(struct nfs4_stateid *stp, s stp->st_stateid.si_stateownerid = sop->so_id; stp->st_stateid.si_fileid = fp->fi_id; stp->st_stateid.si_generation = 0; - stp->st_share_access = open->op_share_access; - stp->st_share_deny = open->op_share_deny; + stp->st_access_bmap = 0; + stp->st_deny_bmap = 0; + __set_bit(open->op_share_access, &stp->st_access_bmap); + __set_bit(open->op_share_deny, &stp->st_deny_bmap); } static void release_stateid(struct nfs4_stateid *stp, int flags) { - list_del_init(&stp->st_hash); + list_del(&stp->st_hash); list_del_perfile++; - list_del_init(&stp->st_perfile); - list_del_init(&stp->st_perfilestate); + list_del(&stp->st_perfile); + list_del(&stp->st_perfilestate); if((stp->st_vfs_set) && (flags & OPEN_STATE)) { + release_stateid_lockowner(stp); nfsd_close(&stp->st_vfs_file); vfsclose++; dput(stp->st_vfs_file.f_dentry); mntput(stp->st_vfs_file.f_vfsmnt); + } else if ((stp->st_vfs_set) && (flags & LOCK_STATE)) { + struct file *filp = &stp->st_vfs_file; + + locks_remove_posix(filp, (fl_owner_t) stp->st_stateowner); } - /* should use a slab cache */ kfree(stp); stp = NULL; } @@ -852,12 +882,25 @@ static void release_file(struct nfs4_file *fp) { free_file++; - list_del_init(&fp->fi_hash); + list_del(&fp->fi_hash); iput(fp->fi_inode); kfree(fp); } void +move_to_close_lru(struct nfs4_stateowner *sop) +{ + dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop); + /* remove stateowner from all other hash lists except perclient */ + list_del_init(&sop->so_idhash); + 
+ list_del_init(&sop->so_strhash); + list_del_init(&sop->so_perlockowner); + + list_add_tail(&sop->so_close_lru, &close_lru); + sop->so_time = get_seconds(); +} + +void release_state_owner(struct nfs4_stateid *stp, struct nfs4_stateowner **sopp, int flag) { @@ -866,16 +909,13 @@ release_state_owner(struct nfs4_stateid dprintk("NFSD: release_state_owner\n"); release_stateid(stp, flag); - /* - * release unused nfs4_stateowners. - * XXX will need to be placed on an open_stateid_lru list to be + + /* place unused nfs4_stateowners on so_close_lru list to be * released by the laundromat service after the lease period * to enable us to handle CLOSE replay */ - if (sop->so_confirmed && list_empty(&sop->so_perfilestate)) { - release_stateowner(sop); - *sopp = NULL; - } + if (sop->so_confirmed && list_empty(&sop->so_perfilestate)) + move_to_close_lru(sop); /* unused nfs4_file's are releseed. XXX slab cache? */ if (list_empty(&fp->fi_perfile)) { release_file(fp); @@ -940,15 +980,46 @@ find_file(unsigned int hashval, struct i return 0; } +#define TEST_ACCESS(x) (((x) > 0 && (x) < 4)?1:0) /* valid share_access values: 1..3 */ +#define TEST_DENY(x) (((x) >= 0 && (x) < 4)?1:0) /* valid share_deny values: 0..3 */ + +void +set_access(unsigned int *access, unsigned long bmap) { + int i; + + *access = 0; + for (i = 1; i < 4; i++) { + if(test_bit(i, &bmap)) + *access |= i; + } +} + +void +set_deny(unsigned int *deny, unsigned long bmap) { + int i; + + *deny = 0; + for (i = 0; i < 4; i++) { + if(test_bit(i, &bmap)) + *deny |= i ; + } +} + static int test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) { - if ((stp->st_share_access & open->op_share_deny) || - (stp->st_share_deny & open->op_share_access)) { + unsigned int access, deny; + + set_access(&access, stp->st_access_bmap); + set_deny(&deny, stp->st_deny_bmap); + if ((access & open->op_share_deny) || (deny & open->op_share_access)) return 0; - } return 1; } +/* + * Called to check deny when READ with all zero stateid or + * WRITE with all zero or all one stateid + */ int nfs4_share_conflict(struct svc_fh
*current_fh, unsigned int deny_type) { @@ -965,7 +1036,8 @@ nfs4_share_conflict(struct svc_fh *curre /* Search for conflicting share reservations */ list_for_each_safe(pos, next, &fp->fi_perfile) { stp = list_entry(pos, struct nfs4_stateid, st_perfile); - if (stp->st_share_deny & deny_type) + if (test_bit(deny_type, &stp->st_deny_bmap) || + test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap)) return nfserr_share_denied; } } @@ -1010,6 +1082,8 @@ nfs4_file_downgrade(struct file *filp, u * notfound: * verify clientid * create new owner + * + * called with nfs4_lock_state() held. */ int nfsd4_process_open1(struct nfsd4_open *open) @@ -1028,7 +1102,6 @@ nfsd4_process_open1(struct nfsd4_open *o if (STALE_CLIENTID(&open->op_clientid)) goto out; - nfs4_lock_state(); strhashval = ownerstr_hashval(clientid->cl_id, open->op_owner); if (find_openstateowner_str(strhashval, open, &sop)) { open->op_stateowner = sop; @@ -1086,10 +1159,11 @@ instantiate_new_owner: renew: renew_client(sop->so_client); out: - nfs4_unlock_state(); return status; } - +/* + * called with nfs4_lock_state() held. 
+ */ int nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) { @@ -1108,7 +1182,10 @@ nfsd4_process_open2(struct svc_rqst *rqs ino = current_fh->fh_dentry->d_inode; - nfs4_lock_state(); + status = nfserr_inval; + if (!TEST_ACCESS(open->op_share_access) || !TEST_DENY(open->op_share_deny)) + goto out; + fi_hashval = file_hashval(ino); if (find_file(fi_hashval, ino, &fp)) { /* Search for conflicting share reservations */ @@ -1119,6 +1196,9 @@ nfsd4_process_open2(struct svc_rqst *rqs stp = stq; continue; } + /* ignore lock owners */ + if (stq->st_stateowner->so_is_open_owner == 0) + continue; if (!test_share(stq,open)) goto out; } @@ -1137,7 +1217,7 @@ nfsd4_process_open2(struct svc_rqst *rqs GFP_KERNEL)) == NULL) goto out; - if (open->op_share_access && NFS4_SHARE_ACCESS_WRITE) + if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) flags = MAY_WRITE; else flags = MAY_READ; @@ -1156,15 +1236,18 @@ nfsd4_process_open2(struct svc_rqst *rqs /* This is an upgrade of an existing OPEN. 
* OR the incoming share with the existing * nfs4_stateid share */ - int share_access = open->op_share_access; + unsigned int share_access; - share_access &= ~(stp->st_share_access); + set_access(&share_access, stp->st_access_bmap); + share_access = ~share_access; + share_access &= open->op_share_access; /* update the struct file */ if ((status = nfs4_file_upgrade(&stp->st_vfs_file, share_access))) goto out; - stp->st_share_access |= share_access; - stp->st_share_deny |= open->op_share_deny; + /* remember the open */ + set_bit(open->op_share_access, &stp->st_access_bmap); + set_bit(open->op_share_deny, &stp->st_deny_bmap); /* bump the stateid */ update_stateid(&stp->st_stateid); } @@ -1194,7 +1277,6 @@ out: if (!open->op_stateowner->so_confirmed) open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM; - nfs4_unlock_state(); return status; out_free: kfree(stp); @@ -1250,9 +1332,11 @@ time_t nfs4_laundromat(void) { struct nfs4_client *clp; + struct nfs4_stateowner *sop; struct list_head *pos, *next; time_t cutoff = get_seconds() - NFSD_LEASE_TIME; - time_t t, return_val = NFSD_LEASE_TIME; + time_t t, clientid_val = NFSD_LEASE_TIME; + time_t u, close_val = NFSD_LEASE_TIME; nfs4_lock_state(); @@ -1261,18 +1345,30 @@ nfs4_laundromat(void) clp = list_entry(pos, struct nfs4_client, cl_lru); if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { t = clp->cl_time - cutoff; - if (return_val > t) - return_val = t; + if (clientid_val > t) + clientid_val = t; break; } dprintk("NFSD: purging unused client (clientid %08x)\n", clp->cl_clientid.cl_id); expire_client(clp); } - if (return_val < NFSD_LAUNDROMAT_MINTIMEOUT) - return_val = NFSD_LAUNDROMAT_MINTIMEOUT; + list_for_each_safe(pos, next, &close_lru) { + sop = list_entry(pos, struct nfs4_stateowner, so_close_lru); + if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) { + u = sop->so_time - cutoff; + if (close_val > u) + close_val = u; + break; + } + dprintk("NFSD: purging unused open stateowner (so_id 
%d)\n", + sop->so_id); + release_stateowner(sop); + } + if (close_val < clientid_val) + clientid_val = close_val; /* wake for whichever list expires first */ nfs4_unlock_state(); - return return_val; + return clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT ? NFSD_LAUNDROMAT_MINTIMEOUT : clientid_val; } void @@ -1285,17 +1381,22 @@ laundromat_main(void *not_used) schedule_delayed_work(&laundromat_work, t*HZ); } -/* search ownerid_hashtbl[] for stateid owner (stateid->si_stateownerid) */ +/* search close_lru for the stateid owner (stateid->si_stateownerid); + * only done when the caller passes CLOSE_STATE + */ struct nfs4_stateowner * -find_openstateowner_id(u32 st_id) { +find_openstateowner_id(u32 st_id, int flags) { struct list_head *pos, *next; struct nfs4_stateowner *local = NULL; - unsigned int hashval = ownerid_hashval(st_id); - list_for_each_safe(pos, next, &ownerid_hashtbl[hashval]) { - local = list_entry(pos, struct nfs4_stateowner, so_idhash); - if(local->so_id == st_id) - return local; + dprintk("NFSD: find_openstateowner_id %d\n", st_id); + if (flags & CLOSE_STATE) { + list_for_each_safe(pos, next, &close_lru) { + local = list_entry(pos, struct nfs4_stateowner, + so_close_lru); + if(local->so_id == st_id) + return local; + } } return NULL; } @@ -1303,7 +1404,8 @@ find_openstateowner_id(u32 st_id) { static inline int nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp) { - return (fhp->fh_dentry != stp->st_vfs_file.f_dentry); + return (stp->st_vfs_set == 0 || + fhp->fh_dentry->d_inode->i_ino != stp->st_vfs_file.f_dentry->d_inode->i_ino); } static int @@ -1375,7 +1477,7 @@ out: * Checks for sequence id mutating operations.
*/ int -nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp) +nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, clientid_t *lockclid) { int status; struct nfs4_stateid *stp; @@ -1412,6 +1514,21 @@ nfs4_preprocess_seqid_op(struct svc_fh * status = nfserr_bad_stateid; + /* for new lock stateowners, check that the lock->v.new.open_stateid + * refers to an open stateowner, and that the lockclid + * (nfs4_lock->v.new.clientid) is the same as the + * open_stateid->st_stateowner->so_client->clientid + */ + if (lockclid) { + struct nfs4_stateowner *sop = stp->st_stateowner; + struct nfs4_client *clp = sop->so_client; + + if (!sop->so_is_open_owner) + goto out; + if (!cmp_clid(&clp->cl_clientid, lockclid)) + goto out; + } + if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) { printk("NFSD: preprocess_seqid_op: fh-stateid mismatch!\n"); goto out; @@ -1463,24 +1580,30 @@ no_nfs4_stateid: * starting by trying to look up the stateowner. * If stateowner is not found - stateid is bad. 
*/ - if (!(sop = find_openstateowner_id(stateid->si_stateownerid))) { + if (!(sop = find_openstateowner_id(stateid->si_stateownerid, flags))) { printk("NFSD: preprocess_seqid_op: no stateowner or nfs4_stateid!\n"); status = nfserr_bad_stateid; goto out; } + *sopp = sop; check_replay: if (seqid == sop->so_seqid) { printk("NFSD: preprocess_seqid_op: retransmission?\n"); /* indicate replay to calling function */ status = NFSERR_REPLAY_ME; - } else + } else { printk("NFSD: preprocess_seqid_op: bad seqid (expected %d, got %d\n", sop->so_seqid +1, seqid); + *sopp = NULL; status = nfserr_bad_seqid; + } goto out; } +/* + * nfs4_unlock_state(); called in encode + */ int nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_confirm *oc) { @@ -1491,13 +1614,17 @@ nfsd4_open_confirm(struct svc_rqst *rqst dprintk("NFSD: nfsd4_open_confirm on file %.*s\n", (int)current_fh->fh_dentry->d_name.len, current_fh->fh_dentry->d_name.name); + + if ((status = fh_verify(rqstp, current_fh, S_IFREG, 0))) + goto out; + oc->oc_stateowner = NULL; nfs4_lock_state(); if ((status = nfs4_preprocess_seqid_op(current_fh, oc->oc_seqid, &oc->oc_req_stateid, CHECK_FH | CONFIRM | OPEN_STATE, - &oc->oc_stateowner, &stp))) + &oc->oc_stateowner, &stp, NULL))) goto out; sop = oc->oc_stateowner; @@ -1512,49 +1639,89 @@ nfsd4_open_confirm(struct svc_rqst *rqst stp->st_stateid.si_generation); status = nfs_ok; out: - nfs4_unlock_state(); return status; } + + +/* + * unset all bits in union bitmap (bmap) that + * do not exist in share (from successful OPEN_DOWNGRADE) + */ +static void +reset_union_bmap_access(unsigned long access, unsigned long *bmap) +{ + int i; + for (i = 1; i < 4; i++) { + if ((i & access) != i) + __clear_bit(i, bmap); + } +} + +static void +reset_union_bmap_deny(unsigned long deny, unsigned long *bmap) +{ + int i; + for (i = 0; i < 4; i++) { + if ((i & deny) != i) + __clear_bit(i, bmap); + } +} + +/* + * nfs4_unlock_state(); called in encode + */ + int 
nfsd4_open_downgrade(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_downgrade *od) { int status; struct nfs4_stateid *stp; + unsigned int share_access; dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n", (int)current_fh->fh_dentry->d_name.len, current_fh->fh_dentry->d_name.name); + od->od_stateowner = NULL; + status = nfserr_inval; + if (!TEST_ACCESS(od->od_share_access) || !TEST_DENY(od->od_share_deny)) + goto out; + nfs4_lock_state(); if ((status = nfs4_preprocess_seqid_op(current_fh, od->od_seqid, &od->od_stateid, CHECK_FH | OPEN_STATE, - &od->od_stateowner, &stp))) + &od->od_stateowner, &stp, NULL))) goto out; status = nfserr_inval; - if (od->od_share_access & ~stp->st_share_access) { - dprintk("NFSD:access not a subset current=%08x, desired=%08x\n", - stp->st_share_access, od->od_share_access); + if (!test_bit(od->od_share_access, &stp->st_access_bmap)) { + dprintk("NFSD:access not a subset current bitmap: 0x%lx, input access=%08x\n", + stp->st_access_bmap, od->od_share_access); goto out; } - if (od->od_share_deny & ~stp->st_share_deny) { - dprintk("NFSD:deny not a subset current=%08x, desired=%08x\n", - stp->st_share_deny, od->od_share_deny); + if (!test_bit(od->od_share_deny, &stp->st_deny_bmap)) { + dprintk("NFSD:deny not a subset current bitmap: 0x%lx, input deny=%08x\n", + stp->st_deny_bmap, od->od_share_deny); goto out; } + set_access(&share_access, stp->st_access_bmap); nfs4_file_downgrade(&stp->st_vfs_file, - stp->st_share_access & ~od->od_share_access); - stp->st_share_access = od->od_share_access; - stp->st_share_deny = od->od_share_deny; + share_access & ~od->od_share_access); + + reset_union_bmap_access(od->od_share_access, &stp->st_access_bmap); + reset_union_bmap_deny(od->od_share_deny, &stp->st_deny_bmap); + update_stateid(&stp->st_stateid); memcpy(&od->od_stateid, &stp->st_stateid, sizeof(stateid_t)); status = nfs_ok; out: - nfs4_unlock_state(); return status; } +/* + * nfs4_unlock_state() called after encode + */ int 
nfsd4_close(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_close *close) { @@ -1565,11 +1732,13 @@ nfsd4_close(struct svc_rqst *rqstp, stru (int)current_fh->fh_dentry->d_name.len, current_fh->fh_dentry->d_name.name); + close->cl_stateowner = NULL; nfs4_lock_state(); + /* check close_lru for replay */ if ((status = nfs4_preprocess_seqid_op(current_fh, close->cl_seqid, &close->cl_stateid, - CHECK_FH | OPEN_STATE, - &close->cl_stateowner, &stp))) + CHECK_FH | OPEN_STATE | CLOSE_STATE, + &close->cl_stateowner, &stp, NULL))) goto out; /* * Return success, but first update the stateid. @@ -1581,7 +1750,6 @@ nfsd4_close(struct svc_rqst *rqstp, stru /* release_state_owner() calls nfsd_close() if needed */ release_state_owner(stp, &close->cl_stateowner, OPEN_STATE); out: - nfs4_unlock_state(); return status; } @@ -1717,7 +1885,7 @@ find_lockstateowner_str(unsigned int has */ static struct nfs4_stateowner * -alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfsd4_lock *lock) { +alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfs4_stateid *open_stp, struct nfsd4_lock *lock) { struct nfs4_stateowner *sop; struct nfs4_replay *rp; unsigned int idhashval; @@ -1729,9 +1897,13 @@ alloc_init_lock_stateowner(unsigned int INIT_LIST_HEAD(&sop->so_strhash); INIT_LIST_HEAD(&sop->so_perclient); INIT_LIST_HEAD(&sop->so_perfilestate); + INIT_LIST_HEAD(&sop->so_perlockowner); + INIT_LIST_HEAD(&sop->so_close_lru); /* not used */ + sop->so_time = 0; list_add(&sop->so_idhash, &lock_ownerid_hashtbl[idhashval]); list_add(&sop->so_strhash, &lock_ownerstr_hashtbl[strhashval]); list_add(&sop->so_perclient, &clp->cl_perclient); + list_add(&sop->so_perlockowner, &open_stp->st_perlockowner); add_perclient++; sop->so_is_open_owner = 0; sop->so_id = current_ownerid++; @@ -1755,10 +1927,10 @@ alloc_init_lock_stateid(struct nfs4_stat if ((stp = kmalloc(sizeof(struct nfs4_stateid), GFP_KERNEL)) == NULL) goto out; - 
INIT_LIST_HEAD(&stp->st_hash); INIT_LIST_HEAD(&stp->st_perfile); INIT_LIST_HEAD(&stp->st_perfilestate); + INIT_LIST_HEAD(&stp->st_perlockowner); /* not used */ list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]); list_add(&stp->st_perfile, &fp->fi_perfile); list_add_perfile++; @@ -1771,15 +1943,24 @@ alloc_init_lock_stateid(struct nfs4_stat stp->st_stateid.si_generation = 0; stp->st_vfs_file = open_stp->st_vfs_file; stp->st_vfs_set = open_stp->st_vfs_set; - stp->st_share_access = -1; - stp->st_share_deny = -1; + stp->st_access_bmap = open_stp->st_access_bmap; + stp->st_deny_bmap = open_stp->st_deny_bmap; out: return stp; } +int +check_lock_length(u64 offset, u64 length) +{ + return ((length == 0) || ((length != ~(u64)0) && + LOFF_OVERFLOW(offset, length))); +} + /* * LOCK operation + * + * nfs4_unlock_state(); called in encode */ int nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock *lock) @@ -1795,6 +1976,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struc dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n", lock->lk_offset, lock->lk_length); + if (check_lock_length(lock->lk_offset, lock->lk_length)) + return nfserr_inval; + lock->lk_stateowner = NULL; nfs4_lock_state(); @@ -1812,12 +1996,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struc printk("NFSD: nfsd4_lock: clientid is stale!\n"); goto out; } + /* does the clientid in the lock owner own the open stateid? 
*/ + /* validate and update open stateid and open seqid */ status = nfs4_preprocess_seqid_op(current_fh, lock->lk_new_open_seqid, &lock->lk_new_open_stateid, CHECK_FH | OPEN_STATE, - &open_sop, &open_stp); + &open_sop, &open_stp, + &lock->v.new.clientid); if (status) goto out; /* create lockowner and lock stateid */ @@ -1836,8 +2023,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struc &lock->v.new.clientid, &lock_sop)) goto out; status = nfserr_resource; - if (!(lock->lk_stateowner = alloc_init_lock_stateowner(strhashval, - open_sop->so_client, lock))) + if (!(lock->lk_stateowner = alloc_init_lock_stateowner(strhashval, open_sop->so_client, open_stp, lock))) goto out; if ((lock_stp = alloc_init_lock_stateid(lock->lk_stateowner, fp, open_stp)) == NULL) @@ -1850,7 +2036,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struc lock->lk_old_lock_seqid, &lock->lk_old_lock_stateid, CHECK_FH | LOCK_STATE, - &lock->lk_stateowner, &lock_stp); + &lock->lk_stateowner, &lock_stp, NULL); if (status) goto out; } @@ -1938,7 +2124,6 @@ out_destroy_new_stateid: release_state_owner(lock_stp, &lock->lk_stateowner, LOCK_STATE); } out: - nfs4_unlock_state(); return status; } @@ -1956,6 +2141,9 @@ nfsd4_lockt(struct svc_rqst *rqstp, stru unsigned int strhashval; int status; + if (check_lock_length(lockt->lt_offset, lockt->lt_length)) + return nfserr_inval; + lockt->lt_stateowner = NULL; nfs4_lock_state(); @@ -1967,6 +2155,8 @@ nfsd4_lockt(struct svc_rqst *rqstp, stru if ((status = fh_verify(rqstp, current_fh, S_IFREG, 0))) { printk("NFSD: nfsd4_lockt: fh_verify() failed!\n"); + if (status == nfserr_symlink) + status = nfserr_inval; goto out; } @@ -1989,13 +2179,9 @@ nfsd4_lockt(struct svc_rqst *rqstp, stru strhashval = lock_ownerstr_hashval(inode, lockt->lt_clientid.cl_id, lockt->lt_owner); - if (find_lockstateowner_str(strhashval, &lockt->lt_owner, + find_lockstateowner_str(strhashval, &lockt->lt_owner, &lockt->lt_clientid, - &lockt->lt_stateowner)) { - printk("NFSD: nsfd4_lockt: lookup_lockowner() 
failed!\n"); - goto out; - } - + &lockt->lt_stateowner); sop = lockt->lt_stateowner; if (sop) { file_lock.fl_owner = (fl_owner_t) sop; @@ -2032,7 +2218,10 @@ out: nfs4_unlock_state(); return status; } - + +/* + * nfs4_unlock_state(); called in encode + */ int nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_locku *locku) { @@ -2043,13 +2232,18 @@ nfsd4_locku(struct svc_rqst *rqstp, stru dprintk("NFSD: nfsd4_locku: start=%Ld length=%Ld\n", locku->lu_offset, locku->lu_length); + + if (check_lock_length(locku->lu_offset, locku->lu_length)) + return nfserr_inval; + + locku->lu_stateowner = NULL; nfs4_lock_state(); if ((status = nfs4_preprocess_seqid_op(current_fh, locku->lu_seqid, &locku->lu_stateid, CHECK_FH | LOCK_STATE, - &locku->lu_stateowner, &stp))) + &locku->lu_stateowner, &stp, NULL))) goto out; filp = &stp->st_vfs_file; @@ -2085,7 +2279,6 @@ nfsd4_locku(struct svc_rqst *rqstp, stru memcpy(&locku->lu_stateid, &stp->st_stateid, sizeof(stateid_t)); out: - nfs4_unlock_state(); return status; out_nfserr: @@ -2093,6 +2286,92 @@ out_nfserr: goto out; } +/* + * returns + * 1: locks held by lockowner + * 0: no locks held by lockowner + */ +static int +check_for_locks(struct file *filp, struct nfs4_stateowner *lowner) +{ + struct file_lock **flpp; + struct inode *inode = filp->f_dentry->d_inode; + int status = 0; + + lock_kernel(); + for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) { + if ((*flpp)->fl_owner == (fl_owner_t)lowner) { + status = 1; + break; + } + } + unlock_kernel(); + return status; +} + +/* + * Look up the lock stateowner matching (rl_clientid, rl_owner) and pass it + * to release_stateowner() unless check_for_locks() reports a lock on one + * of its stateids. Returns nfserr_stale_clientid, nfserr_locks_held, or + * nfs_ok (also when no matching lockowner is found). + */ +int +nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *rlockowner) +{ + clientid_t *clid = &rlockowner->rl_clientid; + struct list_head *pos, *next; + struct nfs4_stateowner *local = NULL; + struct xdr_netobj *owner = &rlockowner->rl_owner; + int status, i; + + dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", + clid->cl_boot, clid->cl_id); + + /* XXX check for lease expiration */
+ + status = nfserr_stale_clientid; + if (STALE_CLIENTID(clid)) { + printk("NFSD: nfsd4_release_lockowner: clientid is stale!\n"); + return status; + } + + nfs4_lock_state(); + + /* find the lockowner */ + status = nfs_ok; + for (i=0; i < LOCK_HASH_SIZE; i++) { + list_for_each_safe(pos, next, &lock_ownerstr_hashtbl[i]) { + local = list_entry(pos, struct nfs4_stateowner, + so_strhash); + if(cmp_owner_str(local, owner, clid)) + goto found; /* leave both loops with the match */ + } + } + local = NULL; /* fell through: the last entry examined did not match */ +found: + if (local) { + struct nfs4_stateid *stp; + + /* check for any locks held by any stateid associated with the + * (lock) stateowner */ + status = nfserr_locks_held; + list_for_each_safe(pos, next, &local->so_perfilestate) { + stp = list_entry(pos, struct nfs4_stateid, + st_perfilestate); + if(stp->st_vfs_set) { + if (check_for_locks(&stp->st_vfs_file, local)) + goto out; + } + } + /* no locks held by (lock) stateowner */ + status = nfs_ok; + release_stateowner(local); + } +out: + nfs4_unlock_state(); + return status; +} + /* * Start and stop routines */ @@ -2128,6 +2407,7 @@ nfs4_state_init(void) memset(&zerostateid, 0, sizeof(stateid_t)); memset(&onestateid, ~0, sizeof(stateid_t)); + INIT_LIST_HEAD(&close_lru); INIT_LIST_HEAD(&client_lru); init_MUTEX(&client_sema); boot_time = get_seconds(); diff -puN fs/nfsd/nfs3xdr.c~CITI_NFS4_ALL fs/nfsd/nfs3xdr.c --- linux-2.6.3/fs/nfsd/nfs3xdr.c~CITI_NFS4_ALL 2004-02-19 16:47:08.000000000 -0500 +++ linux-2.6.3-bfields/fs/nfsd/nfs3xdr.c 2004-02-19 16:47:08.000000000 -0500 @@ -796,7 +796,7 @@ encode_entry(struct readdir_cd *ccd, con elen = slen + NFS3_ENTRY_BAGGAGE + (plus? 
NFS3_ENTRYPLUS_BAGGAGE : 0); if (cd->buflen < elen) { - cd->common.err = nfserr_readdir_nospc; + cd->common.err = nfserr_toosmall; return -EINVAL; } *p++ = xdr_one; /* mark entry present */ diff -puN fs/nfsd/nfsxdr.c~CITI_NFS4_ALL fs/nfsd/nfsxdr.c --- linux-2.6.3/fs/nfsd/nfsxdr.c~CITI_NFS4_ALL 2004-02-19 16:47:08.000000000 -0500 +++ linux-2.6.3-bfields/fs/nfsd/nfsxdr.c 2004-02-19 16:47:08.000000000 -0500 @@ -484,7 +484,7 @@ nfssvc_encode_entry(struct readdir_cd *c slen = XDR_QUADLEN(namlen); if ((buflen = cd->buflen - slen - 4) < 0) { - cd->common.err = nfserr_readdir_nospc; + cd->common.err = nfserr_toosmall; return -EINVAL; } *p++ = xdr_one; /* mark entry present */ diff -puN include/linux/nfs.h~CITI_NFS4_ALL include/linux/nfs.h --- linux-2.6.3/include/linux/nfs.h~CITI_NFS4_ALL 2004-02-19 16:47:08.000000000 -0500 +++ linux-2.6.3-bfields/include/linux/nfs.h 2004-02-19 16:47:08.000000000 -0500 @@ -92,7 +92,7 @@ NFSERR_NOT_SAME = 10027, /* v4 */ NFSERR_LOCK_RANGE = 10028, /* v4 */ NFSERR_SYMLINK = 10029, /* v4 */ - NFSERR_READDIR_NOSPC = 10030, /* v4 */ + NFSERR_RESTOREFH = 10030, /* v4 */ NFSERR_LEASE_MOVED = 10031, /* v4 */ NFSERR_ATTRNOTSUPP = 10032, /* v4 */ NFSERR_NO_GRACE = 10033, /* v4 */ diff -puN include/linux/nfsd/nfsfh.h~CITI_NFS4_ALL include/linux/nfsd/nfsfh.h --- linux-2.6.3/include/linux/nfsd/nfsfh.h~CITI_NFS4_ALL 2004-02-19 16:47:10.000000000 -0500 +++ linux-2.6.3-bfields/include/linux/nfsd/nfsfh.h 2004-02-19 16:47:10.000000000 -0500 @@ -209,14 +209,6 @@ fh_copy(struct svc_fh *dst, struct svc_f return dst; } -static __inline__ void -fh_dup2(struct svc_fh *dst, struct svc_fh *src) -{ - fh_put(dst); - dget(src->fh_dentry); - *dst = *src; -} - static __inline__ struct svc_fh * fh_init(struct svc_fh *fhp, int maxsize) { diff -puN include/linux/sunrpc/xdr.h~CITI_NFS4_ALL include/linux/sunrpc/xdr.h --- linux-2.6.3/include/linux/sunrpc/xdr.h~CITI_NFS4_ALL 2004-02-19 16:47:15.000000000 -0500 +++ linux-2.6.3-bfields/include/linux/sunrpc/xdr.h 2004-02-19 
16:47:15.000000000 -0500 @@ -225,6 +225,9 @@ xdr_reserve_space(struct xdr_stream *xdr extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base, unsigned int len); extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len); +int read_bytes_from_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len); +int read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj); + /* * Initialize an xdr_stream for decoding data. diff -puN net/sunrpc/xdr.c~CITI_NFS4_ALL net/sunrpc/xdr.c --- linux-2.6.3/net/sunrpc/xdr.c~CITI_NFS4_ALL 2004-02-19 16:47:15.000000000 -0500 +++ linux-2.6.3-bfields/net/sunrpc/xdr.c 2004-02-19 16:47:15.000000000 -0500 @@ -799,7 +799,7 @@ xdr_buf_subsegment(struct xdr_buf *buf, } /* obj is assumed to point to allocated memory of size at least len: */ -static int +int read_bytes_from_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len) { struct xdr_buf subbuf; @@ -824,7 +824,7 @@ out: return status; } -static int +int read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj) { u32 raw; _