From 7c0e8552405953b4f67e5549540f977d938bfe55 Mon Sep 17 00:00:00 2001 From: shadow Date: Wed, 11 Feb 2009 08:42:10 +0000 Subject: [PATCH] update client to support vanilla kernels up to 2.6.27. Branch b1_6 b=14250 i=green i=deen --- lustre/ChangeLog | 4 + lustre/autoconf/lustre-core.m4 | 284 +++++++++++++++++++++++-- lustre/include/linux/lustre_compat25.h | 91 +++++++- lustre/include/linux/lustre_lib.h | 1 - lustre/include/linux/lustre_patchless_compat.h | 4 +- lustre/include/lprocfs_status.h | 11 + lustre/llite/file.c | 146 ++++++++++++- lustre/llite/llite_internal.h | 5 +- lustre/llite/llite_lib.c | 2 +- lustre/llite/llite_mmap.c | 164 ++++++++++---- lustre/llite/llite_nfs.c | 200 ++++++++++------- lustre/llite/lloop.c | 14 +- lustre/llite/rw.c | 6 +- lustre/llite/symlink.c | 4 + lustre/lvfs/lvfs_linux.c | 14 +- lustre/mgc/mgc_request.c | 2 +- lustre/obdclass/linux/linux-module.c | 5 +- lustre/obdclass/linux/linux-sysctl.c | 117 ++++++---- lustre/obdclass/lprocfs_status.c | 4 +- lustre/obdclass/lustre_handles.c | 2 +- lustre/ptlrpc/service.c | 2 +- 21 files changed, 868 insertions(+), 214 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index f917d62..2067ab5 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -44,6 +44,10 @@ tbd Sun Microsystems, Inc. of Lustre filesystem with 4K stack may cause a stack overflow. For more information, please refer to bugzilla 17630. +Severity : enhancement +Bugzilla : 14250 +Description: Update client support to vanilla kernels up to 2.6.27. 
+ Severity : normal Frequency : rare Bugzilla : 18154 diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4 index 9c83f48..d23bd76 100644 --- a/lustre/autoconf/lustre-core.m4 +++ b/lustre/autoconf/lustre-core.m4 @@ -1051,15 +1051,20 @@ LB_LINUX_TRY_COMPILE([ AC_DEFUN([LC_PAGE_CHECKED], [AC_MSG_CHECKING([kernel has PageChecked and SetPageChecked]) LB_LINUX_TRY_COMPILE([ - #include - #include + #include +#ifdef HAVE_LINUX_MMTYPES_H + #include +#endif + #include ],[ - #ifndef PageChecked - #error PageChecked not defined in kernel - #endif - #ifndef SetPageChecked - #error SetPageChecked not defined in kernel - #endif + struct page *p; + + /* before 2.6.26 this define*/ + #ifndef PageChecked + /* 2.6.26 use function instead of define for it */ + SetPageChecked(p); + PageChecked(p); + #endif ],[ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_PAGE_CHECKED, 1, @@ -1177,6 +1182,9 @@ LB_LINUX_TRY_COMPILE([ ]) ]) +# 2.6.18 + + # 2.6.23 have return type 'void' for unregister_blkdev AC_DEFUN([LC_UNREGISTER_BLKDEV_RETURN_INT], [AC_MSG_CHECKING([if unregister_blkdev return int]) @@ -1194,6 +1202,25 @@ LB_LINUX_TRY_COMPILE([ ]) # 2.6.23 change .sendfile to .splice_read +# RHEL4 (-92 kernel) have both sendfile and .splice_read API +AC_DEFUN([LC_KERNEL_SENDFILE], +[AC_MSG_CHECKING([if kernel has .sendfile]) +LB_LINUX_TRY_COMPILE([ + #include +],[ + struct file_operations file; + + file.sendfile = NULL; +], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_KERNEL_SENDFILE, 1, + [kernel has .sendfile]) +],[ + AC_MSG_RESULT([no]) +]) +]) + +# 2.6.23 change .sendfile to .splice_read AC_DEFUN([LC_KERNEL_SPLICE_READ], [AC_MSG_CHECKING([if kernel has .splice_read]) LB_LINUX_TRY_COMPILE([ @@ -1213,11 +1240,219 @@ LB_LINUX_TRY_COMPILE([ # 2.6.23 extract nfs export related data into exportfs.h AC_DEFUN([LC_HAVE_EXPORTFS_H], -[ -tmpfl="$CFLAGS" -CFLAGS="$CFLAGS -I$LINUX_OBJ/include" -AC_CHECK_HEADERS([linux/exportfs.h]) -CFLAGS="$tmpfl" +[LB_CHECK_FILE([$LINUX/include/linux/exportfs.h], [ + 
AC_DEFINE(HAVE_LINUX_EXPORTFS_H, 1, + [kernel has include/exportfs.h]) +],[ + AC_MSG_RESULT([no]) +]) +]) + +# 2.6.23 have new page fault handling API +AC_DEFUN([LC_VM_OP_FAULT], +[AC_MSG_CHECKING([if kernel has .fault in vm_operation_struct]) +LB_LINUX_TRY_COMPILE([ + #include +],[ + struct vm_operations_struct op; + + op.fault = NULL; +], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_VM_OP_FAULT, 1, + [if kernel has .fault in vm_operation_struct]) +],[ + AC_MSG_RESULT([no]) +]) +]) + +#2.6.23 has new shrinker API +AC_DEFUN([LC_REGISTER_SHRINKER], +[AC_MSG_CHECKING([if kernel has register_shrinker]) +LB_LINUX_TRY_COMPILE([ + #include +],[ + register_shrinker(NULL); +], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_REGISTER_SHRINKER, 1, + [if kernel has register_shrinker]) +],[ + AC_MSG_RESULT([no]) +]) +]) + +# 2.6.24 has bio_endio with 2 args +AC_DEFUN([LC_BIO_ENDIO_2ARG], +[AC_MSG_CHECKING([if kernel has bio_endio with 2 args]) +LB_LINUX_TRY_COMPILE([ + #include +],[ + bio_endio(NULL, 0); +], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_BIO_ENDIO_2ARG, 1, + [if kernel has bio_endio with 2 args]) +],[ + AC_MSG_RESULT([no]) +]) +]) + +# 2.6.24 has new members in exports struct. 
+AC_DEFUN([LC_FH_TO_DENTRY], +[AC_MSG_CHECKING([if kernel has .fh_to_dentry member in export_operations struct]) +LB_LINUX_TRY_COMPILE([ +#ifdef HAVE_LINUX_EXPORTFS_H + #include +#else + #include +#endif +],[ + struct export_operations exp; + + exp.fh_to_dentry = NULL; +], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_FH_TO_DENTRY, 1, + [kernel has .fh_to_dentry member in export_operations struct]) +],[ + AC_MSG_RESULT([no]) +]) +]) + +# 2.6.24 needs linux/mm_types.h to be included +AC_DEFUN([LC_HAVE_MMTYPES_H], +[LB_CHECK_FILE([$LINUX/include/linux/mm_types.h], [ + AC_DEFINE(HAVE_LINUX_MMTYPES_H, 1, + [kernel has include/mm_types.h]) +],[ + AC_MSG_RESULT([no]) +]) +]) + +# 2.6.24 removes the long-standing deleted member from the procfs entry struct +AC_DEFUN([LC_PROCFS_DELETED], +[AC_MSG_CHECKING([if kernel has deleted member in procfs entry struct]) +LB_LINUX_TRY_COMPILE([ + #include +],[ + struct proc_dir_entry pde; + + pde.deleted = NULL; +], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_PROCFS_DELETED, 1, + [kernel has deleted member in procfs entry struct]) +],[ + AC_MSG_RESULT([no]) +]) +]) + +# 2.6.25 changes mapping_cap_writeback_dirty from a define to an inline function +AC_DEFUN([LC_MAPPING_CAP_WRITEBACK_DIRTY], +[AC_MSG_CHECKING([if kernel have mapping_cap_writeback_dirty]) +LB_LINUX_TRY_COMPILE([ + #include +],[ + #ifndef mapping_cap_writeback_dirty + mapping_cap_writeback_dirty(NULL); + #endif +],[ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_MAPPING_CAP_WRITEBACK_DIRTY, 1, + [kernel have mapping_cap_writeback_dirty]) +],[ + AC_MSG_RESULT([no]) +]) +]) + + + +# 2.6.26 no longer exports set_fs_pwd and changes fs_struct to use struct path +AC_DEFUN([LC_FS_STRUCT_USE_PATH], +[AC_MSG_CHECKING([fs_struct use path structure]) +LB_LINUX_TRY_COMPILE([ + #include + #include + #include +],[ + struct path path; + struct fs_struct fs; + + fs.pwd = path; +], [ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_FS_STRUCT_USE_PATH, 1, + [fs_struct use path structure]) +],[ + AC_MSG_RESULT([no]) +]) +]) + +# 2.6.26 removes path_release and uses path_put instead 
+AC_DEFUN([LC_PATH_RELEASE], +[AC_MSG_CHECKING([if path_release exist]) +LB_LINUX_TRY_COMPILE([ + #include + #include +],[ + path_release(NULL); +],[ + AC_DEFINE(HAVE_PATH_RELEASE, 1, [path_release exist]) + AC_MSG_RESULT([yes]) +],[ + AC_MSG_RESULT([no]) +]) +]) + +#2.6.27 +AC_DEFUN([LC_INODE_PERMISION_2ARGS], +[AC_MSG_CHECKING([inode_operations->permission have two args]) +LB_LINUX_TRY_COMPILE([ + #include +],[ + struct inode *inode; + + inode->i_op->permission(NULL,0); +],[ + AC_DEFINE(HAVE_INODE_PERMISION_2ARGS, 1, + [inode_operations->permission have two args]) + AC_MSG_RESULT([yes]) +],[ + AC_MSG_RESULT([no]) +]) +]) + +# 2.6.27 have file_remove_suid instead of remove_suid +AC_DEFUN([LC_FILE_REMOVE_SUID], +[AC_MSG_CHECKING([kernel have file_remove_suid]) +LB_LINUX_TRY_COMPILE([ + #include +],[ + file_remove_suid(NULL); +],[ + AC_DEFINE(HAVE_FILE_REMOVE_SUID, 1, + [kernel have file_remove_suid]) + AC_MSG_RESULT([yes]) +],[ + AC_MSG_RESULT([no]) +]) +]) + +# 2.6.27 have new page locking API +AC_DEFUN([LC_TRYLOCKPAGE], +[AC_MSG_CHECKING([kernel use trylock_page for page lock]) +LB_LINUX_TRY_COMPILE([ + #include +],[ + trylock_page(NULL); +],[ + AC_DEFINE(HAVE_TRYLOCK_PAGE, 1, + [kernel use trylock_page for page lock]) + AC_MSG_RESULT([yes]) +],[ + AC_MSG_RESULT([no]) +]) ]) # @@ -1313,8 +1548,30 @@ AC_DEFUN([LC_PROG_LINUX], LC_FS_RENAME_DOES_D_MOVE # 2.6.23 LC_UNREGISTER_BLKDEV_RETURN_INT + LC_KERNEL_SENDFILE LC_KERNEL_SPLICE_READ LC_HAVE_EXPORTFS_H + LC_VM_OP_FAULT + LC_REGISTER_SHRINKER + + #2.6.25 + LC_MAPPING_CAP_WRITEBACK_DIRTY + + # 2.6.24 + LC_HAVE_MMTYPES_H + LC_BIO_ENDIO_2ARG + LC_FH_TO_DENTRY + LC_PROCFS_DELETED + + # 2.6.26 + LC_FS_STRUCT_USE_PATH + LC_RCU_LIST_SAFE + LC_PATH_RELEASE + + # 2.6.27 + LC_INODE_PERMISION_2ARGS + LC_FILE_REMOVE_SUID + LC_TRYLOCKPAGE ]) # @@ -1547,6 +1804,7 @@ LB_LINUX_TRY_COMPILE([ ],[ AC_MSG_RESULT([no]) ]) + ],[ AC_MSG_RESULT([no]) ]) diff --git a/lustre/include/linux/lustre_compat25.h 
b/lustre/include/linux/lustre_compat25.h index f8a5b11..0980faa 100644 --- a/lustre/include/linux/lustre_compat25.h +++ b/lustre/include/linux/lustre_compat25.h @@ -57,6 +57,28 @@ struct ll_iattr_struct { #endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14) */ #ifndef HAVE_SET_FS_PWD + +#ifdef HAVE_FS_STRUCT_USE_PATH +static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt, + struct dentry *dentry) +{ + struct path path; + struct path old_pwd; + + path.mnt = mnt; + path.dentry = dentry; + write_lock(&fs->lock); + old_pwd = fs->pwd; + path_get(&path); + fs->pwd = path; + write_unlock(&fs->lock); + + if (old_pwd.dentry) + path_put(&old_pwd); +} + +#else + static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt, struct dentry *dentry) { @@ -75,6 +97,7 @@ static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt, mntput(old_pwdmnt); } } +#endif #else #define ll_set_fs_pwd set_fs_pwd #endif /* HAVE_SET_FS_PWD */ @@ -151,7 +174,12 @@ void groups_free(struct group_info *ginfo); #endif /* XXX our code should be using the 2.6 calls, not the other way around */ +#ifndef HAVE_TRYLOCK_PAGE #define TryLockPage(page) TestSetPageLocked(page) +#else +#define TryLockPage(page) (!trylock_page(page)) +#endif + #define Page_Uptodate(page) PageUptodate(page) #define ll_redirty_page(page) set_page_dirty(page) @@ -364,8 +392,17 @@ int ll_unregister_blkdev(unsigned int dev, const char *name) #define LL_RENAME_DOES_D_MOVE FS_ODD_RENAME #endif +#ifdef HAVE_FILE_REMOVE_SUID +#define ll_remove_suid(file, mnt) file_remove_suid(file) +#else + #ifdef HAVE_SECURITY_PLUG + #define ll_remove_suid(file,mnt) remove_suid(file->f_dentry,mnt) + #else + #define ll_remove_suid(file,mnt) remove_suid(file->f_dentry) + #endif +#endif + #ifdef HAVE_SECURITY_PLUG -#define ll_remove_suid(inode,mnt) remove_suid(inode,mnt) #define ll_vfs_rmdir(dir,entry,mnt) vfs_rmdir(dir,entry,mnt) #define ll_vfs_mkdir(inode,dir,mnt,mode) vfs_mkdir(inode,dir,mnt,mode) 
#define ll_vfs_link(old,mnt,dir,new,mnt1) vfs_link(old,mnt,dir,new,mnt1) @@ -377,7 +414,6 @@ int ll_unregister_blkdev(unsigned int dev, const char *name) #define ll_vfs_rename(old,old_dir,mnt,new,new_dir,mnt1) \ vfs_rename(old,old_dir,mnt,new,new_dir,mnt1) #else -#define ll_remove_suid(inode,mnt) remove_suid(inode) #define ll_vfs_rmdir(dir,entry,mnt) vfs_rmdir(dir,entry) #define ll_vfs_mkdir(inode,dir,mnt,mode) vfs_mkdir(inode,dir,mode) #define ll_vfs_link(old,mnt,dir,new,mnt1) vfs_link(old,dir,new) @@ -388,6 +424,57 @@ int ll_unregister_blkdev(unsigned int dev, const char *name) vfs_rename(old,old_dir,new,new_dir) #endif +#ifdef HAVE_REGISTER_SHRINKER +typedef int (*shrinker_t)(int nr_to_scan, gfp_t gfp_mask); + +static inline +struct shrinker *set_shrinker(int seek, shrinker_t func) +{ + struct shrinker *s; + + s = kmalloc(sizeof(*s), GFP_KERNEL); + if (s == NULL) + return (NULL); + + s->shrink = func; + s->seeks = seek; + + register_shrinker(s); + + return s; +} + +static inline +void remove_shrinker(struct shrinker *shrinker) +{ + if (shrinker == NULL) + return; + + unregister_shrinker(shrinker); + kfree(shrinker); +} +#endif + +#ifdef HAVE_BIO_ENDIO_2ARG +#define cfs_bio_io_error(a,b) bio_io_error((a)) +#define cfs_bio_endio(a,b,c) bio_endio((a),(c)) +#else +#define cfs_bio_io_error(a,b) bio_io_error((a),(b)) +#define cfs_bio_endio(a,b,c) bio_endio((a),(b),(c)) +#endif + +#ifdef HAVE_FS_STRUCT_USE_PATH +#define cfs_fs_pwd(fs) ((fs)->pwd.dentry) +#define cfs_fs_mnt(fs) ((fs)->pwd.mnt) +#else +#define cfs_fs_pwd(fs) ((fs)->pwd) +#define cfs_fs_mnt(fs) ((fs)->pwdmnt) +#endif + +#ifndef list_for_each_safe_rcu +#define list_for_each_safe_rcu(a,b,c) list_for_each_rcu(a, c) +#endif + #ifndef abs static inline int abs(int x) { diff --git a/lustre/include/linux/lustre_lib.h b/lustre/include/linux/lustre_lib.h index 1092c61..875615a 100644 --- a/lustre/include/linux/lustre_lib.h +++ b/lustre/include/linux/lustre_lib.h @@ -49,7 +49,6 @@ # include # include #else -# 
include # include # include # include diff --git a/lustre/include/linux/lustre_patchless_compat.h b/lustre/include/linux/lustre_patchless_compat.h index 6a1b9f5..f5ba194 100644 --- a/lustre/include/linux/lustre_patchless_compat.h +++ b/lustre/include/linux/lustre_patchless_compat.h @@ -52,7 +52,7 @@ static inline void ll_remove_from_page_cache(struct page *page) BUG_ON(!PageLocked(page)); -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15)) +#ifdef HAVE_RW_TREE_LOCK write_lock_irq(&mapping->tree_lock); #else spin_lock_irq(&mapping->tree_lock); @@ -65,7 +65,7 @@ static inline void ll_remove_from_page_cache(struct page *page) #else __dec_zone_page_state(page, NR_FILE_PAGES); #endif -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15)) +#ifdef HAVE_RW_TREE_LOCK write_unlock_irq(&mapping->tree_lock); #else spin_unlock_irq(&mapping->tree_lock); diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h index 32579d8..a80524d 100644 --- a/lustre/include/lprocfs_status.h +++ b/lustre/include/lprocfs_status.h @@ -521,6 +521,8 @@ extern struct rw_semaphore _lprocfs_lock; #define LPROCFS_EXIT() do { \ up_read(&_lprocfs_lock); \ } while(0) + +#ifdef HAVE_PROCFS_DELETED #define LPROCFS_ENTRY_AND_CHECK(dp) do { \ typecheck(struct proc_dir_entry *, dp); \ LPROCFS_ENTRY(); \ @@ -529,6 +531,14 @@ extern struct rw_semaphore _lprocfs_lock; return -ENODEV; \ } \ } while(0) +#define LPROCFS_CHECK_DELETED(dp) ((dp)->deleted) +#else + +#define LPROCFS_ENTRY_AND_CHECK(dp) \ + LPROCFS_ENTRY(); +#define LPROCFS_CHECK_DELETED(dp) (0) +#endif + #define LPROCFS_WRITE_ENTRY() do { \ down_write(&_lprocfs_lock); \ } while(0) @@ -536,6 +546,7 @@ extern struct rw_semaphore _lprocfs_lock; up_write(&_lprocfs_lock); \ } while(0) + /* You must use these macros when you want to refer to * the import in a client obd_device for a lprocfs entry */ #define LPROCFS_CLIMP_CHECK(obd) do { \ diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 7b8e43a..9c9af2d 100644 --- 
a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -1796,11 +1796,12 @@ static ssize_t ll_file_write(struct file *file, const char *buf, size_t count, #endif } +#ifdef HAVE_KERNEL_SENDFILE /* * Send file content (through pagecache) somewhere with helper */ -static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count, - read_actor_t actor, void *target) +static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos, + size_t count, read_actor_t actor, void *target) { struct inode *inode = in_file->f_dentry->d_inode; struct ll_inode_info *lli = ll_i2info(inode); @@ -1809,10 +1810,10 @@ static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count, struct ll_lock_tree_node *node; struct ost_lvb lvb; struct ll_ra_read bead; - int rc; - ssize_t retval; + ssize_t rc; __u64 kms; ENTRY; + CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n", inode->i_ino, inode->i_generation, inode, count, *ppos); @@ -1826,8 +1827,10 @@ static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count, in_file->f_ra.ra_pages = 0; /* File with no objects, nothing to lock */ - if (!lsm) - RETURN(generic_file_sendfile(in_file, ppos, count, actor, target)); + if (!lsm) { + rc = generic_file_sendfile(in_file, ppos, count, actor, target); + RETURN(rc); + } node = ll_node_from_inode(inode, *ppos, *ppos + count - 1, LCK_PR); if (IS_ERR(node)) @@ -1867,8 +1870,8 @@ static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count, /* A glimpse is necessary to determine whether we return a * short read (B) or some zeroes at the end of the buffer (C) */ ll_inode_size_unlock(inode, 1); - retval = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED); - if (retval) + rc = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED); + if (rc) goto out; } else { /* region is within kms and, hence, within real file size (A) */ @@ -1884,13 +1887,115 @@ static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count, 
ll_ra_read_in(in_file, &bead); /* BUG: 5972 */ file_accessed(in_file); - retval = generic_file_sendfile(in_file, ppos, count, actor, target); + rc = generic_file_sendfile(in_file, ppos, count, actor, target); ll_ra_read_ex(in_file, &bead); out: ll_tree_unlock(&tree); - RETURN(retval); + RETURN(rc); } +#endif + +/* change based on + * http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=f0930fffa99e7fe0a0c4b6c7d9a244dc88288c27 + */ +#ifdef HAVE_KERNEL_SPLICE_READ +static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos, + struct pipe_inode_info *pipe, size_t count, + unsigned int flags) +{ + struct inode *inode = in_file->f_dentry->d_inode; + struct ll_inode_info *lli = ll_i2info(inode); + struct lov_stripe_md *lsm = lli->lli_smd; + struct ll_lock_tree tree; + struct ll_lock_tree_node *node; + struct ost_lvb lvb; + struct ll_ra_read bead; + ssize_t rc; + __u64 kms; + ENTRY; + + CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n", + inode->i_ino, inode->i_generation, inode, count, *ppos); + + /* "If nbyte is 0, read() will return 0 and have no other results." 
+ * -- Single Unix Spec */ + if (count == 0) + RETURN(0); + + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_READ_BYTES, count); + /* turn off the kernel's read-ahead */ + in_file->f_ra.ra_pages = 0; + + /* File with no objects, nothing to lock */ + if (!lsm) { + rc = generic_file_splice_read(in_file, ppos, pipe, count, flags); + RETURN(rc); + } + + node = ll_node_from_inode(inode, *ppos, *ppos + count - 1, LCK_PR); + if (IS_ERR(node)) + RETURN(PTR_ERR(node)); + + tree.lt_fd = LUSTRE_FPRIVATE(in_file); + rc = ll_tree_lock(&tree, node, NULL, count, + in_file->f_flags & O_NONBLOCK?LDLM_FL_BLOCK_NOWAIT:0); + if (rc != 0) + RETURN(rc); + + ll_clear_file_contended(inode); + ll_inode_size_lock(inode, 1); + /* + * Consistency guarantees: following possibilities exist for the + * relation between region being read and real file size at this + * moment: + * + * (A): the region is completely inside of the file; + * + * (B-x): x bytes of region are inside of the file, the rest is + * outside; + * + * (C): the region is completely outside of the file. + * + * This classification is stable under DLM lock acquired by + * ll_tree_lock() above, because to change class, other client has to + * take DLM lock conflicting with our lock. Also, any updates to + * ->i_size by other threads on this client are serialized by + * ll_inode_size_lock(). This guarantees that short reads are handled + * correctly in the face of concurrent writes and truncates. 
+ */ + inode_init_lvb(inode, &lvb); + obd_merge_lvb(ll_i2sbi(inode)->ll_osc_exp, lsm, &lvb, 1); + kms = lvb.lvb_size; + if (*ppos + count - 1 > kms) { + /* A glimpse is necessary to determine whether we return a + * short read (B) or some zeroes at the end of the buffer (C) */ + ll_inode_size_unlock(inode, 1); + rc = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED); + if (rc) + goto out; + } else { + /* region is within kms and, hence, within real file size (A) */ + i_size_write(inode, kms); + ll_inode_size_unlock(inode, 1); + } + + CDEBUG(D_INFO, "Send ino %lu, "LPSZ" bytes, offset %lld, i_size %llu\n", + inode->i_ino, count, *ppos, i_size_read(inode)); + + bead.lrr_start = *ppos >> CFS_PAGE_SHIFT; + bead.lrr_count = (count + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT; + ll_ra_read_in(in_file, &bead); + /* BUG: 5972 */ + file_accessed(in_file); + rc = generic_file_splice_read(in_file, ppos, pipe, count, flags); + ll_ra_read_ex(in_file, &bead); + + out: + ll_tree_unlock(&tree); + RETURN(rc); +} +#endif static int ll_lov_recreate_obj(struct inode *inode, struct file *file, unsigned long arg) @@ -3079,7 +3184,11 @@ int lustre_check_acl(struct inode *inode, int mask) } #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)) +#ifndef HAVE_INODE_PERMISION_2ARGS int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd) +#else +int ll_inode_permission(struct inode *inode, int mask) +#endif { CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n", inode->i_ino, inode->i_generation, inode, mask); @@ -3088,7 +3197,7 @@ int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd) return generic_permission(inode, mask, lustre_check_acl); } #else -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) +#ifndef HAVE_INODE_PERMISION_2ARGS int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd) #else int ll_inode_permission(struct inode *inode, int mask) @@ -3158,7 +3267,12 @@ struct file_operations ll_file_operations = { .release = 
ll_file_release, .mmap = ll_file_mmap, .llseek = ll_file_seek, +#ifdef HAVE_KERNEL_SPLICE_READ + .splice_read = ll_file_splice_read, +#endif +#ifdef HAVE_KERNEL_SENDFILE .sendfile = ll_file_sendfile, +#endif .fsync = ll_fsync, }; @@ -3180,7 +3294,12 @@ struct file_operations ll_file_operations_flock = { .release = ll_file_release, .mmap = ll_file_mmap, .llseek = ll_file_seek, +#ifdef HAVE_KERNEL_SPLICE_READ + .splice_read = ll_file_splice_read, +#endif +#ifdef HAVE_KERNEL_SENDFILE .sendfile = ll_file_sendfile, +#endif .fsync = ll_fsync, #ifdef HAVE_F_OP_FLOCK .flock = ll_file_flock, @@ -3207,7 +3326,12 @@ struct file_operations ll_file_operations_noflock = { .release = ll_file_release, .mmap = ll_file_mmap, .llseek = ll_file_seek, +#ifdef HAVE_KERNEL_SPLICE_READ + .splice_read = ll_file_splice_read, +#endif +#ifdef HAVE_KERNEL_SENDFILE .sendfile = ll_file_sendfile, +#endif .fsync = ll_fsync, #ifdef HAVE_F_OP_FLOCK .flock = ll_file_noflock, diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 43dac19..e6c19dd 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -647,7 +647,7 @@ int ll_getattr_it(struct vfsmount *mnt, struct dentry *de, struct lookup_intent *it, struct kstat *stat); int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat); struct ll_file_data *ll_file_data_get(void); -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) +#ifndef HAVE_INODE_PERMISION_2ARGS int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd); #else int ll_inode_permission(struct inode *inode, int mask); @@ -727,9 +727,6 @@ int ll_process_config(struct lustre_cfg *lcfg); /* llite/llite_nfs.c */ extern struct export_operations lustre_export_operations; __u32 get_uuid2int(const char *name, int len); -struct dentry *ll_fh_to_dentry(struct super_block *sb, __u32 *data, int len, - int fhtype, int parent); -int ll_dentry_to_fh(struct dentry *, __u32 *datap, int *lenp, int need_parent); /* 
llite/special.c */ extern struct inode_operations ll_special_inode_operations; diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 2cc2467..d532e49 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -1355,7 +1355,7 @@ static int ll_setattr_do_truncate(struct inode *inode, loff_t new_size) rc = vmtruncate(inode, new_size); clear_bit(LLI_F_SRVLOCK, &lli->lli_flags); if (rc != 0) { - LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0); + LASSERT(SEM_COUNT(&lli->lli_size_sem) <= 0); ll_inode_size_unlock(inode, 0); } } diff --git a/lustre/llite/llite_mmap.c b/lustre/llite/llite_mmap.c index 886cf9d..a56b6de 100644 --- a/lustre/llite/llite_mmap.c +++ b/lustre/llite/llite_mmap.c @@ -81,8 +81,7 @@ struct ll_lock_tree_node { int lt_get_mmap_locks(struct ll_lock_tree *tree, unsigned long addr, size_t count); -struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address, - int *type); +static struct vm_operations_struct ll_file_vm_ops; struct ll_lock_tree_node * ll_node_from_inode(struct inode *inode, __u64 start, __u64 end, ldlm_mode_t mode) @@ -285,9 +284,19 @@ static ldlm_mode_t mode_from_vma(struct vm_area_struct *vma) return LCK_PR; } +static void policy_from_vma_pgoff(ldlm_policy_data_t *policy, + struct vm_area_struct *vma, + __u64 pgoff, size_t count) +{ + policy->l_extent.start = pgoff << CFS_PAGE_SHIFT; + policy->l_extent.end = (policy->l_extent.start + count - 1) | + ~CFS_PAGE_MASK; +} + static void policy_from_vma(ldlm_policy_data_t *policy, struct vm_area_struct *vma, unsigned long addr, size_t count) + { policy->l_extent.start = ((addr - vma->vm_start) & CFS_PAGE_MASK) + ((__u64)vma->vm_pgoff << CFS_PAGE_SHIFT); @@ -308,7 +317,7 @@ static struct vm_area_struct * our_vma(unsigned long addr, size_t count) spin_lock(&mm->page_table_lock); for(vma = find_vma(mm, addr); vma != NULL && vma->vm_start < (addr + count); vma = vma->vm_next) { - if (vma->vm_ops && vma->vm_ops->nopage == ll_nopage && + if (vma->vm_ops && 
vma->vm_ops == &ll_file_vm_ops && vma->vm_flags & VM_SHARED) { ret = vma; break; @@ -360,44 +369,30 @@ int lt_get_mmap_locks(struct ll_lock_tree *tree, } RETURN(0); } -/** - * Page fault handler. - * - * \param vma - is virtiual area struct related to page fault - * \param address - address when hit fault - * \param type - of fault - * - * \return allocated and filled page for address - * \retval NOPAGE_SIGBUS if page not exist on this address - * \retval NOPAGE_OOM not have memory for allocate new page - */ -struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address, - int *type) + +static int ll_get_extent_lock(struct vm_area_struct *vma, unsigned long pgoff, + int *save_flags, struct lustre_handle *lockh) { struct file *filp = vma->vm_file; struct ll_file_data *fd = LUSTRE_FPRIVATE(filp); struct inode *inode = filp->f_dentry->d_inode; - struct lustre_handle lockh = { 0 }; ldlm_policy_data_t policy; ldlm_mode_t mode; - struct page *page = NULL; struct ll_inode_info *lli = ll_i2info(inode); - struct lov_stripe_md *lsm; struct ost_lvb lvb; __u64 kms, old_mtime; - unsigned long pgoff, size, rand_read, seq_read; - int rc = 0; + unsigned long size; ENTRY; if (lli->lli_smd == NULL) { CERROR("No lsm on fault?\n"); - RETURN(NOPAGE_SIGBUS); + RETURN(0); } ll_clear_file_contended(inode); /* start and end the lock on the first and last bytes in the page */ - policy_from_vma(&policy, vma, address, CFS_PAGE_SIZE); + policy_from_vma_pgoff(&policy, vma, pgoff, CFS_PAGE_SIZE); CDEBUG(D_MMAP, "nopage vma %p inode %lu, locking ["LPU64", "LPU64"]\n", vma, inode->i_ino, policy.l_extent.start, policy.l_extent.end); @@ -405,26 +400,28 @@ struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address, mode = mode_from_vma(vma); old_mtime = LTIME_S(inode->i_mtime); - lsm = lli->lli_smd; - rc = ll_extent_lock(fd, inode, lsm, mode, &policy, - &lockh, LDLM_FL_CBPENDING | LDLM_FL_NO_LRU); - if (rc != 0) - RETURN(NOPAGE_SIGBUS); + if(ll_extent_lock(fd, inode, 
lli->lli_smd, mode, &policy, + lockh, LDLM_FL_CBPENDING | LDLM_FL_NO_LRU) != 0) + RETURN(0); if (vma->vm_flags & VM_EXEC && LTIME_S(inode->i_mtime) != old_mtime) CWARN("binary changed. inode %lu\n", inode->i_ino); - lov_stripe_lock(lsm); + lov_stripe_lock(lli->lli_smd); inode_init_lvb(inode, &lvb); - obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1); + if(obd_merge_lvb(ll_i2obdexp(inode), lli->lli_smd, &lvb, 1)) { + lov_stripe_unlock(lli->lli_smd); + RETURN(0); + } kms = lvb.lvb_size; - pgoff = ((address - vma->vm_start) >> CFS_PAGE_SHIFT) + vma->vm_pgoff; size = (kms + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT; + CDEBUG(D_INFO, "Kms %lu - %lu\n", size, pgoff); if (pgoff >= size) { - lov_stripe_unlock(lsm); + lov_stripe_unlock(lli->lli_smd); ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED); + lov_stripe_lock(lli->lli_smd); } else { /* XXX change inode size without ll_inode_size_lock() held! * there is a race condition with truncate path. (see @@ -446,29 +443,69 @@ struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address, CDEBUG(D_INODE, "ino=%lu, updating i_size %llu\n", inode->i_ino, i_size_read(inode)); } - lov_stripe_unlock(lsm); } /* If mapping is writeable, adjust kms to cover this page, * but do not extend kms beyond actual file size. * policy.l_extent.end is set to the end of the page by policy_from_vma * bug 10919 */ - lov_stripe_lock(lsm); if (mode == LCK_PW) - obd_adjust_kms(ll_i2obdexp(inode), lsm, + obd_adjust_kms(ll_i2obdexp(inode), lli->lli_smd, min_t(loff_t, policy.l_extent.end + 1, i_size_read(inode)), 0); - lov_stripe_unlock(lsm); + lov_stripe_unlock(lli->lli_smd); /* disable VM_SEQ_READ and use VM_RAND_READ to make sure that * the kernel will not read other pages not covered by ldlm in * filemap_nopage. we do our readahead in ll_readpage. 
*/ - rand_read = vma->vm_flags & VM_RAND_READ; - seq_read = vma->vm_flags & VM_SEQ_READ; + *save_flags = vma->vm_flags & (VM_RAND_READ | VM_SEQ_READ); vma->vm_flags &= ~ VM_SEQ_READ; vma->vm_flags |= VM_RAND_READ; + return 1; +} + +static void ll_put_extent_lock(struct vm_area_struct *vma, int save_flags, + struct lustre_handle *lockh) +{ + struct file *filp = vma->vm_file; + struct ll_file_data *fd = LUSTRE_FPRIVATE(filp); + struct inode *inode = filp->f_dentry->d_inode; + ldlm_mode_t mode; + + mode = mode_from_vma(vma); + vma->vm_flags &= ~(VM_RAND_READ | VM_SEQ_READ); + vma->vm_flags |= save_flags; + + ll_extent_unlock(fd, inode, ll_i2info(inode)->lli_smd, mode, lockh); +} + +#ifndef HAVE_VM_OP_FAULT +/** + * Page fault handler. + * + * \param vma - is virtiual area struct related to page fault + * \param address - address when hit fault + * \param type - of fault + * + * \return allocated and filled page for address + * \retval NOPAGE_SIGBUS if page not exist on this address + * \retval NOPAGE_OOM not have memory for allocate new page + */ +struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address, + int *type) +{ + struct lustre_handle lockh = { 0 }; + int save_fags = 0; + unsigned long pgoff; + struct page *page; + ENTRY; + + pgoff = ((address - vma->vm_start) >> CFS_PAGE_SHIFT) + vma->vm_pgoff; + if(!ll_get_extent_lock(vma, pgoff, &save_fags, &lockh)) + RETURN(NOPAGE_SIGBUS); + page = filemap_nopage(vma, address, type); if (page != NOPAGE_SIGBUS && page != NOPAGE_OOM) LL_CDEBUG_PAGE(D_PAGE, page, "got addr %lu type %lx\n", address, @@ -477,13 +514,48 @@ struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address, CDEBUG(D_PAGE, "got addr %lu type %lx - SIGBUS\n", address, (long)type); - vma->vm_flags &= ~VM_RAND_READ; - vma->vm_flags |= (rand_read | seq_read); + ll_put_extent_lock(vma, save_fags, &lockh); - ll_extent_unlock(fd, inode, ll_i2info(inode)->lli_smd, mode, &lockh); RETURN(page); } +#else +/* New fault() API*/ +/** + * 
Page fault handler. + * + * \param vma - virtual memory area in which the fault occurred + * \param vmf - fault descriptor; vmf->pgoff selects the page to lock and + * filemap_fault() is expected to store the page in vmf->page + * + * \return the VM_FAULT_* code returned by filemap_fault() + * \retval VM_FAULT_SIGBUS if ll_get_extent_lock() fails + * (no filemap_fault() call is made in that case) + */ +int ll_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct lustre_handle lockh = { 0 }; + int save_fags = 0; + int rc; + ENTRY; + + if(!ll_get_extent_lock(vma, vmf->pgoff, &save_fags, &lockh)) + RETURN(VM_FAULT_SIGBUS); + + rc = filemap_fault(vma, vmf); + if (vmf->page) + LL_CDEBUG_PAGE(D_PAGE, vmf->page, "got addr %p type NOPAGE\n", + vmf->virtual_address); + else + CDEBUG(D_PAGE, "got addr %p - SIGBUS\n", + vmf->virtual_address); + + ll_put_extent_lock(vma, save_fags, &lockh); + + RETURN(rc); +} +#endif + /* To avoid cancel the locks covering mmapped region for lock cache pressure, * we track the mapped vma count by lli_mmap_cnt. * ll_vm_open(): when first vma is linked, split locks from lru. 
@@ -548,6 +620,7 @@ static void ll_vm_close(struct vm_area_struct *vma) } } +#ifndef HAVE_VM_OP_FAULT #ifndef HAVE_FILEMAP_POPULATE static int (*filemap_populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock); #endif @@ -562,6 +635,7 @@ static int ll_populate(struct vm_area_struct *area, unsigned long address, rc = filemap_populate(area, address, len, prot, pgoff, 1); RETURN(rc); } +#endif /* return the user space pointer that maps to a file offset via a vma */ static inline unsigned long file_to_user(struct vm_area_struct *vma, __u64 byte) @@ -588,10 +662,14 @@ int ll_teardown_mmaps(struct address_space *mapping, __u64 first, __u64 last) } static struct vm_operations_struct ll_file_vm_ops = { - .nopage = ll_nopage, .open = ll_vm_open, .close = ll_vm_close, +#ifdef HAVE_VM_OP_FAULT + .fault = ll_fault, +#else + .nopage = ll_nopage, .populate = ll_populate, +#endif }; int ll_file_mmap(struct file * file, struct vm_area_struct * vma) @@ -602,7 +680,7 @@ int ll_file_mmap(struct file * file, struct vm_area_struct * vma) ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode), LPROC_LL_MAP, 1); rc = generic_file_mmap(file, vma); if (rc == 0) { -#ifndef HAVE_FILEMAP_POPULATE +#if !defined(HAVE_FILEMAP_POPULATE) && !defined(HAVE_VM_OP_FAULT) if (!filemap_populate) filemap_populate = vma->vm_ops->populate; #endif diff --git a/lustre/llite/llite_nfs.c b/lustre/llite/llite_nfs.c index c941d73..76486b5 100644 --- a/lustre/llite/llite_nfs.c +++ b/lustre/llite/llite_nfs.c @@ -68,36 +68,30 @@ static int ll_nfs_test_inode(struct inode *inode, void *opaque) } static struct inode * search_inode_for_lustre(struct super_block *sb, - unsigned long ino, - unsigned long generation, - int mode) + struct ll_fid *iid) { struct ptlrpc_request *req = NULL; struct ll_sb_info *sbi = ll_s2sbi(sb); - struct ll_fid fid; unsigned long valid = 0; int eadatalen = 0, rc; struct inode *inode = NULL; - struct ll_fid iid = { .id = 
ino, .generation = generation }; ENTRY; - inode = ILOOKUP(sb, ino, ll_nfs_test_inode, &iid); + inode = ILOOKUP(sb, iid->id, ll_nfs_test_inode, iid); if (inode) RETURN(inode); - if (S_ISREG(mode)) { - rc = ll_get_max_mdsize(sbi, &eadatalen); - if (rc) - RETURN(ERR_PTR(rc)); - valid |= OBD_MD_FLEASIZE; - } - fid.id = (__u64)ino; - fid.generation = generation; - fid.f_type = mode; - rc = mdc_getattr(sbi->ll_mdc_exp, &fid, valid, eadatalen, &req); + rc = ll_get_max_mdsize(sbi, &eadatalen); + if (rc) + RETURN(ERR_PTR(rc)); + + valid |= OBD_MD_FLEASIZE; + + /* mds_fid2dentry ignores f_type */ + rc = mdc_getattr(sbi->ll_mdc_exp, iid, valid, eadatalen, &req); if (rc) { - CERROR("failure %d inode %lu\n", rc, ino); + CERROR("failure %d inode "LPU64"\n", rc, iid->id); RETURN(ERR_PTR(rc)); } @@ -111,27 +105,27 @@ static struct inode * search_inode_for_lustre(struct super_block *sb, RETURN(inode); } -static struct dentry *ll_iget_for_nfs(struct super_block *sb, unsigned long ino, - __u32 generation, umode_t mode) +static struct dentry *ll_iget_for_nfs(struct super_block *sb, + struct ll_fid *iid) { struct inode *inode; struct dentry *result; ENTRY; - if (ino == 0) + if (iid->id == 0) RETURN(ERR_PTR(-ESTALE)); - inode = search_inode_for_lustre(sb, ino, generation, mode); - if (IS_ERR(inode)) { + inode = search_inode_for_lustre(sb, iid); + if (IS_ERR(inode)) RETURN(ERR_PTR(PTR_ERR(inode))); - } + if (is_bad_inode(inode) || - (generation && inode->i_generation != generation)){ + (iid->generation && inode->i_generation != iid->generation)) { /* we didn't find the right inode.. 
*/ CERROR("Inode %lu, Bad count: %lu %d or version %u %u\n", inode->i_ino, (unsigned long)inode->i_nlink, atomic_read(&inode->i_count), inode->i_generation, - generation); + iid->generation); iput(inode); RETURN(ERR_PTR(-ESTALE)); } @@ -146,57 +140,102 @@ static struct dentry *ll_iget_for_nfs(struct super_block *sb, unsigned long ino, RETURN(result); } -struct dentry *ll_fh_to_dentry(struct super_block *sb, __u32 *data, int len, - int fhtype, int parent) +#define LUSTRE_NFS_FID 0x94 + +struct lustre_nfs_fid { + struct ll_fid child; + struct ll_fid parent; + umode_t mode; +}; + +/* The return value is file handle type: + * LUSTRE_NFS_FID -- handle holds a struct lustre_nfs_fid + * (child fid, parent fid and file mode); + * 255 -- error, *plen is smaller than struct lustre_nfs_fid. + */ +static int ll_encode_fh(struct dentry *de, __u32 *fh, int *plen, + int connectable) { - switch (fhtype) { - case 2: - if (len < 5) - break; - if (parent) - return ll_iget_for_nfs(sb, data[3], 0, data[4]); - case 1: - if (len < 3) - break; - if (parent) - break; - return ll_iget_for_nfs(sb, data[0], data[1], data[2]); - default: break; - } - return ERR_PTR(-EINVAL); + struct inode *inode = de->d_inode; + struct inode *parent = de->d_parent->d_inode; + struct lustre_nfs_fid *nfs_fid = (void *)fh; + ENTRY; + + CDEBUG(D_INFO, "encoding for (%lu) maxlen=%d minlen=%lu\n", + inode->i_ino, *plen, + sizeof(struct lustre_nfs_fid)); + + if (*plen < sizeof(struct lustre_nfs_fid)) + RETURN(255); + + ll_inode2fid(&nfs_fid->child, inode); + ll_inode2fid(&nfs_fid->parent, parent); + + nfs_fid->mode = (S_IFMT & inode->i_mode); + *plen = sizeof(struct lustre_nfs_fid); + + RETURN(LUSTRE_NFS_FID); } -int ll_dentry_to_fh(struct dentry *dentry, __u32 *datap, int *lenp, - int need_parent) +#ifdef HAVE_FH_TO_DENTRY +static struct dentry *ll_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) { - if (*lenp < 3) - return 255; - *datap++ = dentry->d_inode->i_ino; - *datap++ = dentry->d_inode->i_generation; - *datap++ = 
(__u32)(S_IFMT & dentry->d_inode->i_mode); - - if (*lenp == 3 || S_ISDIR(dentry->d_inode->i_mode)) { - *lenp = 3; - return 1; - } - if (dentry->d_parent) { - *datap++ = dentry->d_parent->d_inode->i_ino; - *datap++ = (__u32)(S_IFMT & dentry->d_parent->d_inode->i_mode); + struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid; - *lenp = 5; - return 2; - } - *lenp = 3; - return 1; + if (fh_type != LUSTRE_NFS_FID) + RETURN(ERR_PTR(-EINVAL)); + + RETURN(ll_iget_for_nfs(sb, &nfs_fid->child)); } +static struct dentry *ll_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid; + + if (fh_type != LUSTRE_NFS_FID) + RETURN(ERR_PTR(-EINVAL)); + RETURN(ll_iget_for_nfs(sb, &nfs_fid->parent)); +} + +#else +/* + * This length is counted as amount of __u32, + * It is composed of a fid and a mode + */ +static struct dentry *ll_decode_fh(struct super_block *sb, __u32 *fh, int fh_len, + int fh_type, + int (*acceptable)(void *, struct dentry *), + void *context) +{ + struct lustre_nfs_fid *nfs_fid = (void *)fh; + struct dentry *entry; + ENTRY; + + CDEBUG(D_INFO, "decoding for "LPU64" fh_len=%d fh_type=%x\n", + nfs_fid->child.id, fh_len, fh_type); + + if (fh_type != LUSTRE_NFS_FID) + RETURN(ERR_PTR(-ESTALE)); + + entry = sb->s_export_op->find_exported_dentry(sb, &nfs_fid->child, + &nfs_fid->parent, + acceptable, context); + RETURN(entry); +} + -#if THREAD_SIZE >= 8192 struct dentry *ll_get_dentry(struct super_block *sb, void *data) { - __u32 *inump = (__u32*)data; - return ll_iget_for_nfs(sb, inump[0], inump[1], S_IFREG); + struct lustre_nfs_fid *fid = data; + ENTRY; + + RETURN(ll_iget_for_nfs(sb, &fid->child)); + } +#endif + struct dentry *ll_get_parent(struct dentry *dchild) { struct ptlrpc_request *req = NULL; @@ -208,11 +247,11 @@ struct dentry *ll_get_parent(struct dentry *dchild) char dotdot[] = ".."; int rc = 0; ENTRY; - + LASSERT(dir && S_ISDIR(dir->i_mode)); - - sbi = 
ll_s2sbi(dir->i_sb); - + + sbi = ll_s2sbi(dir->i_sb); + fid.id = (__u64)dir->i_ino; fid.generation = dir->i_generation; fid.f_type = S_IFDIR; @@ -223,11 +262,12 @@ struct dentry *ll_get_parent(struct dentry *dchild) CERROR("failure %d inode %lu get parent\n", rc, dir->i_ino); return ERR_PTR(rc); } - body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof (*body)); - + body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof (*body)); + LASSERT((body->valid & OBD_MD_FLGENER) && (body->valid & OBD_MD_FLID)); - - result = ll_iget_for_nfs(dir->i_sb, body->ino, body->generation, S_IFDIR); + fid.id = body->ino; + fid.generation = body->generation; + result = ll_iget_for_nfs(dir->i_sb, &fid); if (IS_ERR(result)) rc = PTR_ERR(result); @@ -236,10 +276,18 @@ struct dentry *ll_get_parent(struct dentry *dchild) if (rc) return ERR_PTR(rc); RETURN(result); -} +} + +#if THREAD_SIZE >= 8192 struct export_operations lustre_export_operations = { - .get_parent = ll_get_parent, - .get_dentry = ll_get_dentry, + .encode_fh = ll_encode_fh, +#ifdef HAVE_FH_TO_DENTRY + .fh_to_dentry = ll_fh_to_dentry, + .fh_to_parent = ll_fh_to_parent, +#else + .get_dentry = ll_get_dentry, + .decode_fh = ll_decode_fh, +#endif }; #endif diff --git a/lustre/llite/lloop.c b/lustre/llite/lloop.c index 8a683cd..213eaa6 100644 --- a/lustre/llite/lloop.c +++ b/lustre/llite/lloop.c @@ -152,7 +152,7 @@ struct lloop_device { struct semaphore lo_bh_mutex; atomic_t lo_pending; - request_queue_t *lo_queue; + struct request_queue *lo_queue; /* data to handle bio for lustre. 
*/ struct lo_request_data { @@ -283,7 +283,7 @@ static struct bio *loop_get_bio(struct lloop_device *lo) return bio; } -static int loop_make_request(request_queue_t *q, struct bio *old_bio) +static int loop_make_request(struct request_queue *q, struct bio *old_bio) { struct lloop_device *lo = q->queuedata; int rw = bio_rw(old_bio); @@ -312,7 +312,7 @@ err: if (atomic_dec_and_test(&lo->lo_pending)) up(&lo->lo_bh_mutex); out: - bio_io_error(old_bio, old_bio->bi_size); + cfs_bio_io_error(old_bio, old_bio->bi_size); return 0; inactive: spin_unlock_irq(&lo->lo_lock); @@ -322,7 +322,7 @@ inactive: /* * kick off io on the underlying address space */ -static void loop_unplug(request_queue_t *q) +static void loop_unplug(struct request_queue *q) { struct lloop_device *lo = q->queuedata; @@ -334,7 +334,7 @@ static inline void loop_handle_bio(struct lloop_device *lo, struct bio *bio) { int ret; ret = do_bio_filebacked(lo, bio); - bio_endio(bio, bio->bi_size, ret); + cfs_bio_endio(bio, bio->bi_size, ret); } /* @@ -736,7 +736,7 @@ static int __init lloop_init(void) out_mem4: while (i--) - blk_put_queue(loop_dev[i].lo_queue); + blk_cleanup_queue(loop_dev[i].lo_queue); i = max_loop; out_mem3: while (i--) @@ -758,7 +758,7 @@ static void lloop_exit(void) ll_iocontrol_unregister(ll_iocontrol_magic); for (i = 0; i < max_loop; i++) { del_gendisk(disks[i]); - blk_put_queue(loop_dev[i].lo_queue); + blk_cleanup_queue(loop_dev[i].lo_queue); put_disk(disks[i]); } if (ll_unregister_blkdev(lloop_major, "lloop")) diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index 9079515..0b647a2 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -61,6 +61,8 @@ #define DEBUG_SUBSYSTEM S_LLITE +#include + #include #include "llite_internal.h" #include @@ -186,7 +188,7 @@ void ll_truncate(struct inode *inode) GOTO(out_unlock, 0); } - LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0); + LASSERT(SEM_COUNT(&lli->lli_size_sem) <= 0); if (!srvlock) { struct ost_lvb lvb; @@ -2123,7 +2125,7 @@ ssize_t 
ll_file_lockless_io(struct file *file, const struct iovec *iov, rc = generic_write_checks(file, ppos, &count, 0); if (rc) GOTO(out, rc); - rc = ll_remove_suid(file->f_dentry, file->f_vfsmnt); + rc = ll_remove_suid(file, file->f_vfsmnt); if (rc) GOTO(out, rc); } diff --git a/lustre/llite/symlink.c b/lustre/llite/symlink.c index ec6fcc4..fb0b25f 100644 --- a/lustre/llite/symlink.c +++ b/lustre/llite/symlink.c @@ -177,8 +177,12 @@ static LL_FOLLOW_LINK_RETURN_TYPE ll_follow_link(struct dentry *dentry, struct n up(&lli->lli_size_sem); } if (rc) { +#ifdef HAVE_PATH_RELEASE path_release(nd); /* Kernel assumes that ->follow_link() releases nameidata on error */ +#else + path_put(&nd->path); +#endif GOTO(out, rc); } diff --git a/lustre/lvfs/lvfs_linux.c b/lustre/lvfs/lvfs_linux.c index 631d8bc..5a77267 100644 --- a/lustre/lvfs/lvfs_linux.c +++ b/lustre/lvfs/lvfs_linux.c @@ -148,10 +148,10 @@ void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx, */ save->fs = get_fs(); - LASSERT(atomic_read(&current->fs->pwd->d_count)); + LASSERT(atomic_read(&cfs_fs_pwd(current->fs)->d_count)); LASSERT(atomic_read(&new_ctx->pwd->d_count)); - save->pwd = dget(current->fs->pwd); - save->pwdmnt = mntget(current->fs->pwdmnt); + save->pwd = dget(cfs_fs_pwd(current->fs)); + save->pwdmnt = mntget(cfs_fs_mnt(current->fs)); save->luc.luc_umask = current->fs->umask; LASSERT(save->pwd); @@ -205,10 +205,10 @@ void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx, atomic_read(&current->fs->pwdmnt->mnt_count)); */ - LASSERTF(current->fs->pwd == new_ctx->pwd, "%p != %p\n", - current->fs->pwd, new_ctx->pwd); - LASSERTF(current->fs->pwdmnt == new_ctx->pwdmnt, "%p != %p\n", - current->fs->pwdmnt, new_ctx->pwdmnt); + LASSERTF(cfs_fs_pwd(current->fs) == new_ctx->pwd, "%p != %p\n", + cfs_fs_pwd(current->fs), new_ctx->pwd); + LASSERTF(cfs_fs_mnt(current->fs) == new_ctx->pwdmnt, "%p != %p\n", + cfs_fs_mnt(current->fs), new_ctx->pwdmnt); set_fs(saved->fs); ll_set_fs_pwd(current->fs, 
saved->pwdmnt, saved->pwd); diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c index 5a7714b..258f67c 100644 --- a/lustre/mgc/mgc_request.c +++ b/lustre/mgc/mgc_request.c @@ -415,7 +415,7 @@ static int mgc_fs_setup(struct obd_device *obd, struct super_block *sb, obd->obd_lvfs_ctxt.fs = get_ds(); push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - dentry = lookup_one_len(MOUNT_CONFIGS_DIR, current->fs->pwd, + dentry = lookup_one_len(MOUNT_CONFIGS_DIR, cfs_fs_pwd(current->fs), strlen(MOUNT_CONFIGS_DIR)); pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); if (IS_ERR(dentry)) { diff --git a/lustre/obdclass/linux/linux-module.c b/lustre/obdclass/linux/linux-module.c index 40603d6..431f074 100644 --- a/lustre/obdclass/linux/linux-module.c +++ b/lustre/obdclass/linux/linux-module.c @@ -419,13 +419,14 @@ int class_procfs_init(void) ENTRY; obd_sysctl_init(); - proc_lustre_root = proc_mkdir("lustre", proc_root_fs); + proc_lustre_root = lprocfs_register("fs/lustre", NULL, + lprocfs_base, NULL); if (!proc_lustre_root) { printk(KERN_ERR "LustreError: error registering /proc/fs/lustre\n"); RETURN(-ENOMEM); } - proc_version = lprocfs_add_vars(proc_lustre_root, lprocfs_base, NULL); + entry = create_proc_entry("devices", 0444, proc_lustre_root); if (entry == NULL) { CERROR("error registering /proc/fs/lustre/devices\n"); diff --git a/lustre/obdclass/linux/linux-sysctl.c b/lustre/obdclass/linux/linux-sysctl.c index b9eac5e..d49beb6 100644 --- a/lustre/obdclass/linux/linux-sysctl.c +++ b/lustre/obdclass/linux/linux-sysctl.c @@ -56,7 +56,9 @@ cfs_sysctl_table_header_t *obd_table_header = NULL; -#define OBD_SYSCTL 300 +#ifndef HAVE_SYSCTL_UNNUMBERED + +#define CTL_LUSTRE 300 enum { OBD_FAIL_LOC = 1, /* control test failures instrumentation */ @@ -74,6 +76,23 @@ enum { OBD_ALLOC_FAIL_RATE, /* memory allocation random failure rate */ OBD_MAX_DIRTY_PAGES, /* maximum dirty pages */ }; +#else +#define CTL_LUSTRE CTL_UNNUMBERED +#define OBD_FAIL_LOC CTL_UNNUMBERED +#define OBD_FAIL_VAL 
CTL_UNNUMBERED +#define OBD_TIMEOUT CTL_UNNUMBERED +#define OBD_DUMP_ON_TIMEOUT CTL_UNNUMBERED +#define OBD_MEMUSED CTL_UNNUMBERED +#define OBD_PAGESUSED CTL_UNNUMBERED +#define OBD_MAXMEMUSED CTL_UNNUMBERED +#define OBD_MAXPAGESUSED CTL_UNNUMBERED +#define OBD_SYNCFILTER CTL_UNNUMBERED +#define OBD_LDLM_TIMEOUT CTL_UNNUMBERED +#define OBD_DUMP_ON_EVICTION CTL_UNNUMBERED +#define OBD_DEBUG_PEER_ON_TIMEOUT CTL_UNNUMBERED +#define OBD_ALLOC_FAIL_RATE CTL_UNNUMBERED +#define OBD_MAX_DIRTY_PAGES CTL_UNNUMBERED +#endif int LL_PROC_PROTO(proc_fail_loc) { @@ -120,7 +139,8 @@ int LL_PROC_PROTO(proc_max_dirty_pages_in_mb) obd_max_dirty_pages = 4 << (20 - CFS_PAGE_SHIFT); } } else { - char buf[21]; + char buf[22]; + struct ctl_table dummy; int len; len = lprocfs_read_frac_helper(buf, sizeof(buf), @@ -129,7 +149,13 @@ int LL_PROC_PROTO(proc_max_dirty_pages_in_mb) if (len > *lenp) len = *lenp; buf[len] = '\0'; - if (copy_to_user(buffer, buf, len)) + + dummy = *table; + dummy.data = buf; + dummy.maxlen = sizeof(buf); + + rc = ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos); + if (rc) return -EFAULT; *lenp = len; } @@ -152,7 +178,8 @@ int LL_PROC_PROTO(proc_alloc_fail_rate) (unsigned int*)table->data, OBD_ALLOC_FAIL_MULT); } else { - char buf[21]; + char buf[22]; + struct ctl_table dummy; int len; len = lprocfs_read_frac_helper(buf, sizeof(buf), @@ -161,7 +188,12 @@ int LL_PROC_PROTO(proc_alloc_fail_rate) if (len > *lenp) len = *lenp; buf[len] = '\0'; - if (copy_to_user(buffer, buf, len)) + dummy = *table; + dummy.data = buf; + dummy.maxlen = sizeof(buf); + + rc = ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos); + if(rc) return -EFAULT; *lenp = len; } @@ -172,6 +204,7 @@ int LL_PROC_PROTO(proc_alloc_fail_rate) int LL_PROC_PROTO(proc_memory_alloc) { + struct ctl_table dummy; char buf[22]; int len; DECLARE_LL_PROC_PPOS_DECL; @@ -187,15 +220,17 @@ int LL_PROC_PROTO(proc_memory_alloc) if (len > *lenp) len = *lenp; buf[len] = '\0'; - if (copy_to_user(buffer, buf, len)) - 
return -EFAULT; - *lenp = len; - *ppos += *lenp; - return 0; + + dummy = *table; + dummy.data = buf; + dummy.maxlen = sizeof(buf); + + return ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos); } int LL_PROC_PROTO(proc_pages_alloc) { + struct ctl_table dummy; char buf[22]; int len; DECLARE_LL_PROC_PPOS_DECL; @@ -211,15 +246,17 @@ int LL_PROC_PROTO(proc_pages_alloc) if (len > *lenp) len = *lenp; buf[len] = '\0'; - if (copy_to_user(buffer, buf, len)) - return -EFAULT; - *lenp = len; - *ppos += *lenp; - return 0; + + dummy = *table; + dummy.data = buf; + dummy.maxlen = sizeof(buf); + + return ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos); } int LL_PROC_PROTO(proc_mem_max) { + struct ctl_table dummy; char buf[22]; int len; DECLARE_LL_PROC_PPOS_DECL; @@ -235,17 +272,19 @@ int LL_PROC_PROTO(proc_mem_max) if (len > *lenp) len = *lenp; buf[len] = '\0'; - if (copy_to_user(buffer, buf, len)) - return -EFAULT; - *lenp = len; - *ppos += *lenp; - return 0; + + dummy = *table; + dummy.data = buf; + dummy.maxlen = sizeof(buf); + + return ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos); } int LL_PROC_PROTO(proc_pages_max) { char buf[22]; int len; + struct ctl_table dummy; DECLARE_LL_PROC_PPOS_DECL; if (!*lenp || (*ppos && !write)) { @@ -254,16 +293,17 @@ int LL_PROC_PROTO(proc_pages_max) } if (write) return -EINVAL; + dummy = *table; + dummy.data = buf; + dummy.maxlen = sizeof(buf); + len = snprintf(buf, sizeof(buf), LPU64, + obd_pages_max()); - len = snprintf(buf, sizeof(buf), LPU64"\n", obd_pages_max()); - if (len > *lenp) - len = *lenp; - buf[len] = '\0'; - if (copy_to_user(buffer, buf, len)) - return -EFAULT; - *lenp = len; - *ppos += *lenp; - return 0; + if (len > *lenp) + len = *lenp; + buf[len] = '\0'; + + return ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos); } static cfs_sysctl_table_t obd_table[] = { @@ -281,7 +321,8 @@ static cfs_sysctl_table_t obd_table[] = { .data = &obd_fail_val, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = 
&proc_dointvec + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec, }, { .ctl_name = OBD_TIMEOUT, @@ -297,7 +338,7 @@ static cfs_sysctl_table_t obd_table[] = { .data = &obd_debug_peer_on_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec, }, { .ctl_name = OBD_DUMP_ON_TIMEOUT, @@ -305,7 +346,7 @@ static cfs_sysctl_table_t obd_table[] = { .data = &obd_dump_on_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec, }, { .ctl_name = OBD_DUMP_ON_EVICTION, @@ -313,7 +354,7 @@ static cfs_sysctl_table_t obd_table[] = { .data = &obd_dump_on_eviction, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec, }, { .ctl_name = OBD_MEMUSED, @@ -321,7 +362,7 @@ static cfs_sysctl_table_t obd_table[] = { .data = NULL, .maxlen = 0, .mode = 0444, - .proc_handler = &proc_memory_alloc + .proc_handler = &proc_memory_alloc, }, { .ctl_name = OBD_PAGESUSED, @@ -329,7 +370,7 @@ static cfs_sysctl_table_t obd_table[] = { .data = NULL, .maxlen = 0, .mode = 0444, - .proc_handler = &proc_pages_alloc + .proc_handler = &proc_pages_alloc, }, { .ctl_name = OBD_MAXMEMUSED, @@ -337,7 +378,7 @@ static cfs_sysctl_table_t obd_table[] = { .data = NULL, .maxlen = 0, .mode = 0444, - .proc_handler = &proc_mem_max + .proc_handler = &proc_mem_max, }, { .ctl_name = OBD_MAXPAGESUSED, @@ -345,7 +386,7 @@ static cfs_sysctl_table_t obd_table[] = { .data = NULL, .maxlen = 0, .mode = 0444, - .proc_handler = &proc_pages_max + .proc_handler = &proc_pages_max, }, { .ctl_name = OBD_LDLM_TIMEOUT, @@ -378,7 +419,7 @@ static cfs_sysctl_table_t obd_table[] = { static cfs_sysctl_table_t parent_table[] = { { - .ctl_name = OBD_SYSCTL, + .ctl_name = CTL_LUSTRE, .procname = "lustre", .data = NULL, .maxlen = 0, diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index afc882f..ef6133b 100644 --- a/lustre/obdclass/lprocfs_status.c 
+++ b/lustre/obdclass/lprocfs_status.c @@ -151,7 +151,7 @@ static ssize_t lprocfs_fops_read(struct file *f, char __user *buf, size_t size, LPROCFS_ENTRY(); OBD_FAIL_TIMEOUT(OBD_FAIL_LPROC_REMOVE, 10); - if (!dp->deleted && dp->read_proc) + if (!LPROCFS_CHECK_DELETED(dp) && dp->read_proc) rc = dp->read_proc(page, &start, *ppos, PAGE_SIZE, &eof, dp->data); LPROCFS_EXIT(); @@ -191,7 +191,7 @@ static ssize_t lprocfs_fops_write(struct file *f, const char __user *buf, int rc = -EIO; LPROCFS_ENTRY(); - if (!dp->deleted && dp->write_proc) + if (!LPROCFS_CHECK_DELETED(dp) && dp->write_proc) rc = dp->write_proc(f, buf, size, dp->data); LPROCFS_EXIT(); return rc; diff --git a/lustre/obdclass/lustre_handles.c b/lustre/obdclass/lustre_handles.c index e830330..c97eb9f 100644 --- a/lustre/obdclass/lustre_handles.c +++ b/lustre/obdclass/lustre_handles.c @@ -232,7 +232,7 @@ static void cleanup_all_handles(void) int i; for (i = 0; i < HANDLE_HASH_SIZE; i++) { - struct list_head *tmp, *pos; + struct list_head *tmp, *pos = NULL; spin_lock(&handle_hash[i].lock); list_for_each_safe_rcu(tmp, pos, &(handle_hash[i].head)) { struct portals_handle *h; diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 0b7a5db..559d221 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -1501,7 +1501,7 @@ void ptlrpc_daemonize(char *name) cfs_daemonize(name); exit_fs(cfs_current()); current->fs = fs; - ll_set_fs_pwd(current->fs, init_task.fs->pwdmnt, init_task.fs->pwd); + ll_set_fs_pwd(current->fs, cfs_fs_mnt(init_task.fs), cfs_fs_pwd(init_task.fs)); } static void -- 1.8.3.1