From 99af76a6aff4dac41ed3cc467cbd8e4bfe1990bc Mon Sep 17 00:00:00 2001 From: yangsheng Date: Tue, 18 Dec 2012 12:21:36 +0800 Subject: [PATCH] LU-1337 llite: support SEEK_HOLE and SEEK_DATA kernel 3.1 introduced SEEK_HOLE and SEEK_DATA. llite should handle them as required by vfs. For now, we can just consider the whole file to be data. Also fix race between concurrent SEEK_CURs by taking f_lock in SEEK_CUR, as implemented since kernel commit ef3d0fd27. Before that, kernel handled the race by taking i_mutex. Kernel introduced f_lock to protect f_flags when dropping BKL in kernel commit db1dd4d3. Later, f_lock was extended to protect f_ep_links, f_flags, and f_pos vs i_size in lseek SEEK_CUR. Signed-off-by: Peng Tao Signed-off-by: yang sheng Change-Id: I944102f20ab1ca1083a16d41e95802d6b8bda15d Reviewed-on: http://review.whamcloud.com/4579 Tested-by: Hudson Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Peng Tao Reviewed-by: Lai Siyao Reviewed-by: James Simmons --- lustre/autoconf/lustre-core.m4 | 18 +++++- lustre/include/linux/lustre_compat25.h | 11 ++++ lustre/llite/file.c | 113 +++++++++++++++++++++++++-------- 3 files changed, 111 insertions(+), 31 deletions(-) diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4 index 4bf9a02..109d528 100644 --- a/lustre/autoconf/lustre-core.m4 +++ b/lustre/autoconf/lustre-core.m4 @@ -1239,6 +1239,7 @@ AC_DEFUN([LC_EXPORT_CPUMASK_OF_NODE], the kernel])]) # x86_64 ]) + # 2.6.31 replaces blk_queue_hardsect_size by blk_queue_logical_block_size function AC_DEFUN([LC_BLK_QUEUE_LOG_BLK_SIZE], [AC_MSG_CHECKING([if blk_queue_logical_block_size is defined]) @@ -1350,7 +1351,7 @@ LB_LINUX_TRY_COMPILE([ ],[ AC_MSG_RESULT(no) ]) -]) +]) # 2.6.32 removes blk_queue_max_sectors and add blk_queue_max_hw_sectors # check blk_queue_max_sectors and use it until disappear. 
@@ -1875,6 +1876,16 @@ LB_LINUX_TRY_COMPILE([ ]) # +# 3.1 introduced generic_file_llseek_size() +# +AC_DEFUN([LC_FILE_LLSEEK_SIZE], + [LB_CHECK_SYMBOL_EXPORT([generic_file_llseek_size], + [fs/read_write.c], + [AC_DEFINE(HAVE_FILE_LLSEEK_SIZE, 1, + [generic_file_llseek_size is exported by the kernel])]) +]) + +# # 3.2 request_queue.make_request_fn defined as function returns with void # see kernel commit 5a7bbad27a410350e64a2d7f5ec18fc73836c14f # @@ -2263,8 +2274,8 @@ AC_DEFUN([LC_PROG_LINUX], # 2.6.29 LC_SB_ANY_QUOTA_LOADED - # 2.6.30 - LC_EXPORT_CPUMASK_OF_NODE + # 2.6.30 + LC_EXPORT_CPUMASK_OF_NODE # 2.6.31 LC_BLK_QUEUE_LOG_BLK_SIZE @@ -2310,6 +2321,7 @@ AC_DEFUN([LC_PROG_LINUX], LC_LM_XXX_LOCK_MANAGER_OPS LC_INODE_DIO_WAIT LC_IOP_GET_ACL + LC_FILE_LLSEEK_SIZE # 3.1.1 LC_BLOCKS_FOR_TRUNCATE diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h index 398f571..53e6a8f 100644 --- a/lustre/include/linux/lustre_compat25.h +++ b/lustre/include/linux/lustre_compat25.h @@ -688,6 +688,17 @@ static inline int ll_quota_off(struct super_block *sb, int off, int remount) # define NO_QUOTA (-EDQUOT) #endif +#ifndef SEEK_DATA +#define SEEK_DATA 3 /* seek to the next data */ +#endif +#ifndef SEEK_HOLE +#define SEEK_HOLE 4 /* seek to the next hole */ +#endif + +#ifndef FMODE_UNSIGNED_OFFSET +#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000) +#endif + #if !defined(_ASM_GENERIC_BITOPS_EXT2_NON_ATOMIC_H_) && !defined(ext2_set_bit) # define ext2_set_bit __test_and_set_bit_le # define ext2_clear_bit __test_and_clear_bit_le diff --git a/lustre/llite/file.c b/lustre/llite/file.c index da53bd3..9855069 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -1945,39 +1945,96 @@ long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } } -loff_t ll_file_seek(struct file *file, loff_t offset, int origin) +#ifndef HAVE_FILE_LLSEEK_SIZE +static inline loff_t +llseek_execute(struct file *file, loff_t offset, loff_t maxsize) 
+{ + if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) + return -EINVAL; + if (offset > maxsize) + return -EINVAL; + + if (offset != file->f_pos) { + file->f_pos = offset; + file->f_version = 0; + } + return offset; +} + +static loff_t +generic_file_llseek_size(struct file *file, loff_t offset, int origin, + loff_t maxsize, loff_t eof) { - struct inode *inode = file->f_dentry->d_inode; - loff_t retval; - ENTRY; - retval = offset + ((origin == 2) ? i_size_read(inode) : - (origin == 1) ? file->f_pos : 0); - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%s)\n", - inode->i_ino, inode->i_generation, inode, retval, retval, - origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET"); - ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1); - - if (origin == 2) { /* SEEK_END */ - int rc; + struct inode *inode = file->f_dentry->d_inode; - rc = ll_glimpse_size(inode); - if (rc != 0) - RETURN(rc); + switch (origin) { + case SEEK_END: + offset += eof; + break; + case SEEK_CUR: + /* + * Here we special-case the lseek(fd, 0, SEEK_CUR) + * position-querying operation. Avoid rewriting the "same" + * f_pos value back to the file because a concurrent read(), + * write() or lseek() might have altered it + */ + if (offset == 0) + return file->f_pos; + /* + * i_mutex protects against read/modify/write race with other + * SEEK_CURs. Note that parallel writes and reads behave + * like SEEK_SET. + */ + mutex_lock(&inode->i_mutex); + offset = llseek_execute(file, file->f_pos + offset, maxsize); + mutex_unlock(&inode->i_mutex); + return offset; + case SEEK_DATA: + /* + * In the generic case the entire file is data, so as long as + * offset isn't at the end of the file then the offset is data. + */ + if (offset >= eof) + return -ENXIO; + break; + case SEEK_HOLE: + /* + * There is a virtual hole at the end of the file, so as long as + * offset isn't i_size or larger, return i_size. 
+ */ + if (offset >= eof) + return -ENXIO; + offset = eof; + break; + } - offset += i_size_read(inode); - } else if (origin == 1) { /* SEEK_CUR */ - offset += file->f_pos; - } + return llseek_execute(file, offset, maxsize); +} +#endif - retval = -EINVAL; - if (offset >= 0 && offset <= ll_file_maxbytes(inode)) { - if (offset != file->f_pos) { - file->f_pos = offset; - } - retval = offset; - } +loff_t ll_file_seek(struct file *file, loff_t offset, int origin) +{ + struct inode *inode = file->f_dentry->d_inode; + loff_t retval, eof = 0; + + ENTRY; + retval = offset + ((origin == SEEK_END) ? i_size_read(inode) : + (origin == SEEK_CUR) ? file->f_pos : 0); + CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%d)\n", + inode->i_ino, inode->i_generation, inode, retval, retval, + origin); + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1); + + if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) { + retval = ll_glimpse_size(inode); + if (retval != 0) + RETURN(retval); + eof = i_size_read(inode); + } - RETURN(retval); + retval = generic_file_llseek_size(file, offset, origin, + ll_file_maxbytes(inode), eof); + RETURN(retval); } int ll_flush(struct file *file, fl_owner_t id) -- 1.8.3.1