Whamcloud - gitweb
LU-1337 llite: support SEEK_HOLE and SEEK_DATA
author yangsheng <ys@whamcloud.com>
Tue, 18 Dec 2012 04:21:36 +0000 (12:21 +0800)
committer Oleg Drokin <green@whamcloud.com>
Fri, 21 Dec 2012 16:22:58 +0000 (11:22 -0500)
Kernel 3.1 introduced SEEK_HOLE and SEEK_DATA. llite should
handle them as required by the VFS. For now, we simply treat the
whole file as data.

Also fix the race between concurrent SEEK_CURs by taking f_lock
in SEEK_CUR, as implemented since kernel commit ef3d0fd27.
Before that, the kernel handled the race by taking i_mutex.

The kernel introduced f_lock to protect f_flags when dropping the BKL
in kernel commit db1dd4d3. Later, f_lock was extended to protect
f_ep_links, f_flags, and f_pos vs i_size in lseek
SEEK_CUR.

Signed-off-by: Peng Tao <tao.peng@emc.com>
Signed-off-by: yang sheng <ys@whamcloud.com>
Change-Id: I944102f20ab1ca1083a16d41e95802d6b8bda15d
Reviewed-on: http://review.whamcloud.com/4579
Tested-by: Hudson
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Peng Tao <bergwolf@gmail.com>
Reviewed-by: Lai Siyao <laisiyao@whamcloud.com>
Reviewed-by: James Simmons <uja.ornl@gmail.com>
lustre/autoconf/lustre-core.m4
lustre/include/linux/lustre_compat25.h
lustre/llite/file.c

index 4bf9a02..109d528 100644 (file)
@@ -1239,6 +1239,7 @@ AC_DEFUN([LC_EXPORT_CPUMASK_OF_NODE],
                                              the kernel])]) # x86_64
          ])
 
+
 # 2.6.31 replaces blk_queue_hardsect_size by blk_queue_logical_block_size function
 AC_DEFUN([LC_BLK_QUEUE_LOG_BLK_SIZE],
 [AC_MSG_CHECKING([if blk_queue_logical_block_size is defined])
@@ -1350,7 +1351,7 @@ LB_LINUX_TRY_COMPILE([
 ],[
         AC_MSG_RESULT(no)
 ])
-])  
+])
 
 # 2.6.32 removes blk_queue_max_sectors and add blk_queue_max_hw_sectors
 # check blk_queue_max_sectors and use it until disappear.
@@ -1875,6 +1876,16 @@ LB_LINUX_TRY_COMPILE([
 ])
 
 #
+# 3.1 introduced generic_file_llseek_size()
+# Define HAVE_FILE_LLSEEK_SIZE when the kernel exports that symbol
+# (looked up in fs/read_write.c).
+#
+AC_DEFUN([LC_FILE_LLSEEK_SIZE],
+       [LB_CHECK_SYMBOL_EXPORT([generic_file_llseek_size],
+       [fs/read_write.c],
+        [AC_DEFINE(HAVE_FILE_LLSEEK_SIZE, 1,
+                  [generic_file_llseek_size is exported by the kernel])])
+])
+
+#
 # 3.2 request_queue.make_request_fn defined as function returns with void
 # see kernel commit 5a7bbad27a410350e64a2d7f5ec18fc73836c14f
 #
@@ -2263,8 +2274,8 @@ AC_DEFUN([LC_PROG_LINUX],
          # 2.6.29
          LC_SB_ANY_QUOTA_LOADED
 
-         # 2.6.30
-         LC_EXPORT_CPUMASK_OF_NODE
+        # 2.6.30
+        LC_EXPORT_CPUMASK_OF_NODE
 
          # 2.6.31
          LC_BLK_QUEUE_LOG_BLK_SIZE
@@ -2310,6 +2321,7 @@ AC_DEFUN([LC_PROG_LINUX],
         LC_LM_XXX_LOCK_MANAGER_OPS
         LC_INODE_DIO_WAIT
         LC_IOP_GET_ACL
+        LC_FILE_LLSEEK_SIZE
 
         # 3.1.1
         LC_BLOCKS_FOR_TRUNCATE
index 398f571..53e6a8f 100644 (file)
@@ -688,6 +688,17 @@ static inline int ll_quota_off(struct super_block *sb, int off, int remount)
 # define NO_QUOTA (-EDQUOT)
 #endif
 
+/*
+ * Fallback definitions, used only when the kernel headers do not already
+ * provide these names.
+ */
+#ifndef SEEK_DATA
+#define SEEK_DATA      3       /* seek to the next data */
+#endif
+#ifndef SEEK_HOLE
+#define SEEK_HOLE      4       /* seek to the next hole */
+#endif
+
+/* f_mode bit tested by llseek_execute() to permit negative offsets */
+#ifndef FMODE_UNSIGNED_OFFSET
+#define FMODE_UNSIGNED_OFFSET  ((__force fmode_t)0x2000)
+#endif
+
 #if !defined(_ASM_GENERIC_BITOPS_EXT2_NON_ATOMIC_H_) && !defined(ext2_set_bit)
 # define ext2_set_bit             __test_and_set_bit_le
 # define ext2_clear_bit           __test_and_clear_bit_le
index da53bd3..9855069 100644 (file)
@@ -1945,39 +1945,96 @@ long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
         }
 }
 
-loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
+#ifndef HAVE_FILE_LLSEEK_SIZE
+/*
+ * Validate a candidate file position and, if acceptable, store it in @file.
+ *
+ * Returns -EINVAL when @offset is negative and FMODE_UNSIGNED_OFFSET is not
+ * set in file->f_mode, or when @offset is greater than @maxsize.  Otherwise
+ * returns @offset; f_pos is rewritten and f_version is reset to 0 only when
+ * the position actually changes.
+ */
+static inline loff_t
+llseek_execute(struct file *file, loff_t offset, loff_t maxsize)
+{
+       if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
+               return -EINVAL;
+       if (offset > maxsize)
+               return -EINVAL;
+
+       if (offset != file->f_pos) {
+               file->f_pos = offset;
+               file->f_version = 0;
+       }
+       return offset;
+}
+
+/*
+ * Compat implementation, built only when HAVE_FILE_LLSEEK_SIZE is not
+ * defined.
+ *
+ * Computes the new position for @origin and hands it to llseek_execute(),
+ * which rejects values above @maxsize:
+ *   SEEK_END  - @offset is relative to @eof
+ *   SEEK_CUR  - @offset is relative to file->f_pos; an @offset of 0 just
+ *               returns f_pos without writing it back
+ *   SEEK_DATA - -ENXIO if @offset >= @eof, otherwise @offset itself
+ *   SEEK_HOLE - -ENXIO if @offset >= @eof, otherwise @eof
+ *   any other origin - @offset is used unchanged
+ *
+ * Returns the resulting position or a negative errno.
+ */
+static loff_t
+generic_file_llseek_size(struct file *file, loff_t offset, int origin,
+                loff_t maxsize, loff_t eof)
 {
-        struct inode *inode = file->f_dentry->d_inode;
-        loff_t retval;
-        ENTRY;
-        retval = offset + ((origin == 2) ? i_size_read(inode) :
-                           (origin == 1) ? file->f_pos : 0);
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%s)\n",
-               inode->i_ino, inode->i_generation, inode, retval, retval,
-               origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
-        ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
-
-        if (origin == 2) { /* SEEK_END */
-                int rc;
+       struct inode *inode = file->f_dentry->d_inode;
 
-                rc = ll_glimpse_size(inode);
-                if (rc != 0)
-                        RETURN(rc);
+       switch (origin) {
+       case SEEK_END:
+               offset += eof;
+               break;
+       case SEEK_CUR:
+               /*
+                * Here we special-case the lseek(fd, 0, SEEK_CUR)
+                * position-querying operation.  Avoid rewriting the "same"
+                * f_pos value back to the file because a concurrent read(),
+                * write() or lseek() might have altered it
+                */
+               if (offset == 0)
+                       return file->f_pos;
+               /*
+                * i_mutex is taken here to protect against the
+                * read/modify/write race with other SEEK_CURs. Note that
+                * parallel writes and reads behave like SEEK_SET.
+                */
+               mutex_lock(&inode->i_mutex);
+               offset = llseek_execute(file, file->f_pos + offset, maxsize);
+               mutex_unlock(&inode->i_mutex);
+               return offset;
+       case SEEK_DATA:
+               /*
+                * In the generic case the entire file is data, so as long as
+                * offset isn't at the end of the file then the offset is data.
+                */
+               if (offset >= eof)
+                       return -ENXIO;
+               break;
+       case SEEK_HOLE:
+               /*
+                * There is a virtual hole at the end of the file, so as long as
+                * offset isn't eof or larger, return eof.
+                */
+               if (offset >= eof)
+                       return -ENXIO;
+               offset = eof;
+               break;
+       }
 
-                offset += i_size_read(inode);
-        } else if (origin == 1) { /* SEEK_CUR */
-                offset += file->f_pos;
-        }
+       return llseek_execute(file, offset, maxsize);
+}
+#endif
 
-        retval = -EINVAL;
-        if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
-                if (offset != file->f_pos) {
-                        file->f_pos = offset;
-                }
-                retval = offset;
-        }
+/*
+ * llseek entry point for llite files.
+ *
+ * Logs the request, bumps the LPROC_LL_LLSEEK counter, and for origins that
+ * depend on the file size (SEEK_END, SEEK_HOLE, SEEK_DATA) calls
+ * ll_glimpse_size() before reading i_size.  The actual position update is
+ * delegated to generic_file_llseek_size() with ll_file_maxbytes() as the
+ * upper bound.
+ *
+ * Returns the new position, a non-zero ll_glimpse_size() result, or the
+ * error returned by generic_file_llseek_size().
+ */
+loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
+{
+       struct inode *inode = file->f_dentry->d_inode;
+       loff_t retval, eof = 0;
+
+       ENTRY;
+       /* value computed here is only used for the debug trace below */
+       retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
+                          (origin == SEEK_CUR) ? file->f_pos : 0);
+       CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%d)\n",
+              inode->i_ino, inode->i_generation, inode, retval, retval,
+              origin);
+       ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
+
+       /* eof stays 0 for the other origins, which do not consult it */
+       if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
+               /* NOTE(review): presumably refreshes i_size - confirm */
+               retval = ll_glimpse_size(inode);
+               if (retval != 0)
+                       RETURN(retval);
+               eof = i_size_read(inode);
+       }
 
-        RETURN(retval);
+       retval = generic_file_llseek_size(file, offset, origin,
+                                         ll_file_maxbytes(inode), eof);
+       RETURN(retval);
 }
 
 int ll_flush(struct file *file, fl_owner_t id)