Whamcloud - gitweb
LU-1337 llite: kernel 3.1 kills inode->i_alloc_sem
author: Liu Xuezhao <xuezhao.liu@emc.com>
Thu, 27 Sep 2012 06:20:25 +0000 (14:20 +0800)
committer: Oleg Drokin <green@whamcloud.com>
Fri, 2 Nov 2012 19:30:33 +0000 (15:30 -0400)
Kernel 3.1 kills inode->i_alloc_sem, use i_dio_count and
inode_dio_wait/inode_dio_done instead.
(kernel commit bd5fe6c5eb9c548d7f07fe8f89a150bb6705e8e3).

Add HAVE_INODE_DIO_WAIT to differentiate it.

Signed-off-by: Liu Xuezhao <xuezhao.liu@emc.com>
Change-Id: Ife36e07a85c76153985a4a86ee1973262c4c0e27
Reviewed-on: http://review.whamcloud.com/3582
Tested-by: Hudson
Reviewed-by: Jinshan Xiong <jinshan.xiong@whamcloud.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/autoconf/lustre-core.m4
lustre/include/linux/lustre_compat25.h
lustre/llite/llite_lib.c
lustre/llite/vvp_io.c
lustre/llite/vvp_page.c
lustre/osc/osc_cache.c
lustre/osd-ldiskfs/osd_io.c

index 9798be0..fa0d526 100644 (file)
@@ -1794,40 +1794,61 @@ LB_LINUX_TRY_COMPILE([
 ])
 
 #
-# 3.1.1 has ext4_blocks_for_truncate
+# 3.1 renames lock-manager ops(lock_manager_operations) from fl_xxx to lm_xxx
+# see kernel commit 8fb47a4fbf858a164e973b8ea8ef5e83e61f2e50
 #
-AC_DEFUN([LC_BLOCKS_FOR_TRUNCATE],
-[AC_MSG_CHECKING([if kernel has ext4_blocks_for_truncate])
+AC_DEFUN([LC_LM_XXX_LOCK_MANAGER_OPS],
+[AC_MSG_CHECKING([if lock-manager ops renamed to lm_xxx])
 LB_LINUX_TRY_COMPILE([
        #include <linux/fs.h>
-       #include "$LINUX/fs/ext4/ext4_jbd2.h"
-       #include "$LINUX/fs/ext4/truncate.h"
 ],[
-       ext4_blocks_for_truncate(NULL);
+       struct lock_manager_operations lm_ops;
+       lm_ops.lm_compare_owner = NULL;
 ],[
+       AC_DEFINE(HAVE_LM_XXX_LOCK_MANAGER_OPS, 1,
+                 [lock-manager ops renamed to lm_xxx])
        AC_MSG_RESULT([yes])
-       AC_DEFINE(HAVE_BLOCKS_FOR_TRUNCATE, 1,
-                 [kernel has ext4_blocks_for_truncate])
 ],[
        AC_MSG_RESULT([no])
 ])
 ])
 
 #
-# 3.1 renames lock-manager ops(lock_manager_operations) from fl_xxx to lm_xxx
-# see kernel commit 8fb47a4fbf858a164e973b8ea8ef5e83e61f2e50
+# 3.1 kills inode->i_alloc_sem, use i_dio_count and inode_dio_wait/
+#     inode_dio_done instead.
+# see kernel commit bd5fe6c5eb9c548d7f07fe8f89a150bb6705e8e3
 #
-AC_DEFUN([LC_LM_XXX_LOCK_MANAGER_OPS],
-[AC_MSG_CHECKING([if lock-manager ops renamed to lm_xxx])
+AC_DEFUN([LC_INODE_DIO_WAIT],
+[AC_MSG_CHECKING([if inode->i_alloc_sem is killed and use inode_dio_wait/done.])
 LB_LINUX_TRY_COMPILE([
        #include <linux/fs.h>
 ],[
-       struct lock_manager_operations lm_ops;
-       lm_ops.lm_compare_owner = NULL;
+       inode_dio_wait((struct inode *)0);
+       inode_dio_done((struct inode *)0);
+],[
+       AC_DEFINE(HAVE_INODE_DIO_WAIT, 1,
+                 [inode->i_alloc_sem is killed and use inode_dio_wait/done])
+       AC_MSG_RESULT([yes])
+],[
+       AC_MSG_RESULT([no])
+])
+])
+
+#
+# 3.1.1 has ext4_blocks_for_truncate
+#
+AC_DEFUN([LC_BLOCKS_FOR_TRUNCATE],
+[AC_MSG_CHECKING([if kernel has ext4_blocks_for_truncate])
+LB_LINUX_TRY_COMPILE([
+       #include <linux/fs.h>
+       #include "$LINUX/fs/ext4/ext4_jbd2.h"
+       #include "$LINUX/fs/ext4/truncate.h"
+],[
+       ext4_blocks_for_truncate(NULL);
 ],[
-       AC_DEFINE(HAVE_LM_XXX_LOCK_MANAGER_OPS, 1,
-                 [lock-manager ops renamed to lm_xxx])
        AC_MSG_RESULT([yes])
+       AC_DEFINE(HAVE_BLOCKS_FOR_TRUNCATE, 1,
+                 [kernel has ext4_blocks_for_truncate])
 ],[
        AC_MSG_RESULT([no])
 ])
@@ -2013,11 +2034,12 @@ AC_DEFUN([LC_PROG_LINUX],
          LC_REQUEST_QUEUE_UNPLUG_FN
         LC_HAVE_FSTYPE_MOUNT
 
-        # 3.1.1
-        LC_BLOCKS_FOR_TRUNCATE
-
         # 3.1
         LC_LM_XXX_LOCK_MANAGER_OPS
+        LC_INODE_DIO_WAIT
+
+        # 3.1.1
+        LC_BLOCKS_FOR_TRUNCATE
 
         # 3.3
         LC_HAVE_MIGRATE_HEADER
index 6173836..9362d9f 100644 (file)
@@ -172,13 +172,17 @@ static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
                 (type *)( (char *)__mptr - offsetof(type,member) );})
 #endif
 
-#define UP_WRITE_I_ALLOC_SEM(i)   up_write(&(i)->i_alloc_sem)
-#define DOWN_WRITE_I_ALLOC_SEM(i) down_write(&(i)->i_alloc_sem)
-#define LASSERT_I_ALLOC_SEM_WRITE_LOCKED(i) LASSERT(down_read_trylock(&(i)->i_alloc_sem) == 0)
-
-#define UP_READ_I_ALLOC_SEM(i)    up_read(&(i)->i_alloc_sem)
-#define DOWN_READ_I_ALLOC_SEM(i)  down_read(&(i)->i_alloc_sem)
-#define LASSERT_I_ALLOC_SEM_READ_LOCKED(i) LASSERT(down_write_trylock(&(i)->i_alloc_sem) == 0)
+#ifdef HAVE_INODE_DIO_WAIT
+/* inode_dio_wait(i) use as-is for write lock */
+# define inode_dio_write_done(i)       do {} while (0) /* for write unlock */
+# define inode_dio_read(i)             atomic_inc(&(i)->i_dio_count)
+/* inode_dio_done(i) use as-is for read unlock */
+#else
+# define inode_dio_wait(i)             down_write(&(i)->i_alloc_sem)
+# define inode_dio_write_done(i)       up_write(&(i)->i_alloc_sem)
+# define inode_dio_read(i)             down_read(&(i)->i_alloc_sem)
+# define inode_dio_done(i)             up_read(&(i)->i_alloc_sem)
+#endif
 
 #include <linux/mpage.h>        /* for generic_writepages */
 
index f1aa2af..2a0d6d6 100644 (file)
@@ -1465,12 +1465,12 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr)
 
        if (!S_ISDIR(inode->i_mode)) {
                if (ia_valid & ATTR_SIZE)
-                       UP_WRITE_I_ALLOC_SEM(inode);
+                       inode_dio_write_done(inode);
                mutex_unlock(&inode->i_mutex);
                cfs_down_write(&lli->lli_trunc_sem);
                mutex_lock(&inode->i_mutex);
                if (ia_valid & ATTR_SIZE)
-                       DOWN_WRITE_I_ALLOC_SEM(inode);
+                       inode_dio_wait(inode);
        }
 
        /* We need a steady stripe configuration for setattr to avoid
index f8b28b2..a656e71 100644 (file)
@@ -292,9 +292,9 @@ static int vvp_io_setattr_iter_init(const struct lu_env *env,
         * This last one is especially bad for racing o_append users on other
         * nodes.
         */
-       mutex_unlock(&inode->i_mutex);
        if (cl_io_is_trunc(ios->cis_io))
-               UP_WRITE_I_ALLOC_SEM(inode);
+               inode_dio_write_done(inode);
+       mutex_unlock(&inode->i_mutex);
        cio->u.setattr.cui_locks_released = 1;
        return 0;
 }
@@ -346,7 +346,7 @@ static int vvp_io_setattr_trunc(const struct lu_env *env,
                                 const struct cl_io_slice *ios,
                                 struct inode *inode, loff_t size)
 {
-       DOWN_WRITE_I_ALLOC_SEM(inode);
+       inode_dio_wait(inode);
        return 0;
 }
 
@@ -418,7 +418,7 @@ static void vvp_io_setattr_fini(const struct lu_env *env,
        if (cio->u.setattr.cui_locks_released) {
                mutex_lock(&inode->i_mutex);
                if (cl_io_is_trunc(io))
-                       DOWN_WRITE_I_ALLOC_SEM(inode);
+                       inode_dio_wait(inode);
                cio->u.setattr.cui_locks_released = 0;
        }
        vvp_io_fini(env, ios);
@@ -659,18 +659,19 @@ static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
 static int vvp_io_fault_start(const struct lu_env *env,
                               const struct cl_io_slice *ios)
 {
-        struct vvp_io       *vio     = cl2vvp_io(env, ios);
-        struct cl_io        *io      = ios->cis_io;
-        struct cl_object    *obj     = io->ci_obj;
-        struct inode        *inode   = ccc_object_inode(obj);
-        struct cl_fault_io  *fio     = &io->u.ci_fault;
-        struct vvp_fault_io *cfio    = &vio->u.fault;
-        loff_t               offset;
-        int                  result  = 0;
-        cfs_page_t          *vmpage  = NULL;
-        struct cl_page      *page;
-        loff_t               size;
-        pgoff_t              last; /* last page in a file data region */
+       struct vvp_io       *vio     = cl2vvp_io(env, ios);
+       struct cl_io        *io      = ios->cis_io;
+       struct cl_object    *obj     = io->ci_obj;
+       struct inode        *inode   = ccc_object_inode(obj);
+       struct ll_inode_info *lli    = ll_i2info(inode);
+       struct cl_fault_io  *fio     = &io->u.ci_fault;
+       struct vvp_fault_io *cfio    = &vio->u.fault;
+       loff_t               offset;
+       int                  result  = 0;
+       cfs_page_t          *vmpage  = NULL;
+       struct cl_page      *page;
+       loff_t               size;
+       pgoff_t              last; /* last page in a file data region */
 
         if (fio->ft_executable &&
             LTIME_S(inode->i_mtime) != vio->u.fault.ft_mtime)
@@ -685,30 +686,32 @@ static int vvp_io_fault_start(const struct lu_env *env,
         if (result != 0)
                 return result;
 
-        /* must return locked page */
-        if (fio->ft_mkwrite) {
-               /* we grab alloc_sem to exclude truncate case.
+       /* must return locked page */
+       if (fio->ft_mkwrite) {
+               /* we grab lli_trunc_sem to exclude truncate case.
                 * Otherwise, we could add dirty pages into osc cache
                 * while truncate is on-going. */
-               DOWN_READ_I_ALLOC_SEM(inode);
-
-                LASSERT(cfio->ft_vmpage != NULL);
-                lock_page(cfio->ft_vmpage);
-        } else {
-                result = vvp_io_kernel_fault(cfio);
-                if (result != 0)
-                        return result;
-        }
+               cfs_down_read(&lli->lli_trunc_sem);
+
+               LASSERT(cfio->ft_vmpage != NULL);
+               lock_page(cfio->ft_vmpage);
+       } else {
+               result = vvp_io_kernel_fault(cfio);
+               if (result != 0)
+                       return result;
+       }
 
-        vmpage = cfio->ft_vmpage;
-        LASSERT(PageLocked(vmpage));
+       vmpage = cfio->ft_vmpage;
+       LASSERT(PageLocked(vmpage));
 
-        if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_FAULT_TRUNC_RACE))
-                ll_invalidate_page(vmpage);
+       if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_FAULT_TRUNC_RACE))
+               ll_invalidate_page(vmpage);
 
+       size = i_size_read(inode);
         /* Though we have already held a cl_lock upon this page, but
          * it still can be truncated locally. */
-        if (unlikely(vmpage->mapping == NULL)) {
+       if (unlikely((vmpage->mapping != inode->i_mapping) ||
+                    (page_offset(vmpage) > size))) {
                 CDEBUG(D_PAGE, "llite: fault and truncate race happened!\n");
 
                 /* return +1 to stop cl_io_loop() and ll_fault() will catch
@@ -756,7 +759,6 @@ static int vvp_io_fault_start(const struct lu_env *env,
                 }
         }
 
-        size = i_size_read(inode);
         last = cl_index(obj, size - 1);
         LASSERT(fio->ft_index <= last);
         if (fio->ft_index == last)
@@ -772,11 +774,11 @@ static int vvp_io_fault_start(const struct lu_env *env,
         EXIT;
 
 out:
-        /* return unlocked vmpage to avoid deadlocking */
+       /* return unlocked vmpage to avoid deadlocking */
        if (vmpage != NULL)
                unlock_page(vmpage);
        if (fio->ft_mkwrite)
-               UP_READ_I_ALLOC_SEM(inode);
+               cfs_up_read(&lli->lli_trunc_sem);
 #ifdef HAVE_VM_OP_FAULT
        cfio->fault.ft_flags &= ~VM_FAULT_LOCKED;
 #endif
index ffb4464..e200c05 100644 (file)
@@ -429,7 +429,6 @@ static void vvp_transient_page_verify(const struct cl_page *page)
        struct inode *inode = ccc_object_inode(page->cp_obj);
 
        LASSERT(!mutex_trylock(&inode->i_mutex));
-       /* LASSERT_SEM_LOCKED(&inode->i_alloc_sem); */
 }
 
 static int vvp_transient_page_own(const struct lu_env *env,
index 061fdea..519ed39 100644 (file)
@@ -2722,9 +2722,9 @@ void osc_cache_truncate_end(const struct lu_env *env, struct osc_io *oio,
  * The caller must have called osc_cache_writeback_range() to issue IO
  * otherwise it will take a long time for this function to finish.
  *
- * Caller must hold inode_mutex and i_alloc_sem, or cancel exclusive
- * dlm lock so that nobody else can dirty this range of file while we're
- * waiting for extents to be written.
+ * Caller must hold inode_mutex , or cancel exclusive dlm lock so that
+ * nobody else can dirty this range of file while we're waiting for
+ * extents to be written.
  */
 int osc_cache_wait_range(const struct lu_env *env, struct osc_object *obj,
                         pgoff_t start, pgoff_t end)
index 2630fe8..0e19e80 100644 (file)
@@ -448,7 +448,6 @@ struct page *osd_get_page(struct dt_object *dt, loff_t offset, int rw)
 /*
  * there are following "locks":
  * journal_start
- * i_alloc_sem
  * i_mutex
  * page lock