From f78805dbf041bb42b250fe754dff7b864856203f Mon Sep 17 00:00:00 2001 From: Liu Xuezhao Date: Thu, 27 Sep 2012 14:20:25 +0800 Subject: [PATCH] LU-1337 llite: kernel 3.1 kills inode->i_alloc_sem Kernel 3.1 kills inode->i_alloc_sem, use i_dio_count and inode_dio_wait/inode_dio_done instead. (kernel commit bd5fe6c5eb9c548d7f07fe8f89a150bb6705e8e3). Add HAVE_INODE_DIO_WAIT to differentiate it. Signed-off-by: Liu Xuezhao Change-Id: Ife36e07a85c76153985a4a86ee1973262c4c0e27 Reviewed-on: http://review.whamcloud.com/3582 Tested-by: Hudson Reviewed-by: Jinshan Xiong Tested-by: Maloo Reviewed-by: Andreas Dilger --- lustre/autoconf/lustre-core.m4 | 60 ++++++++++++++++++--------- lustre/include/linux/lustre_compat25.h | 18 +++++---- lustre/llite/llite_lib.c | 4 +- lustre/llite/vvp_io.c | 74 +++++++++++++++++----------------- lustre/llite/vvp_page.c | 1 - lustre/osc/osc_cache.c | 6 +-- lustre/osd-ldiskfs/osd_io.c | 1 - 7 files changed, 95 insertions(+), 69 deletions(-) diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4 index 9798be0..fa0d526 100644 --- a/lustre/autoconf/lustre-core.m4 +++ b/lustre/autoconf/lustre-core.m4 @@ -1794,40 +1794,61 @@ LB_LINUX_TRY_COMPILE([ ]) # -# 3.1.1 has ext4_blocks_for_truncate +# 3.1 renames lock-manager ops(lock_manager_operations) from fl_xxx to lm_xxx +# see kernel commit 8fb47a4fbf858a164e973b8ea8ef5e83e61f2e50 # -AC_DEFUN([LC_BLOCKS_FOR_TRUNCATE], -[AC_MSG_CHECKING([if kernel has ext4_blocks_for_truncate]) +AC_DEFUN([LC_LM_XXX_LOCK_MANAGER_OPS], +[AC_MSG_CHECKING([if lock-manager ops renamed to lm_xxx]) LB_LINUX_TRY_COMPILE([ #include <linux/fs.h> - #include "$LINUX/fs/ext4/ext4_jbd2.h" - #include "$LINUX/fs/ext4/truncate.h" ],[ - ext4_blocks_for_truncate(NULL); + struct lock_manager_operations lm_ops; + lm_ops.lm_compare_owner = NULL; ],[ + AC_DEFINE(HAVE_LM_XXX_LOCK_MANAGER_OPS, 1, + [lock-manager ops renamed to lm_xxx]) AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_BLOCKS_FOR_TRUNCATE, 1, - [kernel has ext4_blocks_for_truncate]) 
],[ AC_MSG_RESULT([no]) ]) ]) # -# 3.1 renames lock-manager ops(lock_manager_operations) from fl_xxx to lm_xxx -# see kernel commit 8fb47a4fbf858a164e973b8ea8ef5e83e61f2e50 +# 3.1 kills inode->i_alloc_sem, use i_dio_count and inode_dio_wait/ +# inode_dio_done instead. +# see kernel commit bd5fe6c5eb9c548d7f07fe8f89a150bb6705e8e3 # -AC_DEFUN([LC_LM_XXX_LOCK_MANAGER_OPS], -[AC_MSG_CHECKING([if lock-manager ops renamed to lm_xxx]) +AC_DEFUN([LC_INODE_DIO_WAIT], +[AC_MSG_CHECKING([if inode->i_alloc_sem is killed and use inode_dio_wait/done.]) LB_LINUX_TRY_COMPILE([ #include <linux/fs.h> ],[ - struct lock_manager_operations lm_ops; - lm_ops.lm_compare_owner = NULL; + inode_dio_wait((struct inode *)0); + inode_dio_done((struct inode *)0); +],[ + AC_DEFINE(HAVE_INODE_DIO_WAIT, 1, + [inode->i_alloc_sem is killed and use inode_dio_wait/done]) + AC_MSG_RESULT([yes]) +],[ + AC_MSG_RESULT([no]) +]) +]) + +# +# 3.1.1 has ext4_blocks_for_truncate +# +AC_DEFUN([LC_BLOCKS_FOR_TRUNCATE], +[AC_MSG_CHECKING([if kernel has ext4_blocks_for_truncate]) +LB_LINUX_TRY_COMPILE([ + #include <linux/fs.h> + #include "$LINUX/fs/ext4/ext4_jbd2.h" + #include "$LINUX/fs/ext4/truncate.h" +],[ + ext4_blocks_for_truncate(NULL); ],[ - AC_DEFINE(HAVE_LM_XXX_LOCK_MANAGER_OPS, 1, - [lock-manager ops renamed to lm_xxx]) AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_BLOCKS_FOR_TRUNCATE, 1, + [kernel has ext4_blocks_for_truncate]) ],[ AC_MSG_RESULT([no]) ]) @@ -2013,11 +2034,12 @@ AC_DEFUN([LC_PROG_LINUX], LC_REQUEST_QUEUE_UNPLUG_FN LC_HAVE_FSTYPE_MOUNT - # 3.1.1 - LC_BLOCKS_FOR_TRUNCATE - # 3.1 LC_LM_XXX_LOCK_MANAGER_OPS + LC_INODE_DIO_WAIT + + # 3.1.1 + LC_BLOCKS_FOR_TRUNCATE # 3.3 LC_HAVE_MIGRATE_HEADER diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h index 6173836..9362d9f 100644 --- a/lustre/include/linux/lustre_compat25.h +++ b/lustre/include/linux/lustre_compat25.h @@ -172,13 +172,17 @@ static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt, (type *)( (char *)__mptr - 
offsetof(type,member) );}) #endif -#define UP_WRITE_I_ALLOC_SEM(i) up_write(&(i)->i_alloc_sem) -#define DOWN_WRITE_I_ALLOC_SEM(i) down_write(&(i)->i_alloc_sem) -#define LASSERT_I_ALLOC_SEM_WRITE_LOCKED(i) LASSERT(down_read_trylock(&(i)->i_alloc_sem) == 0) - -#define UP_READ_I_ALLOC_SEM(i) up_read(&(i)->i_alloc_sem) -#define DOWN_READ_I_ALLOC_SEM(i) down_read(&(i)->i_alloc_sem) -#define LASSERT_I_ALLOC_SEM_READ_LOCKED(i) LASSERT(down_write_trylock(&(i)->i_alloc_sem) == 0) +#ifdef HAVE_INODE_DIO_WAIT +/* inode_dio_wait(i) use as-is for write lock */ +# define inode_dio_write_done(i) do {} while (0) /* for write unlock */ +# define inode_dio_read(i) atomic_inc(&(i)->i_dio_count) +/* inode_dio_done(i) use as-is for read unlock */ +#else +# define inode_dio_wait(i) down_write(&(i)->i_alloc_sem) +# define inode_dio_write_done(i) up_write(&(i)->i_alloc_sem) +# define inode_dio_read(i) down_read(&(i)->i_alloc_sem) +# define inode_dio_done(i) up_read(&(i)->i_alloc_sem) +#endif #include <linux/writeback.h> /* for generic_writepages */ diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index f1aa2af..2a0d6d6 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -1465,12 +1465,12 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr) if (!S_ISDIR(inode->i_mode)) { if (ia_valid & ATTR_SIZE) - UP_WRITE_I_ALLOC_SEM(inode); + inode_dio_write_done(inode); mutex_unlock(&inode->i_mutex); cfs_down_write(&lli->lli_trunc_sem); mutex_lock(&inode->i_mutex); if (ia_valid & ATTR_SIZE) - DOWN_WRITE_I_ALLOC_SEM(inode); + inode_dio_wait(inode); } /* We need a steady stripe configuration for setattr to avoid diff --git a/lustre/llite/vvp_io.c b/lustre/llite/vvp_io.c index f8b28b2..a656e71 100644 --- a/lustre/llite/vvp_io.c +++ b/lustre/llite/vvp_io.c @@ -292,9 +292,9 @@ static int vvp_io_setattr_iter_init(const struct lu_env *env, * This last one is especially bad for racing o_append users on other * nodes. 
*/ - mutex_unlock(&inode->i_mutex); if (cl_io_is_trunc(ios->cis_io)) - UP_WRITE_I_ALLOC_SEM(inode); + inode_dio_write_done(inode); + mutex_unlock(&inode->i_mutex); cio->u.setattr.cui_locks_released = 1; return 0; } @@ -346,7 +346,7 @@ static int vvp_io_setattr_trunc(const struct lu_env *env, const struct cl_io_slice *ios, struct inode *inode, loff_t size) { - DOWN_WRITE_I_ALLOC_SEM(inode); + inode_dio_wait(inode); return 0; } @@ -418,7 +418,7 @@ static void vvp_io_setattr_fini(const struct lu_env *env, if (cio->u.setattr.cui_locks_released) { mutex_lock(&inode->i_mutex); if (cl_io_is_trunc(io)) - DOWN_WRITE_I_ALLOC_SEM(inode); + inode_dio_wait(inode); cio->u.setattr.cui_locks_released = 0; } vvp_io_fini(env, ios); @@ -659,18 +659,19 @@ static int vvp_io_kernel_fault(struct vvp_fault_io *cfio) static int vvp_io_fault_start(const struct lu_env *env, const struct cl_io_slice *ios) { - struct vvp_io *vio = cl2vvp_io(env, ios); - struct cl_io *io = ios->cis_io; - struct cl_object *obj = io->ci_obj; - struct inode *inode = ccc_object_inode(obj); - struct cl_fault_io *fio = &io->u.ci_fault; - struct vvp_fault_io *cfio = &vio->u.fault; - loff_t offset; - int result = 0; - cfs_page_t *vmpage = NULL; - struct cl_page *page; - loff_t size; - pgoff_t last; /* last page in a file data region */ + struct vvp_io *vio = cl2vvp_io(env, ios); + struct cl_io *io = ios->cis_io; + struct cl_object *obj = io->ci_obj; + struct inode *inode = ccc_object_inode(obj); + struct ll_inode_info *lli = ll_i2info(inode); + struct cl_fault_io *fio = &io->u.ci_fault; + struct vvp_fault_io *cfio = &vio->u.fault; + loff_t offset; + int result = 0; + cfs_page_t *vmpage = NULL; + struct cl_page *page; + loff_t size; + pgoff_t last; /* last page in a file data region */ if (fio->ft_executable && LTIME_S(inode->i_mtime) != vio->u.fault.ft_mtime) @@ -685,30 +686,32 @@ static int vvp_io_fault_start(const struct lu_env *env, if (result != 0) return result; - /* must return locked page */ - if 
(fio->ft_mkwrite) { - /* we grab alloc_sem to exclude truncate case. + /* must return locked page */ + if (fio->ft_mkwrite) { + /* we grab lli_trunc_sem to exclude truncate case. * Otherwise, we could add dirty pages into osc cache * while truncate is on-going. */ - DOWN_READ_I_ALLOC_SEM(inode); - - LASSERT(cfio->ft_vmpage != NULL); - lock_page(cfio->ft_vmpage); - } else { - result = vvp_io_kernel_fault(cfio); - if (result != 0) - return result; - } + cfs_down_read(&lli->lli_trunc_sem); + + LASSERT(cfio->ft_vmpage != NULL); + lock_page(cfio->ft_vmpage); + } else { + result = vvp_io_kernel_fault(cfio); + if (result != 0) + return result; + } - vmpage = cfio->ft_vmpage; - LASSERT(PageLocked(vmpage)); + vmpage = cfio->ft_vmpage; + LASSERT(PageLocked(vmpage)); - if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_FAULT_TRUNC_RACE)) - ll_invalidate_page(vmpage); + if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_FAULT_TRUNC_RACE)) + ll_invalidate_page(vmpage); + size = i_size_read(inode); /* Though we have already held a cl_lock upon this page, but * it still can be truncated locally. 
*/ - if (unlikely(vmpage->mapping == NULL)) { + if (unlikely((vmpage->mapping != inode->i_mapping) || + (page_offset(vmpage) > size))) { CDEBUG(D_PAGE, "llite: fault and truncate race happened!\n"); /* return +1 to stop cl_io_loop() and ll_fault() will catch @@ -756,7 +759,6 @@ static int vvp_io_fault_start(const struct lu_env *env, } } - size = i_size_read(inode); last = cl_index(obj, size - 1); LASSERT(fio->ft_index <= last); if (fio->ft_index == last) @@ -772,11 +774,11 @@ static int vvp_io_fault_start(const struct lu_env *env, EXIT; out: - /* return unlocked vmpage to avoid deadlocking */ + /* return unlocked vmpage to avoid deadlocking */ if (vmpage != NULL) unlock_page(vmpage); if (fio->ft_mkwrite) - UP_READ_I_ALLOC_SEM(inode); + cfs_up_read(&lli->lli_trunc_sem); #ifdef HAVE_VM_OP_FAULT cfio->fault.ft_flags &= ~VM_FAULT_LOCKED; #endif diff --git a/lustre/llite/vvp_page.c b/lustre/llite/vvp_page.c index ffb4464..e200c05 100644 --- a/lustre/llite/vvp_page.c +++ b/lustre/llite/vvp_page.c @@ -429,7 +429,6 @@ static void vvp_transient_page_verify(const struct cl_page *page) struct inode *inode = ccc_object_inode(page->cp_obj); LASSERT(!mutex_trylock(&inode->i_mutex)); - /* LASSERT_SEM_LOCKED(&inode->i_alloc_sem); */ } static int vvp_transient_page_own(const struct lu_env *env, diff --git a/lustre/osc/osc_cache.c b/lustre/osc/osc_cache.c index 061fdea..519ed39 100644 --- a/lustre/osc/osc_cache.c +++ b/lustre/osc/osc_cache.c @@ -2722,9 +2722,9 @@ void osc_cache_truncate_end(const struct lu_env *env, struct osc_io *oio, * The caller must have called osc_cache_writeback_range() to issue IO * otherwise it will take a long time for this function to finish. * - * Caller must hold inode_mutex and i_alloc_sem, or cancel exclusive - * dlm lock so that nobody else can dirty this range of file while we're - * waiting for extents to be written. 
+ * Caller must hold inode_mutex, or cancel exclusive dlm lock so that + * nobody else can dirty this range of file while we're waiting for + * extents to be written. */ int osc_cache_wait_range(const struct lu_env *env, struct osc_object *obj, pgoff_t start, pgoff_t end) diff --git a/lustre/osd-ldiskfs/osd_io.c b/lustre/osd-ldiskfs/osd_io.c index 2630fe8..0e19e80 100644 --- a/lustre/osd-ldiskfs/osd_io.c +++ b/lustre/osd-ldiskfs/osd_io.c @@ -448,7 +448,6 @@ struct page *osd_get_page(struct dt_object *dt, loff_t offset, int rw) /* * there are following "locks": * journal_start - * i_alloc_sem * i_mutex * page lock -- 1.8.3.1