From 7af2428511c286788d9c73c436cb43d6f791821f Mon Sep 17 00:00:00 2001 From: Jinshan Xiong Date: Fri, 19 Aug 2011 15:24:15 -0700 Subject: [PATCH] LU-613 clio: Client dead-lock during binary exec The root cause is clio takes attr lock and i_size_sem in reverse order. In my patch, I find out there is no deadlock issue any more in fault and truncate path, so it holds i_size_sem in fault path to fix this problem. Change-Id: I04cca9324158a34fded6651692410e29aae2e402 Signed-off-by: Jinshan Xiong Reviewed-on: http://review.whamcloud.com/1281 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Niu Yawei Reviewed-by: Oleg Drokin --- lustre/include/lclient.h | 3 +-- lustre/lclient/lcommon_cl.c | 40 +++++++++------------------------------- lustre/liblustre/llite_cl.c | 2 +- lustre/llite/rw.c | 5 ----- lustre/llite/vvp_io.c | 10 ++++------ lustre/tests/Makefile.am | 2 +- lustre/tests/racer.sh | 17 ++++++++++------- lustre/tests/racer/file_exec.sh | 13 +++++++++++++ lustre/tests/racer/racer.sh | 2 +- 9 files changed, 40 insertions(+), 54 deletions(-) create mode 100755 lustre/tests/racer/file_exec.sh diff --git a/lustre/include/lclient.h b/lustre/include/lclient.h index 6a5425c..dd6721a 100644 --- a/lustre/include/lclient.h +++ b/lustre/include/lclient.h @@ -331,8 +331,7 @@ void ccc_io_advance(const struct lu_env *env, const struct cl_io_slice *ios, void ccc_io_update_iov(const struct lu_env *env, struct ccc_io *cio, struct cl_io *io); int ccc_prep_size(const struct lu_env *env, struct cl_object *obj, - struct cl_io *io, loff_t start, size_t count, int vfslock, - int *exceed); + struct cl_io *io, loff_t start, size_t count, int *exceed); void ccc_req_completion(const struct lu_env *env, const struct cl_req_slice *slice, int ioret); void ccc_req_attr_set(const struct lu_env *env,const struct cl_req_slice *slice, diff --git a/lustre/lclient/lcommon_cl.c b/lustre/lclient/lcommon_cl.c index 821ef6e..792dc2c 100644 --- a/lustre/lclient/lcommon_cl.c +++ b/lustre/lclient/lcommon_cl.c @@ -828,22 +828,20 @@ void ccc_io_advance(const struct lu_env *env, } } -static void ccc_object_size_lock(struct cl_object *obj, int vfslock) +static void ccc_object_size_lock(struct cl_object *obj) { struct inode *inode = ccc_object_inode(obj); - if (vfslock) - cl_isize_lock(inode, 0); + cl_isize_lock(inode, 0); cl_object_attr_lock(obj); } -static void ccc_object_size_unlock(struct cl_object *obj, int vfslock) +static void ccc_object_size_unlock(struct cl_object *obj) { struct inode *inode = ccc_object_inode(obj); cl_object_attr_unlock(obj); - if (vfslock) - cl_isize_unlock(inode, 0); + cl_isize_unlock(inode, 0); } /** @@ -856,13 +854,9 @@ static void ccc_object_size_unlock(struct cl_object *obj, int vfslock) * protect consistency between inode size and cl_object * attributes. cl_object_size_lock() protects consistency between cl_attr's of * top-object and sub-objects. - * - * In page fault path cl_isize_lock cannot be taken, client has to live with - * the resulting races. */ int ccc_prep_size(const struct lu_env *env, struct cl_object *obj, - struct cl_io *io, loff_t start, size_t count, int vfslock, - int *exceed) + struct cl_io *io, loff_t start, size_t count, int *exceed) { struct cl_attr *attr = ccc_env_thread_attr(env); struct inode *inode = ccc_object_inode(obj); @@ -889,7 +883,7 @@ int ccc_prep_size(const struct lu_env *env, struct cl_object *obj, * ll_inode_size_lock(). This guarantees that short reads are handled * correctly in the face of concurrent writes and truncates. */ - ccc_object_size_lock(obj, vfslock); + ccc_object_size_lock(obj); result = cl_object_attr_get(env, obj, attr); if (result == 0) { kms = attr->cat_kms; @@ -899,7 +893,7 @@ int ccc_prep_size(const struct lu_env *env, struct cl_object *obj, * return a short read (B) or some zeroes at the end * of the buffer (C) */ - ccc_object_size_unlock(obj, vfslock); + ccc_object_size_unlock(obj); result = cl_glimpse_lock(env, io, inode, obj); if (result == 0 && exceed != NULL) { /* If objective page index exceed end-of-file @@ -926,24 +920,8 @@ int ccc_prep_size(const struct lu_env *env, struct cl_object *obj, * which will always be >= the kms value here. * b=11081 */ - /* - * XXX in a page fault path, change inode size without - * ll_inode_size_lock() held! there is a race - * condition with truncate path. (see ll_extent_lock) - */ - /* - * XXX i_size_write() is not used because it is not - * safe to take the ll_inode_size_lock() due to a - * potential lock inversion (bug 6077). And since - * it's not safe to use i_size_write() without a - * covering mutex we do the assignment directly. It - * is not critical that the size be correct. - */ if (cl_isize_read(inode) < kms) { - if (vfslock) - cl_isize_write_nolock(inode, kms); - else - cl_isize_write(inode, kms); + cl_isize_write_nolock(inode, kms); CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n", PFID(lu_object_fid(&obj->co_lu)), @@ -952,7 +930,7 @@ int ccc_prep_size(const struct lu_env *env, struct cl_object *obj, } } } - ccc_object_size_unlock(obj, vfslock); + ccc_object_size_unlock(obj); return result; } diff --git a/lustre/liblustre/llite_cl.c b/lustre/liblustre/llite_cl.c index d553284..5e43db8 100644 --- a/lustre/liblustre/llite_cl.c +++ b/lustre/liblustre/llite_cl.c @@ -643,7 +643,7 @@ static int slp_io_start(const struct lu_env *env, const struct cl_io_slice *ios) if (IS_ERR(iogroup)) RETURN(PTR_ERR(iogroup)); - err = ccc_prep_size(env, obj, io, pos, cnt, 0, &exceed); + err = ccc_prep_size(env, obj, io, pos, cnt, &exceed); if (err != 0 || (write == 0 && exceed != 0)) GOTO(out, err); diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index b8849a5..6bc1525 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -73,17 +73,12 @@ * must be called under ->lli_size_sem */ void ll_truncate(struct inode *inode) { - struct ll_inode_info *lli = ll_i2info(inode); ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) to %llu\n", inode->i_ino, inode->i_generation, inode, i_size_read(inode)); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_TRUNC, 1); - if (lli->lli_size_sem_owner == cfs_current()) { - LASSERT_SEM_LOCKED(&lli->lli_size_sem); - ll_inode_size_unlock(inode, 0); - } EXIT; return; diff --git a/lustre/llite/vvp_io.c b/lustre/llite/vvp_io.c index c7e6313..4b031ac 100644 --- a/lustre/llite/vvp_io.c +++ b/lustre/llite/vvp_io.c @@ -326,13 +326,11 @@ static int vvp_do_vmtruncate(struct inode *inode, size_t size) int result; /* * Only ll_inode_size_lock is taken at this level. lov_stripe_lock() - * is grabbed by ll_truncate() only over call to obd_adjust_kms(). If - * vmtruncate returns 0, then ll_truncate dropped ll_inode_size_lock() + * is grabbed by ll_truncate() only over call to obd_adjust_kms(). */ ll_inode_size_lock(inode, 0); result = vmtruncate(inode, size); - if (result != 0) - ll_inode_size_unlock(inode, 0); + ll_inode_size_unlock(inode, 0); return result; } @@ -512,7 +510,7 @@ static int vvp_io_read_start(const struct lu_env *env, CDEBUG(D_VFSTRACE, "read: -> [%lli, %lli)\n", pos, pos + cnt); - result = ccc_prep_size(env, obj, io, pos, tot, 1, &exceed); + result = ccc_prep_size(env, obj, io, pos, tot, &exceed); if (result != 0) return result; else if (exceed != 0) @@ -705,7 +703,7 @@ static int vvp_io_fault_start(const struct lu_env *env, /* offset of the last byte on the page */ offset = cl_offset(obj, fio->ft_index + 1) - 1; LASSERT(cl_index(obj, offset) == fio->ft_index); - result = ccc_prep_size(env, obj, io, 0, offset + 1, 0, NULL); + result = ccc_prep_size(env, obj, io, 0, offset + 1, NULL); if (result != 0) return result; diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am index be7b4ac..b6746c7 100644 --- a/lustre/tests/Makefile.am +++ b/lustre/tests/Makefile.am @@ -30,7 +30,7 @@ nobase_noinst_SCRIPTS = cfg/local.sh nobase_noinst_SCRIPTS += test-groups/regression test-groups/regression-mpi nobase_noinst_SCRIPTS += acl/make-tree acl/run cfg/ncli.sh nobase_noinst_SCRIPTS += racer/dir_create.sh racer/file_create.sh racer/file_list.sh -nobase_noinst_SCRIPTS += racer/file_rm.sh racer/racer.sh racer/file_concat.sh +nobase_noinst_SCRIPTS += racer/file_rm.sh racer/racer.sh racer/file_concat.sh racer/file_exec.sh nobase_noinst_SCRIPTS += racer/file_link.sh racer/file_rename.sh racer/file_symlink.sh nobase_noinst_SCRIPTS += rmtacl/make-tree rmtacl/run nobase_noinst_DATA = acl/cp.test acl/getfacl-noacl.test acl/inheritance.test diff --git a/lustre/tests/racer.sh b/lustre/tests/racer.sh index 66596ae..edcf900 100644 --- a/lustre/tests/racer.sh +++ b/lustre/tests/racer.sh @@ -12,21 +12,24 @@ init_logging racer=$LUSTRE/tests/racer/racer.sh echo racer: $racer +DURATION=${DURATION:-900} +[ "$SLOW" = "no" ] && DURATION=300 +MOUNT_2=${MOUNT_2:-"yes"} + +build_test_filter +check_and_setup_lustre + CLIENTS=${CLIENTS:-$HOSTNAME} -RACERDIRS=${RACERDIRS:-$DIR} +RACERDIRS=${RACERDIRS:-"$DIR $DIR2"} echo RACERDIRS=$RACERDIRS for d in ${RACERDIRS}; do + is_mounted $d || continue + RDIRS="$RDIRS $d/racer" mkdir -p $d/racer # lfs setstripe $d/racer -c -1 done -DURATION=${DURATION:-900} -[ "$SLOW" = "no" ] && DURATION=300 - -build_test_filter -check_and_setup_lustre - # run racer test_1() { local rrc=0 diff --git a/lustre/tests/racer/file_exec.sh b/lustre/tests/racer/file_exec.sh new file mode 100755 index 0000000..7f44491 --- /dev/null +++ b/lustre/tests/racer/file_exec.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +DIR=$1 +MAX=$2 +PROG=/bin/sleep + +while /bin/true ; do + file=$((RANDOM % MAX)) + cp $PROG $DIR/$file > /dev/null 2>&1 + $DIR/$file 0.$((RANDOM % 5 + 1)) 2> /dev/null + sleep $((RANDOM % 3)) +done + diff --git a/lustre/tests/racer/racer.sh b/lustre/tests/racer/racer.sh index efd3bcd..216e56f 100755 --- a/lustre/tests/racer/racer.sh +++ b/lustre/tests/racer/racer.sh @@ -12,7 +12,7 @@ NUM_THREADS=${NUM_THREADS:-3} mkdir -p $DIR RACER_PROGS="file_create dir_create file_rm file_rename file_link file_symlink \ -file_list file_concat" +file_list file_concat file_exec" racer_cleanup() { -- 1.8.3.1