From fb3c3d205265cc334ea743c5e86bb1f2c4779e00 Mon Sep 17 00:00:00 2001 From: Yang Sheng Date: Mon, 23 Sep 2024 16:08:20 +0800 Subject: [PATCH] LU-17692 llite: flock work with lockd Change flock handling to invoke the lock copy API so that it works with the NFS server. Fixes: 7f8af8f37e ("LU-17692 flock: get extra reference for lockd") Signed-off-by: Yang Sheng Change-Id: Ic5b3daf825dfa616a394c47a76f0d08e87c5bc9a Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/56488 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Zhenyu Xu Reviewed-by: Oleg Drokin --- lustre/llite/file.c | 23 +++++++------------- lustre/tests/flocks_test.c | 11 ++++++---- lustre/tests/parallel-scale-nfs.sh | 43 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 20 deletions(-) diff --git a/lustre/llite/file.c b/lustre/llite/file.c index bc0620e..e85d593e 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -5460,7 +5460,6 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) struct md_op_data *op_data; struct lustre_handle lockh = { 0 }; union ldlm_policy_data flock = { { 0 } }; - struct file_lock flbuf = *file_lock; int fl_type = file_lock->C_FLC_TYPE; ktime_t kstart = ktime_get(); __u64 flags = 0; @@ -5521,11 +5520,6 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) case F_GETLK64: #endif flags = LDLM_FL_TEST_LOCK; - /* - * To work with lockd we should check local lock first, - * else lock_owner could disappear in conflict case. - */ - posix_test_lock(file, &flbuf); break; case F_CANCELLK: CDEBUG(D_DLMTRACE, "F_CANCELLK owner=%llx %llu-%llu\n", @@ -5639,16 +5633,13 @@ out: } if (rc == 0 && (flags & LDLM_FL_TEST_LOCK) && - flbuf.C_FLC_TYPE != file_lock->C_FLC_TYPE) { /* Verify local & remote */ - CERROR("Flock LR mismatch! 
inode="DFID", flags=%#llx, mode=%u, " - "pid=%u/%u, start=%llu/%llu, end=%llu/%llu,type=%u/%u\n", - PFID(ll_inode2fid(inode)), flags, einfo.ei_mode, - file_lock->C_FLC_PID, flbuf.C_FLC_PID, - file_lock->fl_start, flbuf.fl_start, - file_lock->fl_end, flbuf.fl_end, - file_lock->C_FLC_TYPE, flbuf.C_FLC_TYPE); - /* return local */ - *file_lock = flbuf; + file_lock->C_FLC_TYPE != F_UNLCK) { + struct file_lock flbuf; + + /* Take a extra reference for lockowner while + * working with lockd. + */ + locks_copy_conflock(&flbuf, file_lock); } if (!rc) diff --git a/lustre/tests/flocks_test.c b/lustre/tests/flocks_test.c index fa7fe6c..b734f96 100644 --- a/lustre/tests/flocks_test.c +++ b/lustre/tests/flocks_test.c @@ -830,11 +830,14 @@ static int t6(int argc, char *argv[]) } if (lock.l_type == F_UNLCK) break; - printf("FLOCK %d: RWS:%s POS:%ld LEN:%ld PID:%d\n", - i, fmode2str(lock.l_type), lock.l_start, - lock.l_len, lock.l_pid); + if (i > 0) + printf(";"); + printf("%s%ld,%ld", fmode2str(lock.l_type), + lock.l_start, lock.l_len); lock.l_start += lock.l_len; } + if (lock.l_start > 0) + printf(".\n"); close(fd); if (rc == EXIT_FAILURE) break; @@ -849,7 +852,7 @@ static int t6(int argc, char *argv[]) } } put_fds(); - printf("Time for processing %.03lfs\n", now() - stime); + fprintf(stderr, "Time for processing %.03lfs\n", now() - stime); return rc; } diff --git a/lustre/tests/parallel-scale-nfs.sh b/lustre/tests/parallel-scale-nfs.sh index ce43103..5e3393d 100755 --- a/lustre/tests/parallel-scale-nfs.sh +++ b/lustre/tests/parallel-scale-nfs.sh @@ -143,6 +143,49 @@ test_1() { } run_test 1 "test copy with attributes" +test_2() { + local mp1file=$TESTDIR/file1 + local tmpdir=$(mktemp -d /tmp/nfs-XXXXXX) + local mp2file=$tmpdir/${mp1file#$NFS_CLIMNTPT} + + mount -v -t nfs -o nfsvers=$NFSVERSION,async \ + $LUSTRE_CLIENT_NFSSRV:$NFS_SRVMNTPT $tmpdir ||\ + error "Nfs 2nd mount($tmpdir) error" + + local owc=$(do_node $LUSTRE_CLIENT_NFSSRV \ + "dmesg | grep -v 'DEBUG MARKER:' | grep -c 
'refcount_t: underflow; use-after-free'") + (( $owc > 0 )) && do_node $LUSTRE_CLIENT_NFSSRV \ + 'echo 1 > /sys/kernel/debug/clear_warn_once' + + touch $mp1file + local i=0 + for ((i=1; i<=10; i++)) do + [ $i -eq 10 ] && echo "P100" + echo "R$((i * 2)),10" + echo "W$((i * 100)),100" + sleep 1 + done | flocks_test 6 $mp1file & + local pid=$! + for ((i = 0; i < 5; )); do + echo "T0" | flocks_test 6 $mp2file |\ + grep 'R2,26;W100,900.' && i=$((i + 1)) + local nwc=$(do_node $LUSTRE_CLIENT_NFSSRV \ + "dmesg | grep -v 'DEBUG MARKER:' | grep -c 'refcount_t: underflow; use-after-free'") + (( $owc >= $nwc )) || { + do_node $LUSTRE_CLIENT_NFSSRV \ + "dmesg | grep -1 'refcount_t: underflow; use-after-free'" + error "Failed (owc:$owc < nwc:$nwc)" + } + sleep 1 + done + kill -9 $pid + wait + + umount $tmpdir + rm -rf $tmpdir || true +} +run_test 2 "fcntl getlk on nfs shouldn't cause refcount underflow" + test_compilebench() { if [[ "$TESTSUITE" =~ "parallel-scale-nfs" ]]; then skip "LU-12957 and LU-13068: compilebench for $TESTSUITE" -- 1.8.3.1