From: Alexander Zarochentsev Date: Mon, 20 May 2024 18:33:18 +0000 (+0000) Subject: LU-17865 osc: fiemap deadlock fix X-Git-Tag: 2.15.64~185 X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=97e66947cbbc7a6247a756535b0138e0227ebb25;p=fs%2Flustre-release.git LU-17865 osc: fiemap deadlock fix A fiemap call may deadlock due to wrongly requesting an LDLM lock from the server while the same lock is cached and pinned at the client. Two PR lock requests are compatible, so the deadlock also requires a concurrent write lock. ll_fiemap_info_key is shared between osc_object_fiemap() calls, so once the OBD_FL_SRVLOCK flag is set, it is reused for all subsequent RPCs regardless of the local lock caching status. HPE-bug-id: LUS-12353 Signed-off-by: Alexander Zarochentsev Change-Id: I6e76bc5e4549ed887b8f6177432acf90f9ec614d Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/55163 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andrew Perepechko Reviewed-by: Alex Zhuravlev Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 787853d..816c2f7 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -441,6 +441,7 @@ extern bool obd_enable_health_write; #define OBD_FAIL_OSC_NO_SIZE_DATA 0x415 #define OBD_FAIL_OSC_DELAY_CANCEL 0x416 #define OBD_FAIL_OSC_SLOW_PAGE_EVICT 0x417 +#define OBD_FAIL_OSC_FIEMAP 0x418 #define OBD_FAIL_OSC_MARK_COMPRESSED 0x419 #define OBD_FAIL_PTLRPC 0x500 diff --git a/lustre/osc/osc_object.c b/lustre/osc/osc_object.c index d9b2290..2b1e90c 100644 --- a/lustre/osc/osc_object.c +++ b/lustre/osc/osc_object.c @@ -282,13 +282,15 @@ static int osc_object_fiemap(const struct lu_env *env, struct cl_object *obj, LDLM_FL_BLOCK_GRANTED | LDLM_FL_LVB_READY, &resid, LDLM_EXTENT, &policy, LCK_PR | LCK_PW, &lockh); + fmkey->lfik_oa.o_valid |= OBD_MD_FLFLAGS; if (mode) { /* lock is cached on client */ + fmkey->lfik_oa.o_flags &= ~OBD_FL_SRVLOCK; if (mode != LCK_PR) { 
ldlm_lock_addref(&lockh, LCK_PR); ldlm_lock_decref(&lockh, LCK_PW); } + CFS_FAIL_TIMEOUT(OBD_FAIL_OSC_FIEMAP, cfs_fail_val); } else { /* no cached lock, needs acquire lock on server side */ - fmkey->lfik_oa.o_valid |= OBD_MD_FLFLAGS; fmkey->lfik_oa.o_flags |= OBD_FL_SRVLOCK; } diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index dbe6116..5cf8fe3 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -15987,6 +15987,29 @@ test_130g() { } run_test 130g "FIEMAP (overstripe file)" +test_130h() { + (( OSTCOUNT < 2 )) && skip_env "need 2 OSTs" + + $LFS setstripe -o 0,1 -S 1M $DIR/$tfile + $LFS getstripe $DIR/$tfile + dd if=/dev/zero of=$DIR/$tfile bs=1M count=2 + $LCTL set_param ldlm.namespaces.*-OST0000-osc-*.lru_size=clear + sleep 1 + local before=$(date +%s) + ##define OBD_FAIL_OSC_FIEMAP 0x418 + $LCTL set_param fail_loc=0x80000418 fail_val=5 + checkfiemap $DIR/$tfile $((2 * 1024 * 1024)) & + sleep 1 + dd if=/dev/zero of=$DIR/$tfile bs=1M count=3 + wait + $LCTL set_param fail_loc=0 fail_val=0 + # check for client eviction + local evict=$($LCTL get_param osc.$FSNAME-OST0001-osc-f*.state | + awk -F"[ [,]" '/EVICTED ]$/ { if (mx<$5) {mx=$5;} } END { print mx }') + [ -z "$evict" ] || [[ $evict -le $before ]] || error "eviction happened" +} +run_test 130h "FIEMAP deadlock" + # Test for writev/readv test_131a() { rwv -f $DIR/$tfile -w -n 3 524288 1048576 1572864 ||