From: Etienne AUJAMES Date: Wed, 22 Feb 2023 10:37:49 +0000 (+0100) Subject: LU-16571 utils: fix parallel "lfs migrate -b" on hard links X-Git-Tag: 2.15.55~122 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=2310f4b8a6b6050cccedd4982ce80aa1cfbd3fe1;p=fs%2Flustre-release.git LU-16571 utils: fix parallel "lfs migrate -b" on hard links Multiple blocking "lfs migrate" on the same file can exhaust "ost" service threads of an OSS CPT. llapi_get_data_version(...,LL_DV_RD_FLUSH) causes the OSS server to take a server-side extent lock PR to force clients with write lock to update the data version of the object. migrate_block() (lfs.c) checks the file data version is check with LL_DV_RD_FLUSH before taking the group lock. So "ofd_getattr_hdl()" server side lock will conflict with the lfs instance that has the group lock. Each attempt to get server-side extent lock will take an "ost" service thread slot waiting the group lock to be released. If all threads of the "ost" servive are exhausted on a CPT, the OSS can not handle requests from the client and it will get queued inside the NRS policy. This causes the lfs process with the group lock to hang (pread needs "ost" service to get sizes of objects). This patch check the file data version inside the group lock without LL_DV_RD_FLUSH. This flag is not needed, the client already has an extent group lock on all the OST objects. Add the regression test sanity 56xj. Test-Parameters: testlist=sanity env=ONLY=56xj,ONLY_REPEAT=20 Test-Parameters: testlist=sanity env=ONLY=56 Test-Parameters: testlist=sanity env=ONLY=56 Test-Parameters: testlist=sanity env=ONLY=56 Signed-off-by: Etienne AUJAMES Change-Id: I0bacd372dd6f36a4ac776133dff45dc836c7c7f7 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50113 Reviewed-by: Andreas Dilger Reviewed-by: Patrick Farrell Reviewed-by: Oleg Drokin Tested-by: jenkins Tested-by: Maloo --- diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 375f7df..ad3fa8a 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -8212,6 +8212,44 @@ test_56xi() { } run_test 56xi "lfs migrate stats support" +test_56xj() { # LU-16571 "lfs migrate -b" can cause thread starvation on OSS + (( $OSTCOUNT >= 2 )) || skip "needs >= 2 OSTs" + + local file=$DIR/$tfile + local linkdir=$DIR/$tdir + + test_mkdir $linkdir || error "fail to create $linkdir" + $LFS setstripe -i 0 -c 1 -S1M $file + dd if=/dev/urandom of=$file bs=1M count=10 || + error "fail to create $file" + + # Create file links + local cpts + local threads_max + local nlinks + + thread_max=$(do_facet ost1 "$LCTL get_param -n ost.OSS.ost.threads_max") + cpts=$(do_facet ost1 "$LCTL get_param -n cpu_partition_table | wc -l") + (( nlinks = thread_max * 3 / 2 / cpts)) + + echo "create $nlinks hard links of $file" + createmany -l $file $linkdir/link $nlinks + + # Parallel migrates (should not block) + local i + for ((i = 0; i < nlinks; i++)); do + echo $linkdir/link$i + done | xargs -n1 -P $nlinks $LFS migrate -c2 + + local stripe_count + stripe_count=$($LFS getstripe -c $file) || + error "fail to get stripe count on $file" + + ((stripe_count == 2)) || + error "fail to migrate $file (stripe_count = $stripe_count)" +} +run_test 56xj "lfs migrate -b should not cause starvation of threads on OSS" + test_56y() { [ $MDS1_VERSION -lt $(version_code 2.4.53) ] && skip "No HSM $(lustre_build_version $SINGLEMDS) MDS < 2.4.53" diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c index 47e6541..071285c 100644 --- a/lustre/utils/lfs.c +++ b/lustre/utils/lfs.c @@ -1023,31 +1023,34 @@ static int migrate_block(int fd, int fdv, int rc; int rc2; - rc = fstat(fd, &st); - if (rc < 0) { - error_loc = "cannot stat source file"; - return -errno; - } + do + gid = random(); + while (gid == 0); - rc = llapi_get_data_version(fd, &dv1, LL_DV_RD_FLUSH); + + /* The grouplock blocks all concurrent accesses to the file. */ + rc = llapi_group_lock(fd, gid); if (rc < 0) { - error_loc = "cannot get dataversion"; + error_loc = "cannot get group lock"; return rc; } - do - gid = random(); - while (gid == 0); + rc = fstat(fd, &st); + if (rc < 0) { + error_loc = "cannot stat source file"; + rc = -errno; + goto out_unlock; + } /* - * The grouplock blocks all concurrent accesses to the file. - * It has to be taken after llapi_get_data_version as it would - * block it too. + * LL_DV_RD_FLUSH should not be set, otherwise the servers will try to + * get extent locks on the OST objects. This will conflict with our + * extent group locks. */ - rc = llapi_group_lock(fd, gid); + rc = llapi_get_data_version(fd, &dv1, 0); if (rc < 0) { - error_loc = "cannot get group lock"; - return rc; + error_loc = "cannot get dataversion"; + goto out_unlock; } rc = migrate_copy_data(fd, fdv, NULL, bandwidth_bytes_sec,