From b13a5b351e71e4e7b0016e2a1426b1a21bf783c4 Mon Sep 17 00:00:00 2001 From: Aurelien Degremont Date: Mon, 26 Sep 2022 12:27:37 +0000 Subject: [PATCH] LU-16188 mdt: fix incompatible HSM request handling When the coordinator tries to send multiple HSM actions in a single request and one of those actions fails the compatibility checks, all the requests are marked as STARTED but none of them is sent to the agent. Return -EAGAIN from mdt_hsm_agent_send() so that the coordinator does not mark the requests as STARTED and retries them later. Add a sanity-hsm test. Test-Parameters: trivial testlist=sanity-hsm Change-Id: Id4fb858021be6dc6b0cbcf140c3f2051efce57ad Signed-off-by: Jeya Ganesh Babu Jegatheesan Signed-off-by: Aurelien Degremont Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/48658 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Sergey Cheremencev Reviewed-by: Nikitas Angelinas Reviewed-by: Etienne AUJAMES Reviewed-by: Oleg Drokin --- lustre/mdt/mdt_hsm_cdt_agent.c | 2 +- lustre/tests/sanity-hsm.sh | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/lustre/mdt/mdt_hsm_cdt_agent.c b/lustre/mdt/mdt_hsm_cdt_agent.c index 3e51788..e72e3da 100644 --- a/lustre/mdt/mdt_hsm_cdt_agent.c +++ b/lustre/mdt/mdt_hsm_cdt_agent.c @@ -534,7 +534,7 @@ int mdt_hsm_agent_send(struct mdt_thread_info *mti, * So no need the rebuild a full valid HAL now */ if (fail_request) - GOTO(out_buf, rc = 0); + GOTO(out_buf, rc = -EAGAIN); /* Cancel memory registration is useless for purge * non registration avoid a deadlock : diff --git a/lustre/tests/sanity-hsm.sh b/lustre/tests/sanity-hsm.sh index b8cc1af..974cafa 100755 --- a/lustre/tests/sanity-hsm.sh +++ b/lustre/tests/sanity-hsm.sh @@ -3909,6 +3909,33 @@ test_113() { } run_test 113 "wrong stat after restore" +test_114() { + mkdir_on_mdt0 $DIR/$tdir + + local f1=$DIR/$tdir/${tfile}1 + local f2=$DIR/$tdir/${tfile}2 + local fid1=$(create_empty_file "$f1") + local 
fid2=$(create_empty_file "$f2") + + copytool setup + + # Prevent archive from completing + cdt_disable + + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f1 $f2 + # wait archive to register at CDT + wait_request_state "$fid1" ARCHIVE WAITING + + # Set f2 in an incompatible state + $LFS hsm_set --noarchive $f2 + + cdt_enable + + wait_request_state "$fid1" ARCHIVE SUCCEED + wait_request_state "$fid2" ARCHIVE FAILED +} +run_test 114 "Incompatible request does not set other requests as STARTED" + test_200() { mkdir_on_mdt0 $DIR/$tdir -- 1.8.3.1