From: Etienne AUJAMES Date: Wed, 2 Aug 2023 09:27:41 +0000 (+0200) Subject: LU-16235 hsm: get a valid cookie for RAoLU request X-Git-Tag: 2.16.51~177 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=241cf3c6d08277c4a401ec8bd109274123bf9cdf;p=fs%2Flustre-release.git LU-16235 hsm: get a valid cookie for RAoLU request Add a way to get a valid cookie when cdt_last_cookie has not been initialized yet. The RAoLU (remove_archive_on_last_unlink) policy is allowed to queue a remove request while the coordinator is stopped. In that case cdt_last_cookie may not be initialized yet, and the remove request can be queued with a conflicting cookie. This patch adds cdt_update_last_cookie(), which processes the HSM llog in reverse and stops at the first non-cancel action to determine the last cookie. Add the regression test sanity-hsm 26e. Test-Parameters: testlist=sanity-hsm Test-Parameters: testlist=sanity-hsm Test-Parameters: testlist=sanity-hsm Test-Parameters: testlist=sanity-hsm env=ONLY=26e,ONLY_REPEAT=30 Signed-off-by: Etienne AUJAMES Signed-off-by: Nikitas Angelinas Change-Id: I6468a24b95fcb8768e12f40edfcea3ce8407281f Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/51850 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- diff --git a/lustre/mdt/mdt_coordinator.c b/lustre/mdt/mdt_coordinator.c index 5794e95..2dce801 100644 --- a/lustre/mdt/mdt_coordinator.c +++ b/lustre/mdt/mdt_coordinator.c @@ -1000,9 +1000,9 @@ static int hsm_restore_cb(const struct lu_env *env, larr = (struct llog_agent_req_rec *)hdr; hai = &larr->arr_hai; - if (hai->hai_cookie >= cdt->cdt_last_cookie) { + if (hai->hai_cookie > cdt->cdt_last_cookie) { /* update the cookie to avoid collision */ - cdt->cdt_last_cookie = hai->hai_cookie + 1; + cdt->cdt_last_cookie = hai->hai_cookie; } if (hai->hai_action != HSMA_RESTORE || @@ -1039,8 +1039,9 @@ out: */ static int mdt_hsm_pending_restore(struct mdt_thread_info *mti) { + struct coordinator *cdt = &mti->mti_mdt->mdt_coordinator; struct 
hsm_restore_data hrd; - int rc; + int rc; ENTRY; hrd.hrd_mti = mti; @@ -1048,7 +1049,14 @@ static int mdt_hsm_pending_restore(struct mdt_thread_info *mti) rc = cdt_llog_process(mti->mti_env, mti->mti_mdt, hsm_restore_cb, &hrd, 0, 0, WRITE); - RETURN(rc); + if (rc < 0) + RETURN(rc); + + /* no pending request found -> start a new session */ + if (!cdt->cdt_last_cookie) + cdt->cdt_last_cookie = ktime_get_real_seconds(); + + RETURN(0); } int hsm_init_ucred(struct lu_ucred *uc) @@ -1228,9 +1236,6 @@ static int mdt_hsm_cdt_start(struct mdt_device *mdt) BUILD_BUG_ON(BIT(CDT_POLICY_SHIFT_COUNT - 1) != CDT_POLICY_LAST); cdt->cdt_policy = CDT_DEFAULT_POLICY; - /* just need to be larger than previous one */ - /* cdt_last_cookie is protected by cdt_llog_lock */ - cdt->cdt_last_cookie = ktime_get_real_seconds(); atomic_set(&cdt->cdt_request_count, 0); atomic_set(&cdt->cdt_archive_count, 0); atomic_set(&cdt->cdt_restore_count, 0); diff --git a/lustre/mdt/mdt_hsm_cdt_actions.c b/lustre/mdt/mdt_hsm_cdt_actions.c index 44f6cb7..d1f3758 100644 --- a/lustre/mdt/mdt_hsm_cdt_actions.c +++ b/lustre/mdt/mdt_hsm_cdt_actions.c @@ -254,6 +254,70 @@ int cdt_llog_process(const struct lu_env *env, struct mdt_device *mdt, } /** + * llog_cat_process() callback, used to find last used cookie. + * The processing ends at the first non-cancel record. 
+ * \param env [IN] environment + * \param llh [IN] llog handle + * \param hdr [IN] llog record + * \param data [IN/OUT] cb data = coordinator + * \retval 0 success + * \retval -ve failure + */ +static int hsm_last_cookie_cb(const struct lu_env *env, struct llog_handle *llh, + struct llog_rec_hdr *hdr, void *data) +{ + struct llog_agent_req_rec *larr = (struct llog_agent_req_rec *)hdr; + struct hsm_action_item *hai = &larr->arr_hai; + struct coordinator *cdt = data; + + /* do not stop on cancel, it takes cookie from other request */ + if (hai->hai_action == HSMA_CANCEL) + RETURN(0); + + if (hai->hai_cookie > cdt->cdt_last_cookie) + cdt->cdt_last_cookie = hai->hai_cookie; + + RETURN(LLOG_PROC_BREAK); +} + +/** + * Update the last cookie used by a request. + * \param mti [IN] context + */ +static int cdt_update_last_cookie(const struct lu_env *env, + struct coordinator *cdt) +__must_hold(&cdt->cdt_llog_lock) +{ + struct mdt_device *mdt; + struct obd_device *obd; + struct llog_ctxt *lctxt; + int rc; + + mdt = container_of(cdt, typeof(*mdt), mdt_coordinator); + obd = mdt2obd_dev(mdt); + lctxt = llog_get_context(obd, LLOG_AGENT_ORIG_CTXT); + if (!lctxt || !lctxt->loc_handle) + RETURN(-ENOENT); + + rc = llog_cat_reverse_process(env, lctxt->loc_handle, + hsm_last_cookie_cb, cdt); + + llog_ctxt_put(lctxt); + + if (rc < 0) { + CERROR("%s: failed to process HSM_ACTIONS llog: rc = %d\n", + mdt_obd_name(mdt), rc); + RETURN(rc); + } + + /* no pending request found -> start a new session */ + if (!cdt->cdt_last_cookie) + cdt->cdt_last_cookie = ktime_get_real_seconds(); + + RETURN(0); +} + +/** * add an entry in agent llog * \param env [IN] environment * \param mdt [IN] PDT device @@ -293,18 +357,28 @@ int mdt_agent_record_add(const struct lu_env *env, struct mdt_device *mdt, down_write(&cdt->cdt_llog_lock); + /* If cdt_last_cookie is not set, try to initialize it. + * This is used by RAoLU with non-started coordinator. 
+ */ + if (unlikely(!cdt->cdt_last_cookie)) { + rc = cdt_update_last_cookie(env, cdt); + if (rc < 0) + GOTO(unlock, rc); + } + /* in case of cancel request, the cookie is already set to the * value of the request cookie to be cancelled * so we do not change it */ if (hai->hai_action == HSMA_CANCEL) larr->arr_hai.hai_cookie = hai->hai_cookie; else - larr->arr_hai.hai_cookie = cdt->cdt_last_cookie++; + larr->arr_hai.hai_cookie = ++cdt->cdt_last_cookie; rc = llog_cat_add(env, lctxt->loc_handle, &larr->arr_hdr, NULL); if (rc > 0) rc = 0; +unlock: up_write(&cdt->cdt_llog_lock); llog_ctxt_put(lctxt); diff --git a/lustre/tests/sanity-hsm.sh b/lustre/tests/sanity-hsm.sh index 7ca0986..f4bf3d2 100755 --- a/lustre/tests/sanity-hsm.sh +++ b/lustre/tests/sanity-hsm.sh @@ -394,6 +394,14 @@ get_request_count() { "awk -vn=0 '/'$fid'.*action='$request'/ {n++}; END {print n}'" } +get_request_cookie() { + local fid=$1 + local request=$2 + + do_facet $SINGLEMDS "$LCTL get_param -n $HSM_PARAM.actions |"\ + "awk '/'$fid'.*action='$request'/ {print \\\$6}' | cut -f3 -d/" +} + # Ensure the number of HSM request for a given FID is correct # assert_request_count FID REQUEST_TYPE COUNT [ERROR_MSG] assert_request_count() { @@ -2372,6 +2380,53 @@ test_26d() { } run_test 26d "RAoLU when Client eviction" +test_26e() { + # test needs a running copytool + copytool setup + mkdir_on_mdt0 $DIR/$tdir + + local f=$DIR/$tdir/$tfile + local fid=$(create_small_file $f) + local f2=$DIR/$tdir/$tfile-2 + local fid2=$(create_small_file $f2) + + $LFS hsm_archive $f || error "could not archive file" + wait_request_state $fid ARCHIVE SUCCEED + + kill_copytools + wait_copytools || error "copytool failed to stop" + + $LFS hsm_archive $f2 || error "could not archive file" + wait_request_state $fid2 ARCHIVE WAITING + + local last_cookie=$(( $(get_request_cookie $fid2 ARCHIVE) )) + + stack_trap "cdt_set_mount_state enabled" + cdt_set_mount_state shutdown + + fail mds1 + cdt_check_state stopped + + stack_trap 
"set_hsm_param remove_archive_on_last_unlink 0" + set_hsm_param remove_archive_on_last_unlink 1 + + rm -f $f + + wait_request_state $fid REMOVE WAITING + + local new_cookie=$(( $(get_request_cookie $fid REMOVE) )) + echo "Check cookie from RAoLU request (last: $last_cookie, remove: $new_cookie)" + (( new_cookie == last_cookie + 1 )) || + error "RAoLU fail to setup a valid cookie ($new_cookie != $last_cookie + 1)" + + cdt_enable + copytool setup + + wait_request_state $fid2 ARCHIVE SUCCEED + wait_request_state $fid REMOVE SUCCEED +} +run_test 26e "RAoLU with a non-started coordinator" + test_27a() { # test needs a running copytool copytool setup