From 39862136c3cfee127c4b0a9604ff12f560af3124 Mon Sep 17 00:00:00 2001 From: Nikitas Angelinas Date: Wed, 24 Jul 2019 02:43:53 -0700 Subject: [PATCH] LU-11675 hsm: don't allow new HSM requests during CDT_INIT When the HSM CDT is shut down and restarted, it resets cdt_last_cookie using ktime_get_real_seconds() and examines the CDT llog for existing requests, in order to set cdt_last_cookie to the highest known value, so that newly-assigned cookies are unique. There is a window between CDT_INIT and CDT_RUNNING during which new requests can arrive, and if the CDT llog has not been fully examined, cookies can be reused. This can cause the following two assertions to be triggered in cdt_agent_record_hash_add(): LASSERT(carl0->carl_cat_idx == carl1->carl_cat_idx); LASSERT(carl0->carl_rec_idx == carl1->carl_rec_idx); Fix this by not allowing new HSM requests during CDT_INIT. Also, cookie values are incremented on a separate line, which causes one value to be skipped at CDT startup time. This is not an issue, but there does not seem to be a need for it; fix this by post-incrementing and assigning cookie values on the same line. 
Signed-off-by: Nikitas Angelinas Cray-bug-id: LUS-6589 Test-Parameters: trivial testlist=sanity-hsm Change-Id: I18a1c3e85de6c50a9bf1ce598e21d83d893ad0ca Reviewed-on: https://review.whamcloud.com/33671 Reviewed-by: Quentin Bouget Reviewed-by: Andriy Skulysh Reviewed-by: Ben Evans Tested-by: jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/mdt/mdt_coordinator.c | 3 ++- lustre/mdt/mdt_hsm_cdt_actions.c | 8 +++----- lustre/mdt/mdt_hsm_cdt_client.c | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/lustre/mdt/mdt_coordinator.c b/lustre/mdt/mdt_coordinator.c index 723924d..e1101e7 100644 --- a/lustre/mdt/mdt_coordinator.c +++ b/lustre/mdt/mdt_coordinator.c @@ -876,9 +876,10 @@ static int hsm_restore_cb(const struct lu_env *env, larr = (struct llog_agent_req_rec *)hdr; hai = &larr->arr_hai; - if (hai->hai_cookie > cdt->cdt_last_cookie) + if (hai->hai_cookie >= cdt->cdt_last_cookie) { /* update the cookie to avoid collision */ cdt->cdt_last_cookie = hai->hai_cookie + 1; + } if (hai->hai_action != HSMA_RESTORE || agent_req_in_final_state(larr->arr_status)) diff --git a/lustre/mdt/mdt_hsm_cdt_actions.c b/lustre/mdt/mdt_hsm_cdt_actions.c index c80fb4b..cd2c247 100644 --- a/lustre/mdt/mdt_hsm_cdt_actions.c +++ b/lustre/mdt/mdt_hsm_cdt_actions.c @@ -289,12 +289,10 @@ int mdt_agent_record_add(const struct lu_env *env, struct mdt_device *mdt, /* in case of cancel request, the cookie is already set to the * value of the request cookie to be cancelled * so we do not change it */ - if (hai->hai_action == HSMA_CANCEL) { + if (hai->hai_action == HSMA_CANCEL) larr->arr_hai.hai_cookie = hai->hai_cookie; - } else { - cdt->cdt_last_cookie++; - larr->arr_hai.hai_cookie = cdt->cdt_last_cookie; - } + else + larr->arr_hai.hai_cookie = cdt->cdt_last_cookie++; rc = llog_cat_add(env, lctxt->loc_handle, &larr->arr_hdr, NULL); if (rc > 0) diff --git a/lustre/mdt/mdt_hsm_cdt_client.c b/lustre/mdt/mdt_hsm_cdt_client.c index b0c01c9..705e98eb 100644 --- 
a/lustre/mdt/mdt_hsm_cdt_client.c +++ b/lustre/mdt/mdt_hsm_cdt_client.c @@ -420,7 +420,7 @@ int mdt_hsm_add_actions(struct mdt_thread_info *mti, ENTRY; /* no coordinator started, so we cannot serve requests */ - if (cdt->cdt_state == CDT_STOPPED) + if (cdt->cdt_state == CDT_STOPPED || cdt->cdt_state == CDT_INIT) RETURN(-EAGAIN); if (!hal_is_sane(hal)) -- 1.8.3.1