From b6ca2afe07966cb536a77adf83001fb328de4bb0 Mon Sep 17 00:00:00 2001 From: Nikitas Angelinas Date: Wed, 24 Jul 2019 02:43:53 -0700 Subject: [PATCH] LU-11675 hsm: don't allow new HSM requests during CDT_INIT When the HSM CDT is shut down and restarted, it resets cdt_last_cookie using ktime_get_real_seconds() and examines the CDT llog for existing requests, in order to set cdt_last_cookie to the highest known value, so that newly-assigned cookies are unique. There is a window between CDT_INIT and CDT_RUNNING during which new requests can arrive, and if the CDT llog has not been fully examined, cookies can be reused. This can cause the following two assertions to be triggered in cdt_agent_record_hash_add(): LASSERT(carl0->carl_cat_idx == carl1->carl_cat_idx); LASSERT(carl0->carl_rec_idx == carl1->carl_rec_idx); Fix this by not allowing new HSM requests during CDT_INIT. Also, cookie values are incremented on a separate line, which causes one value to be skipped at CDT startup time. This is not an issue, but there does not seem to be a need for it; fix this by post-incrementing and assigning cookie values on the same line. 
Lustre-change: https://review.whamcloud.com/33671 Lustre-commit: 39862136c3cfee127c4b0a9604ff12f560af3124 Signed-off-by: Nikitas Angelinas Cray-bug-id: LUS-6589 Test-Parameters: trivial testlist=sanity-hsm Change-Id: I18a1c3e85de6c50a9bf1ce598e21d83d893ad0ca Reviewed-by: Quentin Bouget Reviewed-by: Andriy Skulysh Reviewed-by: Ben Evans Signed-off-by: Minh Diep Reviewed-on: https://review.whamcloud.com/36212 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/mdt/mdt_coordinator.c | 3 ++- lustre/mdt/mdt_hsm_cdt_actions.c | 8 +++----- lustre/mdt/mdt_hsm_cdt_client.c | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/lustre/mdt/mdt_coordinator.c b/lustre/mdt/mdt_coordinator.c index 7548598..50373e1 100644 --- a/lustre/mdt/mdt_coordinator.c +++ b/lustre/mdt/mdt_coordinator.c @@ -925,9 +925,10 @@ static int hsm_restore_cb(const struct lu_env *env, larr = (struct llog_agent_req_rec *)hdr; hai = &larr->arr_hai; - if (hai->hai_cookie > cdt->cdt_last_cookie) + if (hai->hai_cookie >= cdt->cdt_last_cookie) { /* update the cookie to avoid collision */ cdt->cdt_last_cookie = hai->hai_cookie + 1; + } if (hai->hai_action != HSMA_RESTORE || agent_req_in_final_state(larr->arr_status)) diff --git a/lustre/mdt/mdt_hsm_cdt_actions.c b/lustre/mdt/mdt_hsm_cdt_actions.c index 93c4a05..8381101 100644 --- a/lustre/mdt/mdt_hsm_cdt_actions.c +++ b/lustre/mdt/mdt_hsm_cdt_actions.c @@ -289,12 +289,10 @@ int mdt_agent_record_add(const struct lu_env *env, struct mdt_device *mdt, /* in case of cancel request, the cookie is already set to the * value of the request cookie to be cancelled * so we do not change it */ - if (hai->hai_action == HSMA_CANCEL) { + if (hai->hai_action == HSMA_CANCEL) larr->arr_hai.hai_cookie = hai->hai_cookie; - } else { - cdt->cdt_last_cookie++; - larr->arr_hai.hai_cookie = cdt->cdt_last_cookie; - } + else + larr->arr_hai.hai_cookie = cdt->cdt_last_cookie++; rc = llog_cat_add(env, lctxt->loc_handle, &larr->arr_hdr, NULL); if (rc > 0) 
diff --git a/lustre/mdt/mdt_hsm_cdt_client.c b/lustre/mdt/mdt_hsm_cdt_client.c index b0c01c9..705e98eb 100644 --- a/lustre/mdt/mdt_hsm_cdt_client.c +++ b/lustre/mdt/mdt_hsm_cdt_client.c @@ -420,7 +420,7 @@ int mdt_hsm_add_actions(struct mdt_thread_info *mti, ENTRY; /* no coordinator started, so we cannot serve requests */ - if (cdt->cdt_state == CDT_STOPPED) + if (cdt->cdt_state == CDT_STOPPED || cdt->cdt_state == CDT_INIT) RETURN(-EAGAIN); if (!hal_is_sane(hal)) -- 1.8.3.1