Whamcloud - gitweb
LU-15132 hsm: Protect against parallel HSM restore requests 67/45367/15
authorEtienne AUJAMES <etienne.aujames@cea.fr>
Thu, 21 Oct 2021 14:31:01 +0000 (16:31 +0200)
committerOleg Drokin <green@whamcloud.com>
Mon, 18 Jul 2022 05:34:17 +0000 (05:34 +0000)
Multiple parallel accesses (read/write) to the same released file
could cause multiple HSM restore requests to be sent.
On the MDT side, each restore request waits the first one to complete
before grabbing the MDS_INODELOCK_LAYOUT LCK_EX and registering the
llog record.

This could cause several MDT threads to hang for the same restore
request sent in parallel. In the worst case, all MDT threads can
hang and the MDS is not longer able to handle requests.

This patch checks if an HSM restore handle exists before taking the
lock.

Test-Parameters: testlist=sanity-hsm,sanity-hsm
Test-Parameters: testlist=sanity-hsm env=ONLY=12s,ONLY_REPEAT=50
Signed-off-by: Etienne AUJAMES <eaujames@ddn.com>
Change-Id: I9584edc2c7411aa41b2e318e55f57c117d1c3dfb
Reviewed-on: https://review.whamcloud.com/45367
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: John L. Hammond <jhammond@whamcloud.com>
Reviewed-by: Nikitas Angelinas <nikitas.angelinas@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/obd_support.h
lustre/mdt/mdt_coordinator.c
lustre/mdt/mdt_hsm_cdt_client.c
lustre/tests/sanity-hsm.sh

index f6aea9a..96cd590 100644 (file)
@@ -267,6 +267,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_MDS_STRIPE_CREATE      0x188
 #define OBD_FAIL_MDS_STRIPE_FID                 0x189
 #define OBD_FAIL_MDS_LINK_RENAME_RACE   0x18a
+#define OBD_FAIL_MDS_HSM_RESTORE_RACE   0x18b
 
 /* OI scrub */
 #define OBD_FAIL_OSD_SCRUB_DELAY                       0x190
index d0885d0..b321d94 100644 (file)
@@ -488,6 +488,9 @@ static void mdt_hsm_cdt_cleanup(struct mdt_device *mdt)
        mutex_lock(&cdt->cdt_restore_lock);
        list_for_each_entry_safe(crh, tmp3, &cdt->cdt_restore_handle_list,
                                 crh_list) {
+               /* not locked yet, cleanup by cdt_restore_handle_add() */
+               if (crh->crh_lh.mlh_type == MDT_NUL_LOCK)
+                       continue;
                list_del(&crh->crh_list);
                /* give back layout lock */
                mdt_object_unlock(cdt_mti, NULL, &crh->crh_lh, 1);
@@ -801,10 +804,21 @@ clean_cb_alloc:
        RETURN(rc);
 }
 
+/**
+ * register a new HSM restore handle for a file and take EX lock on the layout
+ * \param mti [IN] thread info
+ * \param cdt [IN] coordinator
+ * \param fid [IN] fid of the file to restore
+ * \param he  [IN] HSM extent
+ * \retval 0 success
+ * \retval 1 restore handle already exists for the fid
+ * \retval -ve failure
+ */
 int cdt_restore_handle_add(struct mdt_thread_info *mti, struct coordinator *cdt,
                           const struct lu_fid *fid,
                           const struct hsm_extent *he)
 {
+       struct mdt_lock_handle lh = { 0 };
        struct cdt_restore_handle *crh;
        struct mdt_object *obj;
        int rc;
@@ -821,31 +835,48 @@ int cdt_restore_handle_add(struct mdt_thread_info *mti, struct coordinator *cdt,
         */
        crh->crh_extent.start = 0;
        crh->crh_extent.end = he->length;
+       crh->crh_lh.mlh_type = MDT_NUL_LOCK;
+
+       mutex_lock(&cdt->cdt_restore_lock);
+       if (cdt_restore_handle_find(cdt, fid) != NULL)
+               GOTO(out_crl, rc = 1);
+
+       if (unlikely(cdt->cdt_state == CDT_STOPPED ||
+                    cdt->cdt_state == CDT_STOPPING))
+               GOTO(out_crl, rc = -EAGAIN);
+
+       list_add_tail(&crh->crh_list, &cdt->cdt_restore_handle_list);
+       mutex_unlock(&cdt->cdt_restore_lock);
+
        /* get the layout lock */
-       mdt_lock_reg_init(&crh->crh_lh, LCK_EX);
-       obj = mdt_object_find_lock(mti, &crh->crh_fid, &crh->crh_lh,
+       mdt_lock_reg_init(&lh, LCK_EX);
+       obj = mdt_object_find_lock(mti, &crh->crh_fid, &lh,
                                   MDS_INODELOCK_LAYOUT);
-       if (IS_ERR(obj))
-               GOTO(out_crh, rc = PTR_ERR(obj));
+       if (IS_ERR(obj)) {
+               mutex_lock(&cdt->cdt_restore_lock);
+               GOTO(out_ldel, rc = PTR_ERR(obj));
+       }
 
        /* We do not keep a reference on the object during the restore
-        * which can be very long. */
+        * which can be very long.
+        */
        mdt_object_put(mti->mti_env, obj);
 
        mutex_lock(&cdt->cdt_restore_lock);
        if (unlikely(cdt->cdt_state == CDT_STOPPED ||
-                    cdt->cdt_state == CDT_STOPPING)) {
-               mutex_unlock(&cdt->cdt_restore_lock);
+                    cdt->cdt_state == CDT_STOPPING))
                GOTO(out_lh, rc = -EAGAIN);
-       }
 
-       list_add_tail(&crh->crh_list, &cdt->cdt_restore_handle_list);
+       crh->crh_lh = lh;
        mutex_unlock(&cdt->cdt_restore_lock);
 
        RETURN(0);
 out_lh:
        mdt_object_unlock(mti, NULL, &crh->crh_lh, 1);
-out_crh:
+out_ldel:
+       list_del(&crh->crh_list);
+out_crl:
+       mutex_unlock(&cdt->cdt_restore_lock);
        OBD_SLAB_FREE_PTR(crh, mdt_hsm_cdt_kmem);
 
        return rc;
@@ -953,6 +984,8 @@ static int hsm_restore_cb(const struct lu_env *env,
        }
 
        rc = cdt_restore_handle_add(mti, cdt, &hai->hai_fid, &hai->hai_extent);
+       if (rc == 1)
+               rc = 0;
 out:
        RETURN(rc);
 }
index 9f78796..4e837d6 100644 (file)
@@ -365,8 +365,16 @@ static int mdt_hsm_register_hal(struct mdt_thread_info *mti,
                        if (hai->hai_extent.offset != 0)
                                GOTO(out, rc = -EPROTO);
 
+                       /* LU-15132 */
+                       OBD_RACE(OBD_FAIL_MDS_HSM_RESTORE_RACE);
+
                        rc = cdt_restore_handle_add(mti, cdt, &hai->hai_fid,
                                                    &hai->hai_extent);
+                       if (rc == 1) {
+                               rc = 0;
+                               continue;
+                       }
+
                        if (rc < 0)
                                GOTO(out, rc);
                }
index 1830789..b234123 100755 (executable)
@@ -1384,6 +1384,37 @@ test_12r() {
 }
 run_test 12r "lseek restores released file"
 
+test_12s() {
+       local f=$DIR/$tdir/$tfile
+       local fid
+       local pid1 pid2
+
+       (( MDS1_VERSION >= $(version_code 2.15.50) )) ||
+               skip "Need MDS version newer than 2.15.50"
+
+       # test needs a running copytool
+       copytool setup
+
+       mkdir_on_mdt0 $DIR/$tdir
+       fid=$(copy_file /etc/hosts $f)
+
+       $LFS hsm_archive $f || error "archive of $f failed"
+       wait_request_state $fid ARCHIVE SUCCEED
+       $LFS hsm_release $f || error "release of $f failed"
+
+#define OBD_FAIL_ONCE|OBD_FAIL_MDS_HSM_RESTORE_RACE 0x8000018b
+       do_facet mds1 $LCTL set_param fail_loc=0x8000018b
+       cat $f > /dev/null & pid1=$!
+       cat $f > /dev/null & pid2=$!
+
+       wait $pid1 || error "cat process 1 fail (pid: $pid1)"
+       wait $pid2 || error "cat process 2 fail (pid: $pid2)"
+
+       # Race exists if more than 1 restore requests is registered
+       assert_request_count $fid RESTORE 1
+}
+run_test 12s "race between restore requests"
+
 test_13() {
        local -i i j k=0
        for i in {1..10}; do