Whamcloud - gitweb
LU-15132 hsm: Protect against parallel HSM restore requests
authorEtienne AUJAMES <etienne.aujames@cea.fr>
Thu, 21 Oct 2021 14:31:01 +0000 (16:31 +0200)
committerAndreas Dilger <adilger@whamcloud.com>
Tue, 11 Oct 2022 07:48:13 +0000 (07:48 +0000)
Multiple parallel accesses (read/write) to the same released file
could cause multiple HSM restore requests to be sent.
On the MDT side, each restore request waits the first one to complete
before grabbing the MDS_INODELOCK_LAYOUT LCK_EX and registering the
llog record.

This could cause several MDT threads to hang for the same restore
request sent in parallel. In the worst case, all MDT threads can
hang and the MDS is not longer able to handle requests.

This patch checks if an HSM restore handle exists before taking the
lock.

Lustre-change: https://review.whamcloud.com/45367
Lustre-commit: 66b3e74bccf1451d135b7f331459b6af1c06431b

Test-Parameters: testlist=sanity-hsm,sanity-hsm
Test-Parameters: testlist=sanity-hsm env=ONLY=12s,ONLY_REPEAT=50
Signed-off-by: Xing Huang <hxing@ddn.com>
Change-Id: I9584edc2c7411aa41b2e318e55f57c117d1c3dfb
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/48650
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Qian Yingjin <qian@ddn.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/include/obd_support.h
lustre/mdt/mdt_coordinator.c
lustre/mdt/mdt_hsm_cdt_client.c
lustre/tests/sanity-hsm.sh

index 10fddf1..d61ab11 100644 (file)
@@ -268,6 +268,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_MDS_DQACQ_NET           0x187
 #define OBD_FAIL_MDS_STRIPE_CREATE      0x188
 #define OBD_FAIL_MDS_STRIPE_FID                 0x189
+#define OBD_FAIL_MDS_HSM_RESTORE_RACE   0x18b
 
 /* OI scrub */
 #define OBD_FAIL_OSD_SCRUB_DELAY                       0x190
index f730d84..8bf197e 100644 (file)
@@ -488,6 +488,9 @@ static void mdt_hsm_cdt_cleanup(struct mdt_device *mdt)
        mutex_lock(&cdt->cdt_restore_lock);
        list_for_each_entry_safe(crh, tmp3, &cdt->cdt_restore_handle_list,
                                 crh_list) {
+               /* not locked yet, cleanup by cdt_restore_handle_add() */
+               if (crh->crh_lh.mlh_type == MDT_NUL_LOCK)
+                       continue;
                list_del(&crh->crh_list);
                /* give back layout lock */
                mdt_object_unlock(cdt_mti, NULL, &crh->crh_lh, 1);
@@ -776,10 +779,21 @@ clean_cb_alloc:
        RETURN(rc);
 }
 
+/**
+ * register a new HSM restore handle for a file and take EX lock on the layout
+ * \param mti [IN] thread info
+ * \param cdt [IN] coordinator
+ * \param fid [IN] fid of the file to restore
+ * \param he  [IN] HSM extent
+ * \retval 0 success
+ * \retval 1 restore handle already exists for the fid
+ * \retval -ve failure
+ */
 int cdt_restore_handle_add(struct mdt_thread_info *mti, struct coordinator *cdt,
                           const struct lu_fid *fid,
                           const struct hsm_extent *he)
 {
+       struct mdt_lock_handle lh = { 0 };
        struct cdt_restore_handle *crh;
        struct mdt_object *obj;
        int rc;
@@ -796,31 +810,48 @@ int cdt_restore_handle_add(struct mdt_thread_info *mti, struct coordinator *cdt,
         */
        crh->crh_extent.start = 0;
        crh->crh_extent.end = he->length;
+       crh->crh_lh.mlh_type = MDT_NUL_LOCK;
+
+       mutex_lock(&cdt->cdt_restore_lock);
+       if (cdt_restore_handle_find(cdt, fid) != NULL)
+               GOTO(out_crl, rc = 1);
+
+       if (unlikely(cdt->cdt_state == CDT_STOPPED ||
+                    cdt->cdt_state == CDT_STOPPING))
+               GOTO(out_crl, rc = -EAGAIN);
+
+       list_add_tail(&crh->crh_list, &cdt->cdt_restore_handle_list);
+       mutex_unlock(&cdt->cdt_restore_lock);
+
        /* get the layout lock */
-       mdt_lock_reg_init(&crh->crh_lh, LCK_EX);
-       obj = mdt_object_find_lock(mti, &crh->crh_fid, &crh->crh_lh,
+       mdt_lock_reg_init(&lh, LCK_EX);
+       obj = mdt_object_find_lock(mti, &crh->crh_fid, &lh,
                                   MDS_INODELOCK_LAYOUT);
-       if (IS_ERR(obj))
-               GOTO(out_crh, rc = PTR_ERR(obj));
+       if (IS_ERR(obj)) {
+               mutex_lock(&cdt->cdt_restore_lock);
+               GOTO(out_ldel, rc = PTR_ERR(obj));
+       }
 
        /* We do not keep a reference on the object during the restore
-        * which can be very long. */
+        * which can be very long.
+        */
        mdt_object_put(mti->mti_env, obj);
 
        mutex_lock(&cdt->cdt_restore_lock);
        if (unlikely(cdt->cdt_state == CDT_STOPPED ||
-                    cdt->cdt_state == CDT_STOPPING)) {
-               mutex_unlock(&cdt->cdt_restore_lock);
+                    cdt->cdt_state == CDT_STOPPING))
                GOTO(out_lh, rc = -EAGAIN);
-       }
 
-       list_add_tail(&crh->crh_list, &cdt->cdt_restore_handle_list);
+       crh->crh_lh = lh;
        mutex_unlock(&cdt->cdt_restore_lock);
 
        RETURN(0);
 out_lh:
        mdt_object_unlock(mti, NULL, &crh->crh_lh, 1);
-out_crh:
+out_ldel:
+       list_del(&crh->crh_list);
+out_crl:
+       mutex_unlock(&cdt->cdt_restore_lock);
        OBD_SLAB_FREE_PTR(crh, mdt_hsm_cdt_kmem);
 
        return rc;
@@ -928,6 +959,8 @@ static int hsm_restore_cb(const struct lu_env *env,
        }
 
        rc = cdt_restore_handle_add(mti, cdt, &hai->hai_fid, &hai->hai_extent);
+       if (rc == 1)
+               rc = 0;
 out:
        RETURN(rc);
 }
index 5cc2c35..2e92e62 100644 (file)
@@ -365,8 +365,16 @@ static int mdt_hsm_register_hal(struct mdt_thread_info *mti,
                        if (hai->hai_extent.offset != 0)
                                GOTO(out, rc = -EPROTO);
 
+                       /* LU-15132 */
+                       OBD_RACE(OBD_FAIL_MDS_HSM_RESTORE_RACE);
+
                        rc = cdt_restore_handle_add(mti, cdt, &hai->hai_fid,
                                                    &hai->hai_extent);
+                       if (rc == 1) {
+                               rc = 0;
+                               continue;
+                       }
+
                        if (rc < 0)
                                GOTO(out, rc);
                }
index a65d698..f8715ee 100755 (executable)
@@ -1392,6 +1392,37 @@ test_12r() {
 }
 run_test 12r "lseek restores released file"
 
+test_12s() {
+       local f=$DIR/$tdir/$tfile
+       local fid
+       local pid1 pid2
+
+       (( MDS1_VERSION >= $(version_code 2.15.50) )) ||
+               skip "Need MDS version newer than 2.15.50"
+
+       # test needs a running copytool
+       copytool setup
+
+       mkdir_on_mdt0 $DIR/$tdir
+       fid=$(copy_file /etc/hosts $f)
+
+       $LFS hsm_archive $f || error "archive of $f failed"
+       wait_request_state $fid ARCHIVE SUCCEED
+       $LFS hsm_release $f || error "release of $f failed"
+
+#define OBD_FAIL_ONCE|OBD_FAIL_MDS_HSM_RESTORE_RACE 0x8000018b
+       do_facet mds1 $LCTL set_param fail_loc=0x8000018b
+       cat $f > /dev/null & pid1=$!
+       cat $f > /dev/null & pid2=$!
+
+       wait $pid1 || error "cat process 1 fail (pid: $pid1)"
+       wait $pid2 || error "cat process 2 fail (pid: $pid2)"
+
+       # Race exists if more than 1 restore requests is registered
+       assert_request_count $fid RESTORE 1
+}
+run_test 12s "race between restore requests"
+
 test_13() {
        local -i i j k=0
        for i in {1..10}; do