Whamcloud - gitweb
LU-10684 tests: skip recovery-small 110[h-j]
[fs/lustre-release.git] / lustre / mdt / mdt_hsm_cdt_client.c
index a2be524..c8f4d1e 100644 (file)
@@ -23,7 +23,7 @@
  * (C) Copyright 2012 Commissariat a l'energie atomique et aux energies
  *     alternatives
  *
- * Copyright (c) 2013, 2014, Intel Corporation.
+ * Copyright (c) 2013, 2017, Intel Corporation.
  */
 /*
  * lustre/mdt/mdt_hsm_cdt_client.c
@@ -281,48 +281,22 @@ hsm_action_permission(struct mdt_thread_info *mti,
        RETURN(*mask & (1UL << hsma) ? 0 : -EPERM);
 }
 
-/*
- * Coordinator external API
- */
-
-/**
- * register a list of requests
- * \param mti [IN]
- * \param hal [IN] list of requests
- * \retval 0 success
- * \retval -ve failure
- * in case of restore, caller must hold layout lock
- */
-int mdt_hsm_add_actions(struct mdt_thread_info *mti,
-                       struct hsm_action_list *hal)
+/* Process a single HAL. hsm_find_compatible has already been called
+ * on it. */
+static int mdt_hsm_register_hal(struct mdt_thread_info *mti,
+                               struct mdt_device *mdt,
+                               struct coordinator *cdt,
+                               struct hsm_action_list *hal)
 {
-       struct mdt_device       *mdt = mti->mti_mdt;
-       struct coordinator      *cdt = &mdt->mdt_coordinator;
        struct hsm_action_item  *hai;
        struct mdt_object       *obj = NULL;
-       int                      rc = 0, i;
+       int                      rc, i;
        struct md_hsm            mh;
        bool                     is_restore = false;
        __u64                    compound_id;
-       ENTRY;
-
-       /* no coordinator started, so we cannot serve requests */
-       if (cdt->cdt_state == CDT_STOPPED)
-               RETURN(-EAGAIN);
-
-       if (!hal_is_sane(hal))
-               RETURN(-EINVAL);
 
        compound_id = atomic_inc_return(&cdt->cdt_compound_id);
 
-       /* search for compatible request, if found hai_cookie is set
-        * to the request cookie
-        * it is also used to set the cookie for cancel request by FID
-        */
-       rc = hsm_find_compatible(mti->mti_env, mdt, hal);
-       if (rc)
-               GOTO(out, rc);
-
        hai = hai_first(hal);
        for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai)) {
                int archive_id;
@@ -411,45 +385,24 @@ int mdt_hsm_add_actions(struct mdt_thread_info *mti,
 
                /* if restore, take an exclusive lock on layout */
                if (hai->hai_action == HSMA_RESTORE) {
-                       struct cdt_restore_handle *crh;
-
                        /* in V1 only whole file is supported. */
                        if (hai->hai_extent.offset != 0)
                                GOTO(out, rc = -EPROTO);
 
-                       OBD_SLAB_ALLOC_PTR(crh, mdt_hsm_cdt_kmem);
-                       if (crh == NULL)
-                               GOTO(out, rc = -ENOMEM);
-
-                       crh->crh_fid = hai->hai_fid;
-                       /* in V1 only whole file is supported. However the
-                        * restore may be due to truncate. */
-                       crh->crh_extent.start = 0;
-                       crh->crh_extent.end = hai->hai_extent.length;
-
-                       mdt_lock_reg_init(&crh->crh_lh, LCK_EX);
-                       obj = mdt_object_find_lock(mti, &crh->crh_fid,
-                                                  &crh->crh_lh,
-                                                  MDS_INODELOCK_LAYOUT);
-                       if (IS_ERR(obj)) {
-                               rc = PTR_ERR(obj);
-                               CERROR("%s: cannot take layout lock for "
-                                      DFID": rc = %d\n", mdt_obd_name(mdt),
-                                      PFID(&crh->crh_fid), rc);
-                               OBD_SLAB_FREE_PTR(crh, mdt_hsm_cdt_kmem);
+                       rc = cdt_restore_handle_add(mti, cdt, &hai->hai_fid,
+                                                   &hai->hai_extent);
+                       if (rc < 0)
                                GOTO(out, rc);
-                       }
-
-                       /* we choose to not keep a keep a reference
-                        * on the object during the restore time which can be
-                        * very long */
-                       mdt_object_put(mti->mti_env, obj);
-
-                       mutex_lock(&cdt->cdt_restore_lock);
-                       list_add_tail(&crh->crh_list, &cdt->cdt_restore_hdl);
-                       mutex_unlock(&cdt->cdt_restore_lock);
                }
 record:
+               /*
+                * Wait here to catch the 2nd RESTORE request to the same FID.
+                * Normally the layout lock protects against adding such a
+                * request. But when the cdt is stopping it cancels all locks
+                * via ldlm_resource_clean and this protection may not work.
+                * See LU-9266 and sanity-hsm_407 for details.
+                */
+               OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_HSM_CDT_DELAY, cfs_fail_val);
                /* record request */
                rc = mdt_agent_record_add(mti->mti_env, mdt, compound_id,
                                          archive_id, flags, hai);
@@ -463,10 +416,53 @@ record:
                rc = 0;
 
        GOTO(out, rc);
+
 out:
-       /* if work has been added, wake up coordinator */
+       return rc;
+}
+
+/*
+ * Coordinator external API
+ */
+
+/**
+ * register a list of requests, then signal the coordinator if work was added
+ * \param mti [IN] MDT thread info (provides mti_env and mti_mdt)
+ * \param hal [IN] list of requests, rejected unless hal_is_sane()
+ * \retval 0 success
+ * \retval -ve failure (-EAGAIN if coordinator is stopped, -EINVAL if bad hal)
+ * in case of restore, caller must hold layout lock
+ */
+int mdt_hsm_add_actions(struct mdt_thread_info *mti,
+                       struct hsm_action_list *hal)
+{
+       struct mdt_device       *mdt = mti->mti_mdt;
+       struct coordinator      *cdt = &mdt->mdt_coordinator;
+       int                      rc;
+       ENTRY;
+
+       /* no coordinator started, so we cannot serve requests */
+       if (cdt->cdt_state == CDT_STOPPED)
+               RETURN(-EAGAIN);
+
+       if (!hal_is_sane(hal))
+               RETURN(-EINVAL);
+
+       /* search for a compatible request; if one is found, hai_cookie
+        * is set to that request's cookie.
+        * This is also used to set the cookie for a cancel request by FID.
+        */
+       rc = hsm_find_compatible(mti->mti_env, mdt, hal);
+       if (rc)
+               GOTO(out, rc);
+
+       rc = mdt_hsm_register_hal(mti, mdt, cdt, hal);
+
+       GOTO(out, rc);
+out:
+       /* if work has been added, signal the coordinator */
        if (rc == 0 || rc == -ENODATA)
-               mdt_hsm_cdt_wakeup(mdt);
+               mdt_hsm_cdt_event(cdt);
 
        return rc;
 }
@@ -483,24 +479,15 @@ out:
 bool mdt_hsm_restore_is_running(struct mdt_thread_info *mti,
                                 const struct lu_fid *fid)
 {
-       struct mdt_device               *mdt = mti->mti_mdt;
-       struct coordinator              *cdt = &mdt->mdt_coordinator;
-       struct cdt_restore_handle       *crh;
-       bool                             rc = false;
+       struct coordinator *cdt = &mti->mti_mdt->mdt_coordinator;
+       bool is_running; /* true iff a restore handle is found for fid */
        ENTRY;
 
-       if (!fid_is_sane(fid))
-               RETURN(rc);
-
        mutex_lock(&cdt->cdt_restore_lock);
-       list_for_each_entry(crh, &cdt->cdt_restore_hdl, crh_list) {
-               if (lu_fid_eq(&crh->crh_fid, fid)) {
-                       rc = true;
-                       break;
-               }
-       }
+       is_running = (cdt_restore_handle_find(cdt, fid) != NULL);
        mutex_unlock(&cdt->cdt_restore_lock);
-       RETURN(rc);
+
+       RETURN(is_running);
 }
 
 /**