Whamcloud - gitweb
LU-7988 hsm: update many cookie status at once 73/28973/2
author: Bruno Faccini <bruno.faccini@intel.com>
Tue, 18 Jul 2017 08:21:53 +0000 (10:21 +0200)
committer: John L. Hammond <john.hammond@intel.com>
Thu, 26 Oct 2017 16:08:13 +0000 (16:08 +0000)
Instead of calling mdt_agent_record_update(), which in turn calls
cdt_llog_process(), once for every HAL, build a list of the cookies to
update together with their new status and call
mdt_agent_record_update() at most once per second.

Update mdt_agent_record_update to take a status for every cookie.

Test-Parameters: trivial testlist=sanity-hsm

Lustre-change: https://review.whamcloud.com/19584
Lustre-commit: f3a415289b560b5f422efe2bd08b3b7cff113cf0

Signed-off-by: frank zago <fzago@cray.com>
Change-Id: Ie4afd667727e07570ed6a2d51e8dfaea8302b97b
Signed-off-by: Ben Evans <bevans@cray.com>
Signed-off-by: Bruno Faccini <bruno.faccini@intel.com>
Reviewed-by: Quentin Bouget <quentin.bouget@cea.fr>
Signed-off-by: Minh Diep <minh.diep@intel.com>
Reviewed-on: https://review.whamcloud.com/28973
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: John L. Hammond <john.hammond@intel.com>
lustre/mdt/mdt_coordinator.c
lustre/mdt/mdt_hsm_cdt_actions.c
lustre/mdt/mdt_hsm_cdt_agent.c
lustre/mdt/mdt_internal.h

index e59452e..50f92bc 100644 (file)
@@ -538,6 +538,10 @@ static int mdt_coordinator(void *data)
 
        while (1) {
                int i;
+               int update_idx = 0;
+               int updates_sz;
+               int updates_cnt;
+               struct hsm_record_update *updates;
 
                /* Limit execution of the expensive requests traversal
                 * to at most every "wait_event_time" jiffies. This prevents
@@ -609,14 +613,32 @@ static int mdt_coordinator(void *data)
                        goto clean_cb_alloc;
                }
 
+               /* Compute how many HAI we have in all the requests */
+               updates_cnt = 0;
+               for (i = 0; i < hsd.request_cnt; i++) {
+                       const struct hsm_scan_request *request =
+                               &hsd.request[i];
+
+                       updates_cnt += request->hal->hal_count;
+               }
+
+               /* Allocate a temporary array to store the cookies to
+                * update, and their status. */
+               updates_sz = updates_cnt * sizeof(*updates);
+               OBD_ALLOC(updates, updates_sz);
+               if (updates == NULL) {
+                       CERROR("%s: Cannot allocate memory (%d o) "
+                              "for %d updates\n",
+                              mdt_obd_name(mdt), updates_sz, updates_cnt);
+                       continue;
+               }
+
                /* here hsd contains a list of requests to be started */
                for (i = 0; i < hsd.request_cnt; i++) {
                        struct hsm_scan_request *request = &hsd.request[i];
                        struct hsm_action_list  *hal = request->hal;
                        struct hsm_action_item  *hai;
-                       __u64                   *cookies;
-                       int                      sz, j;
-                       enum agent_req_status    status;
+                       int                      j;
 
                        /* still room for work ? */
                        if (atomic_read(&cdt->cdt_request_count) >=
@@ -628,34 +650,32 @@ static int mdt_coordinator(void *data)
                         * if the copy tool failed to do the request
                         * it has to use hsm_progress
                         */
-                       status = (rc ? ARS_WAITING : ARS_STARTED);
 
                        /* set up cookie vector to set records status
                         * after copy tools start or failed
                         */
-                       sz = hal->hal_count * sizeof(__u64);
-                       OBD_ALLOC(cookies, sz);
-                       if (cookies == NULL)
-                               continue;
-
                        hai = hai_first(hal);
                        for (j = 0; j < hal->hal_count; j++) {
-                               cookies[j] = hai->hai_cookie;
+                               updates[update_idx].cookie = hai->hai_cookie;
+                               updates[update_idx].status =
+                                       (rc ? ARS_WAITING : ARS_STARTED);
                                hai = hai_next(hai);
+                               update_idx++;
                        }
+               }
 
-                       rc = mdt_agent_record_update(mti->mti_env, mdt, cookies,
-                                                    hal->hal_count, status);
+               if (update_idx) {
+                       rc = mdt_agent_record_update(mti->mti_env, mdt,
+                                                    updates, update_idx);
                        if (rc)
                                CERROR("%s: mdt_agent_record_update() failed, "
-                                      "rc=%d, cannot update status to %s "
+                                      "rc=%d, cannot update records "
                                       "for %d cookies\n",
-                                      mdt_obd_name(mdt), rc,
-                                      agent_req_status2name(status),
-                                      hal->hal_count);
-
-                       OBD_FREE(cookies, sz);
+                                      mdt_obd_name(mdt), rc, update_idx);
                }
+
+               OBD_FREE(updates, updates_sz);
+
 clean_cb_alloc:
                /* free hal allocated by callback */
                for (i = 0; i < hsd.request_cnt; i++) {
@@ -1113,9 +1133,13 @@ int mdt_hsm_add_hal(struct mdt_thread_info *mti,
                 * it will be done when updating the request status
                 */
                if (hai->hai_action == HSMA_CANCEL) {
+                       struct hsm_record_update update = {
+                               .cookie = hai->hai_cookie,
+                               .status = ARS_CANCELED,
+                       };
+
                        rc = mdt_agent_record_update(mti->mti_env, mti->mti_mdt,
-                                                    &hai->hai_cookie,
-                                                    1, ARS_CANCELED);
+                                                    &update, 1);
                        if (rc) {
                                CERROR("%s: mdt_agent_record_update() failed, "
                                       "rc=%d, cannot update status to %s "
@@ -1540,10 +1564,13 @@ int mdt_hsm_update_request_state(struct mdt_thread_info *mti,
                /* update record first (LU-9075) */
                if (update_record) {
                        int rc1;
+                       struct hsm_record_update update = {
+                               .cookie = pgs->hpk_cookie,
+                               .status = status,
+                       };
 
                        rc1 = mdt_agent_record_update(mti->mti_env, mdt,
-                                                    &pgs->hpk_cookie, 1,
-                                                    status);
+                                                     &update, 1);
                        if (rc1)
                                CERROR("%s: mdt_agent_record_update() failed,"
                                       " rc=%d, cannot update status to %s"
index abb3e6a..b17aa17 100644 (file)
@@ -318,10 +318,9 @@ free:
  */
 struct data_update_cb {
        struct mdt_device       *mdt;
-       __u64                   *cookies;
-       int                      cookies_count;
-       int                      cookies_done;
-       enum agent_req_status    status;
+       struct hsm_record_update *updates;
+       unsigned int             updates_count;
+       unsigned int             updates_done;
        cfs_time_t               change_time;
 };
 
@@ -348,32 +347,38 @@ static int mdt_agent_record_update_cb(const struct lu_env *env,
        ducb = data;
 
        /* check if all done */
-       if (ducb->cookies_count == ducb->cookies_done)
+       if (ducb->updates_count == ducb->updates_done)
                RETURN(LLOG_PROC_BREAK);
 
        /* if record is in final state, never change */
-       /* if record is a cancel request, it cannot be canceled
-        * this is to manage the following case:
-        * when a request is canceled, we have 2 records with the
-        * the same cookie : the one to cancel and the cancel request
-        * the 1st has to be set to ARS_CANCELED and the 2nd to ARS_SUCCEED
-        */
-       if (agent_req_in_final_state(larr->arr_status) ||
-           (larr->arr_hai.hai_action == HSMA_CANCEL &&
-            ducb->status == ARS_CANCELED))
+       if (agent_req_in_final_state(larr->arr_status))
                RETURN(0);
 
        rc = 0;
-       for (i = 0 ; i < ducb->cookies_count ; i++) {
+       for (i = 0 ; i < ducb->updates_count ; i++) {
+               struct hsm_record_update *update = &ducb->updates[i];
+
                CDEBUG(D_HSM, "%s: search %#llx, found %#llx\n",
-                      mdt_obd_name(ducb->mdt), ducb->cookies[i],
+                      mdt_obd_name(ducb->mdt), update->cookie,
                       larr->arr_hai.hai_cookie);
-               if (larr->arr_hai.hai_cookie == ducb->cookies[i]) {
-
-                       larr->arr_status = ducb->status;
+               if (larr->arr_hai.hai_cookie == update->cookie) {
+
+                       /* If record is a cancel request, it cannot be
+                        * canceled. This is to manage the following
+                        * case: when a request is canceled, we have 2
+                        * records with the same cookie: the one
+                        * to cancel and the cancel request the 1st
+                        * has to be set to ARS_CANCELED and the 2nd
+                        * to ARS_SUCCEED
+                        */
+                       if (larr->arr_hai.hai_action == HSMA_CANCEL &&
+                           update->status == ARS_CANCELED)
+                               RETURN(0);
+
+                       larr->arr_status = update->status;
                        larr->arr_req_change = ducb->change_time;
                        rc = llog_write(env, llh, hdr, hdr->lrh_index);
-                       ducb->cookies_done++;
+                       ducb->updates_done++;
                        break;
                }
        }
@@ -387,17 +392,18 @@ static int mdt_agent_record_update_cb(const struct lu_env *env,
 
 /**
  * update an entry in agent llog
+ *
  * \param env [IN] environment
  * \param mdt [IN] MDT device
- * \param cookie [IN] entries to update
- *    log cookie are returned by register
- * \param status [IN] new status of the request
- * \retval 0 success
- * \retval -ve failure
+ * \param updates [IN] array of entries to update
+ * \param updates_count [IN] number of entries in updates
+ *
+ * \retval 0 on success
+ * \retval negative on failure
  */
 int mdt_agent_record_update(const struct lu_env *env, struct mdt_device *mdt,
-                           __u64 *cookies, int cookies_count,
-                           enum agent_req_status status)
+                           struct hsm_record_update *updates,
+                           unsigned int updates_count)
 {
        struct data_update_cb    ducb;
        u32 start_cat_idx = -1;
@@ -410,13 +416,14 @@ int mdt_agent_record_update(const struct lu_env *env, struct mdt_device *mdt,
 
        /* Find the first location (start_cat_idx, start_rec_idx)
         * among the records corresponding to cookies. */
-       for (i = 0; i < cookies_count; i++) {
+       for (i = 0; i < updates_count; i++) {
                /* If we cannot find a cached location for a cookie
                 * (perhaps because the MDT was restart then we must
                 * start from the beginning. In this case
                 * mdt_agent_record_hash_get() sets both of cat_idx and
                 * rec_idx to 0. */
-               cdt_agent_record_hash_lookup(&mdt->mdt_coordinator, cookies[i],
+               cdt_agent_record_hash_lookup(&mdt->mdt_coordinator,
+                                            updates[i].cookie,
                                             &cat_idx, &rec_idx);
                if (cat_idx < start_cat_idx) {
                        start_cat_idx = cat_idx;
@@ -432,20 +439,18 @@ int mdt_agent_record_update(const struct lu_env *env, struct mdt_device *mdt,
                start_rec_idx -= 1;
 
        ducb.mdt = mdt;
-       ducb.cookies = cookies;
-       ducb.cookies_count = cookies_count;
-       ducb.cookies_done = 0;
-       ducb.status = status;
+       ducb.updates = updates;
+       ducb.updates_count = updates_count;
+       ducb.updates_done = 0;
        ducb.change_time = cfs_time_current_sec();
 
        rc = cdt_llog_process(env, mdt, mdt_agent_record_update_cb, &ducb,
                              start_cat_idx, start_rec_idx, WRITE);
        if (rc < 0)
                CERROR("%s: cdt_llog_process() failed, rc=%d, cannot update "
-                      "status to %s for %d cookies, done %d\n",
+                      "status for %u cookies, done %u\n",
                       mdt_obd_name(mdt), rc,
-                      agent_req_status2name(status),
-                      cookies_count, ducb.cookies_done);
+                      updates_count, ducb.updates_done);
        RETURN(rc);
 }
 
@@ -694,4 +699,3 @@ const struct file_operations mdt_hsm_actions_fops = {
        .llseek         = seq_lseek,
        .release        = lprocfs_release_hsm_actions,
 };
-
index d73b20a..c7ce3f7 100644 (file)
@@ -405,6 +405,8 @@ int mdt_hsm_agent_send(struct mdt_thread_info *mti,
                hai = hai_first(hal);
                for (i = 0; i < hal->hal_count; i++,
                     hai = hai_next(hai)) {
+                       struct hsm_record_update update;
+
                        /* only removes are concerned */
                        if (hai->hai_action != HSMA_REMOVE) {
                                /* count if other actions than HSMA_REMOVE,
@@ -424,9 +426,11 @@ int mdt_hsm_agent_send(struct mdt_thread_info *mti,
                         * XXX: this should only cause duplicates to be sent,
                         * unless a method to record already successfully
                         * reached archive_ids is implemented */
+
+                       update.cookie = hai->hai_cookie;
+                       update.status = ARS_SUCCEED;
                        rc2 = mdt_agent_record_update(mti->mti_env, mdt,
-                                                    &hai->hai_cookie,
-                                                    1, ARS_SUCCEED);
+                                                     &update, 1);
                        if (rc2) {
                                CERROR("%s: mdt_agent_record_update() "
                                      "failed, cannot update "
@@ -473,13 +477,17 @@ int mdt_hsm_agent_send(struct mdt_thread_info *mti,
                if (!IS_ERR(obj)) {
                        mdt_object_put(mti->mti_env, obj);
                } else if (PTR_ERR(obj) == -ENOENT) {
+                       struct hsm_record_update update = {
+                               .cookie = hai->hai_cookie,
+                               .status = ARS_FAILED,
+                       };
+
                        if (hai->hai_action == HSMA_REMOVE)
                                continue;
 
                        fail_request = true;
                        rc = mdt_agent_record_update(mti->mti_env, mdt,
-                                                    &hai->hai_cookie,
-                                                    1, ARS_FAILED);
+                                                    &update, 1);
                        if (rc < 0) {
                                CERROR("%s: mdt_agent_record_update() failed, "
                                       "cannot update status to %s for cookie "
@@ -497,14 +505,18 @@ int mdt_hsm_agent_send(struct mdt_thread_info *mti,
 
                if (!mdt_hsm_is_action_compat(hai, hal->hal_archive_id,
                                              hal->hal_flags, &hsm)) {
+                       struct hsm_record_update update = {
+                               .cookie = hai->hai_cookie,
+                               .status = ARS_FAILED,
+                       };
+
                        /* incompatible request, we abort the request */
                        /* next time coordinator will wake up, it will
                         * make the same compound with valid only
                         * records */
                        fail_request = true;
                        rc = mdt_agent_record_update(mti->mti_env, mdt,
-                                                    &hai->hai_cookie,
-                                                    1, ARS_FAILED);
+                                                    &update, 1);
                        if (rc) {
                                CERROR("%s: mdt_agent_record_update() failed, "
                                       "cannot update status to %s for cookie "
@@ -759,4 +771,3 @@ const struct file_operations mdt_hsm_agent_fops = {
        .llseek         = seq_lseek,
        .release        = lprocfs_seq_release,
 };
-
index 7498402..90732cf 100644 (file)
@@ -527,6 +527,11 @@ struct cdt_restore_handle {
 };
 extern struct kmem_cache *mdt_hsm_cdt_kmem;    /** restore handle slab cache */
 
+struct hsm_record_update {
+       __u64 cookie;
+       enum agent_req_status status;
+};
+
 static inline const struct md_device_operations *
 mdt_child_ops(struct mdt_device * m)
 {
@@ -837,9 +842,9 @@ int cdt_llog_process(const struct lu_env *env, struct mdt_device *mdt,
 int mdt_agent_record_add(const struct lu_env *env, struct mdt_device *mdt,
                         __u64 compound_id, __u32 archive_id,
                         __u64 flags, struct hsm_action_item *hai);
-int mdt_agent_record_update(const struct lu_env *env,
-                           struct mdt_device *mdt, __u64 *cookies,
-                           int cookies_count, enum agent_req_status status);
+int mdt_agent_record_update(const struct lu_env *env, struct mdt_device *mdt,
+                           struct hsm_record_update *updates,
+                           unsigned int updates_count);
 void cdt_agent_record_hash_add(struct coordinator *cdt, u64 cookie, u32 cat_idt,
                               u32 rec_idx);
 void cdt_agent_record_hash_lookup(struct coordinator *cdt, u64 cookie,