Whamcloud - gitweb
LU-7986 hsm: update actions llog in place 47/19447/2
author John L. Hammond <john.hammond@intel.com>
Fri, 5 Jun 2015 21:39:23 +0000 (16:39 -0500)
committer Oleg Drokin <oleg.drokin@intel.com>
Thu, 21 Apr 2016 02:28:30 +0000 (02:28 +0000)
Update HSM actions llog entries in place rather than doing an append
and cancel.

Signed-off-by: John L. Hammond <john.hammond@intel.com>
Change-Id: I5c5282d199e43fbc6709d5dc9bc050e0b98fb889
Reviewed-by: Jinshan Xiong <jinshan.xiong@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
Reviewed-on: http://review.whamcloud.com/19447
Tested-by: Jenkins
Reviewed-by: Frank Zago <fzago@cray.com>
Reviewed-by: jacques-Charles Lafoucriere <jacques-charles.lafoucriere@cea.fr>
Tested-by: Maloo <hpdd-maloo@intel.com>
lustre/mdt/mdt_coordinator.c
lustre/mdt/mdt_hsm_cdt_actions.c
lustre/mdt/mdt_internal.h

index 9f371bc..c74cf10 100644 (file)
@@ -142,10 +142,6 @@ struct hsm_scan_data {
                int                      hal_used_sz;
                struct hsm_action_list  *hal;
        } *request;
-       /* records to be canceled */
-       int                              max_cookie;    /** vector size */
-       int                              cookie_cnt;    /** used count */
-       __u64                           *cookies;
 };
 
 /**
@@ -164,7 +160,7 @@ static int mdt_coordinator_cb(const struct lu_env *env,
                              struct llog_rec_hdr *hdr,
                              void *data)
 {
-       const struct llog_agent_req_rec *larr;
+       struct llog_agent_req_rec       *larr;
        struct hsm_scan_data            *hsd;
        struct hsm_action_item          *hai;
        struct mdt_device               *mdt;
@@ -292,7 +288,9 @@ static int mdt_coordinator_cb(const struct lu_env *env,
                break;
        }
        case ARS_STARTED: {
+               struct hsm_progress_kernel pgs;
                struct cdt_agent_req *car;
+               cfs_time_t now = cfs_time_current_sec();
                cfs_time_t last;
 
                /* we search for a running request
@@ -309,74 +307,52 @@ static int mdt_coordinator_cb(const struct lu_env *env,
 
                /* test if request too long, if yes cancel it
                 * the same way the copy tool acknowledge a cancel request */
-               if ((last + cdt->cdt_active_req_timeout)
-                    < cfs_time_current_sec()) {
-                       struct hsm_progress_kernel pgs;
-
-                       dump_llog_agent_req_rec("mdt_coordinator_cb(): "
-                                               "request timed out, start "
-                                               "cleaning", larr);
-                       /* a too old cancel request just needs to be removed
-                        * this can happen, if copy tool does not support cancel
-                        * for other requests, we have to remove the running
-                        * request and notify the copytool
-                        */
-                       pgs.hpk_fid = larr->arr_hai.hai_fid;
-                       pgs.hpk_cookie = larr->arr_hai.hai_cookie;
-                       pgs.hpk_extent = larr->arr_hai.hai_extent;
-                       pgs.hpk_flags = HP_FLAG_COMPLETED;
-                       pgs.hpk_errval = ENOSYS;
-                       pgs.hpk_data_version = 0;
-                       /* update request state, but do not record in llog, to
-                        * avoid deadlock on cdt_llog_lock
-                        */
-                       rc = mdt_hsm_update_request_state(hsd->mti, &pgs, 0);
-                       if (rc)
-                               CERROR("%s: Cannot cleanup timed out request: "
-                                      DFID" for cookie "LPX64" action=%s\n",
-                                      mdt_obd_name(mdt),
-                                      PFID(&pgs.hpk_fid), pgs.hpk_cookie,
-                                      hsm_copytool_action2name(
-                                                    larr->arr_hai.hai_action));
-
-                       if (rc == -ENOENT) {
-                               /* The request no longer exists, forget
-                                * about it, and do not send a cancel request
-                                * to the client, for which an error will be
-                                * sent back, leading to an endless cycle of
-                                * cancellation. */
-                               RETURN(LLOG_DEL_RECORD);
-                       }
-
-                       /* add the cookie to the list of record to be
-                        * canceled by caller */
-                       if (hsd->max_cookie == (hsd->cookie_cnt - 1)) {
-                               __u64 *ptr, *old_ptr;
-                               int old_sz, new_sz, new_cnt;
+               if (now <= last + cdt->cdt_active_req_timeout)
+                       RETURN(0);
 
-                               /* need to increase vector size */
-                               old_sz = sizeof(__u64) * hsd->max_cookie;
-                               old_ptr = hsd->cookies;
+               dump_llog_agent_req_rec("request timed out, start cleaning",
+                                       larr);
+               /* a too old cancel request just needs to be removed
+                * this can happen, if copy tool does not support
+                * cancel for other requests, we have to remove the
+                * running request and notify the copytool */
+               pgs.hpk_fid = larr->arr_hai.hai_fid;
+               pgs.hpk_cookie = larr->arr_hai.hai_cookie;
+               pgs.hpk_extent = larr->arr_hai.hai_extent;
+               pgs.hpk_flags = HP_FLAG_COMPLETED;
+               pgs.hpk_errval = ENOSYS;
+               pgs.hpk_data_version = 0;
+
+               /* update request state, but do not record in llog, to
+                * avoid deadlock on cdt_llog_lock */
+               rc = mdt_hsm_update_request_state(hsd->mti, &pgs, 0);
+               if (rc)
+                       CERROR("%s: cannot cleanup timed out request: "
+                              DFID" for cookie "LPX64" action=%s\n",
+                              mdt_obd_name(mdt),
+                              PFID(&pgs.hpk_fid), pgs.hpk_cookie,
+                              hsm_copytool_action2name(
+                                      larr->arr_hai.hai_action));
+
+               if (rc == -ENOENT) {
+                       /* The request no longer exists, forget
+                        * about it, and do not send a cancel request
+                        * to the client, for which an error will be
+                        * sent back, leading to an endless cycle of
+                        * cancellation. */
+                       RETURN(LLOG_DEL_RECORD);
+               }
 
-                               new_cnt = 2 * hsd->max_cookie;
-                               new_sz = sizeof(__u64) * new_cnt;
+               /* XXX A cancel request cannot be cancelled. */
+               if (larr->arr_hai.hai_action == HSMA_CANCEL)
+                       RETURN(0);
 
-                               OBD_ALLOC(ptr, new_sz);
-                               if (!ptr) {
-                                       CERROR("%s: Cannot allocate memory "
-                                              "(%d o) for cookie vector\n",
-                                              mdt_obd_name(mdt), new_sz);
-                                       RETURN(-ENOMEM);
-                               }
-                               memcpy(ptr, hsd->cookies, old_sz);
-                               hsd->cookies = ptr;
-                               hsd->max_cookie = new_cnt;
-                               OBD_FREE(old_ptr, old_sz);
-                       }
-                       hsd->cookies[hsd->cookie_cnt] =
-                                                      larr->arr_hai.hai_cookie;
-                       hsd->cookie_cnt++;
-               }
+               larr->arr_status = ARS_CANCELED;
+               larr->arr_req_change = now;
+               rc = llog_write(hsd->mti->mti_env, llh, hdr, hdr->lrh_index);
+               if (rc < 0)
+                       CERROR("%s: cannot update agent log: rc = %d\n",
+                              mdt_obd_name(mdt), rc);
                break;
        }
        case ARS_FAILED:
@@ -461,10 +437,6 @@ static int mdt_coordinator(void *data)
        CDEBUG(D_HSM, "%s: coordinator thread starting, pid=%d\n",
               mdt_obd_name(mdt), current_pid());
 
-       /* timeouted cookie vector initialization */
-       hsd.max_cookie = 0;
-       hsd.cookie_cnt = 0;
-       hsd.cookies = NULL;
        /* we use a copy of cdt_max_requests in the cb, so if cdt_max_requests
         * increases due to a change from /proc we do not overflow the
         * hsd.request[] vector
@@ -525,16 +497,6 @@ static int mdt_coordinator(void *data)
                        }
                }
 
-               /* create canceled cookie vector for an arbitrary size
-                * if needed, vector will grow during llog scan
-                */
-               hsd.max_cookie = 10;
-               hsd.cookie_cnt = 0;
-               OBD_ALLOC(hsd.cookies, hsd.max_cookie * sizeof(__u64));
-               if (!hsd.cookies) {
-                       rc = -ENOMEM;
-                       goto clean_cb_alloc;
-               }
                hsd.request_cnt = 0;
 
                rc = cdt_llog_process(mti->mti_env, mdt,
@@ -542,23 +504,7 @@ static int mdt_coordinator(void *data)
                if (rc < 0)
                        goto clean_cb_alloc;
 
-               CDEBUG(D_HSM, "Found %d requests to send and %d"
-                             " requests to cancel\n",
-                      hsd.request_cnt, hsd.cookie_cnt);
-               /* first we cancel llog records of the timed out requests */
-               if (hsd.cookie_cnt > 0) {
-                       rc = mdt_agent_record_update(mti->mti_env, mdt,
-                                                    hsd.cookies,
-                                                    hsd.cookie_cnt,
-                                                    ARS_CANCELED);
-                       if (rc)
-                               CERROR("%s: mdt_agent_record_update() failed, "
-                                      "rc=%d, cannot update status to %s "
-                                      "for %d cookies\n",
-                                      mdt_obd_name(mdt), rc,
-                                      agent_req_status2name(ARS_CANCELED),
-                                      hsd.cookie_cnt);
-               }
+               CDEBUG(D_HSM, "found %d requests to send\n", hsd.request_cnt);
 
                if (list_empty(&cdt->cdt_agents)) {
                        CDEBUG(D_HSM, "no agent available, "
@@ -640,14 +586,6 @@ static int mdt_coordinator(void *data)
                        kuc_free(hal, hsd.request[i].hal_used_sz);
                }
 clean_cb_alloc:
-               /* free cookie vector allocated for/by callback */
-               if (hsd.cookies) {
-                       OBD_FREE(hsd.cookies, hsd.max_cookie * sizeof(__u64));
-                       hsd.max_cookie = 0;
-                       hsd.cookie_cnt = 0;
-                       hsd.cookies = NULL;
-               }
-
                /* free hal allocated by callback */
                for (i = 0; i < hsd.max_requests; i++) {
                        if (hsd.request[i].hal) {
@@ -668,9 +606,6 @@ out:
        if (hsd.request)
                OBD_FREE(hsd.request, hsd.request_sz);
 
-       if (hsd.cookies)
-               OBD_FREE(hsd.cookies, hsd.max_cookie * sizeof(__u64));
-
        if (cdt->cdt_state == CDT_STOPPING) {
                /* request comes from /proc path, so we need to clean cdt
                 * struct */
@@ -1473,8 +1408,7 @@ int mdt_hsm_update_request_state(struct mdt_thread_info *mti,
                       " on fid="DFID"\n",
                       mdt_obd_name(mdt),
                       pgs->hpk_cookie, PFID(&pgs->hpk_fid));
-               if (car == NULL)
-                       RETURN(-ENOENT);
+
                RETURN(PTR_ERR(car));
        }
 
@@ -1605,10 +1539,9 @@ static int mdt_cancel_all_cb(const struct lu_env *env,
            larr->arr_status == ARS_STARTED) {
                larr->arr_status = ARS_CANCELED;
                larr->arr_req_change = cfs_time_current_sec();
-               rc = mdt_agent_llog_update_rec(env, hcad->mdt, llh, larr);
-               if (rc == 0)
-                       RETURN(LLOG_DEL_RECORD);
+               rc = llog_write(env, llh, hdr, hdr->lrh_index);
        }
+
        RETURN(rc);
 }
 
index 5c5f254..137cb4a 100644 (file)
@@ -205,12 +205,10 @@ static int mdt_agent_record_update_cb(const struct lu_env *env,
        struct llog_agent_req_rec       *larr;
        struct data_update_cb           *ducb;
        int                              rc, i;
-       int                              found;
        ENTRY;
 
        larr = (struct llog_agent_req_rec *)hdr;
        ducb = data;
-       found = 0;
 
        /* check if all done */
        if (ducb->cookies_count == ducb->cookies_done)
@@ -237,10 +235,8 @@ static int mdt_agent_record_update_cb(const struct lu_env *env,
 
                        larr->arr_status = ducb->status;
                        larr->arr_req_change = ducb->change_time;
-                       rc = mdt_agent_llog_update_rec(env, ducb->mdt, llh,
-                                                      larr);
+                       rc = llog_write(env, llh, hdr, hdr->lrh_index);
                        ducb->cookies_done++;
-                       found = 1;
                        break;
                }
        }
@@ -249,9 +245,6 @@ static int mdt_agent_record_update_cb(const struct lu_env *env,
                CERROR("%s: mdt_agent_llog_update_rec() failed, rc = %d\n",
                       mdt_obd_name(ducb->mdt), rc);
 
-       if (found == 1)
-               RETURN(LLOG_DEL_RECORD);
-
        RETURN(rc);
 }
 
@@ -290,35 +283,6 @@ int mdt_agent_record_update(const struct lu_env *env, struct mdt_device *mdt,
        RETURN(rc);
 }
 
-/**
- * update a llog record
- *  cdt_llog_lock must be hold
- * \param env [IN] environment
- * \param mdt [IN] mdt device
- * \param llh [IN] llog handle, must be a catalog handle
- * \param larr [IN] record
- * \retval 0 success
- * \retval -ve failure
- */
-int mdt_agent_llog_update_rec(const struct lu_env *env,
-                             struct mdt_device *mdt, struct llog_handle *llh,
-                             struct llog_agent_req_rec *larr)
-{
-       struct llog_rec_hdr      saved_hdr;
-       int                      rc;
-       ENTRY;
-
-       /* saved old record info */
-       saved_hdr = larr->arr_hdr;
-       /* add new record with updated values */
-       larr->arr_hdr.lrh_id = 0;
-       larr->arr_hdr.lrh_index = 0;
-       rc = llog_cat_add(env, llh->u.phd.phd_cat_handle, &larr->arr_hdr,
-                         NULL);
-       larr->arr_hdr = saved_hdr;
-       RETURN(rc);
-}
-
 /*
  * Agent actions /proc seq_file methods
  * As llog processing uses a callback for each entry, we cannot do a sequential
index d6256fd..8829027 100644 (file)
@@ -807,9 +807,6 @@ int mdt_agent_record_add(const struct lu_env *env, struct mdt_device *mdt,
 int mdt_agent_record_update(const struct lu_env *env,
                            struct mdt_device *mdt, __u64 *cookies,
                            int cookies_count, enum agent_req_status status);
-int mdt_agent_llog_update_rec(const struct lu_env *env, struct mdt_device *mdt,
-                             struct llog_handle *llh,
-                             struct llog_agent_req_rec *larr);
 
 /* mdt/mdt_hsm_cdt_agent.c */
 extern const struct file_operations mdt_hsm_agent_fops;