Whamcloud - gitweb
LU-7986 hsm: update actions llog in place
[fs/lustre-release.git] / lustre / mdt / mdt_coordinator.c
index 63d9582..c74cf10 100644 (file)
 
 #define DEBUG_SUBSYSTEM S_MDS
 
+#include <linux/kthread.h>
 #include <obd_support.h>
 #include <lustre_net.h>
 #include <lustre_export.h>
 #include <obd.h>
 #include <lprocfs_status.h>
 #include <lustre_log.h>
+#include <lustre_kernelcomm.h>
 #include "mdt_internal.h"
 
-static struct lprocfs_seq_vars lprocfs_mdt_hsm_vars[];
+static struct lprocfs_vars lprocfs_mdt_hsm_vars[];
 
 /**
  * get obj and HSM attributes on a fid
@@ -140,10 +142,6 @@ struct hsm_scan_data {
                int                      hal_used_sz;
                struct hsm_action_list  *hal;
        } *request;
-       /* records to be canceled */
-       int                              max_cookie;    /** vector size */
-       int                              cookie_cnt;    /** used count */
-       __u64                           *cookies;
 };
 
 /**
@@ -162,7 +160,7 @@ static int mdt_coordinator_cb(const struct lu_env *env,
                              struct llog_rec_hdr *hdr,
                              void *data)
 {
-       const struct llog_agent_req_rec *larr;
+       struct llog_agent_req_rec       *larr;
        struct hsm_scan_data            *hsd;
        struct hsm_action_item          *hai;
        struct mdt_device               *mdt;
@@ -185,8 +183,8 @@ static int mdt_coordinator_cb(const struct lu_env *env,
                    cdt->cdt_max_requests)
                        break;
 
-               /* first search if the request if known in the list we have
-                * build and if there is room in the request vector */
+               /* first search whether the request is found in the list we
+                * have built and if there is room in the request vector */
                empty_slot = -1;
                found = -1;
                for (i = 0; i < hsd->max_requests &&
@@ -290,7 +288,9 @@ static int mdt_coordinator_cb(const struct lu_env *env,
                break;
        }
        case ARS_STARTED: {
+               struct hsm_progress_kernel pgs;
                struct cdt_agent_req *car;
+               cfs_time_t now = cfs_time_current_sec();
                cfs_time_t last;
 
                /* we search for a running request
@@ -307,74 +307,52 @@ static int mdt_coordinator_cb(const struct lu_env *env,
 
                /* test if request too long, if yes cancel it
                 * the same way the copy tool acknowledge a cancel request */
-               if ((last + cdt->cdt_active_req_timeout)
-                    < cfs_time_current_sec()) {
-                       struct hsm_progress_kernel pgs;
-
-                       dump_llog_agent_req_rec("mdt_coordinator_cb(): "
-                                               "request timeouted, start "
-                                               "cleaning", larr);
-                       /* a too old cancel request just needs to be removed
-                        * this can happen, if copy tool does not support cancel
-                        * for other requests, we have to remove the running
-                        * request and notify the copytool
-                        */
-                       pgs.hpk_fid = larr->arr_hai.hai_fid;
-                       pgs.hpk_cookie = larr->arr_hai.hai_cookie;
-                       pgs.hpk_extent = larr->arr_hai.hai_extent;
-                       pgs.hpk_flags = HP_FLAG_COMPLETED;
-                       pgs.hpk_errval = ENOSYS;
-                       pgs.hpk_data_version = 0;
-                       /* update request state, but do not record in llog, to
-                        * avoid deadlock on cdt_llog_lock
-                        */
-                       rc = mdt_hsm_update_request_state(hsd->mti, &pgs, 0);
-                       if (rc)
-                               CERROR("%s: Cannot cleanup timeouted request: "
-                                      DFID" for cookie "LPX64" action=%s\n",
-                                      mdt_obd_name(mdt),
-                                      PFID(&pgs.hpk_fid), pgs.hpk_cookie,
-                                      hsm_copytool_action2name(
-                                                    larr->arr_hai.hai_action));
-
-                       if (rc == -ENOENT) {
-                               /* The request no longer exists, forget
-                                * about it, and do not send a cancel request
-                                * to the client, for which an error will be
-                                * sent back, leading to an endless cycle of
-                                * cancellation. */
-                               RETURN(LLOG_DEL_RECORD);
-                       }
-
-                       /* add the cookie to the list of record to be
-                        * canceled by caller */
-                       if (hsd->max_cookie == (hsd->cookie_cnt - 1)) {
-                               __u64 *ptr, *old_ptr;
-                               int old_sz, new_sz, new_cnt;
+               if (now <= last + cdt->cdt_active_req_timeout)
+                       RETURN(0);
 
-                               /* need to increase vector size */
-                               old_sz = sizeof(__u64) * hsd->max_cookie;
-                               old_ptr = hsd->cookies;
+               dump_llog_agent_req_rec("request timed out, start cleaning",
+                                       larr);
+               /* a cancel request that is too old just needs to be removed;
+                * this can happen if the copytool does not support cancel.
+                * For other requests, we have to remove the running
+                * request and notify the copytool */
+               pgs.hpk_fid = larr->arr_hai.hai_fid;
+               pgs.hpk_cookie = larr->arr_hai.hai_cookie;
+               pgs.hpk_extent = larr->arr_hai.hai_extent;
+               pgs.hpk_flags = HP_FLAG_COMPLETED;
+               pgs.hpk_errval = ENOSYS;
+               pgs.hpk_data_version = 0;
+
+               /* update request state, but do not record in llog, to
+                * avoid deadlock on cdt_llog_lock */
+               rc = mdt_hsm_update_request_state(hsd->mti, &pgs, 0);
+               if (rc)
+                       CERROR("%s: cannot cleanup timed out request: "
+                              DFID" for cookie "LPX64" action=%s\n",
+                              mdt_obd_name(mdt),
+                              PFID(&pgs.hpk_fid), pgs.hpk_cookie,
+                              hsm_copytool_action2name(
+                                      larr->arr_hai.hai_action));
+
+               if (rc == -ENOENT) {
+                       /* The request no longer exists, forget
+                        * about it, and do not send a cancel request
+                        * to the client, for which an error will be
+                        * sent back, leading to an endless cycle of
+                        * cancellation. */
+                       RETURN(LLOG_DEL_RECORD);
+               }
 
-                               new_cnt = 2 * hsd->max_cookie;
-                               new_sz = sizeof(__u64) * new_cnt;
+               /* XXX A cancel request cannot be cancelled. */
+               if (larr->arr_hai.hai_action == HSMA_CANCEL)
+                       RETURN(0);
 
-                               OBD_ALLOC(ptr, new_sz);
-                               if (!ptr) {
-                                       CERROR("%s: Cannot allocate memory "
-                                              "(%d o) for cookie vector\n",
-                                              mdt_obd_name(mdt), new_sz);
-                                       RETURN(-ENOMEM);
-                               }
-                               memcpy(ptr, hsd->cookies, old_sz);
-                               hsd->cookies = ptr;
-                               hsd->max_cookie = new_cnt;
-                               OBD_FREE(old_ptr, old_sz);
-                       }
-                       hsd->cookies[hsd->cookie_cnt] =
-                                                      larr->arr_hai.hai_cookie;
-                       hsd->cookie_cnt++;
-               }
+               larr->arr_status = ARS_CANCELED;
+               larr->arr_req_change = now;
+               rc = llog_write(hsd->mti->mti_env, llh, hdr, hdr->lrh_index);
+               if (rc < 0)
+                       CERROR("%s: cannot update agent log: rc = %d\n",
+                              mdt_obd_name(mdt), rc);
                break;
        }
        case ARS_FAILED:
@@ -401,7 +379,7 @@ int hsm_cdt_procfs_init(struct mdt_device *mdt)
        ENTRY;
 
        /* init /proc entries, failure is not critical */
-       cdt->cdt_proc_dir = lprocfs_seq_register("hsm",
+       cdt->cdt_proc_dir = lprocfs_register("hsm",
                                             mdt2obd_dev(mdt)->obd_proc_entry,
                                             lprocfs_mdt_hsm_vars, mdt);
        if (IS_ERR(cdt->cdt_proc_dir)) {
@@ -433,7 +411,7 @@ void  hsm_cdt_procfs_fini(struct mdt_device *mdt)
  * \param none
  * \retval var vector
  */
-struct lprocfs_seq_vars *hsm_cdt_get_proc_vars(void)
+struct lprocfs_vars *hsm_cdt_get_proc_vars(void)
 {
        return lprocfs_mdt_hsm_vars;
 }
@@ -449,7 +427,7 @@ static int mdt_coordinator(void *data)
        struct mdt_thread_info  *mti = data;
        struct mdt_device       *mdt = mti->mti_mdt;
        struct coordinator      *cdt = &mdt->mdt_coordinator;
-       struct hsm_scan_data     hsd = { 0 };
+       struct hsm_scan_data     hsd = { NULL };
        int                      rc = 0;
        ENTRY;
 
@@ -459,10 +437,6 @@ static int mdt_coordinator(void *data)
        CDEBUG(D_HSM, "%s: coordinator thread starting, pid=%d\n",
               mdt_obd_name(mdt), current_pid());
 
-       /* timeouted cookie vector initialization */
-       hsd.max_cookie = 0;
-       hsd.cookie_cnt = 0;
-       hsd.cookies = NULL;
        /* we use a copy of cdt_max_requests in the cb, so if cdt_max_requests
         * increases due to a change from /proc we do not overflow the
         * hsd.request[] vector
@@ -523,16 +497,6 @@ static int mdt_coordinator(void *data)
                        }
                }
 
-               /* create canceled cookie vector for an arbitrary size
-                * if needed, vector will grow during llog scan
-                */
-               hsd.max_cookie = 10;
-               hsd.cookie_cnt = 0;
-               OBD_ALLOC(hsd.cookies, hsd.max_cookie * sizeof(__u64));
-               if (!hsd.cookies) {
-                       rc = -ENOMEM;
-                       goto clean_cb_alloc;
-               }
                hsd.request_cnt = 0;
 
                rc = cdt_llog_process(mti->mti_env, mdt,
@@ -540,23 +504,7 @@ static int mdt_coordinator(void *data)
                if (rc < 0)
                        goto clean_cb_alloc;
 
-               CDEBUG(D_HSM, "Found %d requests to send and %d"
-                             " requests to cancel\n",
-                      hsd.request_cnt, hsd.cookie_cnt);
-               /* first we cancel llog records of the timeouted requests */
-               if (hsd.cookie_cnt > 0) {
-                       rc = mdt_agent_record_update(mti->mti_env, mdt,
-                                                    hsd.cookies,
-                                                    hsd.cookie_cnt,
-                                                    ARS_CANCELED);
-                       if (rc)
-                               CERROR("%s: mdt_agent_record_update() failed, "
-                                      "rc=%d, cannot update status to %s "
-                                      "for %d cookies\n",
-                                      mdt_obd_name(mdt), rc,
-                                      agent_req_status2name(ARS_CANCELED),
-                                      hsd.cookie_cnt);
-               }
+               CDEBUG(D_HSM, "found %d requests to send\n", hsd.request_cnt);
 
                if (list_empty(&cdt->cdt_agents)) {
                        CDEBUG(D_HSM, "no agent available, "
@@ -638,14 +586,6 @@ static int mdt_coordinator(void *data)
                        kuc_free(hal, hsd.request[i].hal_used_sz);
                }
 clean_cb_alloc:
-               /* free cookie vector allocated for/by callback */
-               if (hsd.cookies) {
-                       OBD_FREE(hsd.cookies, hsd.max_cookie * sizeof(__u64));
-                       hsd.max_cookie = 0;
-                       hsd.cookie_cnt = 0;
-                       hsd.cookies = NULL;
-               }
-
                /* free hal allocated by callback */
                for (i = 0; i < hsd.max_requests; i++) {
                        if (hsd.request[i].hal) {
@@ -666,9 +606,6 @@ out:
        if (hsd.request)
                OBD_FREE(hsd.request, hsd.request_sz);
 
-       if (hsd.cookies)
-               OBD_FREE(hsd.cookies, hsd.max_cookie * sizeof(__u64));
-
        if (cdt->cdt_state == CDT_STOPPING) {
                /* request comes from /proc path, so we need to clean cdt
                 * struct */
@@ -1270,7 +1207,7 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti,
                         * ENOSYS only if does not support cancel
                         */
                        /* this can also happen when cdt calls it to
-                        * for a timeouted request */
+                        * clean up a timed out request */
                        *status = ARS_FAILED;
                        /* to have a cancel event in changelog */
                        pgs->hpk_errval = ECANCELED;
@@ -1400,7 +1337,7 @@ unlock:
                struct cdt_restore_handle       *crh;
 
                /* restore in data FID done, we swap the layouts
-                * only if restore is successfull */
+                * only if restore is successful */
                if (pgs->hpk_errval == 0) {
                        rc = hsm_swap_layouts(mti, &car->car_hai->hai_fid,
                                              &car->car_hai->hai_dfid, &mh);
@@ -1471,8 +1408,7 @@ int mdt_hsm_update_request_state(struct mdt_thread_info *mti,
                       " on fid="DFID"\n",
                       mdt_obd_name(mdt),
                       pgs->hpk_cookie, PFID(&pgs->hpk_fid));
-               if (car == NULL)
-                       RETURN(-ENOENT);
+
                RETURN(PTR_ERR(car));
        }
 
@@ -1603,10 +1539,9 @@ static int mdt_cancel_all_cb(const struct lu_env *env,
            larr->arr_status == ARS_STARTED) {
                larr->arr_status = ARS_CANCELED;
                larr->arr_req_change = cfs_time_current_sec();
-               rc = mdt_agent_llog_update_rec(env, hcad->mdt, llh, larr);
-               if (rc == 0)
-                       RETURN(LLOG_DEL_RECORD);
+               rc = llog_write(env, llh, hdr, hdr->lrh_index);
        }
+
        RETURN(rc);
 }
 
@@ -1711,7 +1646,7 @@ out:
 }
 
 /**
- * check if a request is comptaible with file status
+ * check if a request is compatible with file status
  * \param hai [IN] request description
  * \param hal_an [IN] request archive number (not used)
  * \param rq_flags [IN] request flags
@@ -2065,17 +2000,20 @@ int mdt_hsm_cdt_control_seq_show(struct seq_file *m, void *data)
 static int
 mdt_hsm_request_mask_show(struct seq_file *m, __u64 mask)
 {
-       int i, rc = 0;
+       bool first = true;
+       int i;
        ENTRY;
 
        for (i = 0; i < 8 * sizeof(mask); i++) {
-               if (mask & (1UL << i))
-                       rc += seq_printf(m, "%s%s", rc == 0 ? "" : " ",
-                                       hsm_copytool_action2name(i));
+               if (mask & (1UL << i)) {
+                       seq_printf(m, "%s%s", first ? "" : " ",
+                                  hsm_copytool_action2name(i));
+                       first = false;
+               }
        }
-       rc += seq_printf(m, "\n");
+       seq_putc(m, '\n');
 
-       RETURN(rc);
+       RETURN(0);
 }
 
 static int
@@ -2213,7 +2151,7 @@ LPROC_SEQ_FOPS(mdt_hsm_user_request_mask);
 LPROC_SEQ_FOPS(mdt_hsm_group_request_mask);
 LPROC_SEQ_FOPS(mdt_hsm_other_request_mask);
 
-static struct lprocfs_seq_vars lprocfs_mdt_hsm_vars[] = {
+static struct lprocfs_vars lprocfs_mdt_hsm_vars[] = {
        { .name =       "agents",
          .fops =       &mdt_hsm_agent_fops                     },
        { .name =       "actions",