Whamcloud - gitweb
LU-7986 hsm: update actions llog in place
[fs/lustre-release.git] / lustre / mdt / mdt_coordinator.c
index daf8d95..c74cf10 100644 (file)
@@ -142,10 +142,6 @@ struct hsm_scan_data {
                int                      hal_used_sz;
                struct hsm_action_list  *hal;
        } *request;
-       /* records to be canceled */
-       int                              max_cookie;    /** vector size */
-       int                              cookie_cnt;    /** used count */
-       __u64                           *cookies;
 };
 
 /**
@@ -164,7 +160,7 @@ static int mdt_coordinator_cb(const struct lu_env *env,
                              struct llog_rec_hdr *hdr,
                              void *data)
 {
-       const struct llog_agent_req_rec *larr;
+       struct llog_agent_req_rec       *larr;
        struct hsm_scan_data            *hsd;
        struct hsm_action_item          *hai;
        struct mdt_device               *mdt;
@@ -292,7 +288,9 @@ static int mdt_coordinator_cb(const struct lu_env *env,
                break;
        }
        case ARS_STARTED: {
+               struct hsm_progress_kernel pgs;
                struct cdt_agent_req *car;
+               cfs_time_t now = cfs_time_current_sec();
                cfs_time_t last;
 
                /* we search for a running request
@@ -309,74 +307,52 @@ static int mdt_coordinator_cb(const struct lu_env *env,
 
                /* test if request too long, if yes cancel it
                 * the same way the copy tool acknowledge a cancel request */
-               if ((last + cdt->cdt_active_req_timeout)
-                    < cfs_time_current_sec()) {
-                       struct hsm_progress_kernel pgs;
-
-                       dump_llog_agent_req_rec("mdt_coordinator_cb(): "
-                                               "request timed out, start "
-                                               "cleaning", larr);
-                       /* a too old cancel request just needs to be removed
-                        * this can happen, if copy tool does not support cancel
-                        * for other requests, we have to remove the running
-                        * request and notify the copytool
-                        */
-                       pgs.hpk_fid = larr->arr_hai.hai_fid;
-                       pgs.hpk_cookie = larr->arr_hai.hai_cookie;
-                       pgs.hpk_extent = larr->arr_hai.hai_extent;
-                       pgs.hpk_flags = HP_FLAG_COMPLETED;
-                       pgs.hpk_errval = ENOSYS;
-                       pgs.hpk_data_version = 0;
-                       /* update request state, but do not record in llog, to
-                        * avoid deadlock on cdt_llog_lock
-                        */
-                       rc = mdt_hsm_update_request_state(hsd->mti, &pgs, 0);
-                       if (rc)
-                               CERROR("%s: Cannot cleanup timed out request: "
-                                      DFID" for cookie "LPX64" action=%s\n",
-                                      mdt_obd_name(mdt),
-                                      PFID(&pgs.hpk_fid), pgs.hpk_cookie,
-                                      hsm_copytool_action2name(
-                                                    larr->arr_hai.hai_action));
-
-                       if (rc == -ENOENT) {
-                               /* The request no longer exists, forget
-                                * about it, and do not send a cancel request
-                                * to the client, for which an error will be
-                                * sent back, leading to an endless cycle of
-                                * cancellation. */
-                               RETURN(LLOG_DEL_RECORD);
-                       }
-
-                       /* add the cookie to the list of record to be
-                        * canceled by caller */
-                       if (hsd->max_cookie == (hsd->cookie_cnt - 1)) {
-                               __u64 *ptr, *old_ptr;
-                               int old_sz, new_sz, new_cnt;
+               if (now <= last + cdt->cdt_active_req_timeout)
+                       RETURN(0);
 
-                               /* need to increase vector size */
-                               old_sz = sizeof(__u64) * hsd->max_cookie;
-                               old_ptr = hsd->cookies;
+               dump_llog_agent_req_rec("request timed out, start cleaning",
+                                       larr);
+               /* A cancel request that is too old just needs to be removed;
+                * this can happen if the copytool does not support cancel.
+                * For other requests, we have to remove the running
+                * request and notify the copytool. */
+               pgs.hpk_fid = larr->arr_hai.hai_fid;
+               pgs.hpk_cookie = larr->arr_hai.hai_cookie;
+               pgs.hpk_extent = larr->arr_hai.hai_extent;
+               pgs.hpk_flags = HP_FLAG_COMPLETED;
+               pgs.hpk_errval = ENOSYS;
+               pgs.hpk_data_version = 0;
+
+               /* update request state, but do not record in llog, to
+                * avoid deadlock on cdt_llog_lock */
+               rc = mdt_hsm_update_request_state(hsd->mti, &pgs, 0);
+               if (rc)
+                       CERROR("%s: cannot cleanup timed out request: "
+                              DFID" for cookie "LPX64" action=%s\n",
+                              mdt_obd_name(mdt),
+                              PFID(&pgs.hpk_fid), pgs.hpk_cookie,
+                              hsm_copytool_action2name(
+                                      larr->arr_hai.hai_action));
+
+               if (rc == -ENOENT) {
+                       /* The request no longer exists, forget
+                        * about it, and do not send a cancel request
+                        * to the client, for which an error will be
+                        * sent back, leading to an endless cycle of
+                        * cancellation. */
+                       RETURN(LLOG_DEL_RECORD);
+               }
 
-                               new_cnt = 2 * hsd->max_cookie;
-                               new_sz = sizeof(__u64) * new_cnt;
+               /* XXX A cancel request cannot be cancelled. */
+               if (larr->arr_hai.hai_action == HSMA_CANCEL)
+                       RETURN(0);
 
-                               OBD_ALLOC(ptr, new_sz);
-                               if (!ptr) {
-                                       CERROR("%s: Cannot allocate memory "
-                                              "(%d o) for cookie vector\n",
-                                              mdt_obd_name(mdt), new_sz);
-                                       RETURN(-ENOMEM);
-                               }
-                               memcpy(ptr, hsd->cookies, old_sz);
-                               hsd->cookies = ptr;
-                               hsd->max_cookie = new_cnt;
-                               OBD_FREE(old_ptr, old_sz);
-                       }
-                       hsd->cookies[hsd->cookie_cnt] =
-                                                      larr->arr_hai.hai_cookie;
-                       hsd->cookie_cnt++;
-               }
+               larr->arr_status = ARS_CANCELED;
+               larr->arr_req_change = now;
+               rc = llog_write(hsd->mti->mti_env, llh, hdr, hdr->lrh_index);
+               if (rc < 0)
+                       CERROR("%s: cannot update agent log: rc = %d\n",
+                              mdt_obd_name(mdt), rc);
                break;
        }
        case ARS_FAILED:
@@ -461,10 +437,6 @@ static int mdt_coordinator(void *data)
        CDEBUG(D_HSM, "%s: coordinator thread starting, pid=%d\n",
               mdt_obd_name(mdt), current_pid());
 
-       /* timeouted cookie vector initialization */
-       hsd.max_cookie = 0;
-       hsd.cookie_cnt = 0;
-       hsd.cookies = NULL;
        /* we use a copy of cdt_max_requests in the cb, so if cdt_max_requests
         * increases due to a change from /proc we do not overflow the
         * hsd.request[] vector
@@ -525,16 +497,6 @@ static int mdt_coordinator(void *data)
                        }
                }
 
-               /* create canceled cookie vector for an arbitrary size
-                * if needed, vector will grow during llog scan
-                */
-               hsd.max_cookie = 10;
-               hsd.cookie_cnt = 0;
-               OBD_ALLOC(hsd.cookies, hsd.max_cookie * sizeof(__u64));
-               if (!hsd.cookies) {
-                       rc = -ENOMEM;
-                       goto clean_cb_alloc;
-               }
                hsd.request_cnt = 0;
 
                rc = cdt_llog_process(mti->mti_env, mdt,
@@ -542,23 +504,7 @@ static int mdt_coordinator(void *data)
                if (rc < 0)
                        goto clean_cb_alloc;
 
-               CDEBUG(D_HSM, "Found %d requests to send and %d"
-                             " requests to cancel\n",
-                      hsd.request_cnt, hsd.cookie_cnt);
-               /* first we cancel llog records of the timed out requests */
-               if (hsd.cookie_cnt > 0) {
-                       rc = mdt_agent_record_update(mti->mti_env, mdt,
-                                                    hsd.cookies,
-                                                    hsd.cookie_cnt,
-                                                    ARS_CANCELED);
-                       if (rc)
-                               CERROR("%s: mdt_agent_record_update() failed, "
-                                      "rc=%d, cannot update status to %s "
-                                      "for %d cookies\n",
-                                      mdt_obd_name(mdt), rc,
-                                      agent_req_status2name(ARS_CANCELED),
-                                      hsd.cookie_cnt);
-               }
+               CDEBUG(D_HSM, "found %d requests to send\n", hsd.request_cnt);
 
                if (list_empty(&cdt->cdt_agents)) {
                        CDEBUG(D_HSM, "no agent available, "
@@ -640,14 +586,6 @@ static int mdt_coordinator(void *data)
                        kuc_free(hal, hsd.request[i].hal_used_sz);
                }
 clean_cb_alloc:
-               /* free cookie vector allocated for/by callback */
-               if (hsd.cookies) {
-                       OBD_FREE(hsd.cookies, hsd.max_cookie * sizeof(__u64));
-                       hsd.max_cookie = 0;
-                       hsd.cookie_cnt = 0;
-                       hsd.cookies = NULL;
-               }
-
                /* free hal allocated by callback */
                for (i = 0; i < hsd.max_requests; i++) {
                        if (hsd.request[i].hal) {
@@ -668,9 +606,6 @@ out:
        if (hsd.request)
                OBD_FREE(hsd.request, hsd.request_sz);
 
-       if (hsd.cookies)
-               OBD_FREE(hsd.cookies, hsd.max_cookie * sizeof(__u64));
-
        if (cdt->cdt_state == CDT_STOPPING) {
                /* request comes from /proc path, so we need to clean cdt
                 * struct */
@@ -915,13 +850,6 @@ int mdt_hsm_cdt_init(struct mdt_device *mdt)
        cdt->cdt_policy = CDT_DEFAULT_POLICY;
        cdt->cdt_active_req_timeout = 3600;
 
-       /* Initialize cdt_compound_id here to allow its usage for
-        * delayed requests from RAoLU policy */
-       atomic_set(&cdt->cdt_compound_id, cfs_time_current_sec());
-
-       /* by default do not remove archives on last unlink */
-       cdt->cdt_remove_archive_on_last_unlink = false;
-
        RETURN(0);
 }
 
@@ -973,6 +901,7 @@ int mdt_hsm_cdt_start(struct mdt_device *mdt)
 
        cdt->cdt_state = CDT_INIT;
 
+       atomic_set(&cdt->cdt_compound_id, cfs_time_current_sec());
        /* just need to be larger than previous one */
        /* cdt_last_cookie is protected by cdt_llog_lock */
        cdt->cdt_last_cookie = cfs_time_current_sec();
@@ -1479,8 +1408,7 @@ int mdt_hsm_update_request_state(struct mdt_thread_info *mti,
                       " on fid="DFID"\n",
                       mdt_obd_name(mdt),
                       pgs->hpk_cookie, PFID(&pgs->hpk_fid));
-               if (car == NULL)
-                       RETURN(-ENOENT);
+
                RETURN(PTR_ERR(car));
        }
 
@@ -1611,10 +1539,9 @@ static int mdt_cancel_all_cb(const struct lu_env *env,
            larr->arr_status == ARS_STARTED) {
                larr->arr_status = ARS_CANCELED;
                larr->arr_req_change = cfs_time_current_sec();
-               rc = mdt_agent_llog_update_rec(env, hcad->mdt, llh, larr);
-               if (rc == 0)
-                       RETURN(LLOG_DEL_RECORD);
+               rc = llog_write(env, llh, hdr, hdr->lrh_index);
        }
+
        RETURN(rc);
 }
 
@@ -2073,17 +2000,20 @@ int mdt_hsm_cdt_control_seq_show(struct seq_file *m, void *data)
 static int
 mdt_hsm_request_mask_show(struct seq_file *m, __u64 mask)
 {
-       int i, rc = 0;
+       bool first = true;
+       int i;
        ENTRY;
 
        for (i = 0; i < 8 * sizeof(mask); i++) {
-               if (mask & (1UL << i))
-                       rc += seq_printf(m, "%s%s", rc == 0 ? "" : " ",
-                                       hsm_copytool_action2name(i));
+               if (mask & (1UL << i)) {
+                       seq_printf(m, "%s%s", first ? "" : " ",
+                                  hsm_copytool_action2name(i));
+                       first = false;
+               }
        }
-       rc += seq_printf(m, "\n");
+       seq_putc(m, '\n');
 
-       RETURN(rc);
+       RETURN(0);
 }
 
 static int
@@ -2212,36 +2142,6 @@ mdt_hsm_other_request_mask_seq_write(struct file *file, const char __user *buf,
                                           &cdt->cdt_other_request_mask);
 }
 
-static int mdt_hsm_cdt_raolu_seq_show(struct seq_file *m, void *data)
-{
-       struct mdt_device       *mdt = m->private;
-       struct coordinator      *cdt = &mdt->mdt_coordinator;
-       ENTRY;
-
-       seq_printf(m, "%d\n", (int)cdt->cdt_remove_archive_on_last_unlink);
-       RETURN(0);
-}
-
-static ssize_t
-mdt_hsm_cdt_raolu_seq_write(struct file *file, const char __user *buffer,
-                         size_t count, loff_t *off)
-
-{
-       struct seq_file         *m = file->private_data;
-       struct mdt_device       *mdt = m->private;
-       struct coordinator      *cdt = &mdt->mdt_coordinator;
-       int                      val;
-       int                      rc;
-       ENTRY;
-
-       rc = lprocfs_write_helper(buffer, count, &val);
-       if (rc < 0)
-               RETURN(rc);
-
-       cdt->cdt_remove_archive_on_last_unlink = val;
-       RETURN(count);
-}
-
 LPROC_SEQ_FOPS(mdt_hsm_cdt_loop_period);
 LPROC_SEQ_FOPS(mdt_hsm_cdt_grace_delay);
 LPROC_SEQ_FOPS(mdt_hsm_cdt_active_req_timeout);
@@ -2250,7 +2150,6 @@ LPROC_SEQ_FOPS(mdt_hsm_cdt_default_archive_id);
 LPROC_SEQ_FOPS(mdt_hsm_user_request_mask);
 LPROC_SEQ_FOPS(mdt_hsm_group_request_mask);
 LPROC_SEQ_FOPS(mdt_hsm_other_request_mask);
-LPROC_SEQ_FOPS(mdt_hsm_cdt_raolu);
 
 static struct lprocfs_vars lprocfs_mdt_hsm_vars[] = {
        { .name =       "agents",
@@ -2278,7 +2177,5 @@ static struct lprocfs_vars lprocfs_mdt_hsm_vars[] = {
          .fops =       &mdt_hsm_group_request_mask_fops,       },
        { .name =       "other_request_mask",
          .fops =       &mdt_hsm_other_request_mask_fops,       },
-       { .name =       "remove_archive_on_last_unlink",
-         .fops =       &mdt_hsm_cdt_raolu_fops,                },
        { 0 }
 };