Whamcloud - gitweb
LU-11892 hsm: fix memory leak when scheduling HSM requests
[fs/lustre-release.git] / lustre / mdt / mdt_coordinator.c
index 9cb5f1e..51708d1 100644 (file)
@@ -142,6 +142,7 @@ struct hsm_scan_data {
         * for new work?
         */
        bool                     hsd_housekeeping;
+       bool                     hsd_one_restore;
        int                      hsd_action_count;
        int                      hsd_request_len; /* array alloc len */
        int                      hsd_request_count; /* array used count */
@@ -162,21 +163,21 @@ static int mdt_cdt_waiting_cb(const struct lu_env *env,
        int i;
 
        /* Are agents full? */
+       if (atomic_read(&cdt->cdt_request_count) >= cdt->cdt_max_requests)
+               RETURN(hsd->hsd_housekeeping ? 0 : LLOG_PROC_BREAK);
+
        if (hsd->hsd_action_count + atomic_read(&cdt->cdt_request_count) >=
            cdt->cdt_max_requests) {
-               if (hsd->hsd_housekeeping) {
-                       /* Unknown request and no more room for a new
-                        * request. Continue to scan to find other
-                        * entries for already existing requests. */
-                       RETURN(0);
-               } else {
-                       /* We cannot send and more requests, stop
-                        * here. There might be more known requests
-                        * that could be merged, but this avoid
-                        * analyzing too many llogs for minor
-                        * gains. */
-                       RETURN(LLOG_PROC_BREAK);
-               }
+               /* We cannot send any more request
+                *
+                *                     *** SPECIAL CASE ***
+                *
+                * Restore requests are too important not to schedule at least
+                * one, everytime we can.
+                */
+               if (larr->arr_hai.hai_action != HSMA_RESTORE ||
+                   hsd->hsd_one_restore)
+                       RETURN(hsd->hsd_housekeeping ? 0 : LLOG_PROC_BREAK);
        }
 
        hai_size = cfs_size_round(larr->arr_hai.hai_len);
@@ -193,17 +194,55 @@ static int mdt_cdt_waiting_cb(const struct lu_env *env,
                }
        }
 
-       if (!request) {
-               struct hsm_action_list *hal;
+       /* Are we trying to force-schedule a request? */
+       if (hsd->hsd_action_count + atomic_read(&cdt->cdt_request_count) >=
+           cdt->cdt_max_requests) {
+               /* Is there really no compatible hsm_scan_request? */
+               if (!request) {
+                       for (i -= 1; i >= 0; i--) {
+                               if (hsd->hsd_request[i].hal->hal_archive_id ==
+                                   archive_id) {
+                                       request = &hsd->hsd_request[i];
+                                       break;
+                               }
+                       }
+               }
+
+               /* Make room for the hai */
+               if (request) {
+                       /* Discard the last hai until there is enough space */
+                       do {
+                               request->hal->hal_count--;
+
+                               hai = hai_first(request->hal);
+                               for (i = 0; i < request->hal->hal_count; i++)
+                                       hai = hai_next(hai);
+                               request->hal_used_sz -=
+                                       cfs_size_round(hai->hai_len);
+                               hsd->hsd_action_count--;
+                       } while (request->hal_used_sz + hai_size >
+                                LDLM_MAXREQSIZE);
+               } else if (hsd->hsd_housekeeping) {
+                       struct hsm_scan_request *tmp;
+
+                       /* Discard the (whole) last hal */
+                       hsd->hsd_request_count--;
+                       LASSERT(hsd->hsd_request_count >= 0);
+                       tmp = &hsd->hsd_request[hsd->hsd_request_count];
+                       hsd->hsd_action_count -= tmp->hal->hal_count;
+                       LASSERT(hsd->hsd_action_count >= 0);
+                       OBD_FREE(tmp->hal, tmp->hal_sz);
+               } else {
+                       /* Bailing out, this code path is too hot */
+                       RETURN(LLOG_PROC_BREAK);
 
-               if (hsd->hsd_request_count == hsd->hsd_request_len) {
-                       /* Logic as above. */
-                       if (hsd->hsd_housekeeping)
-                               RETURN(0);
-                       else
-                               RETURN(LLOG_PROC_BREAK);
                }
+       }
 
+       if (!request) {
+               struct hsm_action_list *hal;
+
+               LASSERT(hsd->hsd_request_count < hsd->hsd_request_len);
                request = &hsd->hsd_request[hsd->hsd_request_count];
 
                /* allocates hai vector size just needs to be large
@@ -246,15 +285,22 @@ static int mdt_cdt_waiting_cb(const struct lu_env *env,
 
        memcpy(hai, &larr->arr_hai, larr->arr_hai.hai_len);
 
-       request->hal_used_sz += cfs_size_round(hai->hai_len);
+       request->hal_used_sz += hai_size;
        request->hal->hal_count++;
 
        hsd->hsd_action_count++;
 
-       if (hai->hai_action != HSMA_CANCEL)
+       switch (hai->hai_action) {
+       case HSMA_CANCEL:
+               break;
+       case HSMA_RESTORE:
+               hsd->hsd_one_restore = true;
+               /* Intentional fallthrough */
+       default:
                cdt_agent_record_hash_add(cdt, hai->hai_cookie,
                                          llh->lgh_hdr->llh_cat_idx,
                                          larr->arr_hdr.lrh_index);
+       }
 
        RETURN(0);
 }
@@ -270,7 +316,7 @@ static int mdt_cdt_started_cb(const struct lu_env *env,
        struct cdt_agent_req *car;
        time64_t now = ktime_get_real_seconds();
        time64_t last;
-       int cl_flags;
+       enum changelog_rec_flags clf_flags;
        int rc;
 
        if (!hsd->hsd_housekeeping)
@@ -302,30 +348,30 @@ static int mdt_cdt_started_cb(const struct lu_env *env,
        }
 
        /* Emit a changelog record for the failed action.*/
-       cl_flags = 0;
-       hsm_set_cl_error(&cl_flags, ECANCELED);
+       clf_flags = 0;
+       hsm_set_cl_error(&clf_flags, ECANCELED);
 
        switch (hai->hai_action) {
        case HSMA_ARCHIVE:
-               hsm_set_cl_event(&cl_flags, HE_ARCHIVE);
+               hsm_set_cl_event(&clf_flags, HE_ARCHIVE);
                break;
        case HSMA_RESTORE:
-               hsm_set_cl_event(&cl_flags, HE_RESTORE);
+               hsm_set_cl_event(&clf_flags, HE_RESTORE);
                break;
        case HSMA_REMOVE:
-               hsm_set_cl_event(&cl_flags, HE_REMOVE);
+               hsm_set_cl_event(&clf_flags, HE_REMOVE);
                break;
        case HSMA_CANCEL:
-               hsm_set_cl_event(&cl_flags, HE_CANCEL);
+               hsm_set_cl_event(&clf_flags, HE_CANCEL);
                break;
        default:
                /* Unknown record type, skip changelog. */
-               cl_flags = 0;
+               clf_flags = 0;
                break;
        }
 
-       if (cl_flags != 0)
-               mo_changelog(env, CL_HSM, cl_flags, mdt->mdt_child,
+       if (clf_flags != 0)
+               mo_changelog(env, CL_HSM, clf_flags, mdt->mdt_child,
                             &hai->hai_fid);
 
        if (hai->hai_action == HSMA_RESTORE)
@@ -646,6 +692,7 @@ static int mdt_coordinator(void *data)
 
                hsd.hsd_action_count = 0;
                hsd.hsd_request_count = 0;
+               hsd.hsd_one_restore = false;
 
                rc = cdt_llog_process(mti->mti_env, mdt, mdt_coordinator_cb,
                                      &hsd, 0, 0, WRITE);
@@ -675,10 +722,10 @@ static int mdt_coordinator(void *data)
                updates_sz = updates_cnt * sizeof(*updates);
                OBD_ALLOC_LARGE(updates, updates_sz);
                if (updates == NULL) {
-                       CERROR("%s: Cannot allocate memory (%d o) "
-                              "for %d updates\n",
+                       CERROR("%s: Cannot allocate memory (%d bytes) "
+                               "for %d updates. Too many HSM requests?\n",
                               mdt_obd_name(mdt), updates_sz, updates_cnt);
-                       continue;
+                       goto clean_cb_alloc;
                }
 
                /* here hsd contains a list of requests to be started */
@@ -1347,16 +1394,17 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti,
                                     const struct cdt_agent_req *car,
                                     enum agent_req_status *status)
 {
-       const struct lu_env     *env = mti->mti_env;
-       struct mdt_device       *mdt = mti->mti_mdt;
-       struct coordinator      *cdt = &mdt->mdt_coordinator;
-       struct mdt_object       *obj = NULL;
-       int                      cl_flags = 0, rc = 0;
-       struct md_hsm            mh;
-       bool                     is_mh_changed;
-       bool                     need_changelog = true;
-       ENTRY;
+       const struct lu_env *env = mti->mti_env;
+       struct mdt_device *mdt = mti->mti_mdt;
+       struct coordinator *cdt = &mdt->mdt_coordinator;
+       struct mdt_object *obj = NULL;
+       enum changelog_rec_flags clf_flags = 0;
+       struct md_hsm mh;
+       bool is_mh_changed;
+       bool need_changelog = true;
+       int rc = 0;
 
+       ENTRY;
        /* default is to retry */
        *status = ARS_WAITING;
 
@@ -1406,25 +1454,24 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti,
                               mdt_obd_name(mdt),
                               pgs->hpk_cookie, PFID(&pgs->hpk_fid),
                               pgs->hpk_errval);
-                       hsm_set_cl_error(&cl_flags,
-                                        CLF_HSM_ERROVERFLOW);
+                       hsm_set_cl_error(&clf_flags, CLF_HSM_ERROVERFLOW);
                        rc = -EINVAL;
                } else {
-                       hsm_set_cl_error(&cl_flags, pgs->hpk_errval);
+                       hsm_set_cl_error(&clf_flags, pgs->hpk_errval);
                }
 
                switch (car->car_hai->hai_action) {
                case HSMA_ARCHIVE:
-                       hsm_set_cl_event(&cl_flags, HE_ARCHIVE);
+                       hsm_set_cl_event(&clf_flags, HE_ARCHIVE);
                        break;
                case HSMA_RESTORE:
-                       hsm_set_cl_event(&cl_flags, HE_RESTORE);
+                       hsm_set_cl_event(&clf_flags, HE_RESTORE);
                        break;
                case HSMA_REMOVE:
-                       hsm_set_cl_event(&cl_flags, HE_REMOVE);
+                       hsm_set_cl_event(&clf_flags, HE_REMOVE);
                        break;
                case HSMA_CANCEL:
-                       hsm_set_cl_event(&cl_flags, HE_CANCEL);
+                       hsm_set_cl_event(&clf_flags, HE_CANCEL);
                        CERROR("%s: Failed request %#llx on "DFID
                               " cannot be a CANCEL\n",
                               mdt_obd_name(mdt),
@@ -1444,7 +1491,7 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti,
                *status = ARS_SUCCEED;
                switch (car->car_hai->hai_action) {
                case HSMA_ARCHIVE:
-                       hsm_set_cl_event(&cl_flags, HE_ARCHIVE);
+                       hsm_set_cl_event(&clf_flags, HE_ARCHIVE);
                        /* set ARCHIVE keep EXIST and clear LOST and
                         * DIRTY */
                        mh.mh_arch_ver = pgs->hpk_data_version;
@@ -1453,7 +1500,7 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti,
                        is_mh_changed = true;
                        break;
                case HSMA_RESTORE:
-                       hsm_set_cl_event(&cl_flags, HE_RESTORE);
+                       hsm_set_cl_event(&clf_flags, HE_RESTORE);
 
                        /* do not clear RELEASED and DIRTY here
                         * this will occur in hsm_swap_layouts()
@@ -1465,13 +1512,13 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti,
                        is_mh_changed = true;
                        break;
                case HSMA_REMOVE:
-                       hsm_set_cl_event(&cl_flags, HE_REMOVE);
+                       hsm_set_cl_event(&clf_flags, HE_REMOVE);
                        /* clear ARCHIVED EXISTS and LOST */
                        mh.mh_flags &= ~(HS_ARCHIVED | HS_EXISTS | HS_LOST);
                        is_mh_changed = true;
                        break;
                case HSMA_CANCEL:
-                       hsm_set_cl_event(&cl_flags, HE_CANCEL);
+                       hsm_set_cl_event(&clf_flags, HE_CANCEL);
                        CERROR("%s: Successful request %#llx on "DFID" cannot be a CANCEL\n",
                               mdt_obd_name(mdt),
                               pgs->hpk_cookie,
@@ -1493,7 +1540,7 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti,
         * filled
         */
        if (rc == 0 && !IS_ERR(obj))
-               hsm_set_cl_flags(&cl_flags,
+               hsm_set_cl_flags(&clf_flags,
                                 mh.mh_flags & HS_DIRTY ? CLF_HSM_DIRTY : 0);
 
        /* unlock is done later, after layout lock management */
@@ -1522,7 +1569,7 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti,
                /* restore special case, need to create ChangeLog record
                 * before to give back layout lock to avoid concurrent
                 * file updater to post out of order ChangeLog */
-               mo_changelog(env, CL_HSM, cl_flags, mdt->mdt_child,
+               mo_changelog(env, CL_HSM, clf_flags, mdt->mdt_child,
                             &car->car_hai->hai_fid);
                need_changelog = false;
 
@@ -1534,7 +1581,7 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti,
 out:
        /* always add a ChangeLog record */
        if (need_changelog)
-               mo_changelog(env, CL_HSM, cl_flags, mdt->mdt_child,
+               mo_changelog(env, CL_HSM, clf_flags, mdt->mdt_child,
                             &car->car_hai->hai_fid);
 
        if (!IS_ERR(obj))