Whamcloud - gitweb
LU-11892 hsm: fix memory leak when scheduling HSM requests
[fs/lustre-release.git] / lustre / mdt / mdt_coordinator.c
index 6a7782e..51708d1 100644 (file)
@@ -101,21 +101,21 @@ void mdt_hsm_dump_hal(int level, const char *prefix,
        struct hsm_action_item  *hai;
        char                     buf[12];
 
-       CDEBUG(level, "%s: HAL header: version %X count %d compound %#llx"
+       CDEBUG(level, "%s: HAL header: version %X count %d"
                      " archive_id %d flags %#llx\n",
               prefix, hal->hal_version, hal->hal_count,
-              hal->hal_compound_id, hal->hal_archive_id, hal->hal_flags);
+              hal->hal_archive_id, hal->hal_flags);
 
        hai = hai_first(hal);
        for (i = 0; i < hal->hal_count; i++) {
                sz = hai->hai_len - sizeof(*hai);
                CDEBUG(level, "%s %d: fid="DFID" dfid="DFID
-                      " compound/cookie=%#llx/%#llx"
+                      " cookie=%#llx"
                       " action=%s extent=%#llx-%#llx gid=%#llx"
                       " datalen=%d data=[%s]\n",
                       prefix, i,
                       PFID(&hai->hai_fid), PFID(&hai->hai_dfid),
-                      hal->hal_compound_id, hai->hai_cookie,
+                      hai->hai_cookie,
                       hsm_copytool_action2name(hai->hai_action),
                       hai->hai_extent.offset,
                       hai->hai_extent.length,
@@ -136,21 +136,17 @@ struct hsm_scan_request {
 };
 
 struct hsm_scan_data {
-       struct mdt_thread_info          *mti;
-       char                             fs_name[MTI_NAME_MAXLEN+1];
+       struct mdt_thread_info  *hsd_mti; /* coordinator's thread info */
+       char                     hsd_fsname[MTI_NAME_MAXLEN + 1];
        /* are we scanning the logs for housekeeping, or just looking
         * for new work?
         */
-       bool                             housekeeping;
-       /* request to be send to agents */
-       int                              max_requests;  /** vector size */
-       int                              request_cnt;   /** used count */
-       struct hsm_scan_request         *request;
-};
-
-struct hsm_thread_data {
-       struct mdt_thread_info  *cdt_mti;
-       struct hsm_scan_request *request;
+       bool                     hsd_housekeeping;
+       bool                     hsd_one_restore; /* a RESTORE was queued */
+       int                      hsd_action_count; /* HAIs queued this scan */
+       int                      hsd_request_len; /* hsd_request[] alloc len */
+       int                      hsd_request_count; /* hsd_request[] in use */
+       struct hsm_scan_request *hsd_request; /* HALs built by the scan */
 };
 
 static int mdt_cdt_waiting_cb(const struct lu_env *env,
@@ -162,107 +158,149 @@ static int mdt_cdt_waiting_cb(const struct lu_env *env,
        struct coordinator *cdt = &mdt->mdt_coordinator;
        struct hsm_scan_request *request;
        struct hsm_action_item *hai;
+       size_t hai_size;
+       u32 archive_id;
        int i;
 
        /* Are agents full? */
        if (atomic_read(&cdt->cdt_request_count) >= cdt->cdt_max_requests)
-               RETURN(0);
+               RETURN(hsd->hsd_housekeeping ? 0 : LLOG_PROC_BREAK);
+
+       if (hsd->hsd_action_count + atomic_read(&cdt->cdt_request_count) >=
+           cdt->cdt_max_requests) {
+               /* We cannot send any more request
+                *
+                *                     *** SPECIAL CASE ***
+                *
+                * Restore requests are too important not to schedule at least
+                * one, everytime we can.
+                */
+               if (larr->arr_hai.hai_action != HSMA_RESTORE ||
+                   hsd->hsd_one_restore)
+                       RETURN(hsd->hsd_housekeeping ? 0 : LLOG_PROC_BREAK);
+       }
 
-       /* first search whether the request is found in the list we
-        * have built. */
+       hai_size = cfs_size_round(larr->arr_hai.hai_len);
+       archive_id = larr->arr_archive_id;
+
+       /* Can we add this action to one of the existing HALs in hsd. */
        request = NULL;
-       for (i = 0; i < hsd->request_cnt; i++) {
-               if (hsd->request[i].hal->hal_compound_id ==
-                   larr->arr_compound_id) {
-                       request = &hsd->request[i];
+       for (i = 0; i < hsd->hsd_request_count; i++) {
+               if (hsd->hsd_request[i].hal->hal_archive_id == archive_id &&
+                   hsd->hsd_request[i].hal_used_sz + hai_size <=
+                   LDLM_MAXREQSIZE) {
+                       request = &hsd->hsd_request[i];
                        break;
                }
        }
 
-       if (!request) {
-               struct hsm_action_list *hal;
-
-               if (hsd->request_cnt == hsd->max_requests) {
-                       if (!hsd->housekeeping) {
-                               /* The request array is full, stop
-                                * here. There might be more known
-                                * requests that could be merged, but
-                                * this avoid analyzing too many llogs
-                                * for minor gains. */
-                               RETURN(LLOG_PROC_BREAK);
-                       } else {
-                               /* Unknown request and no more room
-                                * for a new request. Continue to scan
-                                * to find other entries for already
-                                * existing requests. */
-                               RETURN(0);
+       /* Are we trying to force-schedule a request? */
+       if (hsd->hsd_action_count + atomic_read(&cdt->cdt_request_count) >=
+           cdt->cdt_max_requests) {
+               /* Is there really no compatible hsm_scan_request? */
+               if (!request) {
+                       for (i -= 1; i >= 0; i--) {
+                               if (hsd->hsd_request[i].hal->hal_archive_id ==
+                                   archive_id) {
+                                       request = &hsd->hsd_request[i];
+                                       break;
+                               }
                        }
                }
 
-               request = &hsd->request[hsd->request_cnt];
+               /* Make room for the hai */
+               if (request) {
+                       /* Discard the last hai until there is enough space */
+                       do {
+                               request->hal->hal_count--;
+
+                               hai = hai_first(request->hal);
+                               for (i = 0; i < request->hal->hal_count; i++)
+                                       hai = hai_next(hai);
+                               request->hal_used_sz -=
+                                       cfs_size_round(hai->hai_len);
+                               hsd->hsd_action_count--;
+                       } while (request->hal_used_sz + hai_size >
+                                LDLM_MAXREQSIZE);
+               } else if (hsd->hsd_housekeeping) {
+                       struct hsm_scan_request *tmp;
+
+                       /* Discard the (whole) last hal (OBD_ALLOC_LARGE'd) */
+                       hsd->hsd_request_count--;
+                       LASSERT(hsd->hsd_request_count >= 0);
+                       tmp = &hsd->hsd_request[hsd->hsd_request_count];
+                       hsd->hsd_action_count -= tmp->hal->hal_count;
+                       LASSERT(hsd->hsd_action_count >= 0);
+                       OBD_FREE_LARGE(tmp->hal, tmp->hal_sz);
+               } else {
+                       /* Bailing out, this code path is too hot */
+                       RETURN(LLOG_PROC_BREAK);
+
+               }
+       }
+
+       if (!request) {
+               struct hsm_action_list *hal;
+
+               LASSERT(hsd->hsd_request_count < hsd->hsd_request_len);
+               request = &hsd->hsd_request[hsd->hsd_request_count];
 
                /* allocates hai vector size just needs to be large
                 * enough */
                request->hal_sz = sizeof(*request->hal) +
-                       cfs_size_round(MTI_NAME_MAXLEN + 1) +
-                       2 * cfs_size_round(larr->arr_hai.hai_len);
-               OBD_ALLOC(hal, request->hal_sz);
+                       cfs_size_round(MTI_NAME_MAXLEN + 1) + 2 * hai_size;
+               OBD_ALLOC_LARGE(hal, request->hal_sz);
                if (!hal)
                        RETURN(-ENOMEM);
 
                hal->hal_version = HAL_VERSION;
-               strlcpy(hal->hal_fsname, hsd->fs_name, MTI_NAME_MAXLEN + 1);
-               hal->hal_compound_id = larr->arr_compound_id;
+               strlcpy(hal->hal_fsname, hsd->hsd_fsname, MTI_NAME_MAXLEN + 1);
                hal->hal_archive_id = larr->arr_archive_id;
                hal->hal_flags = larr->arr_flags;
                hal->hal_count = 0;
                request->hal_used_sz = hal_size(hal);
                request->hal = hal;
-               hsd->request_cnt++;
-               hai = hai_first(hal);
-       } else {
-               /* request is known */
-               /* we check if record archive num is the same as the
-                * known request, if not we will serve it in multiple
-                * time because we do not know if the agent can serve
-                * multiple backend a use case is a compound made of
-                * multiple restore where the files are not archived
-                * in the same backend */
-               if (larr->arr_archive_id != request->hal->hal_archive_id)
-                       RETURN(0);
+               hsd->hsd_request_count++;
+       } else if (request->hal_sz < request->hal_used_sz + hai_size) {
+               /* Not enough room, need an extension */
+               void *hal_buffer;
+               int sz;
 
-               if (request->hal_sz < request->hal_used_sz +
-                   cfs_size_round(larr->arr_hai.hai_len)) {
-                       /* Not enough room, need an extension */
-                       void *hal_buffer;
-                       int sz;
-
-                       sz = 2 * request->hal_sz;
-                       OBD_ALLOC(hal_buffer, sz);
-                       if (!hal_buffer)
-                               RETURN(-ENOMEM);
-                       memcpy(hal_buffer, request->hal, request->hal_used_sz);
-                       OBD_FREE(request->hal, request->hal_sz);
-                       request->hal = hal_buffer;
-                       request->hal_sz = sz;
-               }
+               sz = min_t(int, 2 * request->hal_sz, LDLM_MAXREQSIZE);
+               LASSERT(request->hal_used_sz + hai_size < sz);
 
-               hai = hai_first(request->hal);
-               for (i = 0; i < request->hal->hal_count; i++)
-                       hai = hai_next(hai);
+               OBD_ALLOC_LARGE(hal_buffer, sz);
+               if (!hal_buffer)
+                       RETURN(-ENOMEM);
+
+               memcpy(hal_buffer, request->hal, request->hal_used_sz);
+               OBD_FREE_LARGE(request->hal, request->hal_sz);
+               request->hal = hal_buffer;
+               request->hal_sz = sz;
        }
 
+       hai = hai_first(request->hal);
+       for (i = 0; i < request->hal->hal_count; i++)
+               hai = hai_next(hai);
+
        memcpy(hai, &larr->arr_hai, larr->arr_hai.hai_len);
-       hai->hai_cookie = larr->arr_hai.hai_cookie;
-       hai->hai_gid = larr->arr_hai.hai_gid;
 
-       request->hal_used_sz += cfs_size_round(hai->hai_len);
+       request->hal_used_sz += hai_size;
        request->hal->hal_count++;
 
-       if (hai->hai_action != HSMA_CANCEL)
+       hsd->hsd_action_count++;
+
+       switch (hai->hai_action) {
+       case HSMA_CANCEL:
+               break;
+       case HSMA_RESTORE:
+               hsd->hsd_one_restore = true;
+               /* Intentional fallthrough */
+       default:
                cdt_agent_record_hash_add(cdt, hai->hai_cookie,
                                          llh->lgh_hdr->llh_cat_idx,
                                          larr->arr_hdr.lrh_index);
+       }
 
        RETURN(0);
 }
@@ -278,10 +316,10 @@ static int mdt_cdt_started_cb(const struct lu_env *env,
        struct cdt_agent_req *car;
        time64_t now = ktime_get_real_seconds();
        time64_t last;
-       int cl_flags;
+       enum changelog_rec_flags clf_flags;
        int rc;
 
-       if (!hsd->housekeeping)
+       if (!hsd->hsd_housekeeping)
                RETURN(0);
 
        /* we search for a running request
@@ -310,38 +348,38 @@ static int mdt_cdt_started_cb(const struct lu_env *env,
        }
 
        /* Emit a changelog record for the failed action.*/
-       cl_flags = 0;
-       hsm_set_cl_error(&cl_flags, ECANCELED);
+       clf_flags = 0;
+       hsm_set_cl_error(&clf_flags, ECANCELED);
 
        switch (hai->hai_action) {
        case HSMA_ARCHIVE:
-               hsm_set_cl_event(&cl_flags, HE_ARCHIVE);
+               hsm_set_cl_event(&clf_flags, HE_ARCHIVE);
                break;
        case HSMA_RESTORE:
-               hsm_set_cl_event(&cl_flags, HE_RESTORE);
+               hsm_set_cl_event(&clf_flags, HE_RESTORE);
                break;
        case HSMA_REMOVE:
-               hsm_set_cl_event(&cl_flags, HE_REMOVE);
+               hsm_set_cl_event(&clf_flags, HE_REMOVE);
                break;
        case HSMA_CANCEL:
-               hsm_set_cl_event(&cl_flags, HE_CANCEL);
+               hsm_set_cl_event(&clf_flags, HE_CANCEL);
                break;
        default:
                /* Unknown record type, skip changelog. */
-               cl_flags = 0;
+               clf_flags = 0;
                break;
        }
 
-       if (cl_flags != 0)
-               mo_changelog(env, CL_HSM, cl_flags, mdt->mdt_child,
+       if (clf_flags != 0)
+               mo_changelog(env, CL_HSM, clf_flags, mdt->mdt_child,
                             &hai->hai_fid);
 
        if (hai->hai_action == HSMA_RESTORE)
-               cdt_restore_handle_del(hsd->mti, cdt, &hai->hai_fid);
+               cdt_restore_handle_del(hsd->hsd_mti, cdt, &hai->hai_fid);
 
        larr->arr_status = ARS_CANCELED;
        larr->arr_req_change = now;
-       rc = llog_write(hsd->mti->mti_env, llh, &larr->arr_hdr,
+       rc = llog_write(hsd->hsd_mti->mti_env, llh, &larr->arr_hdr,
                        larr->arr_hdr.lrh_index);
        if (rc < 0) {
                CERROR("%s: cannot update agent log: rc = %d\n",
@@ -377,7 +415,7 @@ static int mdt_coordinator_cb(const struct lu_env *env,
 {
        struct llog_agent_req_rec *larr = (struct llog_agent_req_rec *)hdr;
        struct hsm_scan_data *hsd = data;
-       struct mdt_device *mdt = hsd->mti->mti_mdt;
+       struct mdt_device *mdt = hsd->hsd_mti->mti_mdt;
        struct coordinator *cdt = &mdt->mdt_coordinator;
        ENTRY;
 
@@ -389,7 +427,7 @@ static int mdt_coordinator_cb(const struct lu_env *env,
        case ARS_STARTED:
                RETURN(mdt_cdt_started_cb(env, mdt, llh, larr, hsd));
        default:
-               if (!hsd->housekeeping)
+               if (!hsd->hsd_housekeeping)
                        RETURN(0);
 
                if ((larr->arr_req_change + cdt->cdt_grace_delay) <
@@ -477,6 +515,9 @@ static void mdt_hsm_cdt_cleanup(struct mdt_device *mdt)
        down_write(&cdt->cdt_agent_lock);
        list_for_each_entry_safe(ha, tmp2, &cdt->cdt_agents, ha_list) {
                list_del(&ha->ha_list);
+               if (ha->ha_archive_cnt != 0)
+                       OBD_FREE(ha->ha_archive_id, ha->ha_archive_cnt *
+                                sizeof(*ha->ha_archive_id));
                OBD_FREE_PTR(ha);
        }
        up_write(&cdt->cdt_agent_lock);
@@ -556,26 +597,21 @@ static int set_cdt_state(struct coordinator *cdt, enum cdt_states new_state)
  */
 static int mdt_coordinator(void *data)
 {
-       struct hsm_thread_data  *thread_data = data;
-       struct mdt_thread_info  *mti = thread_data->cdt_mti;
+       struct mdt_thread_info  *mti = data;
        struct mdt_device       *mdt = mti->mti_mdt;
        struct coordinator      *cdt = &mdt->mdt_coordinator;
        struct hsm_scan_data     hsd = { NULL };
        time64_t                 last_housekeeping = 0;
-       int                      rc = 0;
-       int                      request_sz;
+       size_t request_sz = 0;
+       int rc;
        ENTRY;
 
-       /* set up hsd->request and max_requests */
-       hsd.max_requests = cdt->cdt_max_requests;
-       request_sz = hsd.max_requests * sizeof(*hsd.request);
-       hsd.request = thread_data->request;
-
        CDEBUG(D_HSM, "%s: coordinator thread starting, pid=%d\n",
               mdt_obd_name(mdt), current_pid());
 
-       hsd.mti = mti;
-       obd_uuid2fsname(hsd.fs_name, mdt_obd_name(mdt), MTI_NAME_MAXLEN);
+       hsd.hsd_mti = mti;
+       obd_uuid2fsname(hsd.hsd_fsname, mdt_obd_name(mdt),
+                       sizeof(hsd.hsd_fsname));
 
        set_cdt_state(cdt, CDT_RUNNING);
 
@@ -619,9 +655,9 @@ static int mdt_coordinator(void *data)
                if (last_housekeeping + cdt->cdt_loop_period <=
                    ktime_get_real_seconds()) {
                        last_housekeeping = ktime_get_real_seconds();
-                       hsd.housekeeping = true;
+                       hsd.hsd_housekeeping = true;
                } else if (cdt->cdt_event) {
-                       hsd.housekeeping = false;
+                       hsd.hsd_housekeeping = false;
                } else {
                        continue;
                }
@@ -630,7 +666,7 @@ static int mdt_coordinator(void *data)
 
                CDEBUG(D_HSM, "coordinator starts reading llog\n");
 
-               if (hsd.max_requests != cdt->cdt_max_requests) {
+               if (hsd.hsd_request_len != cdt->cdt_max_requests) {
                        /* cdt_max_requests has changed,
                         * we need to allocate a new buffer
                         */
@@ -641,25 +677,30 @@ static int mdt_coordinator(void *data)
                        if (!tmp) {
                                CERROR("Failed to resize request buffer, "
                                       "keeping it at %d\n",
-                                      hsd.max_requests);
-                               cdt->cdt_max_requests = hsd.max_requests;
+                                      hsd.hsd_request_len);
                        } else {
-                               OBD_FREE_LARGE(hsd.request, request_sz);
-                               hsd.max_requests = max_requests;
-                               request_sz = hsd.max_requests *
+                               if (hsd.hsd_request != NULL)
+                                       OBD_FREE_LARGE(hsd.hsd_request,
+                                                      request_sz);
+
+                               hsd.hsd_request_len = max_requests;
+                               request_sz = hsd.hsd_request_len *
                                        sizeof(struct hsm_scan_request);
-                               hsd.request = tmp;
+                               hsd.hsd_request = tmp;
                        }
                }
 
-               hsd.request_cnt = 0;
+               hsd.hsd_action_count = 0;
+               hsd.hsd_request_count = 0;
+               hsd.hsd_one_restore = false;
 
                rc = cdt_llog_process(mti->mti_env, mdt, mdt_coordinator_cb,
                                      &hsd, 0, 0, WRITE);
                if (rc < 0)
                        goto clean_cb_alloc;
 
-               CDEBUG(D_HSM, "found %d requests to send\n", hsd.request_cnt);
+               CDEBUG(D_HSM, "found %d requests to send\n",
+                      hsd.hsd_request_count);
 
                if (list_empty(&cdt->cdt_agents)) {
                        CDEBUG(D_HSM, "no agent available, "
@@ -669,9 +710,9 @@ static int mdt_coordinator(void *data)
 
                /* Compute how many HAI we have in all the requests */
                updates_cnt = 0;
-               for (i = 0; i < hsd.request_cnt; i++) {
+               for (i = 0; i < hsd.hsd_request_count; i++) {
                        const struct hsm_scan_request *request =
-                               &hsd.request[i];
+                               &hsd.hsd_request[i];
 
                        updates_cnt += request->hal->hal_count;
                }
@@ -679,17 +720,17 @@ static int mdt_coordinator(void *data)
                /* Allocate a temporary array to store the cookies to
                 * update, and their status. */
                updates_sz = updates_cnt * sizeof(*updates);
-               OBD_ALLOC(updates, updates_sz);
+               OBD_ALLOC_LARGE(updates, updates_sz);
                if (updates == NULL) {
-                       CERROR("%s: Cannot allocate memory (%d o) "
-                              "for %d updates\n",
+                       CERROR("%s: Cannot allocate memory (%d bytes) "
+                               "for %d updates. Too many HSM requests?\n",
                               mdt_obd_name(mdt), updates_sz, updates_cnt);
-                       continue;
+                       goto clean_cb_alloc;
                }
 
                /* here hsd contains a list of requests to be started */
-               for (i = 0; i < hsd.request_cnt; i++) {
-                       struct hsm_scan_request *request = &hsd.request[i];
+               for (i = 0; i < hsd.hsd_request_count; i++) {
+                       struct hsm_scan_request *request = &hsd.hsd_request[i];
                        struct hsm_action_list  *hal = request->hal;
                        struct hsm_action_item  *hai;
                        int                      j;
@@ -728,19 +769,19 @@ static int mdt_coordinator(void *data)
                                       mdt_obd_name(mdt), rc, update_idx);
                }
 
-               OBD_FREE(updates, updates_sz);
+               OBD_FREE_LARGE(updates, updates_sz);
 
 clean_cb_alloc:
                /* free hal allocated by callback */
-               for (i = 0; i < hsd.request_cnt; i++) {
-                       struct hsm_scan_request *request = &hsd.request[i];
+               for (i = 0; i < hsd.hsd_request_count; i++) {
+                       struct hsm_scan_request *request = &hsd.hsd_request[i];
 
-                       OBD_FREE(request->hal, request->hal_sz);
+                       OBD_FREE_LARGE(request->hal, request->hal_sz);
                }
        }
 
-       if (hsd.request)
-               OBD_FREE_LARGE(hsd.request, request_sz);
+       if (hsd.hsd_request != NULL)
+               OBD_FREE_LARGE(hsd.hsd_request, request_sz);
 
        mdt_hsm_cdt_cleanup(mdt);
 
@@ -1032,10 +1073,6 @@ int mdt_hsm_cdt_init(struct mdt_device *mdt)
        cdt->cdt_policy = CDT_DEFAULT_POLICY;
        cdt->cdt_active_req_timeout = 3600;
 
-       /* Initialize cdt_compound_id here to allow its usage for
-        * delayed requests from RAoLU policy */
-       atomic_set(&cdt->cdt_compound_id, ktime_get_real_seconds());
-
        /* by default do not remove archives on last unlink */
        cdt->cdt_remove_archive_on_last_unlink = false;
 
@@ -1085,11 +1122,10 @@ int  mdt_hsm_cdt_fini(struct mdt_device *mdt)
 static int mdt_hsm_cdt_start(struct mdt_device *mdt)
 {
        struct coordinator      *cdt = &mdt->mdt_coordinator;
+       struct mdt_thread_info *cdt_mti;
        int                      rc;
        void                    *ptr;
        struct task_struct      *task;
-       int                      request_sz;
-       struct hsm_thread_data   thread_data;
        ENTRY;
 
        /* functions defined but not yet used
@@ -1122,9 +1158,8 @@ static int mdt_hsm_cdt_start(struct mdt_device *mdt)
         * /proc entries are created by the coordinator thread */
 
        /* set up list of started restore requests */
-       thread_data.cdt_mti =
-               lu_context_key_get(&cdt->cdt_env.le_ctx, &mdt_thread_key);
-       rc = mdt_hsm_pending_restore(thread_data.cdt_mti);
+       cdt_mti = lu_context_key_get(&cdt->cdt_env.le_ctx, &mdt_thread_key);
+       rc = mdt_hsm_pending_restore(cdt_mti);
        if (rc)
                CERROR("%s: cannot take the layout locks needed"
                       " for registered restore: %d\n",
@@ -1133,19 +1168,10 @@ static int mdt_hsm_cdt_start(struct mdt_device *mdt)
        if (mdt->mdt_bottom->dd_rdonly)
                RETURN(0);
 
-       /* Allocate the initial hsd.request[] vector*/
-       request_sz = cdt->cdt_max_requests * sizeof(struct hsm_scan_request);
-       OBD_ALLOC_LARGE(thread_data.request, request_sz);
-       if (!thread_data.request) {
-               set_cdt_state(cdt, CDT_STOPPED);
-               RETURN(-ENOMEM);
-       }
-
-       task = kthread_run(mdt_coordinator, &thread_data, "hsm_cdtr");
+       task = kthread_run(mdt_coordinator, cdt_mti, "hsm_cdtr");
        if (IS_ERR(task)) {
                rc = PTR_ERR(task);
                set_cdt_state(cdt, CDT_STOPPED);
-               OBD_FREE(thread_data.request, request_sz);
                CERROR("%s: error starting coordinator thread: %d\n",
                       mdt_obd_name(mdt), rc);
        } else {
@@ -1278,8 +1304,7 @@ int mdt_hsm_add_hal(struct mdt_thread_info *mti,
                                GOTO(out, rc);
                }
 
-               car = mdt_cdt_alloc_request(hal->hal_compound_id,
-                                           hal->hal_archive_id, hal->hal_flags,
+               car = mdt_cdt_alloc_request(hal->hal_archive_id, hal->hal_flags,
                                            uuid, hai);
                if (IS_ERR(car))
                        GOTO(out, rc = PTR_ERR(car));
@@ -1342,7 +1367,15 @@ static int hsm_swap_layouts(struct mdt_thread_info *mti,
                                     mdt_object_child(obj),
                                     mdt_object_child(dobj),
                                     SWAP_LAYOUTS_MDS_HSM);
-
+       if (rc == 0) {
+               rc = mdt_lsom_downgrade(mti, obj);
+               if (rc)
+                       CDEBUG(D_INODE,
+                              "%s: File fid="DFID" SOM "
+                              "downgrade failed, rc = %d\n",
+                              mdt_obd_name(mti->mti_mdt),
+                              PFID(mdt_object_fid(obj)), rc);
+       }
 out_dobj:
        mdt_object_unlock_put(mti, dobj, dlh, 1);
 out:
@@ -1361,16 +1394,17 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti,
                                     const struct cdt_agent_req *car,
                                     enum agent_req_status *status)
 {
-       const struct lu_env     *env = mti->mti_env;
-       struct mdt_device       *mdt = mti->mti_mdt;
-       struct coordinator      *cdt = &mdt->mdt_coordinator;
-       struct mdt_object       *obj = NULL;
-       int                      cl_flags = 0, rc = 0;
-       struct md_hsm            mh;
-       bool                     is_mh_changed;
-       bool                     need_changelog = true;
-       ENTRY;
+       const struct lu_env *env = mti->mti_env;
+       struct mdt_device *mdt = mti->mti_mdt;
+       struct coordinator *cdt = &mdt->mdt_coordinator;
+       struct mdt_object *obj = NULL;
+       enum changelog_rec_flags clf_flags = 0;
+       struct md_hsm mh;
+       bool is_mh_changed;
+       bool need_changelog = true;
+       int rc = 0;
 
+       ENTRY;
        /* default is to retry */
        *status = ARS_WAITING;
 
@@ -1420,25 +1454,24 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti,
                               mdt_obd_name(mdt),
                               pgs->hpk_cookie, PFID(&pgs->hpk_fid),
                               pgs->hpk_errval);
-                       hsm_set_cl_error(&cl_flags,
-                                        CLF_HSM_ERROVERFLOW);
+                       hsm_set_cl_error(&clf_flags, CLF_HSM_ERROVERFLOW);
                        rc = -EINVAL;
                } else {
-                       hsm_set_cl_error(&cl_flags, pgs->hpk_errval);
+                       hsm_set_cl_error(&clf_flags, pgs->hpk_errval);
                }
 
                switch (car->car_hai->hai_action) {
                case HSMA_ARCHIVE:
-                       hsm_set_cl_event(&cl_flags, HE_ARCHIVE);
+                       hsm_set_cl_event(&clf_flags, HE_ARCHIVE);
                        break;
                case HSMA_RESTORE:
-                       hsm_set_cl_event(&cl_flags, HE_RESTORE);
+                       hsm_set_cl_event(&clf_flags, HE_RESTORE);
                        break;
                case HSMA_REMOVE:
-                       hsm_set_cl_event(&cl_flags, HE_REMOVE);
+                       hsm_set_cl_event(&clf_flags, HE_REMOVE);
                        break;
                case HSMA_CANCEL:
-                       hsm_set_cl_event(&cl_flags, HE_CANCEL);
+                       hsm_set_cl_event(&clf_flags, HE_CANCEL);
                        CERROR("%s: Failed request %#llx on "DFID
                               " cannot be a CANCEL\n",
                               mdt_obd_name(mdt),
@@ -1458,7 +1491,7 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti,
                *status = ARS_SUCCEED;
                switch (car->car_hai->hai_action) {
                case HSMA_ARCHIVE:
-                       hsm_set_cl_event(&cl_flags, HE_ARCHIVE);
+                       hsm_set_cl_event(&clf_flags, HE_ARCHIVE);
                        /* set ARCHIVE keep EXIST and clear LOST and
                         * DIRTY */
                        mh.mh_arch_ver = pgs->hpk_data_version;
@@ -1467,7 +1500,7 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti,
                        is_mh_changed = true;
                        break;
                case HSMA_RESTORE:
-                       hsm_set_cl_event(&cl_flags, HE_RESTORE);
+                       hsm_set_cl_event(&clf_flags, HE_RESTORE);
 
                        /* do not clear RELEASED and DIRTY here
                         * this will occur in hsm_swap_layouts()
@@ -1479,13 +1512,13 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti,
                        is_mh_changed = true;
                        break;
                case HSMA_REMOVE:
-                       hsm_set_cl_event(&cl_flags, HE_REMOVE);
+                       hsm_set_cl_event(&clf_flags, HE_REMOVE);
                        /* clear ARCHIVED EXISTS and LOST */
                        mh.mh_flags &= ~(HS_ARCHIVED | HS_EXISTS | HS_LOST);
                        is_mh_changed = true;
                        break;
                case HSMA_CANCEL:
-                       hsm_set_cl_event(&cl_flags, HE_CANCEL);
+                       hsm_set_cl_event(&clf_flags, HE_CANCEL);
                        CERROR("%s: Successful request %#llx on "DFID" cannot be a CANCEL\n",
                               mdt_obd_name(mdt),
                               pgs->hpk_cookie,
@@ -1507,7 +1540,7 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti,
         * filled
         */
        if (rc == 0 && !IS_ERR(obj))
-               hsm_set_cl_flags(&cl_flags,
+               hsm_set_cl_flags(&clf_flags,
                                 mh.mh_flags & HS_DIRTY ? CLF_HSM_DIRTY : 0);
 
        /* unlock is done later, after layout lock management */
@@ -1536,7 +1569,7 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti,
                /* restore special case, need to create ChangeLog record
                 * before to give back layout lock to avoid concurrent
                 * file updater to post out of order ChangeLog */
-               mo_changelog(env, CL_HSM, cl_flags, mdt->mdt_child,
+               mo_changelog(env, CL_HSM, clf_flags, mdt->mdt_child,
                             &car->car_hai->hai_fid);
                need_changelog = false;
 
@@ -1548,7 +1581,7 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti,
 out:
        /* always add a ChangeLog record */
        if (need_changelog)
-               mo_changelog(env, CL_HSM, cl_flags, mdt->mdt_child,
+               mo_changelog(env, CL_HSM, clf_flags, mdt->mdt_child,
                             &car->car_hai->hai_fid);
 
        if (!IS_ERR(obj))
@@ -1807,7 +1840,6 @@ static int hsm_cancel_all_actions(struct mdt_device *mdt)
                obd_uuid2fsname(hal->hal_fsname, mdt_obd_name(mdt),
                                MTI_NAME_MAXLEN);
                hal->hal_fsname[MTI_NAME_MAXLEN] = '\0';
-               hal->hal_compound_id = car->car_compound_id;
                hal->hal_archive_id = car->car_archive_id;
                hal->hal_flags = car->car_flags;
                hal->hal_count = 0;
@@ -2087,14 +2119,15 @@ mdt_hsm_##VAR##_seq_write(struct file *file, const char __user *buffer, \
        struct seq_file         *m = file->private_data;                \
        struct mdt_device       *mdt = m->private;                      \
        struct coordinator      *cdt = &mdt->mdt_coordinator;           \
-       __s64                    val;                                   \
-       int                      rc;                                    \
-       ENTRY;                                                          \
+       unsigned int val;                                               \
+       int rc;                                                         \
                                                                        \
-       rc = lprocfs_str_to_s64(buffer, count, &val);                   \
+       ENTRY;                                                          \
+       rc = kstrtouint_from_user(buffer, count, 0, &val);              \
        if (rc)                                                         \
                RETURN(rc);                                             \
-       if (val > 0 && val < INT_MAX) {                                 \
+                                                                       \
+       if (val !=  0) {                                                \
                cdt->VAR = val;                                         \
                RETURN(count);                                          \
        }                                                               \
@@ -2364,11 +2397,11 @@ mdt_hsm_cdt_raolu_seq_write(struct file *file, const char __user *buffer,
        struct seq_file *m = file->private_data;
        struct mdt_device *mdt = m->private;
        struct coordinator *cdt = &mdt->mdt_coordinator;
-       __s64 val;
+       bool val;
        int rc;
-       ENTRY;
 
-       rc = lprocfs_str_to_s64(buffer, count, &val);
+       ENTRY;
+       rc = kstrtobool_from_user(buffer, count, &val);
        if (rc < 0)
                RETURN(rc);