Whamcloud - gitweb
LU-11892 hsm: fix memory leak when scheduling HSM requests
[fs/lustre-release.git] / lustre / mdt / mdt_hsm_cdt_agent.c
index c3d4182..8db738d 100644 (file)
@@ -22,7 +22,7 @@
 /*
  * (C) Copyright 2012 Commissariat a l'energie atomique et aux energies
  *
- * Copyright (c) 2016, Intel Corporation.
+ * Copyright (c) 2016, 2017, Intel Corporation.
  *     alternatives
  *
  */
@@ -40,7 +40,6 @@
 #include <obd.h>
 #include <obd_support.h>
 #include <lustre_export.h>
-#include <lustre/lustre_user.h>
 #include <lprocfs_status.h>
 #include <lustre_kernelcomm.h>
 #include "mdt_internal.h"
@@ -141,6 +140,10 @@ out_free:
        if (ha != NULL)
                OBD_FREE_PTR(ha);
 out:
+       /* wake the coordinator to potentially schedule requests */
+       if (rc == -EEXIST || rc == 0)
+               mdt_hsm_cdt_event(cdt);
+
        return rc;
 }
 
@@ -311,7 +314,6 @@ int mdt_hsm_find_best_agent(struct coordinator *cdt, __u32 archive,
 int mdt_hsm_send_action_to_each_archive(struct mdt_thread_info *mti,
                                    struct hsm_action_item *hai)
 {
-       __u64 compound_id;
        struct hsm_agent *ha;
        __u32 archive_mask = 0;
        struct coordinator *cdt = &mti->mti_mdt->mdt_coordinator;
@@ -329,13 +331,10 @@ int mdt_hsm_send_action_to_each_archive(struct mdt_thread_info *mti,
                                continue;
                        archive_mask |= (1 << ha->ha_archive_id[i]);
 
-                       /* XXX: instead of creating one request record per
-                        * new action, it could make sense to gather
-                        * all for the same archive_id as one compound
-                        * request/id, like in mdt_hsm_add_actions() ?? */
-                       compound_id = atomic_inc_return(&cdt->cdt_compound_id);
+                       /* XXX: it could make sense to gather all
+                        * actions for the same archive_id like in
+                        * mdt_hsm_add_actions() ?? */
                        rc = mdt_agent_record_add(mti->mti_env, mti->mti_mdt,
-                                                 compound_id,
                                                  ha->ha_archive_id[i], 0,
                                                  hai);
                        if (rc) {
@@ -362,7 +361,7 @@ int mdt_hsm_send_action_to_each_archive(struct mdt_thread_info *mti,
 }
 
 /**
- * send a compound request to the agent
+ * send a HAL to the agent
  * \param mti [IN] context
  * \param hal [IN] request (can be a kuc payload)
  * \param purge [IN] purge mode (no record)
@@ -405,6 +404,8 @@ int mdt_hsm_agent_send(struct mdt_thread_info *mti,
                hai = hai_first(hal);
                for (i = 0; i < hal->hal_count; i++,
                     hai = hai_next(hai)) {
+                       struct hsm_record_update update;
+
                        /* only removes are concerned */
                        if (hai->hai_action != HSMA_REMOVE) {
                                /* count if other actions than HSMA_REMOVE,
@@ -424,9 +425,11 @@ int mdt_hsm_agent_send(struct mdt_thread_info *mti,
                         * XXX: this should only cause duplicates to be sent,
                         * unless a method to record already successfully
                         * reached archive_ids is implemented */
+
+                       update.cookie = hai->hai_cookie;
+                       update.status = ARS_SUCCEED;
                        rc2 = mdt_agent_record_update(mti->mti_env, mdt,
-                                                    &hai->hai_cookie,
-                                                    1, ARS_SUCCEED);
+                                                     &update, 1);
                        if (rc2) {
                                CERROR("%s: mdt_agent_record_update() "
                                      "failed, cannot update "
@@ -463,94 +466,77 @@ int mdt_hsm_agent_send(struct mdt_thread_info *mti,
        fail_request = false;
        hai = hai_first(hal);
        for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai)) {
-               if (hai->hai_action != HSMA_CANCEL) {
-                       struct mdt_object *obj;
-                       struct md_hsm hsm;
+               struct mdt_object *obj;
+               struct md_hsm hsm;
 
-                       obj = mdt_hsm_get_md_hsm(mti, &hai->hai_fid, &hsm);
-                       if (!IS_ERR(obj) && obj != NULL) {
-                               mdt_object_put(mti->mti_env, obj);
-                       } else {
-                               if (hai->hai_action == HSMA_REMOVE)
-                                       continue;
-
-                               if (obj == NULL) {
-                                       fail_request = true;
-                                       rc = mdt_agent_record_update(
-                                                            mti->mti_env, mdt,
-                                                            &hai->hai_cookie,
-                                                            1, ARS_FAILED);
-                                       if (rc) {
-                                               CERROR(
-                                             "%s: mdt_agent_record_update() "
-                                             "failed, cannot update "
-                                             "status to %s for cookie "
-                                             "%#llx: rc = %d\n",
-                                             mdt_obd_name(mdt),
-                                             agent_req_status2name(ARS_FAILED),
-                                             hai->hai_cookie, rc);
-                                               GOTO(out_buf, rc);
-                                       }
-                                       continue;
-                               }
-                               GOTO(out_buf, rc = PTR_ERR(obj));
+               if (hai->hai_action == HSMA_CANCEL)
+                       continue;
+
+               obj = mdt_hsm_get_md_hsm(mti, &hai->hai_fid, &hsm);
+               if (!IS_ERR(obj)) {
+                       mdt_object_put(mti->mti_env, obj);
+               } else if (PTR_ERR(obj) == -ENOENT) {
+                       struct hsm_record_update update = {
+                               .cookie = hai->hai_cookie,
+                               .status = ARS_FAILED,
+                       };
+
+                       if (hai->hai_action == HSMA_REMOVE)
+                               continue;
+
+                       fail_request = true;
+                       rc = mdt_agent_record_update(mti->mti_env, mdt,
+                                                    &update, 1);
+                       if (rc < 0) {
+                               CERROR("%s: mdt_agent_record_update() failed, "
+                                      "cannot update status to %s for cookie "
+                                      "%#llx: rc = %d\n",
+                                      mdt_obd_name(mdt),
+                                      agent_req_status2name(ARS_FAILED),
+                                      hai->hai_cookie, rc);
+                               GOTO(out_buf, rc);
                        }
 
-                       if (!mdt_hsm_is_action_compat(hai, hal->hal_archive_id,
-                                                     hal->hal_flags, &hsm)) {
-                               /* incompatible request, we abort the request */
-                               /* next time coordinator will wake up, it will
-                                * make the same compound with valid only
-                                * records */
-                               fail_request = true;
-                               rc = mdt_agent_record_update(mti->mti_env, mdt,
-                                                            &hai->hai_cookie,
-                                                            1, ARS_FAILED);
-                               if (rc) {
-                                       CERROR("%s: mdt_agent_record_update() "
-                                             "failed, cannot update "
-                                             "status to %s for cookie "
-                                             "%#llx: rc = %d\n",
-                                             mdt_obd_name(mdt),
-                                             agent_req_status2name(ARS_FAILED),
-                                             hai->hai_cookie, rc);
-                                       GOTO(out_buf, rc);
-                               }
-
-                               /* if restore and record status updated, give
-                                * back granted layout lock */
-                               if (hai->hai_action == HSMA_RESTORE) {
-                                       struct cdt_restore_handle *crh = NULL;
-                                       struct mdt_object *obj = NULL;
-
-                                       mutex_lock(&cdt->cdt_restore_lock);
-                                       crh = mdt_hsm_restore_hdl_find(cdt,
-                                                               &hai->hai_fid);
-                                       if (crh != NULL)
-                                               list_del(&crh->crh_list);
-                                       mutex_unlock(&cdt->cdt_restore_lock);
-                                       obj = mdt_object_find(mti->mti_env,
-                                                             mti->mti_mdt,
-                                                             &hai->hai_fid);
-                                       if (!IS_ERR(obj) && crh != NULL)
-                                               mdt_object_unlock(mti, obj,
-                                                                 &crh->crh_lh,
-                                                                 1);
-                                       if (crh != NULL)
-                                               OBD_SLAB_FREE_PTR(crh,
-                                                       mdt_hsm_cdt_kmem);
-                                       if (!IS_ERR(obj))
-                                               mdt_object_put(mti->mti_env,
-                                                              obj);
-                               }
+                       continue;
+               } else {
+                       GOTO(out_buf, rc = PTR_ERR(obj));
+               }
+
+               if (!mdt_hsm_is_action_compat(hai, hal->hal_archive_id,
+                                             hal->hal_flags, &hsm)) {
+                       struct hsm_record_update update = {
+                               .cookie = hai->hai_cookie,
+                               .status = ARS_FAILED,
+                       };
+
+                       /* incompatible request, we abort the request */
+                       /* the next time the coordinator wakes up, it will
+                        * build the same HAL with only the valid
+                        * records */
+                       fail_request = true;
+                       rc = mdt_agent_record_update(mti->mti_env, mdt,
+                                                    &update, 1);
+                       if (rc) {
+                               CERROR("%s: mdt_agent_record_update() failed, "
+                                      "cannot update status to %s for cookie "
+                                      "%#llx: rc = %d\n",
+                                      mdt_obd_name(mdt),
+                                      agent_req_status2name(ARS_FAILED),
+                                      hai->hai_cookie, rc);
+                               GOTO(out_buf, rc);
                        }
+
+                       /* if restore and record status updated, give
+                        * back granted layout lock */
+                       if (hai->hai_action == HSMA_RESTORE)
+                               cdt_restore_handle_del(mti, cdt, &hai->hai_fid);
                }
        }
 
-       /* we found incompatible requests, so the compound cannot be send
+       /* we found incompatible requests, so the HAL cannot be sent
         * as is. Bad records have been invalidated in llog.
         * Valid one will be reschedule next time coordinator will wake up
-        * So no need the rebuild a full valid compound request now
+        * So no need to rebuild a full valid HAL now
         */
        if (fail_request)
                GOTO(out_buf, rc = 0);
@@ -624,25 +610,6 @@ out_buf:
 }
 
 /**
- * update status of a request
- * \param mti [IN]
- * \param pgs [IN] progress of the copy tool
- * \retval 0 success
- * \retval -ve failure
- */
-int mdt_hsm_coordinator_update(struct mdt_thread_info *mti,
-                              struct hsm_progress_kernel *pgs)
-{
-       int      rc;
-
-       ENTRY;
-       /* ask to coordinator to update request state and
-        * to record on disk the result */
-       rc = mdt_hsm_update_request_state(mti, pgs, 1);
-       RETURN(rc);
-}
-
-/**
  * seq_file method called to start access to /proc file
  */
 static void *mdt_hsm_agent_proc_start(struct seq_file *s, loff_t *off)
@@ -770,4 +737,3 @@ const struct file_operations mdt_hsm_agent_fops = {
        .llseek         = seq_lseek,
        .release        = lprocfs_seq_release,
 };
-