Whamcloud - gitweb
LU-10467 ptlrpc: convert final users of LWI_TIMEOUT_INTERVAL
[fs/lustre-release.git] / lustre / mdt / mdt_hsm_cdt_agent.c
index 92e0730..2bce139 100644 (file)
@@ -21,6 +21,8 @@
  */
 /*
  * (C) Copyright 2012 Commissariat a l'energie atomique et aux energies
+ *
+ * Copyright (c) 2016, 2017, Intel Corporation.
  *     alternatives
  *
  */
@@ -38,8 +40,8 @@
 #include <obd.h>
 #include <obd_support.h>
 #include <lustre_export.h>
-#include <lustre/lustre_user.h>
 #include <lprocfs_status.h>
+#include <lustre_kernelcomm.h>
 #include "mdt_internal.h"
 
 /*
@@ -48,7 +50,7 @@
 
 /*
  * find a hsm_agent by uuid
- * lock cdt_agent_lock needs to be hold by caller
+ * lock cdt_agent_lock needs to be held by caller
  * \param cdt [IN] coordinator
  * \param uuid [IN] agent UUID
  * \retval hsm_agent pointer or NULL if not found
@@ -138,6 +140,10 @@ out_free:
        if (ha != NULL)
                OBD_FREE_PTR(ha);
 out:
+       /* wake the coordinator to potentially schedule requests */
+       if (rc == -EEXIST || rc == 0)
+               mdt_hsm_cdt_event(cdt);
+
        return rc;
 }
 
@@ -305,8 +311,57 @@ int mdt_hsm_find_best_agent(struct coordinator *cdt, __u32 archive,
        RETURN(rc);
 }
 
+int mdt_hsm_send_action_to_each_archive(struct mdt_thread_info *mti,
+                                   struct hsm_action_item *hai)
+{
+       struct hsm_agent *ha;
+       __u32 archive_mask = 0;
+       struct coordinator *cdt = &mti->mti_mdt->mdt_coordinator;
+       int i;
+       /* default error, returned unchanged when no archive_id is registered */
+       int rc = -EAGAIN;
+       ENTRY;
+
+       /* add one record for hai per distinct registered archive_id (read lock held) */
+       down_read(&cdt->cdt_agent_lock);
+       list_for_each_entry(ha, &cdt->cdt_agents, ha_list) {
+               for (i = 0; (i < ha->ha_archive_cnt); i++) {
+                       /* send once per archive_id; NOTE(review): mask assumes ids < 32 - confirm */
+                       if ((1 << ha->ha_archive_id[i]) & archive_mask)
+                               continue;
+                       archive_mask |= (1 << ha->ha_archive_id[i]);
+
+                       /* XXX: it could make sense to gather all
+                        * actions for the same archive_id like in
+                        * mdt_hsm_add_actions() ?? */
+                       rc = mdt_agent_record_add(mti->mti_env, mti->mti_mdt,
+                                                 ha->ha_archive_id[i], 0,
+                                                 hai);
+                       if (rc) {
+                               CERROR("%s: unable to add HSM remove request "
+                                      "for "DFID": rc=%d\n",
+                                      mdt_obd_name(mti->mti_mdt),
+                                      PFID(&hai->hai_fid), rc);
+                               break;
+                       } else {
+                               CDEBUG(D_HSM, "%s: added HSM remove request "
+                                      "for "DFID", archive_id=%d\n",
+                                      mdt_obd_name(mti->mti_mdt),
+                                      PFID(&hai->hai_fid),
+                                      ha->ha_archive_id[i]);
+                       }
+               }
+               /* inner loop broke out early on error: stop walking the agents too */
+               if (i != ha->ha_archive_cnt)
+                       break;
+       }
+       up_read(&cdt->cdt_agent_lock);
+
+       RETURN(rc);
+}
+
 /**
- * send a compound request to the agent
+ * send a HAL to the agent
  * \param mti [IN] context
  * \param hal [IN] request (can be a kuc payload)
  * \param purge [IN] purge mode (no record)
@@ -333,6 +388,65 @@ int mdt_hsm_agent_send(struct mdt_thread_info *mti,
        ENTRY;
 
        rc = mdt_hsm_find_best_agent(cdt, hal->hal_archive_id, &uuid);
+       if (rc && hal->hal_archive_id == 0) {
+               uint notrmcount = 0;
+               int rc2 = 0;
+
+               /* special case: remove requests with no archive_id specified
+                * and no agent registered to serve all archives. Create a set
+                * of new requests, one to be sent to each registered
+                * archive.
+                * To do so, find all HSMA_REMOVE entries, and then:
+                *     - set completed status as SUCCESS (or FAIL?)
+                *     - create a new LLOG record for each archive_id
+                *       presently being served by any CT
+                */
+               hai = hai_first(hal);
+               for (i = 0; i < hal->hal_count; i++,
+                    hai = hai_next(hai)) {
+                       struct hsm_record_update update;
+
+                       /* only removes are concerned */
+                       if (hai->hai_action != HSMA_REMOVE) {
+                               /* count actions other than HSMA_REMOVE, so
+                                * that the original error/rc is returned */
+                               notrmcount++;
+                               continue;
+                       }
+
+                       /* send remove request to all registered archive_ids */
+                       rc2 = mdt_hsm_send_action_to_each_archive(mti, hai);
+                       if (rc2)
+                               break;
+
+                       /* only update the original request as SUCCEED if it
+                        * has been successfully broadcast to all available
+                        * archive_ids
+                        * XXX: this should only cause duplicates to be sent,
+                        * unless a method to record the archive_ids already
+                        * successfully reached is implemented */
+
+                       update.cookie = hai->hai_cookie;
+                       update.status = ARS_SUCCEED;
+                       rc2 = mdt_agent_record_update(mti->mti_env, mdt,
+                                                     &update, 1);
+                       if (rc2) {
+                               CERROR("%s: mdt_agent_record_update() "
+                                     "failed, cannot update "
+                                     "status to %s for cookie "
+                                     "%#llx: rc = %d\n",
+                                     mdt_obd_name(mdt),
+                                     agent_req_status2name(ARS_SUCCEED),
+                                     hai->hai_cookie, rc2);
+                               break;
+                       }
+               }
+               /* only remove requests with archive_id=0 */
+               if (notrmcount == 0)
+                       RETURN(rc2);
+
+       }
+
        if (rc) {
                CERROR("%s: Cannot find agent for archive %d: rc = %d\n",
                       mdt_obd_name(mdt), hal->hal_archive_id, rc);
@@ -343,84 +457,86 @@ int mdt_hsm_agent_send(struct mdt_thread_info *mti,
               hal->hal_archive_id);
 
        len = hal_size(hal);
-       if (kuc_ispayload(hal)) {
-               /* hal is already a kuc payload
-                * we do not need to alloc a new one
-                * this avoid a alloc/memcpy/free
-                */
-               buf = hal;
-       } else {
-               buf = kuc_alloc(len, KUC_TRANSPORT_HSM, HMT_ACTION_LIST);
-               if (IS_ERR(buf))
-                       RETURN(PTR_ERR(buf));
-               memcpy(buf, hal, len);
-       }
+       buf = kuc_alloc(len, KUC_TRANSPORT_HSM, HMT_ACTION_LIST);
+       if (IS_ERR(buf))
+               RETURN(PTR_ERR(buf));
+       memcpy(buf, hal, len);
 
        /* Check if request is still valid (cf file hsm flags) */
        fail_request = false;
        hai = hai_first(hal);
        for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai)) {
-               if (hai->hai_action != HSMA_CANCEL) {
-                       struct mdt_object *obj;
-                       struct md_hsm hsm;
+               struct mdt_object *obj;
+               struct md_hsm hsm;
 
-                       obj = mdt_hsm_get_md_hsm(mti, &hai->hai_fid, &hsm);
-                       if (!IS_ERR(obj) && obj != NULL) {
-                               mdt_object_put(mti->mti_env, obj);
-                       } else {
-                               if (hai->hai_action == HSMA_REMOVE)
-                                       continue;
-
-                               if (obj == NULL) {
-                                       fail_request = true;
-                                       rc = mdt_agent_record_update(
-                                                            mti->mti_env, mdt,
-                                                            &hai->hai_cookie,
-                                                            1, ARS_FAILED);
-                                       if (rc) {
-                                               CERROR(
-                                             "%s: mdt_agent_record_update() "
-                                             "failed, cannot update "
-                                             "status to %s for cookie "
-                                             LPX64": rc = %d\n",
-                                             mdt_obd_name(mdt),
-                                             agent_req_status2name(ARS_FAILED),
-                                             hai->hai_cookie, rc);
-                                               GOTO(out_buf, rc);
-                                       }
-                                       continue;
-                               }
-                               GOTO(out_buf, rc = PTR_ERR(obj));
+               if (hai->hai_action == HSMA_CANCEL)
+                       continue;
+
+               obj = mdt_hsm_get_md_hsm(mti, &hai->hai_fid, &hsm);
+               if (!IS_ERR(obj)) {
+                       mdt_object_put(mti->mti_env, obj);
+               } else if (PTR_ERR(obj) == -ENOENT) {
+                       struct hsm_record_update update = {
+                               .cookie = hai->hai_cookie,
+                               .status = ARS_FAILED,
+                       };
+
+                       if (hai->hai_action == HSMA_REMOVE)
+                               continue;
+
+                       fail_request = true;
+                       rc = mdt_agent_record_update(mti->mti_env, mdt,
+                                                    &update, 1);
+                       if (rc < 0) {
+                               CERROR("%s: mdt_agent_record_update() failed, "
+                                      "cannot update status to %s for cookie "
+                                      "%#llx: rc = %d\n",
+                                      mdt_obd_name(mdt),
+                                      agent_req_status2name(ARS_FAILED),
+                                      hai->hai_cookie, rc);
+                               GOTO(out_buf, rc);
                        }
 
-                       if (!mdt_hsm_is_action_compat(hai, hal->hal_archive_id,
-                                                     hal->hal_flags, &hsm)) {
-                               /* incompatible request, we abort the request */
-                               /* next time coordinator will wake up, it will
-                                * make the same compound with valid only
-                                * records */
-                               fail_request = true;
-                               rc = mdt_agent_record_update(mti->mti_env, mdt,
-                                                            &hai->hai_cookie,
-                                                            1, ARS_FAILED);
-                               if (rc) {
-                                       CERROR("%s: mdt_agent_record_update() "
-                                             "failed, cannot update "
-                                             "status to %s for cookie "
-                                             LPX64": rc = %d\n",
-                                             mdt_obd_name(mdt),
-                                             agent_req_status2name(ARS_FAILED),
-                                             hai->hai_cookie, rc);
-                                       GOTO(out_buf, rc);
-                               }
+                       continue;
+               } else {
+                       GOTO(out_buf, rc = PTR_ERR(obj));
+               }
+
+               if (!mdt_hsm_is_action_compat(hai, hal->hal_archive_id,
+                                             hal->hal_flags, &hsm)) {
+                       struct hsm_record_update update = {
+                               .cookie = hai->hai_cookie,
+                               .status = ARS_FAILED,
+                       };
+
+                       /* incompatible request, we abort the request */
+                       /* the next time the coordinator wakes up, it will
+                        * build the same HAL with only the valid
+                        * records */
+                       fail_request = true;
+                       rc = mdt_agent_record_update(mti->mti_env, mdt,
+                                                    &update, 1);
+                       if (rc) {
+                               CERROR("%s: mdt_agent_record_update() failed, "
+                                      "cannot update status to %s for cookie "
+                                      "%#llx: rc = %d\n",
+                                      mdt_obd_name(mdt),
+                                      agent_req_status2name(ARS_FAILED),
+                                      hai->hai_cookie, rc);
+                               GOTO(out_buf, rc);
                        }
+
+                       /* if restore and record status updated, give
+                        * back granted layout lock */
+                       if (hai->hai_action == HSMA_RESTORE)
+                               cdt_restore_handle_del(mti, cdt, &hai->hai_fid);
                }
        }
 
-       /* we found incompatible requests, so the compound cannot be send
+       /* we found incompatible requests, so the HAL cannot be sent
         * as is. Bad records have been invalidated in llog.
         * Valid one will be reschedule next time coordinator will wake up
-        * So no need the rebuild a full valid compound request now
+        * So there is no need to rebuild a full valid HAL now
         */
        if (fail_request)
                GOTO(out_buf, rc = 0);
@@ -444,8 +560,10 @@ int mdt_hsm_agent_send(struct mdt_thread_info *mti,
         *  the ldlm_callback_handler. Note this sends a request RPC
         * from a server (MDT) to a client (MDC), backwards of normal comms.
         */
-       exp = cfs_hash_lookup(mdt2obd_dev(mdt)->obd_uuid_hash, &uuid);
+       exp = obd_uuid_lookup(mdt2obd_dev(mdt), &uuid);
        if (exp == NULL || exp->exp_disconnected) {
+               if (exp != NULL)
+                       class_export_put(exp);
                /* This should clean up agents on evicted exports */
                rc = -ENOENT;
                CERROR("%s: agent uuid (%s) not found, unregistering:"
@@ -486,35 +604,15 @@ out:
        }
 
 out_buf:
-       if (buf != hal)
-               kuc_free(buf, len);
-
-       RETURN(rc);
-}
-
-/**
- * update status of a request
- * \param mti [IN]
- * \param pgs [IN] progress of the copy tool
- * \retval 0 success
- * \retval -ve failure
- */
-int mdt_hsm_coordinator_update(struct mdt_thread_info *mti,
-                              struct hsm_progress_kernel *pgs)
-{
-       int      rc;
+       kuc_free(buf, len);
 
-       ENTRY;
-       /* ask to coodinator to update request state and
-        * to record on disk the result */
-       rc = mdt_hsm_update_request_state(mti, pgs, 1);
        RETURN(rc);
 }
 
 /**
- * seq_file method called to start access to /proc file
+ * seq_file method called to start access to debugfs file
  */
-static void *mdt_hsm_agent_proc_start(struct seq_file *s, loff_t *off)
+static void *mdt_hsm_agent_debugfs_start(struct seq_file *s, loff_t *off)
 {
        struct mdt_device       *mdt = s->private;
        struct coordinator      *cdt = &mdt->mdt_coordinator;
@@ -544,7 +642,7 @@ static void *mdt_hsm_agent_proc_start(struct seq_file *s, loff_t *off)
  * seq_file method called to get next item
  * just returns NULL at eof
  */
-static void *mdt_hsm_agent_proc_next(struct seq_file *s, void *v, loff_t *p)
+static void *mdt_hsm_agent_debugfs_next(struct seq_file *s, void *v, loff_t *p)
 {
        struct mdt_device       *mdt = s->private;
        struct coordinator      *cdt = &mdt->mdt_coordinator;
@@ -565,7 +663,7 @@ static void *mdt_hsm_agent_proc_next(struct seq_file *s, void *v, loff_t *p)
 
 /**
  */
-static int mdt_hsm_agent_proc_show(struct seq_file *s, void *v)
+static int mdt_hsm_agent_debugfs_show(struct seq_file *s, void *v)
 {
        struct list_head        *pos = v;
        struct hsm_agent        *ha;
@@ -576,15 +674,16 @@ static int mdt_hsm_agent_proc_show(struct seq_file *s, void *v)
                RETURN(0);
 
        ha = list_entry(pos, struct hsm_agent, ha_list);
-       seq_printf(s, "uuid=%s archive#=%d (", ha->ha_uuid.uuid,
-                  ha->ha_archive_cnt);
-       if (ha->ha_archive_cnt == 0)
-               seq_printf(s, "all");
-       else
-               for (i = 0; i < ha->ha_archive_cnt; i++)
-                       seq_printf(s, "%d ", ha->ha_archive_id[i]);
+       seq_printf(s, "uuid=%s archive_id=", ha->ha_uuid.uuid);
+       if (ha->ha_archive_cnt == 0) {
+               seq_printf(s, "ANY");
+       } else {
+               seq_printf(s, "%d", ha->ha_archive_id[0]);
+               for (i = 1; i < ha->ha_archive_cnt; i++)
+                       seq_printf(s, ",%d", ha->ha_archive_id[i]);
+       }
 
-       seq_printf(s, ") r=%d s=%d f=%d\n",
+       seq_printf(s, " requests=[current:%d ok:%d errors:%d]\n",
                   atomic_read(&ha->ha_requests),
                   atomic_read(&ha->ha_success),
                   atomic_read(&ha->ha_failure));
@@ -592,9 +691,9 @@ static int mdt_hsm_agent_proc_show(struct seq_file *s, void *v)
 }
 
 /**
- * seq_file method called to stop access to /proc file
+ * seq_file method called to stop access to debugfs file
  */
-static void mdt_hsm_agent_proc_stop(struct seq_file *s, void *v)
+static void mdt_hsm_agent_debugfs_stop(struct seq_file *s, void *v)
 {
        struct mdt_device       *mdt = s->private;
        struct coordinator      *cdt = &mdt->mdt_coordinator;
@@ -602,34 +701,30 @@ static void mdt_hsm_agent_proc_stop(struct seq_file *s, void *v)
        up_read(&cdt->cdt_agent_lock);
 }
 
-/* hsm agent list proc functions */
-static const struct seq_operations mdt_hsm_agent_proc_ops = {
-       .start  = mdt_hsm_agent_proc_start,
-       .next   = mdt_hsm_agent_proc_next,
-       .show   = mdt_hsm_agent_proc_show,
-       .stop   = mdt_hsm_agent_proc_stop,
+/* hsm agent list debugfs functions */
+static const struct seq_operations mdt_hsm_agent_debugfs_ops = {
+       .start  = mdt_hsm_agent_debugfs_start,
+       .next   = mdt_hsm_agent_debugfs_next,
+       .show   = mdt_hsm_agent_debugfs_show,
+       .stop   = mdt_hsm_agent_debugfs_stop,
 };
 
 /**
- * public function called at open of /proc file to get
+ * public function called at open of debugfs file to get
  * list of agents
  */
-static int lprocfs_open_hsm_agent(struct inode *inode, struct file *file)
+static int ldebugfs_open_hsm_agent(struct inode *inode, struct file *file)
 {
        struct seq_file *s;
        int              rc;
        ENTRY;
 
-       if (LPROCFS_ENTRY_AND_CHECK(PDE(inode)))
-                       RETURN(-ENOENT);
-
-       rc = seq_open(file, &mdt_hsm_agent_proc_ops);
-       if (rc) {
-               LPROCFS_EXIT();
+       rc = seq_open(file, &mdt_hsm_agent_debugfs_ops);
+       if (rc)
                RETURN(rc);
-       }
+
        s = file->private_data;
-       s->private = PDE(inode)->data;
+       s->private = inode->i_private;
 
        RETURN(rc);
 }
@@ -637,9 +732,8 @@ static int lprocfs_open_hsm_agent(struct inode *inode, struct file *file)
 /* methods to access hsm agent list */
 const struct file_operations mdt_hsm_agent_fops = {
        .owner          = THIS_MODULE,
-       .open           = lprocfs_open_hsm_agent,
+       .open           = ldebugfs_open_hsm_agent,
        .read           = seq_read,
        .llseek         = seq_lseek,
-       .release        = lprocfs_seq_release,
+       .release        = seq_release,
 };
-