X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fmdt%2Fmdt_hsm_cdt_agent.c;h=1767e112bdf11a1b931a6d837dd2ac1ff919ad6c;hb=2eaa49ef0f16798d564883b16cea9e96fad52495;hp=9a9ce6d7b8cdddeb78c30ef6a88fc4b67805b807;hpb=c54e61cef76eb0e00c9e69729cf4ac8578c90f47;p=fs%2Flustre-release.git

diff --git a/lustre/mdt/mdt_hsm_cdt_agent.c b/lustre/mdt/mdt_hsm_cdt_agent.c
index 9a9ce6d..1767e11 100644
--- a/lustre/mdt/mdt_hsm_cdt_agent.c
+++ b/lustre/mdt/mdt_hsm_cdt_agent.c
@@ -21,6 +21,8 @@
  */
 /*
  * (C) Copyright 2012 Commissariat a l'energie atomique et aux energies
  *     alternatives
  *
+ * Copyright (c) 2016, 2017, Intel Corporation.
+ *
  */
@@ -38,8 +40,8 @@
 #include <obd.h>
 #include <obd_support.h>
 #include <lustre_export.h>
-#include <lustre/lustre_user.h>
 #include <lprocfs_status.h>
+#include <lustre_kernelcomm.h>
 #include "mdt_internal.h"
 
 /*
@@ -48,7 +50,7 @@
 
 /*
  * find a hsm_agent by uuid
- * lock cdt_agent_lock needs to be hold by caller
+ * lock cdt_agent_lock needs to be held by caller
  * \param cdt [IN] coordinator
  * \param uuid [IN] agent UUID
  * \retval hsm_agent pointer or NULL if not found
@@ -133,11 +135,14 @@ int mdt_hsm_agent_register(struct mdt_thread_info *mti,
 out_free:
 
 	if (ha != NULL && ha->ha_archive_id != NULL)
-		OBD_FREE(ha->ha_archive_id,
-			 ha->ha_archive_cnt * sizeof(*ha->ha_archive_id));
+		OBD_FREE_PTR_ARRAY(ha->ha_archive_id, ha->ha_archive_cnt);
 	if (ha != NULL)
 		OBD_FREE_PTR(ha);
 out:
+	/* wake the coordinator to potentially schedule requests */
+	if (rc == -EEXIST || rc == 0)
+		mdt_hsm_cdt_event(cdt);
+
 	return rc;
 }
 
@@ -159,13 +164,13 @@ int mdt_hsm_agent_register_mask(struct mdt_thread_info *mti,
 	nr_archives = hweight32(archive_mask);
 
 	if (nr_archives != 0) {
-		OBD_ALLOC(archive_id, nr_archives * sizeof(*archive_id));
+		OBD_ALLOC_PTR_ARRAY(archive_id, nr_archives);
 		if (!archive_id)
 			RETURN(-ENOMEM);
 
 		nr_archives = 0;
 		for (i = 0; i < sizeof(archive_mask) * 8; i++) {
-			if ((1 << i) & archive_mask) {
+			if (BIT(i) & archive_mask) {
 				archive_id[nr_archives] = i + 1;
 				nr_archives++;
 			}
@@ -175,7 +180,7 @@ int mdt_hsm_agent_register_mask(struct mdt_thread_info *mti,
 	rc = mdt_hsm_agent_register(mti, uuid, nr_archives, archive_id);
 
 	if (archive_id != NULL)
-		OBD_FREE(archive_id, nr_archives * sizeof(*archive_id));
+		OBD_FREE_PTR_ARRAY(archive_id, nr_archives);
 
 	RETURN(rc);
 }
@@ -211,8 +216,7 @@ int mdt_hsm_agent_unregister(struct mdt_thread_info *mti,
 		GOTO(out, rc = -ENOENT);
 
 	if (ha->ha_archive_cnt != 0)
-		OBD_FREE(ha->ha_archive_id,
-			 ha->ha_archive_cnt * sizeof(*ha->ha_archive_id));
+		OBD_FREE_PTR_ARRAY(ha->ha_archive_id, ha->ha_archive_cnt);
 	OBD_FREE_PTR(ha);
 
 	GOTO(out, rc = 0);
@@ -305,8 +309,57 @@ int mdt_hsm_find_best_agent(struct coordinator *cdt, __u32 archive,
 	RETURN(rc);
 }
 
+int mdt_hsm_send_action_to_each_archive(struct mdt_thread_info *mti,
+				    struct hsm_action_item *hai)
+{
+	struct hsm_agent *ha;
+	__u32 archive_mask = 0;
+	struct coordinator *cdt = &mti->mti_mdt->mdt_coordinator;
+	int i;
+	/* rc stays -EAGAIN if no registered archive_id is found below */
+	int rc = -EAGAIN;
+	ENTRY;
+
+	/* add one record of hai per distinct registered archive_id */
+	down_read(&cdt->cdt_agent_lock);
+	list_for_each_entry(ha, &cdt->cdt_agents, ha_list) {
+		for (i = 0; (i < ha->ha_archive_cnt); i++) {
+			/* send once per id; TODO confirm BIT(id) fits __u32 */
+			if (BIT(ha->ha_archive_id[i]) & archive_mask)
+				continue;
+			archive_mask |= BIT(ha->ha_archive_id[i]);
+
+			/* XXX: it could make sense to gather all
+			 * actions for the same archive_id like in
+			 * mdt_hsm_add_actions() ?? */
+			rc = mdt_agent_record_add(mti->mti_env, mti->mti_mdt,
+						  ha->ha_archive_id[i], 0,
+						  hai);
+			if (rc) {
+				CERROR("%s: unable to add HSM remove request "
+				       "for "DFID": rc=%d\n",
+				       mdt_obd_name(mti->mti_mdt),
+				       PFID(&hai->hai_fid), rc);
+				break;
+			} else {
+				CDEBUG(D_HSM, "%s: added HSM remove request "
+				       "for "DFID", archive_id=%d\n",
+				       mdt_obd_name(mti->mti_mdt),
+				       PFID(&hai->hai_fid),
+				       ha->ha_archive_id[i]);
+			}
+		}
+		/* inner loop broke on a failed add: stop walking agents */
+		if (i != ha->ha_archive_cnt)
+			break;
+	}
+	up_read(&cdt->cdt_agent_lock);
+
+	RETURN(rc);
+}
+
 /**
- * send a compound request to the agent
+ * send a HAL to the agent
  * \param mti [IN] context
  * \param hal [IN] request (can be a kuc payload)
  * \param purge [IN] purge mode (no record)
@@ -333,6 +386,65 @@ int mdt_hsm_agent_send(struct mdt_thread_info *mti,
 	ENTRY;
 
 	rc = mdt_hsm_find_best_agent(cdt, hal->hal_archive_id, &uuid);
+	if (rc && hal->hal_archive_id == 0) {
+		uint notrmcount = 0;
+		int rc2 = 0;
+
+		/* special case of remove requests with no archive_id specified,
+		 * and no agent registered to serve all archives: create a
+		 * set of new requests, one to be sent to each registered
+		 * archive.
+		 * To do so, find all HSMA_REMOVE entries, and then:
+		 *     _ set completed status as SUCCESS (or FAIL?)
+		 *     _ create a new LLOG record for each archive_id
+		 *       presently being served by any CT
+		 */
+		hai = hai_first(hal);
+		for (i = 0; i < hal->hal_count; i++,
+		     hai = hai_next(hai)) {
+			struct hsm_record_update update;
+
+			/* only removes are concerned */
+			if (hai->hai_action != HSMA_REMOVE) {
+				/* count if other actions than HSMA_REMOVE,
+				 * to return original error/rc */
+				notrmcount++;
+				continue;
+			}
+
+			/* send remove request to all registered archive_ids */
+			rc2 = mdt_hsm_send_action_to_each_archive(mti, hai);
+			if (rc2)
+				break;
+
+			/* only update original request as SUCCEED if it has
+			 * been successfully broadcast to all available
+			 * archive_ids
+			 * XXX: this should only cause duplicates to be sent,
+			 * unless a method to record already successfully
+			 * reached archive_ids is implemented */
+
+			update.cookie = hai->hai_cookie;
+			update.status = ARS_SUCCEED;
+			rc2 = mdt_agent_record_update(mti->mti_env, mdt,
+						      &update, 1);
+			if (rc2) {
+				CERROR("%s: mdt_agent_record_update() "
+				      "failed, cannot update "
+				      "status to %s for cookie "
+				      "%#llx: rc = %d\n",
+				      mdt_obd_name(mdt),
+				      agent_req_status2name(ARS_SUCCEED),
+				      hai->hai_cookie, rc2);
+				break;
+			}
+		}
+		/* only remove requests with archive_id=0 */
+		if (notrmcount == 0)
+			RETURN(rc2);
+
+	}
+
 	if (rc) {
 		CERROR("%s: Cannot find agent for archive %d: rc = %d\n",
 		       mdt_obd_name(mdt), hal->hal_archive_id, rc);
@@ -343,84 +455,86 @@ int mdt_hsm_agent_send(struct mdt_thread_info *mti,
 	       hal->hal_archive_id);
 
 	len = hal_size(hal);
-	if (kuc_ispayload(hal)) {
-		/* hal is already a kuc payload
-		 * we do not need to alloc a new one
-		 * this avoid a alloc/memcpy/free
-		 */
-		buf = hal;
-	} else {
-		buf = kuc_alloc(len, KUC_TRANSPORT_HSM, HMT_ACTION_LIST);
-		if (IS_ERR(buf))
-			RETURN(PTR_ERR(buf));
-		memcpy(buf, hal, len);
-	}
+	buf = kuc_alloc(len, KUC_TRANSPORT_HSM, HMT_ACTION_LIST);
+	if (IS_ERR(buf))
+		RETURN(PTR_ERR(buf));
+	memcpy(buf, hal, len);
 
 	/* Check if request is still valid (cf file hsm flags) */
 	fail_request = false;
 	hai = hai_first(hal);
 	for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai)) {
-		if (hai->hai_action != HSMA_CANCEL) {
-			struct mdt_object *obj;
-			struct md_hsm hsm;
+		struct mdt_object *obj;
+		struct md_hsm hsm;
 
-			obj = mdt_hsm_get_md_hsm(mti, &hai->hai_fid, &hsm);
-			if (!IS_ERR(obj) && obj != NULL) {
-				mdt_object_put(mti->mti_env, obj);
-			} else {
-				if (hai->hai_action == HSMA_REMOVE)
-					continue;
-
-				if (obj == NULL) {
-					fail_request = true;
-					rc = mdt_agent_record_update(
-							     mti->mti_env, mdt,
-							     &hai->hai_cookie,
-							     1, ARS_FAILED);
-					if (rc) {
-						CERROR(
-					      "%s: mdt_agent_record_update() "
-					      "failed, cannot update "
-					      "status to %s for cookie "
-					      LPX64": rc = %d\n",
-					      mdt_obd_name(mdt),
-					      agent_req_status2name(ARS_FAILED),
-					      hai->hai_cookie, rc);
-						GOTO(out_buf, rc);
-					}
-					continue;
-				}
-				GOTO(out_buf, rc = PTR_ERR(obj));
+		if (hai->hai_action == HSMA_CANCEL)
+			continue;
+
+		obj = mdt_hsm_get_md_hsm(mti, &hai->hai_fid, &hsm);
+		if (!IS_ERR(obj)) {
+			mdt_object_put(mti->mti_env, obj);
+		} else if (PTR_ERR(obj) == -ENOENT) {
+			struct hsm_record_update update = {
+				.cookie = hai->hai_cookie,
+				.status = ARS_FAILED,
+			};
+
+			if (hai->hai_action == HSMA_REMOVE)
+				continue;
+
+			fail_request = true;
+			rc = mdt_agent_record_update(mti->mti_env, mdt,
+						     &update, 1);
+			if (rc < 0) {
+				CERROR("%s: mdt_agent_record_update() failed, "
+				       "cannot update status to %s for cookie "
+				       "%#llx: rc = %d\n",
+				       mdt_obd_name(mdt),
+				       agent_req_status2name(ARS_FAILED),
+				       hai->hai_cookie, rc);
+				GOTO(out_buf, rc);
 			}
 
-			if (!mdt_hsm_is_action_compat(hai, hal->hal_archive_id,
-						      hal->hal_flags, &hsm)) {
-				/* incompatible request, we abort the request */
-				/* next time coordinator will wake up, it will
-				 * make the same compound with valid only
-				 * records */
-				fail_request = true;
-				rc = mdt_agent_record_update(mti->mti_env, mdt,
-							     &hai->hai_cookie,
-							     1, ARS_FAILED);
-				if (rc) {
-					CERROR("%s: mdt_agent_record_update() "
-					      "failed, cannot update "
-					      "status to %s for cookie "
-					      LPX64": rc = %d\n",
-					      mdt_obd_name(mdt),
-					      agent_req_status2name(ARS_FAILED),
-					      hai->hai_cookie, rc);
-					GOTO(out_buf, rc);
-				}
+			continue;
+		} else {
+			GOTO(out_buf, rc = PTR_ERR(obj));
+		}
+
+		if (!mdt_hsm_is_action_compat(hai, hal->hal_archive_id,
+					      hal->hal_flags, &hsm)) {
+			struct hsm_record_update update = {
+				.cookie = hai->hai_cookie,
+				.status = ARS_FAILED,
+			};
+
+			/* incompatible request, we abort the request */
+			/* next time coordinator will wake up, it will
+			 * make the same HAL with valid only
+			 * records */
+			fail_request = true;
+			rc = mdt_agent_record_update(mti->mti_env, mdt,
+						     &update, 1);
+			if (rc) {
+				CERROR("%s: mdt_agent_record_update() failed, "
+				       "cannot update status to %s for cookie "
+				       "%#llx: rc = %d\n",
+				       mdt_obd_name(mdt),
+				       agent_req_status2name(ARS_FAILED),
+				       hai->hai_cookie, rc);
+				GOTO(out_buf, rc);
 			}
+
+			/* if restore and record status updated, give
+			 * back granted layout lock */
+			if (hai->hai_action == HSMA_RESTORE)
+				cdt_restore_handle_del(mti, cdt, &hai->hai_fid);
 		}
 	}
 
-	/* we found incompatible requests, so the compound cannot be send
+	/* we found incompatible requests, so the HAL cannot be sent
 	 * as is. Bad records have been invalidated in llog.
 	 * Valid one will be reschedule next time coordinator will wake up
-	 * So no need the rebuild a full valid compound request now
+	 * So no need to rebuild a full valid HAL now
 	 */
 	if (fail_request)
 		GOTO(out_buf, rc = 0);
@@ -444,8 +558,10 @@ int mdt_hsm_agent_send(struct mdt_thread_info *mti,
 	 *  the ldlm_callback_handler. Note this sends a request RPC
 	 * from a server (MDT) to a client (MDC), backwards of normal comms.
 	 */
-	exp = cfs_hash_lookup(mdt2obd_dev(mdt)->obd_uuid_hash, &uuid);
+	exp = obd_uuid_lookup(mdt2obd_dev(mdt), &uuid);
 	if (exp == NULL || exp->exp_disconnected) {
+		if (exp != NULL)
+			class_export_put(exp);
 		/* This should clean up agents on evicted exports */
 		rc = -ENOENT;
 		CERROR("%s: agent uuid (%s) not found, unregistering:"
@@ -486,35 +602,15 @@ out:
 	}
 
 out_buf:
-	if (buf != hal)
-		kuc_free(buf, len);
+	kuc_free(buf, len);
 
 	RETURN(rc);
 }
 
 /**
- * update status of a request
- * \param mti [IN]
- * \param pgs [IN] progress of the copy tool
- * \retval 0 success
- * \retval -ve failure
+ * seq_file method called to start access to debugfs file
  */
-int mdt_hsm_coordinator_update(struct mdt_thread_info *mti,
-			       struct hsm_progress_kernel *pgs)
-{
-	int      rc;
-
-	ENTRY;
-	/* ask to coodinator to update request state and
-	 * to record on disk the result */
-	rc = mdt_hsm_update_request_state(mti, pgs, 1);
-	RETURN(rc);
-}
-
-/**
- * seq_file method called to start access to /proc file
- */
-static void *mdt_hsm_agent_proc_start(struct seq_file *s, loff_t *off)
+static void *mdt_hsm_agent_debugfs_start(struct seq_file *s, loff_t *off)
 {
 	struct mdt_device	*mdt = s->private;
 	struct coordinator	*cdt = &mdt->mdt_coordinator;
@@ -544,7 +640,7 @@ static void *mdt_hsm_agent_proc_start(struct seq_file *s, loff_t *off)
  * seq_file method called to get next item
  * just returns NULL at eof
  */
-static void *mdt_hsm_agent_proc_next(struct seq_file *s, void *v, loff_t *p)
+static void *mdt_hsm_agent_debugfs_next(struct seq_file *s, void *v, loff_t *p)
 {
 	struct mdt_device	*mdt = s->private;
 	struct coordinator	*cdt = &mdt->mdt_coordinator;
@@ -565,7 +661,7 @@ static void *mdt_hsm_agent_proc_next(struct seq_file *s, void *v, loff_t *p)
 
 /**
  */
-static int mdt_hsm_agent_proc_show(struct seq_file *s, void *v)
+static int mdt_hsm_agent_debugfs_show(struct seq_file *s, void *v)
 {
 	struct list_head	*pos = v;
 	struct hsm_agent	*ha;
@@ -593,9 +689,9 @@ static int mdt_hsm_agent_proc_show(struct seq_file *s, void *v)
 }
 
 /**
- * seq_file method called to stop access to /proc file
+ * seq_file method called to stop access to debugfs file
  */
-static void mdt_hsm_agent_proc_stop(struct seq_file *s, void *v)
+static void mdt_hsm_agent_debugfs_stop(struct seq_file *s, void *v)
 {
 	struct mdt_device	*mdt = s->private;
 	struct coordinator	*cdt = &mdt->mdt_coordinator;
@@ -603,33 +699,30 @@ static void mdt_hsm_agent_proc_stop(struct seq_file *s, void *v)
 	up_read(&cdt->cdt_agent_lock);
 }
 
-/* hsm agent list proc functions */
-static const struct seq_operations mdt_hsm_agent_proc_ops = {
-	.start	= mdt_hsm_agent_proc_start,
-	.next	= mdt_hsm_agent_proc_next,
-	.show	= mdt_hsm_agent_proc_show,
-	.stop	= mdt_hsm_agent_proc_stop,
+/* seq_file iterator callbacks of the hsm agent list debugfs file */
+static const struct seq_operations mdt_hsm_agent_debugfs_ops = {
+	.start	= mdt_hsm_agent_debugfs_start,
+	.next	= mdt_hsm_agent_debugfs_next,
+	.show	= mdt_hsm_agent_debugfs_show,
+	.stop	= mdt_hsm_agent_debugfs_stop,
+};
 
 /**
- * public function called at open of /proc file to get
+ * open method of the debugfs file: sets up the seq_file used to get
  * list of agents
  */
-static int lprocfs_open_hsm_agent(struct inode *inode, struct file *file)
+static int ldebugfs_open_hsm_agent(struct inode *inode, struct file *file)
 {
 	struct seq_file	*s;
 	int		 rc;
 	ENTRY;
 
-	if (LPROCFS_ENTRY_CHECK(PDE(inode)))
-		RETURN(-ENOENT);
-
-	rc = seq_open(file, &mdt_hsm_agent_proc_ops);
+	rc = seq_open(file, &mdt_hsm_agent_debugfs_ops);
 	if (rc)
 		RETURN(rc);
 
 	s = file->private_data;
-	s->private = PDE(inode)->data;
+	s->private = inode->i_private;	/* start/next/stop fetch the mdt from here */
 
 	RETURN(rc);
 }
@@ -637,9 +730,8 @@ static int lprocfs_open_hsm_agent(struct inode *inode, struct file *file)
 /* methods to access hsm agent list */
 const struct file_operations mdt_hsm_agent_fops = {
 	.owner		= THIS_MODULE,
-	.open		= lprocfs_open_hsm_agent,
+	.open		= ldebugfs_open_hsm_agent,	/* seq_open() wrapper */
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= lprocfs_seq_release,
+	.release	= seq_release,
 };
-