X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fmdt%2Fmdt_coordinator.c;h=536320ececb6645e4be141821ffcb2c546e0db04;hp=c74cf10ebca9d9dbd4dd8384e497975677eb2369;hb=9811edb26d3bff66c61acc39a72554461445298f;hpb=91144acb3dc1120c00797269afa621c94cb64e1e

diff --git a/lustre/mdt/mdt_coordinator.c b/lustre/mdt/mdt_coordinator.c
index c74cf10..536320e 100644
--- a/lustre/mdt/mdt_coordinator.c
+++ b/lustre/mdt/mdt_coordinator.c
@@ -23,7 +23,7 @@
  * Copyright (c) 2011, 2012 Commissariat a l'energie atomique et aux energies
  *                          alternatives
  *
- * Copyright (c) 2013, 2014, Intel Corporation.
+ * Copyright (c) 2013, 2016, Intel Corporation.
  * Use is subject to license terms.
  */
 /*
@@ -102,8 +102,8 @@ void mdt_hsm_dump_hal(int level, const char *prefix,
 	struct hsm_action_item	*hai;
 	char			 buf[12];
 
-	CDEBUG(level, "%s: HAL header: version %X count %d compound "LPX64
-		      " archive_id %d flags "LPX64"\n",
+	CDEBUG(level, "%s: HAL header: version %X count %d compound %#llx"
+		      " archive_id %d flags %#llx\n",
 	       prefix, hal->hal_version, hal->hal_count,
 	       hal->hal_compound_id, hal->hal_archive_id, hal->hal_flags);
 
@@ -111,8 +111,8 @@ void mdt_hsm_dump_hal(int level, const char *prefix,
 	for (i = 0; i < hal->hal_count; i++) {
 		sz = hai->hai_len - sizeof(*hai);
 		CDEBUG(level, "%s %d: fid="DFID" dfid="DFID
-		       " compound/cookie="LPX64"/"LPX64
-		       " action=%s extent="LPX64"-"LPX64" gid="LPX64
+		       " compound/cookie=%#llx/%#llx"
+		       " action=%s extent=%#llx-%#llx gid=%#llx"
 		       " datalen=%d data=[%s]\n",
 		       prefix, i,
 		       PFID(&hai->hai_fid), PFID(&hai->hai_dfid),
@@ -130,18 +130,19 @@ void mdt_hsm_dump_hal(int level, const char *prefix,
  * data passed to llog_cat_process() callback
  * to scan requests and take actions
  */
+struct hsm_scan_request {
+	int			 hal_sz;	/** allocated size of hal */
+	int			 hal_used_sz;	/** bytes of hal in use */
+	struct hsm_action_list	*hal;		/** action list being built */
+};
+
 struct hsm_scan_data {
 	struct mdt_thread_info		*mti;
 	char				 fs_name[MTI_NAME_MAXLEN+1];
 	/* request to be send to agents */
-	int				 request_sz;	/** allocated size */
 	int				 max_requests;	/** vector size */
 	int				 request_cnt;	/** used count */
-	struct {
-		int			 hal_sz;
-		int			 hal_used_sz;
-		struct hsm_action_list	*hal;
-	} *request;
+	struct hsm_scan_request		*request;	/** request vector */
 };
 
 /**
@@ -176,55 +177,47 @@ static int mdt_coordinator_cb(const struct lu_env *env,
 	dump_llog_agent_req_rec("mdt_coordinator_cb(): ", larr);
 	switch (larr->arr_status) {
 	case ARS_WAITING: {
-		int i, empty_slot, found;
+		int i;
+		struct hsm_scan_request *request;
 
 		/* Are agents full? */
-		if (atomic_read(&cdt->cdt_request_count) ==
+		if (atomic_read(&cdt->cdt_request_count) >=
 		    cdt->cdt_max_requests)
 			break;
 
-		/* first search whether the request is found in the list we
-		 * have built and if there is room in the request vector */
-		empty_slot = -1;
-		found = -1;
-		for (i = 0; i < hsd->max_requests &&
-			    (empty_slot == -1 || found == -1); i++) {
-			if (hsd->request[i].hal == NULL) {
-				empty_slot = i;
-				continue;
-			}
+		/* first search whether the request is found in the
+		 * list we have built. */
+		request = NULL;
+		for (i = 0; i < hsd->request_cnt; i++) {
 			if (hsd->request[i].hal->hal_compound_id ==
-				larr->arr_compound_id) {
-				found = i;
-				continue;
+			    larr->arr_compound_id) {
+				request = &hsd->request[i];
+				break;
 			}
 		}
-		if (found == -1 && empty_slot == -1)
-			/* unknown request and no more room for new request,
-			 * continue scan for to find other entries for
-			 * already found request
-			 */
-			RETURN(0);
 
-		if (found == -1) {
+		if (!request) {
 			struct hsm_action_list *hal;
 
-			/* request is not already known */
+			if (hsd->request_cnt == hsd->max_requests)
+				/* Unknown request and no more room
+				 * for a new request. Continue to scan
+				 * to find other entries for already
+				 * existing requests.
+				 */
+				RETURN(0);
+
+			request = &hsd->request[hsd->request_cnt];
+
 			/* allocates hai vector size just needs to be large
 			 * enough */
-			hsd->request[empty_slot].hal_sz =
-				     sizeof(*hsd->request[empty_slot].hal) +
-				     cfs_size_round(MTI_NAME_MAXLEN+1) +
-				     2 * cfs_size_round(larr->arr_hai.hai_len);
-			OBD_ALLOC(hal, hsd->request[empty_slot].hal_sz);
-			if (!hal) {
-				CERROR("%s: Cannot allocate memory (%d o)"
-				       "for compound "LPX64"\n",
-				       mdt_obd_name(mdt),
-				       hsd->request[i].hal_sz,
-				       larr->arr_compound_id);
+			request->hal_sz =
+				sizeof(*request->hal) +
+				cfs_size_round(MTI_NAME_MAXLEN+1) +
+				2 * cfs_size_round(larr->arr_hai.hai_len);
+			OBD_ALLOC(hal, request->hal_sz);
+			if (!hal)
 				RETURN(-ENOMEM);
-			}
 			hal->hal_version = HAL_VERSION;
 			strlcpy(hal->hal_fsname, hsd->fs_name,
 				MTI_NAME_MAXLEN + 1);
@@ -232,10 +225,9 @@ static int mdt_coordinator_cb(const struct lu_env *env,
 			hal->hal_archive_id = larr->arr_archive_id;
 			hal->hal_flags = larr->arr_flags;
 			hal->hal_count = 0;
-			hsd->request[empty_slot].hal_used_sz = hal_size(hal);
-			hsd->request[empty_slot].hal = hal;
+			request->hal_used_sz = hal_size(hal);
+			request->hal = hal;
 			hsd->request_cnt++;
-			found = empty_slot;
 			hai = hai_first(hal);
 		} else {
 			/* request is known */
@@ -247,44 +239,37 @@ static int mdt_coordinator_cb(const struct lu_env *env,
 			 * where the files are not archived in the same backend
 			 */
 			if (larr->arr_archive_id !=
-			    hsd->request[found].hal->hal_archive_id)
+			    request->hal->hal_archive_id)
 				RETURN(0);
 
-			if (hsd->request[found].hal_sz <
-			    hsd->request[found].hal_used_sz +
-			     cfs_size_round(larr->arr_hai.hai_len)) {
+			if (request->hal_sz <
+			    request->hal_used_sz +
+			    cfs_size_round(larr->arr_hai.hai_len)) {
 				/* Not enough room, need an extension */
 				void *hal_buffer;
 				int sz;
 
-				sz = 2 * hsd->request[found].hal_sz;
+				sz = 2 * request->hal_sz;
 				OBD_ALLOC(hal_buffer, sz);
-				if (!hal_buffer) {
-					CERROR("%s: Cannot allocate memory "
-					       "(%d o) for compound "LPX64"\n",
-					       mdt_obd_name(mdt), sz,
-					       larr->arr_compound_id);
+				if (!hal_buffer)
 					RETURN(-ENOMEM);
-				}
-				memcpy(hal_buffer, hsd->request[found].hal,
-				       hsd->request[found].hal_used_sz);
-				OBD_FREE(hsd->request[found].hal,
-					 hsd->request[found].hal_sz);
-				hsd->request[found].hal = hal_buffer;
-				hsd->request[found].hal_sz = sz;
+				memcpy(hal_buffer, request->hal,
+				       request->hal_used_sz);
+				OBD_FREE(request->hal,
+					 request->hal_sz);
+				request->hal = hal_buffer;
+				request->hal_sz = sz;
 			}
-			hai = hai_first(hsd->request[found].hal);
-			for (i = 0; i < hsd->request[found].hal->hal_count;
-			     i++)
+			hai = hai_first(request->hal);
+			for (i = 0; i < request->hal->hal_count; i++)
 				hai = hai_next(hai);
 		}
 		memcpy(hai, &larr->arr_hai, larr->arr_hai.hai_len);
 		hai->hai_cookie = larr->arr_hai.hai_cookie;
 		hai->hai_gid = larr->arr_hai.hai_gid;
 
-		hsd->request[found].hal_used_sz +=
-						   cfs_size_round(hai->hai_len);
-		hsd->request[found].hal->hal_count++;
+		request->hal_used_sz += cfs_size_round(hai->hai_len);
+		request->hal->hal_count++;
 		break;
 	}
 	case ARS_STARTED: {
@@ -328,7 +313,7 @@ static int mdt_coordinator_cb(const struct lu_env *env,
 		rc = mdt_hsm_update_request_state(hsd->mti, &pgs, 0);
 		if (rc)
 			CERROR("%s: cannot cleanup timed out request: "
-			       DFID" for cookie "LPX64" action=%s\n",
+			       DFID" for cookie %#llx action=%s\n",
 			       mdt_obd_name(mdt),
 			       PFID(&pgs.hpk_fid), pgs.hpk_cookie,
 			       hsm_copytool_action2name(
@@ -429,10 +414,11 @@ static int mdt_coordinator(void *data)
 	struct coordinator	*cdt = &mdt->mdt_coordinator;
 	struct hsm_scan_data	 hsd = { NULL };
 	int			 rc = 0;
+	int			 request_sz;
 	ENTRY;
 
-	cdt->cdt_thread.t_flags = SVC_RUNNING;
-	wake_up(&cdt->cdt_thread.t_ctl_waitq);
+	cdt->cdt_flags = SVC_RUNNING;
+	wake_up(&cdt->cdt_waitq);
 
 	CDEBUG(D_HSM, "%s: coordinator thread starting, pid=%d\n",
 	       mdt_obd_name(mdt), current_pid());
@@ -442,8 +428,8 @@ static int mdt_coordinator(void *data)
 	 * hsd.request[] vector
 	 */
 	hsd.max_requests = cdt->cdt_max_requests;
-	hsd.request_sz = hsd.max_requests * sizeof(*hsd.request);
-	OBD_ALLOC(hsd.request, hsd.request_sz);
+	request_sz = hsd.max_requests * sizeof(*hsd.request);
+	OBD_ALLOC(hsd.request, request_sz);
 	if (!hsd.request)
 		GOTO(out, rc = -ENOMEM);
 
@@ -456,23 +442,22 @@ static int mdt_coordinator(void *data)
 
 		lwi = LWI_TIMEOUT(cfs_time_seconds(cdt->cdt_loop_period),
 				  NULL, NULL);
-		l_wait_event(cdt->cdt_thread.t_ctl_waitq,
-			     (cdt->cdt_thread.t_flags &
-			      (SVC_STOPPING|SVC_EVENT)),
+		l_wait_event(cdt->cdt_waitq,
+			     cdt->cdt_flags & (SVC_STOPPING|SVC_EVENT),
 			     &lwi);
 
 		CDEBUG(D_HSM, "coordinator resumes\n");
 
-		if (cdt->cdt_thread.t_flags & SVC_STOPPING ||
+		if (cdt->cdt_flags & SVC_STOPPING ||
 		    cdt->cdt_state == CDT_STOPPING) {
-			cdt->cdt_thread.t_flags &= ~SVC_STOPPING;
+			cdt->cdt_flags &= ~SVC_STOPPING;
 			rc = 0;
 			break;
 		}
 
 		/* wake up before timeout, new work arrives */
-		if (cdt->cdt_thread.t_flags & SVC_EVENT)
-			cdt->cdt_thread.t_flags &= ~SVC_EVENT;
+		if (cdt->cdt_flags & SVC_EVENT)
+			cdt->cdt_flags &= ~SVC_EVENT;
 
 		/* if coordinator is suspended continue to wait */
 		if (cdt->cdt_state == CDT_DISABLE) {
@@ -486,11 +471,10 @@ static int mdt_coordinator(void *data)
 			/* cdt_max_requests has changed,
 			 * we need to allocate a new buffer
 			 */
-			OBD_FREE(hsd.request, hsd.request_sz);
+			OBD_FREE(hsd.request, request_sz);
 			hsd.max_requests = cdt->cdt_max_requests;
-			hsd.request_sz =
-				   hsd.max_requests * sizeof(*hsd.request);
-			OBD_ALLOC(hsd.request, hsd.request_sz);
+			request_sz = hsd.max_requests * sizeof(*hsd.request);
+			OBD_ALLOC(hsd.request, request_sz);
 			if (!hsd.request) {
 				rc = -ENOMEM;
 				break;
@@ -513,38 +497,19 @@ static int mdt_coordinator(void *data)
 		}
 
 		/* here hsd contains a list of requests to be started */
-		for (i = 0; i < hsd.max_requests; i++) {
-			struct hsm_action_list	*hal;
+		for (i = 0; i < hsd.request_cnt; i++) {
+			struct hsm_scan_request *request = &hsd.request[i];
+			struct hsm_action_list	*hal = request->hal;
 			struct hsm_action_item	*hai;
 			__u64			*cookies;
 			int			 sz, j;
 			enum agent_req_status	 status;
 
 			/* still room for work ? */
-			if (atomic_read(&cdt->cdt_request_count) ==
+			if (atomic_read(&cdt->cdt_request_count) >=
 			    cdt->cdt_max_requests)
 				break;
 
-			if (hsd.request[i].hal == NULL)
-				continue;
-
-			/* found a request, we start it */
-			/* kuc payload allocation so we avoid an additionnal
-			 * allocation in mdt_hsm_agent_send()
-			 */
-			hal = kuc_alloc(hsd.request[i].hal_used_sz,
-					KUC_TRANSPORT_HSM, HMT_ACTION_LIST);
-			if (IS_ERR(hal)) {
-				CERROR("%s: Cannot allocate memory (%d o) "
-				       "for compound "LPX64"\n",
-				       mdt_obd_name(mdt),
-				       hsd.request[i].hal_used_sz,
-				       hsd.request[i].hal->hal_compound_id);
-				continue;
-			}
-			memcpy(hal, hsd.request[i].hal,
-			       hsd.request[i].hal_used_sz);
-
 			rc = mdt_hsm_agent_send(mti, hal, 0);
 			/* if failure, we suppose it is temporary
 			 * if the copy tool failed to do the request
@@ -555,56 +520,41 @@ static int mdt_coordinator(void *data)
 			/* set up cookie vector to set records status
 			 * after copy tools start or failed
 			 */
-			sz = hsd.request[i].hal->hal_count * sizeof(__u64);
+			sz = hal->hal_count * sizeof(__u64);
 			OBD_ALLOC(cookies, sz);
-			if (cookies == NULL) {
-				CERROR("%s: Cannot allocate memory (%d o) "
-				       "for cookies vector "LPX64"\n",
-				       mdt_obd_name(mdt), sz,
-				       hsd.request[i].hal->hal_compound_id);
-				kuc_free(hal, hsd.request[i].hal_used_sz);
+			if (cookies == NULL)
 				continue;
-			}
+
 			hai = hai_first(hal);
-			for (j = 0; j < hsd.request[i].hal->hal_count; j++) {
+			for (j = 0; j < hal->hal_count; j++) {
 				cookies[j] = hai->hai_cookie;
 				hai = hai_next(hai);
 			}
 
 			rc = mdt_agent_record_update(mti->mti_env, mdt, cookies,
-						hsd.request[i].hal->hal_count,
-						status);
+						     hal->hal_count, status);
 			if (rc)
 				CERROR("%s: mdt_agent_record_update() failed, "
 				       "rc=%d, cannot update status to %s "
 				       "for %d cookies\n",
 				       mdt_obd_name(mdt), rc,
 				       agent_req_status2name(status),
-				       hsd.request[i].hal->hal_count);
+				       hal->hal_count);
 
 			OBD_FREE(cookies, sz);
-			kuc_free(hal, hsd.request[i].hal_used_sz);
 		}
 clean_cb_alloc:
 		/* free hal allocated by callback */
-		for (i = 0; i < hsd.max_requests; i++) {
-			if (hsd.request[i].hal) {
-				OBD_FREE(hsd.request[i].hal,
-					 hsd.request[i].hal_sz);
-				hsd.request[i].hal_sz = 0;
-				hsd.request[i].hal = NULL;
-				hsd.request_cnt--;
-			}
-		}
-		LASSERT(hsd.request_cnt == 0);
+		for (i = 0; i < hsd.request_cnt; i++) {
+			struct hsm_scan_request *request = &hsd.request[i];
 
-		/* reset callback data */
-		memset(hsd.request, 0, hsd.request_sz);
+			OBD_FREE(request->hal, request->hal_sz);
+		}
 	}
 	EXIT;
 out:
 	if (hsd.request)
-		OBD_FREE(hsd.request, hsd.request_sz);
+		OBD_FREE(hsd.request, request_sz);
 
 	if (cdt->cdt_state == CDT_STOPPING) {
 		/* request comes from /proc path, so we need to clean cdt
@@ -616,8 +566,8 @@ out:
 		 * by mdt_stop_coordinator(), we have to ack
 		 * and cdt cleaning will be done by event sender
 		 */
-		cdt->cdt_thread.t_flags = SVC_STOPPED;
-		wake_up(&cdt->cdt_thread.t_ctl_waitq);
+		cdt->cdt_flags = SVC_STOPPED;
+		wake_up(&cdt->cdt_waitq);
 	}
 
 	if (rc != 0)
@@ -639,7 +589,7 @@ out:
  * \retval cdt_restore_handle found
  * \retval NULL not found
  */
-static struct cdt_restore_handle *hsm_restore_hdl_find(struct coordinator *cdt,
+struct cdt_restore_handle *mdt_hsm_restore_hdl_find(struct coordinator *cdt,
 						       const struct lu_fid *fid)
 {
 	struct cdt_restore_handle	*crh;
@@ -700,6 +650,17 @@ static int hsm_restore_cb(const struct lu_env *env,
 
 	/* restore request not in a final state */
 
+	/* force replay of restore requests left in started state from previous
+	 * CDT context, to be canceled later if finally found to be incompatible
+	 * when being re-started */
+	if (larr->arr_status == ARS_STARTED) {
+		larr->arr_status = ARS_WAITING;
+		larr->arr_req_change = cfs_time_current_sec();
+		rc = llog_write(env, llh, hdr, hdr->lrh_index);
+		if (rc != 0)
+			GOTO(out, rc);
+	}
+
 	OBD_SLAB_ALLOC_PTR(crh, mdt_hsm_cdt_kmem);
 	if (crh == NULL)
 		RETURN(-ENOMEM);
@@ -788,8 +749,8 @@ int mdt_hsm_cdt_wakeup(struct mdt_device *mdt)
 		RETURN(-ESRCH);
 
 	/* wake up coordinator */
-	cdt->cdt_thread.t_flags = SVC_EVENT;
-	wake_up(&cdt->cdt_thread.t_ctl_waitq);
+	cdt->cdt_flags = SVC_EVENT;
+	wake_up(&cdt->cdt_waitq);
 
 	RETURN(0);
 }
@@ -809,7 +770,7 @@ int mdt_hsm_cdt_init(struct mdt_device *mdt)
 
 	cdt->cdt_state = CDT_STOPPED;
 
-	init_waitqueue_head(&cdt->cdt_thread.t_ctl_waitq);
+	init_waitqueue_head(&cdt->cdt_waitq);
 	mutex_init(&cdt->cdt_llog_lock);
 	init_rwsem(&cdt->cdt_agent_lock);
 	init_rwsem(&cdt->cdt_request_lock);
@@ -876,7 +837,7 @@ int  mdt_hsm_cdt_fini(struct mdt_device *mdt)
  * \retval 0 success
  * \retval -ve failure
  */
-int mdt_hsm_cdt_start(struct mdt_device *mdt)
+static int mdt_hsm_cdt_start(struct mdt_device *mdt)
 {
 	struct coordinator	*cdt = &mdt->mdt_coordinator;
 	int			 rc;
@@ -934,8 +895,8 @@ int mdt_hsm_cdt_start(struct mdt_device *mdt)
 		rc = 0;
 	}
 
-	wait_event(cdt->cdt_thread.t_ctl_waitq,
-		       (cdt->cdt_thread.t_flags & SVC_RUNNING));
+	wait_event(cdt->cdt_waitq,
+		       (cdt->cdt_flags & SVC_RUNNING));
 
 	cdt->cdt_state = CDT_RUNNING;
 	mdt->mdt_opts.mo_coordinator = 1;
@@ -963,10 +924,10 @@ int mdt_hsm_cdt_stop(struct mdt_device *mdt)
 
 	if (cdt->cdt_state != CDT_STOPPING) {
 		/* stop coordinator thread before cleaning */
-		cdt->cdt_thread.t_flags = SVC_STOPPING;
-		wake_up(&cdt->cdt_thread.t_ctl_waitq);
-		wait_event(cdt->cdt_thread.t_ctl_waitq,
-			   cdt->cdt_thread.t_flags & SVC_STOPPED);
+		cdt->cdt_flags = SVC_STOPPING;
+		wake_up(&cdt->cdt_waitq);
+		wait_event(cdt->cdt_waitq,
+			   cdt->cdt_flags & SVC_STOPPED);
 	}
 	cdt->cdt_state = CDT_STOPPED;
 
@@ -1042,7 +1003,7 @@ int mdt_hsm_add_hal(struct mdt_thread_info *mti,
 			if (rc) {
 				CERROR("%s: mdt_agent_record_update() failed, "
 				       "rc=%d, cannot update status to %s "
-				       "for cookie "LPX64"\n",
+				       "for cookie %#llx\n",
 				       mdt_obd_name(mdt), rc,
 				       agent_req_status2name(ARS_CANCELED),
 				       hai->hai_cookie);
@@ -1100,39 +1061,37 @@ out:
 /**
  * swap layouts between 2 fids
  * \param mti [IN] context
- * \param fid1 [IN]
- * \param fid2 [IN]
+ * \param obj [IN]
+ * \param dfid [IN]
  * \param mh_common [IN] MD HSM
  */
 static int hsm_swap_layouts(struct mdt_thread_info *mti,
-			    const lustre_fid *fid, const lustre_fid *dfid,
+			    struct mdt_object *obj, const struct lu_fid *dfid,
 			    struct md_hsm *mh_common)
 {
-	struct mdt_device	*mdt = mti->mti_mdt;
-	struct mdt_object	*child1, *child2;
-	struct mdt_lock_handle	*lh2;
+	struct mdt_object	*dobj;
+	struct mdt_lock_handle	*dlh;
 	int			 rc;
 	ENTRY;
 
-	child1 = mdt_object_find(mti->mti_env, mdt, fid);
-	if (IS_ERR(child1))
-		GOTO(out, rc = PTR_ERR(child1));
+	if (!mdt_object_exists(obj))
+		GOTO(out, rc = -ENOENT);
 
-	/* we already have layout lock on FID so take only
+	/* we already have layout lock on obj so take only
 	 * on dfid */
-	lh2 = &mti->mti_lh[MDT_LH_OLD];
-	mdt_lock_reg_init(lh2, LCK_EX);
-	child2 = mdt_object_find_lock(mti, dfid, lh2, MDS_INODELOCK_LAYOUT);
-	if (IS_ERR(child2))
-		GOTO(out_child1, rc = PTR_ERR(child2));
+	dlh = &mti->mti_lh[MDT_LH_OLD];
+	mdt_lock_reg_init(dlh, LCK_EX);
+	dobj = mdt_object_find_lock(mti, dfid, dlh, MDS_INODELOCK_LAYOUT);
+	if (IS_ERR(dobj))
+		GOTO(out, rc = PTR_ERR(dobj));
 
 	/* if copy tool closes the volatile before sending the final
 	 * progress through llapi_hsm_copy_end(), all the objects
 	 * are removed and mdd_swap_layout LBUG */
-	if (!mdt_object_exists(child2)) {
+	if (!mdt_object_exists(dobj)) {
 		CERROR("%s: Copytool has closed volatile file "DFID"\n",
 		       mdt_obd_name(mti->mti_mdt), PFID(dfid));
-		GOTO(out_child2, rc = -ENOENT);
+		GOTO(out_dobj, rc = -ENOENT);
 	}
 	/* Since we only handle restores here, unconditionally use
 	 * SWAP_LAYOUTS_MDS_HSM flag to ensure original layout will
@@ -1143,17 +1102,15 @@ static int hsm_swap_layouts(struct mdt_thread_info *mti,
 	 * only need to clear RELEASED and DIRTY.
 	 */
 	mh_common->mh_flags &= ~(HS_RELEASED | HS_DIRTY);
-	rc = mdt_hsm_attr_set(mti, child2, mh_common);
+	rc = mdt_hsm_attr_set(mti, dobj, mh_common);
 	if (rc == 0)
 		rc = mo_swap_layouts(mti->mti_env,
-				     mdt_object_child(child1),
-				     mdt_object_child(child2),
+				     mdt_object_child(obj),
+				     mdt_object_child(dobj),
 				     SWAP_LAYOUTS_MDS_HSM);
 
-out_child2:
-	mdt_object_unlock_put(mti, child2, lh2, 1);
-out_child1:
-	mdt_object_put(mti->mti_env, child1);
+out_dobj:
+	mdt_object_unlock_put(mti, dobj, dlh, 1);
 out:
 	RETURN(rc);
 }
@@ -1183,15 +1140,11 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti,
 	/* default is to retry */
 	*status = ARS_WAITING;
 
-	/* find object by FID */
+	/* find object by FID
+	 * if error/removed continue anyway to get correct reporting done */
 	obj = mdt_hsm_get_md_hsm(mti, &car->car_hai->hai_fid, &mh);
 	/* we will update MD HSM only if needed */
 	is_mh_changed = false;
-	if (IS_ERR(obj)) {
-		/* object removed */
-		*status = ARS_SUCCEED;
-		goto unlock;
-	}
 
 	/* no need to change mh->mh_arch_id
 	 * mdt_hsm_get_md_hsm() got it from disk and it is still valid
@@ -1219,14 +1172,16 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti,
 			*status = ARS_SUCCEED;
 			break;
 		default:
+			/* retry only if policy allows it, HP_FLAG_RETRY is set
+			 * and the object is not in error/removed */
 			*status = (cdt->cdt_policy & CDT_NORETRY_ACTION ||
-				   !(pgs->hpk_flags & HP_FLAG_RETRY) ?
-				   ARS_FAILED : ARS_WAITING);
+				   !(pgs->hpk_flags & HP_FLAG_RETRY) ||
+				   IS_ERR(obj)) ? ARS_FAILED : ARS_WAITING;
 			break;
 		}
 
 		if (pgs->hpk_errval > CLF_HSM_MAXERROR) {
-			CERROR("%s: Request "LPX64" on "DFID
+			CERROR("%s: Request %#llx on "DFID
 			       " failed, error code %d too large\n",
 			       mdt_obd_name(mdt),
 			       pgs->hpk_cookie, PFID(&pgs->hpk_fid),
@@ -1250,14 +1205,14 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti,
 			break;
 		case HSMA_CANCEL:
 			hsm_set_cl_event(&cl_flags, HE_CANCEL);
-			CERROR("%s: Failed request "LPX64" on "DFID
+			CERROR("%s: Failed request %#llx on "DFID
 			       " cannot be a CANCEL\n",
 			       mdt_obd_name(mdt),
 			       pgs->hpk_cookie,
 			       PFID(&pgs->hpk_fid));
 			break;
 		default:
-			CERROR("%s: Failed request "LPX64" on "DFID
+			CERROR("%s: Failed request %#llx on "DFID
 			       " %d is an unknown action\n",
 			       mdt_obd_name(mdt),
 			       pgs->hpk_cookie, PFID(&pgs->hpk_fid),
@@ -1297,17 +1252,13 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti,
 			break;
 		case HSMA_CANCEL:
 			hsm_set_cl_event(&cl_flags, HE_CANCEL);
-			CERROR("%s: Successful request "LPX64
-			       " on "DFID
-			       " cannot be a CANCEL\n",
+			CERROR("%s: Successful request %#llx on "DFID" cannot be a CANCEL\n",
 			       mdt_obd_name(mdt),
 			       pgs->hpk_cookie,
 			       PFID(&pgs->hpk_fid));
 			break;
 		default:
-			CERROR("%s: Successful request "LPX64
-			       " on "DFID
-			       " %d is an unknown action\n",
+			CERROR("%s: Successful request %#llx on "DFID" %d is an unknown action\n",
 			       mdt_obd_name(mdt),
 			       pgs->hpk_cookie, PFID(&pgs->hpk_fid),
 			       car->car_hai->hai_action);
@@ -1324,23 +1275,20 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti,
 				 mh.mh_flags & HS_DIRTY ? CLF_HSM_DIRTY : 0);
 
 	/* unlock is done later, after layout lock management */
-	if (is_mh_changed)
+	if (is_mh_changed && !IS_ERR(obj))
 		rc = mdt_hsm_attr_set(mti, obj, &mh);
 
-unlock:
 	/* we give back layout lock only if restore was successful or
-	 * if restore was canceled or if policy is to not retry
+	 * if no retry will be attempted and if object is still alive,
 	 * in other cases we just unlock the object */
-	if (car->car_hai->hai_action == HSMA_RESTORE &&
-	    (pgs->hpk_errval == 0 || pgs->hpk_errval == ECANCELED ||
-	     cdt->cdt_policy & CDT_NORETRY_ACTION)) {
+	if (car->car_hai->hai_action == HSMA_RESTORE) {
 		struct cdt_restore_handle	*crh;
 
 		/* restore in data FID done, we swap the layouts
 		 * only if restore is successful */
-		if (pgs->hpk_errval == 0) {
-			rc = hsm_swap_layouts(mti, &car->car_hai->hai_fid,
-					      &car->car_hai->hai_dfid, &mh);
+		if (pgs->hpk_errval == 0 && !IS_ERR_OR_NULL(obj)) {
+			rc = hsm_swap_layouts(mti, obj, &car->car_hai->hai_dfid,
+					      &mh);
 			if (rc) {
 				if (cdt->cdt_policy & CDT_NORETRY_ACTION)
 					*status = ARS_FAILED;
@@ -1353,15 +1301,16 @@ unlock:
 
 		/* give back layout lock */
 		mutex_lock(&cdt->cdt_restore_lock);
-		crh = hsm_restore_hdl_find(cdt, &car->car_hai->hai_fid);
+		crh = mdt_hsm_restore_hdl_find(cdt, &car->car_hai->hai_fid);
 		if (crh != NULL)
 			list_del(&crh->crh_list);
 		mutex_unlock(&cdt->cdt_restore_lock);
-		/* just give back layout lock, we keep
-		 * the reference which is given back
-		 * later with the lock for HSM flags */
-		if (!IS_ERR(obj) && crh != NULL)
-			mdt_object_unlock(mti, obj, &crh->crh_lh, 1);
+		/* Just give back layout lock, we keep the reference
+		 * which is given back later with the lock for HSM
+		 * flags.
+		 * XXX obj may be invalid so we do not pass it. */
+		if (crh != NULL)
+			mdt_object_unlock(mti, NULL, &crh->crh_lh, 1);
 
 		if (crh != NULL)
 			OBD_SLAB_FREE_PTR(crh, mdt_hsm_cdt_kmem);
@@ -1370,11 +1319,12 @@ unlock:
 	GOTO(out, rc);
 
 out:
-	if (obj != NULL && !IS_ERR(obj)) {
-		mo_changelog(env, CL_HSM, cl_flags,
-			     mdt_object_child(obj));
+	/* always add a ChangeLog record */
+	mo_changelog(env, CL_HSM, cl_flags, mdt->mdt_child,
+		     &car->car_hai->hai_fid);
+
+	if (!IS_ERR(obj))
 		mdt_object_put(mti->mti_env, obj);
-	}
 
 	RETURN(rc);
 }
@@ -1404,7 +1354,7 @@ int mdt_hsm_update_request_state(struct mdt_thread_info *mti,
 	/* first do sanity checks */
 	car = mdt_cdt_update_request(cdt, pgs);
 	if (IS_ERR(car)) {
-		CERROR("%s: Cannot find running request for cookie "LPX64
+		CERROR("%s: Cannot find running request for cookie %#llx"
 		       " on fid="DFID"\n",
 		       mdt_obd_name(mdt),
 		       pgs->hpk_cookie, PFID(&pgs->hpk_fid));
@@ -1412,7 +1362,7 @@ int mdt_hsm_update_request_state(struct mdt_thread_info *mti,
 		RETURN(PTR_ERR(car));
 	}
 
-	CDEBUG(D_HSM, "Progress received for fid="DFID" cookie="LPX64
+	CDEBUG(D_HSM, "Progress received for fid="DFID" cookie=%#llx"
 		      " action=%s flags=%d err=%d fid="DFID" dfid="DFID"\n",
 		      PFID(&pgs->hpk_fid), pgs->hpk_cookie,
 		      hsm_copytool_action2name(car->car_hai->hai_action),
@@ -1431,7 +1381,7 @@ int mdt_hsm_update_request_state(struct mdt_thread_info *mti,
 	     car->car_hai->hai_action == HSMA_ARCHIVE) &&
 	    (!lu_fid_eq(&pgs->hpk_fid, &car->car_hai->hai_dfid) &&
 	     !lu_fid_eq(&pgs->hpk_fid, &car->car_hai->hai_fid))) {
-		CERROR("%s: Progress on "DFID" for cookie "LPX64
+		CERROR("%s: Progress on "DFID" for cookie %#llx"
 		       " does not match request FID "DFID" nor data FID "
 		       DFID"\n",
 		       mdt_obd_name(mdt),
@@ -1442,7 +1392,7 @@ int mdt_hsm_update_request_state(struct mdt_thread_info *mti,
 	}
 
 	if (pgs->hpk_errval != 0 && !(pgs->hpk_flags & HP_FLAG_COMPLETED)) {
-		CERROR("%s: Progress on "DFID" for cookie "LPX64" action=%s"
+		CERROR("%s: Progress on "DFID" for cookie %#llx action=%s"
 		       " is not coherent (err=%d and not completed"
 		       " (flags=%d))\n",
 		       mdt_obd_name(mdt),
@@ -1465,7 +1415,7 @@ int mdt_hsm_update_request_state(struct mdt_thread_info *mti,
 		/* remove request from memory list */
 		mdt_cdt_remove_request(cdt, pgs->hpk_cookie);
 
-		CDEBUG(D_HSM, "Updating record: fid="DFID" cookie="LPX64
+		CDEBUG(D_HSM, "Updating record: fid="DFID" cookie=%#llx"
 			      " action=%s status=%s\n", PFID(&pgs->hpk_fid),
 		       pgs->hpk_cookie,
 		       hsm_copytool_action2name(car->car_hai->hai_action),
@@ -1480,7 +1430,7 @@ int mdt_hsm_update_request_state(struct mdt_thread_info *mti,
 			if (rc1)
 				CERROR("%s: mdt_agent_record_update() failed,"
 				       " rc=%d, cannot update status to %s"
-				       " for cookie "LPX64"\n",
+				       " for cookie %#llx\n",
 				       mdt_obd_name(mdt), rc1,
 				       agent_req_status2name(status),
 				       pgs->hpk_cookie);
@@ -1682,8 +1632,8 @@ bool mdt_hsm_is_action_compat(const struct hsm_action_item *hai,
 		is_compat = true;
 		break;
 	}
-	CDEBUG(D_HSM, "fid="DFID" action=%s flags="LPX64
-		      " extent="LPX64"-"LPX64" hsm_flags=%.8X %s\n",
+	CDEBUG(D_HSM, "fid="DFID" action=%s flags=%#llx"
+		      " extent=%#llx-%#llx hsm_flags=%.8X %s\n",
 		      PFID(&hai->hai_fid),
 		      hsm_copytool_action2name(hai->hai_action), rq_flags,
 		      hai->hai_extent.offset, hai->hai_extent.length,
@@ -1738,7 +1688,7 @@ static void hsm_policy_bit2str(struct seq_file *m, const __u64 mask,
 	ENTRY;
 
 	if (hexa)
-		seq_printf(m, "("LPX64") ", mask);
+		seq_printf(m, "(%#llx) ", mask);
 
 	for (i = 0; i < CDT_POLICY_SHIFT_COUNT; i++) {
 		bit = (1ULL << i);
@@ -1831,7 +1781,7 @@ mdt_hsm_policy_seq_write(struct file *file, const char __user *buffer,
 
 	} while (start != NULL);
 
-	CDEBUG(D_HSM, "%s: new policy: rm="LPX64" add="LPX64" set="LPX64"\n",
+	CDEBUG(D_HSM, "%s: new policy: rm=%#llx add=%#llx set=%#llx\n",
 	       mdt_obd_name(mdt), remove_mask, add_mask, set_mask);
 
 	/* if no sign in all string, it is a clear and set
@@ -1862,7 +1812,7 @@ static int mdt_hsm_##VAR##_seq_show(struct seq_file *m, void *data)	\
 	struct coordinator	*cdt = &mdt->mdt_coordinator;		\
 	ENTRY;								\
 									\
-	seq_printf(m, LPU64"\n", (__u64)cdt->VAR);			\
+	seq_printf(m, "%llu\n", (__u64)cdt->VAR);			\
 	RETURN(0);							\
 }									\
 static ssize_t								\
@@ -1873,14 +1823,14 @@ mdt_hsm_##VAR##_seq_write(struct file *file, const char __user *buffer,	\
 	struct seq_file		*m = file->private_data;		\
 	struct mdt_device	*mdt = m->private;			\
 	struct coordinator	*cdt = &mdt->mdt_coordinator;		\
-	int			 val;					\
+	__s64			 val;					\
 	int			 rc;					\
 	ENTRY;								\
 									\
-	rc = lprocfs_write_helper(buffer, count, &val);			\
+	rc = lprocfs_str_to_s64(buffer, count, &val);			\
 	if (rc)								\
 		RETURN(rc);						\
-	if (val > 0) {							\
+	if (val > 0 && val < INT_MAX) {					\
 		cdt->VAR = val;						\
 		RETURN(count);						\
 	}								\