From: Qian Yingjin Date: Fri, 25 Jun 2021 08:22:35 +0000 (+0800) Subject: LU-14793 hsm: record index for further HSM action scanning X-Git-Tag: 2.14.56~94 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=refs%2Fchanges%2F77%2F44077%2F11;p=fs%2Flustre-release.git LU-14793 hsm: record index for further HSM action scanning There is contention between HSM archive request and "hsm_cdtr" kernel thread: ->mdt_hsm_request() ->mdt_hsm_add_actions() ->mdt_hsm_register_hal() ->mdt_agent_record_add() ->down_write(&cdt->cdt_llog_lock) ->llog_cat_add() ->up_write(&cdt->cdt_llog_lock) ->mdt_coordinator() ->cdt_llog_process() ->down_write(&cdt->cdt_llog_lock); ->llog_cat_process() ->up_write(&cdt->cdt_llog_lock); HSM archive request and HSM cat llog scanning in the kernel daemon "hsm_cdtr" are both contending for the write llog lock to add or update the "hsm_actions" llog. In the testing, it uses max_requests = 1000000. In the current implementation, it means the kernel daemon thread "hsm_cdtr" needs to scan nearly the whole "hsm_actions" llog from the beginning position with the write llog lock held. This will slow down the HSM archive requests, which are contending for the write llog lock. As the llog is append-only, we record the latest handled position in the llog, thus the next scanning can start from the previously recorded position (llog index) and does not need to start from the beginning. Another way to mitigate this problem is: when the llog scanner finds that other processes are contending for the llog lock, it will stop the llog scanning and release the write llog lock properly for incoming HSM archive requests. After applying this patch, with 200000 HSM actions in the llog, the time to queue 10000 HSM archive requests reduces from 10 seconds to 4 seconds. 
Signed-off-by: Qian Yingjin Change-Id: I2e92daf34844605ee648787daf859143335c68bf Reviewed-on: https://review.whamcloud.com/44077 Reviewed-by: Andreas Dilger Reviewed-by: Sergey Cheremencev Tested-by: jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin --- diff --git a/lustre/mdt/mdt_coordinator.c b/lustre/mdt/mdt_coordinator.c index 2dbcebc..a1fc6b0 100644 --- a/lustre/mdt/mdt_coordinator.c +++ b/lustre/mdt/mdt_coordinator.c @@ -141,6 +141,8 @@ struct hsm_scan_data { */ bool hsd_housekeeping; bool hsd_one_restore; + u32 hsd_start_cat_idx; + u32 hsd_start_rec_idx; int hsd_action_count; int hsd_request_len; /* array alloc len */ int hsd_request_count; /* array used count */ @@ -158,6 +160,7 @@ static int mdt_cdt_waiting_cb(const struct lu_env *env, struct hsm_action_item *hai; size_t hai_size; u32 archive_id; + bool wrapped; int i; /* Are agents full? */ @@ -300,6 +303,16 @@ static int mdt_cdt_waiting_cb(const struct lu_env *env, larr->arr_hdr.lrh_index); } + wrapped = llh->lgh_hdr->llh_cat_idx >= llh->lgh_last_idx && + llh->lgh_hdr->llh_count > 1; + if ((!wrapped && llh->lgh_hdr->llh_cat_idx > hsd->hsd_start_cat_idx) || + (wrapped && llh->lgh_hdr->llh_cat_idx < hsd->hsd_start_cat_idx) || + (llh->lgh_hdr->llh_cat_idx == hsd->hsd_start_cat_idx && + larr->arr_hdr.lrh_index > hsd->hsd_start_rec_idx)) { + hsd->hsd_start_cat_idx = llh->lgh_hdr->llh_cat_idx; + hsd->hsd_start_rec_idx = larr->arr_hdr.lrh_index; + } + RETURN(0); } @@ -572,6 +585,8 @@ static int mdt_coordinator(void *data) int update_idx = 0; int updates_sz; int updates_cnt; + u32 start_cat_idx; + u32 start_rec_idx; struct hsm_record_update *updates; /* Limit execution of the expensive requests traversal @@ -605,8 +620,12 @@ static int mdt_coordinator(void *data) ktime_get_real_seconds()) { last_housekeeping = ktime_get_real_seconds(); hsd.hsd_housekeeping = true; + start_cat_idx = 0; + start_rec_idx = 0; } else if (cdt->cdt_event) { hsd.hsd_housekeeping = false; + start_cat_idx = hsd.hsd_start_cat_idx; + 
start_rec_idx = hsd.hsd_start_rec_idx; } else { continue; } @@ -644,7 +663,8 @@ static int mdt_coordinator(void *data) hsd.hsd_one_restore = false; rc = cdt_llog_process(mti->mti_env, mdt, mdt_coordinator_cb, - &hsd, 0, 0, WRITE); + &hsd, start_cat_idx, start_rec_idx, + WRITE); if (rc < 0) goto clean_cb_alloc; @@ -654,6 +674,9 @@ static int mdt_coordinator(void *data) if (list_empty(&cdt->cdt_agents)) { CDEBUG(D_HSM, "no agent available, " "coordinator sleeps\n"); + /* reset HSM scanning index range. */ + hsd.hsd_start_cat_idx = start_cat_idx; + hsd.hsd_start_rec_idx = start_rec_idx; goto clean_cb_alloc; } @@ -706,6 +729,15 @@ static int mdt_coordinator(void *data) hai = hai_next(hai); update_idx++; } + + /* TODO: narrow down the HSM action range that already + * scanned accroding to the cookies when a failure + * occurs. + */ + if (rc) { + hsd.hsd_start_cat_idx = start_cat_idx; + hsd.hsd_start_rec_idx = start_rec_idx; + } } if (update_idx) { diff --git a/lustre/tests/sanity-hsm.sh b/lustre/tests/sanity-hsm.sh index 0394610..d739235 100755 --- a/lustre/tests/sanity-hsm.sh +++ b/lustre/tests/sanity-hsm.sh @@ -2821,6 +2821,91 @@ test_40() { } run_test 40 "Parallel archive requests" +hsm_archive_batch() { + local files_num=$1 + local batch_max=$2 + local filebase=$3 + local batch_num=0 + local fileset="" + local i=0 + + while [ $i -lt $files_num ]; do + if [ $batch_num -eq $batch_max ]; then + $LFS hsm_archive $fileset || error "HSM archive failed" + # Reset the batch container. 
+ fileset="" + batch_num=0 + fi + + fileset+="${filebase}$i " + batch_num=$(( batch_num + 1 )) + i=$(( i + 1 )) + done + + if [ $batch_num -ne 0 ]; then + $LFS hsm_archive $fileset || error "HSM archive failed" + fileset="" + batch_num=0 + fi +} + +test_50() { + local dir=$DIR/$tdir + local batch_max=50 + + set_hsm_param max_requests 1000000 + mkdir $dir || error "mkdir $dir failed" + df -i $MOUNT + + local start + local elapsed + local files_num + local filebase + + files_num=10000 + filebase="$dir/$tfile.start." + createmany -m $filebase $files_num || + error "createmany -m $filebase failed: $?" + + start=$SECONDS + hsm_archive_batch $files_num $batch_max "$filebase" + elapsed=$((SECONDS - start)) + do_facet $SINGLEMDS "$LCTL get_param -n \ + $HSM_PARAM.actions | grep WAITING | wc -l" + unlinkmany $filebase $files_num || error "unlinkmany $filabase failed" + echo "Start Phase files_num: $files_num time: $elapsed" + + files_num=20000 + filebase="$dir/$tfile.in." + createmany -m $filebase $files_num || + error "createmany -m $filebase failed: $?" + start=$SECONDS + hsm_archive_batch $files_num $batch_max "$filebase" + elapsed=$((SECONDS - start)) + unlinkmany $filebase $files_num || error "unlinkmany $filabase failed" + echo "Middle Phase files_num: $files_num time: $elapsed" + + files_num=10000 + filebase="$dir/$tfile.end." + createmany -m $filebase $files_num || + error "createmany -m $filebase failed: $?" + + start=$SECONDS + hsm_archive_batch $files_num $batch_max "$filebase" + elapsed=$((SECONDS - start)) + do_facet $SINGLEMDS "$LCTL get_param -n \ + $HSM_PARAM.actions | grep WAITING | wc -l" + + unlinkmany $filebase $files_num || error "unlinkmany $filebase failed" + echo "End Phase files_num: $files_num time: $elapsed" + + do_facet $SINGLEMDS "$LCTL get_param -n \ + $HSM_PARAM.actions | grep WAITING | wc -l" + + cdt_purge +} +run_test 50 "Archive with large number of pending HSM actions" + test_52() { # test needs a running copytool copytool setup