From a6b3faffeaea7abbef389ad5296880a522a13460 Mon Sep 17 00:00:00 2001
From: Qian Yingjin
Date: Tue, 12 Mar 2024 21:33:19 -0400
Subject: [PATCH] LU-17634 hsm: serialize HSM restore for a file on a client

For a file in the HSM released+exists+archived state, starting tens of
processes on a client to read it in parallel may cause one of the read
processes to report a "No data available" (-ENODATA) error. After
analyzing the error, we found the following bug in the HSM code when
reading a released file on a client that already holds a LAYOUT lock:

P1:
->vvp_io_init()
  ->lov_io_init_released(): io->ci_restore_needed = 1;
->vvp_io_fini()
  ->ll_layout_restore()
    ->mdc_ioc_hsm_request()
      ->mdc_hsm_request_lock_to_cancel()
        ->ldlm_cancel_resource_local()
          moves the LAYOUT lock from the resource lock list into its
          cancel list; the LAYOUT lock on the client is NOT yet
          cancelled via ELC...

P2:
->vvp_io_init()
  ->lov_io_init_released(): io->ci_restore_needed = 1;
->vvp_io_fini()
  ->ll_layout_restore()
    ->mdc_ioc_hsm_request()
      ->mdc_hsm_request_lock_to_cancel()
        SKIP: no conflicting LAYOUT lock is left on the resource lock
        list, as P1 has already moved it (if any) into its cancel list
->mdt_hsm_request()
  ->cdt_restore_handle_add()
    ->cdt_restore_handle_find()
    ->list_add_tail(): adds @crh to the restore handle list; the EX
      LAYOUT lock that cancels the cached LAYOUT locks on the client
      side is NOT yet taken...

P3:
->ll_file_read_iter()
  ->ll_do_fast_read(): => return -ENODATA;
->vvp_io_init()
  ->lov_io_init_released(): io->ci_restore_needed = 1;
->vvp_io_fini()
  ->ll_layout_restore()
    ->mdc_ioc_hsm_request()
      ->mdc_hsm_request_lock_to_cancel()
        SKIP, as P1 has already moved the conflicting LAYOUT lock
        (if any) into its cancel list
->mdt_hsm_request()
  ->cdt_restore_handle_add()
    ->cdt_restore_handle_find()
      SKIP: a restore handle with the same FID, added by P2, is
      already in the restore handle list
->ll_layout_refresh()
  ->io->ci_need_restart = vio->vui_layout_gen != gen;
    The layout generation has not changed, because the LAYOUT lock on
    the client has not been revoked yet, so the I/O is not
    restarted...
->return -ENODATA; => returned from the fast read

Fix this bug by simply serializing the HSM restore operations for a
file on a client with @lli->lli_layout_mutex.

Add sanity-hsm test_12{t,u} to verify it.
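As a minimal illustration of the serialization pattern (a userspace
sketch only, not Lustre code: toy_inode, layout_restore() and reader()
are hypothetical stand-ins for the inode, ll_layout_restore() and the
read path), holding one per-inode mutex across the whole restore closes
the window in which P3 could see P2's restore handle while the layout
generation is still unchanged:

  /* Toy model of the race and the fix; build with: cc -pthread */
  #include <pthread.h>
  #include <stdio.h>

  struct toy_inode {
          pthread_mutex_t layout_mutex; /* stand-in for lli_layout_mutex */
          int layout_gen;               /* bumped when the layout changes */
          int released;                 /* 1 while the file has no data */
  };

  /* Stand-in for ll_layout_restore(): issue the HSM RESTORE request
   * and wait for the new layout, all under the layout mutex. */
  static int layout_restore(struct toy_inode *inode)
  {
          int rc = pthread_mutex_lock(&inode->layout_mutex);

          if (rc)
                  return rc;
          if (inode->released) {        /* first caller does the work */
                  inode->released = 0;  /* "copytool" restores the data */
                  inode->layout_gen++;  /* layout generation changes */
          }
          pthread_mutex_unlock(&inode->layout_mutex);
          return 0;
  }

  static void *reader(void *arg)
  {
          struct toy_inode *inode = arg;

          /* The fast read fails on a released file, so the slow path
           * triggers a restore, then the read is retried. */
          if (inode->released && layout_restore(inode) != 0)
                  return NULL;          /* bail out, like GOTO(out_free, rc) */
          printf("read ok, layout_gen=%d\n", inode->layout_gen);
          return NULL;
  }

  int main(void)
  {
          struct toy_inode inode = {
                  .layout_mutex = PTHREAD_MUTEX_INITIALIZER,
                  .released     = 1,
          };
          pthread_t tids[32];
          int i;

          for (i = 0; i < 32; i++)
                  pthread_create(&tids[i], NULL, reader, &inode);
          for (i = 0; i < 32; i++)
                  pthread_join(&tids[i], NULL);
          return 0;
  }

Only the first reader to take the mutex performs the restore; every
later reader blocks until the layout generation has already changed, so
the retried read cannot return -ENODATA.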
Signed-off-by: Qian Yingjin
Change-Id: Idc2a8c1818386c64798d7e28500c20c80ff369f1
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54366
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Etienne AUJAMES
Reviewed-by: Andreas Dilger
Reviewed-by: Li Xi
Reviewed-by: Oleg Drokin
---
 lustre/llite/file.c        | 11 +++++--
 lustre/tests/sanity-hsm.sh | 72 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+), 2 deletions(-)

diff --git a/lustre/llite/file.c b/lustre/llite/file.c
index aa7b73e..6260687 100644
--- a/lustre/llite/file.c
+++ b/lustre/llite/file.c
@@ -6579,8 +6579,10 @@ int ll_layout_write_intent(struct inode *inode, enum layout_intent_opc opc,
  */
 int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
 {
-	struct hsm_user_request *hur;
-	int len, rc;
+	struct ll_inode_info *lli = ll_i2info(inode);
+	struct hsm_user_request *hur;
+	int len, rc;
+
 	ENTRY;
 
 	len = sizeof(struct hsm_user_request) +
@@ -6597,8 +6599,13 @@ int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
 	hur->hur_user_item[0].hui_extent.offset = offset;
 	hur->hur_user_item[0].hui_extent.length = length;
 	hur->hur_request.hr_itemcount = 1;
+	rc = mutex_lock_interruptible(&lli->lli_layout_mutex);
+	if (rc)
+		GOTO(out_free, rc);
 	rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp,
 			   len, hur, NULL);
+	mutex_unlock(&lli->lli_layout_mutex);
+out_free:
 	OBD_FREE(hur, len);
 	RETURN(rc);
 }
diff --git a/lustre/tests/sanity-hsm.sh b/lustre/tests/sanity-hsm.sh
index 7f5d893..0324adf 100755
--- a/lustre/tests/sanity-hsm.sh
+++ b/lustre/tests/sanity-hsm.sh
@@ -1448,6 +1448,78 @@ test_12s() {
 }
 run_test 12s "race between restore requests"
 
+test_12t() {
+	copytool setup
+
+	mkdir_on_mdt0 $DIR/$tdir
+
+	local file=$DIR/$tdir/$tfile
+	local fid=$(copy_file /etc/hosts $file)
+	local n=32
+
+	$LFS hsm_archive -a $HSM_ARCHIVE_NUMBER $file ||
+		error "failed to HSM archive $file"
+	wait_request_state $fid ARCHIVE SUCCEED
+	rm -f $file || error "failed to rm $file"
+
+	copytool import $fid $file
+	check_hsm_flags $file "0x0000000d"
+
+	local -a pids
+
+	for ((i=0; i < $n; i++)); do
+		cat $file > /dev/null &
+		pids[$i]=$!
+	done
+
+	for ((i=0; i < $n; i++)); do
+		wait ${pids[$i]} || error "$?: failed to read pid=${pids[$i]}"
+	done
+}
+run_test 12t "Multiple parallel reads for a HSM imported file"
+
+test_12u() {
+	local dir=$DIR/$tdir
+	local n=10
+	local t=32
+
+	copytool setup
+	mkdir_on_mdt0 $dir || error "failed to mkdir $dir"
+
+	local -a pids
+	local -a fids
+
+	for ((i=0; i<$n; i++)); do
+		fids[$i]=$(copy_file /etc/hosts $dir/$tfile.$i)
+		$LFS hsm_archive -a $HSM_ARCHIVE_NUMBER $dir/$tfile.$i ||
+			error "failed to HSM archive $dir/$tfile.$i"
+	done
+
+	for ((i=0; i<$n; i++)); do
+		wait_request_state ${fids[$i]} ARCHIVE SUCCEED
+		rm $dir/$tfile.$i || error "failed to rm $dir/$tfile.$i"
+	done
+
+	for ((i=0; i<$n; i++)); do
+		copytool import ${fids[$i]} $dir/$tfile.$i
+		check_hsm_flags $dir/$tfile.$i "0x0000000d"
+	done
+
+	for ((i=0; i<$n; i++)); do
+		for ((j=0; j<$t; j++)); do
+			cat $dir/$tfile.$i > /dev/null &
+			pids[$((i * t + j))]=$!
+		done
+	done
+
+	local pid
+
+	for pid in "${pids[@]}"; do
+		wait $pid || error "$pid: failed to cat file: $?"
+	done
+}
+run_test 12u "Multiple reads on multiple HSM imported files in parallel"
+
 test_13() {
 	local -i i j k=0
 	for i in {1..10}; do
-- 
1.8.3.1
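To run only the two new tests against an existing test setup (assuming
the standard Lustre test-framework conventions, where ONLY selects the
tests to execute):

  ONLY="12t 12u" bash lustre/tests/sanity-hsm.sh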