From c4c8cd23f9a3bb4efc9004134aa9c1ac4acceaf9 Mon Sep 17 00:00:00 2001 From: Jinshan Xiong Date: Fri, 25 Aug 2017 23:53:58 -0700 Subject: [PATCH] LU-9771 flr: randomize mirror pick Pick mirror randomly for read on client side and write on the MDT. Test-Parameters: testlist=sanity-flr Signed-off-by: Jinshan Xiong Change-Id: I8dff91e6b7354a9841a2f1595df611717fe51afb Reviewed-on: https://review.whamcloud.com/29097 Reviewed-by: Bobi Jam Tested-by: Jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo --- lustre/include/obd_support.h | 1 + lustre/llite/lcommon_cl.c | 4 + lustre/lod/lod_object.c | 12 ++- lustre/lov/lov_object.c | 19 +++++ lustre/tests/sanity-flr.sh | 185 ++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 218 insertions(+), 3 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index e56ec02..57ac467 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -613,6 +613,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_FLR_GLIMPSE_IMMUTABLE 0x1A00 #define OBD_FAIL_FLR_LV_DELAY 0x1A01 #define OBD_FAIL_FLR_LV_INC 0x1A02 +#define OBD_FAIL_FLR_RANDOM_PICK_MIRROR 0x1A03 /* DT */ #define OBD_FAIL_DT_DECLARE_ATTR_GET 0x2000 diff --git a/lustre/llite/lcommon_cl.c b/lustre/llite/lcommon_cl.c index 698b242..0bad830 100644 --- a/lustre/llite/lcommon_cl.c +++ b/lustre/llite/lcommon_cl.c @@ -174,6 +174,10 @@ int cl_file_inode_init(struct inode *inode, struct lustre_md *md) result = PTR_ERR(clob); } else { result = cl_conf_set(env, lli->lli_clob, &conf); + if (result == -EBUSY) { + /* ignore the error since I/O will handle it later */ + result = 0; + } } cl_env_put(env, &refcheck); diff --git a/lustre/lod/lod_object.c b/lustre/lod/lod_object.c index 2afb9c0..1372f31 100644 --- a/lustre/lod/lod_object.c +++ b/lustre/lod/lod_object.c @@ -5366,6 +5366,7 @@ static int lod_declare_update_rdonly(const struct lu_env *env, struct lod_layout_component *lod_comp; struct layout_intent *layout = mlc->mlc_intent; struct lu_extent extent = layout->li_extent; + unsigned int seq = 0; int picked; int i; int rc; @@ -5378,6 +5379,11 @@ static int lod_declare_update_rdonly(const struct lu_env *env, CDEBUG(D_LAYOUT, DFID": trying to write :"DEXT"\n", PFID(lod_object_fid(lo)), PEXT(&extent)); + if (OBD_FAIL_CHECK(OBD_FAIL_FLR_RANDOM_PICK_MIRROR)) { + get_random_bytes(&seq, sizeof(seq)); + seq %= lo->ldo_mirror_count; + } + /** * Pick a mirror as the primary. * Now it only picks the first mirror, this algo can be @@ -5385,8 +5391,10 @@ static int lod_declare_update_rdonly(const struct lu_env *env, * the availability of OSTs. */ for (picked = -1, i = 0; i < lo->ldo_mirror_count; i++) { - if (!lo->ldo_mirrors[i].lme_stale) { - picked = i; + int index = (i + seq) % lo->ldo_mirror_count; + + if (!lo->ldo_mirrors[index].lme_stale) { + picked = index; break; } } diff --git a/lustre/lov/lov_object.c b/lustre/lov/lov_object.c index 15d5c3c..94b98af 100644 --- a/lustre/lov/lov_object.c +++ b/lustre/lov/lov_object.c @@ -776,6 +776,25 @@ static int lov_init_composite(const struct lu_env *env, struct lov_device *dev, GOTO(out, result = -EINVAL); } + if (OBD_FAIL_CHECK(OBD_FAIL_FLR_RANDOM_PICK_MIRROR)) { + unsigned int seq; + + get_random_bytes(&seq, sizeof(seq)); + seq %= mirror_count; + + i = 0; + lov_foreach_mirror_entry(lov, lre) { + i++; + if (lre->lre_stale) + continue; + + if (!seq--) { + comp->lo_preferred_mirror = i - 1; + break; + } + } + } + LASSERT(comp->lo_preferred_mirror >= 0); EXIT; diff --git a/lustre/tests/sanity-flr.sh b/lustre/tests/sanity-flr.sh index 8a2af4e..3e50c77 100644 --- a/lustre/tests/sanity-flr.sh +++ b/lustre/tests/sanity-flr.sh @@ -12,7 +12,7 @@ export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/../utils:$PATH:/sbin ONLY=${ONLY:-"$*"} # Bug number for skipped test: -ALWAYS_EXCEPT="$SANITY_FLR_EXCEPT" +ALWAYS_EXCEPT="$SANITY_FLR_EXCEPT 201" # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! [ "$ALWAYS_EXCEPT$EXCEPT" ] && @@ -725,6 +725,189 @@ test_38() { } run_test 38 "resync" +ctrl_file=$(mktemp /tmp/CTRL.XXXXXX) +lock_file=$(mktemp /var/lock/FLR.XXXXXX) + +write_file_200() { + local tf=$1 + + local fsize=$(stat --printf=%s $tf) + + while [ -f $ctrl_file ]; do + local off=$((RANDOM << 8)) + local len=$((RANDOM << 5 + 131072)) + + [ $((off + len)) -gt $fsize ] && { + fsize=$((off + len)) + echo "Extending file size to $fsize .." + } + + flock -s $lock_file -c \ + "$MULTIOP $tf oO_WRONLY:z${off}w${len}c" || + { rm -f $ctrl_file; + error "failed writing to $off:$len"; } + sleep 0.$((RANDOM % 2 + 1)) + done +} + +read_file_200() { + local tf=$1 + + while [ -f $ctrl_file ]; do + flock -s $lock_file -c "cat $tf &> /dev/null" || + { rm -f $ctrl_file; error "read failed"; } + sleep 0.$((RANDOM % 2 + 1)) + done +} + +resync_file_200() { + local tf=$1 + + options=("" "-e resync_start" "-e delay_before_copy -d 1" "" "") + + exec 200<>$lock_file + while [ -f $ctrl_file ]; do + local index=$((RANDOM % ${#options[@]})) + local lock_taken=false + + [ $((RANDOM % 4)) -eq 0 ] && { + index=0 + lock_taken=true + echo -n "lock to " + } + + echo -n "resync file $tf with '${options[$index]}' .." + + $lock_taken && flock -x 200 + mirror_io resync ${options[$index]} $tf &> /dev/null && + echo "done" || echo "failed" + + $lock_taken && flock -u 200 + + sleep 0.$((RANDOM % 8 + 1)) + done +} + +test_200() { + local tf=$DIR/$tfile + local tf2=$DIR2/$tfile + local tf3=$DIR3/$tfile + + $LFS setstripe -E 1M -E 2M -c 2 -E 4M -E 16M -E eof $tf + $LFS setstripe -E 2M -E 6M -c 2 -E 8M -E 32M -E eof $tf-2 + $LFS setstripe -E 4M -c 2 -E 8M -E 64M -E eof $tf-3 + + $LFS setstripe --component-add --mirror=$tf-2 $tf + $LFS setstripe --component-add --mirror=$tf-3 $tf + + mkdir -p $MOUNT2 && mount_client $MOUNT2 + + mkdir -p $MOUNT3 && mount_client $MOUNT3 + + verify_flr_state $tf3 "read_only" + + #define OBD_FAIL_FLR_RANDOM_PICK_MIRROR 0x1A03 + $LCTL set_param fail_loc=0x1A03 + + local mds_idx=mds$(($($LFS getstripe -M $tf) + 1)) + do_facet $mds_idx $LCTL set_param fail_loc=0x1A03 + + declare -a pids + + write_file_200 $tf & + pids+=($!) + + read_file_200 $tf & + pids+=($!) + + write_file_200 $tf2 & + pids+=($!) + + read_file_200 $tf2 & + pids+=($!) + + resync_file_200 $tf3 & + pids+=($!) + + local sleep_time=60 + [ "$SLOW" = "yes" ] && sleep_time=360 + while [ $sleep_time -gt 0 -a -f $ctrl_file ]; do + sleep 1 + ((--sleep_time)) + done + + rm -f $ctrl_file + + echo "Waiting ${pids[@]}" + wait ${pids[@]} + + umount_client $MOUNT2 + umount_client $MOUNT3 + + rm -f $lock_file + + # resync and verify mirrors + mirror_io resync $tf + get_mirror_ids $tf + + local csum=$(mirror_io dump -i ${mirror_array[0]} $tf | md5sum) + for id in ${mirror_array[@]:1}; do + [ "$(mirror_io dump -i $id $tf | md5sum)" = "$csum" ] || + error "checksum error for mirror $id" + done + + true +} +run_test 200 "stress test" + +cleanup_test_201() { + trap 0 + do_facet $SINGLEMDS $LCTL --device $MDT0 changelog_deregister $CL_USER + + umount_client $MOUNT2 +} + +test_201() { + local delay=${RESYNC_DELAY:-5} + + MDT0=$($LCTL get_param -n mdc.*.mds_server_uuid | + awk '{ gsub(/_UUID/,""); print $1 }' | head -n1) + + trap cleanup_test_201 EXIT + + CL_USER=$(do_facet $SINGLEMDS $LCTL --device $MDT0 \ + changelog_register -n) + + mkdir -p $MOUNT2 && mount_client $MOUNT2 + + local index=0 + while :; do + local log=$($LFS changelog $MDT0 $index | grep FLRW) + [ -z "$log" ] && { sleep 1; continue; } + + index=$(echo $log | awk '{print $1}') + local ts=$(date -d "$(echo $log | awk '{print $3}')" "+%s" -u) + local fid=$(echo $log | awk '{print $6}' | sed -e 's/t=//') + local file=$($LFS fid2path $MOUNT2 $fid 2> /dev/null) + + ((++index)) + [ -z "$file" ] && continue + + local now=$(date +%s) + + echo "file: $file $fid was modified at $ts, now: $now, " \ + "will be resynced at $((ts+delay))" + + [ $now -lt $((ts + delay)) ] && sleep $((ts + delay - now)) + + mirror_io resync $file + echo "$file resync done" + done + + cleanup_test_201 +} +run_test 201 "FLR data mover" + complete $SECONDS check_and_cleanup_lustre exit_status -- 1.8.3.1