From: Fan Yong Date: Sat, 25 Apr 2015 14:13:38 +0000 (+0800) Subject: LU-6361 lfsck: measure LFSCK 4 performance X-Git-Tag: 2.7.55~39 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=db2110a7af734f42a3a0b425feade8498c309895 LU-6361 lfsck: measure LFSCK 4 performance Measure the performance of namespace LFSCK for routine-check cases and repairing cases under kinds of DNE configurations. It also measures how much the namespace LFSCK will affect the performance of creating small files. Test-Parameters: mdtcount=4 mdscount=2 mdtfilesystemtype=ldiskfs mdsfilesystemtype=ldiskfs ostfilesystemtype=ldiskfs envdefinitions=SLOW=yes testlist=lfsck-performance Signed-off-by: Fan Yong Change-Id: I4f510b946d9d779fabcdec0ecd8e6c65278eb875 Reviewed-on: http://review.whamcloud.com/12109 Tested-by: Jenkins Reviewed-by: Jian Yu Tested-by: Maloo Reviewed-by: Andreas Dilger --- diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index f898d67..bcdf461 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -3304,6 +3304,8 @@ static inline int osd_get_fid_from_dentry(struct ldiskfs_dir_entry_2 *de, if (de->file_type & LDISKFS_DIRENT_LUFID) { rec = (struct osd_fid_pack *) (de->name + de->name_len + 1); rc = osd_fid_unpack((struct lu_fid *)fid, rec); + if (rc == 0 && unlikely(!fid_is_sane((struct lu_fid *)fid))) + rc = -EINVAL; } return rc; } diff --git a/lustre/tests/lfsck-performance.sh b/lustre/tests/lfsck-performance.sh index f3b9046..3b6cc7d 100644 --- a/lustre/tests/lfsck-performance.sh +++ b/lustre/tests/lfsck-performance.sh @@ -38,7 +38,7 @@ RLCTL="${RCMD} ${LCTL}" MDT_DEV="${FSNAME}-MDT0000" MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/}) START_NAMESPACE="${RLCTL} lfsck_start -M ${MDT_DEV} -t namespace" -STOP_LFSCK="${RLCTL} lfsck_stop -M ${MDT_DEV}" +STOP_LFSCK="${RLCTL} lfsck_stop -M ${MDT_DEV} -A" SHOW_NAMESPACE="${RLCTL} get_param -n mdd.${MDT_DEV}.lfsck_namespace" MNTOPTS_NOSCRUB="-o user_xattr,noscrub" remote_mds && ECHOCMD=${RCMD} || ECHOCMD="eval" @@ -49,6 +49,8 @@ if [ ${NTHREADS} -eq 0 ]; then fi lfsck_attach() { + ${RCMD} "modprobe obdecho" + ${ECHOCMD} "${LCTL} <<-EOF attach echo_client lfsck-MDT0000 lfsck-MDT0000_UUID setup ${MDT_DEV} mdd @@ -355,11 +357,11 @@ run_test 3 "lfsck namespace impact on create performance" show_layout() { local idx=$1 - $RLCTL get_param -n mdd.$(facet_svc mds${idx}).lfsck_layout + do_facet mds${idx} \ + "$LCTL get_param -n mdd.$(facet_svc mds${idx}).lfsck_layout" } -layout_test_one() -{ +layout_test_one() { echo "***** Start layout LFSCK on all devices at: $(date) *****" $RLCTL lfsck_start -M ${MDT_DEV} -t layout -A -r || return 21 @@ -382,8 +384,7 @@ layout_test_one() done } -layout_gen_one() -{ +layout_gen_one() { local idx1=$1 local idx2=$2 local mntpt="/mnt/lustre_lfsck_${idx1}_${idx2}" @@ -416,8 +417,7 @@ layout_gen_one() umount_client $mntpt } -layout_gen_set() -{ +layout_gen_set() { local cnt=$1 echo "##### Start generate test set for subdirs=$cnt at: $(date) #####" @@ -433,8 +433,7 @@ layout_gen_set() echo "##### End generate test set for subdirs=$cnt at: $(date) #####" } -t4_test() -{ +t4_test() { local saved_mdscount=$MDSCOUNT local saved_ostcount=$OSTCOUNT @@ -500,8 +499,7 @@ test_4b() { } run_test 4b "Single MDS lfsck layout performance (repairing case) without load" -t5_test() -{ +t5_test() { local saved_mdscount=$MDSCOUNT echo "stopall" @@ -564,7 +562,7 @@ test_5b() { } run_test 5b "lfsck layout performance (repairing case) without load for DNE" -layout_fast_create() { +lfsck_fast_create() { local total=$1 local lbase=$2 local threads=$3 @@ -590,8 +588,7 @@ layout_fast_create() { done } -lfsck_detach_error() -{ +lfsck_detach_error() { lfsck_detach error "$@" } @@ -655,7 +652,7 @@ test_6() { "(9) Fail to start lfsck_layout with speed ${sl}" echo "&&&&& Start create files set from ${m} at: $(date) &&&&&" - layout_fast_create $nfiles ${m} $NTHREADS || + lfsck_fast_create $nfiles ${m} $NTHREADS || lfsck_detach_error "(10) Fail to create files" echo "&&&&& End create files set from ${m} at: $(date) &&&&&" done @@ -668,7 +665,7 @@ test_6() { "(11) Fail to start lfsck_layout with full speed" echo "&&&&& start to create files set from ${m} at: $(date) &&&&&" - layout_fast_create $nfiles ${m} $NTHREADS || + lfsck_fast_create $nfiles ${m} $NTHREADS || lfsck_detach_error "(12) Fail to create files" echo "&&&&& end to create files set from ${m} at: $(date) &&&&&" @@ -677,7 +674,7 @@ test_6() { echo echo "create without lfsck_layout run back-ground" echo "&&&&& start to create files set from ${m} at: $(date) &&&&&" - layout_fast_create $nfiles ${m} $NTHREADS || + lfsck_fast_create $nfiles ${m} $NTHREADS || lfsck_detach_error "(13) Fail to create files" echo "&&&&& end to create files set from ${m} at: $(date) &&&&&" @@ -690,6 +687,357 @@ test_6() { } run_test 6 "lfsck layout impact on create performance" +show_namespace() { + local idx=$1 + + do_facet mds${idx} \ + "$LCTL get_param -n mdd.$(facet_svc mds${idx}).lfsck_namespace" +} + +namespace_test_one() { + echo "***** Start namespace LFSCK on all devices at: $(date) *****" + $RLCTL lfsck_start -M ${MDT_DEV} -t namespace -A -r || return 21 + + for n in $(seq $MDSCOUNT); do + wait_update_facet mds${n} "$LCTL get_param -n \ + mdd.$(facet_svc mds${n}).lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" $WTIME || { + show_namespace ${n} + return 22 + } + done + echo "***** End namespace LFSCK on all devices at: $(date) *****" + + for n in $(seq $MDSCOUNT); do + show_namespace ${n} + + local SPEED=$(show_namespace ${n} | + awk '/^average_speed_total/ { print $2 }') + echo + echo "lfsck_namespace speed on MDS_${n} is $SPEED objs/sec" + echo + done +} + +namespace_gen_one() { + local idx1=$1 + local idx2=$2 + local idx3=$(((idx1 + 1) % MDSCOUNT)) + local base_mntpt="/mnt/lustre_lfsck_${idx1}" + local show_dir="$LFSCKDIR/${idx1}/${idx2}" + local work_dir="${base_mntpt}_0/$tdir/${idx1}/${idx2}" + + mkdir $show_dir || return 20 + + local count=$((UNIT * 78 / 100)) # 78% regular files + local sub_count=$((count / NTHREADS)) + echo "Creating $count regular files under $show_dir at: $(date)" + for ((m = 0; m < $NTHREADS; m++)); do + local sub_dir="${base_mntpt}_${m}/$tdir/${idx1}/${idx2}" + + createmany -o ${sub_dir}/f_${m}_ $sub_count > /dev/null & + done + + wait || { + error_noexit "(21) Fail to gen regular files under $show_dir" + return 21 + } + + count=$((UNIT * 3 / 100)) # 3% local sub-dirs + echo "Creating $count local sub-dirs under $show_dir at: $(date)" + createmany -d $work_dir/d_l_ $count > /dev/null || { + error_noexit "(22) Fail to gen local sub-dir under $show_dir" + return 22 + } + + # 3% * 5 = 15% regular files under local sub-dirs + echo "Creating 5 regular files under each local sub-dir at: $(date)" + for ((m = 0; m < $count; m++)); do + createmany -o $work_dir/d_l_${m}/f_l_ 5 > /dev/null || { + error_noexit \ + "(23) Fail to gen regular under $work_dir/d_l_${m}" + return 23 + } + done + + count=$((UNIT * 4 / 1000)) # 0.4% multiple hard-links + echo "Creating $count multiple hard-links under $show_dir at: $(date)" + for ((m = 0; m < $count; m++)); do + ln $work_dir/f_0_${m} $work_dir/f_m_${m} || { + error_noexit \ + "(24) Fail to hardlink to $work_dir/f_0_${m}" + return 24 + } + done + + count=$((UNIT * 3 / 1000)) # 0.3% remote sub-dirs + echo "Creating $count remote sub-dirs under $show_dir, and 4 regular" \ + "files under each remote sub-dir at: $(date)" + for ((m = 0; m < $count; m++)); do + $LFS mkdir -i ${idx3} $work_dir/d_r_${m} || { + error_noexit \ + "(25) Fail to remote mkdir $work_dir/d_r_${m}" + return 25 + } + + # 0.3% * 4 = 1.2% regular files under remote sub-dirs + createmany -o $work_dir/d_r_${m}/f_r_ 4 > /dev/null || { + error_noexit \ + "(26) Fail to gen regular under $work_dir/d_r_${m}" + return 26 + } + done + + # 0.3% 2-striped sub-dirs + 0.6% shards of the 2-striped sub-dirs + count=$((UNIT * 3 / 1000)) + echo "Creating $count 2-striped sub-dirs under $show_dir," \ + "and 4 regular files under each striped sub-dir at: $(date)" + for ((m = 0; m < $count; m++)); do + $LFS setdirstripe -i ${idx1} -c 2 -t all_char \ + $work_dir/d_s_${m} || { + error_noexit \ + "(27) Fail to make striped-dir $work_dir/d_s_${m}" + return 27 + } + + # 0.3% * 4 = 1.2% regular files under striped sub-dirs + createmany -o $work_dir/d_s_${m}/f_s_ 4 > /dev/null || { + error_noexit \ + "(28) Fail to gen regular under $work_dir/d_s_${m}" + return 28 + } + done +} + +namespace_gen_mdt() { + local mdt_idx=$1 + local dir_cnt=$2 + local base_mntpt="/mnt/lustre_lfsck_${mdt_idx}" + + $LFS mkdir -i ${mdt_idx} $LFSCKDIR/${mdt_idx} || return 10 + + for ((m = 0; m < $NTHREADS; m++)); do + local mntpt="${base_mntpt}_${m}" + + mkdir -p $mntpt || { + umount ${base_mntpt}_* + error_noexit "(11) Fail to mkdir $mntpt" + return 11 + } + + mount_client $mntpt || { + umount ${base_mntpt}_* + error_noexit "(12) Fail to mount $mntpt" + return 12 + } + done + + for ((l = 0; l < $dir_cnt; l++)); do + namespace_gen_one ${mdt_idx} ${l} + done + + umount ${base_mntpt}_* +} + +namespace_gen_set() { + + local cnt=$1 + + echo "##### Start generate test set for subdirs=$cnt at: $(date) #####" + for ((k = 0; k < $MDSCOUNT; k++)); do + namespace_gen_mdt ${k} ${cnt} & + done + wait + echo "##### End generate test set for subdirs=$cnt at: $(date) #####" +} + +t7_test() { + local local_loc=$1 + local saved_mdscount=$MDSCOUNT + + [ $MDSCOUNT -le 8 ] || + error "Too much MDT, test data set on each MDT may be unbalance" + + echo "stopall" + stopall > /dev/null || error "(1) Fail to stopall" + + LFSCKDIR="$DIR/$tdir" + for ((i = 2; i <= $saved_mdscount; i = $((i + 2)))); do + MDSCOUNT=${i} + + echo "+++++ Start cycle mdscount=$MDSCOUNT at: $(date) +++++" + echo + + for ((j = $MINSUBDIR; j <= $MAXSUBDIR; + j = $((j + MINSUBDIR)))); do + echo "formatall" + formatall > /dev/null || + error "(2) Fail to formatall, subdirs=${j}" + + echo "setupall" + setupall > /dev/null || + error "(3) Fail to setupall, subdirs=${j}" + + mkdir $LFSCKDIR || + error "(4) mkdir $LFSCKDIR, subdirs=${j}" + + $LFS setstripe -c 1 -i -1 $LFSCKDIR || + error "(5) Fail to setstripe on $LFSCKDIR" + + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_loc=$local_loc + + local RC=0 + namespace_gen_set ${j} || RC=$? + [ $RC -eq 0 ] || + error "(6) generate set $RC, subdirs=${j}" + + RC=0 + namespace_test_one || RC=$? + [ $RC -eq 0 ] || + error "(7) LFSCK failed with $RC, subdirs=${j}" + + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_loc=0 + done + + echo "stopall" + stopall > /dev/null || error "(8) Fail to stopall" + + echo + echo "----- Stop cycle mdscount=$MDSCOUNT at: $(date) -----" + done + + MDSCOUNT=$saved_mdscount +} + +test_7a() { + t7_test 0 +} +run_test 7a "namespace LFSCK performance (routine check) without load for DNE" + +test_7b() { + echo "Inject failure stub to simulate the case of lost linkEA" + #define OBD_FAIL_LFSCK_NO_LINKEA 0x161d + t7_test 0x161d +} +run_test 7b "namespace LFSCK performance (repairing lost linkEA) for DNE" + +test_7c() { + echo "Inject failure stub to simulate the case of bad FID-in-dirent" + #define OBD_FAIL_FID_INDIR 0x1501 + t7_test 0x1501 +} +run_test 7c "namespace LFSCK performance (repairing bad FID-in-dirent) for DNE" + +test_8() { + [ $MDSCOUNT -lt 2 ] && + skip "We need at least 2 MDSes for this test" && return + + [ $INCFACTOR -gt 25 ] && INCFACTOR=25 + + echo "stopall" + stopall > /dev/null || error "(1) Fail to stopall" + + local saved_mdscount=$MDSCOUNT + + LFSCKDIR="$DIR/$tdir" + MDSCOUNT=2 + echo "formatall" + formatall > /dev/null || error "(2) Fail to formatall" + + echo "setupall" + setupall > /dev/null || error "(3) Fail to setupall" + + mkdir $LFSCKDIR || error "(4) Fail to mkdir $LFSCKDIR" + + $LFS setstripe -c 1 -i 0 $LFSCKDIR || + error "(5) Fail to setstripe on $LFSCKDIR" + + local RC=0 + namespace_gen_set $TOTSUBDIR || RC=$? + [ $RC -eq 0 ] || + error "(6) Fail to generate set $RC, subdirs=$TOTSUBDIR" + + echo + echo "***** Start namespace LFSCK at: $(date) *****" + $RLCTL lfsck_start -M ${MDT_DEV} -t namespace -A -r || + error "(7) Fail to start namespace LFSCK" + + for n in $(seq $MDSCOUNT); do + wait_update_facet mds${n} "$LCTL get_param -n \ + mdd.$(facet_svc mds${n}).lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" $WTIME || { + show_namespace ${n} + error "(8) namespace LFSCK cannot finished in time" + } + done + echo "***** End namespace LFSCK at: $(date) *****" + + local SPEED=$(show_namespace 1 | + awk '/^average_speed_phase1/ { print $2 }') + echo "lfsck_namespace full_speed is $SPEED objs/sec" + echo + + local inc_count=$((BASE_COUNT * INCFACTOR / 100)) + local nfiles=$((inc_count / 2)) + local m=0 + + lfsck_attach + + local stime=$(date +%s) + lfsck_fast_create $nfiles ${m} $NTHREADS || + lfsck_detach_error "(9) Fail to create files" + local etime=$(date +%s) + echo "created $nfiles without lfsck_namespace run back-ground used" \ + "$((etime - stime)) seconds" + echo + + for ((m = nfiles, n = $INCFACTOR; n < 100; + m = $((m + inc_count)), n = $((n + INCFACTOR)))); do + local sl=$((SPEED * n / 100)) + + $STOP_LFSCK > /dev/null 2>&1 + echo "start lfsck_namespace with speed ${sl} at: $(date)" + $RLCTL lfsck_start -M ${MDT_DEV} -t namespace -A -r -s ${sl} || + lfsck_detach_error \ + "(10) Fail to start lfsck_namespace with speed ${sl}" + + stime=$(date +%s) + lfsck_fast_create $nfiles ${m} $NTHREADS || + lfsck_detach_error "(11) Fail to create files" + etime=$(date +%s) + echo "created $nfiles with namespace LFSCK run with the" \ + "speed limit of ${n}% of full speed used" \ + "$((etime - stime)) seconds" + echo + done + + $STOP_LFSCK > /dev/null 2>&1 + echo "start lfsck_namespace with full speed at: $(date)" + $RLCTL lfsck_start -M ${MDT_DEV} -t namespace -A -r -s 0 || + lfsck_detach_error \ + "(12) Fail to start lfsck_namespace with full speed" + + stime=$(date +%s) + lfsck_fast_create $nfiles ${m} $NTHREADS || + lfsck_detach_error "(13) Fail to create files" + etime=$(date +%s) + echo "created $nfiles with namespace LFSCK run with full speed used" \ + "$((etime - stime)) seconds" + echo + + $STOP_LFSCK > /dev/null 2>&1 + + lfsck_detach + + echo "stopall" + stopall > /dev/null || error "(14) Fail to stopall" + + MDSCOUNT=$saved_mdscount +} +run_test 8 "lfsck namespace impact on create performance" + # cleanup the system at last lfsck_cleanup complete $SECONDS