From: Mikhail Pershin Date: Thu, 6 Apr 2023 08:15:50 +0000 (+0300) Subject: LU-16232 scripts: clean specific MDTs update llogs X-Git-Tag: 2.15.56~66 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=refs%2Fchanges%2F58%2F50558%2F13;p=fs%2Flustre-release.git LU-16232 scripts: clean specific MDTs update llogs Modify remove_updatelog script as follows: - avoid bash-specific process substitution for compatibility - add '-m' option to select specific MDTs and process only their update llogs - add test to check script actions Signed-off-by: Mikhail Pershin Change-Id: Icd935d5f4964d364f38b4962355ef8a5e679dbb3 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50558 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin Reviewed-by: Andreas Dilger Reviewed-by: Colin Faber --- diff --git a/lustre/scripts/remove_updatelog b/lustre/scripts/remove_updatelog index 2fad732..d4ee82a 100755 --- a/lustre/scripts/remove_updatelog +++ b/lustre/scripts/remove_updatelog @@ -31,6 +31,18 @@ # via GZIP env variable: # # GZIP=xz bash remove_updatelog -n -z /tmp/llogs_saved # Archive name will ends with .xz in that case instead of .gz +# +# The script can clean up llogs related to specific MDTs by their indices: +# # bash remove_updatelog -m 0,1 /mnt/mdt0 +# +# That can be useful when llog corruption occurred over a particular MDT +# pair, e.g. 
lustre-MDT0001-osp-MDT0003 reports llog problems, that means +# the problem is reported by MDT0003 communicating with MDT0001, so remote MDT0003 +# llog on MDT0001 is corrupted and the script should be run on MDT0001 to +# remove MDT0003 update llog after MDT0001 local ldiskfs/ZFS mount: +# #bash remove_updatelog -m 3 +# + ECHO=echo PROG=$(basename $0) @@ -39,10 +51,12 @@ GZIP=${GZIP:-gzip} usage() { cat -- <&2 -usage: remove_updatelog [--dry-run|-n] [--help|-h] [--quiet|-q] - --help|-h show this usage message - --dry-run|-n only print the names of files to be removed - --quiet|-q run quietly (don't print filenames or status) +usage: remove_updatelog [--dry-run|-n] [--mdt|-m indices] [--help|-h]\n + [--quiet|-q] + --help|-h show this usage message + --mdt|-m delete llogs of selected MDTs only + --dry-run|-n only print the names of files to be removed + --quiet|-q run quietly (don't print filenames or status) --zip|-z save all llogs into compressed tar archive with given name prefix using gzip by default. Other compression @@ -53,7 +67,8 @@ The 'localmount' argument should be an ldiskfs mounted MDT device mountpoint. Examples: remove_updatelog /mnt/mdt0 remove_updatelog --dry-run /mnt/mdt0 - remove_changelog -z /tmp/llogs /mnt/mdt0 + remove_updatelog -z /tmp/llogs /mnt/mdt0 + remove_updatelog -m 0,1 /mnt/mdt0 USAGE exit 1 } @@ -62,12 +77,14 @@ OPT_DRYRUN=false OPT_ARCH="" OPT_MOUNT="" OPT_MDTS=() +OPT_MDTIDX=() # Examine any long options and arguments while [ -n "$*" ]; do arg="$1" case "$arg" in -h|--help) usage;; + -m|--mdt) OPT_MDTIDX=($(echo $2 | tr "," " "));; -n|--dry-run) OPT_DRYRUN=true;; -q|--quiet) ECHO=:;; -z|--zip) OPT_ARCH="$2.tar"; shift;; @@ -84,10 +101,10 @@ remove_updatelog() { local arch=$OPT_ARCH local length=0 - if [[ -z $(df -t ldiskfs $mntpoint 2>/dev/null) ]] ; then - echo "$PROG: '$mntpoint' is not ldiskfs mount." - exit 1 - fi +# if [[ -z $(df -t ldiskfs $mntpoint 2>/dev/null) ]] ; then +# echo "$PROG: '$mntpoint' is not ldiskfs mount." 
+# exit 1 +# fi if $OPT_DRYRUN; then $ECHO "Dry run was requested, no changes will be applied" @@ -97,19 +114,27 @@ remove_updatelog() { if [[ ! -f $catlist ]] ; then echo "$PROG: $catlist doesn't exist already." else - read -r -d '' -a OPT_MDTS < <(hexdump -v -e '2/8 " %16x" 2/8 "\n"' $catlist | - awk '{print "[0x"$2":0x"$1":0x0]"}') + read -r -d '' -a OPT_MDTS <<< $(hexdump -v -e '2/8 " %16x" 2/8 "\n"' $catlist | + awk '{print "[0x"$2":0x"$1":0x0]"}') if [[ ! $(which $LLOG_READER 2>/dev/null) ]] ; then echo "$PROG: $LLOG_READER is missing." exit 1 fi [[ -z $arch ]] || tar -cf $arch $catlist 2>/dev/null + length=${#OPT_MDTS[@]} - for (( i = 0; i < ${length}; i++ )); do + (( ${#OPT_MDTIDX[@]} > 0 )) || OPT_MDTIDX=($(seq 0 $((length - 1)))) + echo "Selected MDTS: ${OPT_MDTIDX[*]}" + for i in ${OPT_MDTIDX[@]} ; do local catalog=$dir/${OPT_MDTS[$i]} - $ECHO "Processing MDT$i llogs ..." + if (( $i >= $length)) ; then + echo "skip wrong index $i, total $length MDTs" + continue + fi + + $ECHO "Processing MDT$i llog catalog ${OPT_MDTS[$i]} ..." if [[ ! -f $catalog ]] ; then echo "$PROG: $catalog doesn't exist already." 
continue diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 5a6bc4c..11e6a55 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -10539,6 +10539,55 @@ test_135() { } run_test 135 "check the behavior when changelog is wrapped around" +test_140() { + (( MDS1_VERSION >= $(version_code 2.15.55) )) || + skip "need MDS version at least 2.15.55" + (( MDSCOUNT >= 2 )) || skip "needs >= 2 MDTs" + + RM_UPDATELOG=$(do_facet mds2 "which remove_updatelog 2> /dev/null") + RM_UPDATELOG=${RM_UPDATELOG:-"$LUSTRE/scripts/remove_updatelog"} + + [ -f "$RM_UPDATELOG" ] || + skip_env "remove_updatelog is not found on mds2" + + local mntpt=$(facet_mntpt mds2) + + setup_noconfig + test_mkdir -c $MDSCOUNT -p $DIR/$tdir || + error "mkdir $DIR/$tdir failed" + mkdir $DIR/$tdir/d{1..256} + + stop_mdt 1 + stop_mdt 2 + + mount_fstype mds2 || error "mount as fstype mds2 failed" + do_facet mds2 $RM_UPDATELOG -n $mntpt + MDTS=($(do_facet mds2 $RM_UPDATELOG -n $mntpt | + grep -o "Processing MDT[0-9]*" | awk -F'MDT' '{print $2}')) + (( ${#MDTS[@]} == MDSCOUNT )) || + error "Processed ${#MDTS[@]} from $MDSCOUNT" + + do_facet mds2 $RM_UPDATELOG -n -m 1,0 $mntpt + MDTS=($(do_facet mds2 $RM_UPDATELOG -n -m 1,0 $mntpt | + grep -o "Processing MDT[0-9]*" | awk -F'MDT' '{print $2}')) + (( ${#MDTS[@]} == 2 )) || + error "Processed ${#MDTS[@]} instead of 2" + (( ${MDTS[0]} == 1 && ${MDTS[1]} == 0 )) || + error "Processed: ${MDTS[*]}, expected: 1 0" + + do_facet mds2 $RM_UPDATELOG -m 0 $mntpt + unmount_fstype mds2 + start_mdt 2 || error "mds2 start fail" + start_mdt 1 || error "mds1 start fail" + wait_clients_import_state ${CLIENTS:-$HOSTNAME} mds1 FULL + wait_clients_import_state ${CLIENTS:-$HOSTNAME} mds2 FULL + + rm -rf $DIR/$tdir || error "Can't remove $tdir" + stopall + reformat_and_config +} +run_test 140 "remove_updatelog script actions" + # # (This was sanity/802a) #