3 # remove_updatelog: emergency removal of MDT updatelog files from the server.
5 # This is an emergency tool to clean up updatelogs on the server if llog records
6 # cannot be removed by regular means, e.g. due to llog corruption.
8 # The tool does the following:
9 # - goes through the update_log catlist to find the per-MDT update llog catalogs
10 # - processes each llog catalog to delete all plain llogs in it
11 # - truncates or removes the related llog catalog afterwards
12 # - truncates the update_log catlist itself if all catalogs were removed
14 # The only required parameter is the mount point of the server FS mounted
15 # locally; the script also accepts the options described below in usage().
17 # Steps to cleanup problematic llogs:
19 # 1. mount MDT filesystem locally on server as ldiskfs mount
20 # 2. run script first in dry-run mode to make sure it parses llogs as needed:
21 # # bash remove_updatelog -n <ldiskfs_mount>
22 # 3. save all llogs for analysis:
23 # # bash remove_updatelog -n -z /tmp/llogs_saved <ldiskfs_mount>
24 # 4. check that /tmp/llogs_saved.tar.gz exists and has all llogs inside:
25 # # ls -ali /tmp/llogs_saved.tar.gz
26 # # tar -tf /tmp/llogs_saved.tar.gz
27 # 5. finally run script to delete all llogs:
28 # # bash remove_updatelog <ldiskfs_mount>
30 # For better llog compression xz can be used as well; pass it to the script
31 # via the GZIP env variable:
32 # # GZIP=xz bash remove_updatelog -n -z /tmp/llogs_saved <ldiskfs_mount>
33 # The archive name will end with .xz in that case instead of .gz
35 # The script can clean up llogs related to specific MDTs by their indices:
36 # # bash remove_updatelog -m 0,1 /mnt/mdt0
38 # That can be useful when llog corruption occurred for a particular MDT
39 # pair, e.g. lustre-MDT0001-osp-MDT0003 reports llog problems. That means
40 # the problem is reported by MDT0003 communicating with MDT0001, so the remote
41 # MDT0003 llog on MDT0001 is corrupted and the script should be run on MDT0001
42 # to remove the MDT0003 update llog after mounting MDT0001 locally (ldiskfs/ZFS):
43 # # bash remove_updatelog -m 3 <mdt1_mount_point>
49 LLOG_READER=${LLOG_READER:-llog_reader}  # llog reader command; defaults to llog_reader unless LLOG_READER is already set and non-empty
54 usage: remove_updatelog [--dry-run|-n] [--mdt|-m indices] [--help|-h]\n
55 [--quiet|-q] <localmount>
56 --help|-h show this usage message
57 --mdt|-m <index,...> delete llogs of selected MDTs only
58 --dry-run|-n only print the names of files to be removed
59 --quiet|-q run quietly (don't print filenames or status)
60 --zip|-z <name_prefix>
61 save all llogs into compressed tar archive with given
62 name prefix using gzip by default. Other compression
63 tools can be used via GZIP env variable.
65 The 'localmount' argument should be an ldiskfs mounted MDT device mountpoint.
68 remove_updatelog /mnt/mdt0
69 remove_updatelog --dry-run /mnt/mdt0
70 remove_updatelog -z /tmp/llogs /mnt/mdt0
71 remove_updatelog -m 0,1 /mnt/mdt0
82 # Examine any long options and arguments
87 -m|--mdt) OPT_MDTIDX=($(echo "$2" | tr "," " ")); shift;;  # comma-separated indices -> array; extra shift consumes the value (as -z does) so it is not re-parsed as a mount point
88 -n|--dry-run) OPT_DRYRUN=true;;  # OPT_DRYRUN is later executed as a command (true/false)
90 -z|--zip) OPT_ARCH="$2.tar"; shift;;  # archive name is <name_prefix>.tar; extra shift skips the prefix argument
92 [ -d "$arg" ] && OPT_MOUNT="$arg";;  # an argument naming an existing directory becomes the mount point
# Visible body of the update llog cleanup routine: find the per-MDT update
# llog catalogs listed in <mount>/update_log, optionally add them to a tar
# archive, delete the plain llogs referenced by each catalog and finally
# empty the catalog file itself.
# Reads globals OPT_MOUNT, OPT_MDTIDX, OPT_DRYRUN, ECHO, PROG, GZIP and
# LLOG_READER; fills OPT_MDTS with the catalog names found in update_log.
# Catalog names look like "[0x..:0x..:0x0]", i.e. they are glob bracket
# expressions, so every path built from them is quoted when passed to a command.
# NOTE(review): some structural lines (else/fi/done and the definition of
# $arch) are not visible here; comments cover only the statements shown.
98 local mntpoint=$OPT_MOUNT
99 local catlist=${mntpoint}/update_log  # catalog list file
100 local dir=${mntpoint}/update_log_dir  # directory holding the catalog files
104 # if [[ -z $(df -t ldiskfs $mntpoint 2>/dev/null) ]] ; then
105 # echo "$PROG: '$mntpoint' is not ldiskfs mount."
110 $ECHO "Dry run was requested, no changes will be applied"
113 $ECHO "Scan update_log at '$mntpoint':"
114 if [[ ! -f $catlist ]] ; then
115 echo "$PROG: $catlist doesn't exist already."
# Build one "[0x$2:0x$1:0x0]" name per hexdump output line (awk swaps the two
# hex fields printed for each record) and collect the names in OPT_MDTS.
117 read -r -d '' -a OPT_MDTS <<< $(hexdump -v -e '2/8 " %16x" 2/8 "\n"' "$catlist" |
118 awk '{print "[0x"$2":0x"$1":0x0]"}')
120 if [[ ! $(which $LLOG_READER 2>/dev/null) ]] ; then
121 echo "$PROG: $LLOG_READER is missing."
124 [[ -z $arch ]] || tar -cf "$arch" "$catlist" 2>/dev/null  # when archiving, start the tar file with update_log
126 length=${#OPT_MDTS[@]}  # number of catalog names read from update_log
127 (( ${#OPT_MDTIDX[@]} > 0 )) || OPT_MDTIDX=($(seq 0 $((length - 1))))  # no -m given: select every index
128 echo "Selected MDTS: ${OPT_MDTIDX[*]}"
129 for i in "${OPT_MDTIDX[@]}" ; do
130 local catalog=$dir/${OPT_MDTS[$i]}
132 if (( $i >= $length)) ; then
133 echo "skip wrong index $i, total $length MDTs"
137 $ECHO "Processing MDT$i llog catalog ${OPT_MDTS[$i]} ..."
138 if [[ ! -f $catalog ]] ; then
139 echo "$PROG: $catalog doesn't exist already."
142 [[ -z $arch ]] || tar -rf "$arch" "$catalog" 2>/dev/null  # append the catalog to the archive
143 if (( $(stat -c %s "$catalog") >= 8192 )) ; then  # parse only catalogs of at least 8192 bytes
144 while read -r plain ; do
147 # compatibility checks:
148 # old llog reader reports path in /O
149 # but correct path in update_log_dir
150 if [ "${plain:0:1}" == 'O' ] ; then
151 local fid=${plain#"O/"*}  # drop the leading "O/"
153 # old format: O/8589935617/d3/3
154 # get sequence and oid in hex:
155 fid=$(printf "[0x%x:0x%x:0x0]" ${fid%%/*} ${fid##*/})
158 path=$mntpoint/$plain
161 tar -rf "$arch" "$path" 2>/dev/null
163 $OPT_DRYRUN || rm -f "$path"  # plain llog is removed unless this is a dry run
# Run the configurable $LLOG_READER (the same command whose presence is
# checked above) instead of a hard-coded llog_reader looked up in PATH.
164 done < <($LLOG_READER "$catalog" |
165 awk -F "path=" '/path=/ { print $2 }')
167 echo "$PROG: $catalog is too small."
170 $OPT_DRYRUN || > "$catalog"  # truncate the catalog unless this is a dry run
173 if [[ "$arch" ]] ; then
175 $ECHO "llog archive was created by $GZIP"
179 if [ -z $OPT_MOUNT ] ; then
180 echo "Mount is not specified, exiting"