3 # Copyright (C) 2018 Oracle. All Rights Reserved.
5 # Author: Darrick J. Wong <darrick.wong@oracle.com>
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; either version 2
10 # of the License, or (at your option) any later version.
12 # This program is distributed in the hope that it would be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write the Free Software Foundation,
19 # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
21 # Automatically check an LVM-managed filesystem online.
22 # We use lvm snapshots to do this, which means that we can only
23 # check filesystems in VGs that have at least 256MB (or so) of
# Path of the optional configuration file.  The @root_sysconfdir@
# placeholder is presumably substituted at build time -- TODO confirm.
30 conffile="@root_sysconfdir@/e2scrub.conf"
# Source the configuration only when it exists as a regular file.
32 test -f "${conffile}" && . "${conffile}"
# Usage text: one positional argument (mountpoint or device) plus the
# -r, -t and -V flags.
# NOTE(review): the enclosing function header is not visible in this
# excerpt; these lines presumably form the body of print_help -- confirm.
35 echo "Usage: $0 [OPTIONS] mountpoint | device"
37 echo "mountpoint must be on a LVM-managed block device"
38 echo "-r: Remove e2scrub snapshot and exit, do not check anything."
39 echo "-t: Run fstrim if successful."
40 echo "-V: Print version information and exit."
# Version banner built from the @E2FSPROGS_VERSION@ and @E2FSPROGS_DATE@
# placeholders; presumably the body of print_version -- confirm.
44 echo "e2scrub @E2FSPROGS_VERSION@ (@E2FSPROGS_DATE@)"
50 # If we're being run as a service, the return code must fit the LSB
51 # init script action error guidelines, which is to say that we
52 # compress all errors to 1 ("generic or unspecified error", LSB 5.0
53 # section 22.2) and hope the admin will scan the log for what
56 # We have to sleep 2 seconds here because journald uses the pid to
57 # connect our log messages to the systemd service. This is critical
58 # for capturing all the log messages if the scrub fails, because the
59 # fail service uses the service name to gather log messages for the
# When SERVICE_MODE is non-empty, collapse every non-zero status held in
# ${ret} to 1; a zero status is left untouched.
61 if [ -n "${SERVICE_MODE}" ]; then
62 test "${ret}" -ne 0 && ret=1
# Parse the -r, -t and -V flags; none of them takes an argument.
69 while getopts "rtV" opt; do
# -V calls print_version and then exitcode 0; any option not matched
# earlier calls print_help and then exitcode 2.
73 "V") print_version; exitcode 0;;
74 *) print_help; exitcode 2;;
# Drop the parsed options so only positional arguments remain.
77 shift "$((OPTIND - 1))"
# Special handling for an empty ${arg}; the branch body is not visible
# in this excerpt.
80 if [ -z "${arg}" ]; then
85 # Find the device for a given mountpoint
# Canonicalise the mountpoint argument ($1) with realpath before it is
# compared against lsblk's output.
87 local mountpt="$(realpath "$1")"
# Read lsblk's output one line at a time: key="value" pairs (-P), full
# device paths (-p), no header line (-n); lsblk errors are discarded.
89 lsblk -o NAME,FSTYPE,MOUNTPOINT -p -P -n 2> /dev/null | while read vars; do
# Entries whose MOUNTPOINT differs from the requested one take this branch.
# NOTE(review): MOUNTPOINT is presumably set by evaluating ${vars} on a
# line not visible in this excerpt -- confirm.
91 if [ "${mountpt}" != "${MOUNTPOINT}" ]; then
104 # Check a device argument
# Ask lsblk for the filesystem type of ${dev}; errors are discarded.
107 local fstype="$(lsblk -o FSTYPE -n "${dev}" 2> /dev/null)"
# For a non-empty ${dev}, print the mountpoint lsblk reports for it.
121 if [ -n "${dev}" ]; then
122 lsblk -o MOUNTPOINT -n "${dev}"
126 # Construct block device path and mountpoint from argument
# A block-device argument is passed through dev_from_arg, and the
# resulting device is handed to mnt_from_dev to obtain its mountpoint.
127 if [ -b "${arg}" ]; then
128 dev="$(dev_from_arg "${arg}")"
129 mnt="$(mnt_from_dev "${dev}")"
# Presumably the non-block-device branch (the `else` is not visible in
# this excerpt): resolve the argument as a mountpoint -- confirm.
131 dev="$(dev_from_mount "${arg}")"
# Report the argument as unusable when no existing path was resolved.
134 if [ ! -e "${dev}" ]; then
135 echo "${arg}: Not an ext[234] filesystem."
140 # Make sure this is an LVM device we can snapshot
# Ask lvs for the LV name, VG name and LV role of ${dev}.  With
# --nameprefixes the fields are printed as LVM2_*-prefixed assignments;
# lvs errors are discarded.
141 lvm_vars="$(lvs --nameprefixes -o name,vgname,lv_role --noheadings "${dev}" 2> /dev/null)"
# Refuse devices that have no VG or LV name, and LVs whose role mentions
# "snapshot".
# NOTE(review): the LVM2_* variables are presumably set by evaluating
# ${lvm_vars} on a line not visible in this excerpt -- confirm.
143 if [ -z "${LVM2_VG_NAME}" ] || [ -z "${LVM2_LV_NAME}" ] ||
144 echo "${LVM2_LV_ROLE}" | grep -q "snapshot"; then
145 echo "${arg}: Not connected to a LVM logical volume."
# Timestamp in YYYYmmddHHMMSS form, taken when this line runs.
149 start_time="$(date +'%Y%m%d%H%M%S')"
# The snapshot LV is named after the origin LV with an ".e2scrub" suffix.
150 snap="${LVM2_LV_NAME}.e2scrub"
# Device node path built from the VG name and the snapshot name.
151 snap_dev="/dev/${LVM2_VG_NAME}/${snap}"
154 # Remove and wait for removal to succeed.
# Force-remove the snapshot LV; fd 3 is closed for the lvremove child.
# ${DBG} is an optional command prefix (presumably empty in normal
# operation -- confirm).
155 ${DBG} lvremove -f "${LVM2_VG_NAME}/${snap}" 3>&-
# NOTE(review): "$?" below holds the status of the preceding
# `[ -e ... ]` test, not of lvremove.  It is 0 whenever the second test
# is reached, so the comparison with 5 cannot succeed and the loop body
# never runs.  If retrying on lvremove status 5 is intended, "$?" must
# be tested first.  Confirm before changing: no deadline bounds this loop.
156 while [ -e "${snap_dev}" ] && [ "$?" -eq "5" ]; do
158 ${DBG} lvremove -f "${LVM2_VG_NAME}/${snap}" 3>&-
163 # First we recover the journal, then we see if e2fsck tries any
164 # non-optimization repairs. If either of these two returns a
165 # non-zero status (errors fixed or remaining) then this fs is bad.
# Make E2FSCK_FIXES_ONLY visible to the e2fsck child processes.
167 export E2FSCK_FIXES_ONLY
# Pass 1: journal replay only (-E journal_only) in preen mode (-p) on
# the snapshot; a non-zero status is returned to the caller immediately.
# ${e2fsck_opts} is left unquoted, presumably so that it word-splits
# into separate options -- confirm.
168 ${DBG} "@root_sbindir@/e2fsck" -E journal_only -p ${e2fsck_opts} "${snap_dev}" || return $?
# Pass 2: forced check (-f) answering yes to every question (-y).
169 ${DBG} "@root_sbindir@/e2fsck" -f -y ${e2fsck_opts} "${snap_dev}"
# On the origin device (not the snapshot): reset the mount count (-C 0)
# and set the last-checked time (-T) to the recorded start time.
173 ${DBG} "@root_sbindir@/tune2fs" -C 0 -T "${start_time}" "${dev}"
# On the origin device: set the force_fsck extended option.
177 ${DBG} "@root_sbindir@/tune2fs" -E force_fsck "${dev}"
181 # Try to remove snapshot for 30s, bail out if we can't remove it.
# Absolute deadline (epoch seconds) for the removal retries below.  The
# name must match the one read in the loop condition.
182 lvremove_deadline="$(( $(date "+%s") + 30))"
183 ${DBG} lvremove -f "${LVM2_VG_NAME}/${snap}" 3>&- 2>/dev/null
# Retry only while the most recent lvremove exited with status 5, the
# snapshot device still exists and the deadline has not passed.  "$?" is
# tested first on purpose: after any other test it would hold that
# test's status instead of lvremove's.
184 while [ "$?" -eq "5" ] && [ -e "${snap_dev}" ] &&
185 [ "$(date "+%s")" -lt "${lvremove_deadline}" ]; do
187 ${DBG} lvremove -f "${LVM2_VG_NAME}/${snap}" 3>&-
# A snapshot device that still exists here is reported as in use.
189 if [ -e "${snap_dev}" ]; then
190 echo "${arg}: e2scrub snapshot is in use, cannot check!"
193 # Create the snapshot, wait for device to appear.
# The snapshot size is ${snap_size_mb} megabytes; fd 3 is closed for the
# lvcreate child.
194 ${DBG} lvcreate -s -L "${snap_size_mb}m" -n "${snap}" "${LVM2_VG_NAME}/${LVM2_LV_NAME}" 3>&-
195 if [ $? -ne 0 ]; then
196 echo "${arg}: e2scrub snapshot FAILED, will not check!"
# Wait for the udev event queue to drain; errors are discarded.
199 ${DBG} udevadm settle 2> /dev/null
# When ${reap} is greater than zero and the snapshot device exists, run
# teardown with its stderr discarded.
# NOTE(review): ${reap} is presumably set by the -r option -- confirm.
203 if [ "${reap}" -gt 0 ]; then
204 if [ -e "${snap_dev}" ]; then
205 teardown 2> /dev/null
# From here on, any exit or an INT/QUIT/TERM signal runs teardown and
# then exits with status 1.
212 trap "teardown; exit 1" EXIT INT QUIT TERM
# NOTE(review): the control structure that selects between the outcomes
# below is not visible in this excerpt.
219 echo "${arg}: Scrub succeeded."
224 # Trim the free space, which requires the snapshot be deleted.
# Trim only when ${fstrim} is 1, ${mnt} is a directory and an fstrim
# command can be found.
225 if [ "${fstrim}" -eq 1 ] && [ -d "${mnt}" ] && type fstrim > /dev/null 2>&1; then
226 echo "${arg}: Trimming free space."
233 # Operational error, what now?
234 echo "${arg}: e2fsck operational error."
240 # fsck failed. Check if the snapshot is invalid; if so, make a
241 # note of that at the end of the log. This isn't necessarily a
242 # failure because the mounted fs could have overflowed the
243 # snapshot with regular disk writes /or/ our repair process
244 # could have done it by repairing too much.
246 # If it's really corrupt we ought to fsck at next boot.
# First whitespace-separated field of lvs' lv_snapshot_invalid column
# for the snapshot device; a non-empty value selects the
# "invalid snapshot" message below.
247 is_invalid="$(lvs -o lv_snapshot_invalid --noheadings "${snap_dev}" | awk '{print $1}')"
248 if [ -n "${is_invalid}" ]; then
249 echo "${arg}: Scrub FAILED due to invalid snapshot."
252 echo "${arg}: Scrub FAILED due to corruption! Unmount and run e2fsck -y."