Whamcloud - gitweb
LU-13535 lfsck: fix possible PFL layout corruption 85/38585/5
author: Mikhail Pershin <mpershin@whamcloud.com>
Tue, 12 May 2020 20:32:22 +0000 (23:32 +0300)
committer: Oleg Drokin <green@whamcloud.com>
Wed, 27 May 2020 17:29:54 +0000 (17:29 +0000)
While checking lmm_oi in a composite layout, the 'lmm' pointer is
re-assigned to point at a component entry, but the same pointer is
also used as the LOV EA buffer when updating the EA. Therefore, if
lmm_oi was fixed in some component, only the current component entry
is saved as the new layout.

Lustre-change: https://review.whamcloud.com/38584
Lustre-commit: be009cb4a73b3bef7302083bec7d1d6289d515b7

Signed-off-by: Mikhail Pershin <mpershin@whamcloud.com>
Change-Id: Ifbd984a71b383ab4ca35ad59ed9cd8cf57b6d7cc
Reviewed-on: https://review.whamcloud.com/38585
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Stephan Thiell <sthiell@stanford.edu>
lustre/lfsck/lfsck_layout.c
lustre/tests/sanity-lfsck.sh

index c4c09fc..5a3e000 100644 (file)
@@ -5578,14 +5578,15 @@ again:
        lmm = buf->lb_buf;
        magic = le32_to_cpu(lmm->lmm_magic);
        if (magic == LOV_MAGIC_COMP_V1) {
+               struct lov_mds_md_v1 *v1;
                int i;
 
                lcm = buf->lb_buf;
                count = le16_to_cpu(lcm->lcm_entry_count);
                for (i = 0; i < count; i++) {
                        lcme = &lcm->lcm_entries[i];
-                       lmm = buf->lb_buf + le32_to_cpu(lcme->lcme_offset);
-                       if (memcmp(oi, &lmm->lmm_oi, sizeof(*oi)) != 0)
+                       v1 = buf->lb_buf + le32_to_cpu(lcme->lcme_offset);
+                       if (memcmp(oi, &v1->lmm_oi, sizeof(*oi)) != 0)
                                goto fix;
                }
 
@@ -5634,12 +5635,13 @@ fix:
        }
 
        if (magic == LOV_MAGIC_COMP_V1) {
+               struct lov_mds_md_v1 *v1;
                int i;
 
                for (i = 0; i < count; i++) {
                        lcme = &lcm->lcm_entries[i];
-                       lmm = buf->lb_buf + le32_to_cpu(lcme->lcme_offset);
-                       lmm->lmm_oi = *oi;
+                       v1 = buf->lb_buf + le32_to_cpu(lcme->lcme_offset);
+                       v1->lmm_oi = *oi;
                }
        } else {
                lmm->lmm_oi = *oi;
index a45e9ab..e4def73 100644 (file)
@@ -5552,6 +5552,34 @@ test_37()
 }
 run_test 37 "LFSCK must skip a ORPHAN"
 
+test_40a() {
+       [[ $MDSCOUNT -ge 2 ]] || skip "needs >= 2 MDTs"
+
+       check_mount_and_prep
+       $LFS mkdir -i 1 $DIR/$tdir/dir1
+       $LFS setstripe -E 1M -c1 -S 1M -E 128M -c2 -S 4M -E eof $DIR/$tdir/dir1
+
+       touch $DIR/$tdir/dir1/f1
+       local layout1=$(get_layout_param $DIR/$tdir/dir1/f1)
+
+       echo "Migrate $DIR/$tdir/dir1 from MDT1 to MDT0"
+       $LFS migrate -m 0 $DIR/$tdir/dir1
+
+       echo "trigger LFSCK for layout"
+       do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV} -t layout -r
+
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_layout |
+               awk '/^status/ { print \\\$2 }'" "completed" 32 || {
+               $SHOW_LAYOUT
+               error "(2) unexpected status"
+       }
+
+       local layout2=$(get_layout_param $DIR/$tdir/dir1/f1)
+
+       [[ "$layout1" == "$layout2" ]] || error "layout lost after lfsck"
+}
+run_test 40a "LFSCK correctly fixes lmm_oi in composite layout"
 
 # restore MDS/OST size
 MDSSIZE=${SAVED_MDSSIZE}