From: Fan Yong
Date: Thu, 24 Sep 2015 09:04:41 +0000 (+0800)
Subject: LU-7169 tests: check disk corruption during failover
X-Git-Tag: 2.7.64~43
X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=f84e06eead85de5cd7832855bab5ff72a542e971;ds=inline

LU-7169 tests: check disk corruption during failover

It is a debug patch for conf-sanity test_84.
It is suspected that there is some disk corruption during the MDT0 failover.

Test-Parameters: mdsfilesystemtype=ldiskfs mdtfilesystemtype=ldiskfs ostfilesystemtype=ldiskfs testlist=conf-sanity,conf-sanity,conf-sanity
Signed-off-by: Fan Yong
Change-Id: I7e20f26e1ecee483474ace44c8284b5776f3c602
Reviewed-on: http://review.whamcloud.com/16664
Tested-by: Jenkins
Reviewed-by: Andreas Dilger
Reviewed-by: Jian Yu
Tested-by: Maloo
---

diff --git a/lustre/osd-ldiskfs/osd_scrub.c b/lustre/osd-ldiskfs/osd_scrub.c
index 61ed4cc..31b879b 100644
--- a/lustre/osd-ldiskfs/osd_scrub.c
+++ b/lustre/osd-ldiskfs/osd_scrub.c
@@ -1735,7 +1735,19 @@ osd_ios_lookup_one_len(const char *name, struct dentry *parent, int namelen)
 	struct dentry *dentry;
 
 	dentry = ll_lookup_one_len(name, parent, namelen);
-	if (!IS_ERR(dentry) && dentry->d_inode == NULL) {
+	if (IS_ERR(dentry)) {
+		int rc = PTR_ERR(dentry);
+
+		if (rc != -ENOENT)
+			CERROR("Fail to find %.*s in %.*s (%lu/%u): rc = %d\n",
+			       namelen, name, parent->d_name.len,
+			       parent->d_name.name, parent->d_inode->i_ino,
+			       parent->d_inode->i_generation, rc);
+
+		return dentry;
+	}
+
+	if (dentry->d_inode == NULL) {
 		dput(dentry);
 		return ERR_PTR(-ENOENT);
 	}
@@ -2488,8 +2500,29 @@ int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev)
 		GOTO(cleanup_inode, rc);
 	} else {
 		if (memcmp(sf->sf_uuid, es->s_uuid, 16) != 0) {
+			struct obd_uuid *old_uuid;
+			struct obd_uuid *new_uuid;
+
+			OBD_ALLOC_PTR(old_uuid);
+			OBD_ALLOC_PTR(new_uuid);
+			if (old_uuid == NULL || new_uuid == NULL) {
+				CERROR("%.16s: UUID has been changed, but"
+				       "failed to allocate RAM for report\n",
+				       LDISKFS_SB(sb)->s_es->s_volume_name);
+			} else {
+				class_uuid_unparse(sf->sf_uuid, old_uuid);
+				class_uuid_unparse(es->s_uuid, new_uuid);
+				CERROR("%.16s: UUID has been changed from "
+				       "%s to %s\n",
+				       LDISKFS_SB(sb)->s_es->s_volume_name,
+				       old_uuid->uuid, new_uuid->uuid);
+			}
 			osd_scrub_file_reset(scrub, es->s_uuid,SF_INCONSISTENT);
 			dirty = 1;
+			if (old_uuid != NULL)
+				OBD_FREE_PTR(old_uuid);
+			if (new_uuid != NULL)
+				OBD_FREE_PTR(new_uuid);
 		} else if (sf->sf_status == SS_SCANNING) {
 			sf->sf_status = SS_CRASHED;
 			dirty = 1;
diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh
index fb97eeb..488948c 100644
--- a/lustre/tests/conf-sanity.sh
+++ b/lustre/tests/conf-sanity.sh
@@ -5591,7 +5591,7 @@ test_84() {
 #define OBD_FAIL_TGT_REPLAY_DELAY 0x709 | FAIL_SKIP
 	do_facet $SINGLEMDS "lctl set_param fail_loc=0x20000709 fail_val=5"
 
-	facet_failover $SINGLEMDS || error "failover: $?"
+	facet_failover --fsck $SINGLEMDS || error "failover: $?"
 	client_up
 
 	echo "recovery status"
diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh
index 15feee6..2547035 100755
--- a/lustre/tests/test-framework.sh
+++ b/lustre/tests/test-framework.sh
@@ -2460,6 +2460,13 @@ affected_facets () {
 }
 
 facet_failover() {
+	local E2FSCK_ON_MDT0=false
+	if [ "$1" == "--fsck" ]; then
+		shift
+		[ $(facet_fstype $SINGLEMDS) == ldiskfs ] &&
+			E2FSCK_ON_MDT0=true
+	fi
+
 	local facets=$1
 	local sleep_time=$2
 	local -a affecteds
@@ -2493,6 +2500,9 @@ facet_failover() {
 		shutdown_facet $facet
 	done
 
+	$E2FSCK_ON_MDT0 && (run_e2fsck $(facet_active_host $SINGLEMDS) \
+		$(mdsdevname 1) "-n" || error "Running e2fsck")
+
 	for ((index=0; index<$total; index++)); do
 		facet=$(echo ${affecteds[index]} | tr -s " " | cut -d"," -f 1)
 		echo reboot facets: ${affecteds[index]}