From: manoj Date: Tue, 14 Jul 2009 21:11:44 +0000 (+0000) Subject: b=19894 X-Git-Tag: v1_9_220~23 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=054ce9ccdefb7fd15aa33e967ca882372b87c29d b=19894 r=grev r=nathan.rutman Kill and cleanup iozone before replicating data. --- diff --git a/lustre/tests/acceptance-small.sh b/lustre/tests/acceptance-small.sh index 7645795..320eeeb 100755 --- a/lustre/tests/acceptance-small.sh +++ b/lustre/tests/acceptance-small.sh @@ -438,7 +438,6 @@ if [ "$SANITY_GSS" != "no" ]; then fi -echo replication sanity: $LREPLICATE_TEST [ "$LREPLICATE_TEST" != "no" ] && skip_remmds lreplicate-test && LREPLICATE_TEST=no && MSKIPPED=1 [ "$LREPLICATE_TEST" != "no" ] && skip_remost lreplicate-test && LREPLICATE_TEST=no && OSKIPPED=1 if [ "$LREPLICATE_TEST" != "no" ]; then diff --git a/lustre/tests/lreplicate-test.sh b/lustre/tests/lreplicate-test.sh index 12dbcb9..4e0d261 100644 --- a/lustre/tests/lreplicate-test.sh +++ b/lustre/tests/lreplicate-test.sh @@ -392,7 +392,20 @@ test_4() { $KILL -SIGCONT $child_pid sleep 60 - $KILL -SIGKILL $child_pid + $KILL -SIGKILL $(pgrep run_iozone.sh) + $KILL -SIGKILL $(pgrep iozone) + + # After killing 'run_iozone.sh', process 'iozone' becomes the + # child of PID 1. Hence 'wait' does not wait for it. Killing + # iozone first, means more iozone processes are spawned off which + # is not desirable. So, after sending a sigkill, the test goes + # into a wait loop for iozone to cleanup and exit. + wait + while [ "$(pgrep "iozone")" != "" ]; + do + ps -ef | grep iozone | grep -v grep + sleep 1; + done $LREPLICATE -l $LREPL_LOG check_diff $DIR/$tdir $TGT/$tdir @@ -563,5 +576,5 @@ log "cleanup: ======================================================" cd $ORIG_PWD check_and_cleanup_lustre echo '=========================== finished ===============================' -[ -f "$REPLOG" ] && cat $REPLLOG && grep -q FAIL $REPLLOG && exit 1 || true +[ -f "$REPLLOG" ] && cat $REPLLOG && grep -q FAIL $REPLLOG && exit 1 || true echo "$0: completed" diff --git a/lustre/utils/lreplicate.c b/lustre/utils/lreplicate.c index 1eb775a..b1b64e4 100644 --- a/lustre/utils/lreplicate.c +++ b/lustre/utils/lreplicate.c @@ -1352,6 +1352,16 @@ void lr_print_status(struct lr_info *info) printf("Using rsync: %s (%s)\n", rsync, rsync_ver); } +DECLARE_CHANGELOG_NAMES; + +void lr_print_failure(struct lr_info *info, int rc) +{ + fprintf(stderr, "Replication of operation failed(%d):" + " %lld %s (%d) %s %s %s\n", rc, info->recno, + changelog_str[info->type], info->type, info->tfid, + info->pfid, info->name); +} + /* Replicate filesystem operations from src_path to target_path */ int lr_replicate() { @@ -1466,9 +1476,7 @@ int lr_replicate() break; } if (rc && rc != -ENOENT) { - fprintf(stderr, "Replication of operation %d, " - "index %lld failed: %d\n", - info->type, info->recno, rc); + lr_print_failure(info, rc); errors++; if (abort_on_err) break;