From cb578b39f53e4426f848d830dc87914e8259c401 Mon Sep 17 00:00:00 2001 From: Dmitry Zogin Date: Tue, 31 Aug 2010 15:54:08 -0400 Subject: [PATCH] b=21563 Metadata performance has degraded for some operations between 1.6.5 and 1.8.1 Change mdsrate tool to handle time with double resolution and use MPI_Wtime(). Also stat operations measurements do not require --iter argument anymore, and do not use readdir() internally. i=johann i=iandrew.perepechko i=robert.read --- lustre/tests/mdsrate-create-large.sh | 4 +- lustre/tests/mdsrate-create-small.sh | 6 +- lustre/tests/mdsrate-stat-large.sh | 6 +- lustre/tests/mdsrate-stat-small.sh | 6 +- lustre/tests/mpi/mdsrate.c | 104 ++++++++++++++++++++++------------- 5 files changed, 72 insertions(+), 54 deletions(-) diff --git a/lustre/tests/mdsrate-create-large.sh b/lustre/tests/mdsrate-create-large.sh index 614889c..0ff62a2 100644 --- a/lustre/tests/mdsrate-create-large.sh +++ b/lustre/tests/mdsrate-create-large.sh @@ -66,7 +66,7 @@ else log "===== $0 ### 1 NODE UNLINK ###" if [ -f "$LOG" ]; then - CREATED=$(awk '/total:/ { print $7 }' $LOG) + CREATED=$(sed -n '/^Rate:/s/^.* \([0-9]*\) creates .*/\1/p' $LOG) [ $CREATED -gt 0 ] && NUM_FILES=$CREATED fi @@ -107,7 +107,7 @@ else log "===== $0 ### $NUM_CLIENTS NODES UNLINK ###" if [ -f "$LOG" ]; then - CREATED=$(awk '/total:/ { print $7 }' $LOG) + CREATED=$(sed -n '/^Rate:/s/^.* \([0-9]*\) creates .*/\1/p' $LOG) [ $CREATED -gt 0 ] && NUM_FILES=$CREATED fi diff --git a/lustre/tests/mdsrate-create-small.sh b/lustre/tests/mdsrate-create-small.sh index 58f66da..8cfb415 100644 --- a/lustre/tests/mdsrate-create-small.sh +++ b/lustre/tests/mdsrate-create-small.sh @@ -1,4 +1,4 @@ -#!/bin/bash + #!/bin/bash # # This test was used in a set of CMD3 tests (cmd3-3 test). 
@@ -78,7 +78,7 @@ else log "===== $0 ### 1 NODE UNLINK ###" if [ -f "$LOG" ]; then - CREATED=$(awk '/total:/ { print $7 }' $LOG) + CREATED=$(sed -n '/^Rate:/s/^.* \([0-9]*\) creates .*/\1/p' $LOG) [ $CREATED -gt 0 ] && NUM_FILES=$CREATED fi @@ -126,7 +126,7 @@ else log "===== $0 ### $NUM_CLIENTS NODES UNLINK with $THREADS_PER_CLIENT threads per client ###" if [ -f "$LOG" ]; then - CREATED=$(awk '/total:/ { print $7 }' $LOG) + CREATED=$(sed -n '/^Rate:/s/^.* \([0-9]*\) creates .*/\1/p' $LOG) [ $CREATED -gt 0 ] && NUM_FILES=$CREATED fi diff --git a/lustre/tests/mdsrate-stat-large.sh b/lustre/tests/mdsrate-stat-large.sh index a136dea..b0dff02 100644 --- a/lustre/tests/mdsrate-stat-large.sh +++ b/lustre/tests/mdsrate-stat-large.sh @@ -25,9 +25,6 @@ TESTDIR=$MOUNT/mdsrate NUM_FILES=${NUM_FILES:-1000000} TIME_PERIOD=${TIME_PERIOD:-600} # seconds -# --random_order (default) -OR- --readdir_order -DIR_ORDER=${DIR_ORDER:-"--readdir_order"} - LOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh).log} CLIENT=$SINGLECLIENT NODES_TO_USE=${NODES_TO_USE:-$CLIENTS} @@ -70,8 +67,7 @@ else fi COMMAND="${MDSRATE} ${MDSRATE_DEBUG} --stat --time ${TIME_PERIOD} - --dir ${TESTDIR} --nfiles ${NUM_FILES} --filefmt 'f%%d' - ${DIR_ORDER} ${SEED_OPTION}" + --dir ${TESTDIR} --nfiles ${NUM_FILES} --filefmt 'f%%d'" # 1 if [ -n "$NOSINGLE" ]; then diff --git a/lustre/tests/mdsrate-stat-small.sh b/lustre/tests/mdsrate-stat-small.sh index a153254..40db5ed 100644 --- a/lustre/tests/mdsrate-stat-small.sh +++ b/lustre/tests/mdsrate-stat-small.sh @@ -25,9 +25,6 @@ TESTDIR=$MOUNT/mdsrate NUM_FILES=${NUM_FILES:-1000000} TIME_PERIOD=${TIME_PERIOD:-600} # seconds -# --random_order (default) -OR- --readdir_order -DIR_ORDER=${DIR_ORDER:-"--readdir_order"} - LOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh).log} CLIENT=$SINGLECLIENT NODES_TO_USE=${NODES_TO_USE:-$CLIENTS} @@ -70,8 +67,7 @@ else fi COMMAND="${MDSRATE} ${MDSRATE_DEBUG} --stat --time ${TIME_PERIOD} - --dir ${TESTDIR} --nfiles ${NUM_FILES} --filefmt 'f%%d' - 
${DIR_ORDER} ${SEED_OPTION}" + --dir ${TESTDIR} --nfiles ${NUM_FILES} --filefmt 'f%%d'" # 1 if [ -n "$NOSINGLE" ]; then diff --git a/lustre/tests/mpi/mdsrate.c b/lustre/tests/mpi/mdsrate.c index fc10732..dfd899a 100644 --- a/lustre/tests/mpi/mdsrate.c +++ b/lustre/tests/mpi/mdsrate.c @@ -133,15 +133,13 @@ struct stat statbuf; #define dmesg if (debug) printf #define DISPLAY_PROGRESS() { \ - if ((++nops % CHECK_COUNT) == 0 && verbose) { \ - curTime = time(0); \ + if (verbose && (nops % CHECK_COUNT == 0)) { \ + curTime = MPI_Wtime(); \ interval = curTime - lastTime; \ if (interval > DISPLAY_TIME || nops % DISPLAY_COUNT == 0) { \ - rate = (float)(nops - lastOps); \ - if (interval > 1) \ - rate /= (float)interval; \ - printf("Rank %d: %.2f %ss/sec %lu secs " \ - "(total: %d %ss %lu secs)\n", \ + rate = (double)(nops - lastOps)/interval; \ + printf("Rank %d: %.2f %ss/sec %.2f secs " \ + "(total: %d %ss %.2f secs)\n", \ myrank, rate, cmd, interval, \ nops, cmd, curTime - startTime); \ lastOps = nops; \ @@ -306,9 +304,9 @@ process_args(int argc, char *argv[]) if ((*endptr != 0) || (iters <= 0)) { fatal(0, "Invalid --iters value.\n"); } - if (mode != LOOKUP && mode != OPEN && mode != STAT) { + if (mode != LOOKUP && mode != OPEN) { usage(stderr, "--iters only makes sense with " - "--lookup, --open, or --stat.\n"); + "--lookup or --open.\n"); } break; case TIME: @@ -398,9 +396,9 @@ process_args(int argc, char *argv[]) break; case RANDOM: case READDIR: - if (mode != LOOKUP && mode != OPEN && mode != STAT) { + if (mode != LOOKUP && mode != OPEN) { fatal(0, "--%s can only be specified with " - "--lookup, --open, or --stat.\n", + "--lookup, or --open.\n", (char *)longOpts[index].name); } order = c; @@ -424,7 +422,7 @@ process_args(int argc, char *argv[]) usage(stderr, "too many arguments %d >= %d.\n", optind, argc); } - if (mode == CREATE || mode == MKNOD || mode == UNLINK) { + if (mode == CREATE || mode == MKNOD || mode == UNLINK || mode == STAT) { if (seconds != 0) { if (nfiles 
== 0) nfiles = INT_MAX; @@ -432,7 +430,7 @@ process_args(int argc, char *argv[]) usage(stderr, "--nfiles or --time must be specified " "with %s.\n", cmd); } - } else if (mode == LOOKUP || mode == OPEN || mode == STAT) { + } else if (mode == LOOKUP || mode == OPEN) { if (seconds != 0) { if (iters == 0) iters = INT_MAX; @@ -545,9 +543,13 @@ static inline char *next_file() int main(int argc, char *argv[]) { - int i, j, fd, rc, nops, lastOps, ag_ops; - float rate, ag_rate; - time_t startTime, lastTime, curTime, interval; + int i, j, fd, rc, nops, lastOps; + int ag_ops = 0; + double ag_interval = 0; + double ag_rate = 0; + double rate, avg_rate, effective_rate; + double startTime, curTime, lastTime, interval; + time_t timestamp; char * file; rc = MPI_Init(&argc, &argv); @@ -564,10 +566,10 @@ main(int argc, char *argv[]) process_args(argc, argv); - startTime = time(0); + timestamp = time(0); if ((myrank == 0) || debug) { printf("%d: %s starting at %s", - myrank, hostname, ctime(&startTime)); + myrank, hostname, ctime(&timestamp)); } /* if we're not measuring creation rates then precreate @@ -605,10 +607,10 @@ main(int argc, char *argv[]) dir, strerror(rc)); } - startTime = time(0); + timestamp = time(0); j = random() % nfiles; dmesg("%d: %s initializing dir offset %u: %s", - myrank, hostname, j, ctime(&startTime)); + myrank, hostname, j, ctime(&timestamp)); for (i = 0; i <= j; i++) { if ((dir_entry = readdir(directory)) == NULL) { @@ -617,17 +619,13 @@ main(int argc, char *argv[]) } } - lastTime = time(0); + timestamp = time(0); dmesg("%d: index %d, filename %s, offset %ld: " "%s initialization complete: %s", myrank, i, dir_entry->d_name, telldir(directory), - hostname, ctime(&lastTime)); + hostname, ctime(&timestamp)); } - rc = MPI_Barrier(MPI_COMM_WORLD); - if (rc != MPI_SUCCESS) - fatal(myrank, "prep MPI_Barrier failed: %d\n", rc); - if (seconds) { act.sa_handler = sigalrm_handler; (void)sigemptyset(&act.sa_mask); @@ -636,7 +634,11 @@ main(int argc, char *argv[]) alarm(seconds); } - 
startTime = lastTime = time(0); + rc = MPI_Barrier(MPI_COMM_WORLD); + if (rc != MPI_SUCCESS) + fatal(myrank, "prep MPI_Barrier failed: %d\n", rc); + + startTime = lastTime = MPI_Wtime(); nops = lastOps = 0; switch (mode) { @@ -651,6 +653,7 @@ main(int argc, char *argv[]) } close(fd); + nops++; DISPLAY_PROGRESS(); } @@ -675,6 +678,7 @@ main(int argc, char *argv[]) "error: %s\n", filename, strerror(rc)); } + nops++; DISPLAY_PROGRESS(); } break; @@ -690,6 +694,7 @@ main(int argc, char *argv[]) filename, strerror(rc)); } + nops++; DISPLAY_PROGRESS(); } break; @@ -705,19 +710,24 @@ main(int argc, char *argv[]) close(fd); + nops++; DISPLAY_PROGRESS(); } break; case STAT: - for (; nops < iters && !alarm_caught;) { - rc = stat(file = next_file(), &statbuf); + for (; begin <= end && !alarm_caught; begin += dirthreads) { + sprintf(filename, filefmt, begin); + rc = stat(filename, &statbuf); if (rc) { if (((rc = errno) == EINTR) && alarm_caught) break; + if (((rc = errno) == ENOENT) && ignore) + continue; fatal(myrank, "stat(%s) error: %s\n", - file, strerror(rc)); + filename, strerror(rc)); } + nops++; DISPLAY_PROGRESS(); } break; @@ -734,16 +744,18 @@ main(int argc, char *argv[]) filename, strerror(rc)); } + nops++; DISPLAY_PROGRESS(); } break; } - curTime = time(0); + rc = MPI_Barrier(MPI_COMM_WORLD); + if (rc != MPI_SUCCESS) + fatal(myrank, "prep MPI_Barrier failed: %d\n", rc); + curTime = MPI_Wtime(); interval = curTime - startTime; - rate = (float)(nops); - if (interval != 0) - rate /= (float)interval; + rate = (double) (nops) / interval; rc = MPI_Reduce(&nops, &ag_ops, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); @@ -751,15 +763,29 @@ main(int argc, char *argv[]) fatal(myrank, "Failure in MPI_Reduce of total ops.\n"); } - rc = MPI_Reduce(&rate, &ag_rate, 1, MPI_FLOAT, MPI_SUM, 0, + rc = MPI_Reduce(&interval, &ag_interval, 1, MPI_DOUBLE, MPI_SUM, 0, + MPI_COMM_WORLD); + if (rc != MPI_SUCCESS) { + fatal(myrank, "Failure in MPI_Reduce of total interval.\n"); + } + + rc = 
MPI_Reduce(&rate, &ag_rate, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if (rc != MPI_SUCCESS) { fatal(myrank, "Failure in MPI_Reduce of aggregated rate.\n"); } if (myrank == 0) { - printf("Rate: %.2f %ss/sec (total: %d threads %d %ss %lu secs)" - "\n", ag_rate, cmd, nthreads, ag_ops, cmd, interval); + + curTime = MPI_Wtime(); + interval = curTime - startTime; + effective_rate = (double) ag_ops / interval; + avg_rate = (double) ag_ops / ag_interval; + + printf("Rate: %.2f eff %.2f aggr %.2f avg client %ss/sec " + "(total: %d threads %d %ss %d dirs %d threads/dir %.2f secs)\n", + effective_rate, ag_rate, avg_rate, cmd, nthreads, ag_ops, + cmd, ndirs, dirthreads, interval); } if (recreate) { @@ -777,10 +803,10 @@ main(int argc, char *argv[]) } } - curTime = time(0); + timestamp = time(0); if ((myrank == 0) || debug) { printf("%d: %s finished at %s", - myrank, hostname, ctime(&curTime)); + myrank, hostname, ctime(&timestamp)); } MPI_Finalize(); -- 1.8.3.1