From eb4471773dc1058c60fc1f12bfb8d0948d85e7d7 Mon Sep 17 00:00:00 2001 From: Dmitry Zogin Date: Fri, 2 Jul 2010 19:53:49 -0400 Subject: [PATCH] b=21563 Metadata performance has degraded for some operations between 1.6.5 and 1.8.1 Change mdsrate tool to handle time with double resolution and use MPI_Wtime(). Also, stat operation measurements no longer require the --iters argument, and do not use readdir() internally. i=johann i=iandrew.perepechko i=robert.read --- lustre/tests/mdsrate-stat-large.sh | 6 +- lustre/tests/mdsrate-stat-small.sh | 6 +- lustre/tests/mpi/mdsrate.c | 110 +++++++++++++++++++------------------ 3 files changed, 60 insertions(+), 62 deletions(-) diff --git a/lustre/tests/mdsrate-stat-large.sh b/lustre/tests/mdsrate-stat-large.sh index 5580985..6d269dc 100644 --- a/lustre/tests/mdsrate-stat-large.sh +++ b/lustre/tests/mdsrate-stat-large.sh @@ -26,9 +26,6 @@ TESTDIR=$MOUNT/stat NUM_FILES=${NUM_FILES:-1000000} TIME_PERIOD=${TIME_PERIOD:-600} # seconds -# --random_order (default) -OR- --readdir_order -DIR_ORDER=${DIR_ORDER:-"--readdir_order"} - LOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh).log} CLIENT=$SINGLECLIENT NODES_TO_USE=${NODES_TO_USE:-$CLIENTS} @@ -77,8 +74,7 @@ else fi COMMAND="${MDSRATE} ${MDSRATE_DEBUG} --stat --time ${TIME_PERIOD} - --dir ${TESTDIR} --nfiles ${NUM_FILES} --filefmt 'f%%d' - ${DIR_ORDER} ${SEED_OPTION}" + --dir ${TESTDIR} --nfiles ${NUM_FILES} --filefmt 'f%%d'" # 1 if [ -n "$NOSINGLE" ]; then diff --git a/lustre/tests/mdsrate-stat-small.sh b/lustre/tests/mdsrate-stat-small.sh index 50586ee..efad812 100644 --- a/lustre/tests/mdsrate-stat-small.sh +++ b/lustre/tests/mdsrate-stat-small.sh @@ -26,9 +26,6 @@ TESTDIR=$BASEDIR/stat NUM_FILES=${NUM_FILES:-1000000} TIME_PERIOD=${TIME_PERIOD:-600} # seconds -# --random_order (default) -OR- --readdir_order -DIR_ORDER=${DIR_ORDER:-"--readdir_order"} - LOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh).log} CLIENT=$SINGLECLIENT NODES_TO_USE=${NODES_TO_USE:-$CLIENTS} @@ -77,8 +74,7 @@ else fi 
COMMAND="${MDSRATE} ${MDSRATE_DEBUG} --stat --time ${TIME_PERIOD} - --dir ${TESTDIR} --nfiles ${NUM_FILES} --filefmt 'f%%d' - ${DIR_ORDER} ${SEED_OPTION}" + --dir ${TESTDIR} --nfiles ${NUM_FILES} --filefmt 'f%%d'" # 1 if [ -n "$NOSINGLE" ]; then diff --git a/lustre/tests/mpi/mdsrate.c b/lustre/tests/mpi/mdsrate.c index ffd7dd1..daf3c48 100644 --- a/lustre/tests/mpi/mdsrate.c +++ b/lustre/tests/mpi/mdsrate.c @@ -134,15 +134,13 @@ struct stat statbuf; #define dmesg if (debug) printf #define DISPLAY_PROGRESS() { \ - if ((++nops % CHECK_COUNT) == 0 && verbose) { \ - curTime = time(0); \ + if (verbose && (nops % CHECK_COUNT == 0)) { \ + curTime = MPI_Wtime(); \ interval = curTime - lastTime; \ if (interval > DISPLAY_TIME || nops % DISPLAY_COUNT == 0) { \ - rate = (float)(nops - lastOps); \ - if (interval > 1) \ - rate /= (float)interval; \ - printf("Rank %d: %.2f %ss/sec %lu secs " \ - "(total: %d %ss %lu secs)\n", \ + rate = (double)(nops - lastOps)/interval; \ + printf("Rank %d: %.2f %ss/sec %.2f secs " \ + "(total: %d %ss %.2f secs)\n", \ myrank, rate, cmd, interval, \ nops, cmd, curTime - startTime); \ lastOps = nops; \ @@ -307,9 +305,9 @@ process_args(int argc, char *argv[]) if ((*endptr != 0) || (iters <= 0)) { fatal(0, "Invalid --iters value.\n"); } - if (mode != LOOKUP && mode != OPEN && mode != STAT) { + if (mode != LOOKUP && mode != OPEN) { usage(stderr, "--iters only makes sense with " - "--lookup, --open, or --stat.\n"); + "--lookup or --open.\n"); } break; case TIME: @@ -399,9 +397,9 @@ process_args(int argc, char *argv[]) break; case RANDOM: case READDIR: - if (mode != LOOKUP && mode != OPEN && mode != STAT) { + if (mode != LOOKUP && mode != OPEN) { fatal(0, "--%s can only be specified with " - "--lookup, --open, or --stat.\n", + "--lookup, or --open.\n", (char *)longOpts[index].name); } order = rc; @@ -425,7 +423,7 @@ process_args(int argc, char *argv[]) usage(stderr, "too many arguments %d >= %d.\n", optind, argc); } - if (mode == CREATE || mode == MKNOD 
|| mode == UNLINK) { + if (mode == CREATE || mode == MKNOD || mode == UNLINK || mode == STAT) { if (seconds != 0) { if (nfiles == 0) nfiles = INT_MAX; @@ -433,7 +431,7 @@ process_args(int argc, char *argv[]) usage(stderr, "--nfiles or --time must be specified " "with %s.\n", cmd); } - } else if (mode == LOOKUP || mode == OPEN || mode == STAT) { + } else if (mode == LOOKUP || mode == OPEN) { if (seconds != 0) { if (iters == 0) iters = INT_MAX; @@ -546,12 +544,13 @@ static inline char *next_file() int main(int argc, char *argv[]) { - int i, j, fd, rc, nops, lastOps, ag_ops; - int ag_interval = 0; - float rate, ag_rate; - float avg_rate = 0; - float effective_rate = 0; - time_t startTime, lastTime, curTime, interval; + int i, j, fd, rc, nops, lastOps; + int ag_ops = 0; + double ag_interval = 0; + double ag_rate = 0; + double rate, avg_rate, effective_rate; + double startTime, curTime, lastTime, interval; + time_t timestamp; char * file; rc = MPI_Init(&argc, &argv); @@ -568,10 +567,10 @@ main(int argc, char *argv[]) process_args(argc, argv); - startTime = time(0); + timestamp = time(0); if ((myrank == 0) || debug) { printf("%d: %s starting at %s", - myrank, hostname, ctime(&startTime)); + myrank, hostname, ctime(&timestamp)); } /* if we're not measuring creation rates then precreate @@ -609,10 +608,10 @@ main(int argc, char *argv[]) dir, strerror(rc)); } - startTime = time(0); + timestamp = time(0); j = random() % nfiles; dmesg("%d: %s initializing dir offset %u: %s", - myrank, hostname, j, ctime(&startTime)); + myrank, hostname, j, ctime(&timestamp)); for (i = 0; i <= j; i++) { if ((dir_entry = readdir(directory)) == NULL) { @@ -621,17 +620,13 @@ main(int argc, char *argv[]) } } - lastTime = time(0); + timestamp = time(0); dmesg("%d: index %d, filename %s, offset %ld: " "%s initialization complete: %s", myrank, i, dir_entry->d_name, telldir(directory), - hostname, ctime(&lastTime)); + hostname, ctime(&timestamp)); } - rc = MPI_Barrier(MPI_COMM_WORLD); - if (rc != MPI_SUCCESS) - 
fatal(myrank, "prep MPI_Barrier failed: %d\n", rc); - if (seconds) { act.sa_handler = sigalrm_handler; (void)sigemptyset(&act.sa_mask); @@ -640,7 +635,11 @@ main(int argc, char *argv[]) alarm(seconds); } - startTime = lastTime = time(0); + rc = MPI_Barrier(MPI_COMM_WORLD); + if (rc != MPI_SUCCESS) + fatal(myrank, "prep MPI_Barrier failed: %d\n", rc); + + startTime = lastTime = MPI_Wtime(); nops = lastOps = 0; switch (mode) { @@ -655,6 +654,7 @@ main(int argc, char *argv[]) } close(fd); + nops++; DISPLAY_PROGRESS(); } @@ -679,6 +679,7 @@ main(int argc, char *argv[]) "error: %s\n", filename, strerror(rc)); } + nops++; DISPLAY_PROGRESS(); } break; @@ -694,6 +695,7 @@ main(int argc, char *argv[]) filename, strerror(rc)); } + nops++; DISPLAY_PROGRESS(); } break; @@ -709,19 +711,24 @@ main(int argc, char *argv[]) close(fd); + nops++; DISPLAY_PROGRESS(); } break; case STAT: - for (; nops < iters && !alarm_caught;) { - rc = stat(file = next_file(), &statbuf); + for (; begin <= end && !alarm_caught; begin += dirthreads) { + sprintf(filename, filefmt, begin); + rc = stat(filename, &statbuf); if (rc) { if (((rc = errno) == EINTR) && alarm_caught) break; + if (((rc = errno) == ENOENT) && ignore) + continue; fatal(myrank, "stat(%s) error: %s\n", - file, strerror(rc)); + filename, strerror(rc)); } + nops++; DISPLAY_PROGRESS(); } break; @@ -738,16 +745,18 @@ main(int argc, char *argv[]) filename, strerror(rc)); } + nops++; DISPLAY_PROGRESS(); } break; } - curTime = time(0); + rc = MPI_Barrier(MPI_COMM_WORLD); + if (rc != MPI_SUCCESS) + fatal(myrank, "prep MPI_Barrier failed: %d\n", rc); + curTime = MPI_Wtime(); interval = curTime - startTime; - rate = (float)(nops); - if (interval != 0) - rate /= (float)interval; + rate = (double) (nops) / interval; rc = MPI_Reduce(&nops, &ag_ops, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); @@ -755,32 +764,29 @@ main(int argc, char *argv[]) fatal(myrank, "Failure in MPI_Reduce of total ops.\n"); } - rc = MPI_Reduce(&interval, &ag_interval, 1, 
MPI_INT, MPI_SUM, 0, + rc = MPI_Reduce(&interval, &ag_interval, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if (rc != MPI_SUCCESS) { fatal(myrank, "Failure in MPI_Reduce of total interval.\n"); } - rc = MPI_Reduce(&rate, &ag_rate, 1, MPI_FLOAT, MPI_SUM, 0, + rc = MPI_Reduce(&rate, &ag_rate, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if (rc != MPI_SUCCESS) { fatal(myrank, "Failure in MPI_Reduce of aggregated rate.\n"); } if (myrank == 0) { - curTime = time(0); - interval = curTime - startTime; - if (interval != 0) - effective_rate = (float) ag_ops/ (float)interval; - - if (ag_interval > 0) - avg_rate = (float)ag_ops/(float)ag_interval; + curTime = MPI_Wtime(); + interval = curTime - startTime; + effective_rate = (double) ag_ops / interval; + avg_rate = (double) ag_ops / ag_interval; - printf("Rate: %.2f eff %.2f aggr %.2f client %ss/sec " - "(total: %d threads %d %ss %d dirs %d threads/dir %lu secs)\n", - effective_rate, ag_rate, avg_rate, cmd, nthreads, ag_ops, cmd, - ndirs, dirthreads, interval); + printf("Rate: %.2f eff %.2f aggr %.2f avg client %ss/sec " + "(total: %d threads %d %ss %d dirs %d threads/dir %.2f secs)\n", + effective_rate, ag_rate, avg_rate, cmd, nthreads, ag_ops, + cmd, ndirs, dirthreads, interval); } if (recreate) { @@ -798,10 +804,10 @@ main(int argc, char *argv[]) } } - curTime = time(0); + timestamp = time(0); if ((myrank == 0) || debug) { printf("%d: %s finished at %s", - myrank, hostname, ctime(&curTime)); + myrank, hostname, ctime(&timestamp)); } MPI_Finalize(); -- 1.8.3.1