1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * 2003, Copyright, Hewlett-Packard Development Company, LP.
6 * Developed under the sponsorship of the U.S. Government
7 * under Subcontract No. B514193
11 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
12 * Use is subject to license terms.
18 #include <sys/types.h>
29 #include <sys/ioctl.h>
35 #include <liblustre.h>
36 #include <lustre/liblustreapi.h> /* for O_LOV_DELAY_CREATE */
/* Progress-reporting thresholds used by DISPLAY_PROGRESS():
 *   CHECK_COUNT   - the report condition is evaluated once every CHECK_COUNT
 *                   operations (and only when verbose is set);
 *   DISPLAY_COUNT - a report is printed whenever the operation count is a
 *                   multiple of this value;
 *   DISPLAY_TIME  - a report is also printed when curTime - lastTime exceeds
 *                   this value (the report labels that difference "secs"). */
38 #define CHECK_COUNT 10000
39 #define DISPLAY_COUNT (CHECK_COUNT * 10)
40 #define DISPLAY_TIME 100
/* Long-option table handed to getopt_long() in process_args().
 * Per struct option: field 2 is has_arg (0 = no argument, 1 = argument
 * required), field 3 is the flag pointer (NULL here, so getopt_long()
 * returns field 4), and field 4 is the option's identifying value.
 * The identifying constants (CREATE, LOOKUP, ...) are defined outside this
 * excerpt.  process_args() walks the table until it finds a NULL name, so a
 * terminating entry is expected after the last row shown here (it is not
 * visible in this excerpt). */
69 struct option longOpts[] = {
70 {"create", 0, NULL, CREATE },
71 {"lookup", 0, NULL, LOOKUP },
72 {"mknod", 0, NULL, MKNOD },
73 {"open", 0, NULL, OPEN },
74 {"stat", 0, NULL, STAT },
75 {"unlink", 0, NULL, UNLINK },
76 {"begin", 1, NULL, BEGIN },
77 {"iters", 1, NULL, ITERS },
78 {"time", 1, NULL, TIME }, /* seconds */
79 {"dirfmt", 1, NULL, DIRFMT },
80 {"ndirs", 1, NULL, NDIRS },
81 {"filefmt", 1, NULL, FILEFMT },
82 {"nfiles", 1, NULL, NFILES },
83 {"noexcl", 0, NULL, NOEXCL },
84 {"stripes", 1, NULL, STRIPES },
85 {"seed", 1, NULL, SEED },
86 {"seedfile", 1, NULL, SEEDFILE },
87 {"random_order", 0, NULL, RANDOM },
88 {"readdir_order", 0, NULL, READDIR },
89 {"recreate", 0, NULL, RECREATE },
90 {"ignore", 0, NULL, IGNORE },
91 {"verbose", 0, NULL, VERBOSE },
92 {"debug", 0, NULL, DEBUG },
93 {"help", 0, NULL, HELP },
/* Host name used in log lines; process_args() overwrites the default with
 * the result of gethostname(). */
103 char hostname[512] = "unknown";
/* Flags passed to every open() in this file; option handling in
 * process_args() clears O_EXCL and/or O_CREAT and may add
 * O_LOV_DELAY_CREATE. */
106 int openflags = O_RDWR|O_CREAT|O_EXCL;
/* Holds the "/bin/mkdir -p <dir>" command built in process_args(); the
 * extra 14 bytes match the length of the "/bin/mkdir -p " prefix. */
110 char mkdir_cmd[PATH_MAX+14];
/* Most recent entry returned by readdir() (see next_file() and main()). */
114 struct dirent *dir_entry;
/* printf-style template for file names; defaults to "f%d" and may be
 * replaced via --filefmt. */
116 char filefmt[PATH_MAX];
/* Scratch buffer receiving a file name formatted from filefmt. */
117 char filename[PATH_MAX];
/* Signal disposition installed for SIGALRM in main(). */
125 struct sigaction act;
/* Debug-only printf: expands to an unbraced `if (debug) printf`, so the
 * parenthesised argument list written after dmesg becomes the printf call.
 * NOTE(review): because the `if` is bare, a dmesg(...) statement placed
 * directly before an `else` would capture that else - confirm that no call
 * site relies on this. */
134 #define dmesg if (debug) printf
/* Count one completed operation and, when verbose, periodically print a
 * per-rank progress line.
 *
 * nops is incremented on every expansion (the increment is evaluated before
 * the verbose test).  Every CHECK_COUNT operations the macro computes
 * interval = curTime - lastTime and prints when interval exceeds DISPLAY_TIME
 * or nops is a multiple of DISPLAY_COUNT; the printed rate is
 * (nops - lastOps) divided by interval.  lastTime is then set to curTime.
 *
 * The macro relies on names from the expanding scope (nops, lastOps,
 * interval, rate, curTime, lastTime, startTime) and on myrank, cmd and
 * verbose.  Several continuation lines of the macro are not visible in this
 * excerpt (e.g. where curTime and lastOps are refreshed).
 *
 * NOTE(review): interval and curTime - startTime have type time_t but are
 * printed with %lu - confirm this matches time_t on every target platform. */
136 #define DISPLAY_PROGRESS() { \
137 if ((++nops % CHECK_COUNT) == 0 && verbose) { \
139 interval = curTime - lastTime; \
140 if (interval > DISPLAY_TIME || nops % DISPLAY_COUNT == 0) { \
141 rate = (float)(nops - lastOps); \
143 rate /= (float)interval; \
144 printf("Rank %d: %.2f %ss/sec %lu secs " \
145 "(total: %d %ss %lu secs)\n", \
146 myrank, rate, cmd, interval, \
147 nops, cmd, curTime - startTime); \
149 lastTime = curTime; \
/* Usage synopsis printed by usage(); it is used as a printf format whose
 * single %s receives the program name (prog). */
154 char *usage_msg = "usage: %s\n"
155 " { --create [ --noexcl ] | --lookup | --mknod |\n"
156 " --open | --stat | --unlink [ --recreate ] [ --ignore ] }\n"
157 " [ --help ] [ --verbose ] [ --debug ]\n"
158 " { [ --begin <num> ] --nfiles <num> }\n"
159 " [ --iters <num> ] [ --time <secs> ]\n"
160 " [ --dirfmt <str> ] [ --ndirs <num> ]\n"
161 " [ --filefmt <str> ] [ --stripes <num> ]\n"
162 " [ --random_order [--seed <num> | --seedfile <file>] ]\n"
163 " [ --readdir_order ]\n";
/* Print an optional printf-style message followed by the usage synopsis,
 * then terminate the process.
 *
 *   stream - where the "<prog>: " prefix and the synopsis are written
 *   fmt    - printf-style message; remaining arguments feed its conversions
 *
 * Exit status is 1 when stream is stderr and 0 otherwise.  The conditions
 * guarding the individual prints are not visible in this excerpt.
 *
 * NOTE(review): the message body is written with vfprintf(stderr, ...) while
 * the prefix and synopsis go to `stream`; when stream is not stderr the
 * output is split across two streams - confirm whether that is intended. */
166 usage(FILE *stream, char *fmt, ...)
172 fprintf(stream, "%s: ", prog);
174 vfprintf(stderr, fmt, ap);
177 fprintf(stream, usage_msg, prog);
181 exit(stream == stderr);
/* Report a fatal error and abort the MPI job.
 *
 *   rank - the rank that should print the message; output is produced only
 *          when it equals this process's myrank
 *   fmt  - printf-style message; remaining arguments feed its conversions
 *
 * The message is written to stderr prefixed with "rank <n>: ", and
 * MPI_Abort() is called on MPI_COMM_WORLD with error code 1.  Whether the
 * abort sits inside or outside the rank test is not visible in this
 * excerpt. */
184 /* Print process myrank and message, and exit (i.e. a fatal error) */
186 fatal(int rank, const char *fmt, ...)
188 if (rank == myrank) {
191 fprintf(stderr, "rank %d: ", rank);
193 vfprintf(stderr, fmt, ap);
197 MPI_Abort(MPI_COMM_WORLD, 1);
/* Signal handler that main() installs for SIGALRM via sigaction().
 * The body is not visible in this excerpt; the operation loops in main()
 * stop once alarm_caught is non-zero, so this handler presumably sets that
 * flag - confirm against the full source. */
202 sigalrm_handler(int signum)
207 /* HAVE_LLAPI_FILE_LOOKUP is defined by liblustreapi.h if this function is
208 * defined therein. Otherwise we can do the equivalent operation via ioctl
209 * if we have access to a complete lustre build tree to get the various
210 * definitions - then compile with USE_MDC_LOOKUP defined. */
211 #if defined(HAVE_LLAPI_FILE_LOOKUP)
212 #define HAVE_MDC_LOOKUP
213 #elif defined(USE_MDC_LOOKUP)
215 #include <liblustre.h>
216 #include <linux/lustre_lib.h>
/* Fallback implementation of llapi_file_lookup(), compiled only when
 * USE_MDC_LOOKUP is defined and the library does not provide the function.
 *
 *   dirfd - descriptor of the directory containing the entry
 *   name  - entry name to look up
 *
 * Packs the name (including its terminating NUL) into an obd_ioctl_data
 * request and returns the result of the IOC_MDC_LOOKUP ioctl.  A pack
 * failure is reported through fatal().  The action taken for a negative
 * dirfd or NULL name is not visible in this excerpt.
 *
 * NOTE(review): the ioctl is issued on `fd`, not on the `dirfd` parameter,
 * and no declaration of `fd` is visible inside this function - confirm which
 * descriptor is meant.
 * NOTE(review): the size passed to obd_ioctl_pack() is sizeof(rawbuf); the
 * declarations of rawbuf and buf are not visible here - verify they agree. */
218 int llapi_file_lookup(int dirfd, const char *name)
220 struct obd_ioctl_data data = { 0 };
225 if (dirfd < 0 || name == NULL)
228 data.ioc_version = OBD_IOCTL_VERSION;
229 data.ioc_len = sizeof(data);
230 data.ioc_inlbuf1 = name;
231 data.ioc_inllen1 = strlen(name) + 1;
233 rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
235 fatal(myrank, "ioctl_pack failed: rc = %d\n", rc);
239 return ioctl(fd, IOC_MDC_LOOKUP, buf);
/* Either way a lookup implementation is now available. */
241 #define HAVE_MDC_LOOKUP
/* Parse the command line into the file-scope configuration variables
 * (mode, cmd, begin, iters, seconds, ndirs, nfiles, stripes, seed, order,
 * openflags, filefmt, ...), validate option combinations, derive this rank's
 * directory number and begin/end file range, and finally create and enter
 * the working directory.
 *
 *   argc, argv - the program arguments as received by main()
 *
 * Invalid input is reported through usage() or fatal(), both of which
 * terminate the program.  Many lines of this function (case labels, braces,
 * some assignments) are not visible in this excerpt. */
245 process_args(int argc, char *argv[])
/* NOTE(review): `c` receives the int result of getopt_long() and is compared
 * with -1 below; where plain char is unsigned that comparison can never be
 * true - confirm, and consider an int. */
247 char c, *cp, *endptr;
248 int i, index, offset, tmpend, rc;
/* Defaults: program name for messages, "f%d" file-name template, and the
 * local host name. */
255 prog = basename(argv[0]);
256 strcpy(filefmt, "f%d");
257 gethostname(hostname, sizeof(hostname));
259 /* auto create shortOpts rather than maintaining a static string. */
260 for (opt = longOpts, cp = shortOpts; opt->name != NULL; opt++, cp++) {
/* Main option loop; `index` is set by getopt_long() to the matching
 * longOpts entry. */
266 while ((c = getopt_long(argc,argv, shortOpts, longOpts,&index)) != -1) {
269 openflags &= ~(O_CREAT|O_EXCL);
271 #ifdef HAVE_MDC_LOOKUP
278 fatal(0, "Invalid - more than one operation "
280 longOpts[index].name);
/* The option's long name doubles as the operation label in reports. */
283 cmd = (char *)longOpts[index].name;
286 if (mode != CREATE && mode != MKNOD) {
287 usage(stderr, "--noexcl only applies to "
288 "--create or --mknod.\n");
290 openflags &= ~O_EXCL;
293 if (mode != UNLINK) {
294 usage(stderr, "--recreate only makes sense"
/* NOTE(review): the table names this option --begin, but the error text
 * below says --start. */
300 begin = strtol(optarg, &endptr, 0);
301 if ((*endptr != 0) || (begin < 0)) {
302 fatal(0, "Invalid --start value.\n");
306 iters = strtol(optarg, &endptr, 0);
307 if ((*endptr != 0) || (iters <= 0)) {
308 fatal(0, "Invalid --iters value.\n");
310 if (mode != LOOKUP && mode != OPEN && mode != STAT) {
311 usage(stderr, "--iters only makes sense with "
312 "--lookup, --open, or --stat.\n");
316 seconds = strtol(optarg, &endptr, 0);
317 if ((*endptr != 0) || (seconds <= 0)) {
318 fatal(0, "Invalid --time value.\n");
322 if (strlen(optarg) > (PATH_MAX - 16)) {
323 fatal(0, "--dirfmt too long\n");
328 ndirs = strtol(optarg, &endptr, 0);
329 if ((*endptr != 0) || (ndirs <= 0)) {
330 fatal(0, "Invalid --ndirs value.\n");
332 if ((ndirs > nthreads) &&
333 ((mode == CREATE) || (mode == MKNOD))) {
334 fatal(0, "--ndirs=%d must be less than or "
335 "equal to the number of threads (%d).\n",
/* NOTE(review): the user-supplied string is used directly as a sprintf
 * format into filefmt[PATH_MAX]; the length check uses the literal 4080
 * rather than the buffer size, and conversions other than the expected ones
 * are not rejected - confirm this is acceptable for the intended users. */
340 if (strlen(optarg) > 4080) {
341 fatal(0, "--filefmt too long\n");
344 /* Use %%d where you want the file # in the name. */
345 sprintf(filefmt, optarg, myrank);
348 nfiles = strtol(optarg, &endptr, 0);
349 if ((*endptr != 0) || (nfiles <= 0)) {
350 fatal(0, "Invalid --nfiles value.\n");
354 stripes = strtol(optarg, &endptr, 0);
355 if ((*endptr != 0) || (stripes < 0)) {
356 fatal(0, "Invalid --stripes value.\n");
360 openflags |= O_LOV_DELAY_CREATE;
362 fatal(0, "non-zero --stripes value "
363 "not yet supported.\n");
368 seed = strtoul(optarg, &endptr, 0);
370 fatal(0, "bad --seed option %s\n", optarg);
/* Seed file: read line by line (at most 15 characters per fgets call) and
 * parsed with sscanf("%d"); the error texts indicate one seed per thread is
 * expected, but the line-selection logic is not visible in this excerpt. */
374 seed_file = fopen(optarg, "r");
376 fatal(myrank, "fopen(%s) error: %s\n",
377 optarg, strerror(errno));
380 for (i = -1; fgets(tmp, 16, seed_file) != NULL;) {
386 rc = sscanf(tmp, "%d", &seed);
387 if ((rc != 1) || (seed < 0)) {
388 fatal(myrank, "Invalid seed value '%s' "
389 "at line %d in %s.\n",
393 fatal(myrank, "File '%s' too short. Does not "
394 "contain a seed for thread %d.\n",
402 if (mode != LOOKUP && mode != OPEN && mode != STAT) {
403 fatal(0, "--%s can only be specified with "
404 "--lookup, --open, or --stat.\n",
405 (char *)longOpts[index].name);
420 usage(stderr, "unrecognized option: '%c'.\n", optopt);
425 usage(stderr, "too many arguments %d >= %d.\n", optind, argc);
/* Post-parse validation of the required count/time options per mode. */
428 if (mode == CREATE || mode == MKNOD || mode == UNLINK) {
432 } else if (nfiles == 0) {
433 usage(stderr, "--nfiles or --time must be specified "
436 } else if (mode == LOOKUP || mode == OPEN || mode == STAT) {
440 } else if (iters == 0) {
441 usage(stderr, "--iters or --time must be specifed "
446 usage(stderr, "--nfiles must be specifed with --%s.\n",
/* Seed material is read from /dev/urandom here; the condition selecting
 * this path is not visible in this excerpt. */
451 int fd = open("/dev/urandom", O_RDONLY);
454 if (read(fd, &seed, sizeof(seed)) <
465 dmesg("%s: rank %d seed %d (%s).\n", prog, myrank, seed,
466 (order == RANDOM) ? "random_order" : "readdir_order");
468 usage(stderr, "one --create, --mknod, --open, --stat,"
469 #ifdef HAVE_MDC_LOOKUP
472 " or --unlink must be specifed.");
475 /* support for multiple threads in a dir, set begin/end appropriately.*/
/* Ranks are assigned to directories round-robin (myrank % ndirs);
 * dirthreads is the base number of ranks per directory and offset is this
 * rank's position among the ranks sharing its directory. */
476 dirnum = myrank % ndirs;
477 dirthreads = nthreads / ndirs;
478 if (nthreads > (ndirs * dirthreads + dirnum))
481 offset = myrank / ndirs;
483 tmpend = begin + nfiles - 1;
487 end = begin + (nfiles / dirthreads) * dirthreads + offset;
488 if ((end > tmpend) || (end <= 0))
497 dmesg("%d: iters %d nfiles %d time %d begin %d end %d dirthreads %d."
498 "\n", myrank, iters, nfiles, seconds, begin, end, dirthreads);
500 if (dirfmt == NULL) {
503 sprintf(dir, dirfmt, dirnum);
/* NOTE(review): the directory name derived from --dirfmt is interpolated
 * into a shell command run through system(); shell metacharacters in it
 * would be interpreted by the shell - confirm inputs are trusted. */
505 sprintf(mkdir_cmd, "/bin/mkdir -p %s", dir);
506 #ifdef _LIGHTWEIGHT_KERNEL
507 printf("NOTICE: not running system(%s)\n", mkdir_cmd);
509 rc = system(mkdir_cmd);
511 fatal(myrank, "'%s' failed.\n", mkdir_cmd);
517 fatal(myrank, "unable to chdir to '%s'.\n", dir);
/* Produce the name of the next file to operate on.
 *
 * With order == RANDOM the name is formatted into the global `filename`
 * buffer from filefmt and a pseudo-random index in [0, nfiles).  Otherwise
 * the next entry of the open `directory` stream is read; when the stream is
 * exhausted it is rewound and scanned for the first entry whose name does
 * not begin with '.', and fatal() is called if none is found.
 *
 * Returns a pointer to the name; in the directory case this is the d_name
 * field of the entry returned by readdir().  Some lines (including the
 * return for the random case) are not visible in this excerpt.
 *
 * NOTE(review): random() % nfiles has type long, while the default filefmt
 * "f%d" expects an int argument - confirm/cast.
 * NOTE(review): in the lines visible here the '.' filter is applied only to
 * the scan after rewinddir(); an entry obtained by the first readdir() is
 * returned unfiltered - confirm this is intended. */
522 static inline char *next_file()
524 if (order == RANDOM) {
525 sprintf(filename, filefmt, random() % nfiles);
531 dir_entry = readdir(directory);
532 if (dir_entry == NULL) {
533 rewinddir(directory);
534 while ((dir_entry = readdir(directory)) != NULL) {
535 if (dir_entry->d_name[0] != '.')
536 return(dir_entry->d_name);
539 fatal(myrank, "unable to read directory %s (%s).\n",
540 dir, strerror(errno));
543 return(dir_entry->d_name);
/* Program entry point.
 *
 * Initialises MPI and records the communicator size (nthreads) and this
 * process's rank (myrank), parses the options, optionally pre-creates the
 * files to operate on, then runs the loop for the selected operation until
 * the file range or iteration count is exhausted or alarm_caught is set.
 * Afterwards the per-rank operation counts and rates are summed onto rank 0
 * with MPI_Reduce() and reported.
 *
 * Case labels, braces and several statements (including the end of the
 * function) are not visible in this excerpt. */
547 main(int argc, char *argv[])
549 int i, j, fd, rc, nops, lastOps, ag_ops;
551 time_t startTime, lastTime, curTime, interval;
554 rc = MPI_Init(&argc, &argv);
555 if (rc != MPI_SUCCESS)
556 fatal(myrank, "MPI_Init failed: %d\n", rc);
558 rc = MPI_Comm_size(MPI_COMM_WORLD, &nthreads);
559 if (rc != MPI_SUCCESS)
560 fatal(myrank, "MPI_Comm_size failed: %d\n", rc);
562 rc = MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
563 if (rc != MPI_SUCCESS)
564 fatal(myrank, "MPI_Comm_rank failed: %d\n", rc);
566 process_args(argc, argv);
/* Start banner: printed by rank 0 only, or by every rank when debugging. */
569 if ((myrank == 0) || debug) {
570 printf("%d: %s starting at %s",
571 myrank, hostname, ctime(&startTime));
574 /* if we're not measuring creation rates then precreate
575 * the files we're operating on. */
576 if ((mode != CREATE) && (mode != MKNOD) && !ignore) {
577 /* create the files in reverse order. When we encounter
578 * a file that already exists, assume the remainder of
579 * the files exist to save time. The timed performance
580 * test scripts make use of this behavior. */
/* Each rank steps through its own file numbers, dirthreads apart. */
581 for (i = end, j = 0; i >= begin; i -= dirthreads) {
582 sprintf(filename, filefmt, i);
583 fd = open(filename, openflags, 0644);
588 fatal(myrank, "precreate open(%s) error: %s\n",
589 filename, strerror(rc));
594 dmesg("%d: %s pre-created %d files.\n",myrank,hostname,j);
/* All ranks wait here until pre-creation has finished everywhere. */
596 rc = MPI_Barrier(MPI_COMM_WORLD);
597 if (rc != MPI_SUCCESS)
598 fatal(myrank, "prep MPI_Barrier failed: %d\n", rc);
/* readdir order: open the working directory and consume j + 1 entries,
 * with j = random() % nfiles, before the measured loop starts - presumably
 * so that ranks begin at different directory positions (confirm). */
601 if (order == READDIR) {
602 directory = opendir(dir);
603 if (directory == NULL) {
605 fatal(myrank, "opendir(%s) error: %s\n",
610 j = random() % nfiles;
/* NOTE(review): j is an int but is printed with %u below. */
611 dmesg("%d: %s initializing dir offset %u: %s",
612 myrank, hostname, j, ctime(&startTime));
614 for (i = 0; i <= j; i++) {
615 if ((dir_entry = readdir(directory)) == NULL) {
616 fatal(myrank, "could not read entry number %d "
617 "in directory %s.\n", i, dir);
622 dmesg("%d: index %d, filename %s, offset %ld: "
623 "%s initialization complete: %s",
624 myrank, i, dir_entry->d_name, telldir(directory),
625 hostname, ctime(&lastTime));
628 rc = MPI_Barrier(MPI_COMM_WORLD);
629 if (rc != MPI_SUCCESS)
630 fatal(myrank, "prep MPI_Barrier failed: %d\n", rc);
/* Install the SIGALRM handler; the call that arms the timer is not visible
 * in this excerpt. */
633 act.sa_handler = sigalrm_handler;
634 (void)sigemptyset(&act.sa_mask);
636 sigaction(SIGALRM, &act, NULL);
/* Measurement starts here. */
640 startTime = lastTime = time(0);
/* File-creation loop via open().  As in the other loops below, a failure
 * with errno == EINTR while alarm_caught is set is tested for before the
 * fatal() call; the statement taken in that case is not visible in this
 * excerpt. */
645 for (; begin <= end && !alarm_caught; begin += dirthreads) {
646 sprintf(filename, filefmt, begin);
647 if ((fd = open(filename, openflags, 0644)) < 0) {
648 if (((rc = errno) == EINTR) && alarm_caught)
650 fatal(myrank, "open(%s) error: %s\n",
651 filename, strerror(rc));
658 dmesg("%d: created %d files, last file '%s'.\n",
659 myrank, nops, filename);
/* Lookup loop, available only when a llapi_file_lookup() implementation was
 * configured; the working directory is opened once and reused. */
661 #ifdef HAVE_MDC_LOOKUP
663 fd = open(dir, O_RDONLY);
665 fatal(myrank, "open(dir == '%s') error: %s\n",
666 dir, strerror(errno));
669 for (; nops < iters && !alarm_caught;) {
/* Note: this local pointer shadows the file-scope `filename` buffer. */
670 char *filename = next_file();
671 rc = llapi_file_lookup(fd, filename);
673 if (((rc = errno) == EINTR) && alarm_caught)
675 fatal(myrank, "llapi_file_lookup(%s) "
676 "error: %s\n", filename, strerror(rc));
/* File-creation loop via mknod() (regular file, mode 0644). */
684 for (; begin <= end && !alarm_caught; begin += dirthreads) {
685 sprintf(filename, filefmt, begin);
686 rc = mknod(filename, S_IFREG| 0644, 0);
688 if (((rc = errno) == EINTR) && alarm_caught)
690 fatal(myrank, "mknod(%s) error: %s\n",
691 filename, strerror(rc));
/* Open loop over names supplied by next_file() (the assignment of `file`
 * is not visible in this excerpt). */
698 for (; nops < iters && !alarm_caught;) {
700 if ((fd = open(file, openflags, 0644)) < 0) {
701 if (((rc = errno) == EINTR) && alarm_caught)
703 fatal(myrank, "open(%s) error: %s\n",
/* Stat loop over names supplied by next_file(). */
713 for (; nops < iters && !alarm_caught;) {
714 rc = stat(file = next_file(), &statbuf);
716 if (((rc = errno) == EINTR) && alarm_caught)
718 fatal(myrank, "stat(%s) error: %s\n",
/* Unlink loop; ENOENT is additionally tested for when `ignore` is set. */
726 for (; begin <= end && !alarm_caught; begin += dirthreads) {
727 sprintf(filename, filefmt, begin);
728 rc = unlink(filename);
730 if (((rc = errno) == EINTR) && alarm_caught)
732 if (((rc = errno) == ENOENT) && ignore)
734 fatal(myrank, "unlink(%s) error: %s\n",
735 filename, strerror(rc));
/* Per-rank rate = operations / elapsed time; the guard (if any) around the
 * division is not visible in this excerpt. */
744 interval = curTime - startTime;
745 rate = (float)(nops);
747 rate /= (float)interval;
/* Sum operation counts and rates across ranks; results land on rank 0. */
749 rc = MPI_Reduce(&nops, &ag_ops, 1, MPI_INT, MPI_SUM, 0,
751 if (rc != MPI_SUCCESS) {
752 fatal(myrank, "Failure in MPI_Reduce of total ops.\n");
755 rc = MPI_Reduce(&rate, &ag_rate, 1, MPI_FLOAT, MPI_SUM, 0,
757 if (rc != MPI_SUCCESS) {
758 fatal(myrank, "Failure in MPI_Reduce of aggregated rate.\n");
/* NOTE(review): interval is a time_t printed with %lu - confirm this matches
 * time_t on every target platform. */
762 printf("Rate: %.2f %ss/sec (total: %d threads %d %ss %lu secs)"
763 "\n", ag_rate, cmd, nthreads, ag_ops, cmd, interval);
/* Recreate pass: reopen this rank's files starting again from the saved
 * initial `begin` value (the condition enabling this pass is not visible in
 * this excerpt). */
767 for (begin = beginsave; begin <= end; begin += dirthreads) {
768 sprintf(filename, filefmt, begin);
769 if ((fd = open(filename, openflags, 0644)) < 0) {
773 fatal(myrank, "recreate open(%s) error: %s\n",
774 filename, strerror(rc));
/* Finish banner: printed by rank 0 only, or by every rank when debugging. */
782 if ((myrank == 0) || debug) {
783 printf("%d: %s finished at %s",
784 myrank, hostname, ctime(&curTime));