2 * 2003, Copyright, Hewlett-Packard Development Company, LP.
4 * Developed under the sponsorship of the U.S. Government
5 * under Subcontract No. B514193
9 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
10 * Use is subject to license terms.
12 * Copyright (c) 2012, Intel Corporation.
18 #include <sys/types.h>
29 #include <sys/ioctl.h>
35 #include <lustre/lustreapi.h> /* for O_LOV_DELAY_CREATE */
/* Progress-reporting thresholds used by DISPLAY_PROGRESS(): the clock is
 * sampled once every CHECK_COUNT operations, and a report is printed when
 * more than DISPLAY_TIME seconds have elapsed since lastTime or when the
 * operation count is a multiple of DISPLAY_COUNT. */
37 #define CHECK_COUNT 10000
38 #define DISPLAY_COUNT (CHECK_COUNT * 10)
39 #define DISPLAY_TIME 100
/* Long-option table passed to getopt_long() in process_args().
 * Fields per entry: name, has_arg (0 = takes no argument, 1 = argument
 * required), flag (NULL in every entry, so getopt_long() returns val), and
 * val, the option code (CREATE, LOOKUP, ... are declared elsewhere). */
68 struct option longOpts[] = {
69 {"create", 0, NULL, CREATE },
70 {"lookup", 0, NULL, LOOKUP },
71 {"mknod", 0, NULL, MKNOD },
72 {"open", 0, NULL, OPEN },
73 {"stat", 0, NULL, STAT },
74 {"unlink", 0, NULL, UNLINK },
75 {"begin", 1, NULL, BEGIN },
76 {"iters", 1, NULL, ITERS },
77 {"time", 1, NULL, TIME }, /* seconds */
78 {"dirfmt", 1, NULL, DIRFMT },
79 {"ndirs", 1, NULL, NDIRS },
80 {"filefmt", 1, NULL, FILEFMT },
81 {"nfiles", 1, NULL, NFILES },
82 {"noexcl", 0, NULL, NOEXCL },
83 {"stripes", 1, NULL, STRIPES },
84 {"seed", 1, NULL, SEED },
85 {"seedfile", 1, NULL, SEEDFILE },
86 {"random_order", 0, NULL, RANDOM },
87 {"readdir_order", 0, NULL, READDIR },
88 {"recreate", 0, NULL, RECREATE },
89 {"ignore", 0, NULL, IGNORE },
90 {"verbose", 0, NULL, VERBOSE },
91 {"debug", 0, NULL, DEBUG },
92 {"help", 0, NULL, HELP },
/* Host name printed in log lines; overwritten by gethostname() in
 * process_args(). */
102 char hostname[512] = "unknown";
/* Flags passed to the open() calls on test files; option parsing in
 * process_args() clears or adds bits (e.g. --noexcl clears O_EXCL). */
105 int openflags = O_RDWR|O_CREAT|O_EXCL;
/* Buffer for the "/bin/mkdir -p <dir>" shell command; the extra 14 bytes
 * equal the length of the "/bin/mkdir -p " prefix. */
109 char mkdir_cmd[PATH_MAX+14];
/* Most recent entry returned by readdir() on `directory`. */
113 struct dirent *dir_entry;
/* printf-style template for file names (default "f%d") and the buffer the
 * expanded name is written into. */
115 char filefmt[PATH_MAX];
116 char filename[PATH_MAX];
/* Signal action used in main() to install sigalrm_handler for SIGALRM. */
124 struct sigaction act;
/* Debug printf: the arguments are printed only when `debug` is non-zero.
 * NOTE(review): this expands to a bare `if`, so an `else` written after a
 * dmesg(...) statement would bind to the macro's `if`. */
133 #define dmesg if (debug) printf
/* Periodic progress report for the timed loops. When `verbose` is set and
 * nops is a multiple of CHECK_COUNT, the clock is read; a rate line is
 * printed if more than DISPLAY_TIME seconds have passed since lastTime or
 * nops is a multiple of DISPLAY_COUNT. Reads and writes the variables nops,
 * lastOps, curTime, lastTime, interval, rate and startTime of the code that
 * expands it, plus myrank and cmd. */
135 #define DISPLAY_PROGRESS() { \
136 if (verbose && (nops % CHECK_COUNT == 0)) { \
137 curTime = MPI_Wtime(); \
138 interval = curTime - lastTime; \
139 if (interval > DISPLAY_TIME || nops % DISPLAY_COUNT == 0) { \
140 rate = (double)(nops - lastOps)/interval; \
141 printf("Rank %d: %.2f %ss/sec %.2f secs " \
142 "(total: %d %ss %.2f secs)\n", \
143 myrank, rate, cmd, interval, \
144 nops, cmd, curTime - startTime); \
146 lastTime = curTime; \
/* Usage text printed by usage(); its single %s receives the program name. */
151 char *usage_msg = "usage: %s\n"
152 " { --create [ --noexcl ] | --lookup | --mknod |\n"
153 " --open | --stat | --unlink [ --recreate ] [ --ignore ] }\n"
154 " [ --help ] [ --verbose ] [ --debug ]\n"
155 " { [ --begin <num> ] --nfiles <num> }\n"
156 " [ --iters <num> ] [ --time <secs> ]\n"
157 " [ --dirfmt <str> ] [ --ndirs <num> ]\n"
158 " [ --filefmt <str> ] [ --stripes <num> ]\n"
159 " [ --random_order [--seed <num> | --seedfile <file>] ]\n"
160 " [ --readdir_order ]\n";
/* Print a printf-style message prefixed with the program name, then the
 * usage text, and exit. The exit status is 1 when `stream` is stderr and 0
 * for any other stream. */
163 usage(FILE *stream, char *fmt, ...)
169 fprintf(stream, "%s: ", prog);
/* NOTE(review): the message body is written to stderr while its prefix and
 * the usage text go to `stream` - confirm whether `stream` was intended. */
171 vfprintf(stderr, fmt, ap);
174 fprintf(stream, usage_msg, prog);
/* The comparison evaluates to 1 for stderr, 0 otherwise. */
178 exit(stream == stderr);
181 /* Fatal error: when `rank` equals this process's myrank, print "rank <n>: "
 * followed by the printf-style message to stderr. MPI_Abort() is invoked on
 * MPI_COMM_WORLD with error code 1. */
183 fatal(int rank, const char *fmt, ...)
/* Callers pass 0 to have a single rank report, or myrank to have the
 * calling rank report. */
185 if (rank == myrank) {
188 fprintf(stderr, "rank %d: ", rank);
190 vfprintf(stderr, fmt, ap);
194 MPI_Abort(MPI_COMM_WORLD, 1);
/* Handler for SIGALRM; installed through sigaction() in main(). */
199 sigalrm_handler(int signum)
204 /* HAVE_LLAPI_FILE_LOOKUP is defined by liblustreapi.h if
205 * llapi_file_lookup() is provided there. Otherwise the equivalent operation
206 * can be done via ioctl, given access to a complete lustre build tree for
207 * the various definitions - then compile with USE_MDC_LOOKUP defined.
 * Either way HAVE_MDC_LOOKUP ends up defined, which enables --lookup. */
208 #if defined(HAVE_LLAPI_FILE_LOOKUP)
209 #define HAVE_MDC_LOOKUP
210 #elif defined(USE_MDC_LOOKUP)
212 #include <liblustre.h>
213 #include <linux/lustre_lib.h>
/* Fallback llapi_file_lookup(): packs `name` into an obd_ioctl_data request
 * and issues the IOC_MDC_LOOKUP ioctl. */
215 int llapi_file_lookup(int dirfd, const char *name)
217 struct obd_ioctl_data data = { 0 };
222 if (dirfd < 0 || name == NULL)
225 data.ioc_version = OBD_IOCTL_VERSION;
226 data.ioc_len = sizeof(data);
227 data.ioc_inlbuf1 = name;
/* +1 so the terminating NUL is part of the inline buffer length. */
228 data.ioc_inllen1 = strlen(name) + 1;
230 rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
232 fatal(myrank, "ioctl_pack failed: rc = %d\n", rc);
/* NOTE(review): the ioctl is issued on `fd`, while the parameter validated
 * above is `dirfd` - confirm which descriptor is intended. */
236 return ioctl(fd, IOC_MDC_LOOKUP, buf);
238 #define HAVE_MDC_LOOKUP
/* Parse the command line into the file-scope settings, reject invalid
 * option combinations, derive this rank's directory and begin/end file
 * range, then create the working directory with "/bin/mkdir -p" and change
 * into it. Errors are reported through usage() or fatal(). */
242 process_args(int argc, char *argv[])
245 int i, index, offset, tmpend, rc;
252 prog = basename(argv[0]);
/* Default file-name template. */
253 strcpy(filefmt, "f%d");
254 gethostname(hostname, sizeof(hostname));
256 /* Build shortOpts from longOpts rather than maintaining a separate static string. */
257 for (opt = longOpts, cp = shortOpts; opt->name != NULL; opt++, cp++) {
263 while ((rc = getopt_long(argc,argv, shortOpts, longOpts,&index)) != -1) {
266 openflags &= ~(O_CREAT|O_EXCL);
268 #ifdef HAVE_MDC_LOOKUP
275 fatal(0, "Invalid - more than one operation "
277 longOpts[index].name);
/* cmd is the operation name used in the progress and summary output. */
280 cmd = (char *)longOpts[index].name;
283 if (mode != CREATE && mode != MKNOD) {
284 usage(stderr, "--noexcl only applies to "
285 "--create or --mknod.\n");
287 openflags &= ~O_EXCL;
290 if (mode != UNLINK) {
291 usage(stderr, "--recreate only makes sense"
/* Numeric options are parsed with strtol() base 0, so decimal, octal (0...)
 * and hexadecimal (0x...) forms are accepted; trailing characters are
 * rejected through endptr. */
297 begin = strtol(optarg, &endptr, 0);
298 if ((*endptr != 0) || (begin < 0)) {
/* NOTE(review): the message says --start, but the option is named --begin. */
299 fatal(0, "Invalid --start value.\n");
303 iters = strtol(optarg, &endptr, 0);
304 if ((*endptr != 0) || (iters <= 0)) {
305 fatal(0, "Invalid --iters value.\n");
307 if (mode != LOOKUP && mode != OPEN) {
308 usage(stderr, "--iters only makes sense with "
309 "--lookup or --open.\n");
313 seconds = strtol(optarg, &endptr, 0);
314 if ((*endptr != 0) || (seconds <= 0)) {
315 fatal(0, "Invalid --time value.\n");
319 if (strlen(optarg) > (PATH_MAX - 16)) {
320 fatal(0, "--dirfmt too long\n");
325 ndirs = strtol(optarg, &endptr, 0);
326 if ((*endptr != 0) || (ndirs <= 0)) {
327 fatal(0, "Invalid --ndirs value.\n");
329 if ((ndirs > nthreads) &&
330 ((mode == CREATE) || (mode == MKNOD))) {
331 fatal(0, "--ndirs=%d must be less than or "
332 "equal to the number of threads (%d).\n",
/* NOTE(review): the limit 4080 is hard-coded, whereas filefmt is PATH_MAX
 * bytes and --dirfmt is checked against PATH_MAX - 16. */
337 if (strlen(optarg) > 4080) {
338 fatal(0, "--filefmt too long\n");
341 /* optarg is itself used as a format string with the rank as its only
 * argument, so write %%d where the file number should appear in the name.
 * NOTE(review): the user-supplied string is passed to sprintf() as the
 * format; conversions other than a single %d are not checked here. */
342 sprintf(filefmt, optarg, myrank);
345 nfiles = strtol(optarg, &endptr, 0);
346 if ((*endptr != 0) || (nfiles <= 0)) {
347 fatal(0, "Invalid --nfiles value.\n");
351 stripes = strtol(optarg, &endptr, 0);
352 if ((*endptr != 0) || (stripes < 0)) {
353 fatal(0, "Invalid --stripes value.\n");
357 openflags |= O_LOV_DELAY_CREATE;
359 fatal(0, "non-zero --stripes value "
360 "not yet supported.\n");
365 seed = strtoul(optarg, &endptr, 0);
367 fatal(0, "bad --seed option %s\n", optarg);
371 seed_file = fopen(optarg, "r");
373 fatal(myrank, "fopen(%s) error: %s\n",
374 optarg, strerror(errno));
/* Read the seed file with fgets(), at most 15 characters per call; the
 * error messages below indicate that one seed per thread is expected. */
377 for (i = -1; fgets(tmp, 16, seed_file) != NULL;) {
383 rc = sscanf(tmp, "%d", &seed);
384 if ((rc != 1) || (seed < 0)) {
385 fatal(myrank, "Invalid seed value '%s' "
386 "at line %d in %s.\n",
390 fatal(myrank, "File '%s' too short. Does not "
391 "contain a seed for thread %d.\n",
399 if (mode != LOOKUP && mode != OPEN) {
400 fatal(0, "--%s can only be specified with "
401 "--lookup, or --open.\n",
402 (char *)longOpts[index].name);
417 usage(stderr, "unrecognized option: '%c'.\n", optopt);
422 usage(stderr, "too many arguments %d >= %d.\n", optind, argc);
/* Post-parse validation: the required count/time options depend on mode. */
425 if (mode == CREATE || mode == MKNOD || mode == UNLINK || mode == STAT) {
429 } else if (nfiles == 0) {
430 usage(stderr, "--nfiles or --time must be specified "
433 } else if (mode == LOOKUP || mode == OPEN) {
437 } else if (iters == 0) {
438 usage(stderr, "--iters or --time must be specifed "
443 usage(stderr, "--nfiles must be specifed with --%s.\n",
/* Read a seed from /dev/urandom. */
448 int fd = open("/dev/urandom", O_RDONLY);
451 if (read(fd, &seed, sizeof(seed)) <
462 dmesg("%s: rank %d seed %d (%s).\n", prog, myrank, seed,
463 (order == RANDOM) ? "random_order" : "readdir_order");
/* NOTE(review): unlike the other usage() messages, this one has no
 * trailing newline. */
465 usage(stderr, "one --create, --mknod, --open, --stat,"
466 #ifdef HAVE_MDC_LOOKUP
469 " or --unlink must be specifed.");
472 /* Support several ranks sharing one directory: dirnum selects the directory
 * by rank modulo ndirs and offset is myrank / ndirs; begin/end are then set
 * accordingly. */
473 dirnum = myrank % ndirs;
474 dirthreads = nthreads / ndirs;
475 if (nthreads > (ndirs * dirthreads + dirnum))
478 offset = myrank / ndirs;
/* tmpend is the last file number of the overall [begin, begin + nfiles)
 * range and caps the per-rank `end` below. */
480 tmpend = begin + nfiles - 1;
484 end = begin + (nfiles / dirthreads) * dirthreads + offset;
485 if ((end > tmpend) || (end <= 0))
494 dmesg("%d: iters %d nfiles %d time %d begin %d end %d dirthreads %d."
495 "\n", myrank, iters, nfiles, seconds, begin, end, dirthreads);
497 if (dirfmt == NULL) {
500 sprintf(dir, dirfmt, dirnum);
/* NOTE(review): dir is inserted unquoted into a shell command run by
 * system(); directory names containing shell metacharacters are not
 * escaped. */
502 sprintf(mkdir_cmd, "/bin/mkdir -p %s", dir);
503 #ifdef _LIGHTWEIGHT_KERNEL
504 printf("NOTICE: not running system(%s)\n", mkdir_cmd);
506 rc = system(mkdir_cmd);
508 fatal(myrank, "'%s' failed.\n", mkdir_cmd);
514 fatal(myrank, "unable to chdir to '%s'.\n", dir);
/* Return the name of the next file to operate on.
 * With order == RANDOM the name is built in the global `filename` buffer
 * from filefmt and random() % nfiles. Otherwise names come from readdir()
 * on `directory`; when the end is reached the directory is rewound and the
 * first entry whose name does not start with '.' is returned. A failure to
 * find such an entry is fatal. The returned pointer refers to storage that
 * the next call or the next readdir() may overwrite. */
519 static inline char *next_file()
521 if (order == RANDOM) {
522 sprintf(filename, filefmt, random() % nfiles);
528 dir_entry = readdir(directory);
529 if (dir_entry == NULL) {
/* End of directory: start over from the first entry. */
530 rewinddir(directory);
531 while ((dir_entry = readdir(directory)) != NULL) {
/* Skip names beginning with '.', which covers "." and "..". */
532 if (dir_entry->d_name[0] != '.')
533 return(dir_entry->d_name);
536 fatal(myrank, "unable to read directory %s (%s).\n",
537 dir, strerror(errno));
540 return(dir_entry->d_name);
/* Entry point: initialise MPI, parse options, optionally pre-create the
 * files, run the selected operation in a timed loop, then reduce the
 * per-rank operation count, interval and rate to rank 0 with MPI_SUM and
 * print the summary. */
544 main(int argc, char *argv[])
546 int i, j, fd, rc, nops, lastOps;
548 double ag_interval = 0;
550 double rate, avg_rate, effective_rate;
551 double startTime, curTime, lastTime, interval;
555 rc = MPI_Init(&argc, &argv);
/* NOTE(review): myrank is only assigned by MPI_Comm_rank() below, so the
 * fatal() calls before that point use whatever value myrank holds then. */
556 if (rc != MPI_SUCCESS)
557 fatal(myrank, "MPI_Init failed: %d\n", rc);
559 rc = MPI_Comm_size(MPI_COMM_WORLD, &nthreads);
560 if (rc != MPI_SUCCESS)
561 fatal(myrank, "MPI_Comm_size failed: %d\n", rc);
563 rc = MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
564 if (rc != MPI_SUCCESS)
565 fatal(myrank, "MPI_Comm_rank failed: %d\n", rc);
567 process_args(argc, argv);
/* NOTE(review): `×tamp` here and below looks like a mis-encoded
 * `&timestamp` - confirm against the original source. */
570 if ((myrank == 0) || debug) {
571 printf("%d: %s starting at %s",
572 myrank, hostname, ctime(×tamp));
575 /* if we're not measuring creation rates then precreate
576 * the files we're operating on. */
577 if ((mode != CREATE) && (mode != MKNOD) && !ignore &&
578 (mode != UNLINK || recreate)) {
579 /* create the files in reverse order. When we encounter
580 * a file that already exists, assume the remainder of
581 * the files exist to save time. The timed performance
582 * test scripts make use of this behavior. */
583 for (i = end, j = 0; i >= begin; i -= dirthreads) {
584 sprintf(filename, filefmt, i);
585 fd = open(filename, openflags, 0644);
590 fatal(myrank, "precreate open(%s) error: %s\n",
591 filename, strerror(rc));
596 dmesg("%d: %s pre-created %d files.\n",myrank,hostname,j);
/* Wait until every rank has finished its preparation. */
598 rc = MPI_Barrier(MPI_COMM_WORLD);
599 if (rc != MPI_SUCCESS)
600 fatal(myrank, "prep MPI_Barrier failed: %d\n", rc);
/* readdir order: open the directory and advance the stream past a random
 * number of entries (random() % nfiles). */
603 if (order == READDIR) {
604 directory = opendir(dir);
605 if (directory == NULL) {
607 fatal(myrank, "opendir(%s) error: %s\n",
612 j = random() % nfiles;
/* NOTE(review): j is an int but is printed with %u. */
613 dmesg("%d: %s initializing dir offset %u: %s",
614 myrank, hostname, j, ctime(×tamp));
616 for (i = 0; i <= j; i++) {
617 if ((dir_entry = readdir(directory)) == NULL) {
618 fatal(myrank, "could not read entry number %d "
619 "in directory %s.\n", i, dir);
624 dmesg("%d: index %d, filename %s, offset %ld: "
625 "%s initialization complete: %s",
626 myrank, i, dir_entry->d_name, telldir(directory),
627 hostname, ctime(×tamp));
/* Install the SIGALRM handler; the loops below stop once alarm_caught is
 * set. */
631 act.sa_handler = sigalrm_handler;
632 (void)sigemptyset(&act.sa_mask);
634 sigaction(SIGALRM, &act, NULL);
638 rc = MPI_Barrier(MPI_COMM_WORLD);
639 if (rc != MPI_SUCCESS)
640 fatal(myrank, "prep MPI_Barrier failed: %d\n", rc);
/* Start of the timed section. */
642 startTime = lastTime = MPI_Wtime();
/* Create loop: open() each file from begin to end in steps of dirthreads,
 * until the range is exhausted or the alarm fires. */
647 for (; begin <= end && !alarm_caught; begin += dirthreads) {
648 sprintf(filename, filefmt, begin);
649 if ((fd = open(filename, openflags, 0644)) < 0) {
650 if (((rc = errno) == EINTR) && alarm_caught)
652 fatal(myrank, "open(%s) error: %s\n",
653 filename, strerror(rc));
661 dmesg("%d: created %d files, last file '%s'.\n",
662 myrank, nops, filename);
664 #ifdef HAVE_MDC_LOOKUP
/* Lookup loop: open the directory once, then look up names supplied by
 * next_file() until iters operations are done or the alarm fires. */
666 fd = open(dir, O_RDONLY);
668 fatal(myrank, "open(dir == '%s') error: %s\n",
669 dir, strerror(errno));
672 for (; nops < iters && !alarm_caught;) {
/* NOTE(review): this local shadows the file-scope `filename` buffer. */
673 char *filename = next_file();
674 rc = llapi_file_lookup(fd, filename);
676 if (((rc = errno) == EINTR) && alarm_caught)
678 fatal(myrank, "llapi_file_lookup(%s) "
679 "error: %s\n", filename, strerror(rc));
/* Mknod loop: create each file in the range as a regular file (mode 0644)
 * with mknod(). */
688 for (; begin <= end && !alarm_caught; begin += dirthreads) {
689 sprintf(filename, filefmt, begin);
690 rc = mknod(filename, S_IFREG| 0644, 0);
692 if (((rc = errno) == EINTR) && alarm_caught)
694 fatal(myrank, "mknod(%s) error: %s\n",
695 filename, strerror(rc));
/* Open loop: runs until iters operations are done or the alarm fires. */
703 for (; nops < iters && !alarm_caught;) {
705 if ((fd = open(file, openflags, 0644)) < 0) {
706 if (((rc = errno) == EINTR) && alarm_caught)
708 fatal(myrank, "open(%s) error: %s\n",
/* Stat loop: stat() each file in the range; ENOENT is handled separately
 * from other errors when `ignore` is set. */
719 for (; begin <= end && !alarm_caught; begin += dirthreads) {
720 sprintf(filename, filefmt, begin);
721 rc = stat(filename, &statbuf);
723 if (((rc = errno) == EINTR) && alarm_caught)
725 if (((rc = errno) == ENOENT) && ignore)
727 fatal(myrank, "stat(%s) error: %s\n",
728 filename, strerror(rc));
/* Unlink loop: unlink() each file in the range; ENOENT is handled
 * separately from other errors. */
736 for (; begin <= end && !alarm_caught; begin += dirthreads) {
737 sprintf(filename, filefmt, begin);
738 rc = unlink(filename);
740 if (((rc = errno) == EINTR) && alarm_caught)
742 if ((rc = errno) == ENOENT) {
745 /* no more files to unlink */
748 fatal(myrank, "unlink(%s) error: %s\n",
749 filename, strerror(rc));
/* End of the timed section: synchronise, then compute this rank's elapsed
 * time and rate.
 * NOTE(review): the failure message below reuses the "prep" wording of the
 * earlier barriers. */
758 rc = MPI_Barrier(MPI_COMM_WORLD);
759 if (rc != MPI_SUCCESS)
760 fatal(myrank, "prep MPI_Barrier failed: %d\n", rc);
761 curTime = MPI_Wtime();
762 interval = curTime - startTime;
763 rate = (double) (nops) / interval;
/* Sum the per-rank values onto rank 0: operation count, elapsed time and
 * rate. */
765 rc = MPI_Reduce(&nops, &ag_ops, 1, MPI_INT, MPI_SUM, 0,
767 if (rc != MPI_SUCCESS) {
768 fatal(myrank, "Failure in MPI_Reduce of total ops.\n");
771 rc = MPI_Reduce(&interval, &ag_interval, 1, MPI_DOUBLE, MPI_SUM, 0,
773 if (rc != MPI_SUCCESS) {
774 fatal(myrank, "Failure in MPI_Reduce of total interval.\n");
777 rc = MPI_Reduce(&rate, &ag_rate, 1, MPI_DOUBLE, MPI_SUM, 0,
779 if (rc != MPI_SUCCESS) {
780 fatal(myrank, "Failure in MPI_Reduce of aggregated rate.\n");
/* effective_rate divides the total operations by this rank's elapsed time
 * (re-sampled after the reductions); avg_rate divides them by the sum of
 * all ranks' intervals. */
785 curTime = MPI_Wtime();
786 interval = curTime - startTime;
787 effective_rate = (double) ag_ops / interval;
788 avg_rate = (double) ag_ops / ag_interval;
790 printf("Rate: %.2f eff %.2f aggr %.2f avg client %ss/sec "
791 "(total: %d threads %d %ss %d dirs %d threads/dir %.2f secs)\n",
792 effective_rate, ag_rate, avg_rate, cmd, nthreads, ag_ops,
793 cmd, ndirs, dirthreads, interval);
794 if (mode == UNLINK && !recreate && !ignore && ag_ops != nfiles)
795 printf("Warning: only unlinked %d files instead of %d"
796 "\n", ag_ops, nfiles);
/* Recreate pass: re-open every file in the range, starting again from the
 * saved beginning. */
800 for (begin = beginsave; begin <= end; begin += dirthreads) {
801 sprintf(filename, filefmt, begin);
802 if ((fd = open(filename, openflags, 0644)) < 0) {
806 fatal(myrank, "recreate open(%s) error: %s\n",
807 filename, strerror(rc));
815 if ((myrank == 0) || debug) {
816 printf("%d: %s finished at %s",
817 myrank, hostname, ctime(×tamp));