Whamcloud - gitweb
LU-11297 lnet: MR Routing Feature
[fs/lustre-release.git] / lustre / tests / mpi / mdsrate.c
1 /*
2  * 2003, Copyright, Hewlett-Packard Development Company, LP.
3  *
4  * Developed under the sponsorship of the U.S. Government
5  *     under Subcontract No. B514193
6  */
7
8 /*
9  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
10  * Use is subject to license terms.
11  *
12  * Copyright (c) 2012, 2015, Intel Corporation.
13  */
14
15 #include <stdio.h>
16 #include <getopt.h>
17 #include <libgen.h>
18 #include <sys/types.h>
19 #include <sys/stat.h>
20 #include <time.h>
21 #include <limits.h>
22 #include <errno.h>
23 #include <string.h>
24 #include <fcntl.h>
25 #include <unistd.h>
26 #include <stdlib.h>
27 #include <stdarg.h>
28 #include <signal.h>
29 #include <sys/ioctl.h>
30 #include <dirent.h>
31 #include <sys/xattr.h>
32
33 #include "mpi.h"
34
35 /* lustre */
36 #include <lustre/lustreapi.h>        /* for O_LOV_DELAY_CREATE */
37
38 #define CHECK_COUNT 10000
39 #define DISPLAY_COUNT (CHECK_COUNT * 10)
40 #define DISPLAY_TIME 100
41
/*
 * Option identifiers.  Each constant is the single-character short option
 * and also the value stored in the .val field of the matching longOpts[]
 * entry.
 */
enum {
        /* operations */
        CREATE          = 'c',
        LOOKUP          = 'l',
        MKNOD           = 'm',
        OPEN            = 'o',
        STAT            = 's',
        UNLINK          = 'u',
        SETXATTR        = 'x',

        /* modifiers of an operation */
        NOEXCL          = 'X',
        RECREATE        = 'C',
        SMALLWRITE      = 'w',
        IGNORE          = 'E',
        STRIPES         = 'S',

        /* amount of work */
        BEGIN           = 'b',
        ITERS           = 'i',
        TIME            = 't',
        NFILES          = 'F',

        /* naming and placement */
        DIRFMT          = 'd',
        NDIRS           = 'D',
        FILEFMT         = 'f',
        MNT             = 'M',
        MNTCOUNT        = 'N',
        MDTCOUNT        = 'T',

        /* file selection order and seeding */
        RANDOM          = 'A',
        READDIR         = 'B',
        SEED            = 'r',
        SEEDFILE        = 'R',

        /* diagnostics */
        VERBOSE         = 'V',
        DEBUG           = 'v',
        HELP            = 'h',
};
73
74 struct option longOpts[] = {
75         { .name = "create", .has_arg = no_argument, .val = CREATE },
76         { .name = "lookup", .has_arg = no_argument, .val = LOOKUP },
77         { .name = "mknod", .has_arg = no_argument, .val = MKNOD },
78         { .name = "open", .has_arg = no_argument, .val = OPEN },
79         { .name = "stat", .has_arg = no_argument, .val = STAT },
80         { .name = "unlink", .has_arg = no_argument, .val = UNLINK },
81         { .name = "begin", .has_arg = required_argument, .val = BEGIN },
82         { .name = "iters", .has_arg = required_argument, .val = ITERS },
83         /* time is in seconds */
84         { .name = "time", .has_arg = required_argument, .val = TIME },
85         { .name = "dirfmt", .has_arg = required_argument, .val = DIRFMT },
86         { .name = "ndirs", .has_arg = required_argument, .val = NDIRS },
87         { .name = "filefmt", .has_arg = required_argument, .val = FILEFMT },
88         { .name = "nfiles", .has_arg = required_argument, .val = NFILES },
89         { .name = "noexcl", .has_arg = no_argument, .val = NOEXCL },
90         { .name = "stripes", .has_arg = required_argument, .val = STRIPES },
91         { .name = "seed", .has_arg = required_argument, .val = SEED },
92         { .name = "seedfile", .has_arg = required_argument, .val = SEEDFILE },
93         { .name = "random_order", .has_arg = no_argument, .val = RANDOM },
94         { .name = "readdir_order", .has_arg = no_argument, .val = READDIR },
95         { .name = "recreate", .has_arg = no_argument, .val = RECREATE },
96         { .name = "setxattr", .has_arg = no_argument, .val = SETXATTR },
97         { .name = "smallwrite", .has_arg = no_argument, .val = SMALLWRITE },
98         { .name = "ignore", .has_arg = no_argument, .val = IGNORE },
99         { .name = "verbose", .has_arg = no_argument, .val = VERBOSE },
100         { .name = "debug", .has_arg = no_argument, .val = DEBUG },
101         { .name = "help", .has_arg = no_argument, .val = HELP },
102         { .name = "mdtcount", .has_arg = required_argument, .val = MDTCOUNT },
103         { .name = "mntcount", .has_arg = required_argument, .val = MNTCOUNT },
104         { .name = "mntfmt", .has_arg = required_argument, .val = MNT },
105         { .name = NULL }
106 };
107
108 int foo1, foo2;
109
110 char   shortOpts[128];
111 int    myrank = -1;
112 int    nthreads = -1;
113 char * prog;
114 char   hostname[512] = "unknown";
115 char   mode;
116 char * cmd;
117 int    openflags = O_RDWR|O_CREAT|O_EXCL;
118 int    ndirs = 1;
119 char * dirfmt;
120 char   dir[PATH_MAX];
121 char   mkdir_cmd[PATH_MAX+32];
122 int    dirthreads;
123 int    dirnum;
124 DIR *  directory;
125 struct dirent *dir_entry;
126 int    nfiles;
127 char   filefmt[PATH_MAX];
128 char   filename[PATH_MAX];
129 char   path[PATH_MAX];
130 int    stripes = -1;
131 int    begin;
132 int    beginsave;
133 int    end;
134 int    iters;
135 int    seconds;
136 int    alarm_caught;
137 struct sigaction act;
138 int    order = RANDOM;
139 int    seed;
140 int    recreate;
141 int    ignore;
142 int    verbose;
143 int    debug;
144 struct stat statbuf;
145 bool   with_xattr;
146 char   xattrname[] = "user.mdsrate";
147 char   xattrbuf[4096];
148 /* max xattr name + value length is block size, use 4000 here to avoid ENOSPC */
149 int    xattrlen = 4000;
150 bool   smallwrite;
151 int    mnt_count = -1;
152 int    mdt_count = 1;
153 char  *mntfmt;
154
/*
 * printf() the arguments only when the global "debug" flag is non-zero.
 * Wrapped in do { } while (0) so the macro acts as a single statement and
 * cannot capture an "else" that follows the call site.
 */
#define dmesg(...) do { if (debug) printf(__VA_ARGS__); } while (0)
156
/*
 * Periodic progress report.  Expands in the caller's scope and uses the
 * caller's nops, lastOps, rate, interval, curTime, lastTime and startTime
 * variables, plus the globals verbose, myrank and cmd.
 *
 * When verbose is set, the clock is sampled every CHECK_COUNT operations.
 * A line is printed if more than DISPLAY_TIME seconds have passed since the
 * last report or nops is a multiple of DISPLAY_COUNT.
 *
 * Wrapped in do { } while (0) so that "DISPLAY_PROGRESS();" is exactly one
 * statement and is safe inside an unbraced if/else.
 */
#define DISPLAY_PROGRESS() do {                                             \
        if (verbose && (nops % CHECK_COUNT == 0)) {                         \
                curTime = MPI_Wtime();                                      \
                interval = curTime - lastTime;                              \
                if (interval > DISPLAY_TIME || nops % DISPLAY_COUNT == 0) { \
                        rate = (double)(nops - lastOps)/interval;           \
                        printf("Rank %d: %.2f %ss/sec %.2f secs "           \
                               "(total: %d %ss %.2f secs)\n",               \
                               myrank, rate, cmd, interval,                 \
                               nops, cmd, curTime - startTime);             \
                        lastOps = nops;                                     \
                        lastTime = curTime;                                 \
                }                                                           \
        }                                                                   \
} while (0)
172
/*
 * Usage text printed by usage().  It is used as a printf format with one
 * "%s" conversion, which receives the program name.
 */
char *usage_msg = "usage: %s\n"
                  "    { --create [ --noexcl | --setxattr | --smallwrite ] |\n"
                  "      --lookup | --mknod [ --setxattr ] | --open |\n"
                  "      --stat | --unlink [ --recreate ] [ --ignore ] |\n"
                  "      --setxattr }\n"
                  "    [ --help ] [ --verbose ] [ --debug ]\n"
                  "    { [ --begin <num> ] --nfiles <num> }\n"
                  "    [ --iters <num> ] [ --time <secs> ]\n"
                  "    [ --dirfmt <str> ] [ --ndirs  <num> ]\n"
                  "    [ --filefmt <str> ] [ --stripes <num> ]\n"
                  "    [ --random_order [--seed <num> | --seedfile <file>] ]\n"
                  "    [ --readdir_order ] [ --mntfmt <str> ]\n"
                  "    [ --mntcount <num> ] [ --mdtcount <num> ]\n";
187
188 static void
189 usage(FILE *stream, char *fmt, ...)
190 {
191         if (myrank == 0) {
192                 if (fmt != NULL) {
193                         va_list       ap;
194
195                         fprintf(stream, "%s: ", prog);
196                         va_start(ap, fmt);
197                         vfprintf(stderr, fmt, ap);
198                         va_end(ap);
199                 }
200                 fprintf(stream, usage_msg, prog);
201         }
202
203         MPI_Finalize();
204         exit(stream == stderr);
205 }
206
207 /* Print process myrank and message, and exit (i.e. a fatal error) */
208 static int
209 fatal(int rank, const char *fmt, ...)
210 {
211         if (rank == myrank) {
212                 va_list       ap;
213
214                 fprintf(stderr, "rank %d: ", rank);
215                 va_start(ap, fmt);
216                 vfprintf(stderr, fmt, ap);
217                 va_end(ap);
218         }
219
220         MPI_Abort(MPI_COMM_WORLD, 1);
221         exit(1);
222 }
223
224 static void
225 sigalrm_handler(int signum)
226 {
227         alarm_caught++;
228 }
229
230 static void
231 process_args(int argc, char *argv[])
232 {
233         char   *cp, *endptr;
234         int    i, index, offset, tmpend, rc;
235         char   tmp[16];
236         FILE * seed_file;
237         struct option *opt;
238
239         setbuf(stdout, 0);
240         setbuf(stderr, 0);
241         prog = basename(argv[0]);
242         strcpy(filefmt, "f%d");
243         gethostname(hostname, sizeof(hostname));
244
245         /* auto create shortOpts rather than maintaining a static string. */
246         for (opt = longOpts, cp = shortOpts; opt->name != NULL; opt++, cp++) {
247                 *cp = opt->val;
248                 if (opt->has_arg)
249                         *++cp = ':';
250         }
251
252         while ((rc = getopt_long(argc,argv, shortOpts, longOpts,&index)) != -1) {
253                 switch (rc) {
254                 case OPEN:
255                         openflags &= ~(O_CREAT|O_EXCL);
256                 case CREATE:
257                 case LOOKUP:
258                 case MKNOD:
259                 case STAT:
260                 case UNLINK:
261                         if (cmd != NULL) {
262                                 fatal(0, "Invalid - more than one operation "
263                                            "specified: --%s\n",
264                                         longOpts[index].name);
265                         }
266                         mode = rc;
267                         cmd = (char *)longOpts[index].name;
268                         break;
269                 case NOEXCL:
270                         if (mode != CREATE && mode != MKNOD) {
271                                 usage(stderr, "--noexcl only applies to "
272                                               "--create or --mknod.\n");
273                         }
274                         openflags &= ~O_EXCL;
275                         break;
276                 case RECREATE:
277                         if (mode != UNLINK) {
278                                 usage(stderr, "--recreate only makes sense"
279                                               "with --unlink.\n");
280                         }
281                         recreate++;
282                         break;
283                 case SETXATTR:
284                         if (cmd == NULL) {
285                                 mode = SETXATTR;
286                                 cmd = (char *)longOpts[index].name;
287                         } else if (mode == CREATE || mode == MKNOD) {
288                                 with_xattr = true;
289                         } else {
290                                 usage(stderr, "--setxattr only makes sense "
291                                       "with --create, --mknod or alone.\n");
292                         }
293                         break;
294                 case SMALLWRITE:
295                         if (mode != CREATE)
296                                 usage(stderr, "--smallwrite only applies to "
297                                               "--create.\n");
298                         smallwrite = true;
299                         break;
300                 case BEGIN:
301                         begin = strtol(optarg, &endptr, 0);
302                         if ((*endptr != 0) || (begin < 0)) {
303                                 fatal(0, "Invalid --start value.\n");
304                         }
305                         break;
306                 case ITERS:
307                         iters = strtol(optarg, &endptr, 0);
308                         if ((*endptr != 0) || (iters <= 0)) {
309                                 fatal(0, "Invalid --iters value.\n");
310                         }
311                         if (mode != LOOKUP && mode != OPEN) {
312                                 usage(stderr, "--iters only makes sense with "
313                                               "--lookup or --open.\n");
314                         }
315                         break;
316                 case TIME:
317                         seconds = strtol(optarg, &endptr, 0);
318                         if ((*endptr != 0) || (seconds <= 0)) {
319                                 fatal(0, "Invalid --time value.\n");
320                         }
321                         break;
322                 case DIRFMT:
323                         if (strlen(optarg) > (PATH_MAX - 16)) {
324                                 fatal(0, "--dirfmt too long\n");
325                         }
326                         dirfmt = optarg;
327                         break;
328                 case NDIRS:
329                         ndirs = strtol(optarg, &endptr, 0);
330                         if ((*endptr != 0) || (ndirs <= 0)) {
331                                 fatal(0, "Invalid --ndirs value.\n");
332                         }
333                         if ((ndirs > nthreads) &&
334                             ((mode == CREATE) || (mode == MKNOD))) {
335                                 fatal(0, "--ndirs=%d must be less than or "
336                                       "equal to the number of threads (%d).\n",
337                                       ndirs, nthreads);
338                         }
339                         break;
340                 case FILEFMT:
341                         if (strlen(optarg) > 4080) {
342                                 fatal(0, "--filefmt too long\n");
343                         }
344
345                         /* Use %%d where you want the file # in the name. */
346                         sprintf(filefmt, optarg, myrank);
347                         break;
348                 case NFILES:
349                         nfiles = strtol(optarg, &endptr, 0);
350                         if ((*endptr != 0) || (nfiles <= 0)) {
351                                 fatal(0, "Invalid --nfiles value.\n");
352                         }
353                         break;
354                 case STRIPES:
355                         stripes = strtol(optarg, &endptr, 0);
356                         if ((*endptr != 0) || (stripes < 0)) {
357                                 fatal(0, "Invalid --stripes value.\n");
358                         }
359
360                         if (stripes == 0) {
361                                 openflags |= O_LOV_DELAY_CREATE;
362                         } else {
363                                 fatal(0, "non-zero --stripes value "
364                                          "not yet supported.\n");
365                         }
366
367                         break;
368                 case SEED:
369                         seed = strtoul(optarg, &endptr, 0);
370                         if (*endptr) {
371                                 fatal(0, "bad --seed option %s\n", optarg);
372                         }
373                         break;
374                 case SEEDFILE:
375                         seed_file = fopen(optarg, "r");
376                         if (!seed_file) {
377                               fatal(myrank, "fopen(%s) error: %s\n",
378                                       optarg, strerror(errno));
379                         }
380
381                         for (i = -1; fgets(tmp, 16, seed_file) != NULL;) {
382                                 if (++i == myrank)
383                                         break;
384                         }
385
386                         if (i == myrank) {
387                                 rc = sscanf(tmp, "%d", &seed);
388                                 if ((rc != 1) || (seed < 0)) {
389                                         fatal(myrank, "Invalid seed value '%s' "
390                                               "at line %d in %s.\n",
391                                               tmp, i, optarg);
392                                 }
393                         } else {
394                                 fatal(myrank, "File '%s' too short. Does not "
395                                       "contain a seed for thread %d.\n",
396                                       optarg, myrank);
397                         }
398
399                         fclose(seed_file);
400                         break;
401                 case RANDOM:
402                 case READDIR:
403                         if (mode != LOOKUP && mode != OPEN)  {
404                                 fatal(0, "--%s can only be specified with "
405                                          "--lookup, or --open.\n",
406                                       (char *)longOpts[index].name);
407                         }
408                         order = rc;
409                         break;
410                 case IGNORE:
411                         ++ignore;
412                         break;
413                 case DEBUG:
414                         ++debug;
415                 case VERBOSE:
416                         ++verbose;
417                         break;
418                 case HELP:
419                         usage(stdout, NULL);
420                         break;
421                 case MNT:
422                         if (strlen(optarg) > (PATH_MAX - 16))
423                                 fatal(0, "--mnt too long\n");
424                         mntfmt = optarg;
425                         break;
426                 case MNTCOUNT:
427                         mnt_count = strtol(optarg, &endptr, 0);
428                         if ((*endptr != 0) || (mnt_count <= 0)) {
429                                 fatal(0, "Invalid --mnt_count value %s.\n",
430                                       optarg);
431                         }
432                         break;
433                 case MDTCOUNT:
434                         mdt_count = strtol(optarg, &endptr, 0);
435                         if ((*endptr != 0) || (mdt_count <= 0)) {
436                                 fatal(0, "Invalid --mdt_count value %s.\n",
437                                       optarg);
438                         }
439                         break;
440                 default:
441                         usage(stderr, "unrecognized option: '%c'.\n", optopt);
442                 }
443         }
444
445         if (optind < argc) {
446                 usage(stderr, "too many arguments %d >= %d.\n", optind, argc);
447         }
448
449         if ((mnt_count != -1 && mntfmt == NULL) ||
450             (mnt_count == -1 && mntfmt != NULL)) {
451                 usage(stderr, "mnt_count and mntfmt must be specified at the "
452                              "same time\n");
453         }
454
455         if (mode == CREATE || mode == MKNOD || mode == UNLINK ||
456             mode == STAT || mode == SETXATTR) {
457                 if (seconds != 0) {
458                         if (nfiles == 0)
459                                 nfiles = INT_MAX;
460                 } else if (nfiles == 0) {
461                         usage(stderr, "--nfiles or --time must be specified "
462                                       "with %s.\n", cmd);
463                 }
464         } else if (mode == LOOKUP || mode == OPEN) {
465                 if (seconds != 0) {
466                         if (iters == 0)
467                                 iters = INT_MAX;
468                 } else if (iters == 0) {
469                         usage(stderr, "--iters or --time must be specifed "
470                                       "with %s.\n", cmd);
471                 }
472
473                 if (nfiles == 0) {
474                         usage(stderr, "--nfiles must be specifed with --%s.\n",
475                               cmd);
476                 }
477
478                 if (seed == 0) {
479                         int fd = open("/dev/urandom", O_RDONLY);
480
481                         if (fd >= 0) {
482                                 if (read(fd, &seed, sizeof(seed)) <
483                                     sizeof(seed))
484                                         seed = time(0);
485                                 close(fd);
486                         } else {
487                                 seed = time(0);
488                         }
489                 }
490
491                 srand(seed);
492
493                 dmesg("%s: rank %d seed %d (%s).\n", prog, myrank, seed,
494                       (order == RANDOM) ? "random_order" : "readdir_order");
495         } else {
496                 usage(stderr, "one --create, --mknod, --open, --stat,"
497                       " --lookup,"
498                       " --unlink or --setxattr must be specifed.");
499         }
500
501         /* support for multiple threads in a dir, set begin/end appropriately.*/
502         dirnum = myrank % ndirs;
503         dirthreads = nthreads / ndirs;
504         if (nthreads > (ndirs * dirthreads + dirnum))
505                 ++dirthreads;
506
507         offset = myrank / ndirs;
508
509         tmpend = begin + nfiles - 1;
510         if (tmpend <= 0)
511                 tmpend = INT_MAX;
512
513         end = begin + (nfiles / dirthreads) * dirthreads + offset;
514         if ((end > tmpend) || (end <= 0))
515                 end -= dirthreads;
516
517         /* make sure mnt_count <= nthreads, otherwise it might div 0 in
518          * the following test */
519         if (mnt_count > nthreads)
520                 mnt_count = nthreads;
521
522         begin += offset;
523         if (begin < 0)
524                 begin = INT_MAX;
525
526         beginsave = begin;
527
528         dmesg("%d: iters %d nfiles %d time %d begin %d end %d dirthreads %d."
529               "\n", myrank, iters, nfiles, seconds, begin, end, dirthreads);
530
531         if (dirfmt == NULL) {
532                 strcpy(dir, ".");
533         } else {
534                 int dir_len = 0;
535
536                 if (mntfmt != NULL) {
537                         sprintf(dir, mntfmt, (myrank / (nthreads/mnt_count)));
538                         strcat(dir, "/");
539                         dir_len = strlen(dir);
540                 }
541                 sprintf(dir + dir_len, dirfmt, dirnum);
542
543                 if (mdt_count > 1) {
544                         struct stat sb;
545                         if (stat(dir, &sb) == 0) {
546                                 if (!S_ISDIR(sb.st_mode))
547                                         fatal(myrank, "'%s' is not dir\n", dir);
548                         } else if (errno == ENOENT) {
549                                 sprintf(mkdir_cmd, "lfs mkdir -i %d %s",
550                                         myrank % mdt_count, dir);
551                         } else {
552                                 fatal(myrank, "'%s' stat failed\n", dir);
553                         }
554                 } else {
555                         sprintf(mkdir_cmd, "mkdir -p %s", dir);
556                 }
557
558                 dmesg("%d: %s\n", myrank, mkdir_cmd);
559 #ifdef _LIGHTWEIGHT_KERNEL
560                 printf("NOTICE: not running system(%s)\n", mkdir_cmd);
561 #else
562                 rc = system(mkdir_cmd);
563                 if (rc)
564                         fatal(myrank, "'%s' failed.\n", mkdir_cmd);
565 #endif
566
567                 rc = chdir(dir);
568                 if (rc) {
569                         fatal(myrank, "unable to chdir to '%s'.\n", dir);
570                 }
571         }
572 }
573
574 static inline char *next_file()
575 {
576         if (order == RANDOM) {
577                 sprintf(filename, filefmt, random() % nfiles);
578                 return(filename);
579         }
580
581         /* readdir order */
582
583         dir_entry = readdir(directory);
584         if (dir_entry == NULL) {
585                 rewinddir(directory);
586                 while ((dir_entry = readdir(directory)) != NULL) {
587                         if (dir_entry->d_name[0] != '.')
588                                 return(dir_entry->d_name);
589                 }
590
591                 fatal(myrank, "unable to read directory %s (%s).\n",
592                       dir, strerror(errno));
593         }
594
595         return(dir_entry->d_name);
596 }
597
598 int
599 main(int argc, char *argv[])
600 {
601         int    i, j, fd, rc, nops, lastOps;
602         int ag_ops = 0;
603         double ag_interval = 0;
604         double ag_rate = 0;
605         double rate, avg_rate, effective_rate;
606         double startTime, curTime, lastTime, interval;
607         time_t timestamp;
608         char * file;
609
610         rc = MPI_Init(&argc, &argv);
611         if (rc != MPI_SUCCESS)
612                 fatal(myrank, "MPI_Init failed: %d\n", rc);
613
614         rc = MPI_Comm_size(MPI_COMM_WORLD, &nthreads);
615         if (rc != MPI_SUCCESS)
616                 fatal(myrank, "MPI_Comm_size failed: %d\n", rc);
617
618         rc = MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
619         if (rc != MPI_SUCCESS)
620                 fatal(myrank, "MPI_Comm_rank failed: %d\n", rc);
621
622         process_args(argc, argv);
623
624         timestamp = time(0);
625         if ((myrank == 0) || debug) {
626                 printf("%d: %s starting at %s",
627                        myrank, hostname, ctime(&timestamp));
628         }
629
630         /* if we're not measuring creation rates then precreate
631          * the files we're operating on. */
632         if ((mode != CREATE) && (mode != MKNOD) && !ignore &&
633             (mode != UNLINK || recreate)) {
634                 /* create the files in reverse order. When we encounter
635                  * a file that already exists, assume the remainder of 
636                  * the files exist to save time. The timed performance
637                  * test scripts make use of this behavior. */
638                 for (i = end, j = 0; i >= begin; i -= dirthreads) {
639                         sprintf(filename, filefmt, i);
640                         fd = open(filename, openflags, 0644);
641                         if (fd < 0) {
642                                 if (errno == EEXIST)
643                                         break;
644                                 rc = errno;
645                                 fatal(myrank, "precreate open(%s) error: %s\n",
646                                       filename, strerror(rc));
647                         }
648                         j++;
649                         close(fd);
650                 }
651                 dmesg("%d: %s pre-created %d files.\n",myrank,hostname,j);
652
653                 rc = MPI_Barrier(MPI_COMM_WORLD);
654                 if (rc != MPI_SUCCESS)
655                         fatal(myrank, "prep MPI_Barrier failed: %d\n", rc);
656         }
657
658         if (order == READDIR) {
659                 directory = opendir(dir);
660                 if (directory == NULL) {
661                         rc = errno;
662                         fatal(myrank, "opendir(%s) error: %s\n",
663                               dir, strerror(rc));
664                 }
665
666                 timestamp = time(0);
667                 j = random() % nfiles;
668                 dmesg("%d: %s initializing dir offset %u: %s",
669                       myrank, hostname, j, ctime(&timestamp));
670
671                 for (i = 0; i <= j; i++) {
672                         if ((dir_entry = readdir(directory)) == NULL) {
673                                 fatal(myrank, "could not read entry number %d "
674                                       "in directory %s.\n", i, dir);
675                         }
676                 }
677
678                 timestamp = time(0);
679                 dmesg("%d: index %d, filename %s, offset %ld: "
680                       "%s initialization complete: %s",
681                       myrank, i, dir_entry->d_name, telldir(directory),
682                       hostname, ctime(&timestamp));
683         }
684
685         if (seconds) {
686                 act.sa_handler = sigalrm_handler;
687                 (void)sigemptyset(&act.sa_mask);
688                 act.sa_flags = 0;
689                 sigaction(SIGALRM, &act, NULL);
690                 alarm(seconds);
691         }
692
693         rc = MPI_Barrier(MPI_COMM_WORLD);
694         if (rc != MPI_SUCCESS)
695                 fatal(myrank, "prep MPI_Barrier failed: %d\n", rc);
696
697         startTime = lastTime = MPI_Wtime();
698         nops = lastOps = 0;
699
700         switch (mode) {
701         case CREATE:
702                 for (; begin <= end && !alarm_caught; begin += dirthreads) {
703                         snprintf(filename, sizeof(filename), filefmt, begin);
704                         fd = open(filename, openflags, 0644);
705                         if (fd < 0) {
706                                 rc = errno;
707                                 if (rc == EINTR && alarm_caught)
708                                         break;
709                                 fatal(myrank, "open(%s) error: %s\n",
710                                       filename, strerror(rc));
711                         }
712
713                         if (with_xattr) {
714                                 rc = fsetxattr(fd, xattrname, xattrbuf,
715                                                xattrlen, XATTR_CREATE);
716                                 if (rc) {
717                                         rc = errno;
718                                         if (rc == EINTR && alarm_caught)
719                                                 break;
720                                         fatal(myrank,
721                                               "setxattr(%s) error: %s\n",
722                                               filename, strerror(rc));
723                                 }
724                         }
725                         if (smallwrite) {
726                                 rc = write(fd, xattrbuf, xattrlen);
727                                 if (rc < 0) {
728                                         rc = errno;
729                                         if (rc == EINTR && alarm_caught)
730                                                 break;
731                                         fatal(myrank,
732                                               "write(%s) error: %s\n",
733                                               filename, strerror(rc));
734                                 }
735                         }
736
737                         close(fd);
738                         nops++;
739                         DISPLAY_PROGRESS();
740                 }
741
742                 dmesg("%d: created %d files, last file '%s'.\n",
743                       myrank, nops, filename);
744                 break;
745         case LOOKUP:
746                 fd = open(dir, O_RDONLY);
747                 if (fd < 0) {
748                         fatal(myrank, "open(dir == '%s') error: %s\n",
749                               dir, strerror(errno));
750                 }
751
752                 for (; nops < iters && !alarm_caught;) {
753                         char *filename = next_file();
754                         rc = llapi_file_lookup(fd, filename);
755                         if (rc < 0) {
756                                 if (((rc = errno) == EINTR) && alarm_caught)
757                                         break;
758                                 fatal(myrank, "llapi_file_lookup(%s) "
759                                       "error: %s\n", filename, strerror(rc));
760                         }
761
762                         nops++;
763                         DISPLAY_PROGRESS();
764                 }
765                 break;
766         case MKNOD:
767                 for (; begin <= end && !alarm_caught; begin += dirthreads) {
768                         snprintf(filename, sizeof(filename), filefmt, begin);
769                         rc = mknod(filename, S_IFREG | 0644, 0);
770                         if (rc) {
771                                 rc = errno;
772                                 if (rc == EINTR && alarm_caught)
773                                         break;
774                                 fatal(myrank, "mknod(%s) error: %s\n",
775                                       filename, strerror(rc));
776                         }
777
778                         if (with_xattr) {
779                                 rc = setxattr(filename, xattrname, xattrbuf,
780                                               xattrlen, XATTR_CREATE);
781                                 if (rc) {
782                                         rc = errno;
783                                         if (rc == EINTR && alarm_caught)
784                                                 break;
785                                         fatal(myrank,
786                                               "setxattr(%s) error: %s\n",
787                                               filename, strerror(rc));
788                                 }
789                         }
790
791                         nops++;
792                         DISPLAY_PROGRESS();
793                 }
794                 break;
795         case OPEN:
796                 for (; nops < iters && !alarm_caught;) {
797                         file = next_file();
798                         if ((fd = open(file, openflags, 0644)) < 0) {
799                                 if (((rc = errno) == EINTR) && alarm_caught)
800                                         break;
801                                 fatal(myrank, "open(%s) error: %s\n",
802                                       file, strerror(rc));
803                         }
804
805                         close(fd);
806
807                         nops++;
808                         DISPLAY_PROGRESS();
809                 }
810                 break;
811         case STAT:
812                 for (; begin <= end && !alarm_caught; begin += dirthreads) {
813                         sprintf(filename, filefmt, begin);
814                         rc = stat(filename, &statbuf);
815                         if (rc) {
816                                 if (((rc = errno) == EINTR) && alarm_caught)
817                                         break;
818                                 if (((rc = errno) == ENOENT) && ignore)
819                                         continue;
820                                 fatal(myrank, "stat(%s) error: %s\n",
821                                       filename, strerror(rc));
822                         }
823
824                         nops++;
825                         DISPLAY_PROGRESS();
826                 }
827                 break;
828         case UNLINK:
829                 for (; begin <= end && !alarm_caught; begin += dirthreads) {
830                         sprintf(filename, filefmt, begin);
831                         rc = unlink(filename);
832                         if (rc) {
833                                 if (((rc = errno) == EINTR) && alarm_caught)
834                                         break;
835                                 if ((rc = errno) == ENOENT) {
836                                         if (ignore)
837                                                 continue;
838                                         /* no more files to unlink */
839                                         break;
840                                 }
841                                 fatal(myrank, "unlink(%s) error: %s\n",
842                                       filename, strerror(rc));
843                         }
844
845                         nops++;
846                         DISPLAY_PROGRESS();
847                 }
848                 break;
849         case SETXATTR:
850                 for (; begin <= end && !alarm_caught; begin += dirthreads) {
851                         snprintf(filename, sizeof(filename), filefmt, begin);
852                         rc = setxattr(filename, xattrname, xattrbuf, xattrlen,
853                                       XATTR_CREATE);
854                         if (rc) {
855                                 rc = errno;
856                                 if (rc == EINTR && alarm_caught)
857                                         break;
858                                 if (rc == ENOENT && ignore)
859                                         continue;
860                                 fatal(myrank, "setxattr(%s) error: %s\n",
861                                       filename, strerror(rc));
862                         }
863
864                         nops++;
865                         DISPLAY_PROGRESS();
866                 }
867                 break;
868         }
869
870         rc = MPI_Barrier(MPI_COMM_WORLD);
871         if (rc != MPI_SUCCESS)
872                fatal(myrank, "prep MPI_Barrier failed: %d\n", rc);
873         curTime = MPI_Wtime();
874         interval = curTime - startTime;
875         rate = (double) (nops) / interval;
876
877         rc = MPI_Reduce(&nops, &ag_ops, 1, MPI_INT, MPI_SUM, 0,
878                         MPI_COMM_WORLD);
879         if (rc != MPI_SUCCESS) {
880                 fatal(myrank, "Failure in MPI_Reduce of total ops.\n");
881         }
882
883         rc = MPI_Reduce(&interval, &ag_interval, 1, MPI_DOUBLE, MPI_SUM, 0,
884                         MPI_COMM_WORLD);
885         if (rc != MPI_SUCCESS) {
886                 fatal(myrank, "Failure in MPI_Reduce of total interval.\n");
887         }
888
889         rc = MPI_Reduce(&rate, &ag_rate, 1, MPI_DOUBLE, MPI_SUM, 0,
890                         MPI_COMM_WORLD);
891         if (rc != MPI_SUCCESS) {
892                 fatal(myrank, "Failure in MPI_Reduce of aggregated rate.\n");
893         }
894
895         if (myrank == 0) {
896                 curTime = MPI_Wtime();
897                 interval = curTime - startTime;
898                 effective_rate = (double) ag_ops / interval;
899                 avg_rate = (double) ag_ops / ag_interval;
900
901                 printf("Rate: %.2f eff %.2f aggr %.2f avg client %ss/sec "
902                        "(total: %d threads %d %ss %d dirs %d threads/dir %.2f secs)\n",
903                        effective_rate, ag_rate, avg_rate, cmd, nthreads, ag_ops,
904                        cmd, ndirs, dirthreads, interval);
905                 if (mode == UNLINK && !recreate && !ignore && ag_ops != nfiles)
906                         printf("Warning: only unlinked %d files instead of %d"
907                                "\n", ag_ops, nfiles);
908         }
909
910         if (recreate) {
911                 for (begin = beginsave; begin <= end; begin += dirthreads) {
912                         sprintf(filename, filefmt, begin);
913                         if ((fd = open(filename, openflags, 0644)) < 0) {
914                                 rc = errno;
915                                 if (rc == EEXIST)
916                                         break;
917                                 fatal(myrank, "recreate open(%s) error: %s\n",
918                                       filename, strerror(rc));
919                         }
920
921                         close(fd);
922                 }
923         }
924
925         timestamp = time(0);
926         if ((myrank == 0) || debug) {
927                 printf("%d: %s finished at %s",
928                        myrank, hostname, ctime(&timestamp));
929         }
930
931         MPI_Finalize();
932         return(0);
933 }