Whamcloud - gitweb
LU-12391 tests: mdsrate tests improvements
[fs/lustre-release.git] / lustre / tests / mpi / mdsrate.c
1 /*
2  * 2003, Copyright, Hewlett-Packard Development Company, LP.
3  *
4  * Developed under the sponsorship of the U.S. Government
5  *     under Subcontract No. B514193
6  */
7
8 /*
9  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
10  * Use is subject to license terms.
11  *
12  * Copyright (c) 2012, 2015, Intel Corporation.
13  */
14
15 #include <stdio.h>
16 #include <getopt.h>
17 #include <libgen.h>
18 #include <sys/types.h>
19 #include <sys/stat.h>
20 #include <time.h>
21 #include <limits.h>
22 #include <errno.h>
23 #include <string.h>
24 #include <fcntl.h>
25 #include <unistd.h>
26 #include <stdlib.h>
27 #include <stdarg.h>
28 #include <signal.h>
29 #include <sys/ioctl.h>
30 #include <dirent.h>
31 #include <sys/xattr.h>
32
33 #include "mpi.h"
34
35 /* lustre */
36 #include <lustre/lustreapi.h>        /* for O_LOV_DELAY_CREATE */
37
38 #define CHECK_COUNT 10000
39 #define DISPLAY_COUNT (CHECK_COUNT * 10)
40 #define DISPLAY_TIME 100
41
42 enum {
43         CREATE          = 'c',
44         LOOKUP          = 'l',
45         MKNOD           = 'm',
46         OPEN            = 'o',
47         STAT            = 's',
48         UNLINK          = 'u',
49         BEGIN           = 'b',
50         ITERS           = 'i',
51         TIME            = 't',
52         DIRFMT          = 'd',
53         NDIRS           = 'D',
54         FILEFMT         = 'f',
55         NFILES          = 'F',
56         NOEXCL          = 'X',
57         STRIPES         = 'S',
58         SEED            = 'r',
59         SEEDFILE        = 'R',
60         RANDOM          = 'A',
61         READDIR         = 'B',
62         RECREATE        = 'C',
63         SETXATTR        = 'x',
64         SMALLWRITE      = 'w',
65         IGNORE          = 'E',
66         VERBOSE         = 'V',
67         DEBUG           = 'v',
68         HELP            = 'h',
69         MNT             = 'M',
70         MNTCOUNT        = 'N',
71         MDTCOUNT        = 'T',
72 };
73
74 struct option longOpts[] = {
75         { .name = "create", .has_arg = no_argument, .val = CREATE },
76         { .name = "lookup", .has_arg = no_argument, .val = LOOKUP },
77         { .name = "mknod", .has_arg = no_argument, .val = MKNOD },
78         { .name = "open", .has_arg = no_argument, .val = OPEN },
79         { .name = "stat", .has_arg = no_argument, .val = STAT },
80         { .name = "unlink", .has_arg = no_argument, .val = UNLINK },
81         { .name = "begin", .has_arg = required_argument, .val = BEGIN },
82         { .name = "iters", .has_arg = required_argument, .val = ITERS },
83         /* time is in seconds */
84         { .name = "time", .has_arg = required_argument, .val = TIME },
85         { .name = "dirfmt", .has_arg = required_argument, .val = DIRFMT },
86         { .name = "ndirs", .has_arg = required_argument, .val = NDIRS },
87         { .name = "filefmt", .has_arg = required_argument, .val = FILEFMT },
88         { .name = "nfiles", .has_arg = required_argument, .val = NFILES },
89         { .name = "noexcl", .has_arg = no_argument, .val = NOEXCL },
90         { .name = "stripes", .has_arg = required_argument, .val = STRIPES },
91         { .name = "seed", .has_arg = required_argument, .val = SEED },
92         { .name = "seedfile", .has_arg = required_argument, .val = SEEDFILE },
93         { .name = "random_order", .has_arg = no_argument, .val = RANDOM },
94         { .name = "readdir_order", .has_arg = no_argument, .val = READDIR },
95         { .name = "recreate", .has_arg = no_argument, .val = RECREATE },
96         { .name = "setxattr", .has_arg = no_argument, .val = SETXATTR },
97         { .name = "smallwrite", .has_arg = no_argument, .val = SMALLWRITE },
98         { .name = "ignore", .has_arg = no_argument, .val = IGNORE },
99         { .name = "verbose", .has_arg = no_argument, .val = VERBOSE },
100         { .name = "debug", .has_arg = no_argument, .val = DEBUG },
101         { .name = "help", .has_arg = no_argument, .val = HELP },
102         { .name = "mdtcount", .has_arg = required_argument, .val = MDTCOUNT },
103         { .name = "mntcount", .has_arg = required_argument, .val = MNTCOUNT },
104         { .name = "mntfmt", .has_arg = required_argument, .val = MNT },
105         { .name = NULL }
106 };
107
108 int foo1, foo2;
109
110 char   shortOpts[128];
111 int    myrank = -1;
112 int    nthreads = -1;
113 char *prog;
114 char   hostname[512] = "unknown";
115 char   mode;
116 char *cmd;
117 int    openflags = O_RDWR | O_CREAT | O_EXCL;
118 int    ndirs = 1;
119 char *dirfmt;
120 char   dir[PATH_MAX];
121 char   mkdir_cmd[PATH_MAX + 48];
122 int    dirthreads;
123 int    dirnum;
124 DIR *directory;
125 struct dirent *dir_entry;
126 int    nfiles;
127 char   filefmt[PATH_MAX];
128 char   filename[PATH_MAX];
129 char   path[PATH_MAX];
130 int    stripes = -1;
131 int    begin;
132 int    beginsave;
133 int    end;
134 int    iters;
135 int    seconds;
136 int    alarm_caught;
137 struct sigaction act;
138 int    order = RANDOM;
139 int    seed;
140 int    recreate;
141 int    ignore;
142 int    verbose;
143 int    debug;
144 struct stat statbuf;
145 bool   with_xattr;
146 char   xattrname[] = "user.mdsrate";
147 char   xattrbuf[4096];
148 /* max xattr name + value length is block size, use 4000 here to avoid ENOSPC */
149 int    xattrlen = 4000;
150 bool   smallwrite;
151 int    mnt_count = -1;
152 int    mdt_count = 1;
153 char  *mntfmt;
154
155 #define dmesg if (debug) printf
156
157 #define DISPLAY_PROGRESS() {                                                \
158         if (verbose && (nops % CHECK_COUNT == 0)) {                         \
159                 curTime = MPI_Wtime();                                      \
160                 interval = curTime - lastTime;                              \
161                 if (interval > DISPLAY_TIME || nops % DISPLAY_COUNT == 0) { \
162                         rate = (double)(nops - lastOps) / interval;         \
163                         printf("Rank %d: %.2f %ss/sec %.2f secs "           \
164                         "(total: %d %ss %.2f secs)\n",                      \
165                         myrank, rate, cmd, interval,                        \
166                         nops, cmd, curTime - startTime);                    \
167                         lastOps = nops;                                     \
168                         lastTime = curTime;                                 \
169                 }                                                           \
170         }                                                                   \
171 }
172
173 char *usage_msg = "usage: %s\n"
174                   "    { --create [ --noexcl | --setxattr | --smallwrite ] |\n"
175                   "      --lookup | --mknod [ --setxattr ] | --open |\n"
176                   "      --stat | --unlink [ --recreate ] [ --ignore ] |\n"
177                   "      --setxattr }\n"
178                   "    [ --help ] [ --verbose ] [ --debug ]\n"
179                   "    { [ --begin <num> ] --nfiles <num> }\n"
180                   "    [ --iters <num> ] [ --time <secs> ]\n"
181                   "    [ --dirfmt <str> ] [ --ndirs  <num> ]\n"
182                   "    [ --filefmt <str> ] [ --stripes <num> ]\n"
183                   "    [ --random_order [--seed <num> | --seedfile <file>] ]\n"
184                   "    [ --readdir_order ] [ --mntfmt <str> ]\n"
185                   "    [ --mntcount <num> ] [ --mdtcount <num> ]\n"
186                   "    [ --setxattr ] }\n";
187
188 static void
189 usage(FILE *stream, char *fmt, ...)
190 {
191         if (myrank == 0) {
192                 if (fmt) {
193                         va_list       ap;
194
195                         fprintf(stream, "%s: ", prog);
196                         va_start(ap, fmt);
197                         vfprintf(stderr, fmt, ap);
198                         va_end(ap);
199                 }
200                 fprintf(stream, usage_msg, prog);
201         }
202
203         MPI_Finalize();
204         exit(stream == stderr);
205 }
206
207 /* Print process myrank and message, and exit (i.e. a fatal error) */
208 static int
209 fatal(int rank, const char *fmt, ...)
210 {
211         if (rank == myrank) {
212                 va_list       ap;
213
214                 fprintf(stderr, "rank %d: ", rank);
215                 va_start(ap, fmt);
216                 vfprintf(stderr, fmt, ap);
217                 va_end(ap);
218         }
219
220         MPI_Abort(MPI_COMM_WORLD, 1);
221         exit(1);
222 }
223
224 static void
225 sigalrm_handler(int signum)
226 {
227         alarm_caught++;
228 }
229
230 static void
231 process_args(int argc, char *argv[])
232 {
233         char   *cp, *endptr;
234         int    i, index, offset, tmpend, rc;
235         char   tmp[16];
236         FILE *seed_file;
237         struct option *opt;
238
239         setbuf(stdout, 0);
240         setbuf(stderr, 0);
241         prog = basename(argv[0]);
242         strcpy(filefmt, "f%d");
243         gethostname(hostname, sizeof(hostname));
244
245         /* auto create shortOpts rather than maintaining a static string. */
246         for (opt = longOpts, cp = shortOpts; opt->name != NULL; opt++, cp++) {
247                 *cp = opt->val;
248                 if (opt->has_arg)
249                         *++cp = ':';
250         }
251
252         while ((rc = getopt_long(argc, argv, shortOpts,
253                                  longOpts, &index)) != -1) {
254                 switch (rc) {
255                 case OPEN:
256                         openflags &= ~(O_CREAT | O_EXCL);
257                 case CREATE:
258                 case LOOKUP:
259                 case MKNOD:
260                 case STAT:
261                 case UNLINK:
262                         if (cmd) {
263                                 fatal(0,
264                                       "Invalid - more than one operation specified: --%s\n",
265                                       longOpts[index].name);
266                         }
267                         mode = rc;
268                         cmd = (char *)longOpts[index].name;
269                         break;
270                 case NOEXCL:
271                         if (mode != CREATE && mode != MKNOD) {
272                                 usage(stderr,
273                                       "--noexcl only applies to --create or --mknod.\n");
274                         }
275                         openflags &= ~O_EXCL;
276                         break;
277                 case RECREATE:
278                         if (mode != UNLINK) {
279                                 usage(stderr,
280                                       "--recreate only makes sense with --unlink.\n");
281                         }
282                         recreate++;
283                         break;
284                 case SETXATTR:
285                         if (!cmd) {
286                                 mode = SETXATTR;
287                                 cmd = (char *)longOpts[index].name;
288                         } else if (mode == CREATE || mode == MKNOD) {
289                                 with_xattr = true;
290                         } else {
291                                 usage(stderr,
292                                       "--setxattr only makes sense with --create, --mknod or alone.\n");
293                         }
294                         break;
295                 case SMALLWRITE:
296                         if (mode != CREATE)
297                                 usage(stderr,
298                                       "--smallwrite only applies to --create.\n");
299                         smallwrite = true;
300                         break;
301                 case BEGIN:
302                         begin = strtol(optarg, &endptr, 0);
303                         if ((*endptr != 0) || (begin < 0))
304                                 fatal(0, "Invalid --start value.\n");
305                         break;
306                 case ITERS:
307                         iters = strtol(optarg, &endptr, 0);
308                         if ((*endptr != 0) || (iters <= 0))
309                                 fatal(0, "Invalid --iters value.\n");
310                         if (mode != LOOKUP && mode != OPEN)
311                                 usage(stderr,
312                                       "--iters only makes sense with --lookup or --open.\n");
313                         break;
314                 case TIME:
315                         seconds = strtol(optarg, &endptr, 0);
316                         if ((*endptr != 0) || (seconds <= 0))
317                                 fatal(0, "Invalid --time value.\n");
318                         break;
319                 case DIRFMT:
320                         if (strlen(optarg) > (PATH_MAX - 16))
321                                 fatal(0, "--dirfmt too long\n");
322                         dirfmt = optarg;
323                         break;
324                 case NDIRS:
325                         ndirs = strtol(optarg, &endptr, 0);
326                         if ((*endptr != 0) || (ndirs <= 0))
327                                 fatal(0, "Invalid --ndirs value.\n");
328                         if ((ndirs > nthreads) &&
329                             ((mode == CREATE) || (mode == MKNOD))) {
330                                 fatal(0,
331                                       "--ndirs=%d must be less than or equal to the number of threads (%d).\n",
332                                       ndirs, nthreads);
333                         }
334                         break;
335                 case FILEFMT:
336                         if (strlen(optarg) > 4080)
337                                 fatal(0, "--filefmt too long\n");
338
339                         /* Use %%d where you want the file # in the name. */
340                         sprintf(filefmt, optarg, myrank);
341                         break;
342                 case NFILES:
343                         nfiles = strtol(optarg, &endptr, 0);
344                         if ((*endptr != 0) || (nfiles <= 0))
345                                 fatal(0, "Invalid --nfiles value.\n");
346                         break;
347                 case STRIPES:
348                         stripes = strtol(optarg, &endptr, 0);
349                         if ((*endptr != 0) || (stripes < 0))
350                                 fatal(0, "Invalid --stripes value.\n");
351
352                         if (stripes == 0) {
353                                 openflags |= O_LOV_DELAY_CREATE;
354                         } else {
355                                 fatal(0,
356                                       "non-zero --stripes value not yet supported.\n");
357                         }
358
359                         break;
360                 case SEED:
361                         seed = strtoul(optarg, &endptr, 0);
362                         if (*endptr)
363                                 fatal(0, "bad --seed option %s\n", optarg);
364                         break;
365                 case SEEDFILE:
366                         seed_file = fopen(optarg, "r");
367                         if (!seed_file) {
368                                 fatal(myrank, "fopen(%s) error: %s\n",
369                                       optarg, strerror(errno));
370                         }
371
372                         for (i = -1; fgets(tmp, 16, seed_file) != NULL;) {
373                                 if (++i == myrank)
374                                         break;
375                         }
376
377                         if (i == myrank) {
378                                 rc = sscanf(tmp, "%d", &seed);
379                                 if ((rc != 1) || (seed < 0)) {
380                                         fatal(myrank,
381                                               "Invalid seed value '%s' at line %d in %s.\n",
382                                               tmp, i, optarg);
383                                 }
384                         } else {
385                                 fatal(myrank,
386                                       "File '%s' too short. Does not contain a seed for thread %d.\n",
387                                       optarg, myrank);
388                         }
389
390                         fclose(seed_file);
391                         break;
392                 case RANDOM:
393                 case READDIR:
394                         if (mode != LOOKUP && mode != OPEN)  {
395                                 fatal(0,
396                                       "--%s can only be specified with --lookup, or --open.\n",
397                                       (char *)longOpts[index].name);
398                         }
399                         order = rc;
400                         break;
401                 case IGNORE:
402                         ++ignore;
403                         break;
404                 case DEBUG:
405                         ++debug;
406                 case VERBOSE:
407                         ++verbose;
408                         break;
409                 case HELP:
410                         usage(stdout, NULL);
411                         break;
412                 case MNT:
413                         if (strlen(optarg) > (PATH_MAX - 16))
414                                 fatal(0, "--mnt too long\n");
415                         mntfmt = optarg;
416                         break;
417                 case MNTCOUNT:
418                         mnt_count = strtol(optarg, &endptr, 0);
419                         if ((*endptr != 0) || (mnt_count <= 0)) {
420                                 fatal(0, "Invalid --mnt_count value %s.\n",
421                                       optarg);
422                         }
423                         break;
424                 case MDTCOUNT:
425                         mdt_count = strtol(optarg, &endptr, 0);
426                         if ((*endptr != 0) || (mdt_count <= 0)) {
427                                 fatal(0, "Invalid --mdt_count value %s.\n",
428                                       optarg);
429                         }
430                         break;
431                 default:
432                         usage(stderr, "unrecognized option: '%c'.\n", optopt);
433                 }
434         }
435
436         if (optind < argc)
437                 usage(stderr, "too many arguments %d >= %d.\n", optind, argc);
438
439         if ((mnt_count != -1 && !mntfmt) ||
440             (mnt_count == -1 && mntfmt))
441                 usage(stderr,
442                       "mnt_count and mntfmt must be specified at the same time\n");
443
444         if (mode == CREATE || mode == MKNOD || mode == UNLINK ||
445             mode == STAT || mode == SETXATTR) {
446                 if (seconds != 0) {
447                         if (nfiles == 0)
448                                 nfiles = INT_MAX;
449                 } else if (nfiles == 0) {
450                         usage(stderr,
451                               "--nfiles or --time must be specified with %s.\n",
452                               cmd);
453                 }
454         } else if (mode == LOOKUP || mode == OPEN) {
455                 if (seconds != 0) {
456                         if (iters == 0)
457                                 iters = INT_MAX;
458                 } else if (iters == 0) {
459                         usage(stderr,
460                               "--iters or --time must be specifed with %s.\n",
461                               cmd);
462                 }
463
464                 if (nfiles == 0) {
465                         usage(stderr, "--nfiles must be specifed with --%s.\n",
466                               cmd);
467                 }
468
469                 if (seed == 0) {
470                         int fd = open("/dev/urandom", O_RDONLY);
471
472                         if (fd >= 0) {
473                                 if (read(fd, &seed, sizeof(seed)) <
474                                     sizeof(seed))
475                                         seed = time(0);
476                                 close(fd);
477                         } else {
478                                 seed = time(0);
479                         }
480                 }
481
482                 srand(seed);
483
484                 dmesg("%s: rank %d seed %d (%s).\n", prog, myrank, seed,
485                       (order == RANDOM) ? "random_order" : "readdir_order");
486         } else {
487                 usage(stderr,
488                       "one --create, --mknod, --open, --stat, --lookup, --unlink or --setxattr must be specifed.");
489         }
490
491         /* support for multiple threads in a dir, set begin/end appropriately.*/
492         dirnum = myrank % ndirs;
493         dirthreads = nthreads / ndirs;
494         if (nthreads > (ndirs * dirthreads + dirnum))
495                 ++dirthreads;
496
497         offset = myrank / ndirs;
498
499         tmpend = begin + nfiles - 1;
500         if (tmpend <= 0)
501                 tmpend = INT_MAX;
502
503         end = begin + (nfiles / dirthreads) * dirthreads + offset;
504         if ((end > tmpend) || (end <= 0))
505                 end -= dirthreads;
506
507         /*
508          * make sure mnt_count <= nthreads, otherwise it might div 0 in
509          * the following test
510          */
511         if (mnt_count > nthreads)
512                 mnt_count = nthreads;
513
514         begin += offset;
515         if (begin < 0)
516                 begin = INT_MAX;
517
518         beginsave = begin;
519
520         dmesg("%d: iters %d nfiles %d time %d begin %d end %d dirthreads %d.\n",
521               myrank, iters, nfiles, seconds, begin, end, dirthreads);
522
523         if (!dirfmt) {
524                 strcpy(dir, ".");
525         } else {
526                 int dir_len = 0;
527
528                 if (mntfmt) {
529                         sprintf(dir, mntfmt, (myrank / (nthreads / mnt_count)));
530                         strcat(dir, "/");
531                         dir_len = strlen(dir);
532                 }
533                 sprintf(dir + dir_len, dirfmt, dirnum);
534
535                 if (mdt_count > 1) {
536                         struct stat sb;
537
538                         if (stat(dir, &sb) == 0) {
539                                 if (!S_ISDIR(sb.st_mode))
540                                         fatal(myrank, "'%s' is not dir\n", dir);
541                         } else if (errno == ENOENT) {
542                                 sprintf(mkdir_cmd, "lfs mkdir -i %d -c %d %s",
543                                         rand() % mdt_count,
544                                         rand() % mdt_count + 1, dir);
545                         } else {
546                                 fatal(myrank, "'%s' stat failed\n", dir);
547                         }
548                 } else {
549                         sprintf(mkdir_cmd, "mkdir -p %s", dir);
550                 }
551
552 #ifdef _LIGHTWEIGHT_KERNEL
553                 printf("NOTICE: not running system(%s)\n", mkdir_cmd);
554 #else
555                 if (ndirs == 1) {
556                         if (myrank == 0) {
557                                 dmesg("%d: %s\n", myrank, mkdir_cmd);
558                                 rc = system(mkdir_cmd);
559                         } else {
560                                 rc = 0;
561                         }
562                         if (MPI_Barrier(MPI_COMM_WORLD) != MPI_SUCCESS)
563                                 fatal(myrank, "mkdir MPI_Barrier failed\n");
564                 } else {
565                         dmesg("%d: %s\n", myrank, mkdir_cmd);
566                         rc = system(mkdir_cmd);
567                 }
568                 if (rc)
569                         fatal(myrank, "'%s' failed.\n", mkdir_cmd);
570 #endif
571
572                 rc = chdir(dir);
573                 if (rc)
574                         fatal(myrank, "unable to chdir to '%s'.\n", dir);
575         }
576 }
577
578 static inline char *next_file()
579 {
580         if (order == RANDOM) {
581                 sprintf(filename, filefmt, random() % nfiles);
582                 return filename;
583         }
584
585         /* readdir order */
586
587         dir_entry = readdir(directory);
588         if (!dir_entry) {
589                 rewinddir(directory);
590                 while ((dir_entry = readdir(directory)) != NULL) {
591                         if (dir_entry->d_name[0] != '.')
592                                 return dir_entry->d_name;
593                 }
594
595                 fatal(myrank, "unable to read directory %s (%s).\n",
596                       dir, strerror(errno));
597         }
598
599         return dir_entry->d_name;
600 }
601
602 int
603 main(int argc, char *argv[])
604 {
605         int    i, j, fd, rc, nops, lastOps;
606         int ag_ops = 0;
607         double ag_interval = 0;
608         double ag_rate = 0;
609         double rate, avg_rate, effective_rate;
610         double startTime, curTime, lastTime, interval;
611         time_t timestamp;
612         char *file;
613
614         rc = MPI_Init(&argc, &argv);
615         if (rc != MPI_SUCCESS)
616                 fatal(myrank, "MPI_Init failed: %d\n", rc);
617
618         rc = MPI_Comm_size(MPI_COMM_WORLD, &nthreads);
619         if (rc != MPI_SUCCESS)
620                 fatal(myrank, "MPI_Comm_size failed: %d\n", rc);
621
622         rc = MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
623         if (rc != MPI_SUCCESS)
624                 fatal(myrank, "MPI_Comm_rank failed: %d\n", rc);
625
626         process_args(argc, argv);
627
628         timestamp = time(0);
629         if ((myrank == 0) || debug) {
630                 printf("%d: %s starting at %s",
631                        myrank, hostname, ctime(&timestamp));
632         }
633
634         /*
635          * if we're not measuring creation rates then precreate
636          * the files we're operating on.
637          */
638         if ((mode != CREATE) && (mode != MKNOD) && !ignore &&
639             (mode != UNLINK || recreate)) {
640                 /*
641                  * create the files in reverse order. When we encounter
642                  * a file that already exists, assume the remainder of
643                  * the files exist to save time. The timed performance
644                  * test scripts make use of this behavior.
645                  */
646                 for (i = end, j = 0; i >= begin; i -= dirthreads) {
647                         sprintf(filename, filefmt, i);
648                         fd = open(filename, openflags, 0644);
649                         if (fd < 0) {
650                                 if (errno == EEXIST)
651                                         break;
652                                 rc = errno;
653                                 fatal(myrank, "precreate open(%s) error: %s\n",
654                                       filename, strerror(rc));
655                         }
656                         j++;
657                         close(fd);
658                 }
659                 dmesg("%d: %s pre-created %d files.\n", myrank, hostname, j);
660
661                 rc = MPI_Barrier(MPI_COMM_WORLD);
662                 if (rc != MPI_SUCCESS)
663                         fatal(myrank, "prep MPI_Barrier failed: %d\n", rc);
664         }
665
666         if (order == READDIR) {
667                 directory = opendir(dir);
668                 if (!directory) {
669                         rc = errno;
670                         fatal(myrank, "opendir(%s) error: %s\n",
671                               dir, strerror(rc));
672                 }
673
674                 timestamp = time(0);
675                 j = random() % nfiles;
676                 dmesg("%d: %s initializing dir offset %u: %s",
677                       myrank, hostname, j, ctime(&timestamp));
678
679                 for (i = 0; i <= j; i++) {
680                         if ((dir_entry = readdir(directory)) == NULL) {
681                                 fatal(myrank,
682                                       "could not read entry number %d in directory %s.\n",
683                                       i, dir);
684                         }
685                 }
686
687                 timestamp = time(0);
688                 dmesg("%d: index %d, filename %s, offset %ld: %s initialization complete: %s",
689                       myrank, i, dir_entry->d_name, telldir(directory),
690                       hostname, ctime(&timestamp));
691         }
692
693         if (seconds) {
694                 act.sa_handler = sigalrm_handler;
695                 (void)sigemptyset(&act.sa_mask);
696                 act.sa_flags = 0;
697                 sigaction(SIGALRM, &act, NULL);
698                 alarm(seconds);
699         }
700
701         rc = MPI_Barrier(MPI_COMM_WORLD);
702         if (rc != MPI_SUCCESS)
703                 fatal(myrank, "prep MPI_Barrier failed: %d\n", rc);
704
705         startTime = MPI_Wtime();
706         lastTime = MPI_Wtime();
707         nops = 0;
708         lastOps = 0;
709
710         switch (mode) {
711         case CREATE:
712                 for (; begin <= end && !alarm_caught; begin += dirthreads) {
713                         snprintf(filename, sizeof(filename), filefmt, begin);
714                         fd = open(filename, openflags, 0644);
715                         if (fd < 0) {
716                                 rc = errno;
717                                 if (rc == EINTR && alarm_caught)
718                                         break;
719                                 fatal(myrank, "open(%s) error: %s\n",
720                                       filename, strerror(rc));
721                         }
722
723                         if (with_xattr) {
724                                 rc = fsetxattr(fd, xattrname, xattrbuf,
725                                                xattrlen, XATTR_CREATE);
726                                 if (rc) {
727                                         rc = errno;
728                                         if (rc == EINTR && alarm_caught)
729                                                 break;
730                                         fatal(myrank,
731                                               "setxattr(%s) error: %s\n",
732                                               filename, strerror(rc));
733                                 }
734                         }
735                         if (smallwrite) {
736                                 rc = write(fd, xattrbuf, xattrlen);
737                                 if (rc < 0) {
738                                         rc = errno;
739                                         if (rc == EINTR && alarm_caught)
740                                                 break;
741                                         fatal(myrank,
742                                               "write(%s) error: %s\n",
743                                               filename, strerror(rc));
744                                 }
745                         }
746
747                         close(fd);
748                         nops++;
749                         DISPLAY_PROGRESS();
750                 }
751
752                 dmesg("%d: created %d files, last file '%s'.\n",
753                       myrank, nops, filename);
754                 break;
755         case LOOKUP:
756                 fd = open(dir, O_RDONLY);
757                 if (fd < 0) {
758                         fatal(myrank, "open(dir == '%s') error: %s\n",
759                               dir, strerror(errno));
760                 }
761
762                 for (; nops < iters && !alarm_caught;) {
763                         char *filename = next_file();
764
765                         rc = llapi_file_lookup(fd, filename);
766                         if (rc < 0) {
767                                 if (((rc = errno) == EINTR) && alarm_caught)
768                                         break;
769                                 fatal(myrank,
770                                       "llapi_file_lookup(%s) error: %s\n",
771                                       filename, strerror(rc));
772                         }
773
774                         nops++;
775                         DISPLAY_PROGRESS();
776                 }
777                 break;
778         case MKNOD:
779                 for (; begin <= end && !alarm_caught; begin += dirthreads) {
780                         snprintf(filename, sizeof(filename), filefmt, begin);
781                         rc = mknod(filename, S_IFREG | 0644, 0);
782                         if (rc) {
783                                 rc = errno;
784                                 if (rc == EINTR && alarm_caught)
785                                         break;
786                                 fatal(myrank, "mknod(%s) error: %s\n",
787                                       filename, strerror(rc));
788                         }
789
790                         if (with_xattr) {
791                                 rc = setxattr(filename, xattrname, xattrbuf,
792                                               xattrlen, XATTR_CREATE);
793                                 if (rc) {
794                                         rc = errno;
795                                         if (rc == EINTR && alarm_caught)
796                                                 break;
797                                         fatal(myrank,
798                                               "setxattr(%s) error: %s\n",
799                                               filename, strerror(rc));
800                                 }
801                         }
802
803                         nops++;
804                         DISPLAY_PROGRESS();
805                 }
806                 break;
807         case OPEN:
808                 for (; nops < iters && !alarm_caught;) {
809                         file = next_file();
810                         if ((fd = open(file, openflags, 0644)) < 0) {
811                                 if (((rc = errno) == EINTR) && alarm_caught)
812                                         break;
813                                 fatal(myrank, "open(%s) error: %s\n",
814                                       file, strerror(rc));
815                         }
816
817                         close(fd);
818
819                         nops++;
820                         DISPLAY_PROGRESS();
821                 }
822                 break;
823         case STAT:
824                 for (; begin <= end && !alarm_caught; begin += dirthreads) {
825                         sprintf(filename, filefmt, begin);
826                         rc = stat(filename, &statbuf);
827                         if (rc) {
828                                 if (((rc = errno) == EINTR) && alarm_caught)
829                                         break;
830                                 if (((rc = errno) == ENOENT) && ignore)
831                                         continue;
832                                 fatal(myrank, "stat(%s) error: %s\n",
833                                       filename, strerror(rc));
834                         }
835
836                         nops++;
837                         DISPLAY_PROGRESS();
838                 }
839                 break;
840         case UNLINK:
841                 for (; begin <= end && !alarm_caught; begin += dirthreads) {
842                         sprintf(filename, filefmt, begin);
843                         rc = unlink(filename);
844                         if (rc) {
845                                 if (((rc = errno) == EINTR) && alarm_caught)
846                                         break;
847                                 if ((rc = errno) == ENOENT) {
848                                         if (ignore)
849                                                 continue;
850                                         /* no more files to unlink */
851                                         break;
852                                 }
853                                 fatal(myrank, "unlink(%s) error: %s\n",
854                                       filename, strerror(rc));
855                         }
856
857                         nops++;
858                         DISPLAY_PROGRESS();
859                 }
860                 break;
861         case SETXATTR:
862                 for (; begin <= end && !alarm_caught; begin += dirthreads) {
863                         snprintf(filename, sizeof(filename), filefmt, begin);
864                         rc = setxattr(filename, xattrname, xattrbuf, xattrlen,
865                                       XATTR_CREATE);
866                         if (rc) {
867                                 rc = errno;
868                                 if (rc == EINTR && alarm_caught)
869                                         break;
870                                 if (rc == ENOENT && ignore)
871                                         continue;
872                                 fatal(myrank, "setxattr(%s) error: %s\n",
873                                       filename, strerror(rc));
874                         }
875
876                         nops++;
877                         DISPLAY_PROGRESS();
878                 }
879                 break;
880         }
881
882         rc = MPI_Barrier(MPI_COMM_WORLD);
883         if (rc != MPI_SUCCESS)
884                 fatal(myrank, "prep MPI_Barrier failed: %d\n", rc);
885         curTime = MPI_Wtime();
886         interval = curTime - startTime;
887         rate = (double)(nops) / interval;
888
889         rc = MPI_Reduce(&nops, &ag_ops, 1, MPI_INT, MPI_SUM, 0,
890                         MPI_COMM_WORLD);
891         if (rc != MPI_SUCCESS)
892                 fatal(myrank, "Failure in MPI_Reduce of total ops.\n");
893
894         rc = MPI_Reduce(&interval, &ag_interval, 1, MPI_DOUBLE, MPI_SUM, 0,
895                         MPI_COMM_WORLD);
896         if (rc != MPI_SUCCESS)
897                 fatal(myrank, "Failure in MPI_Reduce of total interval.\n");
898
899         rc = MPI_Reduce(&rate, &ag_rate, 1, MPI_DOUBLE, MPI_SUM, 0,
900                         MPI_COMM_WORLD);
901         if (rc != MPI_SUCCESS)
902                 fatal(myrank, "Failure in MPI_Reduce of aggregated rate.\n");
903
904         if (myrank == 0) {
905                 curTime = MPI_Wtime();
906                 interval = curTime - startTime;
907                 effective_rate = (double)ag_ops / interval;
908                 avg_rate = (double)ag_ops / ag_interval;
909
910                 printf("Rate: %.2f eff %.2f aggr %.2f avg client %ss/sec (total: %d threads %d %ss %d dirs %d threads/dir %.2f secs)\n",
911                        effective_rate, ag_rate, avg_rate, cmd, nthreads, ag_ops,
912                        cmd, ndirs, dirthreads, interval);
913                 if (mode == UNLINK && !recreate && !ignore && ag_ops != nfiles)
914                         printf("Warning: only unlinked %d files instead of %d\n",
915                                ag_ops, nfiles);
916         }
917
918         if (recreate) {
919                 for (begin = beginsave; begin <= end; begin += dirthreads) {
920                         sprintf(filename, filefmt, begin);
921                         if ((fd = open(filename, openflags, 0644)) < 0) {
922                                 rc = errno;
923                                 if (rc == EEXIST)
924                                         break;
925                                 fatal(myrank, "recreate open(%s) error: %s\n",
926                                       filename, strerror(rc));
927                         }
928
929                         close(fd);
930                 }
931         }
932
933         timestamp = time(0);
934         if ((myrank == 0) || debug) {
935                 printf("%d: %s finished at %s",
936                        myrank, hostname, ctime(&timestamp));
937         }
938
939         MPI_Finalize();
940         return 0;
941 }