Whamcloud - gitweb
LU-7243 misc: update Intel copyright messages 2015
[fs/lustre-release.git] / lustre / tests / mpi / mdsrate.c
1 /*
2  * 2003, Copyright, Hewlett-Packard Development Compnay, LP.
3  *
4  * Developed under the sponsorship of the U.S. Government
5  *     under Subcontract No. B514193
6  */
7
8 /*
9  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
10  * Use is subject to license terms.
11  *
12  * Copyright (c) 2012, 2015, Intel Corporation.
13  */
14
15 #include <stdio.h>
16 #include <getopt.h>
17 #include <libgen.h>
18 #include <sys/types.h>
19 #include <sys/stat.h>
20 #include <time.h>
21 #include <limits.h>
22 #include <errno.h>
23 #include <string.h>
24 #include <fcntl.h>
25 #include <unistd.h>
26 #include <stdlib.h>
27 #include <stdarg.h>
28 #include <signal.h>
29 #include <sys/ioctl.h>
30 #include <dirent.h>
31 #include <sys/xattr.h>
32
33 #include "mpi.h"
34
35 /* lustre */
36 #include <lustre/lustreapi.h>        /* for O_LOV_DELAY_CREATE */
37
38 #define CHECK_COUNT 10000
39 #define DISPLAY_COUNT (CHECK_COUNT * 10)
40 #define DISPLAY_TIME 100
41
/*
 * Option codes.  Each value doubles as the short-option character and as
 * the `val` member of the matching longOpts[] entry, so getopt_long()
 * returns the same code for the short and the long spelling.
 */
enum {
        /* operations selected as the test mode */
        CREATE     = 'c',
        LOOKUP     = 'l',
        MKNOD      = 'm',
        OPEN       = 'o',
        STAT       = 's',
        UNLINK     = 'u',
        SETXATTR   = 'x',

        /* range, duration and naming of the files operated on */
        BEGIN      = 'b',
        ITERS      = 'i',
        TIME       = 't',
        DIRFMT     = 'd',
        NDIRS      = 'D',
        FILEFMT    = 'f',
        NFILES     = 'F',

        /* modifiers of the selected operation */
        NOEXCL     = 'X',
        STRIPES    = 'S',
        RECREATE   = 'C',
        SMALLWRITE = 'w',
        IGNORE     = 'E',

        /* file selection order and its seeding */
        SEED       = 'r',
        SEEDFILE   = 'R',
        RANDOM     = 'A',
        READDIR    = 'B',

        /* diagnostics */
        VERBOSE    = 'V',
        DEBUG      = 'v',
        HELP       = 'h',

        /* mount point / MDT layout */
        MNT        = 'M',
        MNTCOUNT   = 'N',
        MDTCOUNT   = 'T',
};
73
74 struct option longOpts[] = {
75         {"create",              0, NULL, CREATE     },
76         {"lookup",              0, NULL, LOOKUP     },
77         {"mknod",               0, NULL, MKNOD      },
78         {"open",                0, NULL, OPEN       },
79         {"stat",                0, NULL, STAT       },
80         {"unlink",              0, NULL, UNLINK     },
81         {"begin",               1, NULL, BEGIN      },
82         {"iters",               1, NULL, ITERS      },
83         {"time",                1, NULL, TIME       },   /* seconds */
84         {"dirfmt",              1, NULL, DIRFMT     },
85         {"ndirs",               1, NULL, NDIRS      },
86         {"filefmt",             1, NULL, FILEFMT    },
87         {"nfiles",              1, NULL, NFILES     },
88         {"noexcl",              0, NULL, NOEXCL     },
89         {"stripes",             1, NULL, STRIPES    },
90         {"seed",                1, NULL, SEED       },
91         {"seedfile",            1, NULL, SEEDFILE   },
92         {"random_order",        0, NULL, RANDOM     },
93         {"readdir_order",       0, NULL, READDIR    },
94         {"recreate",            0, NULL, RECREATE   },
95         {"setxattr",            0, NULL, SETXATTR   },
96         {"smallwrite",          0, NULL, SMALLWRITE },
97         {"ignore",              0, NULL, IGNORE     },
98         {"verbose",             0, NULL, VERBOSE    },
99         {"debug",               0, NULL, DEBUG      },
100         {"help",                0, NULL, HELP       },
101         {"mdtcount",            1, NULL, MDTCOUNT   },
102         {"mntcount",            1, NULL, MNTCOUNT   },
103         {"mntfmt",              1, NULL, MNT        },
104         { 0,                    0, NULL, 0          }
105 };
106
107 int foo1, foo2;
108
109 char   shortOpts[128];
110 int    myrank = -1;
111 int    nthreads = -1;
112 char * prog;
113 char   hostname[512] = "unknown";
114 char   mode;
115 char * cmd;
116 int    openflags = O_RDWR|O_CREAT|O_EXCL;
117 int    ndirs = 1;
118 char * dirfmt;
119 char   dir[PATH_MAX];
120 char   mkdir_cmd[PATH_MAX+14];
121 int    dirthreads;
122 int    dirnum;
123 DIR *  directory;
124 struct dirent *dir_entry;
125 int    nfiles;
126 char   filefmt[PATH_MAX];
127 char   filename[PATH_MAX];
128 char   path[PATH_MAX];
129 int    stripes = -1;
130 int    begin;
131 int    beginsave;
132 int    end;
133 int    iters;
134 int    seconds;
135 int    alarm_caught;
136 struct sigaction act;
137 int    order = RANDOM;
138 int    seed;
139 int    recreate;
140 int    ignore;
141 int    verbose;
142 int    debug;
143 struct stat statbuf;
144 bool   with_xattr;
145 char   xattrname[] = "user.mdsrate";
146 char   xattrbuf[4096];
147 /* max xattr name + value length is block size, use 4000 here to avoid ENOSPC */
148 int    xattrlen = 4000;
149 bool   smallwrite;
150 int    mnt_count = -1;
151 int    mdt_count = 1;
152 char  *mntfmt;
153
/*
 * printf()-style trace that is emitted only when the global `debug` flag
 * is non-zero.  Wrapped in do { } while (0) so that it is a single
 * statement and cannot capture a following `else`.
 */
#define dmesg(...) do { if (debug) printf(__VA_ARGS__); } while (0)
155
/*
 * Periodic progress report for the operation loops.
 *
 * Only active with --verbose, and only evaluated every CHECK_COUNT
 * operations.  A line is printed when more than DISPLAY_TIME seconds have
 * passed since the last report or when nops is a multiple of
 * DISPLAY_COUNT.
 *
 * The expansion uses these variables from the calling scope: nops,
 * lastOps, curTime, lastTime, startTime, interval and rate.
 *
 * Wrapped in do { } while (0) so that "DISPLAY_PROGRESS();" is exactly one
 * statement, also inside an unbraced if/else.
 */
#define DISPLAY_PROGRESS() do {                                             \
        if (verbose && (nops % CHECK_COUNT == 0)) {                         \
                curTime = MPI_Wtime();                                      \
                interval = curTime - lastTime;                              \
                if (interval > DISPLAY_TIME || nops % DISPLAY_COUNT == 0) { \
                        rate = (double)(nops - lastOps)/interval;           \
                        printf("Rank %d: %.2f %ss/sec %.2f secs "           \
                               "(total: %d %ss %.2f secs)\n",               \
                               myrank, rate, cmd, interval,                 \
                               nops, cmd, curTime - startTime);             \
                        lastOps = nops;                                     \
                        lastTime = curTime;                                 \
                }                                                           \
        }                                                                   \
} while (0)
171
/*
 * Usage text printed by usage(); the single %s is replaced by the program
 * name.  Braces group mandatory alternatives, brackets mark optional
 * parts.
 */
char *usage_msg = "usage: %s\n"
                  "    { --create [ --noexcl | --setxattr | --smallwrite ] |\n"
                  "      --lookup | --mknod [ --setxattr ] | --open |\n"
                  "      --stat | --unlink [ --recreate ] [ --ignore ] |\n"
                  "      --setxattr }\n"
                  "    [ --help ] [ --verbose ] [ --debug ]\n"
                  "    { [ --begin <num> ] --nfiles <num> }\n"
                  "    [ --iters <num> ] [ --time <secs> ]\n"
                  "    [ --dirfmt <str> ] [ --ndirs  <num> ]\n"
                  "    [ --filefmt <str> ] [ --stripes <num> ]\n"
                  "    [ --random_order [--seed <num> | --seedfile <file>] ]\n"
                  "    [ --readdir_order ] [ --mntfmt <str> ]\n"
                  "    [ --mntcount <num> ] [ --mdtcount <num> ]\n"
                  "    [ --setxattr ]\n";
186
187 static void
188 usage(FILE *stream, char *fmt, ...)
189 {
190         if (myrank == 0) {
191                 if (fmt != NULL) {
192                         va_list       ap;
193
194                         fprintf(stream, "%s: ", prog);
195                         va_start(ap, fmt);
196                         vfprintf(stderr, fmt, ap);
197                         va_end(ap);
198                 }
199                 fprintf(stream, usage_msg, prog);
200         }
201
202         MPI_Finalize();
203         exit(stream == stderr);
204 }
205
206 /* Print process myrank and message, and exit (i.e. a fatal error) */
207 static int
208 fatal(int rank, const char *fmt, ...)
209 {
210         if (rank == myrank) {
211                 va_list       ap;
212
213                 fprintf(stderr, "rank %d: ", rank);
214                 va_start(ap, fmt);
215                 vfprintf(stderr, fmt, ap);
216                 va_end(ap);
217         }
218
219         MPI_Abort(MPI_COMM_WORLD, 1);
220         exit(1);
221 }
222
223 static void
224 sigalrm_handler(int signum)
225 {
226         alarm_caught++;
227 }
228
/* HAVE_LLAPI_FILE_LOOKUP is defined by liblustreapi.h if this function is
 * defined therein.  Otherwise we can do the equivalent operation via ioctl
 * if we have access to a complete lustre build tree to get the various
 * definitions - then compile with USE_MDC_LOOKUP defined. */
#if defined(HAVE_LLAPI_FILE_LOOKUP)
#define HAVE_MDC_LOOKUP
#elif defined(USE_MDC_LOOKUP)
#include <config.h>
#include <libcfs/libcfs.h>
#include <lustre_ioctl.h>

/*
 * Fallback implementation: issue IOC_MDC_LOOKUP for `name` on the open
 * directory descriptor `dirfd`.
 *
 * Returns -EINVAL for a negative dirfd or a NULL name, otherwise the
 * result of ioctl().  A failure to pack the ioctl buffer is fatal.
 */
int llapi_file_lookup(int dirfd, const char *name)
{
        struct obd_ioctl_data data = { 0 };
        char rawbuf[8192];
        char *buf = rawbuf;
        int rc;

        if (dirfd < 0 || name == NULL)
                return -EINVAL;

        data.ioc_version = OBD_IOCTL_VERSION;
        data.ioc_len = sizeof(data);
        /* the buffer is only read when packed; the cast drops const */
        data.ioc_inlbuf1 = (char *)name;
        data.ioc_inllen1 = strlen(name) + 1;

        rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
        if (rc) {
                fatal(myrank, "ioctl_pack failed: rc = %d\n", rc);
                return rc;
        }

        /* the descriptor to operate on is the dirfd parameter */
        return ioctl(dirfd, IOC_MDC_LOOKUP, buf);
}
#define HAVE_MDC_LOOKUP
#endif
265
266 static void
267 process_args(int argc, char *argv[])
268 {
269         char   *cp, *endptr;
270         int    i, index, offset, tmpend, rc;
271         char   tmp[16];
272         FILE * seed_file;
273         struct option *opt;
274
275         setbuf(stdout, 0);
276         setbuf(stderr, 0);
277         prog = basename(argv[0]);
278         strcpy(filefmt, "f%d");
279         gethostname(hostname, sizeof(hostname));
280
281         /* auto create shortOpts rather than maintaining a static string. */
282         for (opt = longOpts, cp = shortOpts; opt->name != NULL; opt++, cp++) {
283                 *cp = opt->val;
284                 if (opt->has_arg)
285                         *++cp = ':';
286         }
287
288         while ((rc = getopt_long(argc,argv, shortOpts, longOpts,&index)) != -1) {
289                 switch (rc) {
290                 case OPEN:
291                         openflags &= ~(O_CREAT|O_EXCL);
292                 case CREATE:
293 #ifdef HAVE_MDC_LOOKUP
294                 case LOOKUP:
295 #endif
296                 case MKNOD:
297                 case STAT:
298                 case UNLINK:
299                         if (cmd != NULL) {
300                                 fatal(0, "Invalid - more than one operation "
301                                            "specified: --%s\n",
302                                         longOpts[index].name);
303                         }
304                         mode = rc;
305                         cmd = (char *)longOpts[index].name;
306                         break;
307                 case NOEXCL:
308                         if (mode != CREATE && mode != MKNOD) {
309                                 usage(stderr, "--noexcl only applies to "
310                                               "--create or --mknod.\n");
311                         }
312                         openflags &= ~O_EXCL;
313                         break;
314                 case RECREATE:
315                         if (mode != UNLINK) {
316                                 usage(stderr, "--recreate only makes sense"
317                                               "with --unlink.\n");
318                         }
319                         recreate++;
320                         break;
321                 case SETXATTR:
322                         if (cmd == NULL) {
323                                 mode = SETXATTR;
324                                 cmd = (char *)longOpts[index].name;
325                         } else if (mode == CREATE || mode == MKNOD) {
326                                 with_xattr = true;
327                         } else {
328                                 usage(stderr, "--setxattr only makes sense "
329                                       "with --create, --mknod or alone.\n");
330                         }
331                         break;
332                 case SMALLWRITE:
333                         if (mode != CREATE)
334                                 usage(stderr, "--smallwrite only applies to "
335                                               "--create.\n");
336                         smallwrite = true;
337                         break;
338                 case BEGIN:
339                         begin = strtol(optarg, &endptr, 0);
340                         if ((*endptr != 0) || (begin < 0)) {
341                                 fatal(0, "Invalid --start value.\n");
342                         }
343                         break;
344                 case ITERS:
345                         iters = strtol(optarg, &endptr, 0);
346                         if ((*endptr != 0) || (iters <= 0)) {
347                                 fatal(0, "Invalid --iters value.\n");
348                         }
349                         if (mode != LOOKUP && mode != OPEN) {
350                                 usage(stderr, "--iters only makes sense with "
351                                               "--lookup or --open.\n");
352                         }
353                         break;
354                 case TIME:
355                         seconds = strtol(optarg, &endptr, 0);
356                         if ((*endptr != 0) || (seconds <= 0)) {
357                                 fatal(0, "Invalid --time value.\n");
358                         }
359                         break;
360                 case DIRFMT:
361                         if (strlen(optarg) > (PATH_MAX - 16)) {
362                                 fatal(0, "--dirfmt too long\n");
363                         }
364                         dirfmt = optarg;
365                         break;
366                 case NDIRS:
367                         ndirs = strtol(optarg, &endptr, 0);
368                         if ((*endptr != 0) || (ndirs <= 0)) {
369                                 fatal(0, "Invalid --ndirs value.\n");
370                         }
371                         if ((ndirs > nthreads) &&
372                             ((mode == CREATE) || (mode == MKNOD))) {
373                                 fatal(0, "--ndirs=%d must be less than or "
374                                       "equal to the number of threads (%d).\n",
375                                       ndirs, nthreads);
376                         }
377                         break;
378                 case FILEFMT:
379                         if (strlen(optarg) > 4080) {
380                                 fatal(0, "--filefmt too long\n");
381                         }
382
383                         /* Use %%d where you want the file # in the name. */
384                         sprintf(filefmt, optarg, myrank);
385                         break;
386                 case NFILES:
387                         nfiles = strtol(optarg, &endptr, 0);
388                         if ((*endptr != 0) || (nfiles <= 0)) {
389                                 fatal(0, "Invalid --nfiles value.\n");
390                         }
391                         break;
392                 case STRIPES:
393                         stripes = strtol(optarg, &endptr, 0);
394                         if ((*endptr != 0) || (stripes < 0)) {
395                                 fatal(0, "Invalid --stripes value.\n");
396                         }
397
398                         if (stripes == 0) {
399                                 openflags |= O_LOV_DELAY_CREATE;
400                         } else {
401                                 fatal(0, "non-zero --stripes value "
402                                          "not yet supported.\n");
403                         }
404
405                         break;
406                 case SEED:
407                         seed = strtoul(optarg, &endptr, 0);
408                         if (*endptr) {
409                                 fatal(0, "bad --seed option %s\n", optarg);
410                         }
411                         break;
412                 case SEEDFILE:
413                         seed_file = fopen(optarg, "r");
414                         if (!seed_file) {
415                               fatal(myrank, "fopen(%s) error: %s\n",
416                                       optarg, strerror(errno));
417                         }
418
419                         for (i = -1; fgets(tmp, 16, seed_file) != NULL;) {
420                                 if (++i == myrank)
421                                         break;
422                         }
423
424                         if (i == myrank) {
425                                 rc = sscanf(tmp, "%d", &seed);
426                                 if ((rc != 1) || (seed < 0)) {
427                                         fatal(myrank, "Invalid seed value '%s' "
428                                               "at line %d in %s.\n",
429                                               tmp, i, optarg);
430                                 }
431                         } else {
432                                 fatal(myrank, "File '%s' too short. Does not "
433                                       "contain a seed for thread %d.\n",
434                                       optarg, myrank);
435                         }
436
437                         fclose(seed_file);
438                         break;
439                 case RANDOM:
440                 case READDIR:
441                         if (mode != LOOKUP && mode != OPEN)  {
442                                 fatal(0, "--%s can only be specified with "
443                                          "--lookup, or --open.\n",
444                                       (char *)longOpts[index].name);
445                         }
446                         order = rc;
447                         break;
448                 case IGNORE:
449                         ++ignore;
450                         break;
451                 case DEBUG:
452                         ++debug;
453                 case VERBOSE:
454                         ++verbose;
455                         break;
456                 case HELP:
457                         usage(stdout, NULL);
458                         break;
459                 case MNT:
460                         if (strlen(optarg) > (PATH_MAX - 16))
461                                 fatal(0, "--mnt too long\n");
462                         mntfmt = optarg;
463                         break;
464                 case MNTCOUNT:
465                         mnt_count = strtol(optarg, &endptr, 0);
466                         if ((*endptr != 0) || (mnt_count <= 0)) {
467                                 fatal(0, "Invalid --mnt_count value %s.\n",
468                                       optarg);
469                         }
470                         break;
471                 case MDTCOUNT:
472                         mdt_count = strtol(optarg, &endptr, 0);
473                         if ((*endptr != 0) || (mdt_count <= 0)) {
474                                 fatal(0, "Invalid --mdt_count value %s.\n",
475                                       optarg);
476                         }
477                         break;
478                 default:
479                         usage(stderr, "unrecognized option: '%c'.\n", optopt);
480                 }
481         }
482
483         if (optind < argc) {
484                 usage(stderr, "too many arguments %d >= %d.\n", optind, argc);
485         }
486
487         if ((mnt_count != -1 && mntfmt == NULL) ||
488             (mnt_count == -1 && mntfmt != NULL)) {
489                 usage(stderr, "mnt_count and mntfmt must be specified at the "
490                              "same time\n");
491         }
492
493         if (mode == CREATE || mode == MKNOD || mode == UNLINK ||
494             mode == STAT || mode == SETXATTR) {
495                 if (seconds != 0) {
496                         if (nfiles == 0)
497                                 nfiles = INT_MAX;
498                 } else if (nfiles == 0) {
499                         usage(stderr, "--nfiles or --time must be specified "
500                                       "with %s.\n", cmd);
501                 }
502         } else if (mode == LOOKUP || mode == OPEN) {
503                 if (seconds != 0) {
504                         if (iters == 0)
505                                 iters = INT_MAX;
506                 } else if (iters == 0) {
507                         usage(stderr, "--iters or --time must be specifed "
508                                       "with %s.\n", cmd);
509                 }
510
511                 if (nfiles == 0) {
512                         usage(stderr, "--nfiles must be specifed with --%s.\n",
513                               cmd);
514                 }
515
516                 if (seed == 0) {
517                         int fd = open("/dev/urandom", O_RDONLY);
518
519                         if (fd >= 0) {
520                                 if (read(fd, &seed, sizeof(seed)) <
521                                     sizeof(seed))
522                                         seed = time(0);
523                                 close(fd);
524                         } else {
525                                 seed = time(0);
526                         }
527                 }
528
529                 srand(seed);
530
531                 dmesg("%s: rank %d seed %d (%s).\n", prog, myrank, seed,
532                       (order == RANDOM) ? "random_order" : "readdir_order");
533         } else {
534                 usage(stderr, "one --create, --mknod, --open, --stat,"
535 #ifdef HAVE_MDC_LOOKUP
536                       " --lookup,"
537 #endif
538                       " --unlink or --setxattr must be specifed.");
539         }
540
541         /* support for multiple threads in a dir, set begin/end appropriately.*/
542         dirnum = myrank % ndirs;
543         dirthreads = nthreads / ndirs;
544         if (nthreads > (ndirs * dirthreads + dirnum))
545                 ++dirthreads;
546
547         offset = myrank / ndirs;
548
549         tmpend = begin + nfiles - 1;
550         if (tmpend <= 0)
551                 tmpend = INT_MAX;
552
553         end = begin + (nfiles / dirthreads) * dirthreads + offset;
554         if ((end > tmpend) || (end <= 0))
555                 end -= dirthreads;
556
557         /* make sure mnt_count <= nthreads, otherwise it might div 0 in
558          * the following test */
559         if (mnt_count > nthreads)
560                 mnt_count = nthreads;
561
562         begin += offset;
563         if (begin < 0)
564                 begin = INT_MAX;
565
566         beginsave = begin;
567
568         dmesg("%d: iters %d nfiles %d time %d begin %d end %d dirthreads %d."
569               "\n", myrank, iters, nfiles, seconds, begin, end, dirthreads);
570
571         if (dirfmt == NULL) {
572                 strcpy(dir, ".");
573         } else {
574                 int dir_len = 0;
575
576                 if (mntfmt != NULL) {
577                         sprintf(dir, mntfmt, (myrank / (nthreads/mnt_count)));
578                         strcat(dir, "/");
579                         dir_len = strlen(dir);
580                 }
581                 sprintf(dir + dir_len, dirfmt, dirnum);
582
583                 if (mdt_count > 1) {
584                         struct stat sb;
585                         if (stat(dir, &sb) == 0) {
586                                 if (!S_ISDIR(sb.st_mode))
587                                         fatal(myrank, "'%s' is not dir\n", dir);
588                         } else if (errno == ENOENT) {
589                                 sprintf(mkdir_cmd, "lfs mkdir -i %d %s",
590                                         myrank % mdt_count, dir);
591                         } else {
592                                 fatal(myrank, "'%s' stat failed\n", dir);
593                         }
594                 } else {
595                         sprintf(mkdir_cmd, "mkdir -p %s", dir);
596                 }
597
598                 dmesg("%d: %s\n", myrank, mkdir_cmd);
599 #ifdef _LIGHTWEIGHT_KERNEL
600                 printf("NOTICE: not running system(%s)\n", mkdir_cmd);
601 #else
602                 rc = system(mkdir_cmd);
603                 if (rc)
604                         fatal(myrank, "'%s' failed.\n", mkdir_cmd);
605 #endif
606
607                 rc = chdir(dir);
608                 if (rc) {
609                         fatal(myrank, "unable to chdir to '%s'.\n", dir);
610                 }
611         }
612 }
613
614 static inline char *next_file()
615 {
616         if (order == RANDOM) {
617                 sprintf(filename, filefmt, random() % nfiles);
618                 return(filename);
619         }
620
621         /* readdir order */
622
623         dir_entry = readdir(directory);
624         if (dir_entry == NULL) {
625                 rewinddir(directory);
626                 while ((dir_entry = readdir(directory)) != NULL) {
627                         if (dir_entry->d_name[0] != '.')
628                                 return(dir_entry->d_name);
629                 }
630
631                 fatal(myrank, "unable to read directory %s (%s).\n",
632                       dir, strerror(errno));
633         }
634
635         return(dir_entry->d_name);
636 }
637
638 int
639 main(int argc, char *argv[])
640 {
641         int    i, j, fd, rc, nops, lastOps;
642         int ag_ops = 0;
643         double ag_interval = 0;
644         double ag_rate = 0;
645         double rate, avg_rate, effective_rate;
646         double startTime, curTime, lastTime, interval;
647         time_t timestamp;
648         char * file;
649
650         rc = MPI_Init(&argc, &argv);
651         if (rc != MPI_SUCCESS)
652                 fatal(myrank, "MPI_Init failed: %d\n", rc);
653
654         rc = MPI_Comm_size(MPI_COMM_WORLD, &nthreads);
655         if (rc != MPI_SUCCESS)
656                 fatal(myrank, "MPI_Comm_size failed: %d\n", rc);
657
658         rc = MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
659         if (rc != MPI_SUCCESS)
660                 fatal(myrank, "MPI_Comm_rank failed: %d\n", rc);
661
662         process_args(argc, argv);
663
664         timestamp = time(0);
665         if ((myrank == 0) || debug) {
666                 printf("%d: %s starting at %s",
667                        myrank, hostname, ctime(&timestamp));
668         }
669
670         /* if we're not measuring creation rates then precreate
671          * the files we're operating on. */
672         if ((mode != CREATE) && (mode != MKNOD) && !ignore &&
673             (mode != UNLINK || recreate)) {
674                 /* create the files in reverse order. When we encounter
675                  * a file that already exists, assume the remainder of 
676                  * the files exist to save time. The timed performance
677                  * test scripts make use of this behavior. */
678                 for (i = end, j = 0; i >= begin; i -= dirthreads) {
679                         sprintf(filename, filefmt, i);
680                         fd = open(filename, openflags, 0644);
681                         if (fd < 0) {
682                                 if (errno == EEXIST)
683                                         break;
684                                 rc = errno;
685                                 fatal(myrank, "precreate open(%s) error: %s\n",
686                                       filename, strerror(rc));
687                         }
688                         j++;
689                         close(fd);
690                 }
691                 dmesg("%d: %s pre-created %d files.\n",myrank,hostname,j);
692
693                 rc = MPI_Barrier(MPI_COMM_WORLD);
694                 if (rc != MPI_SUCCESS)
695                         fatal(myrank, "prep MPI_Barrier failed: %d\n", rc);
696         }
697
698         if (order == READDIR) {
699                 directory = opendir(dir);
700                 if (directory == NULL) {
701                         rc = errno;
702                         fatal(myrank, "opendir(%s) error: %s\n",
703                               dir, strerror(rc));
704                 }
705
706                 timestamp = time(0);
707                 j = random() % nfiles;
708                 dmesg("%d: %s initializing dir offset %u: %s",
709                       myrank, hostname, j, ctime(&timestamp));
710
711                 for (i = 0; i <= j; i++) {
712                         if ((dir_entry = readdir(directory)) == NULL) {
713                                 fatal(myrank, "could not read entry number %d "
714                                       "in directory %s.\n", i, dir);
715                         }
716                 }
717
718                 timestamp = time(0);
719                 dmesg("%d: index %d, filename %s, offset %ld: "
720                       "%s initialization complete: %s",
721                       myrank, i, dir_entry->d_name, telldir(directory),
722                       hostname, ctime(&timestamp));
723         }
724
725         if (seconds) {
726                 act.sa_handler = sigalrm_handler;
727                 (void)sigemptyset(&act.sa_mask);
728                 act.sa_flags = 0;
729                 sigaction(SIGALRM, &act, NULL);
730                 alarm(seconds);
731         }
732
733         rc = MPI_Barrier(MPI_COMM_WORLD);
734         if (rc != MPI_SUCCESS)
735                 fatal(myrank, "prep MPI_Barrier failed: %d\n", rc);
736
737         startTime = lastTime = MPI_Wtime();
738         nops = lastOps = 0;
739
740         switch (mode) {
741         case CREATE:
742                 for (; begin <= end && !alarm_caught; begin += dirthreads) {
743                         snprintf(filename, sizeof(filename), filefmt, begin);
744                         fd = open(filename, openflags, 0644);
745                         if (fd < 0) {
746                                 rc = errno;
747                                 if (rc == EINTR && alarm_caught)
748                                         break;
749                                 fatal(myrank, "open(%s) error: %s\n",
750                                       filename, strerror(rc));
751                         }
752
753                         if (with_xattr) {
754                                 rc = fsetxattr(fd, xattrname, xattrbuf,
755                                                xattrlen, XATTR_CREATE);
756                                 if (rc) {
757                                         rc = errno;
758                                         if (rc == EINTR && alarm_caught)
759                                                 break;
760                                         fatal(myrank,
761                                               "setxattr(%s) error: %s\n",
762                                               filename, strerror(rc));
763                                 }
764                         }
765                         if (smallwrite) {
766                                 rc = write(fd, xattrbuf, xattrlen);
767                                 if (rc < 0) {
768                                         rc = errno;
769                                         if (rc == EINTR && alarm_caught)
770                                                 break;
771                                         fatal(myrank,
772                                               "write(%s) error: %s\n",
773                                               filename, strerror(rc));
774                                 }
775                         }
776
777                         close(fd);
778                         nops++;
779                         DISPLAY_PROGRESS();
780                 }
781
782                 dmesg("%d: created %d files, last file '%s'.\n",
783                       myrank, nops, filename);
784                 break;
785 #ifdef HAVE_MDC_LOOKUP
786         case LOOKUP:
787                 fd = open(dir, O_RDONLY);
788                 if (fd < 0) {
789                         fatal(myrank, "open(dir == '%s') error: %s\n",
790                               dir, strerror(errno));
791                 }
792
793                 for (; nops < iters && !alarm_caught;) {
794                         char *filename = next_file();
795                         rc = llapi_file_lookup(fd, filename);
796                         if (rc < 0) {
797                                 if (((rc = errno) == EINTR) && alarm_caught)
798                                         break;
799                                 fatal(myrank, "llapi_file_lookup(%s) "
800                                       "error: %s\n", filename, strerror(rc));
801                         }
802
803                         nops++;
804                         DISPLAY_PROGRESS();
805                 }
806                 break;
807 #endif
808         case MKNOD:
809                 for (; begin <= end && !alarm_caught; begin += dirthreads) {
810                         snprintf(filename, sizeof(filename), filefmt, begin);
811                         rc = mknod(filename, S_IFREG | 0644, 0);
812                         if (rc) {
813                                 rc = errno;
814                                 if (rc == EINTR && alarm_caught)
815                                         break;
816                                 fatal(myrank, "mknod(%s) error: %s\n",
817                                       filename, strerror(rc));
818                         }
819
820                         if (with_xattr) {
821                                 rc = setxattr(filename, xattrname, xattrbuf,
822                                               xattrlen, XATTR_CREATE);
823                                 if (rc) {
824                                         rc = errno;
825                                         if (rc == EINTR && alarm_caught)
826                                                 break;
827                                         fatal(myrank,
828                                               "setxattr(%s) error: %s\n",
829                                               filename, strerror(rc));
830                                 }
831                         }
832
833                         nops++;
834                         DISPLAY_PROGRESS();
835                 }
836                 break;
837         case OPEN:
838                 for (; nops < iters && !alarm_caught;) {
839                         file = next_file();
840                         if ((fd = open(file, openflags, 0644)) < 0) {
841                                 if (((rc = errno) == EINTR) && alarm_caught)
842                                         break;
843                                 fatal(myrank, "open(%s) error: %s\n",
844                                       file, strerror(rc));
845                         }
846
847                         close(fd);
848
849                         nops++;
850                         DISPLAY_PROGRESS();
851                 }
852                 break;
853         case STAT:
854                 for (; begin <= end && !alarm_caught; begin += dirthreads) {
855                         sprintf(filename, filefmt, begin);
856                         rc = stat(filename, &statbuf);
857                         if (rc) {
858                                 if (((rc = errno) == EINTR) && alarm_caught)
859                                         break;
860                                 if (((rc = errno) == ENOENT) && ignore)
861                                         continue;
862                                 fatal(myrank, "stat(%s) error: %s\n",
863                                       filename, strerror(rc));
864                         }
865
866                         nops++;
867                         DISPLAY_PROGRESS();
868                 }
869                 break;
870         case UNLINK:
871                 for (; begin <= end && !alarm_caught; begin += dirthreads) {
872                         sprintf(filename, filefmt, begin);
873                         rc = unlink(filename);
874                         if (rc) {
875                                 if (((rc = errno) == EINTR) && alarm_caught)
876                                         break;
877                                 if ((rc = errno) == ENOENT) {
878                                         if (ignore)
879                                                 continue;
880                                         /* no more files to unlink */
881                                         break;
882                                 }
883                                 fatal(myrank, "unlink(%s) error: %s\n",
884                                       filename, strerror(rc));
885                         }
886
887                         nops++;
888                         DISPLAY_PROGRESS();
889                 }
890                 break;
891         case SETXATTR:
892                 for (; begin <= end && !alarm_caught; begin += dirthreads) {
893                         snprintf(filename, sizeof(filename), filefmt, begin);
894                         rc = setxattr(filename, xattrname, xattrbuf, xattrlen,
895                                       XATTR_CREATE);
896                         if (rc) {
897                                 rc = errno;
898                                 if (rc == EINTR && alarm_caught)
899                                         break;
900                                 if (rc == ENOENT && ignore)
901                                         continue;
902                                 fatal(myrank, "setxattr(%s) error: %s\n",
903                                       filename, strerror(rc));
904                         }
905
906                         nops++;
907                         DISPLAY_PROGRESS();
908                 }
909                 break;
910         }
911
912         rc = MPI_Barrier(MPI_COMM_WORLD);
913         if (rc != MPI_SUCCESS)
914                fatal(myrank, "prep MPI_Barrier failed: %d\n", rc);
915         curTime = MPI_Wtime();
916         interval = curTime - startTime;
917         rate = (double) (nops) / interval;
918
919         rc = MPI_Reduce(&nops, &ag_ops, 1, MPI_INT, MPI_SUM, 0,
920                         MPI_COMM_WORLD);
921         if (rc != MPI_SUCCESS) {
922                 fatal(myrank, "Failure in MPI_Reduce of total ops.\n");
923         }
924
925         rc = MPI_Reduce(&interval, &ag_interval, 1, MPI_DOUBLE, MPI_SUM, 0,
926                         MPI_COMM_WORLD);
927         if (rc != MPI_SUCCESS) {
928                 fatal(myrank, "Failure in MPI_Reduce of total interval.\n");
929         }
930
931         rc = MPI_Reduce(&rate, &ag_rate, 1, MPI_DOUBLE, MPI_SUM, 0,
932                         MPI_COMM_WORLD);
933         if (rc != MPI_SUCCESS) {
934                 fatal(myrank, "Failure in MPI_Reduce of aggregated rate.\n");
935         }
936
937         if (myrank == 0) {
938                 curTime = MPI_Wtime();
939                 interval = curTime - startTime;
940                 effective_rate = (double) ag_ops / interval;
941                 avg_rate = (double) ag_ops / ag_interval;
942
943                 printf("Rate: %.2f eff %.2f aggr %.2f avg client %ss/sec "
944                        "(total: %d threads %d %ss %d dirs %d threads/dir %.2f secs)\n",
945                        effective_rate, ag_rate, avg_rate, cmd, nthreads, ag_ops,
946                        cmd, ndirs, dirthreads, interval);
947                 if (mode == UNLINK && !recreate && !ignore && ag_ops != nfiles)
948                         printf("Warning: only unlinked %d files instead of %d"
949                                "\n", ag_ops, nfiles);
950         }
951
952         if (recreate) {
953                 for (begin = beginsave; begin <= end; begin += dirthreads) {
954                         sprintf(filename, filefmt, begin);
955                         if ((fd = open(filename, openflags, 0644)) < 0) {
956                                 rc = errno;
957                                 if (rc == EEXIST)
958                                         break;
959                                 fatal(myrank, "recreate open(%s) error: %s\n",
960                                       filename, strerror(rc));
961                         }
962
963                         close(fd);
964                 }
965         }
966
967         timestamp = time(0);
968         if ((myrank == 0) || debug) {
969                 printf("%d: %s finished at %s",
970                        myrank, hostname, ctime(&timestamp));
971         }
972
973         MPI_Finalize();
974         return(0);
975 }