Whamcloud - gitweb
LU-12043 llite: make sure readahead cover current read
[fs/lustre-release.git] / lustre / tests / fsx.c
1 /*
2  * Copyright (c) 1998-2001 Apple Computer, Inc. All rights reserved.
3  *
4  * Copyright (c) 2012, Intel Corporation.
5  *
6  * @APPLE_LICENSE_HEADER_START@
7  *
8  * The contents of this file constitute Original Code as defined in and
9  * are subject to the Apple Public Source License Version 1.1 (the
10  * "License").  You may not use this file except in compliance with the
11  * License.  Please obtain a copy of the License at
12  * http://www.apple.com/publicsource and read it before using this file.
13  *
14  * This Original Code and all software distributed under the License are
15  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
19  * License for the specific language governing rights and limitations
20  * under the License.
21  *
22  * @APPLE_LICENSE_HEADER_END@
23  *
24  *      File:   fsx.c
25  *      Author: Avadis Tevanian, Jr.
26  *
27  *      File system exerciser.
28  *
29  *      Rewrite and enhancements 1998-2001 Conrad Minshall -- conrad@mac.com
30  *
31  *      Various features from Joe Sokol, Pat Dirks, and Clark Warner.
32  *
33  *      Small changes to work under Linux -- davej@suse.de
34  *
35  *      Sundry porting patches from Guy Harris 12/2001
36  * $FreeBSD: src/tools/regression/fsx/fsx.c,v 1.1 2001/12/20 04:15:57 jkh Exp $
37  */
38 #include <sys/types.h>
39 #include <sys/stat.h>
40 #if defined(_UWIN) || defined(__linux__)
41 # include <sys/param.h>
42 # include <limits.h>
43 # include <time.h>
44 # include <strings.h>
45 # include <sys/time.h>
46 #endif
47 #include <fcntl.h>
48 #include <sys/mman.h>
49 #ifndef MAP_FILE
50 # define MAP_FILE 0
51 #endif
52 #include <limits.h>
53 #include <signal.h>
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <string.h>
57 #include <unistd.h>
58 #include <stdarg.h>
59 #include <errno.h>
60
#define NUMPRINTCOLUMNS 32      /* # columns of data to print on each line */

/*
 * A test run drives the same file through one or more independent file
 * descriptors.  Using several of them makes it possible to check cache
 * coherency between multiple mountpoints of one network filesystem on a
 * single client.
 */
struct test_file {
        char    *path;          /* pathname the descriptor was opened from */
        int     fd;             /* descriptor returned by open(2) */
};

/* Table of test files; filled in by open_test_files(). */
struct test_file *test_files = NULL;
72
/* Number of entries in test_files[]. */
int num_test_files;

/* Strategy get_tf() uses to choose the test file for the next operation. */
enum fd_iteration_policy {
        FD_SINGLE,      /* always the first test file */
        FD_ROTATE,      /* advance through the files using fd_last */
        FD_RANDOM,      /* choose with random() */
};

int fd_policy = FD_RANDOM;      /* policy currently in effect */
int fd_last;                    /* counter consumed by FD_ROTATE */
81
/*
 *      One record of the operation log: an operation code, up to three
 *      operation-specific integer arguments, the timestamp supplied by the
 *      caller of log4(), and the test file the operation was issued on.
 */
struct log_entry {
        int                     operation;      /* one of the OP_* codes */
        int                     args[3];        /* meaning depends on operation */
        struct timeval          tv;             /* time passed to log4() */
        const struct test_file  *tf;            /* file used for the operation */
};
92
93 #define LOGSIZE 100000
94
95 struct log_entry        oplog[LOGSIZE]; /* the log */
96 int                     logptr = 0;     /* current position in log */
97 int                     logcount = 0;   /* total ops */
98
/*
 *      Operation codes stored in log_entry.operation
 */

#define OP_READ         1       /* logged by doread() */
#define OP_WRITE        2       /* logged by dowrite() */
#define OP_TRUNCATE     3       /* logged by dotruncate() */
#define OP_CLOSEOPEN    4       /* shown as "CLOSE/OPEN" by logdump() */
#define OP_MAPREAD      5       /* logged by domapread() */
#define OP_MAPWRITE     6       /* logged by domapwrite() */
#define OP_SKIPPED      7       /* an operation that was not carried out */
110
int page_size;                  /* presumably the VM page size; set elsewhere */
int page_mask;                  /* ANDed with an offset to get its in-page part */

/* Data buffers. */
char *original_buf;             /* original data, mixed in by gendata() */
char *good_buf;                 /* expected contents of the test file */
char *temp_buf;                 /* data most recently read back */

/* File names. */
char *fname;                    /* name of our test file */
char logfile[PATH_MAX];         /* name of our log file */
char goodfile[PATH_MAX];        /* name of the file holding the good data */

off_t file_size = 0;            /* expected size of the test file */
off_t biggest = 0;              /* largest size ever truncated to */
char state[256];
unsigned long testcalls = 0;    /* calls to function "test" */

/* Settings, each tagged with the command line flag named in the original. */
long simulatedopcount = 0;      /* -b flag */
int closeprob = 0;              /* -c flag */
int debug = 0;                  /* -d flag */
long debugstart = 0;            /* -D flag */
long maxfilelen = 256 * 1024;   /* -l flag */
int sizechecks = 1;             /* -n flag disables them */
int maxoplen = 64 * 1024;       /* -o flag */
int quiet = 0;                  /* -q flag */
long progressinterval = 0;      /* -p flag */
int readbdy = 1;                /* -r flag: read offsets are multiples of it */
int style = 0;                  /* -s flag */
int truncbdy = 1;               /* -t flag: truncate sizes are multiples of it */
int writebdy = 1;               /* -w flag: write offsets are multiples of it */
long monitorstart = -1;         /* -m flag */
long monitorend = -1;           /* -m flag */
int lite = 0;                   /* -L flag */
long numops = -1;               /* -N flag */
int randomoplen = 1;            /* -O flag disables it */
int seed = 1;                   /* -S flag */
int mapped_writes = 1;          /* -W flag disables */
int mapped_reads = 1;           /* -R flag disables it */
int fsxgoodfd = 0;              /* descriptor report_failure() saves good_buf to */
int o_direct;                   /* -Z; extra flag bits ORed into open(2) */
FILE *fsxlogf = NULL;           /* when set, prt() copies its output here */
int badoff = -1;                /* offset of the last mismatch check_buffers() saw */
151
/*
 * Print a warning on stderr.
 *
 * The output is "fsx: ", then (only when fmt is not NULL) the message
 * formatted from fmt/ap followed by ": ", then strerror(code) and a
 * newline.
 *
 * code: errno-style error number to describe.
 * fmt:  printf-style format string, or NULL for no message.
 * ap:   arguments for fmt; consumed by vfprintf().
 *
 * Defined with a prototype rather than an old-style (K&R) parameter list
 * so that callers get argument type checking.
 */
void
vwarnc(int code, const char *fmt, va_list ap)
{
        fprintf(stderr, "fsx: ");
        if (fmt != NULL) {
                vfprintf(stderr, fmt, ap);
                fprintf(stderr, ": ");
        }
        fprintf(stderr, "%s\n", strerror(code));
}
165
166
/*
 * printf-style warning: hands the formatted message and the errno value
 * current at the time of the call to vwarnc().
 */
void
warn(const char * fmt, ...)
{
        int saved_errno = errno;
        va_list args;

        va_start(args, fmt);
        vwarnc(saved_errno, fmt, args);
        va_end(args);
}
175
176
177 void
178 __attribute__((format(printf, 1, 2)))
179 prt(char *fmt, ...)
180 {
181         va_list args;
182
183         va_start(args, fmt);
184         vfprintf(stdout, fmt, args);
185         va_end(args);
186
187         if (fsxlogf) {
188                 va_start(args, fmt);
189                 vfprintf(fsxlogf, fmt, args);
190                 va_end(args);
191         }
192 }
193
/*
 * Report the current errno through prt().
 *
 * prefix: text printed before the error string, separated from it by
 *         ": "; may be NULL, in which case only the error string is
 *         printed.  (Previously a NULL prefix was still handed to "%s",
 *         which is undefined behaviour.)
 */
void
prterr(char *prefix)
{
        /* fetch the message first so nothing below can disturb errno */
        const char *errmsg = strerror(errno);

        if (prefix != NULL)
                prt("%s: %s\n", prefix, errmsg);
        else
                prt("%s\n", errmsg);
}
199
200
201 void
202 log4(int operation, int arg0, int arg1, int arg2, struct timeval *tv,
203         const struct test_file *tf)
204 {
205         struct log_entry *le;
206
207         le = &oplog[logptr];
208         le->operation = operation;
209         le->args[0] = arg0;
210         le->args[1] = arg1;
211         le->args[2] = arg2;
212         le->tv = *tv;
213         le->tf = tf;
214         logptr++;
215         logcount++;
216         if (logptr >= LOGSIZE)
217                 logptr = 0;
218 }
219
220 const char *
221 fill_tf_buf(const struct test_file *tf)
222 {
223         static int max_tf_len;
224         static char tf_buf[32];
225
226         if (fd_policy == FD_SINGLE)
227                 return "";
228
229         if (max_tf_len == 0)
230                 max_tf_len = snprintf(tf_buf, sizeof(tf_buf) - 1,
231                                       "%u", num_test_files - 1);
232
233         sprintf(tf_buf, "[%0*lu]", max_tf_len,
234                 (unsigned long)(tf - test_files));
235
236         return tf_buf;
237 }
238
239 void
240 logdump(void)
241 {
242         int     i, count, down;
243         struct log_entry        *lp;
244
245         prt("LOG DUMP (%d total operations):\n", logcount);
246         if (logcount < LOGSIZE) {
247                 i = 0;
248                 count = logcount;
249         } else {
250                 i = logptr;
251                 count = LOGSIZE;
252         }
253         for ( ; count > 0; count--) {
254                 int opnum;
255
256                 opnum = i+1 + (logcount/LOGSIZE)*LOGSIZE;
257                 lp = &oplog[i];
258                 prt("%d%s: %lu.%06lu ", opnum, fill_tf_buf(lp->tf),
259                     lp->tv.tv_sec, lp->tv.tv_usec);
260
261                 switch (lp->operation) {
262                 case OP_MAPREAD:
263                         prt("MAPREAD  0x%x thru 0x%x (0x%x bytes)",
264                             lp->args[0], lp->args[0] + lp->args[1] - 1,
265                             lp->args[1]);
266                         if (badoff >= lp->args[0] && badoff <
267                                                      lp->args[0] + lp->args[1])
268                                 prt("\t***RRRR***");
269                         break;
270                 case OP_MAPWRITE:
271                         prt("MAPWRITE 0x%x thru 0x%x (0x%x bytes)",
272                             lp->args[0], lp->args[0] + lp->args[1] - 1,
273                             lp->args[1]);
274                         if (badoff >= lp->args[0] && badoff <
275                                                      lp->args[0] + lp->args[1])
276                                 prt("\t******WWWW");
277                         break;
278                 case OP_READ:
279                         prt("READ     0x%x thru 0x%x (0x%x bytes)",
280                             lp->args[0], lp->args[0] + lp->args[1] - 1,
281                             lp->args[1]);
282                         if (badoff >= lp->args[0] &&
283                             badoff < lp->args[0] + lp->args[1])
284                                 prt("\t***RRRR***");
285                         break;
286                 case OP_WRITE:
287                         prt("WRITE    0x%x thru 0x%x (0x%x bytes)",
288                             lp->args[0], lp->args[0] + lp->args[1] - 1,
289                             lp->args[1]);
290                         if (lp->args[0] > lp->args[2])
291                                 prt(" HOLE");
292                         else if (lp->args[0] + lp->args[1] > lp->args[2])
293                                 prt(" EXTEND");
294                         if ((badoff >= lp->args[0] || badoff >=lp->args[2]) &&
295                             badoff < lp->args[0] + lp->args[1])
296                                 prt("\t***WWWW");
297                         break;
298                 case OP_TRUNCATE:
299                         down = lp->args[0] < lp->args[1];
300                         prt("TRUNCATE %s\tfrom 0x%x to 0x%x",
301                             down ? "DOWN" : "UP", lp->args[1], lp->args[0]);
302                         if (badoff >= lp->args[!down] &&
303                             badoff < lp->args[!!down])
304                                 prt("\t******WWWW");
305                         break;
306                 case OP_CLOSEOPEN:
307                         prt("CLOSE/OPEN");
308                         break;
309                 case OP_SKIPPED:
310                         prt("SKIPPED (no operation)");
311                         break;
312                 default:
313                         prt("BOGUS LOG ENTRY (operation code = %d)!",
314                             lp->operation);
315                 }
316                 prt("\n");
317                 i++;
318                 if (i == LOGSIZE)
319                         i = 0;
320         }
321 }
322
323
324 void
325 save_buffer(char *buffer, off_t bufferlength, int fd)
326 {
327         off_t ret;
328         ssize_t byteswritten;
329
330         if (fd <= 0 || bufferlength == 0)
331                 return;
332
333         if (bufferlength > SSIZE_MAX) {
334                 prt("fsx flaw: overflow in save_buffer\n");
335                 exit(67);
336         }
337         if (lite) {
338                 off_t size_by_seek = lseek(fd, (off_t)0, SEEK_END);
339                 if (size_by_seek == (off_t)-1)
340                         prterr("save_buffer: lseek eof");
341                 else if (bufferlength > size_by_seek) {
342                         warn("save_buffer: .fsxgood file too short... will"
343                                 "save 0x%llx bytes instead of 0x%llx\n",
344                                 (unsigned long long)size_by_seek,
345                                 (unsigned long long)bufferlength);
346                         bufferlength = size_by_seek;
347                 }
348         }
349
350         ret = lseek(fd, (off_t)0, SEEK_SET);
351         if (ret == (off_t)-1)
352                 prterr("save_buffer: lseek 0");
353
354         byteswritten = write(fd, buffer, (size_t)bufferlength);
355         if (byteswritten != bufferlength) {
356                 if (byteswritten == -1)
357                         prterr("save_buffer write");
358                 else
359                         warn("save_buffer: short write, 0x%x bytes instead"
360                                 "of 0x%llx\n",
361                              (unsigned)byteswritten,
362                              (unsigned long long)bufferlength);
363         }
364 }
365
366
367 void
368 report_failure(int status)
369 {
370         logdump();
371
372         if (fsxgoodfd) {
373                 if (good_buf) {
374                         save_buffer(good_buf, file_size, fsxgoodfd);
375                         prt("Correct content saved for comparison\n");
376                         prt("(maybe hexdump \"%s\" vs \"%s\")\n",
377                             fname, goodfile);
378                 }
379                 close(fsxgoodfd);
380         }
381         exit(status);
382 }
383
384
385 #define short_at(cp) ((unsigned short)((*((unsigned char *)(cp)) << 8) | \
386                                         *(((unsigned char *)(cp)) + 1)))
387
388 void
389 check_buffers(unsigned offset, unsigned size)
390 {
391         unsigned char c, t;
392         unsigned i = 0;
393         unsigned n = 0;
394         unsigned op = 0;
395         unsigned bad = 0;
396
397         if (memcmp(good_buf + offset, temp_buf, size) != 0) {
398                 prt("READ BAD DATA: offset = 0x%x, size = 0x%x\n",
399                     offset, size);
400                 prt("OFFSET\tGOOD\tBAD\tRANGE\n");
401                 while (size > 0) {
402                         c = good_buf[offset];
403                         t = temp_buf[i];
404                         if (c != t) {
405                                 if (n == 0) {
406                                         bad = short_at(&temp_buf[i]);
407                                         prt("%#07x\t%#06x\t%#06x", offset,
408                                             short_at(&good_buf[offset]), bad);
409                                         op = temp_buf[offset & 1 ? i+1 : i];
410                                 }
411                                 n++;
412                                 badoff = offset;
413                         }
414                         offset++;
415                         i++;
416                         size--;
417                 }
418                 if (n) {
419                         prt("\t%#7x\n", n);
420                         if (bad)
421                                 prt("operation# (mod 256) for the bad data"
422                                         "may be %u\n", ((unsigned)op & 0xff));
423                         else
424                                 prt("operation# (mod 256) for the bad data"
425                                         "unknown, check HOLE and EXTEND ops\n");
426                 } else
427                         prt("????????????????\n");
428                 report_failure(110);
429         }
430 }
431
432 struct test_file *
433 get_tf(void)
434 {
435         unsigned index = 0;
436
437         switch (fd_policy) {
438                 case FD_ROTATE:
439                         index = fd_last++;
440                         break;
441                 case FD_RANDOM:
442                         index = random();
443                         break;
444                 case FD_SINGLE:
445                         index = 0;
446                         break;
447                 default:
448                         prt("unknown policy");
449                         exit(1);
450                         break;
451         }
452         return &test_files[ index % num_test_files ];
453 }
454
455 void
456 assign_fd_policy(char *policy)
457 {
458         if (!strcmp(policy, "random"))
459                 fd_policy = FD_RANDOM;
460         else if (!strcmp(policy, "rotate"))
461                 fd_policy = FD_ROTATE;
462         else {
463                 prt("unknown -I policy: '%s'\n", policy);
464                 exit(1);
465         }
466 }
467
468 int
469 get_fd(void)
470 {
471         struct test_file *tf = get_tf();
472         return tf->fd;
473 }
474
/*
 * Return the part of path that follows its last '/', or path itself when
 * it contains no '/'.  The result points into path; nothing is copied.
 *
 * (The previous "c++" was a post-increment, so the value returned still
 * pointed at the '/' instead of at the character after it.)
 */
static const char *my_basename(const char *path)
{
        const char *slash = strrchr(path, '/');

        return slash != NULL ? slash + 1 : path;
}
481
482 void
483 open_test_files(char **argv, int argc)
484 {
485         struct test_file *tf;
486         int i;
487
488         num_test_files = argc;
489         if (num_test_files == 1)
490                 fd_policy = FD_SINGLE;
491
492         test_files = calloc(num_test_files, sizeof(*test_files));
493         if (test_files == NULL) {
494                 prterr("reallocating space for test files");
495                 exit(1);
496         }
497
498         for (i = 0, tf = test_files; i < num_test_files; i++, tf++) {
499
500                 tf->path = argv[i];
501                 tf->fd = open(tf->path, O_RDWR|(lite ? 0 : O_CREAT|O_TRUNC)|
502                                 o_direct, 0666);
503                 if (tf->fd < 0) {
504                         prterr(tf->path);
505                         exit(91);
506                 }
507         }
508
509         if (quiet || fd_policy == FD_SINGLE)
510                 return;
511
512         for (i = 0, tf = test_files; i < num_test_files; i++, tf++)
513                 prt("fd %d: %s\n", i, tf->path);
514 }
515
516 void
517 close_test_files(void)
518 {
519         int i;
520         struct test_file *tf;
521
522         for (i = 0, tf = test_files; i < num_test_files; i++, tf++) {
523                 if (close(tf->fd)) {
524                         prterr("close");
525                         report_failure(99);
526                 }
527         }
528 }
529
530
531 void
532 check_size(void)
533 {
534         struct stat     statbuf;
535         off_t   size_by_seek;
536         int fd = get_fd();
537
538         if (fstat(fd, &statbuf)) {
539                 prterr("check_size: fstat");
540                 statbuf.st_size = -1;
541         }
542         size_by_seek = lseek(fd, (off_t)0, SEEK_END);
543         if (file_size != statbuf.st_size || file_size != size_by_seek) {
544                 prt("Size error: expected 0x%llx stat 0x%llx seek 0x%llx\n",
545                     (unsigned long long)file_size,
546                     (unsigned long long)statbuf.st_size,
547                     (unsigned long long)size_by_seek);
548                 report_failure(120);
549         }
550 }
551
552
/*
 * Check that ftruncate(2) can extend a file: truncate one of the test
 * files to 0, grow it to 100000 bytes, confirm that size with fstat(2),
 * then truncate back to 0.
 *
 * Exits with status 1 when an ftruncate call fails and with status 130
 * when the file did not reach the extended size.
 */
void
check_trunc_hack(void)
{
        const off_t grown = (off_t)100000;
        struct stat st;
        int fd = get_fd();

        /* should not ignore ftruncate(2)'s return value */
        if (ftruncate(fd, (off_t)0) < 0) {
                prterr("trunc_hack: ftruncate(0)");
                exit(1);
        }
        if (ftruncate(fd, grown) < 0) {
                prterr("trunc_hack: ftruncate(100000)");
                exit(1);
        }

        if (fstat(fd, &st) != 0) {
                prterr("trunc_hack: fstat");
                /* force the size check below to fail */
                st.st_size = -1;
        }
        if (st.st_size != grown) {
                prt("no extend on truncate! not posix!\n");
                exit(130);
        }

        if (ftruncate(fd, 0) < 0) {
                prterr("trunc_hack: ftruncate(0) (2nd call)");
                exit(1);
        }
}
581
582 void
583 output_line(struct test_file *tf, int op, unsigned offset,
584                 unsigned size, struct timeval *tv)
585 {
586         char *ops[] = {
587                 [OP_READ] = "read",
588                 [OP_WRITE] = "write",
589                 [OP_TRUNCATE] = "trunc from",
590                 [OP_MAPREAD] = "mapread",
591                 [OP_MAPWRITE] = "mapwrite",
592         };
593
594         /* W. */
595         if (!(!quiet && ((progressinterval &&
596                         testcalls % progressinterval == 0) ||
597                        (debug &&
598                         (monitorstart == -1 ||
599                          (offset + size > monitorstart &&
600                           (monitorend == -1 || offset <= monitorend)))))))
601                 return;
602
603         prt("%06lu%s %lu.%06lu %-10s %#08x %s %#08x\t(0x%x bytes)\n",
604                 testcalls, fill_tf_buf(tf), tv->tv_sec, tv->tv_usec,
605                 ops[op],
606                 offset, op == OP_TRUNCATE ? " to " : "thru",
607                 offset + size - 1, size);
608 }
609
610 void
611 doread(unsigned offset, unsigned size)
612 {
613         struct timeval t;
614         off_t ret;
615         unsigned iret;
616         struct test_file *tf = get_tf();
617         int fd = tf->fd;
618
619         offset -= offset % readbdy;
620         if (o_direct)
621                 size -= size % readbdy;
622         gettimeofday(&t, NULL);
623         if (size == 0) {
624                 if (!quiet && testcalls > simulatedopcount && !o_direct)
625                         prt("skipping zero size read\n");
626                 log4(OP_SKIPPED, OP_READ, offset, size, &t, tf);
627                 return;
628         }
629         if (size + offset > file_size) {
630                 if (!quiet && testcalls > simulatedopcount)
631                         prt("skipping seek/read past end of file\n");
632                 log4(OP_SKIPPED, OP_READ, offset, size, &t, tf);
633                 return;
634         }
635
636         log4(OP_READ, offset, size, 0, &t, tf);
637
638         if (testcalls <= simulatedopcount)
639                 return;
640
641         output_line(tf, OP_READ, offset, size, &t);
642
643         ret = lseek(fd, (off_t)offset, SEEK_SET);
644         if (ret == (off_t)-1) {
645                 prterr("doread: lseek");
646                 report_failure(140);
647         }
648         iret = read(fd, temp_buf, size);
649         if (!quiet && (debug > 1 &&
650                         (monitorstart == -1 ||
651                          (offset + size > monitorstart &&
652                           (monitorend == -1 || offset <= monitorend))))) {
653                 gettimeofday(&t, NULL);
654                 prt("       %lu.%06lu read done\n", t.tv_sec, t.tv_usec);
655         }
656         if (iret != size) {
657                 if (iret == -1)
658                         prterr("doread: read");
659                 else
660                         prt("short read: 0x%x bytes instead of 0x%x\n",
661                             iret, size);
662                 report_failure(141);
663         }
664         check_buffers(offset, size);
665 }
666
667
668 void
669 domapread(unsigned offset, unsigned size)
670 {
671         struct timeval t;
672         unsigned pg_offset;
673         unsigned map_size;
674         char    *p;
675         struct test_file *tf = get_tf();
676         int fd = tf->fd;
677
678         offset -= offset % readbdy;
679         gettimeofday(&t, NULL);
680         if (size == 0) {
681                 if (!quiet && testcalls > simulatedopcount)
682                         prt("skipping zero size read\n");
683                 log4(OP_SKIPPED, OP_MAPREAD, offset, size, &t, tf);
684                 return;
685         }
686         if (size + offset > file_size) {
687                 if (!quiet && testcalls > simulatedopcount)
688                         prt("skipping seek/read past end of file\n");
689                 log4(OP_SKIPPED, OP_MAPREAD, offset, size, &t, tf);
690                 return;
691         }
692
693         log4(OP_MAPREAD, offset, size, 0, &t, tf);
694
695         if (testcalls <= simulatedopcount)
696                 return;
697
698         output_line(tf, OP_MAPREAD, offset, size, &t);
699
700         pg_offset = offset & page_mask;
701         map_size  = pg_offset + size;
702
703         if ((p = mmap(0, map_size, PROT_READ, MAP_FILE | MAP_SHARED, fd,
704                       (off_t)(offset - pg_offset))) == MAP_FAILED) {
705                 prterr("domapread: mmap");
706                 report_failure(190);
707         }
708         if (!quiet && (debug > 1 &&
709                         (monitorstart == -1 ||
710                          (offset + size > monitorstart &&
711                           (monitorend == -1 || offset <= monitorend))))) {
712                 gettimeofday(&t, NULL);
713                 prt("       %lu.%06lu mmap done\n", t.tv_sec, t.tv_usec);
714         }
715         memcpy(temp_buf, p + pg_offset, size);
716         if (!quiet && (debug > 1 &&
717                         (monitorstart == -1 ||
718                          (offset + size > monitorstart &&
719                           (monitorend == -1 || offset <= monitorend))))) {
720                 gettimeofday(&t, NULL);
721                 prt("       %lu.%06lu memcpy done\n", t.tv_sec, t.tv_usec);
722         }
723         if (munmap(p, map_size) != 0) {
724                 prterr("domapread: munmap");
725                 report_failure(191);
726         }
727         if (!quiet && (debug > 1 &&
728                         (monitorstart == -1 ||
729                          (offset + size > monitorstart &&
730                           (monitorend == -1 || offset <= monitorend))))) {
731                 gettimeofday(&t, NULL);
732                 prt("       %lu.%06lu munmap done\n", t.tv_sec, t.tv_usec);
733         }
734
735         check_buffers(offset, size);
736 }
737
738
739 void
740 gendata(char *original_buf, char *good_buf, unsigned offset, unsigned size)
741 {
742         while (size--) {
743                 good_buf[offset] = testcalls % 256;
744                 if (offset % 2)
745                         good_buf[offset] += original_buf[offset];
746                 offset++;
747         }
748 }
749
750
751 void
752 dowrite(unsigned offset, unsigned size)
753 {
754         struct timeval t;
755         off_t ret;
756         unsigned iret;
757         struct test_file *tf = get_tf();
758         int fd = tf->fd;
759
760         offset -= offset % writebdy;
761         if (o_direct)
762                 size -= size % writebdy;
763         gettimeofday(&t, NULL);
764         if (size == 0) {
765                 if (!quiet && testcalls > simulatedopcount && !o_direct)
766                         prt("skipping zero size write\n");
767                 log4(OP_SKIPPED, OP_WRITE, offset, size, &t, tf);
768                 return;
769         }
770
771         log4(OP_WRITE, offset, size, file_size, &t, tf);
772
773         gendata(original_buf, good_buf, offset, size);
774         if (file_size < offset + size) {
775                 if (file_size < offset)
776                         memset(good_buf + file_size, '\0', offset - file_size);
777                 file_size = offset + size;
778                 if (lite) {
779                         warn("Lite file size bug in fsx!");
780                         report_failure(149);
781                 }
782         }
783
784         if (testcalls <= simulatedopcount)
785                 return;
786
787         output_line(tf, OP_WRITE, offset, size, &t);
788
789         ret = lseek(fd, (off_t)offset, SEEK_SET);
790         if (ret == (off_t)-1) {
791                 prterr("dowrite: lseek");
792                 report_failure(150);
793         }
794         iret = write(fd, good_buf + offset, size);
795         if (!quiet && (debug > 1 &&
796                         (monitorstart == -1 ||
797                          (offset + size > monitorstart &&
798                           (monitorend == -1 || offset <= monitorend))))) {
799                 gettimeofday(&t, NULL);
800                 prt("       %lu.%06lu write done\n", t.tv_sec, t.tv_usec);
801         }
802         if (iret != size) {
803                 if (iret == -1)
804                         prterr("dowrite: write");
805                 else
806                         prt("short write: 0x%x bytes instead of 0x%x\n",
807                             iret, size);
808                 report_failure(151);
809         }
810 }
811
812
813 void
814 domapwrite(unsigned offset, unsigned size)
815 {
816         struct timeval t;
817         unsigned pg_offset;
818         unsigned map_size;
819         off_t    cur_filesize;
820         char    *p;
821         struct test_file *tf = get_tf();
822         int fd = tf->fd;
823
824         offset -= offset % writebdy;
825         gettimeofday(&t, NULL);
826         if (size == 0) {
827                 if (!quiet && testcalls > simulatedopcount)
828                         prt("skipping zero size write\n");
829                 log4(OP_SKIPPED, OP_MAPWRITE, offset, size, &t, tf);
830                 return;
831         }
832         cur_filesize = file_size;
833
834         log4(OP_MAPWRITE, offset, size, 0, &t, tf);
835
836         gendata(original_buf, good_buf, offset, size);
837         if (file_size < offset + size) {
838                 if (file_size < offset)
839                         memset(good_buf + file_size, '\0', offset - file_size);
840                 file_size = offset + size;
841                 if (lite) {
842                         warn("Lite file size bug in fsx!");
843                         report_failure(200);
844                 }
845         }
846
847         if (testcalls <= simulatedopcount)
848                 return;
849
850         output_line(tf, OP_MAPWRITE, offset, size, &t);
851
852         if (file_size > cur_filesize) {
853                 if (ftruncate(fd, file_size) == -1) {
854                         prterr("domapwrite: ftruncate");
855                         exit(201);
856                 }
857                 if (!quiet && (debug > 1 &&
858                                (monitorstart == -1 ||
859                                 (offset + size > monitorstart &&
860                                  (monitorend == -1 || offset <= monitorend))))) {
861                         gettimeofday(&t, NULL);
862                         prt("       %lu.%06lu truncate done\n", t.tv_sec, t.tv_usec);
863         }
864         }
865         pg_offset = offset & page_mask;
866         map_size  = pg_offset + size;
867
868         if ((p = mmap(0, map_size, PROT_READ | PROT_WRITE, MAP_FILE|MAP_SHARED,
869                       fd, (off_t)(offset - pg_offset))) == MAP_FAILED) {
870                 prterr("domapwrite: mmap");
871                 report_failure(202);
872         }
873         if (!quiet && (debug > 1 &&
874                         (monitorstart == -1 ||
875                          (offset + size > monitorstart &&
876                           (monitorend == -1 || offset <= monitorend))))) {
877                 gettimeofday(&t, NULL);
878                 prt("       %lu.%06lu mmap done\n", t.tv_sec, t.tv_usec);
879         }
880         memcpy(p + pg_offset, good_buf + offset, size);
881         if (!quiet && (debug > 1 &&
882                         (monitorstart == -1 ||
883                          (offset + size > monitorstart &&
884                           (monitorend == -1 || offset <= monitorend))))) {
885                 gettimeofday(&t, NULL);
886                 prt("       %lu.%06lu memcpy done\n", t.tv_sec, t.tv_usec);
887         }
888         if (msync(p, map_size, 0) != 0) {
889                 prterr("domapwrite: msync");
890                 report_failure(203);
891         }
892         if (!quiet && (debug > 1 &&
893                         (monitorstart == -1 ||
894                          (offset + size > monitorstart &&
895                           (monitorend == -1 || offset <= monitorend))))) {
896                 gettimeofday(&t, NULL);
897                 prt("       %lu.%06lu msync done\n", t.tv_sec, t.tv_usec);
898         }
899         if (munmap(p, map_size) != 0) {
900                 prterr("domapwrite: munmap");
901                 report_failure(204);
902         }
903         if (!quiet && (debug > 1 &&
904                         (monitorstart == -1 ||
905                          (offset + size > monitorstart &&
906                           (monitorend == -1 || offset <= monitorend))))) {
907                 gettimeofday(&t, NULL);
908                 prt("       %lu.%06lu munmap done\n", t.tv_sec, t.tv_usec);
909         }
910 }
911
912
913 void
914 dotruncate(unsigned size)
915 {
916         struct timeval t;
917         int oldsize = file_size;
918         struct test_file *tf = get_tf();
919         int fd = tf->fd;
920
921         size -= size % truncbdy;
922         gettimeofday(&t, NULL);
923         if (size > biggest) {
924                 biggest = size;
925                 if (!quiet && testcalls > simulatedopcount)
926                         prt("truncating to largest ever: 0x%x\n", size);
927         }
928
929         log4(OP_TRUNCATE, size, (unsigned)file_size, 0, &t, tf);
930
931         if (size > file_size)
932                 memset(good_buf + file_size, '\0', size - file_size);
933         file_size = size;
934
935         if (testcalls <= simulatedopcount)
936                 return;
937
938         output_line(tf, OP_TRUNCATE, oldsize, size, &t);
939
940         if (ftruncate(fd, (off_t)size) == -1) {
941                 prt("ftruncate1: %x\n", size);
942                 prterr("dotruncate: ftruncate");
943                 report_failure(160);
944         }
945         if (!quiet && debug > 1) {
946                 gettimeofday(&t, NULL);
947                 prt("       %lu.%06lu trunc done\n", t.tv_sec, t.tv_usec);
948         }
949 }
950
951
952 void
953 writefileimage()
954 {
955         ssize_t iret;
956         int fd = get_fd();
957
958         if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) {
959                 prterr("writefileimage: lseek");
960                 report_failure(171);
961         }
962         iret = write(fd, good_buf, file_size);
963         if ((off_t)iret != file_size) {
964                 if (iret == -1)
965                         prterr("writefileimage: write");
966                 else
967                         prt("short write: 0x%lx bytes instead of 0x%llx\n",
968                             (unsigned long)iret,
969                             (unsigned long long)file_size);
970                 report_failure(172);
971         }
972         if (lite ? 0 : ftruncate(fd, file_size) == -1) {
973                 prt("ftruncate2: %llx\n", (unsigned long long)file_size);
974                 prterr("writefileimage: ftruncate");
975                 report_failure(173);
976         }
977 }
978
979
980 void
981 docloseopen(void)
982 {
983         struct timeval t;
984         struct test_file *tf = get_tf();
985
986         if (testcalls <= simulatedopcount)
987                 return;
988
989         gettimeofday(&t, NULL);
990         log4(OP_CLOSEOPEN, file_size, (unsigned)file_size, 0, &t, tf);
991
992         if (debug)
993                 prt("%06lu %lu.%06lu close/open\n", testcalls, t.tv_sec,
994                     t.tv_usec);
995         if (close(tf->fd)) {
996                 prterr("docloseopen: close");
997                 report_failure(180);
998         }
999         if (!quiet && debug > 1) {
1000                 gettimeofday(&t, NULL);
1001                 prt("       %lu.%06lu close done\n", t.tv_sec, t.tv_usec);
1002         }
1003         tf->fd = open(tf->path, O_RDWR|o_direct, 0);
1004         if (tf->fd < 0) {
1005                 prterr("docloseopen: open");
1006                 report_failure(181);
1007         }
1008         if (!quiet && debug > 1) {
1009                 gettimeofday(&t, NULL);
1010                 prt("       %lu.%06lu open done\n", t.tv_sec, t.tv_usec);
1011         }
1012 }
1013
1014
1015 void
1016 test(void)
1017 {
1018         unsigned long   offset;
1019         unsigned long   size = maxoplen;
1020         unsigned long   rv = random();
1021         unsigned long   op = rv % (3 + !lite + mapped_writes);
1022
1023         /* turn off the map read if necessary */
1024
1025         if (op == 2 && !mapped_reads)
1026             op = 0;
1027
1028         if (simulatedopcount > 0 && testcalls == simulatedopcount)
1029                 writefileimage();
1030
1031         testcalls++;
1032
1033         if (debugstart > 0 && testcalls >= debugstart)
1034                 debug = 1;
1035
1036         if (!quiet && testcalls < simulatedopcount && testcalls % 100000 == 0)
1037                 prt("%lu...\n", testcalls);
1038
1039         /*
1040          * READ:        op = 0
1041          * WRITE:       op = 1
1042          * MAPREAD:     op = 2
1043          * TRUNCATE:    op = 3
1044          * MAPWRITE:    op = 3 or 4
1045          */
1046         if (lite ? 0 : op == 3 && (style & 1) == 0) /* vanilla truncate? */
1047                 dotruncate(random() % maxfilelen);
1048         else {
1049                 if (randomoplen)
1050                         size = random() % (maxoplen+1);
1051                 if (lite ? 0 : op == 3)
1052                         dotruncate(size);
1053                 else {
1054                         offset = random();
1055                         if (op == 1 || op == (lite ? 3 : 4)) {
1056                                 offset %= maxfilelen;
1057                                 if (offset + size > maxfilelen)
1058                                         size = maxfilelen - offset;
1059                                 if (op != 1)
1060                                         domapwrite(offset, size);
1061                                 else
1062                                         dowrite(offset, size);
1063                         } else {
1064                                 if (file_size)
1065                                         offset %= file_size;
1066                                 else
1067                                         offset = 0;
1068                                 if (offset + size > file_size)
1069                                         size = file_size - offset;
1070                                 if (op != 0)
1071                                         domapread(offset, size);
1072                                 else
1073                                         doread(offset, size);
1074                         }
1075                 }
1076         }
1077         if (sizechecks && testcalls > simulatedopcount)
1078                 check_size();
1079         if (closeprob && (rv >> 3) < (1 << 28) / closeprob)
1080                 docloseopen();
1081 }
1082
1083
1084 void
1085 cleanup(sig)
1086         int     sig;
1087 {
1088         if (sig)
1089                 prt("signal %d\n", sig);
1090         prt("testcalls = %lu\n", testcalls);
1091         exit(sig);
1092 }
1093
1094
/*
 * Print the command-line synopsis and option summary to stdout, then
 * terminate the program with exit status 90.  Never returns.
 */
void
usage(void)
{
	fprintf(stdout, "usage: %s",
		"fsx [-dnqLORWZ] [-b opnum] [-c Prob] [-l flen] [-m "
"start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t "
"truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] "
"[ -I random|rotate ] fname [additional paths to fname..]\n"
"	-b opnum: beginning operation number (default 1)\n"
"	-c P: 1 in P chance of file close+open at each op (default infinity)\n"
"	-d: debug output for all operations [-d -d = more debugging]\n"
"	-l flen: the upper bound on file size (default 262144)\n"
"	-m start:end: monitor (print debug output) specified byte range "
"(default 0:infinity)\n"
"	-n: no verifications of file size\n"
"	-o oplen: the upper bound on operation size (default 65536)\n"
"	-p progressinterval: debug output at specified operation interval\n"
"	-q: quieter operation\n"
"	-r readbdy: 4096 would make reads page aligned (default 1)\n"
"	-s style: 1 gives smaller truncates (default 0)\n"
"	-t truncbdy: 4096 would make truncates page aligned (default 1)\n"
"	-w writebdy: 4096 would make writes page aligned (default 1)\n"
"	-D startingop: debug output starting at specified operation\n"
"	-L: fsxLite - no file creations & no file size changes\n"
"	-N numops: total # operations to do (default infinity)\n"
"	-O: use oplen (see -o flag) for every op (default random)\n"
"	-P: save .fsxlog and .fsxgood files in dirpath (default ./)\n"
"	-S seed: for random # generator (default 1) 0 gets timestamp\n"
"	-W: mapped write operations DISabled\n"
"	-R: read() system calls only (mapped reads disabled)\n"
"	-Z: O_DIRECT (use -R, -W, -r and -w too)\n"
"	-I: When multiple paths to the file are given each operation uses\n"
"	    a different path.  Iterate through them in order with 'rotate'\n"
"	    or choose them at 'random'.  (defaults to random)\n"
"	fname: this filename is REQUIRED (no default)\n");
	exit(90);
}
1132
1133
/*
 * Parse an integer (any base strtol() accepts with base 0) from "s",
 * optionally followed by a single size suffix:
 *	b/B = x512, k/K = x1024, m/M = x1024*1024, w/W = x4
 *
 * On return *e points at the first character that was not consumed (just
 * past the suffix when one was recognised), so callers can continue
 * parsing, e.g. a "start:end" pair.
 *
 * Returns the scaled value, or -1 when the result does not fit in an int
 * (instead of overflowing or being silently truncated).
 */
int
getnum(char *s, char **e)
{
	long val;
	long mult = 1;

	*e = (char *) 0;
	val = strtol(s, e, 0);
	if (*e) {
		switch (**e) {
		case 'b':
		case 'B':
			mult = 512;
			break;
		case 'k':
		case 'K':
			mult = 1024;
			break;
		case 'm':
		case 'M':
			mult = 1024 * 1024;
			break;
		case 'w':
		case 'W':
			mult = 4;
			break;
		}
		/* step over a recognised suffix */
		if (mult != 1)
			*e = *e + 1;
	}

	/*
	 * Range-check before multiplying so the product can neither
	 * overflow nor be truncated on conversion to int.  This also
	 * rejects the LONG_MAX/LONG_MIN that strtol() returns for
	 * out-of-range input wherever long is wider than int.
	 */
	if (val > INT_MAX / mult || val < INT_MIN / mult)
		return -1;

	return (int)(val * mult);
}
1166
1167 int
1168 main(int argc, char **argv)
1169 {
1170         int     i, style, ch;
1171         char    *endp;
1172         int  dirpath = 0;
1173
1174         goodfile[0] = 0;
1175         logfile[0] = 0;
1176
1177         page_size = getpagesize();
1178         page_mask = page_size - 1;
1179
1180         setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
1181
1182         while ((ch = getopt(argc, argv,
1183                                 "b:c:dl:m:no:p:qr:s:t:w:D:I:LN:OP:RS:WZ"))
1184                != EOF)
1185                 switch (ch) {
1186                 case 'b':
1187                         simulatedopcount = getnum(optarg, &endp);
1188                         if (!quiet)
1189                                 fprintf(stdout, "Will begin at operation"
1190                                         "%ld\n",
1191                                         simulatedopcount);
1192                         if (simulatedopcount == 0)
1193                                 usage();
1194                         simulatedopcount -= 1;
1195                         break;
1196                 case 'c':
1197                         closeprob = getnum(optarg, &endp);
1198                         if (!quiet)
1199                                 fprintf(stdout,
1200                                         "Chance of close/open is 1 in %d\n",
1201                                         closeprob);
1202                         if (closeprob <= 0)
1203                                 usage();
1204                         break;
1205                 case 'd':
1206                         debug++;
1207                         break;
1208                 case 'l':
1209                         maxfilelen = getnum(optarg, &endp);
1210                         if (maxfilelen <= 0)
1211                                 usage();
1212                         break;
1213                 case 'm':
1214                         monitorstart = getnum(optarg, &endp);
1215                         if (monitorstart < 0)
1216                                 usage();
1217                         if (!endp || *endp++ != ':')
1218                                 usage();
1219                         monitorend = getnum(endp, &endp);
1220                         if (monitorend < 0)
1221                                 usage();
1222                         if (monitorend == 0)
1223                                 monitorend = -1; /* aka infinity */
1224                         debug = 1;
1225                 case 'n':
1226                         sizechecks = 0;
1227                         break;
1228                 case 'o':
1229                         maxoplen = getnum(optarg, &endp);
1230                         if (maxoplen <= 0)
1231                                 usage();
1232                         break;
1233                 case 'p':
1234                         progressinterval = getnum(optarg, &endp);
1235                         if (progressinterval < 0)
1236                                 usage();
1237                         break;
1238                 case 'q':
1239                         quiet = 1;
1240                         break;
1241                 case 'r':
1242                         readbdy = getnum(optarg, &endp);
1243                         if (readbdy <= 0)
1244                                 usage();
1245                         break;
1246                 case 's':
1247                         style = getnum(optarg, &endp);
1248                         if (style < 0 || style > 1)
1249                                 usage();
1250                         break;
1251                 case 't':
1252                         truncbdy = getnum(optarg, &endp);
1253                         if (truncbdy <= 0)
1254                                 usage();
1255                         break;
1256                 case 'w':
1257                         writebdy = getnum(optarg, &endp);
1258                         if (writebdy <= 0)
1259                                 usage();
1260                         break;
1261                 case 'D':
1262                         debugstart = getnum(optarg, &endp);
1263                         if (debugstart < 1)
1264                                 usage();
1265                         break;
1266                 case 'I':
1267                         assign_fd_policy(optarg);
1268                         break;
1269                 case 'L':
1270                         lite = 1;
1271                         break;
1272                 case 'N':
1273                         numops = getnum(optarg, &endp);
1274                         if (numops < 0)
1275                                 usage();
1276                         break;
1277                 case 'O':
1278                         randomoplen = 0;
1279                         break;
1280                 case 'P':
1281                         strncpy(goodfile, optarg, sizeof(goodfile) - 1);
1282                         strcat(goodfile, "/");
1283                         strncpy(logfile, optarg, sizeof(logfile) - 1);
1284                         strcat(logfile, "/");
1285                         dirpath = 1;
1286                         break;
1287                 case 'R':
1288                         mapped_reads = 0;
1289                         break;
1290                 case 'S':
1291                         seed = getnum(optarg, &endp);
1292                         if (seed == 0)
1293                                 seed = time(0) % 10000;
1294                         if (!quiet)
1295                                 fprintf(stdout, "Seed set to %d\n", seed);
1296                         if (seed < 0)
1297                                 usage();
1298                         break;
1299                 case 'W':
1300                         mapped_writes = 0;
1301                         if (!quiet)
1302                                 fprintf(stdout, "mapped writes DISABLED\n");
1303                         break;
1304                 case 'Z':
1305                         o_direct = O_DIRECT;
1306                         break;
1307
1308                 default:
1309                         usage();
1310                         /* NOTREACHED */
1311                 }
1312         argc -= optind;
1313         argv += optind;
1314         if (argc < 1)
1315                 usage();
1316         fname = argv[0];
1317
1318         signal(SIGHUP,  cleanup);
1319         signal(SIGINT,  cleanup);
1320         signal(SIGPIPE, cleanup);
1321         signal(SIGALRM, cleanup);
1322         signal(SIGTERM, cleanup);
1323         signal(SIGXCPU, cleanup);
1324         signal(SIGXFSZ, cleanup);
1325         signal(SIGVTALRM,       cleanup);
1326         signal(SIGUSR1, cleanup);
1327         signal(SIGUSR2, cleanup);
1328
1329         initstate(seed, state, 256);
1330         setstate(state);
1331
1332         open_test_files(argv, argc);
1333
1334         strncat(goodfile, dirpath ? my_basename(fname) : fname, 256);
1335         strcat (goodfile, ".fsxgood");
1336         fsxgoodfd = open(goodfile, O_RDWR|O_CREAT|O_TRUNC, 0666);
1337         if (fsxgoodfd < 0) {
1338                 prterr(goodfile);
1339                 exit(92);
1340         }
1341         strncat(logfile, dirpath ? my_basename(fname) : fname, 256);
1342         strcat (logfile, ".fsxlog");
1343         fsxlogf = fopen(logfile, "w");
1344         if (fsxlogf == NULL) {
1345                 prterr(logfile);
1346                 exit(93);
1347         }
1348         if (lite) {
1349                 off_t ret;
1350                 int fd = get_fd();
1351                 file_size = maxfilelen = lseek(fd, (off_t)0, SEEK_END);
1352                 if (file_size == (off_t)-1) {
1353                         prterr(fname);
1354                         warn("main: lseek eof");
1355                         exit(94);
1356                 }
1357                 ret = lseek(fd, (off_t)0, SEEK_SET);
1358                 if (ret == (off_t)-1) {
1359                         prterr(fname);
1360                         warn("main: lseek 0");
1361                         exit(95);
1362                 }
1363         }
1364         original_buf = (char *) malloc(maxfilelen);
1365         for (i = 0; i < maxfilelen; i++)
1366                 original_buf[i] = random() % 256;
1367         if (o_direct) {
1368                 int ret;
1369
1370                 ret = posix_memalign((void **)&good_buf, writebdy, maxfilelen);
1371                 if (ret) {
1372                         prt("main: posix_memalign failed: %s\n", strerror(ret));
1373                         exit(96);
1374                 }
1375
1376                 ret = posix_memalign((void **)&temp_buf, readbdy, maxoplen);
1377                 if (ret) {
1378                         prt("main: posix_memalign failed: %s\n", strerror(ret));
1379                         exit(97);
1380                 }
1381         } else {
1382                 good_buf = malloc(maxfilelen);
1383                 if (!good_buf) {
1384                         prt("malloc failed.\n");
1385                         exit(98);
1386                 }
1387
1388                 temp_buf = malloc(maxoplen);
1389                 if (!temp_buf) {
1390                         prt("malloc failed.\n");
1391                         exit(99);
1392                 }
1393         }
1394         memset(good_buf, 0, maxfilelen);
1395         memset(temp_buf, 0, maxoplen);
1396
1397         if (lite) {     /* zero entire existing file */
1398                 ssize_t written;
1399                 int fd = get_fd();
1400
1401                 written = write(fd, good_buf, (size_t)maxfilelen);
1402                 if (written != maxfilelen) {
1403                         if (written == -1) {
1404                                 prterr(fname);
1405                                 warn("main: error on write");
1406                         } else
1407                                 warn("main: short write, 0x%x bytes instead"
1408                                         "of 0x%x\n",
1409                                      (unsigned)written, maxfilelen);
1410                         exit(98);
1411                 }
1412         } else
1413                 check_trunc_hack();
1414
1415         while (numops == -1 || numops--)
1416                 test();
1417
1418         close_test_files();
1419         prt("All operations completed A-OK!\n");
1420
1421         exit(0);
1422         return 0;
1423 }