Whamcloud - gitweb
LU-17676 build: configure should prefer to ask if
[fs/lustre-release.git] / lustre / tests / fsx.c
1 /*
2  * Copyright (C) 1991, NeXT Computer, Inc.  All Rights Reserverd.
3  * Copyright (c) 1998-2001 Apple Computer, Inc. All rights reserved.
4  *
5  * Copyright (c) 2012, Intel Corporation.
6  *
7  * @APPLE_LICENSE_HEADER_START@
8  *
9  * The contents of this file constitute Original Code as defined in and
10  * are subject to the Apple Public Source License Version 1.1 (the
11  * "License").  You may not use this file except in compliance with the
12  * License.  Please obtain a copy of the License at
13  * http://www.apple.com/publicsource and read it before using this file.
14  *
15  * This Original Code and all software distributed under the License are
16  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
20  * License for the specific language governing rights and limitations
21  * under the License.
22  *
23  * @APPLE_LICENSE_HEADER_END@
24  *
25  *      File:   fsx.c
26  *      Author: Avadis Tevanian, Jr.
27  *
28  *      File system exerciser.
29  *
30  *      Rewrite and enhancements 1998-2001 Conrad Minshall -- conrad@mac.com
31  *
32  *      Various features from Joe Sokol, Pat Dirks, and Clark Warner.
33  *
34  *      Small changes to work under Linux -- davej.
35  *
36  *      Sundry porting patches from Guy Harris 12/2001
37  * $FreeBSD: src/tools/regression/fsx/fsx.c,v 1.1 2001/12/20 04:15:57 jkh Exp $
38  *
39  *      Checks for mmap last-page zero fill.
40  *
41  *      Add multi-file testing feature -- Zach Brown <zab@clusterfs.com>
42  *
43  *      Add random preallocation calls - Eric Sandeen <sandeen@redhat.com>
44  *
45  * $FreeBSD: src/tools/regression/fsx/fsx.c,v 1.2 2003/04/23 23:42:23 jkh Exp $
46  * $DragonFly: src/test/stress/fsx/fsx.c,v 1.2 2005/05/02 19:31:56 dillon Exp $
47  */
48 #ifndef _GNU_SOURCE
49 #define _GNU_SOURCE
50 #endif
51
52 #include <sys/types.h>
53 #include <sys/stat.h>
54 #if defined(_UWIN) || defined(__linux__)
55 # include <sys/param.h>
56 # include <limits.h>
57 # include <time.h>
58 # include <strings.h>
59 #endif
60 #include <sys/time.h>
61 #include <fcntl.h>
62 #include <sys/mman.h>
63 #ifndef MAP_FILE
64 # define MAP_FILE 0
65 #endif
66 #include <limits.h>
67 #include <signal.h>
68 #include <stdio.h>
69 #include <stddef.h>
70 #include <stdlib.h>
71 #include <string.h>
72 #include <unistd.h>
73 #include <stdarg.h>
74 #include <errno.h>
75 #include <libcfs/util/string.h>
76 #include <setjmp.h>
77
78 #include <linux/lustre/lustre_idl.h>
79 #include <lustre/lustreapi.h>
80
81 /*
82  * Each test run will work with one or more separate file descriptors for the
83  * same file.  This allows testing cache coherency across multiple mountpoints
84  * of the same network filesystem on a single client.
85  */
struct test_file {
        char *path;     /* pathname, taken from the argv[] handed to open_test_files() */
        int fd;         /* descriptor returned by open() for path */
        int o_direct;   /* OP_DIRECT when the file was opened with O_DIRECT, else 0 */
} *test_files = NULL, *tf;      /* array of num_test_files entries; entry recorded by log4() */
91
92 int num_test_files;
93
/* How get_tf() chooses an entry of test_files[] on each call. */
enum fd_iteration_policy {
        FD_SINGLE,      /* always entry 0 (selected when only one file is given) */
        FD_ROTATE,      /* advance through the entries using the fd_last counter */
        FD_RANDOM,      /* index derived from random() */
};
99
100 int fd_policy = FD_RANDOM;
101 int fd_last;
102
103 /*
104  *      A log entry is an operation and a bunch of arguments.
105  */
106
struct log_entry {
        int operation;                  /* OP_* code, optionally + OP_DIRECT, or OP_SKIPPED */
        int args[3];                    /* operation-specific arguments, decoded by logdump() */
        struct timeval tv;              /* time sampled by log4() when the entry was written */
        const struct test_file *tf;     /* value of the global tf when the entry was written */
};
113
114 #define LOGSIZE 100000
115
116 struct log_entry oplog[LOGSIZE]; /* the log */
117 int logptr; /* current position in log */
118 int logcount; /* total ops */
119 int jmpbuf_good;
120 jmp_buf jmpbuf;
121
122 unsigned int mirror_ids[LUSTRE_MIRROR_COUNT_MAX];
123 /*
124  * Define operations
125  */
126
127 /* common operations */
128 #define OP_READ         0
129 #define OP_WRITE        1
130 #define OP_MAPREAD      2
131 #define OP_MAPWRITE     3
132 #define OP_MAX_LITE     4
133
134 /* !lite operations */
135 #define OP_TRUNCATE             4
136 #define OP_FALLOCATE            5
137 #define OP_PUNCH_HOLE           6
138 #define OP_ZERO_RANGE           7
139 #define OP_CLOSEOPEN            8
140 #define OP_MIRROR_OPS           9
141 #define OP_MAX_FULL             10
142
143 #define MIRROR_EXTEND 0
144 #define MIRROR_SPLIT 1
145 #define MIRROR_RESYNC 2
146 #define MIRROR_OPS 3
147
148 char *mirror_op_str[] = {
149         [MIRROR_EXTEND] = "MIRROR_EXTEND",
150         [MIRROR_SPLIT]  = "MIRROR_SPLIT",
151         [MIRROR_RESYNC] = "MIRROR_RESYNC",
152 };
153
154 #define OP_SKIPPED 101
155 #define OP_DIRECT O_DIRECT
156
157 #ifndef FALLOC_FL_PUNCH_HOLE
158 #define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */
159 #endif
160
161 #ifndef FALLOC_FL_KEEP_SIZE
162 #define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */
163 #endif
164
165 #ifndef FALLOC_FL_ZERO_RANGE
166 #define FALLOC_FL_ZERO_RANGE 0x10 /* convert range to zeros */
167 #endif
168
169
170 char *original_buf; /* a pointer to the original data */
171 char *good_buf; /* a pointer to the correct data */
172 char *temp_buf; /* a pointer to the current data */
173 char *fname; /* name of our test file */
174 char logfile[PATH_MAX]; /* name of our log file */
char goodfile[PATH_MAX]; /* name of the file that receives the known-good data on failure */
176
177 struct timeval tv; /* time current operation started */
178 off_t file_size;
179 off_t biggest;
180 char state[256];
181 unsigned long testcalls; /* calls to function "test" */
182
183 long simulatedopcount;                  /* -b flag */
184 int closeprob;                          /* -c flag */
185 int debug ;                             /* -d flag */
186 long debugstart;                        /* -D flag */
187 int flush;                              /* -f flag */
188 int do_fsync;                           /* -y flag */
189 long maxfilelen = 256 * 1024;           /* -l flag */
190 int sizechecks = 1;                     /* -n flag disables them */
191 int maxoplen = 64 * 1024;               /* -o flag */
192 int quiet;                              /* -q flag */
193 long progressinterval;                  /* -p flag */
194 int readbdy = 1;                        /* -r flag */
195 int style;                              /* -s flag */
196 int truncbdy = 1;                       /* -t flag */
197 int writebdy = 1;                       /* -w flag */
198 long monitorstart = -1;                 /* -m flag */
199 long monitorend = -1;                   /* -m flag */
200 long flrmode;                           /* -M flag */
201 int lite;                               /* -L flag */
202 long numops = -1;                       /* -N flag */
203 int randomoplen = 1;                    /* -O flag disables it */
204 int seed = 1;                           /* -S flag */
205 int mapped_writes = 1;                  /* -W flag disables */
206 int fallocate_calls = 1;                /* -F flag disables */
207 int punch_hole_calls = 1;               /* -H flag disables */
208 int zero_range_calls = 1;               /* -z flag disables */
209 int mapped_reads = 1;                   /* -R flag disables it */
210 int fsxgoodfd;
211 int o_direct;                           /* -Z */
212 int fl_keep_size;
213
214 int page_size;
215 int page_mask;
216
217 FILE *fsxlogf;
218 int badoff = -1;
219
/*
 * Write a diagnostic to stderr in the form
 *      fsx: <formatted message>: <strerror(code)>
 * When fmt is NULL only "fsx: <strerror(code)>" is printed.
 *
 * code: errno-style error number to translate with strerror()
 * fmt:  printf-style format, or NULL for no message
 * ap:   arguments for fmt, already started by the caller
 *
 * Written with a prototype instead of the old K&R parameter list so the
 * compiler type-checks callers and the file still builds as C23.
 */
static void
vwarnc(int code, const char *fmt, va_list ap)
{
        fprintf(stderr, "fsx: ");
        if (fmt) {
                vfprintf(stderr, fmt, ap);
                fprintf(stderr, ": ");
        }
        fprintf(stderr, "%s\n", strerror(code));
}
233
/*
 * printf-style warning on stderr; the text for the current errno is
 * appended by vwarnc().
 */
static void
__attribute__((format(__printf__, 1, 2)))
warn(const char *fmt, ...)
{
        va_list varargs;
        int err;

        va_start(varargs, fmt);
        err = errno;
        vwarnc(err, fmt, varargs);
        va_end(varargs);
}
244
245 static void
246 __attribute__((format(__printf__, 1, 2)))
247 prt(char *fmt, ...)
248 {
249         va_list args;
250
251         va_start(args, fmt);
252         vfprintf(stdout, fmt, args);
253         va_end(args);
254
255         if (fsxlogf) {
256                 va_start(args, fmt);
257                 vfprintf(fsxlogf, fmt, args);
258                 va_end(args);
259         }
260 }
261
/*
 * prterr(prefix) reports the current errno through prt() as
 *      <calling function>: <prefix>: <strerror(errno)>
 * It is a macro so that __func__ expands in the caller; ptrerr_func()
 * does the printing.
 *
 * func:   name of the reporting function
 * prefix: short context string; may be NULL, in which case only the
 *         function name and the error text are printed
 */
static void
ptrerr_func(const char *func, const char *prefix)
{
        /* sample errno before anything else can overwrite it */
        int err = errno;

        /* never hand a NULL pointer to %s: that is undefined behaviour */
        prt("%s: %s%s%s\n", func, prefix ? prefix : "", prefix ? ": " : "",
            strerror(err));
}
#define prterr(prefix) ptrerr_func(__func__, prefix)
273
274 static void
275 log4(int operation, int arg0, int arg1, int arg2)
276 {
277         struct log_entry *le;
278
279         le = &oplog[logptr];
280         le->operation = operation;
281         le->args[0] = arg0;
282         le->args[1] = arg1;
283         le->args[2] = arg2;
284         gettimeofday(&tv, NULL);
285         le->tv = tv;
286         le->tf = tf;
287         logptr++;
288         logcount++;
289         if (logptr >= LOGSIZE)
290                 logptr = 0;
291 }
292
293 static const char *
294 fill_tf_buf(const struct test_file *tf)
295 {
296         static int max_tf_len;
297         static char tf_buf[32];
298
299         if (fd_policy == FD_SINGLE)
300                 return "";
301
302         if (max_tf_len == 0)
303                 max_tf_len = scnprintf(tf_buf, sizeof(tf_buf) - 1,
304                                       "%u", num_test_files - 1);
305
306         snprintf(tf_buf, sizeof(tf_buf), "[%0*lu]", max_tf_len,
307                 (unsigned long)(tf - test_files));
308
309         return tf_buf;
310 }
311
312 static void
313 logdump(void)
314 {
315         int i, count, down;
316         struct log_entry *lp;
317         char *falloc_type[3] = {"PAST_EOF", "EXTENDING", "INTERIOR"};
318
319         prt("LOG DUMP (%d total operations):\n", logcount);
320         if (logcount < LOGSIZE) {
321                 i = 0;
322                 count = logcount;
323         } else {
324                 i = logptr;
325                 count = LOGSIZE;
326         }
327         for ( ; count > 0; count--) {
328                 int opnum;
329
330                 opnum = i + 1 + (logcount / LOGSIZE) * LOGSIZE;
331                 lp = &oplog[i];
332                 prt("%d%s: %lu.%06u ", opnum, fill_tf_buf(lp->tf),
333                     lp->tv.tv_sec, (int)lp->tv.tv_usec);
334
335                 switch (lp->operation) {
336                 case OP_MAPREAD:
337                         prt("MAPREAD  0x%05x thru 0x%05x (0x%05x bytes)",
338                             lp->args[0], lp->args[0] + lp->args[1] - 1,
339                             lp->args[1]);
340                         if (badoff >= lp->args[0] && badoff <
341                                                      lp->args[0] + lp->args[1])
342                                 prt("\t***RRRR***");
343                         break;
344                 case OP_MAPWRITE:
345                         prt("MAPWRITE 0x%05x thru 0x%05x (0x%05x bytes)",
346                             lp->args[0], lp->args[0] + lp->args[1] - 1,
347                             lp->args[1]);
348                         if (badoff >= lp->args[0] && badoff <
349                                                      lp->args[0] + lp->args[1])
350                                 prt("\t******WWWW");
351                         break;
352                 case OP_READ:
353                 case OP_READ + OP_DIRECT:
354                         prt("READ%s  0x%05x thru 0x%05x (0x%05x bytes)",
355                             lp->operation & OP_DIRECT ? "_OD" : "   ",
356                             lp->args[0], lp->args[0] + lp->args[1] - 1,
357                             lp->args[1]);
358                         if (badoff >= lp->args[0] &&
359                             badoff < lp->args[0] + lp->args[1])
360                                 prt("\t***RRRR***");
361                         break;
362                 case OP_WRITE:
363                 case OP_WRITE + OP_DIRECT:
364                         prt("WRITE%s 0x%05x thru 0x%05x (0x%05x bytes)",
365                             lp->operation & OP_DIRECT ? "_OD" : "   ",
366                             lp->args[0], lp->args[0] + lp->args[1] - 1,
367                             lp->args[1]);
368                         if (lp->args[0] > lp->args[2])
369                                 prt(" HOLE");
370                         else if (lp->args[0] + lp->args[1] > lp->args[2])
371                                 prt(" EXTEND");
372                         if ((badoff >= lp->args[0] || badoff >= lp->args[2]) &&
373                             badoff < lp->args[0] + lp->args[1])
374                                 prt("\t***WWWW");
375                         break;
376                 case OP_TRUNCATE:
377                         down = lp->args[0] < lp->args[1];
378                         prt("TRUNC%s 0x%05x to 0x%05x",
379                             down ? "_DN" : "_UP", lp->args[1], lp->args[0]);
380                         if (badoff >= lp->args[!down] &&
381                             badoff < lp->args[!!down])
382                                 prt("\t******TTTT");
383                         break;
384                 case OP_FALLOCATE:
385                         /* 0: offset 1: length 2: where alloced */
386                         prt("FALLOC   0x%05x thru 0x%05x\t(0x%05x bytes)%s",
387                             lp->args[0], lp->args[0] + lp->args[1],
388                             lp->args[1], falloc_type[lp->args[2]]);
389                         if (badoff >= lp->args[0] &&
390                             badoff < lp->args[0] + lp->args[1])
391                                 prt("\t******FFFF");
392                         break;
393                 case OP_PUNCH_HOLE:
394                         prt("PUNCH    0x%05x thru 0x%05x\t(0x%05x bytes)",
395                             lp->args[0], lp->args[0] + lp->args[1] - 1,
396                             lp->args[1]);
397                         if (badoff >= lp->args[0] && badoff <
398                                                      lp->args[0] + lp->args[1])
399                                 prt("\t******PPPP");
400                         break;
401                 case OP_ZERO_RANGE:
402                         prt("ZERO     0x%05x thru 0x%05x\t(0x%05x bytes)",
403                             lp->args[0], lp->args[0] + lp->args[1] - 1,
404                             lp->args[1]);
405                         if (badoff >= lp->args[0] && badoff <
406                                                      lp->args[0] + lp->args[1])
407                                 prt("\t******ZZZZ");
408                         break;
409                 case OP_CLOSEOPEN:
410                 case OP_CLOSEOPEN + OP_DIRECT:
411                         prt("CLOSE/OPEN%s",
412                             lp->operation & OP_DIRECT ? "_OD" : "   ");
413                         break;
414                 case OP_MIRROR_OPS: {
415                         prt("%s ", mirror_op_str[lp->args[0]]);
416                         if (lp->args[0] == MIRROR_EXTEND)
417                                 prt("to %d mirrors", lp->args[1] + 1);
418                         else if (lp->args[0] == MIRROR_SPLIT)
419                                 prt("mirror %d to %d mirrors", lp->args[2],
420                                     lp->args[1] - 1);
421                         else if (lp->args[0] == MIRROR_RESYNC)
422                                 prt("%d mirrors", lp->args[1]);
423                         break;
424                 }
425                 case OP_SKIPPED:
426                         prt("SKIPPED (no operation)");
427                         break;
428                 default:
429                         prt("BOGUS LOG ENTRY (operation code = %d)!",
430                             lp->operation);
431                 }
432                 prt("\n");
433                 i++;
434                 if (i == LOGSIZE)
435                         i = 0;
436         }
437 }
438
439 static void
440 save_buffer(char *buffer, off_t bufferlength, int fd)
441 {
442         off_t ret;
443         ssize_t byteswritten;
444
445         if (fd <= 0 || bufferlength == 0)
446                 return;
447
448         if (bufferlength > INT_MAX) {
449                 prt("fsx flaw: overflow in %s\n", __func__);
450                 exit(67);
451         }
452         if (lite) {
453                 off_t size_by_seek = lseek(fd, (off_t)0, SEEK_END);
454
455                 if (size_by_seek == (off_t)-1) {
456                         prterr("lseek eof");
457                 } else if (bufferlength > size_by_seek) {
458                         warn("%s: .fsxgood file too short... will save 0x%llx bytes instead of 0x%llx\n",
459                              __func__, (unsigned long long)size_by_seek,
460                              (unsigned long long)bufferlength);
461                         bufferlength = size_by_seek;
462                 }
463         }
464
465         ret = lseek(fd, (off_t)0, SEEK_SET);
466         if (ret == (off_t)-1)
467                 prterr("lseek 0");
468
469         byteswritten = write(fd, buffer, (size_t)bufferlength);
470         if (byteswritten != bufferlength) {
471                 if (byteswritten == -1)
472                         prterr("write");
473                 else
474                         warn("%s: short write, 0x%x bytes instead of 0x%llx\n",
475                              __func__, (unsigned int)byteswritten,
476                              (unsigned long long)bufferlength);
477         }
478 }
479
480 static void
481 report_failure(int status)
482 {
483         logdump();
484         prt("Using seed %d\n", seed);
485
486         if (fsxgoodfd) {
487                 if (good_buf) {
488                         save_buffer(good_buf, file_size, fsxgoodfd);
489                         prt("Correct content saved for comparison\n");
490                         prt("(maybe hexdump \"%s\" vs \"%s\")\n",
491                             fname, goodfile);
492                 }
493                 close(fsxgoodfd);
494         }
495         exit(status);
496 }
497
/*
 * Read the two bytes at address cp as a big-endian 16-bit value:
 * byte 0 is the high half, byte 1 the low half.
 */
#define short_at(cp) \
        ((unsigned short)((((unsigned char *)(cp))[0] << 8) | \
                          ((unsigned char *)(cp))[1]))
500
501 static void
502 check_buffers(unsigned int offset, unsigned int size)
503 {
504         unsigned char c, t;
505         unsigned int i = 0;
506         unsigned int n = 0;
507         unsigned int op = 0;
508         unsigned int bad = 0;
509
510         if (memcmp(good_buf + offset, temp_buf, size) != 0) {
511                 prt("READ BAD DATA: offset = 0x%x, size = 0x%x\n",
512                     offset, size);
513                 prt("OFFSET\tGOOD\tBAD\tRANGE\n");
514                 while (size > 0) {
515                         c = good_buf[offset];
516                         t = temp_buf[i];
517                         if (c != t) {
518                                 if (n == 0) {
519                                         bad = short_at(&temp_buf[i]);
520                                         prt("%#07x\t%#06x\t%#06x", offset,
521                                             short_at(&good_buf[offset]), bad);
522                                         op = temp_buf[offset & 1 ? i + 1 : i];
523                                 }
524                                 n++;
525                                 badoff = offset;
526                         }
527                         offset++;
528                         i++;
529                         size--;
530                 }
531                 if (n) {
532                         prt("\t%#7x\n", n);
533                         if (bad)
534                                 prt("operation# (mod 256) for the bad data may be %u\n",
535                                     ((unsigned int)op & 0xff));
536                         else
537                                 prt("operation# (mod 256) for the bad data unknown, check HOLE and EXTEND ops\n");
538                 } else {
539                         prt("????????????????\n");
540                 }
541                 report_failure(110);
542         }
543 }
544
545 static struct test_file *
546 get_tf(void)
547 {
548         unsigned int index = 0;
549
550         switch (fd_policy) {
551         case FD_ROTATE:
552                 index = fd_last++;
553                 break;
554         case FD_RANDOM:
555                 index = random();
556                 break;
557         case FD_SINGLE:
558                 index = 0;
559                 break;
560         default:
561                 prt("unknown policy");
562                 exit(1);
563                 break;
564         }
565         return &test_files[index % num_test_files];
566 }
567
568 static void
569 assign_fd_policy(char *policy)
570 {
571         if (!strcmp(policy, "random")) {
572                 fd_policy = FD_RANDOM;
573         } else if (!strcmp(policy, "rotate")) {
574                 fd_policy = FD_ROTATE;
575         } else {
576                 prt("unknown -I policy: '%s'\n", policy);
577                 exit(1);
578         }
579 }
580
581 static int
582 get_fd(void)
583 {
584         struct test_file *tf = get_tf();
585
586         return tf->fd;
587 }
588
/*
 * Return the final component of path: the text after the last '/', or
 * path itself when it contains no '/'.  The result points into path; a
 * path ending in '/' yields an empty string.
 */
static const char *my_basename(const char *path)
{
        const char *slash = strrchr(path, '/');

        /* step past the separator; a post-increment here would return it */
        return slash ? slash + 1 : path;
}
595
596 static void
597 open_test_files(char **argv, int argc)
598 {
599         struct test_file *tf;
600         int i;
601
602         num_test_files = argc;
603         if (num_test_files == 1)
604                 fd_policy = FD_SINGLE;
605
606         test_files = calloc(num_test_files, sizeof(*test_files));
607         if (!test_files) {
608                 prterr("reallocating space for test files");
609                 exit(1);
610         }
611
612         for (i = 0, tf = test_files; i < num_test_files; i++, tf++) {
613                 tf->path = argv[i];
614                 tf->o_direct = (random() % (o_direct + 1)) ? OP_DIRECT : 0;
615                 tf->fd = open(tf->path,
616                               O_RDWR | (lite ? 0 : O_CREAT | O_TRUNC) |
617                               tf->o_direct, 0666);
618                 if (tf->fd < 0) {
619                         prterr(tf->path);
620                         exit(91);
621                 }
622         }
623
624         if (quiet || fd_policy == FD_SINGLE)
625                 return;
626
627         for (i = 0, tf = test_files; i < num_test_files; i++, tf++)
628                 prt("fd %d: %s\n", i, tf->path);
629 }
630
631 static void
632 close_test_files(void)
633 {
634         int i;
635         struct test_file *tf;
636
637         for (i = 0, tf = test_files; i < num_test_files; i++, tf++) {
638                 if (close(tf->fd)) {
639                         prterr("close");
640                         report_failure(99);
641                 }
642         }
643 }
644
645 static void
646 check_size(void)
647 {
648         struct stat statbuf;
649         off_t size_by_seek;
650         int fd = get_fd();
651
652         if (fstat(fd, &statbuf)) {
653                 prterr("fstat");
654                 statbuf.st_size = -1;
655         }
656         size_by_seek = lseek(fd, (off_t)0, SEEK_END);
657         if (file_size != statbuf.st_size || file_size != size_by_seek) {
658                 prt("Size error: expected 0x%llx stat 0x%llx seek 0x%llx\n",
659                     (unsigned long long)file_size,
660                     (unsigned long long)statbuf.st_size,
661                     (unsigned long long)size_by_seek);
662                 report_failure(120);
663         }
664 }
665
/*
 * Check that ftruncate() can grow a file: truncate to 0, extend to
 * 100000 bytes, confirm the new size with fstat(), then truncate back
 * to 0.  Exits with status 130 if the file did not grow and with status
 * 1 if any ftruncate() call fails.
 */
static void
check_trunc_hack(void)
{
        const off_t grown_size = (off_t)100000;
        struct stat st;
        int fd = get_fd();

        /* should not ignore ftruncate(2)'s return value */
        if (ftruncate(fd, (off_t)0) < 0) {
                prterr("trunc_hack: ftruncate(0)");
                exit(1);
        }
        if (ftruncate(fd, grown_size) < 0) {
                prterr("trunc_hack: ftruncate(100000)");
                exit(1);
        }

        if (fstat(fd, &st) != 0) {
                prterr("trunc_hack: fstat");
                /* guarantees the size test below fails */
                st.st_size = -1;
        }
        if (st.st_size != grown_size) {
                prt("no extend on truncate! not posix!\n");
                exit(130);
        }

        if (ftruncate(fd, 0) < 0) {
                prterr("trunc_hack: ftruncate(0) (2nd call)");
                exit(1);
        }
}
694
695 static void
696 output_line(struct test_file *tf, int op, unsigned int offset,
697             unsigned int size)
698 {
699         char *ops[] = {
700                 [OP_READ] = "read",
701                 [OP_WRITE] = "write",
702                 [OP_TRUNCATE] = "trunc from",
703                 [OP_MAPREAD] = "mapread",
704                 [OP_MAPWRITE] = "mapwrite",
705                 [OP_READ + OP_DIRECT] = "read_OD",
706                 [OP_WRITE + OP_DIRECT] = "write_OD",
707                 [OP_FALLOCATE] = "fallocate",
708                 [OP_PUNCH_HOLE] = "punch from",
709         };
710
711         /* W. */
712         if (!(!quiet &&
713             ((progressinterval && testcalls % progressinterval == 0) ||
714             (debug && (monitorstart == -1 ||
715             (offset + size > monitorstart &&
716             (monitorend == -1 || offset <= monitorend)))))))
717                 return;
718
719         prt("%06lu%s %lu.%06u %-10s %#08x %s %#08x\t(0x%x bytes)\n",
720             testcalls, fill_tf_buf(tf), tv.tv_sec, (int)tv.tv_usec,
721             ops[op], offset, op == OP_TRUNCATE || op == OP_PUNCH_HOLE ?
722             " to " : "thru", offset + size - 1,
723              (int)size < 0 ? -(int)size : size);
724 }
725
726 static void
727 mirror_output_line(struct test_file *tf, int op, int mirrors, int id)
728 {
729         if (!(!quiet &&
730               ((progressinterval && testcalls % progressinterval == 0) ||
731                (debug && (monitorstart == -1)))))
732                 return;
733
734         prt("%06lu %lu.%06u %-10s ",
735             testcalls, tv.tv_sec, (int)tv.tv_usec, mirror_op_str[op]);
736
737         switch (op) {
738         case MIRROR_EXTEND:
739                 prt("to %d mirrors\n", mirrors + 1);
740                 break;
741         case MIRROR_SPLIT:
742                 prt("mirror %d to %d mirrors\n", id, mirrors - 1);
743                 break;
744         case MIRROR_RESYNC:
745                 prt("%d mirrors\n", mirrors);
746                 break;
747         }
748 }
749
750 static void output_debug(unsigned int offset, unsigned int size,
751                          const char *what)
752 {
753         struct timeval t;
754
755         if (!quiet && (debug > 1 && (monitorstart == -1 ||
756             (offset + size >= monitorstart &&
757              (monitorend == -1 || offset <= monitorend))))) {
758                 gettimeofday(&t, NULL);
759                 prt("       %lu.%06u %s\n", t.tv_sec, (int)t.tv_usec, what);
760         }
761 }
762
763 static void
764 doflush(unsigned int offset, unsigned int size)
765 {
766         unsigned int pg_offset;
767         unsigned int map_size;
768         char *p;
769         struct test_file *tf = get_tf();
770         int fd = tf->fd;
771
772         if (tf->o_direct)
773                 return;
774
775         pg_offset = offset & page_mask;
776         map_size  = pg_offset + size;
777
778         p = (char *)mmap(0, map_size, PROT_READ | PROT_WRITE,
779                          MAP_FILE | MAP_SHARED, fd,
780                          (off_t)(offset - pg_offset));
781         if (p == (char *)-1) {
782                 prterr("mmap");
783                 report_failure(202);
784         }
785         if (msync(p, map_size, MS_INVALIDATE) != 0) {
786                 prterr("msync");
787                 report_failure(203);
788         }
789         if (munmap(p, map_size) != 0) {
790                 prterr("munmap");
791                 report_failure(204);
792         }
793         output_debug(offset, size, "flush done");
794 }
795
796 static void
797 doread(unsigned int offset, unsigned int size)
798 {
799         off_t ret;
800         unsigned int iret;
801         struct test_file *tf = get_tf();
802         int fd = tf->fd;
803
804         offset -= offset % readbdy;
805         if (tf->o_direct)
806                 size -= size % readbdy;
807
808         if (size == 0) {
809                 if (!quiet && testcalls > simulatedopcount && !tf->o_direct)
810                         prt("skipping zero size read\n");
811                 log4(OP_SKIPPED, OP_READ, offset, size);
812                 return;
813         }
814         if (size + offset > file_size) {
815                 if (!quiet && testcalls > simulatedopcount)
816                         prt("skipping seek/read past end of file\n");
817                 log4(OP_SKIPPED, OP_READ, offset, size);
818                 return;
819         }
820
821         log4(OP_READ + tf->o_direct, offset, size, 0);
822
823         if (testcalls <= simulatedopcount)
824                 return;
825
826         output_line(tf, OP_READ + tf->o_direct, offset, size);
827
828         ret = lseek(fd, (off_t)offset, SEEK_SET);
829         if (ret == (off_t)-1) {
830                 prterr("lseek");
831                 report_failure(140);
832         }
833         iret = read(fd, temp_buf, size);
834         output_debug(offset, size, "read done");
835         if (iret != size) {
836                 if (iret == -1)
837                         prterr("read");
838                 else
839                         prt("short read: 0x%x bytes instead of 0x%x\n",
840                             iret, size);
841                 report_failure(141);
842         }
843         check_buffers(offset, size);
844 }
845
846 static void
847 check_eofpage(char *s, unsigned int offset, char *p, int size)
848 {
849         long last_page, should_be_zero;
850
851         if (offset + size <= (file_size & ~page_mask))
852                 return;
853         /*
854          * we landed in the last page of the file
855          * test to make sure the VM system provided 0's
856          * beyond the true end of the file mapping
857          * (as required by mmap def in 1996 posix 1003.1)
858          */
859         last_page = ((long)p + (offset & page_mask) + size) & ~page_mask;
860
861         for (should_be_zero = last_page + (file_size & page_mask);
862              should_be_zero < last_page + page_size;
863              should_be_zero++)
864                 if (*(char *)should_be_zero) {
865                         prt("Mapped %s: non-zero data past EOF (0x%llx) page offset 0x%lx is 0x%04x\n",
866                             s, (long long)file_size - 1,
867                             should_be_zero & page_mask,
868                             short_at(should_be_zero));
869                         report_failure(205);
870                 }
871 }
872
/*
 * Read `size` bytes at `offset` from the current test file through a
 * shared read-only mmap() into temp_buf, then call check_buffers() on the
 * range.
 *
 * offset is first rounded down to a multiple of readbdy.  Zero-length
 * reads and reads that would end past file_size are logged as OP_SKIPPED
 * and not performed.  While testcalls <= simulatedopcount the operation is
 * only logged.
 */
static void
domapread(unsigned int offset, unsigned int size)
{
        unsigned int pg_offset;
        unsigned int map_size;
        char *p;
        int fd;

        offset -= offset % readbdy;
        tf = get_tf();
        fd = tf->fd;
        if (size == 0) {
                if (!quiet && testcalls > simulatedopcount)
                        prt("skipping zero size read\n");
                log4(OP_SKIPPED, OP_MAPREAD, offset, size);
                return;
        }
        if (size + offset > file_size) {
                if (!quiet && testcalls > simulatedopcount)
                        prt("skipping seek/read past end of file\n");
                log4(OP_SKIPPED, OP_MAPREAD, offset, size);
                return;
        }

        log4(OP_MAPREAD, offset, size, 0);

        if (testcalls <= simulatedopcount)
                return;

        output_line(tf, OP_MAPREAD, offset, size);

        /*
         * The mapping starts at the page boundary below offset, so it is
         * pg_offset bytes longer than the requested read.
         */
        pg_offset = offset & page_mask;
        map_size  = pg_offset + size;

        p = mmap(0, map_size, PROT_READ, MAP_FILE | MAP_SHARED, fd,
                 (off_t)(offset - pg_offset));
        if (p == MAP_FAILED) {
                prterr("mmap");
                report_failure(190);
        }
        output_debug(offset, size, "mmap done");
        /*
         * jmpbuf_good is set only while the mapping is being touched;
         * segv() longjmps back here when it is set, which lands in the
         * else branch below.
         * NOTE(review): presumably segv() is installed as the fault
         * signal handler elsewhere in the file - confirm.
         */
        if (setjmp(jmpbuf) == 0) {
                jmpbuf_good = 1;
                memcpy(temp_buf, p + pg_offset, size);
                check_eofpage("Read", offset, p, size);
                jmpbuf_good = 0;
        } else {
                report_failure(1901);
        }
        output_debug(offset, size, "memcpy done");
        if (munmap(p, map_size) != 0) {
                prterr("munmap");
                report_failure(191);
        }
        output_debug(offset, size, "munmap done");

        check_buffers(offset, size);
}
931
932 static void
933 gendata(char *original_buf, char *good_buf, unsigned int offset,
934         unsigned int size)
935 {
936         while (size--) {
937                 good_buf[offset] = testcalls % 256;
938                 if (offset % 2)
939                         good_buf[offset] += original_buf[offset];
940                 offset++;
941         }
942 }
943
944 static void
945 dowrite(unsigned int offset, unsigned int size)
946 {
947         off_t ret;
948         unsigned int iret;
949         int fd;
950
951         tf = get_tf();
952         fd = tf->fd;
953         offset -= offset % writebdy;
954         if (tf->o_direct)
955                 size -= size % writebdy;
956         if (size == 0) {
957                 if (!quiet && testcalls > simulatedopcount && !tf->o_direct)
958                         prt("skipping zero size write\n");
959                 log4(OP_SKIPPED, OP_WRITE, offset, size);
960                 return;
961         }
962
963         log4(OP_WRITE + tf->o_direct, offset, size, file_size);
964
965         gendata(original_buf, good_buf, offset, size);
966         if (file_size < offset + size) {
967                 if (file_size < offset)
968                         memset(good_buf + file_size, '\0', offset - file_size);
969                 file_size = offset + size;
970                 if (lite) {
971                         warn("Lite file size bug in fsx!");
972                         report_failure(149);
973                 }
974         }
975
976         if (testcalls <= simulatedopcount)
977                 return;
978
979         output_line(tf, OP_WRITE + tf->o_direct, offset, size);
980
981         ret = lseek(fd, (off_t)offset, SEEK_SET);
982         if (ret == (off_t)-1) {
983                 prterr("lseek");
984                 report_failure(150);
985         }
986         iret = write(fd, good_buf + offset, size);
987         output_debug(offset, size, "write done");
988         if (iret != size) {
989                 if (iret == -1)
990                         prterr("write");
991                 else
992                         prt("short write: 0x%x bytes instead of 0x%x\n",
993                             iret, size);
994                 report_failure(151);
995         }
996         if (do_fsync) {
997                 if (fsync(fd)) {
998                         prt("fsync() failed: %s\n", strerror(errno));
999                         report_failure(152);
1000                 }
1001                 output_debug(offset, size, "fsync done");
1002         }
1003         if (flush) {
1004                 doflush(offset, size);
1005                 output_debug(offset, size, "flush done");
1006         }
1007 }
1008
1009 static void
1010 domapwrite(unsigned int offset, unsigned int size)
1011 {
1012         unsigned int pg_offset;
1013         unsigned int map_size;
1014         off_t cur_filesize;
1015         char *p;
1016         int fd;
1017
1018         tf = get_tf();
1019         fd = tf->fd;
1020         offset -= offset % writebdy;
1021         if (size == 0) {
1022                 if (!quiet && testcalls > simulatedopcount)
1023                         prt("skipping zero size write\n");
1024                 log4(OP_SKIPPED, OP_MAPWRITE, offset, size);
1025                 return;
1026         }
1027         cur_filesize = file_size;
1028
1029         log4(OP_MAPWRITE, offset, size, 0);
1030
1031         gendata(original_buf, good_buf, offset, size);
1032         if (file_size < offset + size) {
1033                 if (file_size < offset)
1034                         memset(good_buf + file_size, '\0', offset - file_size);
1035                 file_size = offset + size;
1036                 if (lite) {
1037                         warn("Lite file size bug in fsx!");
1038                         report_failure(200);
1039                 }
1040         }
1041
1042         if (testcalls <= simulatedopcount)
1043                 return;
1044
1045         output_line(tf, OP_MAPWRITE, offset, size);
1046
1047         if (file_size > cur_filesize) {
1048                 if (ftruncate(fd, file_size) == -1) {
1049                         prterr("ftruncate");
1050                         exit(201);
1051                 }
1052                 output_debug(offset, size, "truncate done");
1053         }
1054         pg_offset = offset & page_mask;
1055         map_size  = pg_offset + size;
1056
1057         p = mmap(0, map_size, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED,
1058                  fd, (off_t)(offset - pg_offset));
1059         if (p == MAP_FAILED) {
1060                 prterr("mmap");
1061                 report_failure(202);
1062         }
1063         output_debug(offset, map_size, "mmap done");
1064         if (setjmp(jmpbuf) == 0) {
1065                 jmpbuf_good = 1;
1066                 memcpy(p + pg_offset, good_buf + offset, size);
1067                 if (msync(p, map_size, MS_SYNC) != 0) {
1068                         prterr("msync");
1069                         report_failure(203);
1070                 }
1071                 check_eofpage("Write", offset, p, size);
1072                 jmpbuf_good = 0;
1073         } else {
1074                 report_failure(2021);
1075         }
1076         output_debug(offset, map_size, "msync done");
1077         if (munmap(p, map_size) != 0) {
1078                 prterr("munmap");
1079                 report_failure(204);
1080         }
1081         output_debug(offset, map_size, "munmap done");
1082 }
1083
1084 static void
1085 dotruncate(unsigned int size)
1086 {
1087         int oldsize = file_size;
1088         int fd;
1089
1090         tf = get_tf();
1091         fd = tf->fd;
1092         size -= size % truncbdy;
1093         if (size > biggest) {
1094                 biggest = size;
1095                 if (!quiet && testcalls > simulatedopcount)
1096                         prt("truncating to largest ever: 0x%x\n", size);
1097         }
1098
1099         log4(OP_TRUNCATE, size, (unsigned int)file_size, 0);
1100
1101         if (size > file_size)
1102                 memset(good_buf + file_size, '\0', size - file_size);
1103         file_size = size;
1104
1105         if (testcalls <= simulatedopcount)
1106                 return;
1107
1108         output_line(tf, OP_TRUNCATE, oldsize, size - oldsize);
1109
1110         if (ftruncate(fd, (off_t)size) == -1) {
1111                 prt("ftruncate: 0x%x\n", size);
1112                 prterr("ftruncate");
1113                 report_failure(160);
1114         }
1115         output_debug(size, 0, "truncate done");
1116 }
1117
1118 static void
1119 do_punch_hole(unsigned int offset, unsigned int length)
1120 {
1121         int max_offset = 0;
1122         int max_len = 0;
1123         int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
1124         int fd;
1125
1126         tf = get_tf();
1127         fd = tf->fd;
1128         if (length == 0) {
1129                 if (!quiet && testcalls > simulatedopcount) {
1130                         prt("skipping zero length punch hole\n");
1131                         log4(OP_SKIPPED, OP_PUNCH_HOLE, offset, length);
1132                 }
1133                 return;
1134         }
1135
1136         if (file_size <= (loff_t)offset) {
1137                 if (!quiet && testcalls > simulatedopcount) {
1138                         prt("skipping hole punch off the end of the file\n");
1139                         log4(OP_SKIPPED, OP_PUNCH_HOLE, offset, length);
1140                 }
1141                 return;
1142         }
1143
1144         log4(OP_PUNCH_HOLE, offset, length, 0);
1145
1146         if (testcalls <= simulatedopcount)
1147                 return;
1148
1149         output_line(tf, OP_PUNCH_HOLE, offset, length);
1150         if (fallocate(fd, mode, (loff_t)offset, (loff_t)length) == -1) {
1151                 prt("punch hole: %x to %x\n", offset, length);
1152                 prterr("fallocate");
1153                 report_failure(161);
1154         }
1155         output_debug(offset, length, "zero_range done");
1156
1157         max_offset = offset < file_size ? offset : file_size;
1158         max_len = max_offset + length <= file_size ? length :
1159                         file_size - max_offset;
1160         memset(good_buf + max_offset, '\0', max_len);
1161 }
1162
1163 static void
1164 do_zero_range(unsigned int offset, unsigned int length)
1165 {
1166         unsigned int end_offset;
1167         int mode = FALLOC_FL_ZERO_RANGE;
1168         int keep_size;
1169         int fd;
1170
1171         tf = get_tf();
1172         fd = tf->fd;
1173         if (length == 0) {
1174                 if (!quiet && testcalls > simulatedopcount) {
1175                         prt("skipping zero length zero range\n");
1176                         log4(OP_SKIPPED, OP_ZERO_RANGE, offset, length);
1177                 }
1178                 return;
1179         }
1180
1181         keep_size = random() % 2;
1182
1183         end_offset = keep_size ? 0 : offset + length;
1184
1185         if (end_offset > biggest) {
1186                 biggest = end_offset;
1187                 if (!quiet && testcalls > simulatedopcount)
1188                         prt("zero_range to largest ever: 0x%x\n", end_offset);
1189         }
1190
1191         /*
1192          * last arg matches fallocate string array index in logdump:
1193          * 0: allocate past EOF
1194          * 1: extending prealloc
1195          * 2: interior prealloc
1196          */
1197         log4(OP_ZERO_RANGE, offset, length,
1198              (end_offset > file_size) ? (keep_size ? 0 : 1) : 2);
1199
1200         if (testcalls <= simulatedopcount)
1201                 return;
1202
1203         output_line(tf, OP_TRUNCATE, offset, length);
1204
1205         if (fallocate(fd, mode, (loff_t)offset, (loff_t)length) == -1) {
1206                 prt("pzero range: %x to %x\n", offset, length);
1207                 prterr("fallocate");
1208                 report_failure(161);
1209         }
1210         output_debug(offset, length, "zero_range done");
1211
1212         memset(good_buf + offset, '\0', length);
1213 }
1214
1215 /*
1216  * fallocate is basically a no-op unless extending,
1217  * then a lot like a truncate
1218  */
1219 static void
1220 do_preallocate(unsigned int offset, unsigned int length)
1221 {
1222         off_t end_offset;
1223         int keep_size;
1224         int fd;
1225         struct stat statbufs;
1226
1227         tf = get_tf();
1228         fd = tf->fd;
1229         if (length == 0) {
1230                 if (!quiet && testcalls > simulatedopcount)
1231                         prt("skipping zero length fallocate\n");
1232                 log4(OP_SKIPPED, OP_FALLOCATE, offset, length);
1233                 return;
1234         }
1235
1236         keep_size = fl_keep_size && (random() % 2);
1237
1238         end_offset = offset + length;
1239         if (end_offset > biggest) {
1240                 biggest = end_offset;
1241                 if (!quiet && testcalls > simulatedopcount)
1242                         prt("fallocating to largest ever: 0x%jx\n", end_offset);
1243         }
1244
1245         /*
1246          * last arg matches fallocate string array index in logdump:
1247          * 0: allocate past EOF
1248          * 1: extending prealloc
1249          * 2: interior prealloc
1250          */
1251         log4(OP_FALLOCATE, offset, length, (end_offset > file_size) ?
1252              (keep_size ? 0 : 1) : 2);
1253
1254         if (end_offset > file_size && !keep_size) {
1255                 memset(good_buf + file_size, '\0', end_offset - file_size);
1256                 file_size = end_offset;
1257         }
1258
1259         if (testcalls <= simulatedopcount)
1260                 return;
1261
1262         fstat(fd, &statbufs);
1263         if (fallocate(fd, keep_size ? FALLOC_FL_KEEP_SIZE : 0, (loff_t)offset,
1264                       (loff_t)length) == -1) {
1265                 prt("fallocate: %x to %x\n", offset, length);
1266                 prterr("fallocate");
1267                 report_failure(161);
1268         }
1269         output_line(tf, OP_FALLOCATE, offset, length);
1270         output_debug(offset, length, "fallocate done");
1271 }
1272
1273 static void
1274 writefileimage()
1275 {
1276         ssize_t iret;
1277         int fd = get_fd();
1278
1279         if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) {
1280                 prterr("lseek");
1281                 report_failure(171);
1282         }
1283         iret = write(fd, good_buf, file_size);
1284         if ((off_t)iret != file_size) {
1285                 if (iret == -1)
1286                         prterr("write");
1287                 else
1288                         prt("short write: 0x%lx bytes instead of 0x%llx\n",
1289                             (unsigned long)iret, (unsigned long long)file_size);
1290                 report_failure(172);
1291         }
1292         if (lite ? 0 : ftruncate(fd, file_size) == -1) {
1293                 prt("ftruncate2: %llx\n", (unsigned long long)file_size);
1294                 prterr("ftruncate");
1295                 report_failure(173);
1296         }
1297 }
1298
1299 static void
1300 docloseopen(void)
1301 {
1302         int direct = 0;
1303         const char *tf_num = "";
1304
1305         if (testcalls <= simulatedopcount)
1306                 return;
1307
1308         tf = get_tf();
1309         direct = (random() % (o_direct + 1)) ? OP_DIRECT : 0;
1310         log4(OP_CLOSEOPEN + direct, file_size, (unsigned int)file_size, 0);
1311
1312         if (fd_policy != FD_SINGLE)
1313                 tf_num = fill_tf_buf(tf);
1314
1315         if (debug)
1316                 prt("%06lu %lu.%06u %sclose/open%s\n", testcalls, tv.tv_sec,
1317                     (int)tv.tv_usec, tf_num, direct ? "(O_DIRECT)" : "");
1318         if (close(tf->fd))
1319                 report_failure(180);
1320
1321         output_debug(monitorstart, 0, "close done");
1322         tf->o_direct = direct;
1323         tf->fd = open(tf->path, O_RDWR | tf->o_direct, 0);
1324         if (tf->fd < 0) {
1325                 prterr(tf->o_direct ? "open(O_DIRECT)" : "open");
1326                 report_failure(181);
1327         }
1328         output_debug(monitorstart, 0,
1329                      tf->o_direct ? "open(O_DIRECT) done" : "open done");
1330 }
1331
1332 static int
1333 get_mirror_ids(int fd, unsigned int *ids)
1334 {
1335         struct llapi_layout *layout;
1336         uint16_t count;
1337         int rc;
1338
1339         layout = llapi_layout_get_by_fd(fd, 0);
1340         if (layout == NULL)
1341                 return 0;
1342
1343         /* only get mirror count */
1344         rc = llapi_layout_mirror_count_get(layout, &count);
1345         if (rc < 0)
1346                 prt("llapi_layout_mirror_count_get: %d\n", rc);
1347         if (count == 0)
1348                 return 0;
1349
1350         count = 0;
1351
1352         rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
1353         if (rc < 0) {
1354                 prt("llapi_layout_comp_use(USE_FIRST): %d\n", rc);
1355                 goto free;
1356         }
1357
1358         do {
1359                 unsigned int id;
1360
1361                 rc = llapi_layout_mirror_id_get(layout, &id);
1362                 if (rc < 0) {
1363                         prt("llapi_layout_mirror_id_get: %d\n", rc);
1364                         goto free;
1365                 }
1366
1367                 if (!count || ids[count - 1] != id)
1368                         ids[count++] = id;
1369
1370                 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT);
1371                 if (rc < 0) {
1372                         prt("llapi_layout_comp_use(USE_NEXT): %d\n", rc);
1373                         goto free;
1374                 }
1375         } while (rc == 0);
1376
1377 free:
1378         llapi_layout_free(layout);
1379
1380         return rc < 0 ? rc : count;
1381 }
1382
1383 static void
1384 do_mirror_ops(int op)
1385 {
1386         int mirror_count;
1387         char cmd[PATH_MAX * 2];
1388         int i = 0;
1389         int rc;
1390
1391         if (testcalls <= simulatedopcount)
1392                 return;
1393
1394         tf = get_tf();
1395
1396         mirror_count = get_mirror_ids(tf->fd, mirror_ids);
1397         if (mirror_count < 0) {
1398                 prterr("get_mirror_ids");
1399                 report_failure(182);
1400         }
1401
1402         switch (op) {
1403         case MIRROR_EXTEND:
1404                 if (mirror_count == LUSTRE_MIRROR_COUNT_MAX)
1405                         return;
1406                 snprintf(cmd, sizeof(cmd), "lfs mirror extend -N -c-1 %s",
1407                          tf->path);
1408                 break;
1409         case MIRROR_SPLIT:
1410                 if (mirror_count == 0 || mirror_count == 1)
1411                         return;
1412
1413                 i = random() % mirror_count;
1414                 if (i == 0)
1415                         i++;
1416
1417                 snprintf(cmd, sizeof(cmd),
1418                          "lfs mirror split -d --mirror-id=%d %s",
1419                          mirror_ids[i], tf->path);
1420                 break;
1421         case MIRROR_RESYNC:
1422                 if (mirror_count < 2)
1423                         return;
1424
1425                 snprintf(cmd, sizeof(cmd),
1426                          "lfs mirror resync %s", tf->path);
1427                 break;
1428         }
1429
1430         if (close(tf->fd))
1431                 report_failure(183);
1432         output_debug(monitorstart, 0, "close done");
1433
1434         log4(OP_MIRROR_OPS, op, mirror_count, i);
1435
1436         mirror_output_line(tf, op, mirror_count, i);
1437
1438         rc = system(cmd);
1439         if (rc < 0) {
1440                 prt("%s: %d\n", cmd, errno);
1441                 report_failure(184);
1442         } else if (WIFEXITED(rc)) {
1443                 rc = WEXITSTATUS(rc);
1444                 if (rc > 0) {
1445                         prt("%s: %d\n", cmd, rc);
1446                         /* mirror split won't delete the last non-stale mirror,
1447                          * and returns EUCLEAN
1448                          */
1449                         if (rc != EUCLEAN)
1450                                 report_failure(184);
1451                 }
1452         }
1453         output_debug(monitorstart, 0, cmd);
1454
1455         switch (op) {
1456         case MIRROR_SPLIT:
1457                 if (mirror_count == 2)
1458                         break;
1459         case MIRROR_EXTEND:
1460         case MIRROR_RESYNC:
1461                 /* verify mirror */
1462                 snprintf(cmd, sizeof(cmd),
1463                          "lfs mirror verify %s", tf->path);
1464
1465                 rc = system(cmd);
1466                 if (rc < 0) {
1467                         prt("mirror op %d: %s: %d\n", op, cmd, errno);
1468                         report_failure(184);
1469                 } else if (WIFEXITED(rc)) {
1470                         rc = WEXITSTATUS(rc);
1471                         if (rc > 0) {
1472                                 prt("mirror op %d: %s: %d\n", op, cmd, rc);
1473                                 snprintf(cmd, sizeof(cmd),
1474                                          "lfs mirror verify -v %s", tf->path);
1475                                 rc = system(cmd);
1476                                 report_failure(184);
1477                         }
1478                 }
1479         }
1480
1481         output_debug(monitorstart, 0, cmd);
1482
1483         tf->fd = open(tf->path, O_RDWR | tf->o_direct, 0);
1484         if (tf->fd < 0) {
1485                 prterr(tf->o_direct ? "open(O_DIRECT)" : "open");
1486                 report_failure(185);
1487         }
1488         output_debug(monitorstart, 0,
1489                      tf->o_direct ? "open(O_DIRECT) done" : "open done");
1490 }
1491
/*
 * Clamp an (offset, length) pair so that it lies within [0, size):
 * off is reduced modulo size (or set to 0 when size is 0) and len is
 * shortened so that off + len does not exceed size.
 * off and len must be modifiable lvalues; every argument is evaluated
 * more than once.
 */
#define TRIM_OFF_LEN(off, len, size)                    \
do {                                                    \
        (off) = (size) ? (off) % (size) : 0;            \
        if ((off) + (len) > (size))                     \
                (len) = (size) - (off);                 \
} while (0)
1501
1502 static void
1503 test(void)
1504 {
1505         unsigned long offset;
1506         unsigned long size = maxoplen;
1507         unsigned long rv = random();
1508         unsigned long op;
1509         int closeopen = 0;
1510
1511         if (simulatedopcount > 0 && testcalls == simulatedopcount)
1512                 writefileimage();
1513
1514         testcalls++;
1515
1516         if (closeprob)
1517                 closeopen = (rv >> 3) < (1 << 28) / closeprob;
1518
1519         if (debugstart > 0 && testcalls >= debugstart)
1520                 debug = 1;
1521
1522         if (!quiet && testcalls < simulatedopcount && testcalls % 100000 == 0)
1523                 prt("%lu...\n", testcalls);
1524
1525         offset = random();
1526         if (randomoplen)
1527                 size = random() % (maxoplen + 1);
1528
1529         /* calculate appropriate op to run */
1530         if (lite)
1531                 op = rv % OP_MAX_LITE;
1532         else
1533                 op = rv % OP_MAX_FULL;
1534
1535         switch (op) {
1536         case OP_MAPREAD:
1537                 if (!mapped_reads)
1538                         op = OP_READ;
1539                 break;
1540         case OP_MAPWRITE:
1541                 if (!mapped_writes)
1542                         op = OP_WRITE;
1543                 break;
1544         case OP_FALLOCATE:
1545                 if (!fallocate_calls) {
1546                         log4(OP_SKIPPED, OP_FALLOCATE, offset, size);
1547                         goto out;
1548                 }
1549                 break;
1550         case OP_PUNCH_HOLE:
1551                 if (!punch_hole_calls) {
1552                         log4(OP_SKIPPED, OP_PUNCH_HOLE, offset, size);
1553                         goto out;
1554                 }
1555                 break;
1556         case OP_ZERO_RANGE:
1557                 if (!zero_range_calls) {
1558                         log4(OP_SKIPPED, OP_ZERO_RANGE, offset, size);
1559                         goto out;
1560                 }
1561                 break;
1562         }
1563
1564         switch (op) {
1565         case OP_READ:
1566                 TRIM_OFF_LEN(offset, size, file_size);
1567                 doread(offset, size);
1568                 break;
1569         case OP_WRITE:
1570                 TRIM_OFF_LEN(offset, size, maxfilelen);
1571                 dowrite(offset, size);
1572                 break;
1573         case OP_MAPREAD:
1574                 TRIM_OFF_LEN(offset, size, file_size);
1575                 domapread(offset, size);
1576                 break;
1577         case OP_MAPWRITE:
1578                 TRIM_OFF_LEN(offset, size, maxfilelen);
1579                 domapwrite(offset, size);
1580                 break;
1581         case OP_TRUNCATE:
1582                 if (!style)
1583                         size = random() % maxfilelen;
1584                 dotruncate(size);
1585                 break;
1586         case OP_FALLOCATE:
1587                 TRIM_OFF_LEN(offset, size, maxfilelen);
1588                 do_preallocate(offset, size);
1589                 break;
1590         case OP_PUNCH_HOLE:
1591                 TRIM_OFF_LEN(offset, size, file_size);
1592                 do_punch_hole(offset, size);
1593                 break;
1594         case OP_ZERO_RANGE:
1595                 TRIM_OFF_LEN(offset, size, file_size);
1596                 do_zero_range(offset, size);
1597                 break;
1598         case OP_CLOSEOPEN:
1599                 if (closeopen)
1600                         docloseopen();
1601                 break;
1602         case OP_MIRROR_OPS:
1603                 if (flrmode)
1604                         do_mirror_ops(random() % MIRROR_OPS);
1605                 break;
1606         default:
1607                 prterr("unknown operation %d: Operation not supported");
1608                 report_failure(42);
1609                 break;
1610         }
1611
1612 out:
1613         if (sizechecks && testcalls > simulatedopcount)
1614                 check_size();
1615 }
1616
1617 static void
1618 segv(int sig)
1619 {
1620         if (jmpbuf_good) {
1621                 jmpbuf_good = 0;
1622                 longjmp(jmpbuf, 1);
1623         }
1624         report_failure(9999);
1625 }
1626
1627 static void
1628 cleanup(sig)
1629         int     sig;
1630 {
1631         if (sig)
1632                 prt("signal %d\n", sig);
1633         prt("testcalls = %lu\n", testcalls);
1634         exit(sig);
1635 }
1636
/*
 * Print the command line help to stdout and exit with status 90.
 *
 * The help text is a single format string; page_size is its only argument
 * and is referenced three times through the positional conversion %1$u.
 * The commented-out lines between the string pieces list options of other
 * fsx variants (OSX, XFS) and are not part of the output.
 */
static void
usage(void)
{
        fprintf(stdout,
                "usage: fsx [-dfnqFLOW] [-b opnum] [-c Prob] [-l flen] [-m start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t truncbdy] [-w writebdy] [-D startingop] [ -I random|rotate ] [-N numops] [-P dirpath] [-S seed] [-Z [prob]] fname [additional paths to fname..]\n"
"       -b opnum: beginning operation number (default 1)\n"
"       -c P: 1 in P chance of file close+open at each op (default infinity)\n"
"       -d: debug output for all operations [-d -d = more debugging]\n"
"       -f flush and invalidate cache after I/O\n"
/* OSX: -d duration: number of hours for the tool to run\n\ */
/* OSX: -e: tests using an extended attribute rather than a file\n\ */
/* OSX: -f forkname: test the named fork of fname\n\ */
/* OSX: -g logpath: path for .fsxlog file\n\ */
/* OSX: -h: write 0s instead of creating holes (i.e. sparse file)\n\ */
/* OSX: -i: interactive mode, hit return before performing each operation\n\ */
"       -l flen: the upper bound on file size (default 262144)\n"
"       -m startop:endop: monitor (print debug output) specified byte range\n"
"          (default 0:infinity)\n"
"       -n: no verifications of file size\n"
"       -o oplen: the upper bound on operation size (default 65536)\n"
"       -p progressinterval: debug output at specified operation interval\n"
"       -q: quieter operation\n"
"       -r readbdy: %1$u would make reads page aligned (default 1)\n"
"       -s style: 1 gives smaller truncates (default 0)\n"
"       -t truncbdy: %1$u would make truncates page aligned (default 1)\n"
"       -w writebdy: %1$u would make writes page aligned (default 1)\n"
/* XFS: -x: preallocate file space before starting, XFS only (default 0)\n\ */
"       -y synchronize changes to a file\n"
/* OSX: -v: debug output for all operations\n\ */
/* XFS: -A: Use the AIO system calls\n" */
/* OSX: -C mix cached and un-cached read/write ops\n\ */
"       -D startingop: debug output starting at specified operation\n"
"       -F: Do not use fallocate (preallocation) calls\n"
/* OSX: -G logsize: #entries in oplog (default 1024)\n\ */
#ifdef FALLOC_FL_PUNCH_HOLE
"       -H: Do not use punch hole calls\n"
#endif
#ifdef FALLOC_FL_ZERO_RANGE
"       -z: Do not use zero range calls\n"
#endif
/* XFS: -C: Do not use collapse range calls\n\ */
"       -I [rotate|random]: When multiple paths to the file are given,\n"
"           each operation uses a different path.  Iterate through them in\n"
"           order with 'rotate' or chose them at 'random'.  (default random)\n"
"       -L: fsxLite - no file creations & no file size changes\n"
/* OSX: -I: start interactive mode since operation opnum\n\ */
"       -M: mirror file test mode\n"
"       -N numops: total # operations to do (default infinity)\n"
"       -O: use oplen (see -o flag) for every op (default random)\n"
"       -P: save .fsxlog and .fsxgood files in dirpath (default ./)\n"
"       -R: read() system calls only (mapped reads disabled)\n"
"       -S seed: for random # generator (default 1) 0 gets timestamp\n"
/* OSX: -T datasize: atomic data element write size [1,2,4] (default 4)\n\ */
"       -W: mapped write operations DISabled\n"
"       -Z[P]: O_DIRECT file IO [1 in P chance for each open] (default off)\n"
"       fname: this filename is REQUIRED (no default)\n",
        page_size);
        exit(90);
}
1696
/*
 * Parse an integer (any base accepted by strtol() with base 0) followed by
 * an optional one-character multiplier suffix:
 *   b/B = 512, k/K = 1024, m/M = 1024 * 1024, w/W = 4.
 *
 * s - string to parse
 * e - receives a pointer to the first character after the number and,
 *     when a suffix was recognised, after that suffix
 *
 * Returns the parsed value multiplied by the suffix factor.
 */
static int
getnum(char *s, char **e)
{
        int value;
        int factor;

        *e = (char *)0;
        value = strtol(s, e, 0);
        if (!*e)
                return value;

        switch (**e) {
        case 'b':
        case 'B':
                factor = 512;
                break;
        case 'k':
        case 'K':
                factor = 1024;
                break;
        case 'm':
        case 'M':
                factor = 1024 * 1024;
                break;
        case 'w':
        case 'W':
                factor = 4;
                break;
        default:
                /* No recognised suffix: leave *e on the unparsed character. */
                return value;
        }

        value *= factor;
        *e = *e + 1;
        return value;
}
1729
1730 static int
1731 test_fallocate(int mode)
1732 {
1733         int ret = 0;
1734         int fd = get_fd();
1735
1736         if (!lite) {
1737                 /* Must go more than a page away so let's go 4M to be sure */
1738                 if (fallocate(fd, mode, 0, 4096*1024) && errno == EOPNOTSUPP) {
1739                         if (!quiet)
1740                                 warn("%s: filesystem does not support fallocate mode 0x%x, disabling!",
1741                                      __func__, mode);
1742                 } else {
1743                         ret = 1;
1744                 }
1745
1746                 /* Always call ftruncate since file size might be adjusted
1747                  * by fallocate even on error
1748                  */
1749                 if (ftruncate(fd, 0) == -1)
1750                         warn("ftruncate to 0 size failed");
1751         }
1752         return ret;
1753 }
1754
1755 int
1756 main(int argc, char **argv)
1757 {
1758         int i, style, ch;
1759         char *endp;
1760         int dirpath = 0;
1761
1762         goodfile[0] = 0;
1763         logfile[0] = 0;
1764
1765         page_size = getpagesize();
1766         page_mask = page_size - 1;
1767
1768         setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
1769
1770         while ((ch = getopt(argc, argv,
1771                             "b:c:dfl:m:no:p:qr:s:t:w:xyzD:FHI:LMN:OP:RS:WZ::"))
1772                != EOF)
1773                 switch (ch) {
1774                 case 'b':
1775                         simulatedopcount = getnum(optarg, &endp);
1776                         if (!quiet)
1777                                 fprintf(stdout, "Will begin at operation %ld\n",
1778                                         simulatedopcount);
1779                         if (simulatedopcount == 0)
1780                                 usage();
1781                         simulatedopcount -= 1;
1782                         break;
1783                 case 'c':
1784                         closeprob = getnum(optarg, &endp);
1785                         if (!quiet)
1786                                 fprintf(stdout,
1787                                         "Chance of close/open is 1 in %d\n",
1788                                         closeprob);
1789                         if (closeprob <= 0)
1790                                 usage();
1791                         break;
1792                 case 'd':
1793                         debug++;
1794                         break;
1795                 case 'f':
1796                         flush = 1;
1797                         break;
1798                 case 'l':
1799                         maxfilelen = getnum(optarg, &endp);
1800                         if (maxfilelen <= 0)
1801                                 usage();
1802                         break;
1803                 case 'm':
1804                         monitorstart = getnum(optarg, &endp);
1805                         if (monitorstart < 0)
1806                                 usage();
1807                         if (!endp || *endp++ != ':')
1808                                 usage();
1809                         monitorend = getnum(endp, &endp);
1810                         if (monitorend < 0)
1811                                 usage();
1812                         if (monitorend == 0)
1813                                 monitorend = -1; /* aka infinity */
1814                         debug = 1;
1815                 case 'n':
1816                         sizechecks = 0;
1817                         break;
1818                 case 'o':
1819                         maxoplen = getnum(optarg, &endp);
1820                         if (maxoplen <= 0)
1821                                 usage();
1822                         break;
1823                 case 'p':
1824                         progressinterval = getnum(optarg, &endp);
1825                         if (progressinterval <= 0)
1826                                 usage();
1827                         break;
1828                 case 'q':
1829                         quiet = 1;
1830                         break;
1831                 case 'r':
1832                         readbdy = getnum(optarg, &endp);
1833                         if (readbdy <= 0)
1834                                 usage();
1835                         break;
1836                 case 's':
1837                         style = getnum(optarg, &endp);
1838                         if (style < 0 || style > 1)
1839                                 usage();
1840                         break;
1841                 case 't':
1842                         truncbdy = getnum(optarg, &endp);
1843                         if (truncbdy <= 0)
1844                                 usage();
1845                         break;
1846                 case 'w':
1847                         writebdy = getnum(optarg, &endp);
1848                         if (writebdy <= 0)
1849                                 usage();
1850                         break;
1851                 case 'y':
1852                         do_fsync = 1;
1853                         break;
1854                 case 'D':
1855                         debugstart = getnum(optarg, &endp);
1856                         if (debugstart < 1)
1857                                 usage();
1858                         break;
1859                 case 'F':
1860                         fallocate_calls = 0;
1861                         break;
1862                 case 'H':
1863                         punch_hole_calls = 0;
1864                         break;
1865                 case 'z':
1866                         zero_range_calls = 0;
1867                         break;
1868                 case 'I':
1869                         assign_fd_policy(optarg);
1870                         break;
1871                 case 'L':
1872                         lite = 1;
1873                         break;
1874                 case 'M':
1875                         flrmode = 1;
1876                         break;
1877                 case 'N':
1878                         numops = getnum(optarg, &endp);
1879                         if (numops < 0)
1880                                 usage();
1881                         break;
1882                 case 'O':
1883                         randomoplen = 0;
1884                         break;
1885                 case 'P':
1886                         strncpy(goodfile, optarg, sizeof(goodfile) - 1);
1887                         strncat(goodfile, "/", PATH_MAX - strlen(goodfile) - 1);
1888                         strncpy(logfile, optarg, sizeof(logfile) - 1);
1889                         strncat(logfile, "/", PATH_MAX - strlen(logfile) - 1);
1890                         dirpath = 1;
1891                         break;
1892                 case 'R':
1893                         mapped_reads = 0;
1894                         break;
1895                 case 'S':
1896                         seed = getnum(optarg, &endp);
1897                         if (seed == 0)
1898                                 seed = time(0) % 10000;
1899                         if (!quiet)
1900                                 fprintf(stdout, "Seed set to %d\n", seed);
1901                         if (seed < 0)
1902                                 usage();
1903                         break;
1904                 case 'W':
1905                         mapped_writes = 0;
1906                         if (!quiet)
1907                                 fprintf(stdout, "mapped writes DISABLED\n");
1908                         break;
1909                 case 'Z':
1910                         if (optarg)
1911                                 o_direct = getnum(optarg, &endp);
1912                         if (!optarg || o_direct == 0)
1913                                 o_direct = 1;
1914                         break;
1915                 default:
1916                         usage();
1917                         /* NOTREACHED */
1918                 }
1919         argc -= optind;
1920         argv += optind;
1921         if (argc < 1)
1922                 usage();
1923         fname = argv[0];
1924
1925         signal(SIGHUP, cleanup);
1926         signal(SIGINT, cleanup);
1927         signal(SIGPIPE, cleanup);
1928         signal(SIGALRM, cleanup);
1929         signal(SIGTERM, cleanup);
1930         signal(SIGXCPU, cleanup);
1931         signal(SIGXFSZ, cleanup);
1932         signal(SIGVTALRM, cleanup);
1933         signal(SIGUSR1, cleanup);
1934         signal(SIGUSR2, cleanup);
1935         signal(SIGBUS, segv);
1936         signal(SIGSEGV, segv);
1937
1938         initstate(seed, state, 256);
1939         setstate(state);
1940
1941         open_test_files(argv, argc);
1942
1943         strncat(goodfile, dirpath ? my_basename(fname) : fname, 256);
1944         strncat(goodfile, ".fsxgood", PATH_MAX - strlen(goodfile) - 1);
1945         fsxgoodfd = open(goodfile, O_RDWR | O_CREAT | O_TRUNC, 0666);
1946         if (fsxgoodfd < 0) {
1947                 prterr(goodfile);
1948                 exit(92);
1949         }
1950         strncat(logfile, dirpath ? my_basename(fname) : fname, 256);
1951         strncat(logfile, ".fsxlog", PATH_MAX - strlen(logfile) - 1);
1952         fsxlogf = fopen(logfile, "w");
1953         if (!fsxlogf) {
1954                 prterr(logfile);
1955                 exit(93);
1956         }
1957         if (lite) {
1958                 off_t ret;
1959                 int fd = get_fd();
1960
1961                 maxfilelen = lseek(fd, (off_t)0, SEEK_END);
1962                 file_size = maxfilelen;
1963                 if (file_size == (off_t)-1) {
1964                         prterr(fname);
1965                         warn("%s: lseek eof", __func__);
1966                         exit(94);
1967                 }
1968                 ret = lseek(fd, (off_t)0, SEEK_SET);
1969                 if (ret == (off_t)-1) {
1970                         prterr(fname);
1971                         warn("%s: lseek 0", __func__);
1972                         exit(95);
1973                 }
1974         }
1975         original_buf = (char *)malloc(maxfilelen);
1976         if (!original_buf)
1977                 exit(96);
1978         for (i = 0; i < maxfilelen; i++)
1979                 original_buf[i] = random() % 256;
1980         if (o_direct) {
1981                 int ret;
1982
1983                 ret = posix_memalign((void **)&good_buf, writebdy, maxfilelen);
1984                 if (ret) {
1985                         prt("%s: posix_memalign failed: %s\n", __func__,
1986                             strerror(ret));
1987                         exit(96);
1988                 }
1989
1990                 ret = posix_memalign((void **)&temp_buf, readbdy, maxoplen);
1991                 if (ret) {
1992                         prt("%s: posix_memalign failed: %s\n", __func__,
1993                             strerror(ret));
1994                         exit(97);
1995                 }
1996         } else {
1997                 good_buf = malloc(maxfilelen);
1998                 if (!good_buf) {
1999                         prt("malloc failed.\n");
2000                         exit(98);
2001                 }
2002
2003                 temp_buf = malloc(maxoplen);
2004                 if (!temp_buf) {
2005                         prt("malloc failed.\n");
2006                         exit(99);
2007                 }
2008         }
2009         memset(good_buf, 0, maxfilelen);
2010         memset(temp_buf, 0, maxoplen);
2011
2012         if (lite) {     /* zero entire existing file */
2013                 ssize_t written;
2014                 int fd = get_fd();
2015
2016                 written = write(fd, good_buf, (size_t)maxfilelen);
2017                 if (written != maxfilelen) {
2018                         if (written == -1) {
2019                                 prterr(fname);
2020                                 warn("%s: error on write", __func__);
2021                         } else {
2022                                 warn("%s: short write, 0x%x bytes instead of 0x%lx\n",
2023                                      __func__, (unsigned int)written,
2024                                      maxfilelen);
2025                         }
2026                         exit(98);
2027                 }
2028         } else {
2029                 check_trunc_hack();
2030         }
2031
2032         if (fallocate_calls)
2033                 fallocate_calls = test_fallocate(0);
2034
2035         if (punch_hole_calls)
2036                 punch_hole_calls = test_fallocate(FALLOC_FL_PUNCH_HOLE |
2037                                                   FALLOC_FL_KEEP_SIZE);
2038
2039         if (zero_range_calls)
2040                 zero_range_calls = test_fallocate(FALLOC_FL_ZERO_RANGE);
2041
2042         fl_keep_size = test_fallocate(FALLOC_FL_KEEP_SIZE);
2043
2044         while (numops == -1 || numops--)
2045                 test();
2046
2047         close_test_files();
2048         prt("All operations completed A-OK!\n");
2049
2050         free(original_buf);
2051         free(good_buf);
2052         free(temp_buf);
2053
2054         return 0;
2055 }