Whamcloud - gitweb
LU-10994 test: remove netdisk from obdfilter-survey
[fs/lustre-release.git] / lustre / tests / fsx.c
1 /*
2  * Copyright (C) 1991, NeXT Computer, Inc.  All Rights Reserverd.
3  * Copyright (c) 1998-2001 Apple Computer, Inc. All rights reserved.
4  *
5  * Copyright (c) 2012, Intel Corporation.
6  *
7  * @APPLE_LICENSE_HEADER_START@
8  *
9  * The contents of this file constitute Original Code as defined in and
10  * are subject to the Apple Public Source License Version 1.1 (the
11  * "License").  You may not use this file except in compliance with the
12  * License.  Please obtain a copy of the License at
13  * http://www.apple.com/publicsource and read it before using this file.
14  *
15  * This Original Code and all software distributed under the License are
16  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
20  * License for the specific language governing rights and limitations
21  * under the License.
22  *
23  * @APPLE_LICENSE_HEADER_END@
24  *
25  *      File:   fsx.c
26  *      Author: Avadis Tevanian, Jr.
27  *
28  *      File system exerciser.
29  *
30  *      Rewrite and enhancements 1998-2001 Conrad Minshall -- conrad@mac.com
31  *
32  *      Various features from Joe Sokol, Pat Dirks, and Clark Warner.
33  *
34  *      Small changes to work under Linux -- davej.
35  *
36  *      Sundry porting patches from Guy Harris 12/2001
37  * $FreeBSD: src/tools/regression/fsx/fsx.c,v 1.1 2001/12/20 04:15:57 jkh Exp $
38  *
39  *      Checks for mmap last-page zero fill.
40  *
41  *      Add multi-file testing feature -- Zach Brown <zab@clusterfs.com>
42  *
43  *      Add random preallocation calls - Eric Sandeen <sandeen@redhat.com>
44  *
45  * $FreeBSD: src/tools/regression/fsx/fsx.c,v 1.2 2003/04/23 23:42:23 jkh Exp $
46  * $DragonFly: src/test/stress/fsx/fsx.c,v 1.2 2005/05/02 19:31:56 dillon Exp $
47  */
48 #ifndef _GNU_SOURCE
49 #define _GNU_SOURCE
50 #endif
51
52 #include <sys/types.h>
53 #include <sys/stat.h>
54 #if defined(_UWIN) || defined(__linux__)
55 # include <sys/param.h>
56 # include <limits.h>
57 # include <time.h>
58 # include <strings.h>
59 #endif
60 #include <sys/time.h>
61 #include <fcntl.h>
62 #include <sys/mman.h>
63 #ifndef MAP_FILE
64 # define MAP_FILE 0
65 #endif
66 #include <limits.h>
67 #include <signal.h>
68 #include <stdio.h>
69 #include <stddef.h>
70 #include <stdlib.h>
71 #include <string.h>
72 #include <unistd.h>
73 #include <stdarg.h>
74 #include <errno.h>
75 #include <libcfs/util/string.h>
76 #include <setjmp.h>
77
78 #include <linux/lustre/lustre_idl.h>
79 #include <lustre/lustreapi.h>
80
/*
 * Each test run will work with one or more separate file descriptors for the
 * same file.  This allows testing cache coherency across multiple mountpoints
 * of the same network filesystem on a single client.
 *
 * test_files is the array allocated by open_test_files(); tf is the entry
 * chosen for the operation in progress and is what log4() stores in each
 * log record.
 */
struct test_file {
        char *path;     /* path as given to open_test_files() */
        int fd;         /* descriptor returned by open() for path */
        int o_direct;   /* OP_DIRECT if opened with O_DIRECT, else 0 */
} *test_files = NULL, *tf;

/* number of entries in test_files (set by open_test_files()) */
int num_test_files;
93
/*
 * How get_tf() picks the test file for the next operation.
 */
enum fd_iteration_policy {
        FD_SINGLE,      /* always entry 0; selected when only one file is given */
        FD_ROTATE,      /* cycle through the entries using fd_last */
        FD_RANDOM,      /* pick an entry with random() */
};

int fd_policy = FD_RANDOM;      /* active policy, see assign_fd_policy() */
int fd_last;                    /* running counter consumed by FD_ROTATE */
102
/*
 *      A log entry is an operation and a bunch of arguments.
 */

struct log_entry {
        int operation;                  /* OP_* code, possibly + OP_DIRECT */
        int args[3];                    /* meaning depends on operation, see logdump() */
        struct timeval tv;              /* time the entry was recorded by log4() */
        const struct test_file *tf;     /* value of the global tf when logged */
};

/* capacity of the circular operation log */
#define LOGSIZE 100000

struct log_entry oplog[LOGSIZE]; /* the log */
int logptr; /* current position in log */
int logcount; /* total ops */
/*
 * jmpbuf_good is set to 1 by domapread() while it touches mapped memory and
 * cleared afterwards.
 * NOTE(review): presumably a signal handler outside this view longjmp()s to
 * jmpbuf when jmpbuf_good is set -- confirm.
 */
int jmpbuf_good;
jmp_buf jmpbuf;

/* NOTE(review): presumably mirror IDs used by the MIRROR_* operations -- confirm */
unsigned int mirror_ids[LUSTRE_MIRROR_COUNT_MAX];
/*
 * Define operations
 *
 * These codes are stored in log_entry.operation and decoded by logdump().
 */

/* common operations */
#define OP_READ         0
#define OP_WRITE        1
#define OP_MAPREAD      2
#define OP_MAPWRITE     3
#define OP_MAX_LITE     4       /* one past the last common operation */

/* !lite operations */
#define OP_TRUNCATE             4
#define OP_FALLOCATE            5
#define OP_PUNCH_HOLE           6
#define OP_ZERO_RANGE           7
#define OP_CLOSEOPEN            8
#define OP_MIRROR_OPS           9
#define OP_MAX_FULL             10      /* one past the last !lite operation */

/* sub-operations of OP_MIRROR_OPS; they index mirror_op_str[] */
#define MIRROR_EXTEND 0
#define MIRROR_SPLIT 1
#define MIRROR_RESYNC 2
#define MIRROR_OPS 3    /* number of mirror sub-operations */

/* printable names for the MIRROR_* sub-operations */
char *mirror_op_str[] = {
        [MIRROR_EXTEND] = "MIRROR_EXTEND",
        [MIRROR_SPLIT]  = "MIRROR_SPLIT",
        [MIRROR_RESYNC] = "MIRROR_RESYNC",
};

/* logged in place of an operation that was not performed */
#define OP_SKIPPED 101
/* added to an OP_* code when the file was opened with O_DIRECT */
#define OP_DIRECT O_DIRECT

/* fallback definitions for headers that lack these fallocate() mode flags */
#ifndef FALLOC_FL_PUNCH_HOLE
#define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */
#endif

#ifndef FALLOC_FL_KEEP_SIZE
#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */
#endif

#ifndef FALLOC_FL_ZERO_RANGE
#define FALLOC_FL_ZERO_RANGE 0x10 /* convert range to zeros */
#endif
168
169
char *original_buf; /* a pointer to the original data */
char *good_buf; /* a pointer to the correct data */
char *temp_buf; /* a pointer to the current data */
char *fname; /* name of our test file */
char logfile[PATH_MAX]; /* name of our log file */
char goodfile[PATH_MAX]; /* name of the file report_failure() saves good_buf to */

struct timeval tv; /* time current operation started */
off_t file_size; /* expected size of the test file, checked by check_size() */
off_t biggest;
char state[256];
unsigned long testcalls; /* calls to function "test" */

long simulatedopcount;                  /* -b flag */
int closeprob;                          /* -c flag */
int debug ;                             /* -d flag */
long debugstart;                        /* -D flag */
int flush;                              /* -f flag */
int do_fsync;                           /* -y flag */
long maxfilelen = 256 * 1024;           /* -l flag */
int sizechecks = 1;                     /* -n flag disables them */
int maxoplen = 64 * 1024;               /* -o flag */
int quiet;                              /* -q flag */
long progressinterval;                  /* -p flag */
int readbdy = 1;                        /* -r flag */
int style;                              /* -s flag */
int truncbdy = 1;                       /* -t flag */
int writebdy = 1;                       /* -w flag */
long monitorstart = -1;                 /* -m flag */
long monitorend = -1;                   /* -m flag */
long flrmode;                           /* -M flag */
int lite;                               /* -L flag */
long numops = -1;                       /* -N flag */
int randomoplen = 1;                    /* -O flag disables it */
int seed = 1;                           /* -S flag */
int mapped_writes = 1;                  /* -W flag disables */
int fallocate_calls = 1;                /* -F flag disables */
int punch_hole_calls = 1;               /* -H flag disables */
int zero_range_calls = 1;               /* -z flag disables */
int mapped_reads = 1;                   /* -R flag disables it */
int fsxgoodfd;                          /* fd report_failure() writes good_buf to */
int o_direct;                           /* -Z */
int fl_keep_size;

int page_size;
/* used as "offset & page_mask" for the in-page offset; presumably page_size - 1 -- confirm */
int page_mask;

FILE *fsxlogf;          /* when non-NULL, prt() copies its output here */
int badoff = -1;        /* last mismatching offset found by check_buffers() */
219
/*
 * Print "fsx: <formatted message>: <error text>" on stderr.
 *
 * code: errno-style value passed to strerror() for the trailing error text
 * fmt:  printf-style format, or NULL to print only the error text
 * ap:   arguments consumed by fmt
 *
 * Defined with a prototype rather than the old K&R parameter list so the
 * compiler can type-check callers.
 */
void
vwarnc(int code, const char *fmt, va_list ap)
{
        fprintf(stderr, "fsx: ");
        if (fmt) {
                vfprintf(stderr, fmt, ap);
                fprintf(stderr, ": ");
        }
        fprintf(stderr, "%s\n", strerror(code));
}
233
/*
 * Emit a formatted warning on stderr, followed by the error text for the
 * errno value in effect when warn() was entered.
 */
void
__attribute__((format(__printf__, 1, 2)))
warn(const char *fmt, ...)
{
        int code = errno;
        va_list args;

        va_start(args, fmt);
        vwarnc(code, fmt, args);
        va_end(args);
}
244
245 void
246 __attribute__((format(__printf__, 1, 2)))
247 prt(char *fmt, ...)
248 {
249         va_list args;
250
251         va_start(args, fmt);
252         vfprintf(stdout, fmt, args);
253         va_end(args);
254
255         if (fsxlogf) {
256                 va_start(args, fmt);
257                 vfprintf(fsxlogf, fmt, args);
258                 va_end(args);
259         }
260 }
261
/*
 * prterr() is a macro. It calls ptrerr_func() and transparently passes
 * the name of the calling function.
 * This version also keeps checkpatch happy.
 *
 * func:   name of the reporting function (supplied by the prterr() macro)
 * prefix: optional context string; may be NULL, in which case only the
 *         function name and the error text are printed
 *
 * Output goes through prt() as "<func>: <prefix>: <strerror(errno)>".
 */
void
ptrerr_func(const char *func, const char *prefix)
{
        /* fetch the error text first, before prt() performs any I/O */
        const char *errtext = strerror(errno);

        /* never hand a NULL pointer to a %s conversion */
        if (prefix)
                prt("%s: %s: %s\n", func, prefix, errtext);
        else
                prt("%s: %s\n", func, errtext);
}
#define prterr(prefix) ptrerr_func(__func__, prefix)
273
274 void
275 log4(int operation, int arg0, int arg1, int arg2)
276 {
277         struct log_entry *le;
278
279         le = &oplog[logptr];
280         le->operation = operation;
281         le->args[0] = arg0;
282         le->args[1] = arg1;
283         le->args[2] = arg2;
284         gettimeofday(&tv, NULL);
285         le->tv = tv;
286         le->tf = tf;
287         logptr++;
288         logcount++;
289         if (logptr >= LOGSIZE)
290                 logptr = 0;
291 }
292
293 const char *
294 fill_tf_buf(const struct test_file *tf)
295 {
296         static int max_tf_len;
297         static char tf_buf[32];
298
299         if (fd_policy == FD_SINGLE)
300                 return "";
301
302         if (max_tf_len == 0)
303                 max_tf_len = scnprintf(tf_buf, sizeof(tf_buf) - 1,
304                                       "%u", num_test_files - 1);
305
306         snprintf(tf_buf, sizeof(tf_buf), "[%0*lu]", max_tf_len,
307                 (unsigned long)(tf - test_files));
308
309         return tf_buf;
310 }
311
312 void
313 logdump(void)
314 {
315         int i, count, down;
316         struct log_entry *lp;
317         char *falloc_type[3] = {"PAST_EOF", "EXTENDING", "INTERIOR"};
318
319         prt("LOG DUMP (%d total operations):\n", logcount);
320         if (logcount < LOGSIZE) {
321                 i = 0;
322                 count = logcount;
323         } else {
324                 i = logptr;
325                 count = LOGSIZE;
326         }
327         for ( ; count > 0; count--) {
328                 int opnum;
329
330                 opnum = i + 1 + (logcount / LOGSIZE) * LOGSIZE;
331                 lp = &oplog[i];
332                 prt("%d%s: %lu.%06u ", opnum, fill_tf_buf(lp->tf),
333                     lp->tv.tv_sec, (int)lp->tv.tv_usec);
334
335                 switch (lp->operation) {
336                 case OP_MAPREAD:
337                         prt("MAPREAD  0x%05x thru 0x%05x (0x%05x bytes)",
338                             lp->args[0], lp->args[0] + lp->args[1] - 1,
339                             lp->args[1]);
340                         if (badoff >= lp->args[0] && badoff <
341                                                      lp->args[0] + lp->args[1])
342                                 prt("\t***RRRR***");
343                         break;
344                 case OP_MAPWRITE:
345                         prt("MAPWRITE 0x%05x thru 0x%05x (0x%05x bytes)",
346                             lp->args[0], lp->args[0] + lp->args[1] - 1,
347                             lp->args[1]);
348                         if (badoff >= lp->args[0] && badoff <
349                                                      lp->args[0] + lp->args[1])
350                                 prt("\t******WWWW");
351                         break;
352                 case OP_READ:
353                 case OP_READ + OP_DIRECT:
354                         prt("READ%s  0x%05x thru 0x%05x (0x%05x bytes)",
355                             lp->operation & OP_DIRECT ? "_OD" : "   ",
356                             lp->args[0], lp->args[0] + lp->args[1] - 1,
357                             lp->args[1]);
358                         if (badoff >= lp->args[0] &&
359                             badoff < lp->args[0] + lp->args[1])
360                                 prt("\t***RRRR***");
361                         break;
362                 case OP_WRITE:
363                 case OP_WRITE + OP_DIRECT:
364                         prt("WRITE%s 0x%05x thru 0x%05x (0x%05x bytes)",
365                             lp->operation & OP_DIRECT ? "_OD" : "   ",
366                             lp->args[0], lp->args[0] + lp->args[1] - 1,
367                             lp->args[1]);
368                         if (lp->args[0] > lp->args[2])
369                                 prt(" HOLE");
370                         else if (lp->args[0] + lp->args[1] > lp->args[2])
371                                 prt(" EXTEND");
372                         if ((badoff >= lp->args[0] || badoff >= lp->args[2]) &&
373                             badoff < lp->args[0] + lp->args[1])
374                                 prt("\t***WWWW");
375                         break;
376                 case OP_TRUNCATE:
377                         down = lp->args[0] < lp->args[1];
378                         prt("TRUNC%s 0x%05x to 0x%05x",
379                             down ? "_DN" : "_UP", lp->args[1], lp->args[0]);
380                         if (badoff >= lp->args[!down] &&
381                             badoff < lp->args[!!down])
382                                 prt("\t******TTTT");
383                         break;
384                 case OP_FALLOCATE:
385                         /* 0: offset 1: length 2: where alloced */
386                         prt("FALLOC   0x%05x thru 0x%05x\t(0x%05x bytes)%s",
387                             lp->args[0], lp->args[0] + lp->args[1],
388                             lp->args[1], falloc_type[lp->args[2]]);
389                         if (badoff >= lp->args[0] &&
390                             badoff < lp->args[0] + lp->args[1])
391                                 prt("\t******FFFF");
392                         break;
393                 case OP_PUNCH_HOLE:
394                         prt("PUNCH    0x%05x thru 0x%05x\t(0x%05x bytes)",
395                             lp->args[0], lp->args[0] + lp->args[1] - 1,
396                             lp->args[1]);
397                         if (badoff >= lp->args[0] && badoff <
398                                                      lp->args[0] + lp->args[1])
399                                 prt("\t******PPPP");
400                         break;
401                 case OP_ZERO_RANGE:
402                         prt("ZERO     0x%05x thru 0x%05x\t(0x%05x bytes)",
403                             lp->args[0], lp->args[0] + lp->args[1] - 1,
404                             lp->args[1]);
405                         if (badoff >= lp->args[0] && badoff <
406                                                      lp->args[0] + lp->args[1])
407                                 prt("\t******ZZZZ");
408                         break;
409                 case OP_CLOSEOPEN:
410                 case OP_CLOSEOPEN + OP_DIRECT:
411                         prt("CLOSE/OPEN%s",
412                             lp->operation & OP_DIRECT ? "_OD" : "   ");
413                         break;
414                 case OP_MIRROR_OPS: {
415                         prt("%s ", mirror_op_str[lp->args[0]]);
416                         if (lp->args[0] == MIRROR_EXTEND)
417                                 prt("to %d mirrors", lp->args[1] + 1);
418                         else if (lp->args[0] == MIRROR_SPLIT)
419                                 prt("mirror %d to %d mirrors", lp->args[2],
420                                     lp->args[1] - 1);
421                         else if (lp->args[0] == MIRROR_RESYNC)
422                                 prt("%d mirrors", lp->args[1]);
423                         break;
424                 }
425                 case OP_SKIPPED:
426                         prt("SKIPPED (no operation)");
427                         break;
428                 default:
429                         prt("BOGUS LOG ENTRY (operation code = %d)!",
430                             lp->operation);
431                 }
432                 prt("\n");
433                 i++;
434                 if (i == LOGSIZE)
435                         i = 0;
436         }
437 }
438
439 void
440 save_buffer(char *buffer, off_t bufferlength, int fd)
441 {
442         off_t ret;
443         ssize_t byteswritten;
444
445         if (fd <= 0 || bufferlength == 0)
446                 return;
447
448         if (bufferlength > INT_MAX) {
449                 prt("fsx flaw: overflow in %s\n", __func__);
450                 exit(67);
451         }
452         if (lite) {
453                 off_t size_by_seek = lseek(fd, (off_t)0, SEEK_END);
454
455                 if (size_by_seek == (off_t)-1) {
456                         prterr("lseek eof");
457                 } else if (bufferlength > size_by_seek) {
458                         warn("%s: .fsxgood file too short... will save 0x%llx bytes instead of 0x%llx\n",
459                              __func__, (unsigned long long)size_by_seek,
460                              (unsigned long long)bufferlength);
461                         bufferlength = size_by_seek;
462                 }
463         }
464
465         ret = lseek(fd, (off_t)0, SEEK_SET);
466         if (ret == (off_t)-1)
467                 prterr("lseek 0");
468
469         byteswritten = write(fd, buffer, (size_t)bufferlength);
470         if (byteswritten != bufferlength) {
471                 if (byteswritten == -1)
472                         prterr("write");
473                 else
474                         warn("%s: short write, 0x%x bytes instead of 0x%llx\n",
475                              __func__, (unsigned int)byteswritten,
476                              (unsigned long long)bufferlength);
477         }
478 }
479
480 void
481 report_failure(int status)
482 {
483         logdump();
484         prt("Using seed %d\n", seed);
485
486         if (fsxgoodfd) {
487                 if (good_buf) {
488                         save_buffer(good_buf, file_size, fsxgoodfd);
489                         prt("Correct content saved for comparison\n");
490                         prt("(maybe hexdump \"%s\" vs \"%s\")\n",
491                             fname, goodfile);
492                 }
493                 close(fsxgoodfd);
494         }
495         exit(status);
496 }
497
498 #define short_at(cp) ((unsigned short)((*((unsigned char *)(cp)) << 8) | \
499                       *(((unsigned char *)(cp)) + 1)))
500
501 void
502 check_buffers(unsigned int offset, unsigned int size)
503 {
504         unsigned char c, t;
505         unsigned int i = 0;
506         unsigned int n = 0;
507         unsigned int op = 0;
508         unsigned int bad = 0;
509
510         if (memcmp(good_buf + offset, temp_buf, size) != 0) {
511                 prt("READ BAD DATA: offset = 0x%x, size = 0x%x\n",
512                     offset, size);
513                 prt("OFFSET\tGOOD\tBAD\tRANGE\n");
514                 while (size > 0) {
515                         c = good_buf[offset];
516                         t = temp_buf[i];
517                         if (c != t) {
518                                 if (n == 0) {
519                                         bad = short_at(&temp_buf[i]);
520                                         prt("%#07x\t%#06x\t%#06x", offset,
521                                             short_at(&good_buf[offset]), bad);
522                                         op = temp_buf[offset & 1 ? i + 1 : i];
523                                 }
524                                 n++;
525                                 badoff = offset;
526                         }
527                         offset++;
528                         i++;
529                         size--;
530                 }
531                 if (n) {
532                         prt("\t%#7x\n", n);
533                         if (bad)
534                                 prt("operation# (mod 256) for the bad data may be %u\n",
535                                     ((unsigned int)op & 0xff));
536                         else
537                                 prt("operation# (mod 256) for the bad data unknown, check HOLE and EXTEND ops\n");
538                 } else {
539                         prt("????????????????\n");
540                 }
541                 report_failure(110);
542         }
543 }
544
545 struct test_file *
546 get_tf(void)
547 {
548         unsigned int index = 0;
549
550         switch (fd_policy) {
551         case FD_ROTATE:
552                 index = fd_last++;
553                 break;
554         case FD_RANDOM:
555                 index = random();
556                 break;
557         case FD_SINGLE:
558                 index = 0;
559                 break;
560         default:
561                 prt("unknown policy");
562                 exit(1);
563                 break;
564         }
565         return &test_files[index % num_test_files];
566 }
567
568 void
569 assign_fd_policy(char *policy)
570 {
571         if (!strcmp(policy, "random")) {
572                 fd_policy = FD_RANDOM;
573         } else if (!strcmp(policy, "rotate")) {
574                 fd_policy = FD_ROTATE;
575         } else {
576                 prt("unknown -I policy: '%s'\n", policy);
577                 exit(1);
578         }
579 }
580
581 int
582 get_fd(void)
583 {
584         struct test_file *tf = get_tf();
585
586         return tf->fd;
587 }
588
/*
 * Return the final component of path: the text following the last '/',
 * or path itself when it contains no '/'.  The result points into path;
 * nothing is allocated or modified.
 */
static const char *my_basename(const char *path)
{
        const char *slash = strrchr(path, '/');

        /* step past the separator so the result has no leading '/' */
        return slash ? slash + 1 : path;
}
595
596 void
597 open_test_files(char **argv, int argc)
598 {
599         struct test_file *tf;
600         int i;
601
602         num_test_files = argc;
603         if (num_test_files == 1)
604                 fd_policy = FD_SINGLE;
605
606         test_files = calloc(num_test_files, sizeof(*test_files));
607         if (!test_files) {
608                 prterr("reallocating space for test files");
609                 exit(1);
610         }
611
612         for (i = 0, tf = test_files; i < num_test_files; i++, tf++) {
613                 tf->path = argv[i];
614                 tf->o_direct = (random() % (o_direct + 1)) ? OP_DIRECT : 0;
615                 tf->fd = open(tf->path,
616                               O_RDWR | (lite ? 0 : O_CREAT | O_TRUNC) |
617                               tf->o_direct, 0666);
618                 if (tf->fd < 0) {
619                         prterr(tf->path);
620                         exit(91);
621                 }
622         }
623
624         if (quiet || fd_policy == FD_SINGLE)
625                 return;
626
627         for (i = 0, tf = test_files; i < num_test_files; i++, tf++)
628                 prt("fd %d: %s\n", i, tf->path);
629 }
630
631 void
632 close_test_files(void)
633 {
634         int i;
635         struct test_file *tf;
636
637         for (i = 0, tf = test_files; i < num_test_files; i++, tf++) {
638                 if (close(tf->fd)) {
639                         prterr("close");
640                         report_failure(99);
641                 }
642         }
643 }
644
645 void
646 check_size(void)
647 {
648         struct stat statbuf;
649         off_t size_by_seek;
650         int fd = get_fd();
651
652         if (fstat(fd, &statbuf)) {
653                 prterr("fstat");
654                 statbuf.st_size = -1;
655         }
656         size_by_seek = lseek(fd, (off_t)0, SEEK_END);
657         if (file_size != statbuf.st_size || file_size != size_by_seek) {
658                 prt("Size error: expected 0x%llx stat 0x%llx seek 0x%llx\n",
659                     (unsigned long long)file_size,
660                     (unsigned long long)statbuf.st_size,
661                     (unsigned long long)size_by_seek);
662                 report_failure(120);
663         }
664 }
665
/*
 * Check that ftruncate() can grow a file: truncate to 0, extend to 100000
 * bytes, confirm the size with fstat(), then truncate back to 0.  An
 * ftruncate() failure exits with status 1; a file that did not grow exits
 * with status 130.
 */
void
check_trunc_hack(void)
{
        const off_t probe_len = 100000;
        struct stat st;
        int fd = get_fd();

        /* should not ignore ftruncate(2)'s return value */
        if (ftruncate(fd, (off_t)0) < 0) {
                prterr("trunc_hack: ftruncate(0)");
                exit(1);
        }
        if (ftruncate(fd, probe_len) < 0) {
                prterr("trunc_hack: ftruncate(100000)");
                exit(1);
        }

        if (fstat(fd, &st) != 0) {
                prterr("trunc_hack: fstat");
                st.st_size = -1;
        }
        if (st.st_size != probe_len) {
                prt("no extend on truncate! not posix!\n");
                exit(130);
        }

        if (ftruncate(fd, 0) < 0) {
                prterr("trunc_hack: ftruncate(0) (2nd call)");
                exit(1);
        }
}
694
695 void
696 output_line(struct test_file *tf, int op, unsigned int offset,
697             unsigned int size)
698 {
699         char *ops[] = {
700                 [OP_READ] = "read",
701                 [OP_WRITE] = "write",
702                 [OP_TRUNCATE] = "trunc from",
703                 [OP_MAPREAD] = "mapread",
704                 [OP_MAPWRITE] = "mapwrite",
705                 [OP_READ + OP_DIRECT] = "read_OD",
706                 [OP_WRITE + OP_DIRECT] = "write_OD",
707                 [OP_FALLOCATE] = "fallocate",
708                 [OP_PUNCH_HOLE] = "punch from",
709         };
710
711         /* W. */
712         if (!(!quiet &&
713             ((progressinterval && testcalls % progressinterval == 0) ||
714             (debug && (monitorstart == -1 ||
715             (offset + size > monitorstart &&
716             (monitorend == -1 || offset <= monitorend)))))))
717                 return;
718
719         prt("%06lu%s %lu.%06u %-10s %#08x %s %#08x\t(0x%x bytes)\n",
720             testcalls, fill_tf_buf(tf), tv.tv_sec, (int)tv.tv_usec,
721             ops[op], offset, op == OP_TRUNCATE || op == OP_PUNCH_HOLE ?
722             " to " : "thru", offset + size - 1,
723              (int)size < 0 ? -(int)size : size);
724 }
725
726 void
727 mirror_output_line(struct test_file *tf, int op, int mirrors, int id)
728 {
729         if (!(!quiet &&
730               ((progressinterval && testcalls % progressinterval == 0) ||
731                (debug && (monitorstart == -1)))))
732                 return;
733
734         prt("%06lu %lu.%06u %-10s ",
735             testcalls, tv.tv_sec, (int)tv.tv_usec, mirror_op_str[op]);
736
737         switch (op) {
738         case MIRROR_EXTEND:
739                 prt("to %d mirrors\n", mirrors + 1);
740                 break;
741         case MIRROR_SPLIT:
742                 prt("mirror %d to %d mirrors\n", id, mirrors - 1);
743                 break;
744         case MIRROR_RESYNC:
745                 prt("%d mirrors\n", mirrors);
746                 break;
747         }
748 }
749
750 void output_debug(unsigned int offset, unsigned int size, const char *what)
751 {
752         struct timeval t;
753
754         if (!quiet && (debug > 1 && (monitorstart == -1 ||
755             (offset + size >= monitorstart &&
756              (monitorend == -1 || offset <= monitorend))))) {
757                 gettimeofday(&t, NULL);
758                 prt("       %lu.%06u %s\n", t.tv_sec, (int)t.tv_usec, what);
759         }
760 }
761
762 void
763 doflush(unsigned int offset, unsigned int size)
764 {
765         unsigned int pg_offset;
766         unsigned int map_size;
767         char *p;
768         struct test_file *tf = get_tf();
769         int fd = tf->fd;
770
771         if (tf->o_direct)
772                 return;
773
774         pg_offset = offset & page_mask;
775         map_size  = pg_offset + size;
776
777         p = (char *)mmap(0, map_size, PROT_READ | PROT_WRITE,
778                          MAP_FILE | MAP_SHARED, fd,
779                          (off_t)(offset - pg_offset));
780         if (p == (char *)-1) {
781                 prterr("mmap");
782                 report_failure(202);
783         }
784         if (msync(p, map_size, MS_INVALIDATE) != 0) {
785                 prterr("msync");
786                 report_failure(203);
787         }
788         if (munmap(p, map_size) != 0) {
789                 prterr("munmap");
790                 report_failure(204);
791         }
792         output_debug(offset, size, "flush done");
793 }
794
795 void
796 doread(unsigned int offset, unsigned int size)
797 {
798         off_t ret;
799         unsigned int iret;
800         struct test_file *tf = get_tf();
801         int fd = tf->fd;
802
803         offset -= offset % readbdy;
804         if (tf->o_direct)
805                 size -= size % readbdy;
806
807         if (size == 0) {
808                 if (!quiet && testcalls > simulatedopcount && !tf->o_direct)
809                         prt("skipping zero size read\n");
810                 log4(OP_SKIPPED, OP_READ, offset, size);
811                 return;
812         }
813         if (size + offset > file_size) {
814                 if (!quiet && testcalls > simulatedopcount)
815                         prt("skipping seek/read past end of file\n");
816                 log4(OP_SKIPPED, OP_READ, offset, size);
817                 return;
818         }
819
820         log4(OP_READ + tf->o_direct, offset, size, 0);
821
822         if (testcalls <= simulatedopcount)
823                 return;
824
825         output_line(tf, OP_READ + tf->o_direct, offset, size);
826
827         ret = lseek(fd, (off_t)offset, SEEK_SET);
828         if (ret == (off_t)-1) {
829                 prterr("lseek");
830                 report_failure(140);
831         }
832         iret = read(fd, temp_buf, size);
833         output_debug(offset, size, "read done");
834         if (iret != size) {
835                 if (iret == -1)
836                         prterr("read");
837                 else
838                         prt("short read: 0x%x bytes instead of 0x%x\n",
839                             iret, size);
840                 report_failure(141);
841         }
842         check_buffers(offset, size);
843 }
844
845 void
846 check_eofpage(char *s, unsigned int offset, char *p, int size)
847 {
848         long last_page, should_be_zero;
849
850         if (offset + size <= (file_size & ~page_mask))
851                 return;
852         /*
853          * we landed in the last page of the file
854          * test to make sure the VM system provided 0's
855          * beyond the true end of the file mapping
856          * (as required by mmap def in 1996 posix 1003.1)
857          */
858         last_page = ((long)p + (offset & page_mask) + size) & ~page_mask;
859
860         for (should_be_zero = last_page + (file_size & page_mask);
861              should_be_zero < last_page + page_size;
862              should_be_zero++)
863                 if (*(char *)should_be_zero) {
864                         prt("Mapped %s: non-zero data past EOF (0x%llx) page offset 0x%lx is 0x%04x\n",
865                             s, (long long)file_size - 1,
866                             should_be_zero & page_mask,
867                             short_at(should_be_zero));
868                         report_failure(205);
869                 }
870 }
871
/*
 * Read size bytes at offset from a test file through a shared read-only
 * mapping and compare them with good_buf.
 *
 * offset is rounded down to a multiple of readbdy.  Zero-length reads and
 * reads past file_size are logged as OP_SKIPPED.  While testcalls is within
 * simulatedopcount the operation is only logged.  mmap/munmap failures end
 * the run with status 190/191.
 */
void
domapread(unsigned int offset, unsigned int size)
{
        unsigned int pg_offset;
        unsigned int map_size;
        char *p;
        int fd;

        offset -= offset % readbdy;
        /* the global tf is what log4() records for this operation */
        tf = get_tf();
        fd = tf->fd;
        if (size == 0) {
                if (!quiet && testcalls > simulatedopcount)
                        prt("skipping zero size read\n");
                log4(OP_SKIPPED, OP_MAPREAD, offset, size);
                return;
        }
        if (size + offset > file_size) {
                if (!quiet && testcalls > simulatedopcount)
                        prt("skipping seek/read past end of file\n");
                log4(OP_SKIPPED, OP_MAPREAD, offset, size);
                return;
        }

        log4(OP_MAPREAD, offset, size, 0);

        if (testcalls <= simulatedopcount)
                return;

        output_line(tf, OP_MAPREAD, offset, size);

        /* map from the page boundary at or below offset */
        pg_offset = offset & page_mask;
        map_size  = pg_offset + size;

        p = mmap(0, map_size, PROT_READ, MAP_FILE | MAP_SHARED, fd,
                 (off_t)(offset - pg_offset));
        if (p == MAP_FAILED) {
                prterr("mmap");
                report_failure(190);
        }
        output_debug(offset, size, "mmap done");
        /*
         * jmpbuf_good brackets the accesses to the mapping.
         * NOTE(review): presumably a signal handler outside this view
         * longjmp()s to jmpbuf on a fault, which lands in the else branch
         * below -- confirm.
         */
        if (setjmp(jmpbuf) == 0) {
                jmpbuf_good = 1;
                memcpy(temp_buf, p + pg_offset, size);
                check_eofpage("Read", offset, p, size);
                jmpbuf_good = 0;
        } else {
                report_failure(1901);
        }
        output_debug(offset, size, "memcpy done");
        if (munmap(p, map_size) != 0) {
                prterr("munmap");
                report_failure(191);
        }
        output_debug(offset, size, "munmap done");

        check_buffers(offset, size);
}
930
931 void
932 gendata(char *original_buf, char *good_buf, unsigned int offset,
933         unsigned int size)
934 {
935         while (size--) {
936                 good_buf[offset] = testcalls % 256;
937                 if (offset % 2)
938                         good_buf[offset] += original_buf[offset];
939                 offset++;
940         }
941 }
942
943 void
944 dowrite(unsigned int offset, unsigned int size)
945 {
946         off_t ret;
947         unsigned int iret;
948         int fd;
949
950         tf = get_tf();
951         fd = tf->fd;
952         offset -= offset % writebdy;
953         if (tf->o_direct)
954                 size -= size % writebdy;
955         if (size == 0) {
956                 if (!quiet && testcalls > simulatedopcount && !tf->o_direct)
957                         prt("skipping zero size write\n");
958                 log4(OP_SKIPPED, OP_WRITE, offset, size);
959                 return;
960         }
961
962         log4(OP_WRITE + tf->o_direct, offset, size, file_size);
963
964         gendata(original_buf, good_buf, offset, size);
965         if (file_size < offset + size) {
966                 if (file_size < offset)
967                         memset(good_buf + file_size, '\0', offset - file_size);
968                 file_size = offset + size;
969                 if (lite) {
970                         warn("Lite file size bug in fsx!");
971                         report_failure(149);
972                 }
973         }
974
975         if (testcalls <= simulatedopcount)
976                 return;
977
978         output_line(tf, OP_WRITE + tf->o_direct, offset, size);
979
980         ret = lseek(fd, (off_t)offset, SEEK_SET);
981         if (ret == (off_t)-1) {
982                 prterr("lseek");
983                 report_failure(150);
984         }
985         iret = write(fd, good_buf + offset, size);
986         output_debug(offset, size, "write done");
987         if (iret != size) {
988                 if (iret == -1)
989                         prterr("write");
990                 else
991                         prt("short write: 0x%x bytes instead of 0x%x\n",
992                             iret, size);
993                 report_failure(151);
994         }
995         if (do_fsync) {
996                 if (fsync(fd)) {
997                         prt("fsync() failed: %s\n", strerror(errno));
998                         report_failure(152);
999                 }
1000                 output_debug(offset, size, "fsync done");
1001         }
1002         if (flush) {
1003                 doflush(offset, size);
1004                 output_debug(offset, size, "flush done");
1005         }
1006 }
1007
1008 void
1009 domapwrite(unsigned int offset, unsigned int size)
1010 {
1011         unsigned int pg_offset;
1012         unsigned int map_size;
1013         off_t cur_filesize;
1014         char *p;
1015         int fd;
1016
1017         tf = get_tf();
1018         fd = tf->fd;
1019         offset -= offset % writebdy;
1020         if (size == 0) {
1021                 if (!quiet && testcalls > simulatedopcount)
1022                         prt("skipping zero size write\n");
1023                 log4(OP_SKIPPED, OP_MAPWRITE, offset, size);
1024                 return;
1025         }
1026         cur_filesize = file_size;
1027
1028         log4(OP_MAPWRITE, offset, size, 0);
1029
1030         gendata(original_buf, good_buf, offset, size);
1031         if (file_size < offset + size) {
1032                 if (file_size < offset)
1033                         memset(good_buf + file_size, '\0', offset - file_size);
1034                 file_size = offset + size;
1035                 if (lite) {
1036                         warn("Lite file size bug in fsx!");
1037                         report_failure(200);
1038                 }
1039         }
1040
1041         if (testcalls <= simulatedopcount)
1042                 return;
1043
1044         output_line(tf, OP_MAPWRITE, offset, size);
1045
1046         if (file_size > cur_filesize) {
1047                 if (ftruncate(fd, file_size) == -1) {
1048                         prterr("ftruncate");
1049                         exit(201);
1050                 }
1051                 output_debug(offset, size, "truncate done");
1052         }
1053         pg_offset = offset & page_mask;
1054         map_size  = pg_offset + size;
1055
1056         p = mmap(0, map_size, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED,
1057                  fd, (off_t)(offset - pg_offset));
1058         if (p == MAP_FAILED) {
1059                 prterr("mmap");
1060                 report_failure(202);
1061         }
1062         output_debug(offset, map_size, "mmap done");
1063         if (setjmp(jmpbuf) == 0) {
1064                 jmpbuf_good = 1;
1065                 memcpy(p + pg_offset, good_buf + offset, size);
1066                 if (msync(p, map_size, MS_SYNC) != 0) {
1067                         prterr("msync");
1068                         report_failure(203);
1069                 }
1070                 check_eofpage("Write", offset, p, size);
1071                 jmpbuf_good = 0;
1072         } else {
1073                 report_failure(2021);
1074         }
1075         output_debug(offset, map_size, "msync done");
1076         if (munmap(p, map_size) != 0) {
1077                 prterr("munmap");
1078                 report_failure(204);
1079         }
1080         output_debug(offset, map_size, "munmap done");
1081 }
1082
1083 void
1084 dotruncate(unsigned int size)
1085 {
1086         int oldsize = file_size;
1087         int fd;
1088
1089         tf = get_tf();
1090         fd = tf->fd;
1091         size -= size % truncbdy;
1092         if (size > biggest) {
1093                 biggest = size;
1094                 if (!quiet && testcalls > simulatedopcount)
1095                         prt("truncating to largest ever: 0x%x\n", size);
1096         }
1097
1098         log4(OP_TRUNCATE, size, (unsigned int)file_size, 0);
1099
1100         if (size > file_size)
1101                 memset(good_buf + file_size, '\0', size - file_size);
1102         file_size = size;
1103
1104         if (testcalls <= simulatedopcount)
1105                 return;
1106
1107         output_line(tf, OP_TRUNCATE, oldsize, size - oldsize);
1108
1109         if (ftruncate(fd, (off_t)size) == -1) {
1110                 prt("ftruncate: 0x%x\n", size);
1111                 prterr("ftruncate");
1112                 report_failure(160);
1113         }
1114         output_debug(size, 0, "truncate done");
1115 }
1116
1117 void
1118 do_punch_hole(unsigned int offset, unsigned int length)
1119 {
1120         int max_offset = 0;
1121         int max_len = 0;
1122         int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
1123         int fd;
1124
1125         tf = get_tf();
1126         fd = tf->fd;
1127         if (length == 0) {
1128                 if (!quiet && testcalls > simulatedopcount) {
1129                         prt("skipping zero length punch hole\n");
1130                         log4(OP_SKIPPED, OP_PUNCH_HOLE, offset, length);
1131                 }
1132                 return;
1133         }
1134
1135         if (file_size <= (loff_t)offset) {
1136                 if (!quiet && testcalls > simulatedopcount) {
1137                         prt("skipping hole punch off the end of the file\n");
1138                         log4(OP_SKIPPED, OP_PUNCH_HOLE, offset, length);
1139                 }
1140                 return;
1141         }
1142
1143         log4(OP_PUNCH_HOLE, offset, length, 0);
1144
1145         if (testcalls <= simulatedopcount)
1146                 return;
1147
1148         output_line(tf, OP_PUNCH_HOLE, offset, length);
1149         if (fallocate(fd, mode, (loff_t)offset, (loff_t)length) == -1) {
1150                 prt("punch hole: %x to %x\n", offset, length);
1151                 prterr("fallocate");
1152                 report_failure(161);
1153         }
1154         output_debug(offset, length, "zero_range done");
1155
1156         max_offset = offset < file_size ? offset : file_size;
1157         max_len = max_offset + length <= file_size ? length :
1158                         file_size - max_offset;
1159         memset(good_buf + max_offset, '\0', max_len);
1160 }
1161
1162 void
1163 do_zero_range(unsigned int offset, unsigned int length)
1164 {
1165         unsigned int end_offset;
1166         int mode = FALLOC_FL_ZERO_RANGE;
1167         int keep_size;
1168         int fd;
1169
1170         tf = get_tf();
1171         fd = tf->fd;
1172         if (length == 0) {
1173                 if (!quiet && testcalls > simulatedopcount) {
1174                         prt("skipping zero length zero range\n");
1175                         log4(OP_SKIPPED, OP_ZERO_RANGE, offset, length);
1176                 }
1177                 return;
1178         }
1179
1180         keep_size = random() % 2;
1181
1182         end_offset = keep_size ? 0 : offset + length;
1183
1184         if (end_offset > biggest) {
1185                 biggest = end_offset;
1186                 if (!quiet && testcalls > simulatedopcount)
1187                         prt("zero_range to largest ever: 0x%x\n", end_offset);
1188         }
1189
1190         /*
1191          * last arg matches fallocate string array index in logdump:
1192          * 0: allocate past EOF
1193          * 1: extending prealloc
1194          * 2: interior prealloc
1195          */
1196         log4(OP_ZERO_RANGE, offset, length,
1197              (end_offset > file_size) ? (keep_size ? 0 : 1) : 2);
1198
1199         if (testcalls <= simulatedopcount)
1200                 return;
1201
1202         output_line(tf, OP_TRUNCATE, offset, length);
1203
1204         if (fallocate(fd, mode, (loff_t)offset, (loff_t)length) == -1) {
1205                 prt("pzero range: %x to %x\n", offset, length);
1206                 prterr("fallocate");
1207                 report_failure(161);
1208         }
1209         output_debug(offset, length, "zero_range done");
1210
1211         memset(good_buf + offset, '\0', length);
1212 }
1213
1214 /*
1215  * fallocate is basically a no-op unless extending,
1216  * then a lot like a truncate
1217  */
1218 void
1219 do_preallocate(unsigned int offset, unsigned int length)
1220 {
1221         off_t end_offset;
1222         int keep_size;
1223         int fd;
1224         struct stat statbufs;
1225
1226         tf = get_tf();
1227         fd = tf->fd;
1228         if (length == 0) {
1229                 if (!quiet && testcalls > simulatedopcount)
1230                         prt("skipping zero length fallocate\n");
1231                 log4(OP_SKIPPED, OP_FALLOCATE, offset, length);
1232                 return;
1233         }
1234
1235         keep_size = fl_keep_size && (random() % 2);
1236
1237         end_offset = offset + length;
1238         if (end_offset > biggest) {
1239                 biggest = end_offset;
1240                 if (!quiet && testcalls > simulatedopcount)
1241                         prt("fallocating to largest ever: 0x%jx\n", end_offset);
1242         }
1243
1244         /*
1245          * last arg matches fallocate string array index in logdump:
1246          * 0: allocate past EOF
1247          * 1: extending prealloc
1248          * 2: interior prealloc
1249          */
1250         log4(OP_FALLOCATE, offset, length, (end_offset > file_size) ?
1251              (keep_size ? 0 : 1) : 2);
1252
1253         if (end_offset > file_size && !keep_size) {
1254                 memset(good_buf + file_size, '\0', end_offset - file_size);
1255                 file_size = end_offset;
1256         }
1257
1258         if (testcalls <= simulatedopcount)
1259                 return;
1260
1261         fstat(fd, &statbufs);
1262         if (fallocate(fd, keep_size ? FALLOC_FL_KEEP_SIZE : 0, (loff_t)offset,
1263                       (loff_t)length) == -1) {
1264                 prt("fallocate: %x to %x\n", offset, length);
1265                 prterr("fallocate");
1266                 report_failure(161);
1267         }
1268         output_line(tf, OP_FALLOCATE, offset, length);
1269         output_debug(offset, length, "fallocate done");
1270 }
1271
1272 void
1273 writefileimage()
1274 {
1275         ssize_t iret;
1276         int fd = get_fd();
1277
1278         if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) {
1279                 prterr("lseek");
1280                 report_failure(171);
1281         }
1282         iret = write(fd, good_buf, file_size);
1283         if ((off_t)iret != file_size) {
1284                 if (iret == -1)
1285                         prterr("write");
1286                 else
1287                         prt("short write: 0x%lx bytes instead of 0x%llx\n",
1288                             (unsigned long)iret, (unsigned long long)file_size);
1289                 report_failure(172);
1290         }
1291         if (lite ? 0 : ftruncate(fd, file_size) == -1) {
1292                 prt("ftruncate2: %llx\n", (unsigned long long)file_size);
1293                 prterr("ftruncate");
1294                 report_failure(173);
1295         }
1296 }
1297
1298 void
1299 docloseopen(void)
1300 {
1301         int direct = 0;
1302         const char *tf_num = "";
1303
1304         if (testcalls <= simulatedopcount)
1305                 return;
1306
1307         tf = get_tf();
1308         direct = (random() % (o_direct + 1)) ? OP_DIRECT : 0;
1309         log4(OP_CLOSEOPEN + direct, file_size, (unsigned int)file_size, 0);
1310
1311         if (fd_policy != FD_SINGLE)
1312                 tf_num = fill_tf_buf(tf);
1313
1314         if (debug)
1315                 prt("%06lu %lu.%06u %sclose/open%s\n", testcalls, tv.tv_sec,
1316                     (int)tv.tv_usec, tf_num, direct ? "(O_DIRECT)" : "");
1317         if (close(tf->fd))
1318                 report_failure(180);
1319
1320         output_debug(monitorstart, 0, "close done");
1321         tf->o_direct = direct;
1322         tf->fd = open(tf->path, O_RDWR | tf->o_direct, 0);
1323         if (tf->fd < 0) {
1324                 prterr(tf->o_direct ? "open(O_DIRECT)" : "open");
1325                 report_failure(181);
1326         }
1327         output_debug(monitorstart, 0,
1328                      tf->o_direct ? "open(O_DIRECT) done" : "open done");
1329 }
1330
1331 static int
1332 get_mirror_ids(int fd, unsigned int *ids)
1333 {
1334         struct llapi_layout *layout;
1335         uint16_t count;
1336         int rc;
1337
1338         layout = llapi_layout_get_by_fd(fd, 0);
1339         if (layout == NULL)
1340                 return 0;
1341
1342         /* only get mirror count */
1343         rc = llapi_layout_mirror_count_get(layout, &count);
1344         if (rc < 0)
1345                 prt("llapi_layout_mirror_count_get: %d\n", rc);
1346         if (count == 0)
1347                 return 0;
1348
1349         count = 0;
1350
1351         rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
1352         if (rc < 0) {
1353                 prt("llapi_layout_comp_use(USE_FIRST): %d\n", rc);
1354                 goto free;
1355         }
1356
1357         do {
1358                 unsigned int id;
1359
1360                 rc = llapi_layout_mirror_id_get(layout, &id);
1361                 if (rc < 0) {
1362                         prt("llapi_layout_mirror_id_get: %d\n", rc);
1363                         goto free;
1364                 }
1365
1366                 if (!count || ids[count - 1] != id)
1367                         ids[count++] = id;
1368
1369                 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT);
1370                 if (rc < 0) {
1371                         prt("llapi_layout_comp_use(USE_NEXT): %d\n", rc);
1372                         goto free;
1373                 }
1374         } while (rc == 0);
1375
1376 free:
1377         llapi_layout_free(layout);
1378
1379         return rc < 0 ? rc : count;
1380 }
1381
1382 void
1383 do_mirror_ops(int op)
1384 {
1385         int mirror_count;
1386         char cmd[PATH_MAX * 2];
1387         int i = 0;
1388         int rc;
1389
1390         if (testcalls <= simulatedopcount)
1391                 return;
1392
1393         tf = get_tf();
1394
1395         mirror_count = get_mirror_ids(tf->fd, mirror_ids);
1396         if (mirror_count < 0) {
1397                 prterr("get_mirror_ids");
1398                 report_failure(182);
1399         }
1400
1401         switch (op) {
1402         case MIRROR_EXTEND:
1403                 if (mirror_count == LUSTRE_MIRROR_COUNT_MAX)
1404                         return;
1405                 snprintf(cmd, sizeof(cmd), "lfs mirror extend -N -c-1 %s",
1406                          tf->path);
1407                 break;
1408         case MIRROR_SPLIT:
1409                 if (mirror_count == 0 || mirror_count == 1)
1410                         return;
1411
1412                 i = random() % mirror_count;
1413                 if (i == 0)
1414                         i++;
1415
1416                 snprintf(cmd, sizeof(cmd),
1417                          "lfs mirror split -d --mirror-id=%d %s",
1418                          mirror_ids[i], tf->path);
1419                 break;
1420         case MIRROR_RESYNC:
1421                 if (mirror_count < 2)
1422                         return;
1423
1424                 snprintf(cmd, sizeof(cmd),
1425                          "lfs mirror resync %s", tf->path);
1426                 break;
1427         }
1428
1429         if (close(tf->fd))
1430                 report_failure(183);
1431         output_debug(monitorstart, 0, "close done");
1432
1433         log4(OP_MIRROR_OPS, op, mirror_count, i);
1434
1435         mirror_output_line(tf, op, mirror_count, i);
1436
1437         rc = system(cmd);
1438         if (rc < 0) {
1439                 prt("%s: %d\n", cmd, errno);
1440                 report_failure(184);
1441         } else if (WIFEXITED(rc)) {
1442                 rc = WEXITSTATUS(rc);
1443                 if (rc > 0) {
1444                         prt("%s: %d\n", cmd, rc);
1445                         report_failure(184);
1446                 }
1447         }
1448         output_debug(monitorstart, 0, cmd);
1449
1450         switch (op) {
1451         case MIRROR_SPLIT:
1452                 if (mirror_count == 2)
1453                         break;
1454         case MIRROR_EXTEND:
1455         case MIRROR_RESYNC:
1456                 /* verify mirror */
1457                 snprintf(cmd, sizeof(cmd),
1458                          "lfs mirror verify %s", tf->path);
1459
1460                 rc = system(cmd);
1461                 if (rc < 0) {
1462                         prt("%s: %d\n", cmd, errno);
1463                         report_failure(184);
1464                 } else if (WIFEXITED(rc)) {
1465                         rc = WEXITSTATUS(rc);
1466                         if (rc > 0) {
1467                                 prt("%s: %d\n", cmd, rc);
1468                                 snprintf(cmd, sizeof(cmd),
1469                                          "lfs mirror verify -v %s", tf->path);
1470                                 rc = system(cmd);
1471                                 report_failure(184);
1472                         }
1473                 }
1474         }
1475
1476         output_debug(monitorstart, 0, cmd);
1477
1478         tf->fd = open(tf->path, O_RDWR | tf->o_direct, 0);
1479         if (tf->fd < 0) {
1480                 prterr(tf->o_direct ? "open(O_DIRECT)" : "open");
1481                 report_failure(185);
1482         }
1483         output_debug(monitorstart, 0,
1484                      tf->o_direct ? "open(O_DIRECT) done" : "open done");
1485 }
1486
/*
 * Clamp an offset/length pair so that it fits inside an object of the
 * given size: off is reduced modulo size (or set to 0 when size is 0)
 * and len is shortened so that off + len does not exceed size.
 * off and len are modified in place; each argument is evaluated more
 * than once.
 */
#define TRIM_OFF_LEN(off, len, size)			\
do {							\
	(off) = (size) ? (off) % (size) : 0;		\
	if ((off) + (len) > (size))			\
		(len) = (size) - (off);			\
} while (0)
1496
1497 void
1498 test(void)
1499 {
1500         unsigned long offset;
1501         unsigned long size = maxoplen;
1502         unsigned long rv = random();
1503         unsigned long op;
1504         int closeopen = 0;
1505
1506         if (simulatedopcount > 0 && testcalls == simulatedopcount)
1507                 writefileimage();
1508
1509         testcalls++;
1510
1511         if (closeprob)
1512                 closeopen = (rv >> 3) < (1 << 28) / closeprob;
1513
1514         if (debugstart > 0 && testcalls >= debugstart)
1515                 debug = 1;
1516
1517         if (!quiet && testcalls < simulatedopcount && testcalls % 100000 == 0)
1518                 prt("%lu...\n", testcalls);
1519
1520         offset = random();
1521         if (randomoplen)
1522                 size = random() % (maxoplen + 1);
1523
1524         /* calculate appropriate op to run */
1525         if (lite)
1526                 op = rv % OP_MAX_LITE;
1527         else
1528                 op = rv % OP_MAX_FULL;
1529
1530         switch (op) {
1531         case OP_MAPREAD:
1532                 if (!mapped_reads)
1533                         op = OP_READ;
1534                 break;
1535         case OP_MAPWRITE:
1536                 if (!mapped_writes)
1537                         op = OP_WRITE;
1538                 break;
1539         case OP_FALLOCATE:
1540                 if (!fallocate_calls) {
1541                         log4(OP_SKIPPED, OP_FALLOCATE, offset, size);
1542                         goto out;
1543                 }
1544                 break;
1545         case OP_PUNCH_HOLE:
1546                 if (!punch_hole_calls) {
1547                         log4(OP_SKIPPED, OP_PUNCH_HOLE, offset, size);
1548                         goto out;
1549                 }
1550                 break;
1551         case OP_ZERO_RANGE:
1552                 if (!zero_range_calls) {
1553                         log4(OP_SKIPPED, OP_ZERO_RANGE, offset, size);
1554                         goto out;
1555                 }
1556                 break;
1557         }
1558
1559         switch (op) {
1560         case OP_READ:
1561                 TRIM_OFF_LEN(offset, size, file_size);
1562                 doread(offset, size);
1563                 break;
1564         case OP_WRITE:
1565                 TRIM_OFF_LEN(offset, size, maxfilelen);
1566                 dowrite(offset, size);
1567                 break;
1568         case OP_MAPREAD:
1569                 TRIM_OFF_LEN(offset, size, file_size);
1570                 domapread(offset, size);
1571                 break;
1572         case OP_MAPWRITE:
1573                 TRIM_OFF_LEN(offset, size, maxfilelen);
1574                 domapwrite(offset, size);
1575                 break;
1576         case OP_TRUNCATE:
1577                 if (!style)
1578                         size = random() % maxfilelen;
1579                 dotruncate(size);
1580                 break;
1581         case OP_FALLOCATE:
1582                 TRIM_OFF_LEN(offset, size, maxfilelen);
1583                 do_preallocate(offset, size);
1584                 break;
1585         case OP_PUNCH_HOLE:
1586                 TRIM_OFF_LEN(offset, size, file_size);
1587                 do_punch_hole(offset, size);
1588                 break;
1589         case OP_ZERO_RANGE:
1590                 TRIM_OFF_LEN(offset, size, file_size);
1591                 do_zero_range(offset, size);
1592                 break;
1593         case OP_CLOSEOPEN:
1594                 if (closeopen)
1595                         docloseopen();
1596                 break;
1597         case OP_MIRROR_OPS:
1598                 if (flrmode)
1599                         do_mirror_ops(random() % MIRROR_OPS);
1600                 break;
1601         default:
1602                 prterr("unknown operation %d: Operation not supported");
1603                 report_failure(42);
1604                 break;
1605         }
1606
1607 out:
1608         if (sizechecks && testcalls > simulatedopcount)
1609                 check_size();
1610 }
1611
1612 void
1613 segv(int sig)
1614 {
1615         if (jmpbuf_good) {
1616                 jmpbuf_good = 0;
1617                 longjmp(jmpbuf, 1);
1618         }
1619         report_failure(9999);
1620 }
1621
1622 void
1623 cleanup(sig)
1624         int     sig;
1625 {
1626         if (sig)
1627                 prt("signal %d\n", sig);
1628         prt("testcalls = %lu\n", testcalls);
1629         exit(sig);
1630 }
1631
1632 void
1633 usage(void)
1634 {
1635         fprintf(stdout,
1636                 "usage: fsx [-dfnqFLOW] [-b opnum] [-c Prob] [-l flen] [-m start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t truncbdy] [-w writebdy] [-D startingop] [ -I random|rotate ] [-N numops] [-P dirpath] [-S seed] [-Z [prob]] fname [additional paths to fname..]\n"
1637 "       -b opnum: beginning operation number (default 1)\n"
1638 "       -c P: 1 in P chance of file close+open at each op (default infinity)\n"
1639 "       -d: debug output for all operations [-d -d = more debugging]\n"
1640 "       -f flush and invalidate cache after I/O\n"
1641 /* OSX: -d duration: number of hours for the tool to run\n\ */
1642 /* OSX: -e: tests using an extended attribute rather than a file\n\ */
1643 /* OSX: -f forkname: test the named fork of fname\n\ */
1644 /* OSX: -g logpath: path for .fsxlog file\n\ */
1645 /* OSX: -h: write 0s instead of creating holes (i.e. sparse file)\n\ */
1646 /* OSX: -i: interactive mode, hit return before performing each operation\n\ */
1647 "       -l flen: the upper bound on file size (default 262144)\n"
1648 "       -m startop:endop: monitor (print debug output) specified byte range\n"
1649 "          (default 0:infinity)\n"
1650 "       -n: no verifications of file size\n"
1651 "       -o oplen: the upper bound on operation size (default 65536)\n"
1652 "       -p progressinterval: debug output at specified operation interval\n"
1653 "       -q: quieter operation\n"
1654 "       -r readbdy: %1$u would make reads page aligned (default 1)\n"
1655 "       -s style: 1 gives smaller truncates (default 0)\n"
1656 "       -t truncbdy: %1$u would make truncates page aligned (default 1)\n"
1657 "       -w writebdy: %1$u would make writes page aligned (default 1)\n"
1658 /* XFS: -x: preallocate file space before starting, XFS only (default 0)\n\ */
1659 "       -y synchronize changes to a file\n"
1660 /* OSX: -v: debug output for all operations\n\ */
1661 /* XFS: -A: Use the AIO system calls\n" */
1662 /* OSX: -C mix cached and un-cached read/write ops\n\ */
1663 "       -D startingop: debug output starting at specified operation\n"
1664 "       -F: Do not use fallocate (preallocation) calls\n"
1665 /* OSX: -G logsize: #entries in oplog (default 1024)\n\ */
1666 #ifdef FALLOC_FL_PUNCH_HOLE
1667 "       -H: Do not use punch hole calls\n"
1668 #endif
1669 #ifdef FALLOC_FL_ZERO_RANGE
1670 "       -z: Do not use zero range calls\n"
1671 #endif
1672 /* XFS: -C: Do not use collapse range calls\n\ */
1673 "       -I [rotate|random]: When multiple paths to the file are given,\n"
1674 "           each operation uses a different path.  Iterate through them in\n"
1675 "           order with 'rotate' or chose them at 'random'.  (default random)\n"
1676 "       -L: fsxLite - no file creations & no file size changes\n"
1677 /* OSX: -I: start interactive mode since operation opnum\n\ */
1678 "       -M: mirror file test mode\n"
1679 "       -N numops: total # operations to do (default infinity)\n"
1680 "       -O: use oplen (see -o flag) for every op (default random)\n"
1681 "       -P: save .fsxlog and .fsxgood files in dirpath (default ./)\n"
1682 "       -R: read() system calls only (mapped reads disabled)\n"
1683 "       -S seed: for random # generator (default 1) 0 gets timestamp\n"
1684 /* OSX: -T datasize: atomic data element write size [1,2,4] (default 4)\n\ */
1685 "       -W: mapped write operations DISabled\n"
1686 "       -Z[P]: O_DIRECT file IO [1 in P chance for each open] (default off)\n"
1687 "       fname: this filename is REQUIRED (no default)\n",
1688         page_size);
1689         exit(90);
1690 }
1691
/*
 * Parse a number with an optional one-letter unit suffix.
 *
 * s - string to parse; the base is detected by strtol() (base 0)
 * e - receives a pointer to the first character that was not consumed
 *
 * Recognised suffixes (either case): b = 512, k = 1024,
 * m = 1024 * 1024, w = 4.  A recognised suffix is consumed and the value
 * is multiplied accordingly; any other character is left for the caller.
 *
 * Returns the resulting value as an int.
 */
int
getnum(char *s, char **e)
{
	int value;
	int scale = 0;

	*e = NULL;
	value = strtol(s, e, 0);
	if (!*e)
		return value;

	switch (**e) {
	case 'b':
	case 'B':
		scale = 512;
		break;
	case 'k':
	case 'K':
		scale = 1024;
		break;
	case 'm':
	case 'M':
		scale = 1024 * 1024;
		break;
	case 'w':
	case 'W':
		scale = 4;
		break;
	}

	/* a recognised suffix is applied and then stepped over */
	if (scale) {
		value *= scale;
		(*e)++;
	}

	return value;
}
1724
1725 int
1726 test_fallocate(int mode)
1727 {
1728         int ret = 0;
1729         int fd = get_fd();
1730
1731         if (!lite) {
1732                 /* Must go more than a page away so let's go 4M to be sure */
1733                 if (fallocate(fd, mode, 0, 4096*1024) && errno == EOPNOTSUPP) {
1734                         if (!quiet)
1735                                 warn("%s: filesystem does not support fallocate mode 0x%x, disabling!",
1736                                      __func__, mode);
1737                 } else {
1738                         ret = 1;
1739                 }
1740
1741                 /* Always call ftruncate since file size might be adjusted
1742                  * by fallocate even on error
1743                  */
1744                 if (ftruncate(fd, 0) == -1)
1745                         warn("ftruncate to 0 size failed");
1746         }
1747         return ret;
1748 }
1749
1750 int
1751 main(int argc, char **argv)
1752 {
1753         int i, style, ch;
1754         char *endp;
1755         int dirpath = 0;
1756
1757         goodfile[0] = 0;
1758         logfile[0] = 0;
1759
1760         page_size = getpagesize();
1761         page_mask = page_size - 1;
1762
1763         setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
1764
1765         while ((ch = getopt(argc, argv,
1766                             "b:c:dfl:m:no:p:qr:s:t:w:xyzD:FHI:LMN:OP:RS:WZ::"))
1767                != EOF)
1768                 switch (ch) {
1769                 case 'b':
1770                         simulatedopcount = getnum(optarg, &endp);
1771                         if (!quiet)
1772                                 fprintf(stdout, "Will begin at operation %ld\n",
1773                                         simulatedopcount);
1774                         if (simulatedopcount == 0)
1775                                 usage();
1776                         simulatedopcount -= 1;
1777                         break;
1778                 case 'c':
1779                         closeprob = getnum(optarg, &endp);
1780                         if (!quiet)
1781                                 fprintf(stdout,
1782                                         "Chance of close/open is 1 in %d\n",
1783                                         closeprob);
1784                         if (closeprob <= 0)
1785                                 usage();
1786                         break;
1787                 case 'd':
1788                         debug++;
1789                         break;
1790                 case 'f':
1791                         flush = 1;
1792                         break;
1793                 case 'l':
1794                         maxfilelen = getnum(optarg, &endp);
1795                         if (maxfilelen <= 0)
1796                                 usage();
1797                         break;
1798                 case 'm':
1799                         monitorstart = getnum(optarg, &endp);
1800                         if (monitorstart < 0)
1801                                 usage();
1802                         if (!endp || *endp++ != ':')
1803                                 usage();
1804                         monitorend = getnum(endp, &endp);
1805                         if (monitorend < 0)
1806                                 usage();
1807                         if (monitorend == 0)
1808                                 monitorend = -1; /* aka infinity */
1809                         debug = 1;
1810                 case 'n':
1811                         sizechecks = 0;
1812                         break;
1813                 case 'o':
1814                         maxoplen = getnum(optarg, &endp);
1815                         if (maxoplen <= 0)
1816                                 usage();
1817                         break;
1818                 case 'p':
1819                         progressinterval = getnum(optarg, &endp);
1820                         if (progressinterval <= 0)
1821                                 usage();
1822                         break;
1823                 case 'q':
1824                         quiet = 1;
1825                         break;
1826                 case 'r':
1827                         readbdy = getnum(optarg, &endp);
1828                         if (readbdy <= 0)
1829                                 usage();
1830                         break;
1831                 case 's':
1832                         style = getnum(optarg, &endp);
1833                         if (style < 0 || style > 1)
1834                                 usage();
1835                         break;
1836                 case 't':
1837                         truncbdy = getnum(optarg, &endp);
1838                         if (truncbdy <= 0)
1839                                 usage();
1840                         break;
1841                 case 'w':
1842                         writebdy = getnum(optarg, &endp);
1843                         if (writebdy <= 0)
1844                                 usage();
1845                         break;
1846                 case 'y':
1847                         do_fsync = 1;
1848                         break;
1849                 case 'D':
1850                         debugstart = getnum(optarg, &endp);
1851                         if (debugstart < 1)
1852                                 usage();
1853                         break;
1854                 case 'F':
1855                         fallocate_calls = 0;
1856                         break;
1857                 case 'H':
1858                         punch_hole_calls = 0;
1859                         break;
1860                 case 'z':
1861                         zero_range_calls = 0;
1862                         break;
1863                 case 'I':
1864                         assign_fd_policy(optarg);
1865                         break;
1866                 case 'L':
1867                         lite = 1;
1868                         break;
1869                 case 'M':
1870                         flrmode = 1;
1871                         break;
1872                 case 'N':
1873                         numops = getnum(optarg, &endp);
1874                         if (numops < 0)
1875                                 usage();
1876                         break;
1877                 case 'O':
1878                         randomoplen = 0;
1879                         break;
1880                 case 'P':
1881                         strncpy(goodfile, optarg, sizeof(goodfile) - 1);
1882                         strncat(goodfile, "/", PATH_MAX - strlen(goodfile) - 1);
1883                         strncpy(logfile, optarg, sizeof(logfile) - 1);
1884                         strncat(logfile, "/", PATH_MAX - strlen(logfile) - 1);
1885                         dirpath = 1;
1886                         break;
1887                 case 'R':
1888                         mapped_reads = 0;
1889                         break;
1890                 case 'S':
1891                         seed = getnum(optarg, &endp);
1892                         if (seed == 0)
1893                                 seed = time(0) % 10000;
1894                         if (!quiet)
1895                                 fprintf(stdout, "Seed set to %d\n", seed);
1896                         if (seed < 0)
1897                                 usage();
1898                         break;
1899                 case 'W':
1900                         mapped_writes = 0;
1901                         if (!quiet)
1902                                 fprintf(stdout, "mapped writes DISABLED\n");
1903                         break;
1904                 case 'Z':
1905                         if (optarg)
1906                                 o_direct = getnum(optarg, &endp);
1907                         if (!optarg || o_direct == 0)
1908                                 o_direct = 1;
1909                         break;
1910                 default:
1911                         usage();
1912                         /* NOTREACHED */
1913                 }
1914         argc -= optind;
1915         argv += optind;
1916         if (argc < 1)
1917                 usage();
1918         fname = argv[0];
1919
1920         signal(SIGHUP, cleanup);
1921         signal(SIGINT, cleanup);
1922         signal(SIGPIPE, cleanup);
1923         signal(SIGALRM, cleanup);
1924         signal(SIGTERM, cleanup);
1925         signal(SIGXCPU, cleanup);
1926         signal(SIGXFSZ, cleanup);
1927         signal(SIGVTALRM, cleanup);
1928         signal(SIGUSR1, cleanup);
1929         signal(SIGUSR2, cleanup);
1930         signal(SIGBUS, segv);
1931         signal(SIGSEGV, segv);
1932
1933         initstate(seed, state, 256);
1934         setstate(state);
1935
1936         open_test_files(argv, argc);
1937
1938         strncat(goodfile, dirpath ? my_basename(fname) : fname, 256);
1939         strncat(goodfile, ".fsxgood", PATH_MAX - strlen(goodfile) - 1);
1940         fsxgoodfd = open(goodfile, O_RDWR | O_CREAT | O_TRUNC, 0666);
1941         if (fsxgoodfd < 0) {
1942                 prterr(goodfile);
1943                 exit(92);
1944         }
1945         strncat(logfile, dirpath ? my_basename(fname) : fname, 256);
1946         strncat(logfile, ".fsxlog", PATH_MAX - strlen(logfile) - 1);
1947         fsxlogf = fopen(logfile, "w");
1948         if (!fsxlogf) {
1949                 prterr(logfile);
1950                 exit(93);
1951         }
1952         if (lite) {
1953                 off_t ret;
1954                 int fd = get_fd();
1955
1956                 maxfilelen = lseek(fd, (off_t)0, SEEK_END);
1957                 file_size = maxfilelen;
1958                 if (file_size == (off_t)-1) {
1959                         prterr(fname);
1960                         warn("%s: lseek eof", __func__);
1961                         exit(94);
1962                 }
1963                 ret = lseek(fd, (off_t)0, SEEK_SET);
1964                 if (ret == (off_t)-1) {
1965                         prterr(fname);
1966                         warn("%s: lseek 0", __func__);
1967                         exit(95);
1968                 }
1969         }
1970         original_buf = (char *)malloc(maxfilelen);
1971         if (!original_buf)
1972                 exit(96);
1973         for (i = 0; i < maxfilelen; i++)
1974                 original_buf[i] = random() % 256;
1975         if (o_direct) {
1976                 int ret;
1977
1978                 ret = posix_memalign((void **)&good_buf, writebdy, maxfilelen);
1979                 if (ret) {
1980                         prt("%s: posix_memalign failed: %s\n", __func__,
1981                             strerror(ret));
1982                         exit(96);
1983                 }
1984
1985                 ret = posix_memalign((void **)&temp_buf, readbdy, maxoplen);
1986                 if (ret) {
1987                         prt("%s: posix_memalign failed: %s\n", __func__,
1988                             strerror(ret));
1989                         exit(97);
1990                 }
1991         } else {
1992                 good_buf = malloc(maxfilelen);
1993                 if (!good_buf) {
1994                         prt("malloc failed.\n");
1995                         exit(98);
1996                 }
1997
1998                 temp_buf = malloc(maxoplen);
1999                 if (!temp_buf) {
2000                         prt("malloc failed.\n");
2001                         exit(99);
2002                 }
2003         }
2004         memset(good_buf, 0, maxfilelen);
2005         memset(temp_buf, 0, maxoplen);
2006
2007         if (lite) {     /* zero entire existing file */
2008                 ssize_t written;
2009                 int fd = get_fd();
2010
2011                 written = write(fd, good_buf, (size_t)maxfilelen);
2012                 if (written != maxfilelen) {
2013                         if (written == -1) {
2014                                 prterr(fname);
2015                                 warn("%s: error on write", __func__);
2016                         } else {
2017                                 warn("%s: short write, 0x%x bytes instead of 0x%lx\n",
2018                                      __func__, (unsigned int)written,
2019                                      maxfilelen);
2020                         }
2021                         exit(98);
2022                 }
2023         } else {
2024                 check_trunc_hack();
2025         }
2026
2027         if (fallocate_calls)
2028                 fallocate_calls = test_fallocate(0);
2029
2030         if (punch_hole_calls)
2031                 punch_hole_calls = test_fallocate(FALLOC_FL_PUNCH_HOLE |
2032                                                   FALLOC_FL_KEEP_SIZE);
2033
2034         if (zero_range_calls)
2035                 zero_range_calls = test_fallocate(FALLOC_FL_ZERO_RANGE);
2036
2037         fl_keep_size = test_fallocate(FALLOC_FL_KEEP_SIZE);
2038
2039         while (numops == -1 || numops--)
2040                 test();
2041
2042         close_test_files();
2043         prt("All operations completed A-OK!\n");
2044
2045         free(original_buf);
2046         free(good_buf);
2047         free(temp_buf);
2048
2049         return 0;
2050 }