2 * Copyright (C) 1991, NeXT Computer, Inc. All Rights Reserved.
3 * Copyright (c) 1998-2001 Apple Computer, Inc. All rights reserved.
5 * Copyright (c) 2012, Intel Corporation.
7 * @APPLE_LICENSE_HEADER_START@
9 * The contents of this file constitute Original Code as defined in and
10 * are subject to the Apple Public Source License Version 1.1 (the
11 * "License"). You may not use this file except in compliance with the
12 * License. Please obtain a copy of the License at
13 * http://www.apple.com/publicsource and read it before using this file.
15 * This Original Code and all software distributed under the License are
16 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
20 * License for the specific language governing rights and limitations
23 * @APPLE_LICENSE_HEADER_END@
26 * Author: Avadis Tevanian, Jr.
28 * File system exerciser.
30 * Rewrite and enhancements 1998-2001 Conrad Minshall -- conrad@mac.com
32 * Various features from Joe Sokol, Pat Dirks, and Clark Warner.
34 * Small changes to work under Linux -- davej.
36 * Sundry porting patches from Guy Harris 12/2001
37 * $FreeBSD: src/tools/regression/fsx/fsx.c,v 1.1 2001/12/20 04:15:57 jkh Exp $
39 * Checks for mmap last-page zero fill.
41 * Add multi-file testing feature -- Zach Brown <zab@clusterfs.com>
43 * Add random preallocation calls - Eric Sandeen <sandeen@redhat.com>
45 * $FreeBSD: src/tools/regression/fsx/fsx.c,v 1.2 2003/04/23 23:42:23 jkh Exp $
46 * $DragonFly: src/test/stress/fsx/fsx.c,v 1.2 2005/05/02 19:31:56 dillon Exp $
52 #include <sys/types.h>
54 #if defined(_UWIN) || defined(__linux__)
55 # include <sys/param.h>
75 #include <libcfs/util/string.h>
78 #include <linux/lustre/lustre_idl.h>
79 #include <lustre/lustreapi.h>
82 * Each test run will work with one or more separate file descriptors for the
83 * same file. This allows testing cache coherency across multiple mountpoints
84 * of the same network filesystem on a single client.
90 } *test_files = NULL, *tf;
94 enum fd_iteration_policy {
100 int fd_policy = FD_RANDOM;
104 * A log entry is an operation and a bunch of arguments.
111 const struct test_file *tf;
114 #define LOGSIZE 100000
116 struct log_entry oplog[LOGSIZE]; /* the log */
117 int logptr; /* current position in log */
118 int logcount; /* total ops */
122 unsigned int mirror_ids[LUSTRE_MIRROR_COUNT_MAX];
/*
 * Operation codes.  The op selector reduces a random value modulo
 * OP_MAX_LITE or OP_MAX_FULL (presumably keyed on the -L "lite" flag --
 * confirm against the selector), so each OP_MAX_* value is one past the
 * highest op code reachable in that mode.
 */
127 /* common operations */
131 #define OP_MAPWRITE 3
/* modulus for the reduced op set: only codes below 4 can be selected */
132 #define OP_MAX_LITE 4
/*
 * The codes below continue from 4.  OP_TRUNCATE intentionally has the same
 * value as OP_MAX_LITE: it is the first code excluded by the lite modulus.
 */
134 /* !lite operations */
135 #define OP_TRUNCATE 4
136 #define OP_FALLOCATE 5
137 #define OP_PUNCH_HOLE 6
138 #define OP_ZERO_RANGE 7
139 #define OP_CLOSEOPEN 8
140 #define OP_MIRROR_OPS 9
/* modulus for the full op set: codes 0..9 can be selected */
141 #define OP_MAX_FULL 10
143 #define MIRROR_EXTEND 0
144 #define MIRROR_SPLIT 1
145 #define MIRROR_RESYNC 2
148 char *mirror_op_str[] = {
149 [MIRROR_EXTEND] = "MIRROR_EXTEND",
150 [MIRROR_SPLIT] = "MIRROR_SPLIT",
151 [MIRROR_RESYNC] = "MIRROR_RESYNC",
154 #define OP_SKIPPED 101
155 #define OP_DIRECT O_DIRECT
157 #ifndef FALLOC_FL_PUNCH_HOLE
158 #define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */
161 #ifndef FALLOC_FL_KEEP_SIZE
162 #define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */
165 #ifndef FALLOC_FL_ZERO_RANGE
166 #define FALLOC_FL_ZERO_RANGE 0x10 /* convert range to zeros */
/*
 * Global data buffers and file names shared by the operations below.
 */
170 char *original_buf; /* original data; gendata() adds it into good_buf */
171 char *good_buf; /* expected file contents; check_buffers() compares against it */
172 char *temp_buf; /* data most recently read back from the file under test */
173 char *fname; /* name of our test file */
174 char logfile[PATH_MAX]; /* name of our log file */
175 char goodfile[PATH_MAX]; /* name of the saved good-data file (presumably the .fsxgood file -- TODO confirm) */
177 struct timeval tv; /* time current operation started */
181 unsigned long testcalls; /* calls to function "test" */
183 long simulatedopcount; /* -b flag */
184 int closeprob; /* -c flag */
185 int debug ; /* -d flag */
186 long debugstart; /* -D flag */
187 int flush; /* -f flag */
188 int do_fsync; /* -y flag */
189 long maxfilelen = 256 * 1024; /* -l flag */
190 int sizechecks = 1; /* -n flag disables them */
191 int maxoplen = 64 * 1024; /* -o flag */
192 int quiet; /* -q flag */
193 long progressinterval; /* -p flag */
194 int readbdy = 1; /* -r flag */
195 int style; /* -s flag */
196 int truncbdy = 1; /* -t flag */
197 int writebdy = 1; /* -w flag */
198 long monitorstart = -1; /* -m flag */
199 long monitorend = -1; /* -m flag */
200 long flrmode; /* -M flag */
201 int lite; /* -L flag */
202 long numops = -1; /* -N flag */
203 int randomoplen = 1; /* -O flag disables it */
204 int seed = 1; /* -S flag */
205 int mapped_writes = 1; /* -W flag disables */
206 int fallocate_calls = 1; /* -F flag disables */
207 int punch_hole_calls = 1; /* -H flag disables */
208 int zero_range_calls = 1; /* -z flag disables */
209 int mapped_reads = 1; /* -R flag disables it */
211 int o_direct; /* -Z */
221 vwarnc(code, fmt, ap)
226 fprintf(stderr, "fsx: ");
228 vfprintf(stderr, fmt, ap);
229 fprintf(stderr, ": ");
231 fprintf(stderr, "%s\n", strerror(code));
235 __attribute__((format(__printf__, 1, 2)))
236 warn(const char *fmt, ...)
241 vwarnc(errno, fmt, ap);
246 __attribute__((format(__printf__, 1, 2)))
252 vfprintf(stdout, fmt, args);
257 vfprintf(fsxlogf, fmt, args);
263 * prterr() is now a macro. It internally calls ptrerr_func()
264 * which transparently handles passing of function name.
265 * This version also keeps checkpatch happy.
/*
 * ptrerr_func() - report the current errno through prt().
 * @func:   name of the reporting function (prterr() supplies __func__)
 * @prefix: caller-supplied context string printed before the error text
 *
 * Output has the form "<func>: <prefix>: <strerror(errno)>".
 *
 * NOTE(review): the ": " separator is only emitted when @prefix is non-NULL,
 * yet @prefix itself is handed to "%s" unconditionally, so a NULL @prefix
 * would still reach the formatter.  Visible callers pass string literals;
 * confirm no caller passes NULL, or substitute "" for it.
 */
268 ptrerr_func(const char *func, const char *prefix)
270 prt("%s: %s%s%s\n", func, prefix, prefix ? ": " : "", strerror(errno));
/* prterr(prefix): convenience wrapper that passes the caller's function name */
272 #define prterr(prefix) ptrerr_func(__func__, prefix)
275 log4(int operation, int arg0, int arg1, int arg2)
277 struct log_entry *le;
280 le->operation = operation;
284 gettimeofday(&tv, NULL);
289 if (logptr >= LOGSIZE)
294 fill_tf_buf(const struct test_file *tf)
296 static int max_tf_len;
297 static char tf_buf[32];
299 if (fd_policy == FD_SINGLE)
303 max_tf_len = scnprintf(tf_buf, sizeof(tf_buf) - 1,
304 "%u", num_test_files - 1);
306 snprintf(tf_buf, sizeof(tf_buf), "[%0*lu]", max_tf_len,
307 (unsigned long)(tf - test_files));
316 struct log_entry *lp;
317 char *falloc_type[3] = {"PAST_EOF", "EXTENDING", "INTERIOR"};
319 prt("LOG DUMP (%d total operations):\n", logcount);
320 if (logcount < LOGSIZE) {
327 for ( ; count > 0; count--) {
330 opnum = i + 1 + (logcount / LOGSIZE) * LOGSIZE;
332 prt("%d%s: %lu.%06u ", opnum, fill_tf_buf(lp->tf),
333 lp->tv.tv_sec, (int)lp->tv.tv_usec);
335 switch (lp->operation) {
337 prt("MAPREAD 0x%05x thru 0x%05x (0x%05x bytes)",
338 lp->args[0], lp->args[0] + lp->args[1] - 1,
340 if (badoff >= lp->args[0] && badoff <
341 lp->args[0] + lp->args[1])
345 prt("MAPWRITE 0x%05x thru 0x%05x (0x%05x bytes)",
346 lp->args[0], lp->args[0] + lp->args[1] - 1,
348 if (badoff >= lp->args[0] && badoff <
349 lp->args[0] + lp->args[1])
353 case OP_READ + OP_DIRECT:
354 prt("READ%s 0x%05x thru 0x%05x (0x%05x bytes)",
355 lp->operation & OP_DIRECT ? "_OD" : " ",
356 lp->args[0], lp->args[0] + lp->args[1] - 1,
358 if (badoff >= lp->args[0] &&
359 badoff < lp->args[0] + lp->args[1])
363 case OP_WRITE + OP_DIRECT:
364 prt("WRITE%s 0x%05x thru 0x%05x (0x%05x bytes)",
365 lp->operation & OP_DIRECT ? "_OD" : " ",
366 lp->args[0], lp->args[0] + lp->args[1] - 1,
368 if (lp->args[0] > lp->args[2])
370 else if (lp->args[0] + lp->args[1] > lp->args[2])
372 if ((badoff >= lp->args[0] || badoff >= lp->args[2]) &&
373 badoff < lp->args[0] + lp->args[1])
377 down = lp->args[0] < lp->args[1];
378 prt("TRUNC%s 0x%05x to 0x%05x",
379 down ? "_DN" : "_UP", lp->args[1], lp->args[0]);
380 if (badoff >= lp->args[!down] &&
381 badoff < lp->args[!!down])
385 /* 0: offset 1: length 2: where alloced */
386 prt("FALLOC 0x%05x thru 0x%05x\t(0x%05x bytes)%s",
387 lp->args[0], lp->args[0] + lp->args[1],
388 lp->args[1], falloc_type[lp->args[2]]);
389 if (badoff >= lp->args[0] &&
390 badoff < lp->args[0] + lp->args[1])
394 prt("PUNCH 0x%05x thru 0x%05x\t(0x%05x bytes)",
395 lp->args[0], lp->args[0] + lp->args[1] - 1,
397 if (badoff >= lp->args[0] && badoff <
398 lp->args[0] + lp->args[1])
402 prt("ZERO 0x%05x thru 0x%05x\t(0x%05x bytes)",
403 lp->args[0], lp->args[0] + lp->args[1] - 1,
405 if (badoff >= lp->args[0] && badoff <
406 lp->args[0] + lp->args[1])
410 case OP_CLOSEOPEN + OP_DIRECT:
412 lp->operation & OP_DIRECT ? "_OD" : " ");
414 case OP_MIRROR_OPS: {
415 prt("%s ", mirror_op_str[lp->args[0]]);
416 if (lp->args[0] == MIRROR_EXTEND)
417 prt("to %d mirrors", lp->args[1] + 1);
418 else if (lp->args[0] == MIRROR_SPLIT)
419 prt("mirror %d to %d mirrors", lp->args[2],
421 else if (lp->args[0] == MIRROR_RESYNC)
422 prt("%d mirrors", lp->args[1]);
426 prt("SKIPPED (no operation)");
429 prt("BOGUS LOG ENTRY (operation code = %d)!",
440 save_buffer(char *buffer, off_t bufferlength, int fd)
443 ssize_t byteswritten;
445 if (fd <= 0 || bufferlength == 0)
448 if (bufferlength > INT_MAX) {
449 prt("fsx flaw: overflow in %s\n", __func__);
453 off_t size_by_seek = lseek(fd, (off_t)0, SEEK_END);
455 if (size_by_seek == (off_t)-1) {
457 } else if (bufferlength > size_by_seek) {
458 warn("%s: .fsxgood file too short... will save 0x%llx bytes instead of 0x%llx\n",
459 __func__, (unsigned long long)size_by_seek,
460 (unsigned long long)bufferlength);
461 bufferlength = size_by_seek;
465 ret = lseek(fd, (off_t)0, SEEK_SET);
466 if (ret == (off_t)-1)
469 byteswritten = write(fd, buffer, (size_t)bufferlength);
470 if (byteswritten != bufferlength) {
471 if (byteswritten == -1)
474 warn("%s: short write, 0x%x bytes instead of 0x%llx\n",
475 __func__, (unsigned int)byteswritten,
476 (unsigned long long)bufferlength);
481 report_failure(int status)
484 prt("Using seed %d\n", seed);
488 save_buffer(good_buf, file_size, fsxgoodfd);
489 prt("Correct content saved for comparison\n");
490 prt("(maybe hexdump \"%s\" vs \"%s\")\n",
/*
 * short_at(cp) - build a 16-bit value from the two bytes starting at cp.
 *
 * The byte at cp becomes the high-order 8 bits and the byte at cp + 1 the
 * low-order 8 bits, independent of host byte order.  Both bytes are read as
 * unsigned char, so no sign extension occurs.  The argument is cast to a
 * pointer, so callers may pass either a pointer or an integer address.
 *
 * cp is expanded twice; do not pass an expression with side effects.
 */
498 #define short_at(cp) ((unsigned short)((*((unsigned char *)(cp)) << 8) | \
499 *(((unsigned char *)(cp)) + 1)))
502 check_buffers(unsigned int offset, unsigned int size)
508 unsigned int bad = 0;
510 if (memcmp(good_buf + offset, temp_buf, size) != 0) {
511 prt("READ BAD DATA: offset = 0x%x, size = 0x%x\n",
513 prt("OFFSET\tGOOD\tBAD\tRANGE\n");
515 c = good_buf[offset];
519 bad = short_at(&temp_buf[i]);
520 prt("%#07x\t%#06x\t%#06x", offset,
521 short_at(&good_buf[offset]), bad);
522 op = temp_buf[offset & 1 ? i + 1 : i];
534 prt("operation# (mod 256) for the bad data may be %u\n",
535 ((unsigned int)op & 0xff));
537 prt("operation# (mod 256) for the bad data unknown, check HOLE and EXTEND ops\n");
539 prt("????????????????\n");
545 static struct test_file *
548 unsigned int index = 0;
561 prt("unknown policy");
565 return &test_files[index % num_test_files];
569 assign_fd_policy(char *policy)
571 if (!strcmp(policy, "random")) {
572 fd_policy = FD_RANDOM;
573 } else if (!strcmp(policy, "rotate")) {
574 fd_policy = FD_ROTATE;
576 prt("unknown -I policy: '%s'\n", policy);
584 struct test_file *tf = get_tf();
/*
 * my_basename() - return the final component of a path.
 * @path: NUL-terminated path string; must not be NULL
 *
 * Returns a pointer into @path just past its last '/', or @path itself when
 * it contains no '/'.  A path ending in '/' yields an empty string.  The
 * input is never modified and nothing is allocated.
 */
static const char *my_basename(const char *path)
{
	const char *slash = strrchr(path, '/');

	/*
	 * Step past the separator.  A post-increment here would yield the
	 * pointer to the '/' itself and leave the slash in the result.
	 */
	return slash ? slash + 1 : path;
}
597 open_test_files(char **argv, int argc)
599 struct test_file *tf;
602 num_test_files = argc;
603 if (num_test_files == 1)
604 fd_policy = FD_SINGLE;
606 test_files = calloc(num_test_files, sizeof(*test_files));
608 prterr("reallocating space for test files");
612 for (i = 0, tf = test_files; i < num_test_files; i++, tf++) {
614 tf->o_direct = (random() % (o_direct + 1)) ? OP_DIRECT : 0;
615 tf->fd = open(tf->path,
616 O_RDWR | (lite ? 0 : O_CREAT | O_TRUNC) |
624 if (quiet || fd_policy == FD_SINGLE)
627 for (i = 0, tf = test_files; i < num_test_files; i++, tf++)
628 prt("fd %d: %s\n", i, tf->path);
632 close_test_files(void)
635 struct test_file *tf;
637 for (i = 0, tf = test_files; i < num_test_files; i++, tf++) {
652 if (fstat(fd, &statbuf)) {
654 statbuf.st_size = -1;
656 size_by_seek = lseek(fd, (off_t)0, SEEK_END);
657 if (file_size != statbuf.st_size || file_size != size_by_seek) {
658 prt("Size error: expected 0x%llx stat 0x%llx seek 0x%llx\n",
659 (unsigned long long)file_size,
660 (unsigned long long)statbuf.st_size,
661 (unsigned long long)size_by_seek);
667 check_trunc_hack(void)
672 /* should not ignore ftruncate(2)'s return value */
673 if (ftruncate(fd, (off_t)0) < 0) {
674 prterr("trunc_hack: ftruncate(0)");
677 if (ftruncate(fd, (off_t)100000) < 0) {
678 prterr("trunc_hack: ftruncate(100000)");
681 if (fstat(fd, &statbuf)) {
682 prterr("trunc_hack: fstat");
683 statbuf.st_size = -1;
685 if (statbuf.st_size != (off_t)100000) {
686 prt("no extend on truncate! not posix!\n");
689 if (ftruncate(fd, 0) < 0) {
690 prterr("trunc_hack: ftruncate(0) (2nd call)");
696 output_line(struct test_file *tf, int op, unsigned int offset,
701 [OP_WRITE] = "write",
702 [OP_TRUNCATE] = "trunc from",
703 [OP_MAPREAD] = "mapread",
704 [OP_MAPWRITE] = "mapwrite",
705 [OP_READ + OP_DIRECT] = "read_OD",
706 [OP_WRITE + OP_DIRECT] = "write_OD",
707 [OP_FALLOCATE] = "fallocate",
708 [OP_PUNCH_HOLE] = "punch from",
713 ((progressinterval && testcalls % progressinterval == 0) ||
714 (debug && (monitorstart == -1 ||
715 (offset + size > monitorstart &&
716 (monitorend == -1 || offset <= monitorend)))))))
719 prt("%06lu%s %lu.%06u %-10s %#08x %s %#08x\t(0x%x bytes)\n",
720 testcalls, fill_tf_buf(tf), tv.tv_sec, (int)tv.tv_usec,
721 ops[op], offset, op == OP_TRUNCATE || op == OP_PUNCH_HOLE ?
722 " to " : "thru", offset + size - 1,
723 (int)size < 0 ? -(int)size : size);
727 mirror_output_line(struct test_file *tf, int op, int mirrors, int id)
730 ((progressinterval && testcalls % progressinterval == 0) ||
731 (debug && (monitorstart == -1)))))
734 prt("%06lu %lu.%06u %-10s ",
735 testcalls, tv.tv_sec, (int)tv.tv_usec, mirror_op_str[op]);
739 prt("to %d mirrors\n", mirrors + 1);
742 prt("mirror %d to %d mirrors\n", id, mirrors - 1);
745 prt("%d mirrors\n", mirrors);
750 static void output_debug(unsigned int offset, unsigned int size,
755 if (!quiet && (debug > 1 && (monitorstart == -1 ||
756 (offset + size >= monitorstart &&
757 (monitorend == -1 || offset <= monitorend))))) {
758 gettimeofday(&t, NULL);
759 prt(" %lu.%06u %s\n", t.tv_sec, (int)t.tv_usec, what);
764 doflush(unsigned int offset, unsigned int size)
766 unsigned int pg_offset;
767 unsigned int map_size;
769 struct test_file *tf = get_tf();
775 pg_offset = offset & page_mask;
776 map_size = pg_offset + size;
778 p = (char *)mmap(0, map_size, PROT_READ | PROT_WRITE,
779 MAP_FILE | MAP_SHARED, fd,
780 (off_t)(offset - pg_offset));
781 if (p == (char *)-1) {
785 if (msync(p, map_size, MS_INVALIDATE) != 0) {
789 if (munmap(p, map_size) != 0) {
793 output_debug(offset, size, "flush done");
797 doread(unsigned int offset, unsigned int size)
801 struct test_file *tf = get_tf();
804 offset -= offset % readbdy;
806 size -= size % readbdy;
809 if (!quiet && testcalls > simulatedopcount && !tf->o_direct)
810 prt("skipping zero size read\n");
811 log4(OP_SKIPPED, OP_READ, offset, size);
814 if (size + offset > file_size) {
815 if (!quiet && testcalls > simulatedopcount)
816 prt("skipping seek/read past end of file\n");
817 log4(OP_SKIPPED, OP_READ, offset, size);
821 log4(OP_READ + tf->o_direct, offset, size, 0);
823 if (testcalls <= simulatedopcount)
826 output_line(tf, OP_READ + tf->o_direct, offset, size);
828 ret = lseek(fd, (off_t)offset, SEEK_SET);
829 if (ret == (off_t)-1) {
833 iret = read(fd, temp_buf, size);
834 output_debug(offset, size, "read done");
839 prt("short read: 0x%x bytes instead of 0x%x\n",
843 check_buffers(offset, size);
847 check_eofpage(char *s, unsigned int offset, char *p, int size)
849 long last_page, should_be_zero;
851 if (offset + size <= (file_size & ~page_mask))
854 * we landed in the last page of the file
855 * test to make sure the VM system provided 0's
856 * beyond the true end of the file mapping
857 * (as required by mmap def in 1996 posix 1003.1)
859 last_page = ((long)p + (offset & page_mask) + size) & ~page_mask;
861 for (should_be_zero = last_page + (file_size & page_mask);
862 should_be_zero < last_page + page_size;
864 if (*(char *)should_be_zero) {
865 prt("Mapped %s: non-zero data past EOF (0x%llx) page offset 0x%lx is 0x%04x\n",
866 s, (long long)file_size - 1,
867 should_be_zero & page_mask,
868 short_at(should_be_zero));
874 domapread(unsigned int offset, unsigned int size)
876 unsigned int pg_offset;
877 unsigned int map_size;
881 offset -= offset % readbdy;
885 if (!quiet && testcalls > simulatedopcount)
886 prt("skipping zero size read\n");
887 log4(OP_SKIPPED, OP_MAPREAD, offset, size);
890 if (size + offset > file_size) {
891 if (!quiet && testcalls > simulatedopcount)
892 prt("skipping seek/read past end of file\n");
893 log4(OP_SKIPPED, OP_MAPREAD, offset, size);
897 log4(OP_MAPREAD, offset, size, 0);
899 if (testcalls <= simulatedopcount)
902 output_line(tf, OP_MAPREAD, offset, size);
904 pg_offset = offset & page_mask;
905 map_size = pg_offset + size;
907 p = mmap(0, map_size, PROT_READ, MAP_FILE | MAP_SHARED, fd,
908 (off_t)(offset - pg_offset));
909 if (p == MAP_FAILED) {
913 output_debug(offset, size, "mmap done");
914 if (setjmp(jmpbuf) == 0) {
916 memcpy(temp_buf, p + pg_offset, size);
917 check_eofpage("Read", offset, p, size);
920 report_failure(1901);
922 output_debug(offset, size, "memcpy done");
923 if (munmap(p, map_size) != 0) {
927 output_debug(offset, size, "munmap done");
929 check_buffers(offset, size);
933 gendata(char *original_buf, char *good_buf, unsigned int offset,
937 good_buf[offset] = testcalls % 256;
939 good_buf[offset] += original_buf[offset];
945 dowrite(unsigned int offset, unsigned int size)
953 offset -= offset % writebdy;
955 size -= size % writebdy;
957 if (!quiet && testcalls > simulatedopcount && !tf->o_direct)
958 prt("skipping zero size write\n");
959 log4(OP_SKIPPED, OP_WRITE, offset, size);
963 log4(OP_WRITE + tf->o_direct, offset, size, file_size);
965 gendata(original_buf, good_buf, offset, size);
966 if (file_size < offset + size) {
967 if (file_size < offset)
968 memset(good_buf + file_size, '\0', offset - file_size);
969 file_size = offset + size;
971 warn("Lite file size bug in fsx!");
976 if (testcalls <= simulatedopcount)
979 output_line(tf, OP_WRITE + tf->o_direct, offset, size);
981 ret = lseek(fd, (off_t)offset, SEEK_SET);
982 if (ret == (off_t)-1) {
986 iret = write(fd, good_buf + offset, size);
987 output_debug(offset, size, "write done");
992 prt("short write: 0x%x bytes instead of 0x%x\n",
998 prt("fsync() failed: %s\n", strerror(errno));
1001 output_debug(offset, size, "fsync done");
1004 doflush(offset, size);
1005 output_debug(offset, size, "flush done");
1010 domapwrite(unsigned int offset, unsigned int size)
1012 unsigned int pg_offset;
1013 unsigned int map_size;
1020 offset -= offset % writebdy;
1022 if (!quiet && testcalls > simulatedopcount)
1023 prt("skipping zero size write\n");
1024 log4(OP_SKIPPED, OP_MAPWRITE, offset, size);
1027 cur_filesize = file_size;
1029 log4(OP_MAPWRITE, offset, size, 0);
1031 gendata(original_buf, good_buf, offset, size);
1032 if (file_size < offset + size) {
1033 if (file_size < offset)
1034 memset(good_buf + file_size, '\0', offset - file_size);
1035 file_size = offset + size;
1037 warn("Lite file size bug in fsx!");
1038 report_failure(200);
1042 if (testcalls <= simulatedopcount)
1045 output_line(tf, OP_MAPWRITE, offset, size);
1047 if (file_size > cur_filesize) {
1048 if (ftruncate(fd, file_size) == -1) {
1049 prterr("ftruncate");
1052 output_debug(offset, size, "truncate done");
1054 pg_offset = offset & page_mask;
1055 map_size = pg_offset + size;
1057 p = mmap(0, map_size, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED,
1058 fd, (off_t)(offset - pg_offset));
1059 if (p == MAP_FAILED) {
1061 report_failure(202);
1063 output_debug(offset, map_size, "mmap done");
1064 if (setjmp(jmpbuf) == 0) {
1066 memcpy(p + pg_offset, good_buf + offset, size);
1067 if (msync(p, map_size, MS_SYNC) != 0) {
1069 report_failure(203);
1071 check_eofpage("Write", offset, p, size);
1074 report_failure(2021);
1076 output_debug(offset, map_size, "msync done");
1077 if (munmap(p, map_size) != 0) {
1079 report_failure(204);
1081 output_debug(offset, map_size, "munmap done");
1085 dotruncate(unsigned int size)
1087 int oldsize = file_size;
1092 size -= size % truncbdy;
1093 if (size > biggest) {
1095 if (!quiet && testcalls > simulatedopcount)
1096 prt("truncating to largest ever: 0x%x\n", size);
1099 log4(OP_TRUNCATE, size, (unsigned int)file_size, 0);
1101 if (size > file_size)
1102 memset(good_buf + file_size, '\0', size - file_size);
1105 if (testcalls <= simulatedopcount)
1108 output_line(tf, OP_TRUNCATE, oldsize, size - oldsize);
1110 if (ftruncate(fd, (off_t)size) == -1) {
1111 prt("ftruncate: 0x%x\n", size);
1112 prterr("ftruncate");
1113 report_failure(160);
1115 output_debug(size, 0, "truncate done");
/*
 * do_punch_hole() - deallocate a byte range of the test file and mirror the
 * result in good_buf.
 * @offset: start of the range
 * @length: number of bytes to punch
 *
 * The hole is punched with fallocate() using
 * FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE.  On success the matching part
 * of good_buf is zeroed, with the range clamped so it never extends past
 * file_size.  A fallocate() failure is reported with failure code 161.
 * Requests starting at or beyond file_size are skipped.
 */
1119 do_punch_hole(unsigned int offset, unsigned int length)
1123 int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
/*
 * NOTE(review): here and in the next skip path the OP_SKIPPED log4() call
 * sits inside the "!quiet && ..." block, so the skip would not be logged in
 * quiet mode or during simulated ops.  doread()/dowrite() call log4() outside
 * that condition -- confirm which behavior is intended.
 */
1129 if (!quiet && testcalls > simulatedopcount) {
1130 prt("skipping zero length punch hole\n");
1131 log4(OP_SKIPPED, OP_PUNCH_HOLE, offset, length);
/* nothing to punch when the range starts at or past EOF */
1136 if (file_size <= (loff_t)offset) {
1137 if (!quiet && testcalls > simulatedopcount) {
1138 prt("skipping hole punch off the end of the file\n");
1139 log4(OP_SKIPPED, OP_PUNCH_HOLE, offset, length);
1144 log4(OP_PUNCH_HOLE, offset, length, 0);
1146 if (testcalls <= simulatedopcount)
1149 output_line(tf, OP_PUNCH_HOLE, offset, length);
1150 if (fallocate(fd, mode, (loff_t)offset, (loff_t)length) == -1) {
1151 prt("punch hole: %x to %x\n", offset, length);
1152 prterr("fallocate");
1153 report_failure(161);
/*
 * NOTE(review): the debug text says "zero_range done" although this is the
 * punch-hole path; it looks copied from do_zero_range().
 */
1155 output_debug(offset, length, "zero_range done");
/* clamp the zeroed region of good_buf to the current file size */
1157 max_offset = offset < file_size ? offset : file_size;
1158 max_len = max_offset + length <= file_size ? length :
1159 file_size - max_offset;
1160 memset(good_buf + max_offset, '\0', max_len);
/*
 * do_zero_range() - zero a byte range of the test file with fallocate() and
 * mirror the result in good_buf.
 * @offset: start of the range
 * @length: number of bytes to zero
 *
 * keep_size is chosen at random per call.  When it is set, end_offset is
 * forced to 0, so the call is never treated as extending the file for the
 * "largest ever" bookkeeping or for the logged classification.  mode is
 * initialized to FALLOC_FL_ZERO_RANGE; whether keep_size also changes mode
 * cannot be determined from the lines shown here.  On success
 * good_buf[offset, offset + length) is cleared.
 */
1164 do_zero_range(unsigned int offset, unsigned int length)
1166 unsigned int end_offset;
1167 int mode = FALLOC_FL_ZERO_RANGE;
/*
 * NOTE(review): as in do_punch_hole(), the OP_SKIPPED log4() call is inside
 * the "!quiet && ..." block, so the skip is not logged in quiet mode.
 */
1174 if (!quiet && testcalls > simulatedopcount) {
1175 prt("skipping zero length zero range\n");
1176 log4(OP_SKIPPED, OP_ZERO_RANGE, offset, length);
1181 keep_size = random() % 2;
/* with keep_size the file must not grow, so treat the end as offset 0 */
1183 end_offset = keep_size ? 0 : offset + length;
1185 if (end_offset > biggest) {
1186 biggest = end_offset;
1187 if (!quiet && testcalls > simulatedopcount)
1188 prt("zero_range to largest ever: 0x%x\n", end_offset);
1192 * last arg matches fallocate string array index in logdump:
1193 * 0: allocate past EOF
1194 * 1: extending prealloc
1195 * 2: interior prealloc
/*
 * NOTE(review): when keep_size is set end_offset is 0, so
 * "end_offset > file_size" cannot be true and the "keep_size ? 0" arm below
 * looks unreachable; only 1 or 2 would ever be logged.
 */
1197 log4(OP_ZERO_RANGE, offset, length,
1198 (end_offset > file_size) ? (keep_size ? 0 : 1) : 2);
1200 if (testcalls <= simulatedopcount)
/*
 * NOTE(review): the progress line is emitted with OP_TRUNCATE rather than
 * OP_ZERO_RANGE, so it prints as a truncate; the entries of output_line()'s
 * ops[] table visible in this file include none for OP_ZERO_RANGE.
 */
1203 output_line(tf, OP_TRUNCATE, offset, length);
1205 if (fallocate(fd, mode, (loff_t)offset, (loff_t)length) == -1) {
/*
 * NOTE(review): "pzero range" looks like a typo for "zero range", and
 * failure code 161 is also used by do_punch_hole().
 */
1206 prt("pzero range: %x to %x\n", offset, length);
1207 prterr("fallocate");
1208 report_failure(161);
1210 output_debug(offset, length, "zero_range done");
/* the zeroed range must now read back as zeros */
1212 memset(good_buf + offset, '\0', length);
1216 * fallocate is basically a no-op unless extending,
1217 * then a lot like a truncate
1220 do_preallocate(unsigned int offset, unsigned int length)
1225 struct stat statbufs;
1230 if (!quiet && testcalls > simulatedopcount)
1231 prt("skipping zero length fallocate\n");
1232 log4(OP_SKIPPED, OP_FALLOCATE, offset, length);
1236 keep_size = fl_keep_size && (random() % 2);
1238 end_offset = offset + length;
1239 if (end_offset > biggest) {
1240 biggest = end_offset;
1241 if (!quiet && testcalls > simulatedopcount)
1242 prt("fallocating to largest ever: 0x%jx\n", end_offset);
1246 * last arg matches fallocate string array index in logdump:
1247 * 0: allocate past EOF
1248 * 1: extending prealloc
1249 * 2: interior prealloc
1251 log4(OP_FALLOCATE, offset, length, (end_offset > file_size) ?
1252 (keep_size ? 0 : 1) : 2);
1254 if (end_offset > file_size && !keep_size) {
1255 memset(good_buf + file_size, '\0', end_offset - file_size);
1256 file_size = end_offset;
1259 if (testcalls <= simulatedopcount)
1262 fstat(fd, &statbufs);
1263 if (fallocate(fd, keep_size ? FALLOC_FL_KEEP_SIZE : 0, (loff_t)offset,
1264 (loff_t)length) == -1) {
1265 prt("fallocate: %x to %x\n", offset, length);
1266 prterr("fallocate");
1267 report_failure(161);
1269 output_line(tf, OP_FALLOCATE, offset, length);
1270 output_debug(offset, length, "fallocate done");
1279 if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) {
1281 report_failure(171);
1283 iret = write(fd, good_buf, file_size);
1284 if ((off_t)iret != file_size) {
1288 prt("short write: 0x%lx bytes instead of 0x%llx\n",
1289 (unsigned long)iret, (unsigned long long)file_size);
1290 report_failure(172);
1292 if (lite ? 0 : ftruncate(fd, file_size) == -1) {
1293 prt("ftruncate2: %llx\n", (unsigned long long)file_size);
1294 prterr("ftruncate");
1295 report_failure(173);
1303 const char *tf_num = "";
1305 if (testcalls <= simulatedopcount)
1309 direct = (random() % (o_direct + 1)) ? OP_DIRECT : 0;
1310 log4(OP_CLOSEOPEN + direct, file_size, (unsigned int)file_size, 0);
1312 if (fd_policy != FD_SINGLE)
1313 tf_num = fill_tf_buf(tf);
1316 prt("%06lu %lu.%06u %sclose/open%s\n", testcalls, tv.tv_sec,
1317 (int)tv.tv_usec, tf_num, direct ? "(O_DIRECT)" : "");
1319 report_failure(180);
1321 output_debug(monitorstart, 0, "close done");
1322 tf->o_direct = direct;
1323 tf->fd = open(tf->path, O_RDWR | tf->o_direct, 0);
1325 prterr(tf->o_direct ? "open(O_DIRECT)" : "open");
1326 report_failure(181);
1328 output_debug(monitorstart, 0,
1329 tf->o_direct ? "open(O_DIRECT) done" : "open done");
1333 get_mirror_ids(int fd, unsigned int *ids)
1335 struct llapi_layout *layout;
1339 layout = llapi_layout_get_by_fd(fd, 0);
1343 /* only get mirror count */
1344 rc = llapi_layout_mirror_count_get(layout, &count);
1346 prt("llapi_layout_mirror_count_get: %d\n", rc);
1352 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
1354 prt("llapi_layout_comp_use(USE_FIRST): %d\n", rc);
1361 rc = llapi_layout_mirror_id_get(layout, &id);
1363 prt("llapi_layout_mirror_id_get: %d\n", rc);
1367 if (!count || ids[count - 1] != id)
1370 rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_NEXT);
1372 prt("llapi_layout_comp_use(USE_NEXT): %d\n", rc);
1378 llapi_layout_free(layout);
1380 return rc < 0 ? rc : count;
1384 do_mirror_ops(int op)
1387 char cmd[PATH_MAX * 2];
1391 if (testcalls <= simulatedopcount)
1396 mirror_count = get_mirror_ids(tf->fd, mirror_ids);
1397 if (mirror_count < 0) {
1398 prterr("get_mirror_ids");
1399 report_failure(182);
1404 if (mirror_count == LUSTRE_MIRROR_COUNT_MAX)
1406 snprintf(cmd, sizeof(cmd), "lfs mirror extend -N -c-1 %s",
1410 if (mirror_count == 0 || mirror_count == 1)
1413 i = random() % mirror_count;
1417 snprintf(cmd, sizeof(cmd),
1418 "lfs mirror split -d --mirror-id=%d %s",
1419 mirror_ids[i], tf->path);
1422 if (mirror_count < 2)
1425 snprintf(cmd, sizeof(cmd),
1426 "lfs mirror resync %s", tf->path);
1431 report_failure(183);
1432 output_debug(monitorstart, 0, "close done");
1434 log4(OP_MIRROR_OPS, op, mirror_count, i);
1436 mirror_output_line(tf, op, mirror_count, i);
1440 prt("%s: %d\n", cmd, errno);
1441 report_failure(184);
1442 } else if (WIFEXITED(rc)) {
1443 rc = WEXITSTATUS(rc);
1445 prt("%s: %d\n", cmd, rc);
1446 /* mirror split won't delete the last non-stale mirror,
1447 * and returns EUCLEAN
1450 report_failure(184);
1453 output_debug(monitorstart, 0, cmd);
1457 if (mirror_count == 2)
1462 snprintf(cmd, sizeof(cmd),
1463 "lfs mirror verify %s", tf->path);
1467 prt("mirror op %d: %s: %d\n", op, cmd, errno);
1468 report_failure(184);
1469 } else if (WIFEXITED(rc)) {
1470 rc = WEXITSTATUS(rc);
1472 prt("mirror op %d: %s: %d\n", op, cmd, rc);
1473 snprintf(cmd, sizeof(cmd),
1474 "lfs mirror verify -v %s", tf->path);
1476 report_failure(184);
1481 output_debug(monitorstart, 0, cmd);
1483 tf->fd = open(tf->path, O_RDWR | tf->o_direct, 0);
1485 prterr(tf->o_direct ? "open(O_DIRECT)" : "open");
1486 report_failure(185);
1488 output_debug(monitorstart, 0,
1489 tf->o_direct ? "open(O_DIRECT) done" : "open done");
1492 #define TRIM_OFF_LEN(off, len, size) \
1498 if ((off) + (len) > (size)) \
1499 (len) = (size) - (off); \
1505 unsigned long offset;
1506 unsigned long size = maxoplen;
1507 unsigned long rv = random();
1511 if (simulatedopcount > 0 && testcalls == simulatedopcount)
1517 closeopen = (rv >> 3) < (1 << 28) / closeprob;
1519 if (debugstart > 0 && testcalls >= debugstart)
1522 if (!quiet && testcalls < simulatedopcount && testcalls % 100000 == 0)
1523 prt("%lu...\n", testcalls);
1527 size = random() % (maxoplen + 1);
1529 /* calculate appropriate op to run */
1531 op = rv % OP_MAX_LITE;
1533 op = rv % OP_MAX_FULL;
1545 if (!fallocate_calls) {
1546 log4(OP_SKIPPED, OP_FALLOCATE, offset, size);
1551 if (!punch_hole_calls) {
1552 log4(OP_SKIPPED, OP_PUNCH_HOLE, offset, size);
1557 if (!zero_range_calls) {
1558 log4(OP_SKIPPED, OP_ZERO_RANGE, offset, size);
1566 TRIM_OFF_LEN(offset, size, file_size);
1567 doread(offset, size);
1570 TRIM_OFF_LEN(offset, size, maxfilelen);
1571 dowrite(offset, size);
1574 TRIM_OFF_LEN(offset, size, file_size);
1575 domapread(offset, size);
1578 TRIM_OFF_LEN(offset, size, maxfilelen);
1579 domapwrite(offset, size);
1583 size = random() % maxfilelen;
1587 TRIM_OFF_LEN(offset, size, maxfilelen);
1588 do_preallocate(offset, size);
1591 TRIM_OFF_LEN(offset, size, file_size);
1592 do_punch_hole(offset, size);
1595 TRIM_OFF_LEN(offset, size, file_size);
1596 do_zero_range(offset, size);
1604 do_mirror_ops(random() % MIRROR_OPS);
1607 prterr("unknown operation %d: Operation not supported");
1613 if (sizechecks && testcalls > simulatedopcount)
1624 report_failure(9999);
1632 prt("signal %d\n", sig);
1633 prt("testcalls = %lu\n", testcalls);
1641 "usage: fsx [-dfnqFLOW] [-b opnum] [-c Prob] [-l flen] [-m start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t truncbdy] [-w writebdy] [-D startingop] [ -I random|rotate ] [-N numops] [-P dirpath] [-S seed] [-Z [prob]] fname [additional paths to fname..]\n"
1642 " -b opnum: beginning operation number (default 1)\n"
1643 " -c P: 1 in P chance of file close+open at each op (default infinity)\n"
1644 " -d: debug output for all operations [-d -d = more debugging]\n"
1645 " -f flush and invalidate cache after I/O\n"
1646 /* OSX: -d duration: number of hours for the tool to run\n\ */
1647 /* OSX: -e: tests using an extended attribute rather than a file\n\ */
1648 /* OSX: -f forkname: test the named fork of fname\n\ */
1649 /* OSX: -g logpath: path for .fsxlog file\n\ */
1650 /* OSX: -h: write 0s instead of creating holes (i.e. sparse file)\n\ */
1651 /* OSX: -i: interactive mode, hit return before performing each operation\n\ */
1652 " -l flen: the upper bound on file size (default 262144)\n"
1653 " -m startop:endop: monitor (print debug output) specified byte range\n"
1654 " (default 0:infinity)\n"
1655 " -n: no verifications of file size\n"
1656 " -o oplen: the upper bound on operation size (default 65536)\n"
1657 " -p progressinterval: debug output at specified operation interval\n"
1658 " -q: quieter operation\n"
1659 " -r readbdy: %1$u would make reads page aligned (default 1)\n"
1660 " -s style: 1 gives smaller truncates (default 0)\n"
1661 " -t truncbdy: %1$u would make truncates page aligned (default 1)\n"
1662 " -w writebdy: %1$u would make writes page aligned (default 1)\n"
1663 /* XFS: -x: preallocate file space before starting, XFS only (default 0)\n\ */
1664 " -y synchronize changes to a file\n"
1665 /* OSX: -v: debug output for all operations\n\ */
1666 /* XFS: -A: Use the AIO system calls\n" */
1667 /* OSX: -C mix cached and un-cached read/write ops\n\ */
1668 " -D startingop: debug output starting at specified operation\n"
1669 " -F: Do not use fallocate (preallocation) calls\n"
1670 /* OSX: -G logsize: #entries in oplog (default 1024)\n\ */
1671 #ifdef FALLOC_FL_PUNCH_HOLE
1672 " -H: Do not use punch hole calls\n"
1674 #ifdef FALLOC_FL_ZERO_RANGE
1675 " -z: Do not use zero range calls\n"
1677 /* XFS: -C: Do not use collapse range calls\n\ */
1678 " -I [rotate|random]: When multiple paths to the file are given,\n"
1679 " each operation uses a different path. Iterate through them in\n"
1680 " order with 'rotate' or chose them at 'random'. (default random)\n"
1681 " -L: fsxLite - no file creations & no file size changes\n"
1682 /* OSX: -I: start interactive mode since operation opnum\n\ */
1683 " -M: mirror file test mode\n"
1684 " -N numops: total # operations to do (default infinity)\n"
1685 " -O: use oplen (see -o flag) for every op (default random)\n"
1686 " -P: save .fsxlog and .fsxgood files in dirpath (default ./)\n"
1687 " -R: read() system calls only (mapped reads disabled)\n"
1688 " -S seed: for random # generator (default 1) 0 gets timestamp\n"
1689 /* OSX: -T datasize: atomic data element write size [1,2,4] (default 4)\n\ */
1690 " -W: mapped write operations DISabled\n"
1691 " -Z[P]: O_DIRECT file IO [1 in P chance for each open] (default off)\n"
1692 " fname: this filename is REQUIRED (no default)\n",
/*
 * getnum - convert the numeric text at the start of s into an integer.
 *
 * s: string to parse.
 * e: handed straight to strtol() as its end pointer, so after the call *e
 *    addresses the first character strtol() did not consume.
 *
 * Base 0 lets strtol() pick the radix from the prefix: "0x"/"0X" selects
 * hexadecimal, a leading "0" selects octal, anything else is decimal.
 *
 * NOTE(review): only the strtol() call is visible in this excerpt; any
 * post-processing of ret and the value finally returned should be confirmed
 * against the complete function.
 */
1698 getnum(char *s, char **e)
1703 ret = strtol(s, e, 0);
/*
 * test_fallocate - probe whether fallocate() accepts the flags in mode for
 * the file open on fd.
 *
 * mode: flag bits handed unchanged to fallocate() as its mode argument.
 *
 * A 4 MiB range starting at offset 0 is requested. Only a failure with
 * errno == EOPNOTSUPP takes the "does not support ... disabling" warning
 * path; other failures do not. The file is then truncated back to length 0,
 * with a warning if that truncate fails.
 *
 * NOTE(review): the return statements are outside this excerpt. main()
 * stores the result in fallocate_calls, punch_hole_calls, zero_range_calls
 * and fl_keep_size, so it is presumably nonzero when the mode is usable;
 * confirm against the complete function.
 */
1731 test_fallocate(int mode)
1737 /* Must go more than a page away so let's go 4M to be sure */
1738 if (fallocate(fd, mode, 0, 4096*1024) && errno == EOPNOTSUPP) {
1740 warn("%s: filesystem does not support fallocate mode 0x%x, disabling!",
1746 /* Always call ftruncate since file size might be adjusted
1747 * by fallocate even on error
1749 if (ftruncate(fd, 0) == -1)
1750 warn("ftruncate to 0 size failed");
1756 main(int argc, char **argv)
1765 page_size = getpagesize();
1766 page_mask = page_size - 1;
1768 setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */
1770 while ((ch = getopt(argc, argv,
1771 "b:c:dfl:m:no:p:qr:s:t:w:xyzD:FHI:LMN:OP:RS:WZ::"))
1775 simulatedopcount = getnum(optarg, &endp);
1777 fprintf(stdout, "Will begin at operation %ld\n",
1779 if (simulatedopcount == 0)
1781 simulatedopcount -= 1;
1784 closeprob = getnum(optarg, &endp);
1787 "Chance of close/open is 1 in %d\n",
1799 maxfilelen = getnum(optarg, &endp);
1800 if (maxfilelen <= 0)
1804 monitorstart = getnum(optarg, &endp);
1805 if (monitorstart < 0)
1807 if (!endp || *endp++ != ':')
1809 monitorend = getnum(endp, &endp);
1812 if (monitorend == 0)
1813 monitorend = -1; /* aka infinity */
1819 maxoplen = getnum(optarg, &endp);
1824 progressinterval = getnum(optarg, &endp);
1825 if (progressinterval <= 0)
1832 readbdy = getnum(optarg, &endp);
1837 style = getnum(optarg, &endp);
1838 if (style < 0 || style > 1)
1842 truncbdy = getnum(optarg, &endp);
1847 writebdy = getnum(optarg, &endp);
1855 debugstart = getnum(optarg, &endp);
1860 fallocate_calls = 0;
1863 punch_hole_calls = 0;
1866 zero_range_calls = 0;
1869 assign_fd_policy(optarg);
1878 numops = getnum(optarg, &endp);
1886 strncpy(goodfile, optarg, sizeof(goodfile) - 1);
1887 strncat(goodfile, "/", PATH_MAX - strlen(goodfile) - 1);
1888 strncpy(logfile, optarg, sizeof(logfile) - 1);
1889 strncat(logfile, "/", PATH_MAX - strlen(logfile) - 1);
1896 seed = getnum(optarg, &endp);
1898 seed = time(0) % 10000;
1900 fprintf(stdout, "Seed set to %d\n", seed);
1907 fprintf(stdout, "mapped writes DISABLED\n");
1911 o_direct = getnum(optarg, &endp);
1912 if (!optarg || o_direct == 0)
1925 signal(SIGHUP, cleanup);
1926 signal(SIGINT, cleanup);
1927 signal(SIGPIPE, cleanup);
1928 signal(SIGALRM, cleanup);
1929 signal(SIGTERM, cleanup);
1930 signal(SIGXCPU, cleanup);
1931 signal(SIGXFSZ, cleanup);
1932 signal(SIGVTALRM, cleanup);
1933 signal(SIGUSR1, cleanup);
1934 signal(SIGUSR2, cleanup);
1935 signal(SIGBUS, segv);
1936 signal(SIGSEGV, segv);
1938 initstate(seed, state, 256);
1941 open_test_files(argv, argc);
1943 strncat(goodfile, dirpath ? my_basename(fname) : fname, 256);
1944 strncat(goodfile, ".fsxgood", PATH_MAX - strlen(goodfile) - 1);
1945 fsxgoodfd = open(goodfile, O_RDWR | O_CREAT | O_TRUNC, 0666);
1946 if (fsxgoodfd < 0) {
1950 strncat(logfile, dirpath ? my_basename(fname) : fname, 256);
1951 strncat(logfile, ".fsxlog", PATH_MAX - strlen(logfile) - 1);
1952 fsxlogf = fopen(logfile, "w");
1961 maxfilelen = lseek(fd, (off_t)0, SEEK_END);
1962 file_size = maxfilelen;
1963 if (file_size == (off_t)-1) {
1965 warn("%s: lseek eof", __func__);
1968 ret = lseek(fd, (off_t)0, SEEK_SET);
1969 if (ret == (off_t)-1) {
1971 warn("%s: lseek 0", __func__);
1975 original_buf = (char *)malloc(maxfilelen);
1978 for (i = 0; i < maxfilelen; i++)
1979 original_buf[i] = random() % 256;
1983 ret = posix_memalign((void **)&good_buf, writebdy, maxfilelen);
1985 prt("%s: posix_memalign failed: %s\n", __func__,
1990 ret = posix_memalign((void **)&temp_buf, readbdy, maxoplen);
1992 prt("%s: posix_memalign failed: %s\n", __func__,
1997 good_buf = malloc(maxfilelen);
1999 prt("malloc failed.\n");
2003 temp_buf = malloc(maxoplen);
2005 prt("malloc failed.\n");
2009 memset(good_buf, 0, maxfilelen);
2010 memset(temp_buf, 0, maxoplen);
2012 if (lite) { /* zero entire existing file */
2016 written = write(fd, good_buf, (size_t)maxfilelen);
2017 if (written != maxfilelen) {
2018 if (written == -1) {
2020 warn("%s: error on write", __func__);
2022 warn("%s: short write, 0x%x bytes instead of 0x%lx\n",
2023 __func__, (unsigned int)written,
2032 if (fallocate_calls)
2033 fallocate_calls = test_fallocate(0);
2035 if (punch_hole_calls)
2036 punch_hole_calls = test_fallocate(FALLOC_FL_PUNCH_HOLE |
2037 FALLOC_FL_KEEP_SIZE);
2039 if (zero_range_calls)
2040 zero_range_calls = test_fallocate(FALLOC_FL_ZERO_RANGE);
2042 fl_keep_size = test_fallocate(FALLOC_FL_KEEP_SIZE);
2044 while (numops == -1 || numops--)
2048 prt("All operations completed A-OK!\n");