4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2011, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
32 * lustre/utils/llverfs.c
34 * Filesystem Verification Tool.
35 * This program tests the correct operation of large filesystems and
36 * the underlying block storage device(s).
37 * This tool has two working modes: full and partial.
41 * In full mode, the program creates a subdirectory in the test
42 * filesystem, writes n(files_in_dir, default=32) large(4GB) files to
43 * the directory with the test pattern at the start of each 4kb block.
44 * The test pattern contains timestamp, relative file offset and per
45 * file unique identifier(inode number). This continues until the
46 * whole filesystem is full and then the tool verifies that the data
47 * in all of the test files is correct.
49 * In partial mode, the tool creates test directories with the
50 * EXT3_TOPDIR_FL flag set (if supported) to spread the directory data
51 * around the block device instead of localizing it in a single place.
52 * The number of directories equals the number of block groups in the
53 * filesystem (e.g. 65536 directories for 8TB ext3/ext4 filesystem) and
54 * then writes a single 1MB file in each directory. The tool then verifies
55 * that the data in each file is correct.
64 #ifndef _LARGEFILE64_SOURCE
65 #define _LARGEFILE64_SOURCE
67 #ifndef _FILE_OFFSET_BITS
68 #define _FILE_OFFSET_BITS 64
85 #include <sys/types.h>
89 #include <gnu/stubs.h>
90 #include <gnu/stubs.h>
92 #ifdef HAVE_EXT2FS_EXT2FS_H
94 # include <ext2fs/ext2fs.h>
96 # ifndef EXT2_TOPDIR_FL
97 # define EXT2_TOPDIR_FL 0x00020000 /* Top of directory tree */
/*
 * fsetflags: formats a "chattr +T <path>" command line into cmd for the
 * EXT2_TOPDIR_FL flag; any other flag value takes the first branch.
 * NOTE(review): the statement that runs cmd and the return paths are not
 * visible in this listing. <path> is interpolated into the command
 * unquoted, so paths containing spaces or shell metacharacters should be
 * checked against the callers.
 */
99 static int fsetflags(const char *path, unsigned int flag)
/* sized for a full path plus the fixed command prefix */
101 char cmd[PATH_MAX + 128];
/* flags other than EXT2_TOPDIR_FL are handled here (branch body not visible) */
104 if (flag != EXT2_TOPDIR_FL) {
109 snprintf(cmd, sizeof(cmd), "chattr +T %s", path);
/* reduce the wait-style status held in rc to the plain exit status */
113 rc = WEXITSTATUS(rc);
/* Byte-size units; ONE_GB is widened so that products such as 4*ONE_GB are computed in 64 bits. */
122 #define ONE_MB (1024 * 1024)
123 #define ONE_GB ((unsigned long long)(1024 * 1024 * 1024))
/* distance in bytes between consecutive test-pattern headers inside a chunk */
124 #define BLOCKSIZE 4096
126 /* Structure for writing test pattern */
/* offset stamped into the block; compared with the expected offset on verify */
128 unsigned long long bd_offset;
/* timestamp stamped into the block */
129 unsigned long long bd_time;
/* inode number of the file the block was written to */
130 unsigned long long bd_inode;
/* File-scope configuration and state shared by the write and read phases. */
132 static char *progname; /* basename of argv[0], used as the prefix of every message */
133 static unsigned verbose = 1; /* verbosity level; show_rate() prints on every call when > 2 */
134 static int readoption; /* run test in read-only (verify) mode */
135 static int writeoption; /* run test in write-only mode */
136 char *testdir; /* filesystem path under test (taken from argv) */
137 static unsigned full = 1; /* non-zero: full check; zero: partial check */
138 static int error_count; /* number of IO errors hit during run */
139 char filecount[PATH_MAX]; /* path of the file recording the number of files written */
140 static unsigned long num_files; /* total number of files for read/write */
141 static loff_t file_size = 4*ONE_GB; /* size of each file in bytes */
142 static unsigned files_in_dir = 32; /* number of files in each directory */
143 static unsigned num_dirs = 30000; /* total number of directories */
/* mode used when creating directories with mkdir(): rwxr-xr-x */
144 const int dirmode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
/* holds the result of isatty(STDOUT_FILENO), assigned in main() */
145 static int isatty_flag;
/* mode argument passed to open() by open_file(): rwxr--r-- */
146 static int perms = S_IRWXU | S_IRGRP | S_IROTH;
/*
 * Long-option table handed to getopt_long().
 *
 * Every entry's has_arg has to agree with the short-option string used in
 * main() ("c:hlo:pqrs:t:vw"), where a trailing ':' means "takes an
 * argument":
 *   - --partial and --quiet are plain switches ('p' and 'q' carry no ':'),
 *     so they must be no_argument; with required_argument getopt_long()
 *     would swallow the following word (typically the filesystem path).
 *   - --filesize is converted from optarg ('s' carries a ':'), so it must
 *     be required_argument; with no_argument optarg is NULL when the long
 *     form is used.
 *
 * "long" and "full" are two spellings of the same option ('l').
 * The array ends with an all-zero entry, which getopt_long() requires to
 * find the end of the table.
 */
static struct option const long_opts[] = {
	{ .val = 'c', .name = "chunksize", .has_arg = required_argument },
	{ .val = 'h', .name = "help", .has_arg = no_argument },
	{ .val = 'l', .name = "long", .has_arg = no_argument },
	{ .val = 'l', .name = "full", .has_arg = no_argument },
	{ .val = 'o', .name = "offset", .has_arg = required_argument },
	{ .val = 'p', .name = "partial", .has_arg = no_argument },
	{ .val = 'q', .name = "quiet", .has_arg = no_argument },
	{ .val = 'r', .name = "read", .has_arg = no_argument },
	{ .val = 's', .name = "filesize", .has_arg = required_argument },
	{ .val = 't', .name = "timestamp", .has_arg = required_argument },
	{ .val = 'v', .name = "verbose", .has_arg = no_argument },
	{ .val = 'w', .name = "write", .has_arg = no_argument },
	{ .name = NULL }
};
164 * usage: displays help information whenever the user supplies the --help
165 * option or enters an incorrect command line.
/*
 * Prints the command synopsis followed by the option summary to stdout.
 * NOTE(review): the argument of the first printf and whatever is done
 * with <status> are not visible in this listing; presumably status ends
 * up as the process exit code - confirm against the full file.
 */
167 void usage(int status)
170 printf("\nUsage: %s [OPTION]... <filesystem path> ...\n",
172 printf("Filesystem verification tool.\n"
173 "\t-t {seconds}, --timestamp, set test time"
174 "(default=current time())\n"
175 "\t-o {offset}, --offset, directory starting offset"
176 " from which tests should start\n"
177 "\t-r, --read, run in verify mode\n"
178 "\t-w, --write, run in test-pattern mode, default=rw\n"
180 "\t-p, --partial, for partial check (1MB files)\n"
181 "\t-l, --long, --full check (4GB file with 4k blocks)\n"
182 "\t-c, --chunksize, IO chunk size in MB (default=1)\n"
183 "\t-s, --filesize, file size in MB (default=4096)\n"
184 "\t-h, --help, display this help and exit\n");
190 * open_file: Opens file in specified mode and returns fd.
/*
 * Calls open(2) on <file> with the caller's <flag> and the file-scope
 * perms as the mode argument, keeping the descriptor in fd. The failure
 * message is written to stderr together with strerror(errno).
 * NOTE(review): the failure test and the return statements are not
 * visible in this listing.
 */
192 static int open_file(const char *file, int flag)
194 int fd = open(file, flag, perms);
196 fprintf(stderr, "\n%s: Open '%s' failed:%s\n",
197 progname, file, strerror(errno));
203 * verify_chunk: Verifies that the test pattern in each 4kB (BLOCKSIZE) block
204 * is correct. Returns 0 if offset, timestamp and inode all match, otherwise 1.
/*
 * Walks chunk_buf in BLOCKSIZE steps and compares the block_data header at
 * the start of each block with the expected values.
 *   chunk_buf - data to check
 *   chunksize - number of valid bytes in chunk_buf
 *   chunk_off - offset expected in the first header; advanced by
 *               BLOCKSIZE for every following block
 *   time_st   - expected timestamp
 *   inode_st  - expected inode number
 *   file      - file name, used in the diagnostic only
 * A mismatch is reported on stderr with both expected and found values.
 * NOTE(review): the statements taken on match/mismatch and the return
 * statements are not visible in this listing.
 */
206 int verify_chunk(char *chunk_buf, const size_t chunksize,
207 unsigned long long chunk_off, const unsigned long long time_st,
208 const unsigned long long inode_st, const char *file)
210 struct block_data *bd;
/*
 * chunk_end sits sizeof(*bd) before the end of the data, so a header is
 * only examined when all of it lies inside the chunk.
 * NOTE(review): for chunksize < sizeof(*bd) the subtraction forms a
 * pointer in front of chunk_buf; read_chunks() passes the byte count
 * returned by read(), so confirm short or empty reads are filtered first.
 */
213 for (chunk_end = chunk_buf + chunksize - sizeof(*bd);
214 (char *)chunk_buf < chunk_end;
215 chunk_buf += BLOCKSIZE, chunk_off += BLOCKSIZE) {
216 bd = (struct block_data *)chunk_buf;
/* header is good only when offset, timestamp and inode all match */
217 if ((bd->bd_offset == chunk_off) && (bd->bd_time == time_st) &&
218 (bd->bd_inode == inode_st))
/* expected triple first, then the triple actually found on disk */
220 fprintf(stderr, "\n%s: verify %s failed offset/timestamp/inode "
221 "%llu/%llu/%llu: found %llu/%llu/%llu instead\n",
222 progname, file, chunk_off, time_st, inode_st,
223 bd->bd_offset, bd->bd_time, bd->bd_inode);
230 * fill_chunk: Fills the chunk with current or user specified timestamp
231 * and offset. The test pattern is filled at the beginning of
232 * each 4kB(BLOCKSIZE) blocks in chunk_buf.
/*
 * Stamps a block_data header at the start of every BLOCKSIZE block of
 * chunk_buf.
 *   chunk_buf - buffer to fill
 *   chunksize - number of bytes of chunk_buf to cover
 *   chunk_off - offset recorded in the first header; advanced by
 *               BLOCKSIZE for every following block
 *   time_st   - timestamp recorded in every header
 *   inode_st  - inode number recorded in every header
 * Bytes of a block beyond its header are not modified by the visible code.
 */
234 void fill_chunk(char *chunk_buf, size_t chunksize, loff_t chunk_off,
235 const time_t time_st, const ino_t inode_st)
237 struct block_data *bd;
/* stop once a complete header no longer fits before the end of the chunk */
240 for (chunk_end = chunk_buf + chunksize - sizeof(*bd);
241 (char *)chunk_buf < chunk_end;
242 chunk_buf += BLOCKSIZE, chunk_off += BLOCKSIZE) {
243 bd = (struct block_data *)chunk_buf;
/* values are stored widened to the unsigned long long fields of block_data */
244 bd->bd_offset = chunk_off;
245 bd->bd_time = time_st;
246 bd->bd_inode = inode_st;
251 * Write a chunk to disk, handling errors, interrupted writes, etc.
253 * If there is an IO error hit during the write, it is possible that
254 * this will just show up as a short write, and a subsequent write
255 * will return the actual error. We want to continue in the face of
256 * minor media errors so that we can validate the whole device if
257 * possible, but if there are many errors we don't want to loop forever.
259 * The error count will be returned upon exit to ensure that the
260 * media errors are detected even if nobody is looking at the output.
262 * Returns 0 on success, or -ve errno on failure.
/*
 * Writes nrequested bytes from chunk_buf to fd with write(2).
 *   fd         - descriptor to write to
 *   chunk_buf  - data to write
 *   nrequested - number of bytes still to be written
 *   offset     - file offset of the data; used in the messages shown here
 *   file       - file name; used in the messages shown here
 * Errors other than ENOSPC are reported on stderr and counted in
 * error_count; a short write is reported and the buffer pointer and the
 * remaining length are moved past the bytes already written.
 * NOTE(review): the enclosing loop, the test for a failed write and the
 * return statements are not visible in this listing.
 */
264 int write_retry(int fd, const char *chunk_buf, size_t nrequested,
265 unsigned long long offset, const char *file)
270 nwritten = write(fd, chunk_buf, nrequested);
/* a full filesystem is not reported as an error here */
272 if (errno != ENOSPC) {
/* NOTE(review): "%zi" is given a size_t (nrequested); "%zu" is the matching conversion */
273 fprintf(stderr, "\n%s: write %s@%llu+%zi failed: %s\n",
274 progname, file, offset, nrequested,
/* counts the error; the first 100 errors take this branch (body not visible) */
276 if (error_count++ < 100)
281 if (nwritten < nrequested) {
/* NOTE(review): "%ld" must match the declared type of nwritten, which is not visible here */
282 fprintf(stderr, "\n%s: write %s@%llu+%zi short: %ld written\n",
283 progname, file, offset, nrequested, nwritten);
/* skip what was already written so only the remainder is left */
285 chunk_buf += nwritten;
286 nrequested -= nwritten;
294 * write_chunks: writes the chunk_buf to the device. The number of write
295 * operations is based on the parameters write_end, offset, and chunksize.
297 * Returns 0 on success, or -ve error number on failure.
/*
 * Stamps the test pattern into chunk_buf with fill_chunk() and writes it
 * with write_retry(), starting at <offset> rounded down to a multiple of
 * chunksize and continuing while offset < write_end.
 *   fd        - descriptor to write to
 *   offset    - starting file offset
 *   write_end - offset at which writing stops (exclusive)
 *   chunk_buf - buffer of at least chunksize bytes, overwritten here
 *   chunksize - IO size; the rounding mask assumes a power of two
 *               (TODO confirm against the chunk-size option handling)
 *   time_st, inode_st - values stamped into every block header
 *   file      - file name, used in diagnostics
 * NOTE(review): the loop increment, the error returns and the closing
 * braces are missing from this listing and are left exactly as found;
 * the only change is the corrected function name in the lseek64()
 * diagnostic.
 */
299 int write_chunks(int fd, unsigned long long offset,unsigned long long write_end,
300 char *chunk_buf, size_t chunksize, const time_t time_st,
301 const ino_t inode_st, const char *file)
303 unsigned long long stride;
/* full mode advances by one chunk; partial mode uses ONE_GB - chunksize */
305 stride = full ? chunksize : (ONE_GB - chunksize);
306 for (offset = offset & ~(chunksize - 1); offset < write_end;
/* an explicit seek is only needed when consecutive writes are not adjacent */
310 if (stride != chunksize && lseek64(fd, offset, SEEK_SET) < 0) {
/* name the call that actually failed (the message used to say "lseek66") */
311 fprintf(stderr, "\n%s: lseek64(%s+%llu) failed: %s\n",
312 progname, file, offset, strerror(errno));
/* never write past write_end: shorten the last chunk */
315 if (offset + chunksize > write_end)
316 chunksize = write_end - offset;
/* partial mode only, and only once offset is larger than chunksize */
317 if (!full && offset > chunksize) {
318 fill_chunk(chunk_buf, chunksize, offset, time_st,
320 ret = write_retry(fd, chunk_buf, chunksize,offset,file);
324 if (offset + chunksize > write_end)
325 chunksize = write_end - offset;
327 fill_chunk(chunk_buf, chunksize, offset, time_st, inode_st);
328 ret = write_retry(fd, chunk_buf, chunksize, offset, file);
336 * read_chunks: reads the chunk_buf from the device. The number of read
337 * operations is based on the parameters read_end, offset, and chunksize.
/*
 * Reads the file in chunksize pieces, starting at <offset> rounded down
 * to a multiple of chunksize and continuing while offset < read_end, and
 * hands every buffer to verify_chunk().
 *   fd        - descriptor to read from
 *   offset    - starting file offset
 *   read_end  - offset at which reading stops (exclusive)
 *   chunk_buf - buffer of at least chunksize bytes
 *   chunksize - IO size; the rounding mask assumes a power of two
 *               (TODO confirm against the chunk-size option handling)
 *   time_st, inode_st - values every block header is expected to carry
 *   file      - file name, used in diagnostics
 * NOTE(review): the loop increment, the declaration of nread, the tests
 * for a failed read and all return statements are not visible in this
 * listing.
 */
339 int read_chunks(int fd, unsigned long long offset, unsigned long long read_end,
340 char *chunk_buf, size_t chunksize, const time_t time_st,
341 const ino_t inode_st, const char *file)
343 unsigned long long stride;
/* full mode advances by one chunk; partial mode uses ONE_GB - chunksize */
345 stride = full ? chunksize : (ONE_GB - chunksize);
346 for (offset = offset & ~(chunksize - 1); offset < read_end;
/* an explicit seek is only needed when consecutive reads are not adjacent */
350 if (stride != chunksize && lseek64(fd, offset, SEEK_SET) < 0) {
351 fprintf(stderr, "\n%s: lseek64(%s+%llu) failed: %s\n",
352 progname, file, offset, strerror(errno));
/* never read past read_end: shorten the last chunk */
355 if (offset + chunksize > read_end)
356 chunksize = read_end - offset;
/* partial mode only, and only once offset is larger than chunksize */
358 if (!full && offset > chunksize) {
359 nread = read(fd, chunk_buf, chunksize);
/* NOTE(review): "%zi" is given a size_t (chunksize); "%zu" is the matching conversion */
361 fprintf(stderr,"\n%s: read %s@%llu+%zi failed: "
362 "%s\n", progname, file, offset,
363 chunksize, strerror(errno));
367 if (nread < chunksize) {
368 fprintf(stderr, "\n%s: read %s@%llu+%zi short: "
369 "%zi read\n", progname, file, offset,
/* only the bytes actually read are verified */
373 if (verify_chunk(chunk_buf, nread, offset, time_st,
374 inode_st, file) != 0) {
379 /* Need to reset position after read error */
380 if (nread < chunksize &&
381 lseek64(fd, offset, SEEK_SET) == -1) {
383 "\n%s: lseek64(%s@%llu) failed: %s\n",
384 progname, file, offset,strerror(errno));
/* ">=" here versus ">" above gives the same chunksize when the two sides are equal */
387 if (offset + chunksize >= read_end)
388 chunksize = read_end - offset;
390 nread = read(fd, chunk_buf, chunksize);
392 fprintf(stderr, "\n%s: read %s@%llu+%zi failed: %s\n",
393 progname, file, offset, chunksize,
398 if (nread < chunksize) {
399 fprintf(stderr, "\n%s: read %s@%llu+%zi short: "
400 "%zi read\n", progname, file, offset,
405 if (verify_chunk(chunk_buf, nread, offset, time_st,
406 inode_st, file) != 0) {
414 * new_file: prepares new filename using file counter and current dir.
/*
 * Formats "<cur_dir>/file<NNN>" into tempfile, with file_num zero-padded
 * to at least three digits.
 *   tempfile - destination buffer; must hold PATH_MAX bytes, longer
 *              results are truncated by snprintf()
 *   cur_dir  - directory part of the name
 *   file_num - file counter
 * NOTE(review): the return statement is not visible in this listing;
 * callers pass the result on as a file name.
 */
416 char *new_file(char *tempfile, char *cur_dir, int file_num)
418 snprintf(tempfile, PATH_MAX, "%s/file%03d", cur_dir, file_num);
423 * new_dir: prepares new dir name using dir counters.
/*
 * Formats "<testdir>/llverfs_dir<NNNNN>" into tempdir, with dir_num
 * zero-padded to at least five digits; testdir is the file-scope path of
 * the filesystem under test.
 *   tempdir - destination buffer; must hold PATH_MAX bytes, longer
 *             results are truncated by snprintf()
 *   dir_num - directory counter
 * NOTE(review): the return statement is not visible in this listing;
 * dir_write() passes the result straight to mkdir().
 */
425 char *new_dir(char *tempdir, int dir_num)
427 snprintf(tempdir, PATH_MAX, "%s/llverfs_dir%05d", testdir, dir_num);
432 * calc_total_bytes: calculates total bytes that need to be
433 * written into or read from the filesystem.
/*
 * Estimates how many bytes the given phase will process.
 *   op - "write" or "read"; any other string is reported as invalid
 * With statfs64() on testdir: "write" uses the blocks available to the
 * caller (f_bavail), "read" uses the total block count (f_blocks), each
 * multiplied by the block size. A second estimate is derived from
 * num_dirs, files_in_dir and file_size.
 * NOTE(review): the condition choosing between the two estimates and the
 * return statements are not visible in this listing.
 */
435 static unsigned long long calc_total_bytes(const char *op)
/* stays 0 unless one of the assignments below runs */
437 unsigned long long total_bytes = 0;
438 struct statfs64 statbuf;
441 if (statfs64(testdir, &statbuf) == 0) {
442 if (strcmp(op, "write") == 0)
443 total_bytes = (unsigned long long)
444 (statbuf.f_bavail * statbuf.f_bsize);
445 else if (strcmp(op, "read") == 0)
446 total_bytes = (unsigned long long)
447 (statbuf.f_blocks * statbuf.f_bsize);
449 fprintf(stderr, "\n%s: invalid operation: %s\n",
454 fprintf(stderr, "\n%s: unable to stat %s: %s\n",
455 progname, testdir, strerror(errno));
/*
 * NOTE(review): num_dirs * files_in_dir is evaluated in unsigned int
 * before being widened by file_size; harmless for the defaults
 * (30000 * 32) but it would wrap for very large directory counts.
 */
459 total_bytes = num_dirs * files_in_dir * file_size;
466 * show_rate: displays the current read/write file name and performance,
467 * along with an estimate of how long the whole read/write operation
/*
 * Prints "<op>: <filename>, current/overall MB/s, ETA h:mm:ss" to stdout.
 *   op          - operation label; its first character is compared with
 *                 the one remembered from the previous call
 *   filename    - file currently being processed
 *   start_time  - time the whole operation started
 *   total_bytes - bytes the whole operation is expected to process
 *   curr_bytes  - bytes processed so far
 * The current rate is measured since the previous update (last_time and
 * last_bytes are function-static); the overall rate since start_time.
 * NOTE(review): the declarations of last_op and remain_time and several
 * statements are not visible in this listing.
 */
470 void show_rate(char *op, char *filename, const struct timeval *start_time,
471 const unsigned long long total_bytes,
472 const unsigned long long curr_bytes)
/* state carried from one call to the next */
474 static struct timeval last_time;
475 static unsigned long long last_bytes;
477 struct timeval curr_time;
478 double curr_delta, overall_delta, curr_rate, overall_rate;
480 int remain_hours, remain_minutes, remain_seconds;
/* a different first letter of op restarts the measuring interval from start_time */
482 if (last_op != op[0]) {
484 last_time = *start_time;
488 gettimeofday(&curr_time, NULL);
/* seconds since the previous update, including the microsecond part */
490 curr_delta = (curr_time.tv_sec - last_time.tv_sec) +
491 (double)(curr_time.tv_usec - last_time.tv_usec) / 1000000;
/* seconds since the operation started */
493 overall_delta = (curr_time.tv_sec - start_time->tv_sec) +
494 (double)(curr_time.tv_usec - start_time->tv_usec) / 1000000;
/* NOTE(review): both divisions are by a time delta that is zero if no time has elapsed */
496 curr_rate = (curr_bytes - last_bytes) / curr_delta;
497 overall_rate = curr_bytes / overall_delta;
/* no progress since the previous update */
499 if (curr_rate == 0) {
500 last_time = curr_time;
/* remaining seconds, assuming the current rate holds */
503 remain_time = (total_bytes - curr_bytes) / curr_rate;
505 remain_hours = remain_time / 3600;
506 remain_minutes = (remain_time - remain_hours * 3600) / 60;
507 remain_seconds = (remain_time - remain_hours * 3600 -
508 remain_minutes * 60);
/* print when more than 4 seconds have passed since the last update, or always when verbose > 2 */
510 if (curr_delta > 4 || verbose > 2) {
/* NOTE(review): the "%u" conversions receive int arguments */
514 printf("%s: %s, current: %5g MB/s, overall: %5g MB/s, "
515 "ETA: %u:%02u:%02u", op, filename,
516 curr_rate / ONE_MB, overall_rate / ONE_MB,
517 remain_hours, remain_minutes, remain_seconds);
/* remember this sample as the base for the next current-rate calculation */
524 last_time = curr_time;
525 last_bytes = curr_bytes;
530 * dir_write: This function writes directories and files on device.
531 * it works for both full and partial modes.
/*
 * Write phase. Opens the count file, then creates files named
 * "<testdir>/llverfs_dirNNNNN/fileNNN" (a directory is created whenever
 * file_num reaches files_in_dir), writes file_size bytes of test pattern
 * to each with write_chunks(), reports progress with show_rate() and
 * rewrites the running num_files value at the start of the count file.
 *   chunk_buf, chunksize - IO buffer and its size
 *   time_st              - timestamp stamped into every block
 *   dir_num              - index of the first directory to write
 * NOTE(review): several declarations, the error exits, the per-directory
 * bookkeeping, the cleanup and the return value are not visible in this
 * listing.
 */
533 static int dir_write(char *chunk_buf, size_t chunksize,
534 time_t time_st, unsigned long dir_num)
536 char tempfile[PATH_MAX];
537 char tempdir[PATH_MAX];
/* starts above files_in_dir so that the first loop pass takes the mkdir branch */
540 int file_num = 999999999;
542 struct timeval start_time;
543 unsigned long long total_bytes;
544 unsigned long long curr_bytes = 0;
/* partial mode: try to flag testdir with TOPDIR; the message text says failure is ignored */
547 if (!full && fsetflags(testdir, EXT2_TOPDIR_FL))
549 "\n%s: can't set TOPDIR_FL on %s: %s (ignoring)",
550 progname, testdir, strerror(errno));
/* "w" truncates any count file left over from an earlier run */
552 countfile = fopen(filecount, "w");
553 if (countfile == NULL) {
554 fprintf(stderr, "\n%s: creating %s failed :%s\n",
555 progname, filecount, strerror(errno));
558 /* reserve space for the countfile */
559 if (fprintf(countfile, "%lu", num_files) < 1 ||
560 fflush(countfile) != 0) {
561 fprintf(stderr, "\n%s: writing %s failed :%s\n",
562 progname, filecount, strerror(errno));
567 /* calculate total bytes that need to be written */
568 total_bytes = calc_total_bytes("write");
/* total_bytes is unsigned, so this is in effect a test for zero */
569 if (total_bytes <= 0) {
570 fprintf(stderr, "\n%s: unable to calculate total bytes\n",
/* partial mode with a non-zero start: leave out the directories before dir_num */
576 if (!full && (dir_num != 0))
577 total_bytes -= dir_num * files_in_dir * file_size;
579 gettimeofday(&start_time, NULL);
/* one file per pass; dir_num is advanced by statements not visible in this listing */
580 for (; dir_num < num_dirs; num_files++, file_num++) {
/* current directory is full (or this is the first pass): start a new one */
583 if (file_num >= files_in_dir) {
585 if (mkdir(new_dir(tempdir, dir_num), dirmode) < 0) {
/* an already existing directory is not reported */
588 if (errno != EEXIST) {
589 fprintf(stderr, "\n%s: mkdir %s : %s\n",
/* create the file, or truncate an existing one, for writing */
599 fd = open_file(new_file(tempfile, tempdir, file_num),
600 O_WRONLY | O_CREAT | O_TRUNC | O_LARGEFILE);
/* the file's inode number becomes part of the test pattern */
602 if (fstat64(fd, &file) == 0) {
603 inode_st = file.st_ino;
605 fprintf(stderr, "\n%s: write stat '%s': %s",
606 progname, tempfile, strerror(errno));
614 ret = write_chunks(fd, 0, file_size, chunk_buf, chunksize,
615 time_st, inode_st, tempfile);
/* ENOSPC is told apart from other write errors (the handling is not visible here) */
618 if (ret != -ENOSPC) {
/* report the progress counter as complete */
622 curr_bytes = total_bytes;
626 curr_bytes += file_size;
628 show_rate("write", tempfile, &start_time,
629 total_bytes, curr_bytes);
/* overwrite the previous count in place rather than appending */
631 fseek(countfile, 0, SEEK_SET);
632 if (fprintf(countfile, "%lu", num_files) < 1 ||
633 fflush(countfile) != 0) {
634 fprintf(stderr, "\n%s: writing %s failed :%s\n",
635 progname, filecount, strerror(errno));
/* same first letter as "write", so show_rate() keeps its interval state */
640 show_rate("write_done", tempfile, &start_time, total_bytes, curr_bytes);
651 * dir_read: This function reads directories and files on device.
652 * it works for both full and partial modes.
/*
 * Read (verify) phase. Opens the files named
 * "<testdir>/llverfs_dirNNNNN/fileNNN" read-only, one per loop pass,
 * checks file_size bytes of each with read_chunks() against the expected
 * timestamp and the file's inode number, and reports progress with
 * show_rate().
 *   chunk_buf, chunksize - IO buffer and its size
 *   time_st              - timestamp every block is expected to carry
 *   dir_num              - index of the first directory to read
 * NOTE(review): several declarations (including file_num), the error
 * exits, the directory-advance bookkeeping and the return value are not
 * visible in this listing.
 */
654 static int dir_read(char *chunk_buf, size_t chunksize,
655 time_t time_st, unsigned long dir_num)
657 char tempfile[PATH_MAX];
658 char tempdir[PATH_MAX];
659 unsigned long count = 0;
663 struct timeval start_time;
664 unsigned long long total_bytes;
665 unsigned long long curr_bytes = 0;
667 /* calculate total bytes that need to be read */
668 total_bytes = calc_total_bytes("read");
/* total_bytes is unsigned, so this is in effect a test for zero */
669 if (total_bytes <= 0) {
670 fprintf(stderr, "\n%s: unable to calculate total bytes\n",
/* leave out the directories before dir_num (any guarding condition is not visible here) */
676 total_bytes -= dir_num * files_in_dir * file_size;
678 gettimeofday(&start_time, NULL);
/* stop after num_files files or when the directories run out, whichever comes first */
679 for (count = 0; count < num_files && dir_num < num_dirs; count++) {
/* called for its effect on tempdir; the returned pointer is not used */
683 new_dir(tempdir, dir_num);
687 fd = open_file(new_file(tempfile, tempdir, file_num),
688 O_RDONLY | O_LARGEFILE);
/* the inode number read back must equal the one stamped into the blocks */
690 if (fstat64(fd, &file) == 0) {
691 inode_st = file.st_ino;
693 fprintf(stderr, "\n%s: read stat '%s': %s\n",
694 progname, tempfile, strerror(errno));
/*
 * NOTE(review): with the loop bounded by count < num_files this test
 * cannot be true unless count is changed by a statement not visible
 * here, so file_size would never be replaced by the real size of the
 * file - check whether the final file was meant (count == num_files - 1).
 */
702 if (count == num_files)
703 file_size = file.st_size;
704 ret = read_chunks(fd, 0, file_size, chunk_buf, chunksize,
705 time_st, inode_st, tempfile);
710 curr_bytes += file_size;
712 show_rate("read", tempfile, &start_time,
713 total_bytes, curr_bytes);
/* directory exhausted once file_num reaches files_in_dir (the action is not visible here) */
715 if (++file_num >= files_in_dir)
/* same first letter as "read", so show_rate() keeps its interval state */
719 show_rate("read_done", tempfile, &start_time, total_bytes, curr_bytes);
726 int main(int argc, char **argv)
728 time_t time_st = 0; /* Default timestamp */
729 size_t chunksize = ONE_MB; /* IO chunk size(defailt=1MB) */
730 char *chunk_buf; /* chunk buffer */
732 FILE *countfile = NULL;
733 unsigned long dir_num = 0, dir_num_orig = 0;/* starting directory */
736 progname = strrchr(argv[0], '/') ? strrchr(argv[0], '/') + 1 : argv[0];
737 while ((c = getopt_long(argc, argv, "c:hlo:pqrs:t:vw",
738 long_opts, NULL)) != -1) {
741 chunksize = strtoul(optarg, NULL, 0) * ONE_MB;
742 if (chunksize == 0) {
743 fprintf(stderr, "%s: bad chunk size '%s'\n",
751 case 'o': /* offset */
752 dir_num = strtoul(optarg, NULL, 0);
767 file_size = strtoul(optarg, NULL, 0) * ONE_MB;
768 if (file_size == 0) {
769 fprintf(stderr, "%s: bad file size '%s'\n",
775 time_st = (time_t)strtoul(optarg, NULL, 0);
790 testdir = argv[optind];
793 fprintf(stderr, "%s: pathname not given\n", progname);
797 if (!readoption && !writeoption) {
802 (void) time(&time_st);
803 printf("Timestamp: %lu\n", (unsigned long )time_st);
804 isatty_flag = isatty(STDOUT_FILENO);
807 #ifdef HAVE_EXT2FS_EXT2FS_H
808 struct mntent *tempmnt;
812 if ((fp = setmntent("/etc/mtab", "r")) == NULL) {
813 fprintf(stderr, "%s: fail to open /etc/mtab in read mode :%s\n",
814 progname, strerror(errno));
818 /* find device name using filesystem */
819 while ((tempmnt = getmntent(fp)) != NULL) {
820 if (strcmp(tempmnt->mnt_dir, testdir) == 0)
824 if (tempmnt == NULL) {
825 fprintf(stderr, "%s: no device found for '%s'\n",
831 if (ext2fs_open(tempmnt->mnt_fsname, 0, 0, 0,
832 unix_io_manager, &fs)) {
833 fprintf(stderr, "%s: unable to open ext3 fs on '%s'\n",
840 num_dirs = (fs->super->s_blocks_count +
841 fs->super->s_blocks_per_group - 1) /
842 fs->super->s_blocks_per_group;
844 printf("ext3 block groups: %u, fs blocks: %u "
845 "blocks per group: %u\n",
846 num_dirs, fs->super->s_blocks_count,
847 fs->super->s_blocks_per_group);
853 struct statfs64 statbuf;
856 * Most extN filesystems are formatted with 128MB/group
857 * (32k bitmap = 4KB blocksize * 8 bits/block) * 4KB,
858 * so this is a relatively safe default (somewhat more
859 * or less doesn't make a huge difference for testing).
861 * We want to create one directory per group, together
862 * with the "TOPDIR" feature, so that the directories
863 * are spread across the whole block device.
865 if (statfs64(testdir, &statbuf) == 0) {
866 num_dirs = 1 + (long long)statbuf.f_blocks *
867 statbuf.f_bsize / (128ULL * ONE_MB);
869 printf("dirs: %u, fs blocks: %llu\n",
871 (long long)statbuf.f_blocks);
873 fprintf(stderr, "%s: unable to stat '%s': %s\n",
874 progname, testdir, strerror(errno));
876 printf("dirs: %u\n", num_dirs);
880 chunk_buf = (char *)calloc(chunksize, 1);
881 if (chunk_buf == NULL) {
882 fprintf(stderr, "Memory allocation failed for chunk_buf\n");
885 snprintf(filecount, sizeof(filecount), "%s/%s.filecount",
888 (void)mkdir(testdir, dirmode);
892 num_files = dir_num * files_in_dir;
894 printf("\n%s: %lu files already written\n",
895 progname, num_files);
897 if (dir_write(chunk_buf, chunksize, time_st, dir_num)) {
901 dir_num = dir_num_orig;
905 countfile = fopen(filecount, "r");
906 if (countfile == NULL ||
907 fscanf(countfile, "%lu", &num_files) != 1 ||
909 fprintf(stderr, "\n%s: reading %s failed :%s\n",
910 progname, filecount, strerror(errno));
911 num_files = num_dirs * files_in_dir;
913 num_files -= (dir_num * files_in_dir);
918 if (dir_read(chunk_buf, chunksize, time_st, dir_num)) {
919 fprintf(stderr, "\n%s: Data verification failed\n",