Whamcloud - gitweb
e2scan: a tool for fast namespace/inode scanning
[tools/e2fsprogs.git] / e2scan / e2scan.c
1 #define _GNU_SOURCE
2 #define _FILE_OFFSET_BITS 64
3 #define _XOPEN_SOURCE           /* for getdate */
4 #define _XOPEN_SOURCE_EXTENDED  /* for getdate */
5
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/errno.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <ext2fs/ext2fs.h>
15
16 ext2_filsys fs;
17 const char *database = "e2scan.db";
18 int readahead_groups = 1; /* by default readahead one group inode table */
19 FILE *outfile;
20
21 void usage(char *prog)
22 {
23         fprintf(stderr,
24 #if defined(HAVE_SQLITE3) && defined(HAVE_SQLITE3_H)
25                 "\nUsage: %s {-l | -f} [ options ] device-filename\nModes:"
26                 "\t-f: create file database\n"
27 #else
28                 "\nUsage: %s -l [ options ] device-filename\nModes:"
29 #endif
30                 "\t-l: list recently changed files\n"
31                 "Options:\n"
32                 "\t-a groups: readahead 'groups' inode tables (default %d)\n"
33                 "\t-b blocks: buffer 'blocks' inode table blocks\n"
34                 "\t-C chdir: list files relative to 'chdir' in filesystem\n"
35                 "\t-d database: output database filename (default %s)\n"
36                 "\t-D: list not only files, but directories as well\n"
37                 "\t-n filename: list files newer than 'filename'\n"
38                 "\t-N date: list files newer than 'date' (default 1 day, "
39                                                          "0 for all files)\n"
40                 "\t-o outfile: output file list to 'outfile'\n",
41                 prog, readahead_groups, database);
42         exit(1);
43 }
44
45 #define SM_NONE 0
46 #define SM_DATABASE 1 /* -f */
47 #define SM_FILELIST 2 /* -l */
48
49 struct {
50         int mode;
51         int nr;
52         union {
53                 struct {
54                         int fd;
55                         int nr_commands;
56                 } db;
57                 struct {
58                         /* number of files newer than specified time */
59                         ext2_ino_t nr_files;
60                         ext2_ino_t nr_dirs;
61                         /* number of files reported */
62                         ext2_ino_t nr_reported;
63                         time_t mtimestamp;
64                         time_t ctimestamp;
65                         int with_dirs;
66                 } fl;
67         };
68 } scan_data = { .mode = SM_FILELIST, };
69
70 /* db.c */
71 pid_t fork_db_creation(const char *database);
72 void database_iscan_action(ext2_ino_t ino,
73                            struct ext2_inode *inode, int fd, char *buf);
74 int database_dblist_iterate_cb(ext2_ino_t dir, struct ext2_dir_entry *dirent,
75                                int namelen, int fd);
76
77 /* filelist.c */
78 void filelist_iscan_action(ext2_ino_t ino,
79                            struct ext2_inode *inode, char *buf);
80 int filelist_dblist_iterate_cb(ext2_ino_t dirino,
81                                struct ext2_dir_entry *dirent,
82                                int namelen);
83 int create_root_dentries(char *root);
84 void report_root(void);
85
86
87 static void get_timestamps(const char *filename)
88 {
89         struct stat st;
90
91         if (stat(filename, &st) == -1) {
92                 perror("failed to stat file");
93                 exit(1);
94         }
95         scan_data.fl.mtimestamp = st.st_mtime;
96         scan_data.fl.ctimestamp = st.st_ctime;
97 }
98
99 /*
100  * callback for ext2fs_block_iterate2, it adds directory leaf blocks
101  * to dblist
102  */
103 int block_iterate_cb(ext2_filsys fs, blk_t  *block_nr,
104                      e2_blkcnt_t blockcnt,
105                      blk_t ref_block EXT2FS_ATTR((unused)),
106                      int ref_offset EXT2FS_ATTR((unused)),
107                      void *priv_data)
108 {
109         int ret;
110         ext2_ino_t *ino;
111
112         if ((int) blockcnt < 0)
113                 /* skip indirect blocks */
114                 return 0;
115         ret = 0;
116         ino = priv_data;
117         if (ext2fs_add_dir_block(fs->dblist, *ino, *block_nr, (int) blockcnt))
118                 ret |= BLOCK_ABORT;
119
120         return ret;
121 }
122
123 /*
124  * done_group callback for inode scan.
125  * When i-th group of inodes is scanned over, readahead for i+2-th
126  * group is issued. Inode table readahead for two first groups is
127  * issued before scan begin.
128  */
129 errcode_t done_group_callback(ext2_filsys fs, ext2_inode_scan scan,
130                               dgrp_t group, void *vp)
131 {
132         dgrp_t ra_group;
133         unsigned long ra_start;
134         int ra_size;
135
136         if (readahead_groups <= 0)
137                 return 0;
138
139         if (((group + 1) % readahead_groups) != 0)
140                 return 0;
141
142         ra_group = group + 1 + readahead_groups;
143         if (ra_group >= fs->group_desc_count)
144                 return 0;
145
146         ra_start = ext2fs_inode_table_loc(fs, ra_group);
147         if (ra_group + readahead_groups > fs->group_desc_count)
148                 ra_size = fs->group_desc_count - ra_group;
149         else
150                 ra_size = readahead_groups;
151
152         ra_size *= fs->inode_blocks_per_group;
153         io_channel_readahead(fs->io, ra_start, ra_size);
154         return 0;
155 }
156
157 #define DEFAULT_CHUNK_SIZE 16
158 __u32 chunk_size; /* in blocks */
159 int nr_chunks;
160
161 struct chunk {
162         __u32 start;
163         __u32 covered;
164 } *cur_chunk, *ra_chunk, *chunks;
165
166 /* callback for ext2fs_dblist_iterate */
167 static int fill_chunks(ext2_filsys fs, struct ext2_db_entry *db_info,
168                        void *priv_data)
169 {
170         __u32 cur;
171
172         cur = db_info->blk / chunk_size;
173         if (cur_chunk == NULL || cur != cur_chunk->start) {
174                 /* new sweep starts */
175                 if (cur_chunk == NULL)
176                         cur_chunk = chunks;
177                 else
178                         cur_chunk ++;
179
180                 cur_chunk->start = cur;
181                 cur_chunk->covered = 1;
182         } else
183                 cur_chunk->covered ++;
184
185         return 0;
186 }
187
188 /* callback for ext2fs_dblist_iterate */
189 static int count_chunks(ext2_filsys fs, struct ext2_db_entry *db_info,
190                         void *priv_data)
191 {
192         __u32 cur;
193         static __u32 prev = (__u32)-1;
194
195         cur = db_info->blk / chunk_size;
196         if (cur != prev) {
197                 nr_chunks ++;
198                 prev = cur;
199         }
200         return 0;
201 }
202
203 /* create list of chunks, readahead two first of them */
204 static void make_chunk_list(ext2_dblist dblist)
205 {
206         chunk_size = readahead_groups * DEFAULT_CHUNK_SIZE;
207         if (chunk_size == 0)
208                 return;
209
210         ext2fs_dblist_iterate(dblist, count_chunks, NULL);
211         chunks = malloc(sizeof(struct chunk) * nr_chunks);
212         if (chunks == NULL) {
213                 fprintf(stderr, "malloc failed\n");
214                 exit(1);
215         }
216         ext2fs_dblist_iterate(dblist, fill_chunks, NULL);
217
218         /* start readahead for two first chunks */
219         ra_chunk = chunks;
220         cur_chunk = NULL;
221
222         io_channel_readahead(fs->io,
223                              ra_chunk->start * chunk_size,
224                              chunk_size);
225         ra_chunk ++;
226         if (ra_chunk < chunks + nr_chunks)
227                 io_channel_readahead(fs->io,
228                                      ra_chunk->start * chunk_size,
229                                      chunk_size);
230 }
231
232 /*
233  * this is called for each directory block when it is read by dblist
234  * iterator
235  */
236 static int dblist_readahead(void *vp)
237 {
238         if (chunk_size == 0)
239                 return 0;
240         if (cur_chunk == NULL)
241                 cur_chunk = chunks;
242         if (--cur_chunk->covered == 0) {
243                 /*
244                  * last block of current chunk is read, readahead
245                  * chunk is under I/O, get new readahead chunk, move
246                  * current chunk
247                  */
248                 cur_chunk ++;
249                 ra_chunk ++;
250                 if (ra_chunk < chunks + nr_chunks)
251                         io_channel_readahead(fs->io,
252                                              ra_chunk->start * chunk_size,
253                                              chunk_size);
254         }
255         return 0;
256 }
257
258 /*
259  * callback for ext2fs_dblist_dir_iterate to be called for each
260  * directory entry, perform actions common for both database and
261  * filelist modes, call specific functions depending on the mode
262  */
263 static int dblist_iterate_cb(ext2_ino_t dirino, int entry,
264                              struct ext2_dir_entry *dirent,
265                              int offset EXT2FS_ATTR((unused)),
266                              int blocksize EXT2FS_ATTR((unused)),
267                              char *buf EXT2FS_ATTR((unused)),
268                              void *private)
269 {
270         int namelen;
271
272         if (offset == 0) {
273                 /* new directory block is read */
274                 scan_data.nr ++;
275                 dblist_readahead(NULL);
276         }
277
278         if (dirent->inode == 0)
279                 return 0;
280
281         namelen = (dirent->name_len & 0xFF);
282         if (namelen == 2 && !strncmp(dirent->name, "..", 2))
283                 return 0;
284
285         if (namelen == 1 && !strncmp(dirent->name, ".", 1))
286                 return 0;
287
288         if (dirent->inode > fs->super->s_inodes_count) {
289                 fprintf(stderr, "too big ino %u (%.*s)\n",
290                         dirent->inode, namelen, dirent->name);
291                 exit(1);
292         }
293
294         if (scan_data.mode == SM_DATABASE)
295                 return database_dblist_iterate_cb(dirino, dirent, namelen,
296                                                   scan_data.db.fd);
297
298         return filelist_dblist_iterate_cb(dirino, dirent, namelen);
299 }
300
301 int main(int argc, char **argv)
302 {
303         char *root = "/";
304         int inode_buffer_blocks = 0;
305         errcode_t retval;
306         char *block_buf;
307         ext2_inode_scan scan;
308         struct ext2_inode inode;
309         ext2_ino_t ino;
310         dgrp_t nr;
311         time_t t;
312         pid_t pid = 0;
313         int c;
314
315         /*
316          * by default find for files which are modified less than one
317          * day ago
318          */
319         scan_data.fl.mtimestamp = time(NULL) - 60 * 60 * 24;
320         scan_data.fl.ctimestamp = scan_data.fl.mtimestamp;
321         outfile = stdout;
322
323         opterr = 0;
324 #if defined(HAVE_SQLITE3) && defined(HAVE_SQLITE3_H)
325 #define OPTF "f"
326 #else
327 #define OPTF ""
328 #endif
329         while ((c = getopt(argc, argv, "a:b:C:d:D"OPTF"hln:N:o:")) != EOF) {
330                 char *end;
331
332                 switch (c) {
333                 case 'a':
334                         if (optarg == NULL)
335                                 usage(argv[0]);
336                         readahead_groups = strtoul(optarg, &end, 0);
337                         if (*end) {
338                                 fprintf(stderr, "%s: bad -a argument '%s'\n",
339                                         argv[0], optarg);
340                                 usage(argv[0]);
341                         }
342                         break;
343                 case 'b':
344                         inode_buffer_blocks = strtoul(optarg, &end, 0);
345                         if (*end) {
346                                 fprintf(stderr, "%s: bad -b argument '%s'\n",
347                                         argv[0], optarg);
348                                 usage(argv[0]);
349                         }
350                         break;
351                 case 'C':
352                         root = optarg;
353                         break;
354                 case 'd':
355                         database = optarg;
356                         break;
357                 case 'D':
358                         scan_data.fl.with_dirs = 1;
359                         break;
360                 case 'f':
361 #if !defined(HAVE_SQLITE3) || !defined(HAVE_SQLITE3_H)
362                         fprintf(stderr,
363                                 "%s: sqlite3 was not detected on configure, "
364                                 "database creation is not supported\n",argv[0]);
365                         return 1;
366 #endif
367                         scan_data.mode = SM_DATABASE;
368                         break;
369                 case 'l':
370                         scan_data.mode = SM_FILELIST;
371                         break;
372                 case 'n':
373                         get_timestamps(optarg);
374                         break;
375                 case 'N': {
376                         const char *fmts[] = {"%c", /*date/time current locale*/
377                                               "%Ec",/*date/time alt. locale*/
378                                               "%a%t%b%t%d,%t%Y%t%H:%M:%S",
379                                               "%a,%t%d%t%b%t%Y%t%H:%M:%S",
380                                               "%a%t%b%t%d%t%H:%M:%S%t%Z%t%Y",
381                                               "%a%t%b%t%d%t%H:%M:%S%t%Y",
382                                               "%b%t%d%t%H:%M:%S%t%Z%t%Y",
383                                               "%b%t%d%t%H:%M:%S%t%Y",
384                                               "%x%t%X",/*date time*/
385                                               "%Ex%t%EX",/*alternate date time*/
386                                               "%F", /*ISO 8601 date*/
387                                               "%+", /*`date` format*/
388                                               "%s", /*seconds since epoch */
389                                               NULL,
390                                             };
391                         const char **fmt;
392                         struct tm tmptm, *tm = NULL;
393                         time_t now = time(0);
394
395                         tmptm = *localtime(&now);
396
397                         for (fmt = &fmts[0]; *fmt != NULL; fmt++) {
398                                 if (strptime(optarg, *fmt, &tmptm) != NULL) {
399                                         tm = &tmptm;
400                                         break;
401                                 }
402                         }
403
404                         if (tm == NULL) {
405                                 fprintf(stderr, "%s: bad -N argument '%s'\n",
406                                         argv[0], optarg);
407                                 usage(argv[0]);
408                         }
409                         scan_data.fl.mtimestamp = mktime(tm);
410                         scan_data.fl.ctimestamp = scan_data.fl.mtimestamp;
411                         break;
412                         }
413                 case 'o':
414                         outfile = fopen(optarg, "w");
415                         if (outfile == NULL) {
416                                 fprintf(stderr, "%s: can't open '%s': %s\n",
417                                         argv[0], optarg, strerror(errno));
418                                 usage(argv[0]);
419                         }
420                         break;
421                 default:
422                         fprintf(stderr, "%s: unknown option '-%c'\n",
423                                 argv[0], optopt);
424                 case 'h':
425                         usage(argv[0]);
426                 }
427         }
428
429         if (scan_data.mode == SM_NONE || argv[optind] == NULL)
430                 usage(argv[0]);
431
432
433         fprintf(stderr, "generating list of files with\n"
434                 "\tmtime newer than %s"
435                 "\tctime newer than %s",
436                 ctime(&scan_data.fl.mtimestamp),
437                 ctime(&scan_data.fl.ctimestamp));
438
439         retval = ext2fs_open(argv[optind], EXT2_FLAG_SOFTSUPP_FEATURES,
440                              0, 0, unix_io_manager, &fs);
441         if (retval != 0) {
442                 com_err("ext2fs_open", retval, "opening %s\n", argv[optind]);
443                 return 1;
444         }
445
446         t = time(NULL);
447
448         for (nr = 0; nr < fs->group_desc_count; nr ++)
449                 io_channel_readahead(fs->io,
450                                      ext2fs_inode_bitmap_loc(fs, nr), 1);
451         retval = ext2fs_read_inode_bitmap(fs);
452         if (retval) {
453                 com_err("ext2fs_read_inode_bitmap", retval,
454                         "opening inode bitmap on %s\n", argv[optind]);
455                 exit(1);
456         }
457         fprintf(stderr, "inode bitmap is read, %ld seconds\n", time(NULL) - t);
458
459
460         if (inode_buffer_blocks == 0)
461                 inode_buffer_blocks = fs->inode_blocks_per_group;
462
463         retval = ext2fs_open_inode_scan(fs, inode_buffer_blocks, &scan);
464         if (retval) {
465                 com_err("ext2fs_open_inode_scan", retval,
466                         "opening inode scan on %s\n", argv[optind]);
467                 fprintf(stderr, "failed to open inode scan\n");
468                 exit(1);
469         }
470         ext2fs_set_inode_callback(scan, done_group_callback, NULL);
471
472         retval = ext2fs_init_dblist(fs, NULL);
473         if (retval) {
474                 com_err("ext2fs_init_dblist", retval,
475                         "initializing dblist\n");
476                 exit(1);
477         }
478
479         block_buf = (char *)malloc(fs->blocksize * 3);
480         if (block_buf == NULL) {
481                 fprintf(stderr, "%s: failed to allocate memory for block_buf\n",
482                         argv[0]);
483                 exit(1);
484         }
485         memset(block_buf, 0, fs->blocksize * 3);
486
487         switch (scan_data.mode) {
488         case SM_DATABASE:
489                 pid = fork_db_creation(database);
490                 break;
491
492         case SM_FILELIST:
493                 c = create_root_dentries(root);
494                 if (c == ENOENT && strncmp(root, "/ROOT", 5) != 0) {
495                         /* Try again with prepending "/ROOT" */
496                         char newroot[PATH_MAX];
497                         if (snprintf(newroot, PATH_MAX, "/ROOT/%s", root) >=
498                             PATH_MAX) {
499                                 fprintf(stderr, "%s: root path '%s' too long\n",
500                                         argv[0], root);
501                                 exit(1);
502                         }
503                         if (create_root_dentries(newroot) == 0)
504                                 c = 0;
505                 }
506                 if (c == ENOENT)
507                         fprintf(stderr,
508                                 "%s: visible filesystem root '%s' not found\n",
509                                 argv[0], root);
510                 else if (c == EIO)
511                         fprintf(stderr,
512                                 "%s: error reading visible root: '%s'\n",
513                                 argv[0], root);
514                 else if (c == ENOTDIR)
515                         fprintf(stderr,
516                                "%s: visible root '%s' not a directory\n",
517                                argv[0], root);
518                 if (c)
519                         exit(1);
520                 break;
521         default:
522                 break;
523         }
524
525         t = time(NULL);
526         fprintf(stderr, "scanning inode tables .. ");
527         scan_data.nr = 0;
528
529         done_group_callback(fs, scan, -readahead_groups * 2, NULL);
530         done_group_callback(fs, scan, -readahead_groups, NULL);
531         while (ext2fs_get_next_inode(scan, &ino, &inode) == 0) {
532                 if (ino == 0)
533                         break;
534
535                 scan_data.nr ++;
536                 if (ext2fs_fast_test_inode_bitmap2(fs->inode_map, ino) == 0)
537                         /* deleted - always skip for now */
538                         continue;
539                 switch (scan_data.mode) {
540                 case SM_DATABASE:
541                         database_iscan_action(ino, &inode, scan_data.db.fd,
542                                               block_buf);
543                         break;
544
545                 case SM_FILELIST:
546                         filelist_iscan_action(ino, &inode, block_buf);
547                         break;
548
549                 default:
550                         break;
551                 }
552         }
553
554         switch (scan_data.mode) {
555         case SM_DATABASE:
556                 fprintf(stderr,
557                         "done\n\t%d inodes, %ld seconds\n",
558                         scan_data.nr, time(NULL) - t);
559                 break;
560
561         case SM_FILELIST:
562                 fprintf(stderr, "done\n\t%d inodes, %ld seconds, %d files, "
563                         "%d dirs to find\n",
564                         scan_data.nr, time(NULL) - t, scan_data.fl.nr_files,
565                         scan_data.fl.nr_dirs);
566                 if (scan_data.fl.nr_files == 0 && scan_data.fl.nr_dirs == 0) {
567                         ext2fs_close_inode_scan(scan);
568                         ext2fs_close(fs);
569                         free(block_buf);
570                         return 0;
571                 }
572                 break;
573
574         default:
575                 break;
576         }
577
578         t = time(NULL);
579         fprintf(stderr, "scanning directory blocks (%u).. ",
580                 ext2fs_dblist_count(fs->dblist));
581
582         /* root directory does not have name, handle it separately */
583         report_root();
584         /*
585          * we have a list of directory leaf blocks, blocks are sorted,
586          * but can be not very sequential. If such blocks are close to
587          * each other, read throughput can be improved if blocks are
588          * read not sequentially, but all at once in a big
589          * chunk. Create list of those chunks, it will be then used to
590          * issue readahead
591          */
592         make_chunk_list(fs->dblist);
593
594         scan_data.nr = 0;
595         retval = ext2fs_dblist_dir_iterate(fs->dblist,
596                                            DIRENT_FLAG_INCLUDE_EMPTY,
597                                            block_buf,
598                                            dblist_iterate_cb, NULL);
599         if (retval) {
600                 com_err("ext2fs_dblist_dir_iterate", retval,
601                         "dir iterating dblist\n");
602                 exit(1);
603         }
604         if (chunk_size)
605                 free(chunks);
606
607         switch (scan_data.mode) {
608         case SM_DATABASE:
609         {
610                 int status;
611
612                 fprintf(stderr,
613                         "done\n\t%d blocks, %ld seconds, "
614                         "%d records sent to database\n",
615                         scan_data.nr, time(NULL) - t, scan_data.db.nr_commands);
616                 close(scan_data.db.fd);
617                 waitpid(pid, &status, 0);
618                 if (WIFEXITED(status))
619                         fprintf(stderr, "database creation exited with %d\n",
620                                 WEXITSTATUS(status));
621                 break;
622         }
623
624         case SM_FILELIST:
625                 fprintf(stderr,
626                         "done\n\t%d blocks, %ld seconds, %d files reported\n",
627                         scan_data.nr, time(NULL) - t, scan_data.fl.nr_reported);
628                 break;
629
630         default:
631                 break;
632         }
633
634         ext2fs_close_inode_scan(scan);
635         ext2fs_close(fs);
636         free(block_buf);
637
638         return 0;
639 }