Whamcloud - gitweb
e2fsck: optimize the inserting of dir_info_db
[tools/e2fsprogs.git] / e2fsck / dirinfo.c
1 /*
2  * dirinfo.c --- maintains the directory information table for e2fsck.
3  *
4  * Copyright (C) 1993 Theodore Ts'o.  This file may be redistributed
5  * under the terms of the GNU Public License.
6  */
7
8 #undef DIRINFO_DEBUG
9
10 #include <assert.h>
11 #include "config.h"
12 #include "e2fsck.h"
13 #include <sys/stat.h>
14 #include <fcntl.h>
15 #include "uuid/uuid.h"
16
17 #include "ext2fs/ext2fs.h"
18 #include <ext2fs/tdb.h>
19
20 struct dir_info_db {
21         ext2_ino_t      count;
22         ext2_ino_t      size;
23         struct dir_info *array;
24         struct dir_info *last_lookup;
25 #ifdef CONFIG_TDB
26         char            *tdb_fn;
27         TDB_CONTEXT     *tdb;
28 #endif
29 };
30
31 struct dir_info_iter {
32         ext2_ino_t      i;
33 #ifdef CONFIG_TDB
34         TDB_DATA        tdb_iter;
35 #endif
36 };
37
38 struct dir_info_ent {
39         ext2_ino_t              dotdot; /* Parent according to '..' */
40         ext2_ino_t              parent; /* Parent according to treewalk */
41 };
42
43
44 static void e2fsck_put_dir_info(e2fsck_t ctx, struct dir_info *dir);
45
46 #ifdef CONFIG_TDB
47 static void setup_tdb(e2fsck_t ctx, ext2_ino_t num_dirs)
48 {
49         struct dir_info_db      *db = ctx->dir_info;
50         ext2_ino_t              threshold;
51         errcode_t               retval;
52         mode_t                  save_umask;
53         char                    *tdb_dir, uuid[40];
54         int                     fd, enable;
55
56         profile_get_string(ctx->profile, "scratch_files", "directory", 0, 0,
57                            &tdb_dir);
58         profile_get_uint(ctx->profile, "scratch_files",
59                          "numdirs_threshold", 0, 0, &threshold);
60         profile_get_boolean(ctx->profile, "scratch_files",
61                             "dirinfo", 0, 1, &enable);
62
63         if (!enable || !tdb_dir || access(tdb_dir, W_OK) ||
64             (threshold && num_dirs <= threshold))
65                 return;
66
67         retval = ext2fs_get_mem(strlen(tdb_dir) + 64, &db->tdb_fn);
68         if (retval)
69                 return;
70
71         uuid_unparse(ctx->fs->super->s_uuid, uuid);
72         sprintf(db->tdb_fn, "%s/%s-dirinfo-XXXXXX", tdb_dir, uuid);
73         save_umask = umask(077);
74         fd = mkstemp(db->tdb_fn);
75         umask(save_umask);
76         if (fd < 0) {
77                 db->tdb = NULL;
78                 return;
79         }
80
81         if (num_dirs < 99991)
82                 num_dirs = 99991; /* largest 5 digit prime */
83
84         db->tdb = tdb_open(db->tdb_fn, num_dirs, TDB_NOLOCK | TDB_NOSYNC,
85                            O_RDWR | O_CREAT | O_TRUNC, 0600);
86         close(fd);
87 }
88 #endif
89
90 static void setup_db(e2fsck_t ctx)
91 {
92         struct dir_info_db      *db;
93         ext2_ino_t              num_dirs;
94         errcode_t               retval;
95
96         db = (struct dir_info_db *)
97                 e2fsck_allocate_memory(ctx, sizeof(struct dir_info_db),
98                                        "directory map db");
99         db->count = db->size = 0;
100         db->array = 0;
101
102         ctx->dir_info = db;
103
104         retval = ext2fs_get_num_dirs(ctx->fs, &num_dirs);
105         if (retval)
106                 num_dirs = 1024;        /* Guess */
107
108 #ifdef CONFIG_TDB
109         setup_tdb(ctx, num_dirs);
110
111         if (db->tdb) {
112 #ifdef DIRINFO_DEBUG
113                 printf("Note: using tdb!\n");
114 #endif
115                 return;
116         }
117 #endif
118
119         db->size = num_dirs + 10;
120         db->array  = (struct dir_info *)
121                 e2fsck_allocate_memory(ctx, db->size
122                                        * sizeof (struct dir_info),
123                                        "directory map");
124 }
125
126 /*
127  * Return the min index that has ino larger or equal to @ino
128  * If not found, return -ENOENT
129  */
130 static int
131 e2fsck_dir_info_min_larger_equal(struct dir_info_db *dir_info,
132                                  ext2_ino_t ino, ext2_ino_t *index)
133 {
134         ext2_ino_t low = 0;
135         ext2_ino_t mid, high;
136         ext2_ino_t tmp_ino;
137         int found = 0;
138
139         if (dir_info->count == 0)
140                 return -ENOENT;
141
142         high = dir_info->count - 1;
143         while (low <= high) {
144                 /* sum may overflow, but result will fit into mid again */
145                 mid = (unsigned long long)(low + high) / 2;
146                 tmp_ino = dir_info->array[mid].ino;
147                 if (ino == tmp_ino) {
148                         *index = mid;
149                         found = 1;
150                         return 0;
151                 } else if (ino < tmp_ino) {
152                         /*
153                          * The mid ino is larger than @ino, remember the index
154                          * here so we won't miss this ino
155                          */
156                         *index = mid;
157                         found = 1;
158                         if (mid == 0)
159                                 break;
160                         high = mid - 1;
161                 } else {
162                         low = mid + 1;
163                 }
164         }
165
166         if (found)
167                 return 0;
168
169         return -ENOENT;
170 }
171
172 /*
173  *
174  * Insert an inode into the sorted array. The array should have at least one
175  * free slot.
176  *
177  * Normally, add_dir_info is called with each inode in
178  * sequential order; but once in a while (like when pass 3
179  * needs to recreate the root directory or lost+found
180  * directory) it is called out of order.  In those cases, we
181  * need to move the dir_info entries down to make room, since
182  * the dir_info array needs to be sorted by inode number for
183  * get_dir_info()'s sake.
184  */
185 static void e2fsck_insert_dir_info(struct dir_info_db *dir_info, ext2_ino_t ino, ext2_ino_t parent)
186 {
187         ext2_ino_t              index;
188         struct dir_info         *dir;
189         size_t                  dir_size = sizeof(*dir);
190         struct dir_info         *array = dir_info->array;
191         ext2_ino_t              array_count = dir_info->count;
192         int                     err;
193
194         /*
195          * Removing this check won't break anything. But since seqential ino
196          * inserting happens a lot, this check avoids binary search.
197          */
198         if (array_count == 0 || array[array_count - 1].ino < ino) {
199                 dir = &array[array_count];
200                 dir_info->count++;
201                 goto out;
202         }
203
204         err = e2fsck_dir_info_min_larger_equal(dir_info, ino, &index);
205         if (err >= 0 && array[index].ino == ino) {
206                 dir = &array[index];
207                 goto out;
208         }
209         if (err < 0) {
210                 dir = &array[array_count];
211                 dir_info->count++;
212                 goto out;
213         }
214
215         dir = &array[index];
216         memmove((char *)dir + dir_size, dir, dir_size * (array_count - index));
217         dir_info->count++;
218 out:
219         dir->ino = ino;
220         dir->dotdot = parent;
221         dir->parent = parent;
222 }
223
224 /*
225  * This subroutine is called during pass1 to create a directory info
226  * entry.  During pass1, the passed-in parent is 0; it will get filled
227  * in during pass2.
228  */
229 void e2fsck_add_dir_info(e2fsck_t ctx, ext2_ino_t ino, ext2_ino_t parent)
230 {
231         struct dir_info         *dir, *old_array;
232         ext2_ino_t              i, j;
233         errcode_t               retval;
234         unsigned long           old_size;
235
236 #ifdef DIRINFO_DEBUG
237         printf("add_dir_info for inode (%u, %u)...\n", ino, parent);
238 #endif
239         if (!ctx->dir_info)
240                 setup_db(ctx);
241
242         if (ctx->dir_info->count >= ctx->dir_info->size) {
243                 old_size = ctx->dir_info->size * sizeof(struct dir_info);
244                 ctx->dir_info->size += 10;
245                 old_array = ctx->dir_info->array;
246                 retval = ext2fs_resize_mem(old_size, ctx->dir_info->size *
247                                            sizeof(struct dir_info),
248                                            &ctx->dir_info->array);
249                 if (retval) {
250                         fprintf(stderr, "Couldn't reallocate dir_info "
251                                 "structure to %u entries\n",
252                                 ctx->dir_info->size);
253                         fatal_error(ctx, 0);
254                         ctx->dir_info->size -= 10;
255                         return;
256                 }
257                 if (old_array != ctx->dir_info->array)
258                         ctx->dir_info->last_lookup = NULL;
259         }
260
261 #ifdef CONFIG_TDB
262         if (ctx->dir_info->tdb) {
263                 struct dir_info ent;
264
265                 ent.ino = ino;
266                 ent.parent = parent;
267                 ent.dotdot = parent;
268                 e2fsck_put_dir_info(ctx, &ent);
269                 return;
270         }
271 #endif
272
273         e2fsck_insert_dir_info(ctx->dir_info, ino, parent);
274 }
275
276 /*
277  * get_dir_info() --- given an inode number, try to find the directory
278  * information entry for it.
279  */
280 static struct dir_info *e2fsck_get_dir_info(e2fsck_t ctx, ext2_ino_t ino)
281 {
282         struct dir_info_db      *db = ctx->dir_info;
283         ext2_ino_t              index;
284         int                     err;
285
286         if (!db)
287                 return 0;
288
289 #ifdef DIRINFO_DEBUG
290         printf("e2fsck_get_dir_info %u...", ino);
291 #endif
292
293 #ifdef CONFIG_TDB
294         if (db->tdb) {
295                 static struct dir_info  ret_dir_info;
296                 TDB_DATA key, data;
297                 struct dir_info_ent     *buf;
298
299                 key.dptr = (unsigned char *) &ino;
300                 key.dsize = sizeof(ext2_ino_t);
301
302                 data = tdb_fetch(db->tdb, key);
303                 if (!data.dptr) {
304                         if (tdb_error(db->tdb) != TDB_ERR_NOEXIST)
305                                 printf("fetch failed: %s\n",
306                                        tdb_errorstr(db->tdb));
307                         return 0;
308                 }
309
310                 buf = (struct dir_info_ent *) data.dptr;
311                 ret_dir_info.ino = ino;
312                 ret_dir_info.dotdot = buf->dotdot;
313                 ret_dir_info.parent = buf->parent;
314 #ifdef DIRINFO_DEBUG
315                 printf("(%u,%u,%u)\n", ino, buf->dotdot, buf->parent);
316 #endif
317                 free(data.dptr);
318                 return &ret_dir_info;
319         }
320 #endif
321
322         if (db->last_lookup && db->last_lookup->ino == ino)
323                 return db->last_lookup;
324
325         err = e2fsck_dir_info_min_larger_equal(ctx->dir_info, ino, &index);
326         if (err < 0)
327                 return NULL;
328         assert(ino <= ctx->dir_info->array[index].ino);
329         if (ino == ctx->dir_info->array[index].ino) {
330 #ifdef DIRINFO_DEBUG
331                 printf("(%d,%d,%d)\n", ino,
332                        ctx->dir_info->array[index].dotdot,
333                        ctx->dir_info->array[index].parent);
334 #endif
335                 return &ctx->dir_info->array[index];
336         }
337         return NULL;
338 }
339
340 static void e2fsck_put_dir_info(e2fsck_t ctx EXT2FS_NO_TDB_UNUSED,
341                                 struct dir_info *dir EXT2FS_NO_TDB_UNUSED)
342 {
343 #ifdef CONFIG_TDB
344         struct dir_info_db      *db = ctx->dir_info;
345         struct dir_info_ent     buf;
346         TDB_DATA                key, data;
347 #endif
348
349 #ifdef DIRINFO_DEBUG
350         printf("e2fsck_put_dir_info (%u, %u, %u)...", dir->ino, dir->dotdot,
351                dir->parent);
352 #endif
353
354 #ifdef CONFIG_TDB
355         if (!db->tdb)
356                 return;
357
358         buf.parent = dir->parent;
359         buf.dotdot = dir->dotdot;
360
361         key.dptr = (unsigned char *) &dir->ino;
362         key.dsize = sizeof(ext2_ino_t);
363         data.dptr = (unsigned char *) &buf;
364         data.dsize = sizeof(buf);
365
366         if (tdb_store(db->tdb, key, data, TDB_REPLACE) == -1) {
367                 printf("store failed: %s\n", tdb_errorstr(db->tdb));
368         }
369 #endif
370 }
371
372 /*
373  * Free the dir_info structure when it isn't needed any more.
374  */
375 void e2fsck_free_dir_info(e2fsck_t ctx)
376 {
377         if (ctx->dir_info) {
378 #ifdef CONFIG_TDB
379                 if (ctx->dir_info->tdb)
380                         tdb_close(ctx->dir_info->tdb);
381                 if (ctx->dir_info->tdb_fn) {
382                         if (unlink(ctx->dir_info->tdb_fn) < 0)
383                                 com_err("e2fsck_free_dir_info", errno,
384                                         _("while freeing dir_info tdb file"));
385                         ext2fs_free_mem(&ctx->dir_info->tdb_fn);
386                 }
387 #endif
388                 if (ctx->dir_info->array)
389                         ext2fs_free_mem(&ctx->dir_info->array);
390                 ctx->dir_info->array = 0;
391                 ctx->dir_info->size = 0;
392                 ctx->dir_info->count = 0;
393                 ext2fs_free_mem(&ctx->dir_info);
394                 ctx->dir_info = 0;
395         }
396 }
397
398 /*
399  * Return the count of number of directories in the dir_info structure
400  */
401 int e2fsck_get_num_dirinfo(e2fsck_t ctx)
402 {
403         return ctx->dir_info ? ctx->dir_info->count : 0;
404 }
405
406 struct dir_info_iter *e2fsck_dir_info_iter_begin(e2fsck_t ctx)
407 {
408         struct dir_info_iter *iter;
409
410         iter = e2fsck_allocate_memory(ctx, sizeof(struct dir_info_iter),
411                                       "dir_info iterator");
412
413 #ifdef CONFIG_TDB
414         if (ctx->dir_info->tdb)
415                 iter->tdb_iter = tdb_firstkey(ctx->dir_info->tdb);
416 #endif
417
418         return iter;
419 }
420
421 void e2fsck_dir_info_iter_end(e2fsck_t ctx EXT2FS_ATTR((unused)),
422                               struct dir_info_iter *iter)
423 {
424 #ifdef CONFIG_TDB
425         free(iter->tdb_iter.dptr);
426 #endif
427         ext2fs_free_mem(&iter);
428 }
429
430 /*
431  * A simple interator function
432  */
433 struct dir_info *e2fsck_dir_info_iter(e2fsck_t ctx, struct dir_info_iter *iter)
434 {
435         if (!ctx->dir_info || !iter)
436                 return 0;
437
438 #ifdef CONFIG_TDB
439         if (ctx->dir_info->tdb) {
440                 static struct dir_info ret_dir_info;
441                 struct dir_info_ent *buf;
442                 TDB_DATA data, key;
443
444                 if (iter->tdb_iter.dptr == 0)
445                         return 0;
446                 key = iter->tdb_iter;
447                 data = tdb_fetch(ctx->dir_info->tdb, key);
448                 if (!data.dptr) {
449                         printf("iter fetch failed: %s\n",
450                                tdb_errorstr(ctx->dir_info->tdb));
451                         return 0;
452                 }
453                 buf = (struct dir_info_ent *) data.dptr;
454                 ret_dir_info.ino = *((ext2_ino_t *) iter->tdb_iter.dptr);
455                 ret_dir_info.dotdot = buf->dotdot;
456                 ret_dir_info.parent = buf->parent;
457                 iter->tdb_iter = tdb_nextkey(ctx->dir_info->tdb, key);
458                 free(key.dptr);
459                 free(data.dptr);
460                 return &ret_dir_info;
461         }
462 #endif
463
464         if (iter->i >= ctx->dir_info->count)
465                 return 0;
466
467 #ifdef DIRINFO_DEBUG
468         printf("iter(%u, %u, %u)...", ctx->dir_info->array[iter->i].ino,
469                ctx->dir_info->array[iter->i].dotdot,
470                ctx->dir_info->array[iter->i].parent);
471 #endif
472         ctx->dir_info->last_lookup = ctx->dir_info->array + iter->i++;
473         return(ctx->dir_info->last_lookup);
474 }
475
476 /*
477  * This function only sets the parent pointer, and requires that
478  * dirinfo structure has already been created.
479  */
480 int e2fsck_dir_info_set_parent(e2fsck_t ctx, ext2_ino_t ino,
481                                ext2_ino_t parent)
482 {
483         struct dir_info *p;
484
485         p = e2fsck_get_dir_info(ctx, ino);
486         if (!p)
487                 return 1;
488         p->parent = parent;
489         e2fsck_put_dir_info(ctx, p);
490         return 0;
491 }
492
493 /*
494  * This function only sets the dot dot pointer, and requires that
495  * dirinfo structure has already been created.
496  */
497 int e2fsck_dir_info_set_dotdot(e2fsck_t ctx, ext2_ino_t ino,
498                                ext2_ino_t dotdot)
499 {
500         struct dir_info *p;
501
502         p = e2fsck_get_dir_info(ctx, ino);
503         if (!p)
504                 return 1;
505         p->dotdot = dotdot;
506         e2fsck_put_dir_info(ctx, p);
507         return 0;
508 }
509
510 /*
511  * This function only sets the parent pointer, and requires that
512  * dirinfo structure has already been created.
513  */
514 int e2fsck_dir_info_get_parent(e2fsck_t ctx, ext2_ino_t ino,
515                                ext2_ino_t *parent)
516 {
517         struct dir_info *p;
518
519         p = e2fsck_get_dir_info(ctx, ino);
520         if (!p)
521                 return 1;
522         *parent = p->parent;
523         return 0;
524 }
525
526 /*
527  * This function only sets the dot dot pointer, and requires that
528  * dirinfo structure has already been created.
529  */
530 int e2fsck_dir_info_get_dotdot(e2fsck_t ctx, ext2_ino_t ino,
531                                ext2_ino_t *dotdot)
532 {
533         struct dir_info *p;
534
535         p = e2fsck_get_dir_info(ctx, ino);
536         if (!p)
537                 return 1;
538         *dotdot = p->dotdot;
539         return 0;
540 }
541