2 * icount.c --- an efficient inode count abstraction
4 * Copyright (C) 1997 Theodore Ts'o.
7 * This file may be redistributed under the terms of the GNU Library
8 * General Public License, version 2.
27 * The data storage strategy used by icount relies on the observation
28 * that most inode counts are either zero (for non-allocated inodes),
29 * or one (for most files), with only a few being two or more
30 * (directories and files that are linked to more than one directory).
32 * Also, e2fsck tends to load the icount data sequentially.
34 * So, we use an inode bitmap to indicate which inodes have a count of
35 * one, and then use a sorted list to store the counts for inodes
36 * which are greater than one.
38 * We also use an optional bitmap to indicate which inodes are already
39 * in the sorted list, to speed up the use of this abstraction by
40 * e2fsck's pass 2. Pass 2 increments inode counts as it finds them,
41 * so this extra bitmap avoids searching the sorted list to see if a
42 * particular inode is on the sorted list already.
/*
 * One entry of the sorted list kept by an icount: the list member of
 * the icount is a pointer to this struct.  Code elsewhere in this file
 * reads and writes the ino member of each entry.  The member
 * declarations themselves are not visible in this excerpt.
 */
45 struct ext2_icount_el {
/*
 * Members that the rest of this file reaches through an ext2_icount_t
 * (icount->single, icount->multiple, and so on).  The enclosing struct
 * header is not visible in this excerpt.
 */
/* Bitmap of inodes whose count is exactly one. */
52 ext2fs_inode_bitmap single;
/*
 * Optional bitmap of inodes that have an entry in the sorted list.
 * alloc_icount() only allocates a second bitmap when
 * EXT2_ICOUNT_OPT_INCREMENT is given and otherwise sets this to 0.
 */
53 ext2fs_inode_bitmap multiple;
/* Copied from the superblock s_inodes_count; upper bound for valid ino. */
56 ext2_ino_t num_inodes;
/* Sorted list of entries; validate expects strictly increasing ino. */
58 struct ext2_icount_el *list;
/* Element most recently placed by insert_icount_el(). */
59 struct ext2_icount_el *last_lookup;
67 * We now use a 32-bit counter field because it doesn't cost us
68 * anything extra for the in-memory data structure, due to alignment
69 * padding. But there's no point changing the interface if most of
70 * the time we only care if the number is bigger than 65,000 or not.
71 * So use the following translation function to return a 16-bit count.
/*
 * Clamp a counter to at most 65500 before it is handed back through
 * the public 16-bit result pointers (for example the __u16 *ret of
 * ext2fs_icount_fetch).  The argument is expanded twice, so it must
 * not have side effects.
 */
73 #define icount_16_xlate(x) (((x) > 65500) ? 65500 : (x))
/*
 * Release an icount and the resources it owns.
 *
 * The visible calls free the sorted list, free both inode bitmaps,
 * close the tdb handle, unlink the file named by tdb_fn, and finally
 * free the icount structure itself.
 * NOTE(review): any guards around these calls are not visible in this
 * excerpt -- confirm which of the steps are conditional.
 */
75 void ext2fs_free_icount(ext2_icount_t icount)
82 ext2fs_free_mem(&icount->list);
84 ext2fs_free_inode_bitmap(icount->single);
86 ext2fs_free_inode_bitmap(icount->multiple);
89 tdb_close(icount->tdb);
91 unlink(icount->tdb_fn);
96 ext2fs_free_mem(&icount);
/*
 * Allocate a zero-filled struct ext2_icount together with its bitmaps.
 *
 * fs     filesystem the bitmaps are sized for; also supplies
 *        s_inodes_count for icount->num_inodes.
 * flags  when EXT2_ICOUNT_OPT_INCREMENT is set a second bitmap
 *        (labelled icount_inc) is allocated, presumably into
 *        icount->multiple; on the other visible path icount->multiple
 *        is set to 0.
 * ret    presumably receives the new icount on success -- the
 *        assignment is not visible in this excerpt.
 *
 * The magic is set to EXT2_ET_MAGIC_ICOUNT.  ext2fs_free_icount() is
 * called on what appears to be the error path -- TODO confirm.
 */
99 static errcode_t alloc_icount(ext2_filsys fs, int flags, ext2_icount_t *ret)
101 ext2_icount_t icount;
106 retval = ext2fs_get_mem(sizeof(struct ext2_icount), &icount);
109 memset(icount, 0, sizeof(struct ext2_icount));
111 retval = ext2fs_allocate_inode_bitmap(fs, "icount", &icount->single);
115 if (flags & EXT2_ICOUNT_OPT_INCREMENT) {
116 retval = ext2fs_allocate_inode_bitmap(fs, "icount_inc",
121 icount->multiple = 0;
123 icount->magic = EXT2_ET_MAGIC_ICOUNT;
124 icount->num_inodes = fs->super->s_inodes_count;
130 ext2fs_free_icount(icount);
/* Member of struct uuid; unpack_uuid() assigns it from the packed bytes. */
138 __u16 time_hi_and_version;
/*
 * Decode a packed UUID into the fields of *uu.
 *
 * Multi-byte fields are assembled most-significant byte first: tmp is
 * shifted left by 8 bits and the next byte is OR-ed in.  The final
 * 6 bytes are copied unchanged into uu->node.
 * NOTE(review): ptr presumably starts at in; its initialisation and
 * the other field assignments are not visible in this excerpt.
 */
143 static void unpack_uuid(void *in, struct uuid *uu)
149 tmp = (tmp << 8) | *ptr++;
150 tmp = (tmp << 8) | *ptr++;
151 tmp = (tmp << 8) | *ptr++;
155 tmp = (tmp << 8) | *ptr++;
159 tmp = (tmp << 8) | *ptr++;
160 uu->time_hi_and_version = tmp;
163 tmp = (tmp << 8) | *ptr++;
166 memcpy(uu->node, ptr, 6);
/*
 * Produce the textual form of the packed UUID uu.
 *
 * The UUID is decoded with unpack_uuid() and then formatted with the
 * pattern below: groups of 8-4-4-4-12 lowercase hex digits separated
 * by dashes (36 characters).
 * NOTE(review): the formatting call itself is not visible here; out
 * presumably needs room for 37 bytes including the terminator --
 * confirm against the caller.
 */
169 static void uuid_unparse(void *uu, char *out)
173 unpack_uuid(uu, &uuid);
175 "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
176 uuid.time_low, uuid.time_mid, uuid.time_hi_and_version,
177 uuid.clock_seq >> 8, uuid.clock_seq & 0xFF,
178 uuid.node[0], uuid.node[1], uuid.node[2],
179 uuid.node[3], uuid.node[4], uuid.node[5]);
/*
 * Create an icount whose per-inode counts live in a tdb file under
 * tdb_dir.
 *
 * Visible steps:
 *  - allocate the icount with alloc_icount();
 *  - build a name of the form <tdb_dir>/<fs uuid>-icount-XXXXXX in a
 *    buffer of strlen(tdb_dir) + 64 bytes;
 *  - restrict the umask to 077 (the temp-file creation and the umask
 *    restore are not visible in this excerpt);
 *  - open the tdb, passing the number of in-use inodes as the second
 *    argument of tdb_open().
 *
 * ext2fs_free_icount() is called on what appears to be the failure
 * path.  EXT2_ET_UNIMPLEMENTED is returned on another path, presumably
 * the build without tdb support given the EXT2FS_NO_TDB_UNUSED
 * parameter annotations -- TODO confirm.
 */
183 errcode_t ext2fs_create_icount_tdb(ext2_filsys fs EXT2FS_NO_TDB_UNUSED,
184 char *tdb_dir EXT2FS_NO_TDB_UNUSED,
185 int flags EXT2FS_NO_TDB_UNUSED,
186 ext2_icount_t *ret EXT2FS_NO_TDB_UNUSED)
189 ext2_icount_t icount;
192 ext2_ino_t num_inodes;
196 retval = alloc_icount(fs, flags, &icount);
200 retval = ext2fs_get_mem(strlen(tdb_dir) + 64, &fn);
203 uuid_unparse(fs->super->s_uuid, uuid);
204 sprintf(fn, "%s/%s-icount-XXXXXX", tdb_dir, uuid);
205 save_umask = umask(077);
209 ext2fs_free_mem(&fn);
215 * This is an overestimate of the size that we will need; the
216 * ideal value is the number of used inodes with a count
217 * greater than 1. OTOH the times when we really need this is
218 * with the backup programs that use lots of hard links, in
219 * which case the number of inodes in use approaches the ideal
222 num_inodes = fs->super->s_inodes_count - fs->super->s_free_inodes_count;
224 icount->tdb = tdb_open(fn, num_inodes, TDB_NOLOCK | TDB_NOSYNC,
225 O_RDWR | O_CREAT | O_TRUNC, 0600);
227 if (icount->tdb == NULL) {
234 ext2fs_free_icount(icount);
237 return EXT2_ET_UNIMPLEMENTED;
/*
 * Create an in-memory icount backed by a sorted list.
 *
 * fs     filesystem being counted.
 * flags  passed through to alloc_icount().
 * size   requested number of list entries.
 * hint   optional earlier icount.  Its magic is checked, size is
 *        raised to hint->size when that is larger, and the inode
 *        numbers from its list are copied into the new list so that
 *        icount->count equals hint->count.
 * ret    presumably receives the new icount -- the assignment is not
 *        visible in this excerpt.
 *
 * On the visible sizing path the entry count is the number of
 * directories plus s_inodes_count / 50.  The list is allocated with
 * ext2fs_get_array() and zero-filled.  ext2fs_free_icount() is called
 * on what appears to be the error path.
 *
 * NOTE(review): bytes is assigned a size_t-cast value but is printed
 * with %d below; its declaration is not visible -- confirm the type
 * and the format specifier agree.
 * NOTE(review): the conditions that select between these paths are
 * not visible in this excerpt.
 */
241 errcode_t ext2fs_create_icount2(ext2_filsys fs, int flags, unsigned int size,
242 ext2_icount_t hint, ext2_icount_t *ret)
244 ext2_icount_t icount;
250 EXT2_CHECK_MAGIC(hint, EXT2_ET_MAGIC_ICOUNT);
251 if (hint->size > size)
252 size = (size_t) hint->size;
255 retval = alloc_icount(fs, flags, &icount);
263 * Figure out how many special case inode counts we will
264 * have. We know we will need one for each directory;
265 * we also need to reserve some extra room for file links
267 retval = ext2fs_get_num_dirs(fs, &icount->size);
270 icount->size += fs->super->s_inodes_count / 50;
273 bytes = (size_t) (icount->size * sizeof(struct ext2_icount_el));
275 printf("Icount allocated %u entries, %d bytes.\n",
276 icount->size, bytes);
278 retval = ext2fs_get_array(icount->size, sizeof(struct ext2_icount_el),
282 memset(icount->list, 0, bytes);
288 * Populate the sorted list with those entries which were
289 * found in the hint icount (since those are ones which will
290 * likely need to be in the sorted list this time around).
293 for (i=0; i < hint->count; i++)
294 icount->list[i].ino = hint->list[i].ino;
295 icount->count = hint->count;
302 ext2fs_free_icount(icount);
/*
 * Convenience wrapper: identical to ext2fs_create_icount2() called
 * with no hint icount (0 is passed for the hint argument).
 */
306 errcode_t ext2fs_create_icount(ext2_filsys fs, int flags,
310 return ext2fs_create_icount2(fs, flags, size, 0, ret);
314 * insert_icount_el() --- Insert a new entry into the sorted list at a
315 * specified position.
/*
 * Place an entry for ino at index pos of the sorted list and return a
 * pointer to it.
 *
 * - If last_lookup already refers to ino, that element is returned
 *   without inserting anything.
 * - When the list is full (count >= size) it is grown.  The new size
 *   is extrapolated from the largest inode number currently in the
 *   list, count * (num_inodes / last ino), computed in float, and is
 *   never less than size + 100.
 * - The num = count - pos entries from pos onwards are shifted up one
 *   slot with memmove() to open the gap.
 * - last_lookup is updated to the element at pos.
 *
 * The visible return 0 is marked in the code as a case that should
 * never happen.
 * NOTE(review): initialisation of the new element and the count
 * update are not visible in this excerpt.
 */
317 static struct ext2_icount_el *insert_icount_el(ext2_icount_t icount,
318 ext2_ino_t ino, int pos)
320 struct ext2_icount_el *el;
322 ext2_ino_t new_size = 0;
325 if (icount->last_lookup && icount->last_lookup->ino == ino)
326 return icount->last_lookup;
328 if (icount->count >= icount->size) {
330 new_size = icount->list[(unsigned)icount->count-1].ino;
331 new_size = (ext2_ino_t) (icount->count *
332 ((float) icount->num_inodes / new_size));
334 if (new_size < (icount->size + 100))
335 new_size = icount->size + 100;
337 printf("Reallocating icount %u entries...\n", new_size);
339 retval = ext2fs_resize_mem((size_t) icount->size *
340 sizeof(struct ext2_icount_el),
342 sizeof(struct ext2_icount_el),
346 icount->size = new_size;
348 num = (int) icount->count - pos;
350 return 0; /* should never happen */
352 memmove(&icount->list[pos+1], &icount->list[pos],
353 sizeof(struct ext2_icount_el) * num);
356 el = &icount->list[pos];
359 icount->last_lookup = el;
364 * get_icount_el() --- given an inode number, try to find icount
365 * information in the sorted list. If the create flag is set,
366 * and we can't find an entry, create one in the sorted list.
/*
 * Look up the sorted-list entry for ino, optionally creating it.
 *
 * icount  the icount; a null icount or a null list is tested first
 *         (the result of that test is not visible in this excerpt).
 * ino     inode number to find.
 * create  when nonzero, a missing entry is inserted.
 *
 * Lookup order as far as visible:
 *  1. Append fast path: with create set and either an empty list or
 *     ino larger than the last entry, insert at index count.
 *  2. Cursor check: if list[cursor] holds ino, return it and advance
 *     the cursor.  This serves callers that walk inodes in order.
 *  3. Binary search over the list; a hit sets cursor to mid + 1.
 *  4. On a miss the insertion point is low, and insert_icount_el() is
 *     called there (presumably only when create is set -- TODO
 *     confirm, the guard is not visible).
 */
368 static struct ext2_icount_el *get_icount_el(ext2_icount_t icount,
369 ext2_ino_t ino, int create)
373 if (!icount || !icount->list)
376 if (create && ((icount->count == 0) ||
377 (ino > icount->list[(unsigned)icount->count-1].ino))) {
378 return insert_icount_el(icount, ino, (unsigned) icount->count);
380 if (icount->count == 0)
383 if (icount->cursor >= icount->count)
385 if (ino == icount->list[icount->cursor].ino)
386 return &icount->list[icount->cursor++];
388 printf("Non-cursor get_icount_el: %u\n", ino);
391 high = (int) icount->count-1;
392 while (low <= high) {
393 mid = ((unsigned)low + (unsigned)high) >> 1;
394 if (ino == icount->list[mid].ino) {
395 icount->cursor = mid+1;
396 return &icount->list[mid];
398 if (ino < icount->list[mid].ino)
404 * If we need to create a new entry, it should be right at
405 * low (where high will be left at low-1).
408 return insert_icount_el(icount, ino, low);
/*
 * Record a count for ino in whichever backing store the icount uses.
 *
 * tdb path: the key is the raw bytes of ino and the value is the raw
 * bytes of count, sized as a __u32.  The value is written with
 * tdb_store() using TDB_REPLACE; a tdb_delete() of the key is visible
 * on another path (presumably when count is zero -- TODO confirm).
 * Failures return tdb_error() plus an offset whose operand is not
 * visible in this excerpt.
 *
 * list path: the entry is obtained with get_icount_el() in create
 * mode, and EXT2_ET_NO_MEMORY is returned on what is presumably a
 * failed lookup.  The assignment of the count to the entry is not
 * visible here.
 */
412 static errcode_t set_inode_count(ext2_icount_t icount, ext2_ino_t ino,
415 struct ext2_icount_el *el;
420 key.dptr = (unsigned char *) &ino;
421 key.dsize = sizeof(ext2_ino_t);
422 data.dptr = (unsigned char *) &count;
423 data.dsize = sizeof(__u32);
425 if (tdb_store(icount->tdb, key, data, TDB_REPLACE))
426 return tdb_error(icount->tdb) +
429 if (tdb_delete(icount->tdb, key))
430 return tdb_error(icount->tdb) +
436 el = get_icount_el(icount, ino, 1);
438 return EXT2_ET_NO_MEMORY;
/*
 * Read the stored count for ino into *count.
 *
 * tdb path: the record keyed by the raw bytes of ino is fetched.  A
 * null data pointer leads to a return of tdb_error() plus
 * EXT2_ET_TDB_SUCCESS; otherwise the first 4 bytes of the record are
 * read as a __u32.
 *
 * list path: the entry is looked up with get_icount_el() without
 * creating it.  What happens on a miss is not visible in this excerpt.
 */
444 static errcode_t get_inode_count(ext2_icount_t icount, ext2_ino_t ino,
447 struct ext2_icount_el *el;
452 key.dptr = (unsigned char *) &ino;
453 key.dsize = sizeof(ext2_ino_t);
455 data = tdb_fetch(icount->tdb, key);
456 if (data.dptr == NULL) {
458 return tdb_error(icount->tdb) + EXT2_ET_TDB_SUCCESS;
461 *count = *((__u32 *) data.dptr);
466 el = get_icount_el(icount, ino, 0);
/*
 * Consistency check of the sorted list; problems are written to out.
 *
 * Returns the magic error from EXT2_CHECK_MAGIC for a bad handle and
 * EXT2_ET_INVALID_ARGUMENT immediately when count exceeds size.  For
 * every adjacent pair of entries whose inode numbers are not strictly
 * increasing a line is printed and ret is set to
 * EXT2_ET_INVALID_ARGUMENT; the scan continues past such a pair as far
 * as the visible lines show.
 */
476 errcode_t ext2fs_icount_validate(ext2_icount_t icount, FILE *out)
480 const char *bad = "bad icount";
482 EXT2_CHECK_MAGIC(icount, EXT2_ET_MAGIC_ICOUNT);
484 if (icount->count > icount->size) {
485 fprintf(out, "%s: count > size\n", bad);
486 return EXT2_ET_INVALID_ARGUMENT;
488 for (i=1; i < icount->count; i++) {
489 if (icount->list[i-1].ino >= icount->list[i].ino) {
490 fprintf(out, "%s: list[%d].ino=%u, list[%d].ino=%u\n",
491 bad, i-1, icount->list[i-1].ino,
492 i, icount->list[i].ino);
493 ret = EXT2_ET_INVALID_ARGUMENT;
/*
 * Fetch the count for ino into *ret, clamped by icount_16_xlate().
 *
 * Returns EXT2_ET_INVALID_ARGUMENT when ino is 0 or greater than
 * num_inodes.  The single bitmap is consulted first, then the multiple
 * bitmap when it exists; the values returned on those two shortcuts
 * are not visible in this excerpt (presumably 1 and 0 respectively).
 * Otherwise the stored count is read with get_inode_count().
 *
 * NOTE(review): the return value of get_inode_count() is ignored
 * below, so a failed read is not reported to the caller as far as the
 * visible lines show.
 */
499 errcode_t ext2fs_icount_fetch(ext2_icount_t icount, ext2_ino_t ino, __u16 *ret)
502 EXT2_CHECK_MAGIC(icount, EXT2_ET_MAGIC_ICOUNT);
504 if (!ino || (ino > icount->num_inodes))
505 return EXT2_ET_INVALID_ARGUMENT;
507 if (ext2fs_test_inode_bitmap2(icount->single, ino)) {
511 if (icount->multiple &&
512 !ext2fs_test_inode_bitmap2(icount->multiple, ino)) {
516 get_inode_count(icount, ino, &val);
517 *ret = icount_16_xlate(val);
/*
 * Add one to the count of ino.
 *
 * Returns EXT2_ET_INVALID_ARGUMENT when ino is 0 or greater than
 * num_inodes.  Three cases are visible:
 *  - single bit set: the count goes from 1 to 2, so 2 is stored and
 *    the single bit is cleared;
 *  - multiple bitmap in use: if the bit is set the stored count is
 *    read and written back (the increment itself is on a line not
 *    visible here); otherwise the count was zero and the single bit
 *    is marked;
 *  - no multiple bitmap: the stored count is read and written back.
 * The multiple bit is marked when that bitmap exists; which paths
 * reach that statement is not visible in this excerpt.
 *
 * Any failure of set_inode_count() is reported as EXT2_ET_NO_MEMORY,
 * whatever its actual error code.
 * NOTE(review): run_test() passes 0 for the result pointer, so the
 * store through ret is presumably guarded on a line not visible here.
 */
521 errcode_t ext2fs_icount_increment(ext2_icount_t icount, ext2_ino_t ino,
526 EXT2_CHECK_MAGIC(icount, EXT2_ET_MAGIC_ICOUNT);
528 if (!ino || (ino > icount->num_inodes))
529 return EXT2_ET_INVALID_ARGUMENT;
531 if (ext2fs_test_inode_bitmap2(icount->single, ino)) {
533 * If the existing count is 1, then we know there is
534 * no entry in the list.
536 if (set_inode_count(icount, ino, 2))
537 return EXT2_ET_NO_MEMORY;
539 ext2fs_unmark_inode_bitmap2(icount->single, ino);
540 } else if (icount->multiple) {
542 * The count is either zero or greater than 1; if the
543 * inode is set in icount->multiple, then there should
544 * be an entry in the list, so we need to fix it.
546 if (ext2fs_test_inode_bitmap2(icount->multiple, ino)) {
547 get_inode_count(icount, ino, &curr_value);
549 if (set_inode_count(icount, ino, curr_value))
550 return EXT2_ET_NO_MEMORY;
553 * The count was zero; mark the single bitmap
556 ext2fs_mark_inode_bitmap2(icount->single, ino);
563 * The count is either zero or greater than 1; try to
564 * find an entry in the list to determine which.
566 get_inode_count(icount, ino, &curr_value);
568 if (set_inode_count(icount, ino, curr_value))
569 return EXT2_ET_NO_MEMORY;
571 if (icount->multiple)
572 ext2fs_mark_inode_bitmap2(icount->multiple, ino);
574 *ret = icount_16_xlate(curr_value);
/*
 * Subtract one from the count of ino.
 *
 * Returns EXT2_ET_INVALID_ARGUMENT when ino is 0 or greater than
 * num_inodes, when the multiple bitmap exists and has no bit for ino
 * (there is nothing to decrement), and on one further path after the
 * stored count has been read (presumably when it is zero -- TODO
 * confirm, the condition is not visible).
 *
 * Visible cases:
 *  - single bit set: the bit is cleared; the multiple bit is cleared
 *    when that bitmap exists, and a stored count of 0 is written on a
 *    path whose condition is not visible here;
 *  - otherwise the stored count is read and written back (the
 *    decrement itself is not visible), the single bit is marked on a
 *    path not visible here, and the multiple bit is cleared when the
 *    new value is 0.
 *
 * NOTE(review): the range check reads icount->num_inodes before
 * EXT2_CHECK_MAGIC validates the handle, whereas
 * ext2fs_icount_fetch() and ext2fs_icount_increment() check the magic
 * first.  Consider swapping the two checks for consistency.
 */
578 errcode_t ext2fs_icount_decrement(ext2_icount_t icount, ext2_ino_t ino,
583 if (!ino || (ino > icount->num_inodes))
584 return EXT2_ET_INVALID_ARGUMENT;
586 EXT2_CHECK_MAGIC(icount, EXT2_ET_MAGIC_ICOUNT);
588 if (ext2fs_test_inode_bitmap2(icount->single, ino)) {
589 ext2fs_unmark_inode_bitmap2(icount->single, ino);
590 if (icount->multiple)
591 ext2fs_unmark_inode_bitmap2(icount->multiple, ino);
593 set_inode_count(icount, ino, 0);
600 if (icount->multiple &&
601 !ext2fs_test_inode_bitmap2(icount->multiple, ino))
602 return EXT2_ET_INVALID_ARGUMENT;
604 get_inode_count(icount, ino, &curr_value);
606 return EXT2_ET_INVALID_ARGUMENT;
608 if (set_inode_count(icount, ino, curr_value))
609 return EXT2_ET_NO_MEMORY;
612 ext2fs_mark_inode_bitmap2(icount->single, ino);
613 if ((curr_value == 0) && icount->multiple)
614 ext2fs_unmark_inode_bitmap2(icount->multiple, ino);
617 *ret = icount_16_xlate(curr_value);
/*
 * Set the count of ino to an explicit value.
 *
 * Returns EXT2_ET_INVALID_ARGUMENT when ino is 0 or greater than
 * num_inodes.  Three groups of statements are visible; the conditions
 * selecting them are not, but they presumably correspond to a count
 * of 1, a count of 0, and anything larger:
 *  - mark the single bit and clear the multiple bit;
 *  - clear the single bit, then either clear the multiple bit (when
 *    that bitmap exists) or write a stored count of 0;
 *  - write the stored count, clear the single bit and mark the
 *    multiple bit.  A failed write is reported as EXT2_ET_NO_MEMORY.
 *
 * NOTE(review): the range check reads icount->num_inodes before
 * EXT2_CHECK_MAGIC validates the handle, whereas
 * ext2fs_icount_fetch() and ext2fs_icount_increment() check the magic
 * first.  Consider swapping the two checks for consistency.
 */
621 errcode_t ext2fs_icount_store(ext2_icount_t icount, ext2_ino_t ino,
624 if (!ino || (ino > icount->num_inodes))
625 return EXT2_ET_INVALID_ARGUMENT;
627 EXT2_CHECK_MAGIC(icount, EXT2_ET_MAGIC_ICOUNT);
630 ext2fs_mark_inode_bitmap2(icount->single, ino);
631 if (icount->multiple)
632 ext2fs_unmark_inode_bitmap2(icount->multiple, ino);
636 ext2fs_unmark_inode_bitmap2(icount->single, ino);
637 if (icount->multiple) {
639 * If the icount->multiple bitmap is enabled,
640 * we can just clear both bitmaps and we're done
642 ext2fs_unmark_inode_bitmap2(icount->multiple, ino);
644 set_inode_count(icount, ino, 0);
648 if (set_inode_count(icount, ino, count))
649 return EXT2_ET_NO_MEMORY;
650 ext2fs_unmark_inode_bitmap2(icount->single, ino);
651 if (icount->multiple)
652 ext2fs_mark_inode_bitmap2(icount->multiple, ino);
/*
 * Report the size of an icount.  A null handle or one with the wrong
 * magic is rejected first; the values returned on either path are not
 * visible in this excerpt (presumably 0 and icount->size).
 */
656 ext2_ino_t ext2fs_get_icount_size(ext2_icount_t icount)
658 if (!icount || icount->magic != EXT2_ET_MAGIC_ICOUNT)
/*
 * State and opcodes for the built-in test program.  INCREMENT and
 * DECREMENT are two of the cmd values used in the test_program tables;
 * the tables also use STORE and the interpreter loop stops at EXIT,
 * whose definitions are not visible in this excerpt.
 */
667 ext2_icount_t icount;
672 #define INCREMENT 0x03
673 #define DECREMENT 0x04
/*
 * One step of a scripted icount test.  run_test() reads the members
 * cmd, ino, arg and expected; the member declarations are not visible
 * in this excerpt.
 */
675 struct test_program {
/*
 * Script used by main() for the standard, multiple-bitmap and tdb
 * runs.  Each row has four values, presumably in the order cmd, ino,
 * arg, expected (the struct member order is not visible here).  The
 * visible rows store 42 into inode 42, then step inode 5 up through
 * 1, 2, 3, 4 and back down through 3, 2, 1, 0, checking the fetched
 * value after every step.
 */
682 struct test_program prog[] = {
683 { STORE, 42, 42, 42 },
689 { INCREMENT, 5, 0, 1 },
690 { INCREMENT, 5, 0, 2 },
691 { INCREMENT, 5, 0, 3 },
692 { INCREMENT, 5, 0, 4 },
693 { DECREMENT, 5, 0, 3 },
694 { DECREMENT, 5, 0, 2 },
695 { DECREMENT, 5, 0, 1 },
696 { DECREMENT, 5, 0, 0 },
701 { INCREMENT, 1, 0, 2 },
702 { DECREMENT, 2, 0, 1 },
703 { DECREMENT, 2, 0, 0 },
/*
 * Script used by the Resizing icount run in main(), which starts the
 * icount with a requested size of 3.  The rows are not visible in this
 * excerpt.
 */
708 struct test_program extended[] = {
743 * Set up the variables for doing the inode scan test.
/*
 * Build the test filesystem (test_fs) that run_test() operates on.
 *
 * A zero-filled superblock template with a block count of 12000 is
 * passed by address to ext2fs_initialize() together with
 * test_io_manager, and the group tables are then allocated with
 * ext2fs_allocate_tables().  A failure of either call is reported
 * through com_err(); what happens after the report is not visible in
 * this excerpt.
 */
745 static void setup(void)
748 struct ext2_super_block param;
750 initialize_ext2_error_table();
752 memset(&param, 0, sizeof(param));
753 ext2fs_blocks_count_set(&param, 12000);
755 retval = ext2fs_initialize("test fs", EXT2_FLAG_64BITS, &param,
756 test_io_manager, &test_fs);
758 com_err("setup", retval,
759 "while initializing filesystem");
762 retval = ext2fs_allocate_tables(test_fs);
764 com_err("setup", retval,
765 "while allocating tables for test filesystem");
/*
 * Execute one scripted test against a freshly created icount.
 *
 * flags  passed to the icount constructor.
 * size   requested list size for the in-memory variant.
 * dir    directory for the tdb-backed variant; main() passes 0 for
 *        the in-memory runs.  The test selecting between
 *        ext2fs_create_icount_tdb() and ext2fs_create_icount2() is
 *        not visible in this excerpt.
 * prog   script to run; the loop stops at the first step whose cmd
 *        is EXIT.
 *
 * For each step the matching store, increment or decrement call is
 * made (the increment and decrement calls pass 0 for the result
 * pointer), the count is fetched back, and the fetched value is
 * printed and compared with pc->expected.  Afterwards the icount size
 * is printed, the icount is validated against stdout, and it is freed.
 *
 * main() adds the return value to its failure total, so it is
 * presumably the number of mismatches -- TODO confirm, the counter
 * and the return statement are not visible here.
 */
770 int run_test(int flags, int size, char *dir, struct test_program *prog)
773 ext2_icount_t icount;
774 struct test_program *pc;
780 retval = ext2fs_create_icount_tdb(test_fs, dir,
783 com_err("run_test", retval,
784 "while creating icount using tdb");
792 retval = ext2fs_create_icount2(test_fs, flags, size, 0,
795 com_err("run_test", retval, "while creating icount");
799 for (pc = prog; pc->cmd != EXIT; pc++) {
802 printf("icount_fetch(%u) = ", pc->ino);
805 retval = ext2fs_icount_store(icount, pc->ino, pc->arg);
807 com_err("run_test", retval,
808 "while calling icount_store");
811 printf("icount_store(%u, %u) = ", pc->ino, pc->arg);
814 retval = ext2fs_icount_increment(icount, pc->ino, 0);
816 com_err("run_test", retval,
817 "while calling icount_increment");
820 printf("icount_increment(%u) = ", pc->ino);
823 retval = ext2fs_icount_decrement(icount, pc->ino, 0);
825 com_err("run_test", retval,
826 "while calling icount_decrement");
829 printf("icount_decrement(%u) = ", pc->ino);
832 retval = ext2fs_icount_fetch(icount, pc->ino, &result);
834 com_err("run_test", retval,
835 "while calling icount_fetch");
838 printf("%u (%s)\n", result, (result == pc->expected) ?
840 if (result != pc->expected)
843 printf("icount size is %u\n", ext2fs_get_icount_size(icount));
844 retval = ext2fs_icount_validate(icount, stdout);
846 com_err("run_test", retval, "while calling icount_validate");
849 ext2fs_free_icount(icount);
/*
 * Test driver.  Runs five configurations through run_test() and adds
 * each result to failed:
 *   1. in-memory icount, default size, script prog;
 *   2. the same with EXT2_ICOUNT_OPT_INCREMENT;
 *   3. in-memory icount with a requested size of 3 and script
 *      extended, to exercise list growth;
 *   4. tdb-backed icount in the current directory, script prog;
 *   5. tdb-backed icount with EXT2_ICOUNT_OPT_INCREMENT.
 * The call to setup() and the final return are not visible in this
 * excerpt.
 */
854 int main(int argc, char **argv)
859 printf("Standard icount run:\n");
860 failed += run_test(0, 0, 0, prog);
861 printf("\nMultiple bitmap test:\n");
862 failed += run_test(EXT2_ICOUNT_OPT_INCREMENT, 0, 0, prog);
863 printf("\nResizing icount:\n");
864 failed += run_test(0, 3, 0, extended);
865 printf("\nStandard icount run with tdb:\n");
866 failed += run_test(0, 0, ".", prog);
867 printf("\nMultiple bitmap test with tdb:\n");
868 failed += run_test(EXT2_ICOUNT_OPT_INCREMENT, 0, ".", prog);