2 * icount.c --- an efficient inode count abstraction
4 * Copyright (C) 1997 Theodore Ts'o.
7 * This file may be redistributed under the terms of the GNU Library
8 * General Public License, version 2.
28 * The data storage strategy used by icount relies on the observation
29 * that most inode counts are either zero (for non-allocated inodes),
30 * one (for most files), and only a few that are two or more
31 * (directories and files that are linked to more than one directory).
33 * Also, e2fsck tends to load the icount data sequentially.
35 * So, we use an inode bitmap to indicate which inodes have a count of
36 * one, and then use a sorted list to store the counts for inodes
37 * which are greater than one.
39 * We also use an optional bitmap to indicate which inodes are already
40 * in the sorted list, to speed up the use of this abstraction by
41 * e2fsck's pass 2. Pass 2 increments inode counts as it finds them,
42 * so this extra bitmap avoids searching the sorted list to see if a
43 * particular inode is on the sorted list already.
/*
 * NOTE(review): only the opening line of struct ext2_icount_el is visible
 * in this view; none of its members are shown. The member lines that
 * follow are reached through ext2_icount_t handles elsewhere in this file
 * (icount->single, icount->multiple, icount->num_inodes, icount->list,
 * icount->last_lookup), so they presumably belong to struct ext2_icount,
 * whose opening line is not visible here -- confirm against the full file.
 */
46 struct ext2_icount_el {
/* Bitmap of inodes whose count is exactly one (see the file header). */
53 ext2fs_inode_bitmap single;
/* Optional bitmap; alloc_icount() sets it up only for OPT_INCREMENT. */
54 ext2fs_inode_bitmap multiple;
/* Copied from the superblock s_inodes_count by alloc_icount(). */
57 ext2_ino_t num_inodes;
/* Sorted array of per-inode entries, ordered by inode number. */
59 struct ext2_icount_el *list;
/* Entry most recently placed by insert_icount_el(); checked there first. */
60 struct ext2_icount_el *last_lookup;
69 * We now use a 32-bit counter field because it doesn't cost us
70 * anything extra for the in-memory data structure, due to alignment
71 * padding. But there's no point changing the interface if most of
72 * the time we only care if the number is bigger than 65,000 or not.
73 * So use the following translation function to return a 16-bit count.
/*
 * Clamp a counter to at most 65500 before it is handed back through the
 * 16-bit interface. The argument is expanded twice, so callers must not
 * pass an expression that has side effects.
 */
75 #define icount_16_xlate(x) (((x) > 65500) ? 65500 : (x))
/*
 * ext2fs_free_icount() --- release everything owned by an icount handle.
 *
 * The visible statements free the sorted list, free both inode bitmaps,
 * close the tdb handle and unlink its backing file name, free the fullmap
 * array and finally free the handle itself.
 *
 * NOTE(review): any guards around these calls (NULL checks, magic check,
 * build-time conditionals for tdb support) are not visible in this view
 * -- confirm in the full file.
 */
77 void ext2fs_free_icount(ext2_icount_t icount)
84 ext2fs_free_mem(&icount->list);
86 ext2fs_free_inode_bitmap(icount->single);
88 ext2fs_free_inode_bitmap(icount->multiple);
91 tdb_close(icount->tdb);
/* Best effort removal of the temporary database file; result is discarded. */
93 (void) unlink(icount->tdb_fn);
99 ext2fs_free_mem(&icount->fullmap);
/* The handle itself goes last, after everything it points at. */
101 ext2fs_free_mem(&icount);
/*
 * alloc_icount() --- allocate and initialise a bare icount handle.
 *
 * fs     filesystem whose superblock supplies the inode count
 * flags  EXT2_ICOUNT_OPT_* bits selecting the storage strategy
 * ret    receives the new handle (the store itself is not visible here)
 *
 * The handle is zero-filled, stamped with EXT2_ET_MAGIC_ICOUNT and given
 * num_inodes from s_inodes_count. When both OPT_FULLMAP and OPT_INCREMENT
 * are set, a zeroed fullmap array with one slot per inode is attempted
 * first. The single bitmap is allocated under the name icount, and when
 * OPT_INCREMENT is set a second bitmap named icount_inc is allocated.
 *
 * NOTE(review): sz is an unsigned int product of the element size and
 * num_inodes, so it may wrap for very large inode counts -- confirm.
 * NOTE(review): fullmap gets num_inodes slots, while the public entry
 * points in this file accept ino equal to num_inodes and index
 * fullmap[ino]; that looks like a possible one-past-the-end access --
 * confirm against the full file.
 */
104 static errcode_t alloc_icount(ext2_filsys fs, int flags, ext2_icount_t *ret)
106 ext2_icount_t icount;
111 retval = ext2fs_get_mem(sizeof(struct ext2_icount), &icount);
114 memset(icount, 0, sizeof(struct ext2_icount));
115 icount->magic = EXT2_ET_MAGIC_ICOUNT;
116 icount->num_inodes = fs->super->s_inodes_count;
/* The full per-inode map is only tried when both option bits are set. */
118 if ((flags & EXT2_ICOUNT_OPT_FULLMAP) &&
119 (flags & EXT2_ICOUNT_OPT_INCREMENT)) {
120 unsigned sz = sizeof(*icount->fullmap) * icount->num_inodes;
122 retval = ext2fs_get_mem(sz, &icount->fullmap);
123 /* If we can't allocate, fall back */
125 memset(icount->fullmap, 0, sz);
131 retval = ext2fs_allocate_inode_bitmap(fs, "icount", &icount->single);
135 if (flags & EXT2_ICOUNT_OPT_INCREMENT) {
136 retval = ext2fs_allocate_inode_bitmap(fs, "icount_inc",
/* Presumably the branch taken when OPT_INCREMENT is absent -- confirm. */
141 icount->multiple = 0;
/* Presumably the error path: tear down the partially built handle. */
147 ext2fs_free_icount(icount);
/*
 * NOTE(review): lone member of a struct whose opening line is outside
 * this view. unpack_uuid() stores into a field of this name through a
 * struct uuid pointer, so this is presumably a struct uuid member.
 */
155 __u16 time_hi_and_version;
/*
 * unpack_uuid() --- decode a packed byte buffer into a struct uuid.
 *
 * in  source bytes, consumed one at a time through ptr
 * uu  destination structure
 *
 * Each visible accumulation shifts tmp left by eight bits and ORs in the
 * next byte, so multi-byte fields are read most significant byte first.
 * The final six bytes are copied verbatim into uu->node.
 *
 * NOTE(review): the initial loads of tmp and the stores into the other
 * fields are not visible in this view.
 */
160 static void unpack_uuid(void *in, struct uuid *uu)
166 tmp = (tmp << 8) | *ptr++;
167 tmp = (tmp << 8) | *ptr++;
168 tmp = (tmp << 8) | *ptr++;
172 tmp = (tmp << 8) | *ptr++;
176 tmp = (tmp << 8) | *ptr++;
177 uu->time_hi_and_version = tmp;
180 tmp = (tmp << 8) | *ptr++;
/* The node identifier is copied as raw bytes, with no reordering. */
183 memcpy(uu->node, ptr, 6);
/*
 * uuid_unparse() --- format a packed UUID as hyphenated lowercase hex.
 *
 * uu   packed UUID bytes, decoded with unpack_uuid()
 * out  destination string
 *
 * The format string yields 8-4-4-4-12 hex digits; clock_seq is split
 * into its high and low bytes and node is printed byte by byte.
 *
 * NOTE(review): the formatting call that consumes this format string is
 * not visible, so the use of out and any length bound are unconfirmed.
 */
186 static void uuid_unparse(void *uu, char *out)
190 unpack_uuid(uu, &uuid);
192 "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
193 uuid.time_low, uuid.time_mid, uuid.time_hi_and_version,
194 uuid.clock_seq >> 8, uuid.clock_seq & 0xFF,
195 uuid.node[0], uuid.node[1], uuid.node[2],
196 uuid.node[3], uuid.node[4], uuid.node[5]);
/*
 * ext2fs_create_icount_tdb() --- create an icount backed by a tdb file.
 *
 * fs       filesystem; supplies the UUID and the inode statistics
 * tdb_dir  directory in which the temporary database file is created
 * flags    EXT2_ICOUNT_OPT_* bits, forwarded to alloc_icount()
 * ret      receives the new handle (the store is not visible here)
 *
 * The file name is built as tdb_dir, a slash, the filesystem UUID and the
 * suffix -icount-XXXXXX. The database is opened with a hash size taken
 * from the number of inodes in use, divided by fs_num_threads when that
 * is nonzero.
 *
 * NOTE(review): every parameter carries EXT2FS_NO_TDB_UNUSED and the last
 * visible statement returns EXT2_ET_UNIMPLEMENTED, which presumably is
 * the body used when tdb support is compiled out -- confirm.
 */
200 errcode_t ext2fs_create_icount_tdb(ext2_filsys fs EXT2FS_NO_TDB_UNUSED,
201 char *tdb_dir EXT2FS_NO_TDB_UNUSED,
202 int flags EXT2FS_NO_TDB_UNUSED,
203 ext2_icount_t *ret EXT2FS_NO_TDB_UNUSED)
206 ext2_icount_t icount;
209 ext2_ino_t num_inodes;
213 retval = alloc_icount(fs, flags, &icount);
/* 64 spare bytes cover the slash, the UUID text, the suffix and the NUL. */
217 retval = ext2fs_get_mem(strlen(tdb_dir) + 64, &fn);
220 uuid_unparse(fs->super->s_uuid, uuid);
221 sprintf(fn, "%s/%s-icount-XXXXXX", tdb_dir, uuid);
/* Restrict permissions of whatever gets created next to the owner only. */
222 save_umask = umask(077);
226 ext2fs_free_mem(&fn);
232 * This is an overestimate of the size that we will need; the
233 * ideal value is the number of used inodes with a count
234 * greater than 1. OTOH the times when we really need this is
235 * with the backup programs that use lots of hard links, in
236 * which case the number of inodes in use approaches the ideal
239 num_inodes = fs->super->s_inodes_count - fs->super->s_free_inodes_count;
240 if (fs->fs_num_threads)
241 num_inodes /= fs->fs_num_threads;
243 icount->tdb = tdb_open(fn, num_inodes, TDB_NOLOCK | TDB_NOSYNC,
244 O_RDWR | O_CREAT | O_TRUNC, 0600);
246 if (icount->tdb == NULL) {
253 ext2fs_free_icount(icount);
256 return EXT2_ET_UNIMPLEMENTED;
/*
 * ext2fs_create_icount2() --- create an in-memory icount.
 *
 * fs     filesystem the counts describe
 * flags  EXT2_ICOUNT_OPT_* bits, forwarded to alloc_icount()
 * size   requested number of sorted-list entries
 * hint   optional earlier icount; its magic is checked, its size can
 *        raise the requested size, and its inode numbers seed the list
 * ret    receives the new handle (the store is not visible here)
 *
 * The visible sizing code starts from the directory count, adds one
 * fiftieth of the inode count and divides by fs_num_threads when that is
 * nonzero. The list is then allocated and zero-filled.
 *
 * NOTE(review): the conditions that choose between the caller size and
 * the computed estimate are not visible in this view.
 * NOTE(review): the diagnostic printf passes bytes, which is assigned
 * from a size_t cast, to a %d conversion -- confirm the declared type.
 */
260 errcode_t ext2fs_create_icount2(ext2_filsys fs, int flags, unsigned int size,
261 ext2_icount_t hint, ext2_icount_t *ret)
263 ext2_icount_t icount;
269 EXT2_CHECK_MAGIC(hint, EXT2_ET_MAGIC_ICOUNT);
/* Never start with fewer slots than the hint already had. */
270 if (hint->size > size)
271 size = (size_t) hint->size;
274 retval = alloc_icount(fs, flags, &icount);
285 * Figure out how many special case inode counts we will
286 * have. We know we will need one for each directory;
287 * we also need to reserve some extra room for file links
289 retval = ext2fs_get_num_dirs(fs, &icount->size);
292 icount->size += fs->super->s_inodes_count / 50;
293 if (fs->fs_num_threads)
294 icount->size /= fs->fs_num_threads;
297 bytes = (size_t) (icount->size * sizeof(struct ext2_icount_el));
299 printf("Icount allocated %u entries, %d bytes.\n",
300 icount->size, bytes);
302 retval = ext2fs_get_array(icount->size, sizeof(struct ext2_icount_el),
306 memset(icount->list, 0, bytes);
312 * Populate the sorted list with those entries which were
313 * found in the hint icount (since those are ones which will
314 * likely need to be in the sorted list this time around).
/* Only the inode numbers are copied; the counts stay zero from the memset. */
317 for (i=0; i < hint->count; i++)
318 icount->list[i].ino = hint->list[i].ino;
319 icount->count = hint->count;
/* Presumably the error path: release the partially built handle. */
327 ext2fs_free_icount(icount);
/*
 * ext2fs_create_icount() --- convenience wrapper around
 * ext2fs_create_icount2() that passes 0 for the hint argument.
 */
331 errcode_t ext2fs_create_icount(ext2_filsys fs, int flags,
335 return ext2fs_create_icount2(fs, flags, size, 0, ret);
/*
 * Review summary of insert_icount_el():
 *
 * icount  handle owning the sorted list
 * ino     inode number for the new entry
 * pos     index at which the entry is placed; later entries shift up
 *
 * Returns the entry at pos, or the cached last_lookup entry when it
 * already describes ino. The visible early exit returns 0 and is marked
 * as should-never-happen.
 *
 * Growth: when count has reached size, the new size is estimated by
 * scaling count by the ratio of num_inodes to the largest inode number
 * stored so far, with a floor of size plus 100 entries.
 *
 * NOTE(review): the guards around the estimate and the resize failure
 * handling are not visible in this view.
 */
339 * insert_icount_el() --- Insert a new entry into the sorted list at a
340 * specified position.
342 static struct ext2_icount_el *insert_icount_el(ext2_icount_t icount,
343 ext2_ino_t ino, int pos)
345 struct ext2_icount_el *el;
347 ext2_ino_t new_size = 0;
/* Fast path: the entry handed out last time is the one wanted again. */
350 if (icount->last_lookup && icount->last_lookup->ino == ino)
351 return icount->last_lookup;
353 if (icount->count >= icount->size) {
/* new_size first holds the largest stored inode number, then the estimate. */
355 new_size = icount->list[(unsigned)icount->count-1].ino;
356 new_size = (ext2_ino_t) (icount->count *
357 ((float) icount->num_inodes / new_size));
359 if (new_size < (icount->size + 100))
360 new_size = icount->size + 100;
362 printf("Reallocating icount %u entries...\n", new_size);
364 retval = ext2fs_resize_mem((size_t) icount->size *
365 sizeof(struct ext2_icount_el),
367 sizeof(struct ext2_icount_el),
371 icount->size = new_size;
/* Number of existing entries at or after pos that must move up by one. */
373 num = (int) icount->count - pos;
375 return 0; /* should never happen */
/* memmove is required here because source and destination overlap. */
377 memmove(&icount->list[pos+1], &icount->list[pos],
378 sizeof(struct ext2_icount_el) * num);
381 el = &icount->list[pos];
384 icount->last_lookup = el;
/*
 * Review summary of get_icount_el():
 *
 * icount  handle owning the sorted list; a null handle or null list is
 *         tested first (the result of that test is not visible here)
 * ino     inode number to look up
 * create  when nonzero, a missing entry is inserted instead of reported
 *
 * Lookup order in the visible code:
 *   1. append fast path -- with create set, an empty list or an ino
 *      beyond the last stored one is inserted at index count;
 *   2. cursor fast path -- if the entry at cursor matches, it is
 *      returned and cursor advances by one;
 *   3. binary search between low and high; a hit sets cursor to the
 *      index after the match.
 * With create set, a miss is inserted at index low.
 *
 * NOTE(review): the initialisation of low and the updates of low and
 * high inside the loop are not visible in this view.
 */
389 * get_icount_el() --- given an inode number, try to find icount
390 * information in the sorted list. If the create flag is set,
391 * and we can't find an entry, create one in the sorted list.
393 static struct ext2_icount_el *get_icount_el(ext2_icount_t icount,
394 ext2_ino_t ino, int create)
398 if (!icount || !icount->list)
401 if (create && ((icount->count == 0) ||
402 (ino > icount->list[(unsigned)icount->count-1].ino))) {
403 return insert_icount_el(icount, ino, (unsigned) icount->count);
405 if (icount->count == 0)
408 if (icount->cursor >= icount->count)
410 if (ino == icount->list[icount->cursor].ino)
411 return &icount->list[icount->cursor++];
413 printf("Non-cursor get_icount_el: %u\n", ino);
416 high = (int) icount->count-1;
417 while (low <= high) {
418 mid = ((unsigned)low + (unsigned)high) >> 1;
419 if (ino == icount->list[mid].ino) {
420 icount->cursor = mid+1;
421 return &icount->list[mid];
423 if (ino < icount->list[mid].ino)
429 * If we need to create a new entry, it should be right at
430 * low (where high will be left at low-1).
433 return insert_icount_el(icount, ino, low);
/*
 * set_inode_count() --- store a count for ino in whichever backend the
 * handle uses.
 *
 * Three storage paths are visible:
 *   tdb      key is the raw ext2_ino_t, value is the raw __u32 count;
 *            the record is stored with TDB_REPLACE or deleted
 *            (presumably deleted when the count is zero -- confirm);
 *            failures return the tdb error plus an offset that is not
 *            visible in this view;
 *   fullmap  the slot for ino receives the count clamped by
 *            icount_16_xlate();
 *   list     the entry is looked up with create set, and
 *            EXT2_ET_NO_MEMORY is returned (presumably when that lookup
 *            yields no entry -- confirm).
 *
 * NOTE(review): the conditions that select each path are not visible.
 */
437 static errcode_t set_inode_count(ext2_icount_t icount, ext2_ino_t ino,
440 struct ext2_icount_el *el;
445 key.dptr = (unsigned char *) &ino;
446 key.dsize = sizeof(ext2_ino_t);
447 data.dptr = (unsigned char *) &count;
448 data.dsize = sizeof(__u32);
450 if (tdb_store(icount->tdb, key, data, TDB_REPLACE))
451 return tdb_error(icount->tdb) +
454 if (tdb_delete(icount->tdb, key))
455 return tdb_error(icount->tdb) +
461 if (icount->fullmap) {
462 icount->fullmap[ino] = icount_16_xlate(count);
466 el = get_icount_el(icount, ino, 1);
468 return EXT2_ET_NO_MEMORY;
/*
 * get_inode_count() --- read the stored count for ino into *count.
 *
 * Three storage paths are visible:
 *   tdb      the record keyed by the raw ext2_ino_t is fetched; a null
 *            data pointer returns the tdb error plus EXT2_ET_TDB_SUCCESS,
 *            otherwise the value is read as a __u32;
 *   fullmap  the slot for ino is returned directly;
 *   list     the entry is looked up with create clear, so a lookup never
 *            inserts.
 *
 * NOTE(review): whether the fetched tdb buffer is released, and what is
 * written to *count when no list entry exists, is not visible here.
 */
474 static errcode_t get_inode_count(ext2_icount_t icount, ext2_ino_t ino,
477 struct ext2_icount_el *el;
482 key.dptr = (unsigned char *) &ino;
483 key.dsize = sizeof(ext2_ino_t);
485 data = tdb_fetch(icount->tdb, key);
486 if (data.dptr == NULL) {
488 return tdb_error(icount->tdb) + EXT2_ET_TDB_SUCCESS;
491 *count = *((__u32 *) data.dptr);
496 if (icount->fullmap) {
497 *count = icount->fullmap[ino];
501 el = get_icount_el(icount, ino, 0);
/*
 * ext2fs_icount_validate() --- consistency check of the sorted list.
 *
 * icount  handle to check; its magic is verified first
 * out     stream that receives one diagnostic line per problem found
 *
 * Two invariants are checked: count must not exceed size (reported and
 * returned immediately as EXT2_ET_INVALID_ARGUMENT), and inode numbers
 * must be strictly increasing from one entry to the next. An ordering
 * violation is reported and recorded in ret.
 *
 * NOTE(review): the final return statement is not visible in this view.
 */
511 errcode_t ext2fs_icount_validate(ext2_icount_t icount, FILE *out)
515 const char *bad = "bad icount";
517 EXT2_CHECK_MAGIC(icount, EXT2_ET_MAGIC_ICOUNT);
519 if (icount->count > icount->size) {
520 fprintf(out, "%s: count > size\n", bad);
521 return EXT2_ET_INVALID_ARGUMENT;
523 for (i=1; i < icount->count; i++) {
524 if (icount->list[i-1].ino >= icount->list[i].ino) {
525 fprintf(out, "%s: list[%d].ino=%u, list[%d].ino=%u\n",
526 bad, i-1, icount->list[i-1].ino,
527 i, icount->list[i].ino);
528 ret = EXT2_ET_INVALID_ARGUMENT;
/*
 * ext2fs_icount_fetch() --- read the count of an inode as a 16-bit value.
 *
 * icount  handle; its magic is verified first
 * ino     inode number; zero or anything above num_inodes is rejected
 *         with EXT2_ET_INVALID_ARGUMENT
 * ret     receives the count, clamped by icount_16_xlate()
 *
 * Without a fullmap, the single bitmap is tested first and then, when
 * present, the multiple bitmap. The values produced by those two
 * branches are not visible in this view. Otherwise the count comes from
 * get_inode_count().
 *
 * NOTE(review): the result of get_inode_count() is discarded here, so a
 * backend failure is not reported to the caller -- confirm intent.
 */
534 errcode_t ext2fs_icount_fetch(ext2_icount_t icount, ext2_ino_t ino, __u16 *ret)
537 EXT2_CHECK_MAGIC(icount, EXT2_ET_MAGIC_ICOUNT);
539 if (!ino || (ino > icount->num_inodes))
540 return EXT2_ET_INVALID_ARGUMENT;
542 if (!icount->fullmap) {
543 if (ext2fs_test_inode_bitmap2(icount->single, ino)) {
547 if (icount->multiple &&
548 !ext2fs_test_inode_bitmap2(icount->multiple, ino)) {
553 get_inode_count(icount, ino, &val);
554 *ret = icount_16_xlate(val);
/*
 * ext2fs_icount_increment() --- add one to the count of an inode.
 *
 * icount  handle; its magic is verified first
 * ino     inode number; zero or anything above num_inodes is rejected
 *         with EXT2_ET_INVALID_ARGUMENT
 * ret     receives the new count, clamped by icount_16_xlate()
 *
 * Visible cases, in order:
 *   fullmap present     the slot is bumped and clamped in place;
 *   single bit set      the count was one, so 2 is stored and the single
 *                       bit is cleared;
 *   multiple present    a set multiple bit means a stored count exists
 *                       and is updated; otherwise the count was zero and
 *                       the single bit is set;
 *   no multiple bitmap  the stored count is fetched to tell zero from a
 *                       larger value.
 * The multiple bit is set where that bitmap exists (the enclosing
 * condition is not visible).
 *
 * NOTE(review): every set_inode_count() failure is reported as
 * EXT2_ET_NO_MEMORY, although that helper also returns tdb-derived
 * codes. The results of get_inode_count() are ignored. The test driver
 * in this file passes 0 for ret, so the final store is presumably
 * guarded by a check that is not visible here -- confirm.
 */
558 errcode_t ext2fs_icount_increment(ext2_icount_t icount, ext2_ino_t ino,
563 EXT2_CHECK_MAGIC(icount, EXT2_ET_MAGIC_ICOUNT);
565 if (!ino || (ino > icount->num_inodes))
566 return EXT2_ET_INVALID_ARGUMENT;
568 if (icount->fullmap) {
569 curr_value = icount_16_xlate(icount->fullmap[ino] + 1);
570 icount->fullmap[ino] = curr_value;
571 } else if (ext2fs_test_inode_bitmap2(icount->single, ino)) {
573 * If the existing count is 1, then we know there is
574 * no entry in the list.
576 if (set_inode_count(icount, ino, 2))
577 return EXT2_ET_NO_MEMORY;
579 ext2fs_unmark_inode_bitmap2(icount->single, ino);
580 } else if (icount->multiple) {
582 * The count is either zero or greater than 1; if the
583 * inode is set in icount->multiple, then there should
584 * be an entry in the list, so we need to fix it.
586 if (ext2fs_test_inode_bitmap2(icount->multiple, ino)) {
587 get_inode_count(icount, ino, &curr_value);
589 if (set_inode_count(icount, ino, curr_value))
590 return EXT2_ET_NO_MEMORY;
593 * The count was zero; mark the single bitmap
596 ext2fs_mark_inode_bitmap2(icount->single, ino);
603 * The count is either zero or greater than 1; try to
604 * find an entry in the list to determine which.
606 get_inode_count(icount, ino, &curr_value);
608 if (set_inode_count(icount, ino, curr_value))
609 return EXT2_ET_NO_MEMORY;
611 if (icount->multiple)
612 ext2fs_mark_inode_bitmap2(icount->multiple, ino);
614 *ret = icount_16_xlate(curr_value);
/*
 * ext2fs_icount_decrement() --- subtract one from the count of an inode.
 *
 * icount  handle
 * ino     inode number; zero or anything above num_inodes is rejected
 *         with EXT2_ET_INVALID_ARGUMENT
 * ret     receives the new count, clamped by icount_16_xlate()
 *
 * Visible cases, in order:
 *   fullmap present   a zero slot is an error (EXT2_ET_INVALID_ARGUMENT),
 *                     otherwise the slot is decremented in place;
 *   single bit set    the count was one: both bitmap bits are cleared
 *                     and a zero count is stored;
 *   otherwise         with a multiple bitmap, a clear bit means the
 *                     count is already zero and is an error; the stored
 *                     count is fetched, adjusted and written back, the
 *                     single bit is set (presumably when the result is
 *                     one) and the multiple bit cleared when it is zero.
 *
 * NOTE(review): the range check reads icount->num_inodes before
 * EXT2_CHECK_MAGIC runs, the opposite order from fetch and increment in
 * this file -- consider checking the magic first. The result of the
 * zero-count set_inode_count() call is ignored, and other failures are
 * all reported as EXT2_ET_NO_MEMORY.
 */
618 errcode_t ext2fs_icount_decrement(ext2_icount_t icount, ext2_ino_t ino,
623 if (!ino || (ino > icount->num_inodes))
624 return EXT2_ET_INVALID_ARGUMENT;
626 EXT2_CHECK_MAGIC(icount, EXT2_ET_MAGIC_ICOUNT);
628 if (icount->fullmap) {
629 if (!icount->fullmap[ino])
630 return EXT2_ET_INVALID_ARGUMENT;
632 curr_value = --icount->fullmap[ino];
634 *ret = icount_16_xlate(curr_value);
638 if (ext2fs_test_inode_bitmap2(icount->single, ino)) {
639 ext2fs_unmark_inode_bitmap2(icount->single, ino);
640 if (icount->multiple)
641 ext2fs_unmark_inode_bitmap2(icount->multiple, ino);
643 set_inode_count(icount, ino, 0);
650 if (icount->multiple &&
651 !ext2fs_test_inode_bitmap2(icount->multiple, ino))
652 return EXT2_ET_INVALID_ARGUMENT;
654 get_inode_count(icount, ino, &curr_value);
656 return EXT2_ET_INVALID_ARGUMENT;
658 if (set_inode_count(icount, ino, curr_value))
659 return EXT2_ET_NO_MEMORY;
662 ext2fs_mark_inode_bitmap2(icount->single, ino);
663 if ((curr_value == 0) && icount->multiple)
664 ext2fs_unmark_inode_bitmap2(icount->multiple, ino);
667 *ret = icount_16_xlate(curr_value);
/*
 * ext2fs_icount_store() --- set the count of an inode to a given value.
 *
 * icount  handle
 * ino     inode number; zero or anything above num_inodes is rejected
 *         with EXT2_ET_INVALID_ARGUMENT
 * count   new value (its declaration is not visible in this view)
 *
 * Visible cases, in order:
 *   direct path   set_inode_count() is called and its result returned
 *                 (presumably when a fullmap is in use -- confirm);
 *   count of one  the single bit is set and the multiple bit cleared;
 *   count of zero the single bit is cleared; with a multiple bitmap that
 *                 bit is cleared too, otherwise a zero count is stored;
 *   larger count  the value is stored, the single bit cleared and the
 *                 multiple bit set.
 * The conditions that select each case are not visible; the mapping to
 * one, zero and larger is inferred from the bitmap updates and the
 * existing comment text.
 *
 * NOTE(review): the range check reads icount->num_inodes before
 * EXT2_CHECK_MAGIC runs, the opposite order from fetch and increment in
 * this file -- consider checking the magic first. The result of the
 * zero-count set_inode_count() call is ignored.
 */
671 errcode_t ext2fs_icount_store(ext2_icount_t icount, ext2_ino_t ino,
674 if (!ino || (ino > icount->num_inodes))
675 return EXT2_ET_INVALID_ARGUMENT;
677 EXT2_CHECK_MAGIC(icount, EXT2_ET_MAGIC_ICOUNT);
680 return set_inode_count(icount, ino, count);
683 ext2fs_mark_inode_bitmap2(icount->single, ino);
684 if (icount->multiple)
685 ext2fs_unmark_inode_bitmap2(icount->multiple, ino);
689 ext2fs_unmark_inode_bitmap2(icount->single, ino);
690 if (icount->multiple) {
692 * If the icount->multiple bitmap is enabled,
693 * we can just clear both bitmaps and we're done
695 ext2fs_unmark_inode_bitmap2(icount->multiple, ino);
697 set_inode_count(icount, ino, 0);
701 if (set_inode_count(icount, ino, count))
702 return EXT2_ET_NO_MEMORY;
703 ext2fs_unmark_inode_bitmap2(icount->single, ino);
704 if (icount->multiple)
705 ext2fs_mark_inode_bitmap2(icount->multiple, ino);
/*
 * ext2fs_icount_merge_full_map() --- merge for handles that use a
 * fullmap. Only a TODO marker is visible; no merging is done by the
 * lines shown.
 * NOTE(review): the value returned is not visible in this view.
 */
709 errcode_t ext2fs_icount_merge_full_map(ext2_icount_t src, ext2_icount_t dest)
711 /* TODO: add the support for full map */
/*
 * ext2fs_icount_merge_el() --- merge the sorted list of src into dest.
 *
 * A new array with room for src_count plus dest_count entries is
 * allocated and filled by a two-way merge: once one input is exhausted
 * the rest of the other is copied in bulk with memcpy, otherwise the
 * entry with the smaller inode number is copied next. The old dest list
 * is then freed, dest->count becomes the sum of both counts and the
 * cached last_lookup pointer is cleared because it pointed into the
 * freed array.
 *
 * NOTE(review): the assert requires the two inode numbers to differ, so
 * an inode present in both lists aborts instead of being combined.
 * NOTE(review): the index and array_ptr updates, the assignment of the
 * new array to dest->list and any update of dest->size are not visible.
 */
715 errcode_t ext2fs_icount_merge_el(ext2_icount_t src, ext2_icount_t dest)
717 int src_count = src->count;
718 int dest_count = dest->count;
719 int size = src_count + dest_count;
720 int size_entry = sizeof(struct ext2_icount_el);
721 struct ext2_icount_el *array;
722 struct ext2_icount_el *array_ptr;
723 struct ext2_icount_el *src_array = src->list;
724 struct ext2_icount_el *dest_array = dest->list;
732 retval = ext2fs_get_array(size, size_entry, &array);
738 * This can be improved by binary search and memcpy, but codes
739 * would be more complex. And if number of bad blocks is small,
740 * the optimization won't improve performance a lot.
742 while (src_index < src_count || dest_index < dest_count) {
743 if (src_index >= src_count) {
744 memcpy(array_ptr, &dest_array[dest_index],
745 (dest_count - dest_index) * size_entry);
748 if (dest_index >= dest_count) {
749 memcpy(array_ptr, &src_array[src_index],
750 (src_count - src_index) * size_entry);
753 if (src_array[src_index].ino < dest_array[dest_index].ino) {
754 *array_ptr = src_array[src_index];
757 assert(src_array[src_index].ino >
758 dest_array[dest_index].ino);
759 *array_ptr = dest_array[dest_index];
765 ext2fs_free_mem(&dest->list);
767 dest->count = src_count + dest_count;
769 dest->last_lookup = NULL;
/*
 * ext2fs_icount_merge() --- merge the contents of src into dest.
 *
 * The four leading tests detect handles whose storage layout differs:
 * one side with a fullmap and the other without, or one side with a
 * multiple bitmap and the other without. Fullmap handles are passed to
 * ext2fs_icount_merge_full_map(). Otherwise the single bitmaps are
 * merged, then the multiple bitmaps, then the sorted lists.
 *
 * NOTE(review): what is returned for mismatched layouts, the condition
 * that guards the multiple bitmap merge and the error checks between
 * the steps are not visible in this view.
 */
773 errcode_t ext2fs_icount_merge(ext2_icount_t src, ext2_icount_t dest)
777 if (src->fullmap && !dest->fullmap)
780 if (!src->fullmap && dest->fullmap)
783 if (src->multiple && !dest->multiple)
786 if (!src->multiple && dest->multiple)
790 return ext2fs_icount_merge_full_map(src, dest);
792 retval = ext2fs_merge_bitmap(src->single, dest->single, NULL,
798 retval = ext2fs_merge_bitmap(src->multiple, dest->multiple,
804 retval = ext2fs_icount_merge_el(src, dest);
/*
 * ext2fs_get_icount_size() --- report the size of an icount handle.
 * A null handle or one with the wrong magic is detected up front.
 * NOTE(review): both return statements are outside this view, so the
 * value reported in each case is unconfirmed.
 */
811 ext2_ino_t ext2fs_get_icount_size(ext2_icount_t icount)
813 if (!icount || icount->magic != EXT2_ET_MAGIC_ICOUNT)
/*
 * Test driver data.
 *
 * Each table row is read by run_test() through the members cmd, ino, arg
 * and expected. The rows below are presumably written in that order
 * (opcode, inode, argument, expected fetch result) -- the member
 * declarations of struct test_program are not visible in this view.
 * Opcodes other than INCREMENT and DECREMENT (for example STORE and the
 * EXIT terminator used by run_test()) are defined outside this view.
 */
822 ext2_icount_t icount;
827 #define INCREMENT 0x03
828 #define DECREMENT 0x04
830 struct test_program {
/* Basic script: store, then walk a count up to 4 and back down to 0. */
837 struct test_program prog[] = {
838 { STORE, 42, 42, 42 },
844 { INCREMENT, 5, 0, 1 },
845 { INCREMENT, 5, 0, 2 },
846 { INCREMENT, 5, 0, 3 },
847 { INCREMENT, 5, 0, 4 },
848 { DECREMENT, 5, 0, 3 },
849 { DECREMENT, 5, 0, 2 },
850 { DECREMENT, 5, 0, 1 },
851 { DECREMENT, 5, 0, 0 },
856 { INCREMENT, 1, 0, 2 },
857 { DECREMENT, 2, 0, 1 },
858 { DECREMENT, 2, 0, 0 },
/* Second script; main() runs it with a starting size of 3 entries. */
863 struct test_program extended[] = {
/*
 * Review summary of setup():
 *
 * Builds the scratch filesystem used by the icount tests. The error
 * table is registered, a zeroed superblock template is given a block
 * count of 12000, the filesystem is initialised on the test I/O manager
 * with EXT2_FLAG_64BITS into the global test_fs, and its tables are
 * allocated. Failures of either library call are reported via com_err.
 *
 * The three uses of the superblock template take its address; the
 * address-of expression had been corrupted into a paragraph sign
 * followed by the letter m and is restored here.
 *
 * NOTE(review): what happens after each com_err report (for example an
 * exit) is not visible in this view.
 */
898 * Setup the variables for doing the inode scan test.
900 static void setup(void)
903 struct ext2_super_block param;
905 initialize_ext2_error_table();
/* Start from an all-zero template so only the block count is explicit. */
907 memset(&param, 0, sizeof(param));
908 ext2fs_blocks_count_set(&param, 12000);
910 retval = ext2fs_initialize("test fs", EXT2_FLAG_64BITS, &param,
911 test_io_manager, &test_fs);
913 com_err("setup", retval,
914 "while initializing filesystem");
917 retval = ext2fs_allocate_tables(test_fs);
919 com_err("setup", retval,
920 "while allocating tables for test filesystem");
/*
 * run_test() --- run one test script against a freshly created icount.
 *
 * flags  EXT2_ICOUNT_OPT_* bits passed to the icount constructor
 * size   initial list size for the in-memory constructor
 * dir    directory for a tdb-backed icount; main() passes 0 to select
 *        the in-memory constructor (the test itself is not visible)
 * prog   script rows, terminated by a row whose cmd is EXIT
 *
 * Each row performs its operation, then fetches the count of the same
 * inode and compares it with the expected value from the row. After the
 * script, the icount size is printed, the handle is validated and freed.
 * main() adds the return value to its failure total.
 *
 * NOTE(review): increment and decrement are called with 0 as the result
 * pointer, so those functions must tolerate a null ret.
 * NOTE(review): the switch on pc->cmd and the bookkeeping of the failure
 * count are not visible in this view.
 */
925 int run_test(int flags, int size, char *dir, struct test_program *prog)
928 ext2_icount_t icount;
929 struct test_program *pc;
935 retval = ext2fs_create_icount_tdb(test_fs, dir,
938 com_err("run_test", retval,
939 "while creating icount using tdb");
947 retval = ext2fs_create_icount2(test_fs, flags, size, 0,
950 com_err("run_test", retval, "while creating icount");
954 for (pc = prog; pc->cmd != EXIT; pc++) {
957 printf("icount_fetch(%u) = ", pc->ino);
960 retval = ext2fs_icount_store(icount, pc->ino, pc->arg);
962 com_err("run_test", retval,
963 "while calling icount_store");
966 printf("icount_store(%u, %u) = ", pc->ino, pc->arg);
969 retval = ext2fs_icount_increment(icount, pc->ino, 0);
971 com_err("run_test", retval,
972 "while calling icount_increment");
975 printf("icount_increment(%u) = ", pc->ino);
978 retval = ext2fs_icount_decrement(icount, pc->ino, 0);
980 com_err("run_test", retval,
981 "while calling icount_decrement");
984 printf("icount_decrement(%u) = ", pc->ino);
/* Every row ends with a fetch so the stored value can be compared. */
987 retval = ext2fs_icount_fetch(icount, pc->ino, &result);
989 com_err("run_test", retval,
990 "while calling icount_fetch");
993 printf("%u (%s)\n", result, (result == pc->expected) ?
995 if (result != pc->expected)
998 printf("icount size is %u\n", ext2fs_get_icount_size(icount));
999 retval = ext2fs_icount_validate(icount, stdout);
1001 com_err("run_test", retval, "while calling icount_validate");
1004 ext2fs_free_icount(icount);
/*
 * main() --- test entry point.
 *
 * Runs five configurations and sums the failures reported by run_test():
 * the basic script in memory, the basic script with OPT_INCREMENT, the
 * extended script with a starting size of 3 to force list growth, and
 * the basic script twice more on a tdb file in the current directory,
 * without and with OPT_INCREMENT. FAILED! is printed (presumably when
 * the total is nonzero -- the test is not visible in this view).
 */
1009 int main(int argc, char **argv)
1014 printf("Standard icount run:\n");
1015 failed += run_test(0, 0, 0, prog);
1016 printf("\nMultiple bitmap test:\n");
1017 failed += run_test(EXT2_ICOUNT_OPT_INCREMENT, 0, 0, prog);
1018 printf("\nResizing icount:\n");
1019 failed += run_test(0, 3, 0, extended);
1020 printf("\nStandard icount run with tdb:\n");
1021 failed += run_test(0, 0, ".", prog);
1022 printf("\nMultiple bitmap test with tdb:\n");
1023 failed += run_test(EXT2_ICOUNT_OPT_INCREMENT, 0, ".", prog);
1025 printf("FAILED!\n");