2 * icount.c --- an efficient inode count abstraction
4 * Copyright (C) 1997 Theodore Ts'o.
7 * This file may be redistributed under the terms of the GNU Library
8 * General Public License, version 2.
28 * The data storage strategy used by icount relies on the observation
29 * that most inode counts are either zero (for non-allocated inodes),
30 * one (for most files), and only a few that are two or more
31 * (directories and files that are linked to more than one directory).
33 * Also, e2fsck tends to load the icount data sequentially.
35 * So, we use an inode bitmap to indicate which inodes have a count of
36 * one, and then use a sorted list to store the counts for inodes
37 * which are greater than one.
39 * We also use an optional bitmap to indicate which inodes are already
40 * in the sorted list, to speed up the use of this abstraction by
41 * e2fsck's pass 2. Pass 2 increments inode counts as it finds them,
42 * so this extra bitmap avoids searching the sorted list to see if a
43 * particular inode is on the sorted list already.
46 struct ext2_icount_el {
53 ext2fs_inode_bitmap single;
54 ext2fs_inode_bitmap multiple;
57 ext2_ino_t num_inodes;
59 struct ext2_icount_el *list;
60 struct ext2_icount_el *last_lookup;
69 * We now use a 32-bit counter field because it doesn't cost us
70 * anything extra for the in-memory data structure, due to alignment
71 * padding. But there's no point changing the interface if most of
72 * the time we only care if the number is bigger than 65,000 or not.
73 * So use the following translation function to return a 16-bit count.
75 #define icount_16_xlate(x) (((x) > 65500) ? 65500 : (x))
77 void ext2fs_free_icount(ext2_icount_t icount)
84 ext2fs_free_mem(&icount->list);
86 ext2fs_free_inode_bitmap(icount->single);
88 ext2fs_free_inode_bitmap(icount->multiple);
91 tdb_close(icount->tdb);
93 (void) unlink(icount->tdb_fn);
99 ext2fs_free_mem(&icount->fullmap);
101 ext2fs_free_mem(&icount);
104 static errcode_t alloc_icount(ext2_filsys fs, int flags, ext2_icount_t *ret)
106 ext2_icount_t icount;
111 retval = ext2fs_get_mem(sizeof(struct ext2_icount), &icount);
114 memset(icount, 0, sizeof(struct ext2_icount));
115 icount->magic = EXT2_ET_MAGIC_ICOUNT;
116 icount->num_inodes = fs->super->s_inodes_count;
118 if ((flags & EXT2_ICOUNT_OPT_FULLMAP) &&
119 (flags & EXT2_ICOUNT_OPT_INCREMENT)) {
120 unsigned sz = sizeof(*icount->fullmap) * icount->num_inodes;
122 retval = ext2fs_get_mem(sz, &icount->fullmap);
123 /* If we can't allocate, fall back */
125 memset(icount->fullmap, 0, sz);
131 retval = ext2fs_allocate_inode_bitmap(fs, "icount", &icount->single);
135 if (flags & EXT2_ICOUNT_OPT_INCREMENT) {
136 retval = ext2fs_allocate_inode_bitmap(fs, "icount_inc",
141 icount->multiple = 0;
147 ext2fs_free_icount(icount);
155 __u16 time_hi_and_version;
160 static void unpack_uuid(void *in, struct uuid *uu)
166 tmp = (tmp << 8) | *ptr++;
167 tmp = (tmp << 8) | *ptr++;
168 tmp = (tmp << 8) | *ptr++;
172 tmp = (tmp << 8) | *ptr++;
176 tmp = (tmp << 8) | *ptr++;
177 uu->time_hi_and_version = tmp;
180 tmp = (tmp << 8) | *ptr++;
183 memcpy(uu->node, ptr, 6);
186 static void uuid_unparse(void *uu, char *out)
190 unpack_uuid(uu, &uuid);
192 "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
193 uuid.time_low, uuid.time_mid, uuid.time_hi_and_version,
194 uuid.clock_seq >> 8, uuid.clock_seq & 0xFF,
195 uuid.node[0], uuid.node[1], uuid.node[2],
196 uuid.node[3], uuid.node[4], uuid.node[5]);
200 errcode_t ext2fs_create_icount_tdb(ext2_filsys fs EXT2FS_NO_TDB_UNUSED,
201 char *tdb_dir EXT2FS_NO_TDB_UNUSED,
202 int flags EXT2FS_NO_TDB_UNUSED,
203 ext2_icount_t *ret EXT2FS_NO_TDB_UNUSED)
206 ext2_icount_t icount;
209 ext2_ino_t num_inodes;
213 retval = alloc_icount(fs, flags, &icount);
217 retval = ext2fs_get_mem(strlen(tdb_dir) + 64, &fn);
220 uuid_unparse(fs->super->s_uuid, uuid);
221 sprintf(fn, "%s/%s-icount-XXXXXX", tdb_dir, uuid);
222 save_umask = umask(077);
226 ext2fs_free_mem(&fn);
232 * This is an overestimate of the size that we will need; the
233 * ideal value is the number of used inodes with a count
234 * greater than 1. OTOH the times when we really need this is
235 * with the backup programs that use lots of hard links, in
236 * which case the number of inodes in use approaches the ideal
239 num_inodes = fs->super->s_inodes_count - fs->super->s_free_inodes_count;
241 icount->tdb = tdb_open(fn, num_inodes, TDB_NOLOCK | TDB_NOSYNC,
242 O_RDWR | O_CREAT | O_TRUNC, 0600);
244 if (icount->tdb == NULL) {
251 ext2fs_free_icount(icount);
254 return EXT2_ET_UNIMPLEMENTED;
258 errcode_t ext2fs_create_icount2(ext2_filsys fs, int flags, unsigned int size,
259 ext2_icount_t hint, ext2_icount_t *ret)
261 ext2_icount_t icount;
267 EXT2_CHECK_MAGIC(hint, EXT2_ET_MAGIC_ICOUNT);
268 if (hint->size > size)
269 size = (size_t) hint->size;
272 retval = alloc_icount(fs, flags, &icount);
283 * Figure out how many special case inode counts we will
284 * have. We know we will need one for each directory;
285 * we also need to reserve some extra room for file links
287 retval = ext2fs_get_num_dirs(fs, &icount->size);
290 icount->size += fs->super->s_inodes_count / 50;
293 bytes = (size_t) (icount->size * sizeof(struct ext2_icount_el));
295 printf("Icount allocated %u entries, %d bytes.\n",
296 icount->size, bytes);
298 retval = ext2fs_get_array(icount->size, sizeof(struct ext2_icount_el),
302 memset(icount->list, 0, bytes);
308 * Populate the sorted list with those entries which were
309 * found in the hint icount (since those are ones which will
310 * likely need to be in the sorted list this time around).
313 for (i=0; i < hint->count; i++)
314 icount->list[i].ino = hint->list[i].ino;
315 icount->count = hint->count;
323 ext2fs_free_icount(icount);
327 errcode_t ext2fs_create_icount(ext2_filsys fs, int flags,
331 return ext2fs_create_icount2(fs, flags, size, 0, ret);
335 * insert_icount_el() --- Insert a new entry into the sorted list at a
336 * specified position.
338 static struct ext2_icount_el *insert_icount_el(ext2_icount_t icount,
339 ext2_ino_t ino, int pos)
341 struct ext2_icount_el *el;
343 ext2_ino_t new_size = 0;
346 if (icount->last_lookup && icount->last_lookup->ino == ino)
347 return icount->last_lookup;
349 if (icount->count >= icount->size) {
351 new_size = icount->list[(unsigned)icount->count-1].ino;
352 new_size = (ext2_ino_t) (icount->count *
353 ((float) icount->num_inodes / new_size));
355 if (new_size < (icount->size + 100))
356 new_size = icount->size + 100;
358 printf("Reallocating icount %u entries...\n", new_size);
360 retval = ext2fs_resize_mem((size_t) icount->size *
361 sizeof(struct ext2_icount_el),
363 sizeof(struct ext2_icount_el),
367 icount->size = new_size;
369 num = (int) icount->count - pos;
371 return 0; /* should never happen */
373 memmove(&icount->list[pos+1], &icount->list[pos],
374 sizeof(struct ext2_icount_el) * num);
377 el = &icount->list[pos];
380 icount->last_lookup = el;
385 * get_icount_el() --- given an inode number, try to find icount
386 * information in the sorted list. If the create flag is set,
387 * and we can't find an entry, create one in the sorted list.
389 static struct ext2_icount_el *get_icount_el(ext2_icount_t icount,
390 ext2_ino_t ino, int create)
394 if (!icount || !icount->list)
397 if (create && ((icount->count == 0) ||
398 (ino > icount->list[(unsigned)icount->count-1].ino))) {
399 return insert_icount_el(icount, ino, (unsigned) icount->count);
401 if (icount->count == 0)
404 if (icount->cursor >= icount->count)
406 if (ino == icount->list[icount->cursor].ino)
407 return &icount->list[icount->cursor++];
409 printf("Non-cursor get_icount_el: %u\n", ino);
412 high = (int) icount->count-1;
413 while (low <= high) {
414 mid = ((unsigned)low + (unsigned)high) >> 1;
415 if (ino == icount->list[mid].ino) {
416 icount->cursor = mid+1;
417 return &icount->list[mid];
419 if (ino < icount->list[mid].ino)
425 * If we need to create a new entry, it should be right at
426 * low (where high will be left at low-1).
429 return insert_icount_el(icount, ino, low);
433 static errcode_t set_inode_count(ext2_icount_t icount, ext2_ino_t ino,
436 struct ext2_icount_el *el;
441 key.dptr = (unsigned char *) &ino;
442 key.dsize = sizeof(ext2_ino_t);
443 data.dptr = (unsigned char *) &count;
444 data.dsize = sizeof(__u32);
446 if (tdb_store(icount->tdb, key, data, TDB_REPLACE))
447 return tdb_error(icount->tdb) +
450 if (tdb_delete(icount->tdb, key))
451 return tdb_error(icount->tdb) +
457 if (icount->fullmap) {
458 icount->fullmap[ino] = icount_16_xlate(count);
462 el = get_icount_el(icount, ino, 1);
464 return EXT2_ET_NO_MEMORY;
470 static errcode_t get_inode_count(ext2_icount_t icount, ext2_ino_t ino,
473 struct ext2_icount_el *el;
478 key.dptr = (unsigned char *) &ino;
479 key.dsize = sizeof(ext2_ino_t);
481 data = tdb_fetch(icount->tdb, key);
482 if (data.dptr == NULL) {
484 return tdb_error(icount->tdb) + EXT2_ET_TDB_SUCCESS;
487 *count = *((__u32 *) data.dptr);
492 if (icount->fullmap) {
493 *count = icount->fullmap[ino];
497 el = get_icount_el(icount, ino, 0);
507 errcode_t ext2fs_icount_validate(ext2_icount_t icount, FILE *out)
511 const char *bad = "bad icount";
513 EXT2_CHECK_MAGIC(icount, EXT2_ET_MAGIC_ICOUNT);
515 if (icount->count > icount->size) {
516 fprintf(out, "%s: count > size\n", bad);
517 return EXT2_ET_INVALID_ARGUMENT;
519 for (i=1; i < icount->count; i++) {
520 if (icount->list[i-1].ino >= icount->list[i].ino) {
521 fprintf(out, "%s: list[%d].ino=%u, list[%d].ino=%u\n",
522 bad, i-1, icount->list[i-1].ino,
523 i, icount->list[i].ino);
524 ret = EXT2_ET_INVALID_ARGUMENT;
530 errcode_t ext2fs_icount_fetch(ext2_icount_t icount, ext2_ino_t ino, __u16 *ret)
533 EXT2_CHECK_MAGIC(icount, EXT2_ET_MAGIC_ICOUNT);
535 if (!ino || (ino > icount->num_inodes))
536 return EXT2_ET_INVALID_ARGUMENT;
538 if (!icount->fullmap) {
539 if (ext2fs_test_inode_bitmap2(icount->single, ino)) {
543 if (icount->multiple &&
544 !ext2fs_test_inode_bitmap2(icount->multiple, ino)) {
549 get_inode_count(icount, ino, &val);
550 *ret = icount_16_xlate(val);
554 errcode_t ext2fs_icount_increment(ext2_icount_t icount, ext2_ino_t ino,
559 EXT2_CHECK_MAGIC(icount, EXT2_ET_MAGIC_ICOUNT);
561 if (!ino || (ino > icount->num_inodes))
562 return EXT2_ET_INVALID_ARGUMENT;
564 if (icount->fullmap) {
565 curr_value = icount_16_xlate(icount->fullmap[ino] + 1);
566 icount->fullmap[ino] = curr_value;
567 } else if (ext2fs_test_inode_bitmap2(icount->single, ino)) {
569 * If the existing count is 1, then we know there is
570 * no entry in the list.
572 if (set_inode_count(icount, ino, 2))
573 return EXT2_ET_NO_MEMORY;
575 ext2fs_unmark_inode_bitmap2(icount->single, ino);
576 } else if (icount->multiple) {
578 * The count is either zero or greater than 1; if the
579 * inode is set in icount->multiple, then there should
580 * be an entry in the list, so we need to fix it.
582 if (ext2fs_test_inode_bitmap2(icount->multiple, ino)) {
583 get_inode_count(icount, ino, &curr_value);
585 if (set_inode_count(icount, ino, curr_value))
586 return EXT2_ET_NO_MEMORY;
589 * The count was zero; mark the single bitmap
592 ext2fs_mark_inode_bitmap2(icount->single, ino);
599 * The count is either zero or greater than 1; try to
600 * find an entry in the list to determine which.
602 get_inode_count(icount, ino, &curr_value);
604 if (set_inode_count(icount, ino, curr_value))
605 return EXT2_ET_NO_MEMORY;
607 if (icount->multiple)
608 ext2fs_mark_inode_bitmap2(icount->multiple, ino);
610 *ret = icount_16_xlate(curr_value);
614 errcode_t ext2fs_icount_decrement(ext2_icount_t icount, ext2_ino_t ino,
619 if (!ino || (ino > icount->num_inodes))
620 return EXT2_ET_INVALID_ARGUMENT;
622 EXT2_CHECK_MAGIC(icount, EXT2_ET_MAGIC_ICOUNT);
624 if (icount->fullmap) {
625 if (!icount->fullmap[ino])
626 return EXT2_ET_INVALID_ARGUMENT;
628 curr_value = --icount->fullmap[ino];
630 *ret = icount_16_xlate(curr_value);
634 if (ext2fs_test_inode_bitmap2(icount->single, ino)) {
635 ext2fs_unmark_inode_bitmap2(icount->single, ino);
636 if (icount->multiple)
637 ext2fs_unmark_inode_bitmap2(icount->multiple, ino);
639 set_inode_count(icount, ino, 0);
646 if (icount->multiple &&
647 !ext2fs_test_inode_bitmap2(icount->multiple, ino))
648 return EXT2_ET_INVALID_ARGUMENT;
650 get_inode_count(icount, ino, &curr_value);
652 return EXT2_ET_INVALID_ARGUMENT;
654 if (set_inode_count(icount, ino, curr_value))
655 return EXT2_ET_NO_MEMORY;
658 ext2fs_mark_inode_bitmap2(icount->single, ino);
659 if ((curr_value == 0) && icount->multiple)
660 ext2fs_unmark_inode_bitmap2(icount->multiple, ino);
663 *ret = icount_16_xlate(curr_value);
667 errcode_t ext2fs_icount_store(ext2_icount_t icount, ext2_ino_t ino,
670 if (!ino || (ino > icount->num_inodes))
671 return EXT2_ET_INVALID_ARGUMENT;
673 EXT2_CHECK_MAGIC(icount, EXT2_ET_MAGIC_ICOUNT);
676 return set_inode_count(icount, ino, count);
679 ext2fs_mark_inode_bitmap2(icount->single, ino);
680 if (icount->multiple)
681 ext2fs_unmark_inode_bitmap2(icount->multiple, ino);
685 ext2fs_unmark_inode_bitmap2(icount->single, ino);
686 if (icount->multiple) {
688 * If the icount->multiple bitmap is enabled,
689 * we can just clear both bitmaps and we're done
691 ext2fs_unmark_inode_bitmap2(icount->multiple, ino);
693 set_inode_count(icount, ino, 0);
697 if (set_inode_count(icount, ino, count))
698 return EXT2_ET_NO_MEMORY;
699 ext2fs_unmark_inode_bitmap2(icount->single, ino);
700 if (icount->multiple)
701 ext2fs_mark_inode_bitmap2(icount->multiple, ino);
705 errcode_t ext2fs_icount_merge_full_map(ext2_icount_t src, ext2_icount_t dest)
707 /* TODO: add the support for full map */
711 errcode_t ext2fs_icount_merge_el(ext2_icount_t src, ext2_icount_t dest)
713 int src_count = src->count;
714 int dest_count = dest->count;
715 int size = src_count + dest_count;
716 int size_entry = sizeof(struct ext2_icount_el);
717 struct ext2_icount_el *array;
718 struct ext2_icount_el *array_ptr;
719 struct ext2_icount_el *src_array = src->list;
720 struct ext2_icount_el *dest_array = dest->list;
728 retval = ext2fs_get_array(size, size_entry, &array);
734 * This can be improved by binary search and memcpy, but codes
735 * would be more complex. And if number of bad blocks is small,
736 * the optimization won't improve performance a lot.
738 while (src_index < src_count || dest_index < dest_count) {
739 if (src_index >= src_count) {
740 memcpy(array_ptr, &dest_array[dest_index],
741 (dest_count - dest_index) * size_entry);
744 if (dest_index >= dest_count) {
745 memcpy(array_ptr, &src_array[src_index],
746 (src_count - src_index) * size_entry);
749 if (src_array[src_index].ino < dest_array[dest_index].ino) {
750 *array_ptr = src_array[src_index];
753 assert(src_array[src_index].ino >
754 dest_array[dest_index].ino);
755 *array_ptr = dest_array[dest_index];
761 ext2fs_free_mem(&dest->list);
763 dest->count = src_count + dest_count;
765 dest->last_lookup = NULL;
769 errcode_t ext2fs_icount_merge(ext2_icount_t src, ext2_icount_t dest)
773 if (src->fullmap && !dest->fullmap)
776 if (!src->fullmap && dest->fullmap)
779 if (src->multiple && !dest->multiple)
782 if (!src->multiple && dest->multiple)
786 return ext2fs_icount_merge_full_map(src, dest);
788 retval = ext2fs_merge_bitmap(src->single, dest->single, NULL,
794 retval = ext2fs_merge_bitmap(src->multiple, dest->multiple,
800 retval = ext2fs_icount_merge_el(src, dest);
807 ext2_ino_t ext2fs_get_icount_size(ext2_icount_t icount)
809 if (!icount || icount->magic != EXT2_ET_MAGIC_ICOUNT)
818 ext2_icount_t icount;
823 #define INCREMENT 0x03
824 #define DECREMENT 0x04
826 struct test_program {
833 struct test_program prog[] = {
834 { STORE, 42, 42, 42 },
840 { INCREMENT, 5, 0, 1 },
841 { INCREMENT, 5, 0, 2 },
842 { INCREMENT, 5, 0, 3 },
843 { INCREMENT, 5, 0, 4 },
844 { DECREMENT, 5, 0, 3 },
845 { DECREMENT, 5, 0, 2 },
846 { DECREMENT, 5, 0, 1 },
847 { DECREMENT, 5, 0, 0 },
852 { INCREMENT, 1, 0, 2 },
853 { DECREMENT, 2, 0, 1 },
854 { DECREMENT, 2, 0, 0 },
859 struct test_program extended[] = {
894 * Setup the variables for doing the inode scan test.
896 static void setup(void)
899 struct ext2_super_block param;
901 initialize_ext2_error_table();
903 memset(¶m, 0, sizeof(param));
904 ext2fs_blocks_count_set(¶m, 12000);
906 retval = ext2fs_initialize("test fs", EXT2_FLAG_64BITS, ¶m,
907 test_io_manager, &test_fs);
909 com_err("setup", retval,
910 "while initializing filesystem");
913 retval = ext2fs_allocate_tables(test_fs);
915 com_err("setup", retval,
916 "while allocating tables for test filesystem");
921 int run_test(int flags, int size, char *dir, struct test_program *prog)
924 ext2_icount_t icount;
925 struct test_program *pc;
931 retval = ext2fs_create_icount_tdb(test_fs, dir,
934 com_err("run_test", retval,
935 "while creating icount using tdb");
943 retval = ext2fs_create_icount2(test_fs, flags, size, 0,
946 com_err("run_test", retval, "while creating icount");
950 for (pc = prog; pc->cmd != EXIT; pc++) {
953 printf("icount_fetch(%u) = ", pc->ino);
956 retval = ext2fs_icount_store(icount, pc->ino, pc->arg);
958 com_err("run_test", retval,
959 "while calling icount_store");
962 printf("icount_store(%u, %u) = ", pc->ino, pc->arg);
965 retval = ext2fs_icount_increment(icount, pc->ino, 0);
967 com_err("run_test", retval,
968 "while calling icount_increment");
971 printf("icount_increment(%u) = ", pc->ino);
974 retval = ext2fs_icount_decrement(icount, pc->ino, 0);
976 com_err("run_test", retval,
977 "while calling icount_decrement");
980 printf("icount_decrement(%u) = ", pc->ino);
983 retval = ext2fs_icount_fetch(icount, pc->ino, &result);
985 com_err("run_test", retval,
986 "while calling icount_fetch");
989 printf("%u (%s)\n", result, (result == pc->expected) ?
991 if (result != pc->expected)
994 printf("icount size is %u\n", ext2fs_get_icount_size(icount));
995 retval = ext2fs_icount_validate(icount, stdout);
997 com_err("run_test", retval, "while calling icount_validate");
1000 ext2fs_free_icount(icount);
1005 int main(int argc, char **argv)
1010 printf("Standard icount run:\n");
1011 failed += run_test(0, 0, 0, prog);
1012 printf("\nMultiple bitmap test:\n");
1013 failed += run_test(EXT2_ICOUNT_OPT_INCREMENT, 0, 0, prog);
1014 printf("\nResizing icount:\n");
1015 failed += run_test(0, 3, 0, extended);
1016 printf("\nStandard icount run with tdb:\n");
1017 failed += run_test(0, 0, ".", prog);
1018 printf("\nMultiple bitmap test with tdb:\n");
1019 failed += run_test(EXT2_ICOUNT_OPT_INCREMENT, 0, ".", prog);
1021 printf("FAILED!\n");