From 4e523bbe00cfa0240b4581eeca0c097b05c9fe3e Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 29 Nov 2011 11:24:52 -0500 Subject: [PATCH] e2fsck: speed up runs when using tdb for large atypical filesystems Optimize how the tdb library is used so that running with [scratch_files] in /etc/e2fsck.conf is more efficient. Use a better hash function, supplied by Rogier Wolff, and supply an estimate of the size of the hash table to tdb_open instead of using the default (which is way too small in most cases). Also, disable the tdb locking and fsync calls, since they are not necessary for our use in this case (which is essentially as cheap swap space; the tdb files do not contain persistent data.) Signed-off-by: "Theodore Ts'o" --- e2fsck/dirinfo.c | 6 +++++- lib/ext2fs/icount.c | 13 ++++++++++++- lib/ext2fs/tdb.c | 8 ++++---- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/e2fsck/dirinfo.c b/e2fsck/dirinfo.c index e98795c..c81777a 100644 --- a/e2fsck/dirinfo.c +++ b/e2fsck/dirinfo.c @@ -67,7 +67,11 @@ static void setup_tdb(e2fsck_t ctx, ext2_ino_t num_dirs) db->tdb = NULL; return; } - db->tdb = tdb_open(db->tdb_fn, 0, TDB_CLEAR_IF_FIRST, + + if (num_dirs < 99991) + num_dirs = 99991; /* largest 5 digit prime */ + + db->tdb = tdb_open(db->tdb_fn, num_dirs, TDB_NOLOCK | TDB_NOSYNC, O_RDWR | O_CREAT | O_TRUNC, 0600); close(fd); } diff --git a/lib/ext2fs/icount.c b/lib/ext2fs/icount.c index 1a18b37..5d64ac4 100644 --- a/lib/ext2fs/icount.c +++ b/lib/ext2fs/icount.c @@ -180,6 +180,7 @@ errcode_t ext2fs_create_icount_tdb(ext2_filsys fs, char *tdb_dir, ext2_icount_t icount; errcode_t retval; char *fn, uuid[40]; + ext2_ino_t num_inodes; int fd; retval = alloc_icount(fs, flags, &icount); @@ -193,8 +194,18 @@ errcode_t ext2fs_create_icount_tdb(ext2_filsys fs, char *tdb_dir, sprintf(fn, "%s/%s-icount-XXXXXX", tdb_dir, uuid); fd = mkstemp(fn); + /* + * This is an overestimate of the size that we will need; the + * ideal value is the number of used inodes with a count + * 
greater than 1. OTOH the times when we really need this is + * with the backup programs that use lots of hard links, in + * which case the number of inodes in use approaches the ideal + * value. + */ + num_inodes = fs->super->s_inodes_count - fs->super->s_free_inodes_count; + icount->tdb_fn = fn; - icount->tdb = tdb_open(fn, 0, TDB_CLEAR_IF_FIRST, + icount->tdb = tdb_open(fn, num_inodes, TDB_NOLOCK | TDB_NOSYNC, O_RDWR | O_CREAT | O_TRUNC, 0600); if (icount->tdb) { close(fd); diff --git a/lib/ext2fs/tdb.c b/lib/ext2fs/tdb.c index 79534eb..83c834e 100644 --- a/lib/ext2fs/tdb.c +++ b/lib/ext2fs/tdb.c @@ -3711,17 +3711,17 @@ void tdb_enable_seqnum(struct tdb_context *tdb) static struct tdb_context *tdbs = NULL; -/* This is based on the hash algorithm from gdbm */ +/* This is from a hash algorithm suggested by Rogier Wolff */ static unsigned int default_tdb_hash(TDB_DATA *key) { u32 value; /* Used to compute the hash value. */ u32 i; /* Used to cycle through random values. */ /* Set the initial value from the key size. */ - for (value = 0x238F13AF * key->dsize, i=0; i < key->dsize; i++) - value = (value + (key->dptr[i] << (i*5 % 24))); + for (value = 0, i=0; i < key->dsize; i++) + value = value * 256 + key->dptr[i] + (value >> 24) * 241; - return (1103515243 * value + 12345); + return value; } -- 1.8.3.1