From 675bdf6af2e4ebddb7ee1c96081afa3f913319e3 Mon Sep 17 00:00:00 2001 From: nikita Date: Mon, 20 Nov 2006 13:40:11 +0000 Subject: [PATCH] iam: 0. do not unlock leaf while advancing index part. 1. avoid binary search when possible. --- .../kernel_patches/patches/ext3-iam-separate.patch | 3 +- .../patches/ext3-pdirops-2.6.9.patch | 132 ++++++++++++++------- 2 files changed, 87 insertions(+), 48 deletions(-) diff --git a/lustre/kernel_patches/patches/ext3-iam-separate.patch b/lustre/kernel_patches/patches/ext3-iam-separate.patch index 35b6db1..6db2c28 100644 --- a/lustre/kernel_patches/patches/ext3-iam-separate.patch +++ b/lustre/kernel_patches/patches/ext3-iam-separate.patch @@ -15,7 +15,7 @@ Index: iam/fs/ext3/iam.c =================================================================== --- iam.orig/fs/ext3/iam.c +++ iam/fs/ext3/iam.c -@@ -0,0 +1,1433 @@ +@@ -0,0 +1,1432 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * @@ -897,7 +897,6 @@ Index: iam/fs/ext3/iam.c + */ + while (result == 0 && iam_leaf_at_end(leaf)) { + do_corr(schedule()); -+ iam_leaf_unlock(leaf); + /* advance index portion of the path */ + result = iam_index_next(iam_it_container(it), path); + if (result == 1) { diff --git a/lustre/kernel_patches/patches/ext3-pdirops-2.6.9.patch b/lustre/kernel_patches/patches/ext3-pdirops-2.6.9.patch index 9877b1c..0c3649c 100644 --- a/lustre/kernel_patches/patches/ext3-pdirops-2.6.9.patch +++ b/lustre/kernel_patches/patches/ext3-pdirops-2.6.9.patch @@ -101,7 +101,7 @@ Index: iam/fs/ext3/namei.c BREAKPOINT(); return 0; } -@@ -241,12 +280,197 @@ struct stats dx_show_entries(struct dx_h +@@ -241,12 +280,238 @@ struct stats dx_show_entries(struct dx_h } #endif /* DX_DEBUG */ @@ -225,6 +225,43 @@ Index: iam/fs/ext3/namei.c +} + +/* ++ * Fast check for frame consistency. ++ */ ++static int dx_check_fast(struct iam_path *path, struct iam_frame *frame) ++{ ++ struct iam_container *bag; ++ struct iam_entry *next; ++ struct iam_entry *last; ++ struct iam_entry *entries; ++ struct iam_entry *at; ++ ++ assert_corr(!dx_index_is_compat(path)); ++ ++ bag = path->ip_container; ++ at = frame->at; ++ entries = frame->entries; ++ last = iam_entry_shift(path, entries, dx_get_count(entries) - 1); ++ ++ if (unlikely(at > last)) ++ return -EAGAIN; ++ ++ if (unlikely(dx_get_block(path, at) != frame->leaf)) ++ return -EAGAIN; ++ ++ if (unlikely(iam_ikeycmp(bag, iam_ikey_at(path, at), ++ path->ip_ikey_target) > 0)) ++ return -EAGAIN; ++ ++ next = iam_entry_shift(path, at, +1); ++ if (next <= last) { ++ if (unlikely(iam_ikeycmp(bag, iam_ikey_at(path, next), ++ path->ip_ikey_target) <= 0)) ++ return -EAGAIN; ++ } ++ return 0; ++} ++ ++/* + * returns 0 if path was unchanged, -EAGAIN otherwise. + */ +static int dx_check_path(struct iam_path *path, struct iam_frame *frame) @@ -232,7 +269,8 @@ Index: iam/fs/ext3/namei.c + int equal; + + dx_lock_bh(frame->bh); -+ equal = frame->leaf == dx_find_ptr(path, frame); ++ equal = dx_check_fast(path, frame) == 0 || ++ frame->leaf == dx_find_ptr(path, frame); + DX_DEVAL(dx_lock_stats.dls_bh_again += !equal); + dx_unlock_bh(frame->bh); + @@ -269,6 +307,9 @@ Index: iam/fs/ext3/namei.c + if (search) { + struct iam_entry *pos; + ++ if (dx_check_fast(path, scan) == 0) ++ continue; ++ + pos = dx_find_position(path, scan); + if (scan->leaf != dx_get_block(path, pos)) { + result = -EAGAIN; @@ -301,7 +342,7 @@ Index: iam/fs/ext3/namei.c struct iam_descr *param; struct iam_frame *frame; -@@ -255,20 +479,19 @@ int dx_lookup(struct iam_path *path) +@@ -255,20 +520,19 @@ int dx_lookup(struct iam_path *path) param = iam_path_descr(path); c = path->ip_container; @@ -332,7 +373,7 @@ Index: iam/fs/ext3/namei.c if (err != 0) break; -@@ -283,53 +506,83 @@ int dx_lookup(struct iam_path *path) +@@ -283,53 +547,83 @@ int dx_lookup(struct iam_path *path) break; assert_inv(dx_node_check(path, frame)); @@ -351,13 +392,27 @@ Index: iam/fs/ext3/namei.c - q = iam_entry_shift(path, m, -1); - else - p = iam_entry_shift(path, m, +1); -- } -- ++ assert(dx_bug11027_check(path, frame)); ++ /* ++ * splitting may change root index block and move hash we're ++ * looking for into another index block so, we have to check ++ * this situation and repeat from begining if path got changed ++ * -bzzz ++ */ ++ if (i > 0) { ++ err = dx_check_path(path, frame - 1); ++ if (err != 0) ++ break; + } + - frame->at = iam_entry_shift(path, p, -1); - if (EXT3_INVARIANT_ON) { // linear search cross check - unsigned n = count - 1; - struct iam_entry *at; -- ++ frame->at = dx_find_position(path, frame); ++ frame->curidx = ptr; ++ frame->leaf = ptr = dx_get_block(path, frame->at); + - at = entries; - while (n--) { - dxtrace(printk(",")); @@ -371,26 +426,11 @@ Index: iam/fs/ext3/namei.c - path->ip_ikey_target)); - } - at = iam_entry_shift(path, at, -1); -+ assert(dx_bug11027_check(path, frame)); -+ /* -+ * splitting may change root index block and move hash we're -+ * looking for into another index block so, we have to check -+ * this situation and repeat from begining if path got changed -+ * -bzzz -+ */ -+ if (i > 0) { -+ err = dx_check_path(path, frame - 1); -+ if (err != 0) - break; - } +- break; +- } - } - assert_corr(at == frame->at); - } -+ -+ frame->at = dx_find_position(path, frame); -+ frame->curidx = ptr; -+ frame->leaf = ptr = dx_get_block(path, frame->at); -+ + dx_unlock_bh(frame->bh); + do_corr(schedule()); } @@ -455,7 +495,7 @@ Index: iam/fs/ext3/namei.c /* * Probe for a directory leaf block to search. * -@@ -339,7 +592,7 @@ int dx_lookup(struct iam_path *path) +@@ -339,7 +633,7 @@ int dx_lookup(struct iam_path *path) * check for this error code, and make sure it never gets reflected * back to userspace. */ @@ -464,7 +504,7 @@ Index: iam/fs/ext3/namei.c struct dx_hash_info *hinfo, struct iam_path *path) { int err; -@@ -347,7 +600,7 @@ static int dx_probe(struct dentry *dentr +@@ -347,7 +641,7 @@ static int dx_probe(struct dentry *dentr assert_corr(path->ip_data != NULL); ipc = container_of(path->ip_data, struct iam_path_compat, ipc_descr); @@ -473,7 +513,7 @@ Index: iam/fs/ext3/namei.c ipc->ipc_hinfo = hinfo; assert_corr(dx_index_is_compat(path)); -@@ -356,6 +609,7 @@ static int dx_probe(struct dentry *dentr +@@ -356,6 +650,7 @@ static int dx_probe(struct dentry *dentr return err; } @@ -481,7 +521,7 @@ Index: iam/fs/ext3/namei.c /* * This function increments the frame pointer to search the next leaf * block, and reads in the necessary intervening nodes if the search -@@ -391,10 +645,13 @@ static int ext3_htree_advance(struct ino +@@ -391,10 +686,13 @@ static int ext3_htree_advance(struct ino * nodes need to be read. */ while (1) { @@ -496,7 +536,7 @@ Index: iam/fs/ext3/namei.c if (p == path->ip_frames) return 0; num_frames++; -@@ -409,7 +666,7 @@ static int ext3_htree_advance(struct ino +@@ -409,7 +707,7 @@ static int ext3_htree_advance(struct ino * If the hash is 1, then continue only if the next page has a * continuation hash of any value. This is used for readdir * handling. Otherwise, check to see if the hash matches the @@ -505,7 +545,7 @@ Index: iam/fs/ext3/namei.c * there's no point to read in the successive index pages. */ iam_get_ikey(path, p->at, (struct iam_ikey *)&bhash); -@@ -425,25 +682,91 @@ static int ext3_htree_advance(struct ino +@@ -425,25 +723,91 @@ static int ext3_htree_advance(struct ino * block so no check is necessary */ while (num_frames--) { @@ -602,7 +642,7 @@ Index: iam/fs/ext3/namei.c } int ext3_htree_next_block(struct inode *dir, __u32 hash, -@@ -649,14 +972,26 @@ void iam_insert_key(struct iam_path *pat +@@ -649,14 +1013,26 @@ void iam_insert_key(struct iam_path *pat struct iam_entry *new = iam_entry_shift(path, frame->at, +1); int count = dx_get_count(entries); @@ -629,7 +669,7 @@ Index: iam/fs/ext3/namei.c } void dx_insert_block(struct iam_path *path, struct iam_frame *frame, -@@ -882,7 +1217,7 @@ static struct buffer_head * ext3_dx_find +@@ -882,7 +1258,7 @@ static struct buffer_head * ext3_dx_find sb = dir->i_sb; /* NFS may look up ".." - look at dx_root directory block */ if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ @@ -638,7 +678,7 @@ Index: iam/fs/ext3/namei.c if (*err != 0) return NULL; } else { -@@ -1114,7 +1449,7 @@ struct ext3_dir_entry_2 *move_entries(st +@@ -1114,7 +1490,7 @@ struct ext3_dir_entry_2 *move_entries(st hash2 = map[split].hash; continued = hash2 == map[split - 1].hash; dxtrace(printk("Split block %i at %x, %i/%i\n", @@ -647,7 +687,7 @@ Index: iam/fs/ext3/namei.c /* Fancy dance to stay within two buffers */ de2 = dx_move_dirents(data1, data2, map + split, count - split); -@@ -1484,16 +1819,38 @@ static int shift_entries(struct iam_path +@@ -1484,16 +1860,38 @@ static int shift_entries(struct iam_path (char *) iam_entry_shift(path, entries, count1), count2 * iam_entry_size(path)); @@ -689,7 +729,7 @@ Index: iam/fs/ext3/namei.c { struct iam_entry *entries; /* old block contents */ -@@ -1501,6 +1858,8 @@ int split_index_node(handle_t *handle, s +@@ -1501,6 +1899,8 @@ int split_index_node(handle_t *handle, s struct iam_frame *frame, *safe; struct buffer_head *bh_new[DX_MAX_TREE_HEIGHT] = {0}; u32 newblock[DX_MAX_TREE_HEIGHT] = {0}; @@ -698,7 +738,7 @@ Index: iam/fs/ext3/namei.c struct inode *dir = iam_path_obj(path); struct iam_descr *descr; int nr_splet; -@@ -1523,12 +1882,14 @@ int split_index_node(handle_t *handle, s +@@ -1523,12 +1923,14 @@ int split_index_node(handle_t *handle, s * - first allocate all necessary blocks * * - insert pointers into them atomically. @@ -717,7 +757,7 @@ Index: iam/fs/ext3/namei.c dxtrace(printk("using %u of %u node entries\n", dx_get_count(entries), dx_get_limit(entries))); -@@ -1536,6 +1897,7 @@ int split_index_node(handle_t *handle, s +@@ -1536,6 +1938,7 @@ int split_index_node(handle_t *handle, s for (nr_splet = 0; frame >= path->ip_frames && dx_get_count(frame->entries) == dx_get_limit(frame->entries); --frame, ++nr_splet) { @@ -725,7 +765,7 @@ Index: iam/fs/ext3/namei.c if (nr_splet == DX_MAX_TREE_HEIGHT) { ext3_warning(dir->i_sb, __FUNCTION__, "Directory index full!\n"); -@@ -1545,14 +1907,53 @@ int split_index_node(handle_t *handle, s +@@ -1545,14 +1948,53 @@ int split_index_node(handle_t *handle, s } safe = frame; @@ -780,7 +820,7 @@ Index: iam/fs/ext3/namei.c BUFFER_TRACE(frame->bh, "get_write_access"); err = ext3_journal_get_write_access(handle, frame->bh); if (err) -@@ -1560,6 +1961,7 @@ int split_index_node(handle_t *handle, s +@@ -1560,6 +2002,7 @@ int split_index_node(handle_t *handle, s } /* Add "safe" node to transaction too */ if (safe + 1 != path->ip_frames) { @@ -788,7 +828,7 @@ Index: iam/fs/ext3/namei.c err = ext3_journal_get_write_access(handle, safe->bh); if (err) goto journal_error; -@@ -1596,16 +1998,21 @@ int split_index_node(handle_t *handle, s +@@ -1596,16 +2039,21 @@ int split_index_node(handle_t *handle, s assert_corr(i == 0); @@ -810,7 +850,7 @@ Index: iam/fs/ext3/namei.c /* Shift frames in the path */ memmove(frames + 2, frames + 1, (sizeof path->ip_frames) - 2 * sizeof frames[0]); -@@ -1613,18 +2020,22 @@ int split_index_node(handle_t *handle, s +@@ -1613,18 +2061,22 @@ int split_index_node(handle_t *handle, s frames[1].at = iam_entry_shift(path, entries2, idx); frames[1].entries = entries = entries2; frames[1].bh = bh2; @@ -833,7 +873,7 @@ Index: iam/fs/ext3/namei.c count = shift_entries(path, frame, count, entries, entries2, newblock[i]); /* Which index block gets the new entry? */ -@@ -1634,33 +2045,44 @@ int split_index_node(handle_t *handle, s +@@ -1634,33 +2086,44 @@ int split_index_node(handle_t *handle, s frame->at = iam_entry_shift(path, entries2, idx - count + d); frame->entries = entries = entries2; @@ -879,7 +919,7 @@ Index: iam/fs/ext3/namei.c if (nr_splet > 0) { /* * Log ->i_size modification. -@@ -1674,6 +2096,12 @@ journal_error: +@@ -1674,6 +2137,12 @@ journal_error: ext3_std_error(dir->i_sb, err); cleanup: @@ -892,7 +932,7 @@ Index: iam/fs/ext3/namei.c for (i = 0; i < ARRAY_SIZE(bh_new); ++i) { if (bh_new[i] != NULL) brelse(bh_new[i]); -@@ -1695,18 +2123,18 @@ static int ext3_dx_add_entry(handle_t *h +@@ -1695,18 +2164,18 @@ static int ext3_dx_add_entry(handle_t *h struct buffer_head * bh = NULL; struct inode *dir = dentry->d_parent->d_inode; struct ext3_dir_entry_2 *de; @@ -913,7 +953,7 @@ Index: iam/fs/ext3/namei.c isize = dir->i_size; err = param->id_ops->id_node_read(path->ip_container, -@@ -1726,7 +2154,7 @@ static int ext3_dx_add_entry(handle_t *h +@@ -1726,7 +2195,7 @@ static int ext3_dx_add_entry(handle_t *h goto cleanup; } @@ -922,7 +962,7 @@ Index: iam/fs/ext3/namei.c if (err) goto cleanup; -@@ -1736,12 +2164,14 @@ static int ext3_dx_add_entry(handle_t *h +@@ -1736,12 +2205,14 @@ static int ext3_dx_add_entry(handle_t *h goto cleanup; assert_inv(dx_node_check(path, frame)); -- 1.8.3.1