From d3990c20fcfaff7c1890cf71b6a2d9c56654ed13 Mon Sep 17 00:00:00 2001 From: nikita Date: Fri, 20 Oct 2006 23:11:09 +0000 Subject: [PATCH] iam: 0. pdirops locking in iam_index_next() (readdir). 1. handle failure to allocate dynlocks. 2. some (rudimentary) statistics code. --- .../kernel_patches/patches/ext3-iam-separate.patch | 34 ++-- .../patches/ext3-pdirops-2.6.9.patch | 183 ++++++++++++++++----- 2 files changed, 163 insertions(+), 54 deletions(-) diff --git a/lustre/kernel_patches/patches/ext3-iam-separate.patch b/lustre/kernel_patches/patches/ext3-iam-separate.patch index 9357c31..1a59475 100644 --- a/lustre/kernel_patches/patches/ext3-iam-separate.patch +++ b/lustre/kernel_patches/patches/ext3-iam-separate.patch @@ -825,22 +825,22 @@ Index: iam/fs/ext3/iam.c + * multiple iterations may be necessary due to empty leaves. + */ + while (result == 0 && iam_leaf_at_end(leaf)) { ++ iam_leaf_unlock(leaf); + /* advance index portion of the path */ + result = iam_index_next(iam_it_container(it), path); + if (result == 1) { + struct dynlock_handle *lh; -+ /* -+ * Lock next leaf, then release lock on the -+ * current one. -+ */ + lh = dx_lock_htree(iam_path_obj(path), + path->ip_frame->leaf, + DLT_WRITE); -+ iam_leaf_fini(leaf); -+ leaf->il_lock = lh; -+ result = iam_leaf_load(path); -+ if (result == 0) -+ iam_leaf_start(leaf); ++ if (lh != NULL) { ++ iam_leaf_fini(leaf); ++ leaf->il_lock = lh; ++ result = iam_leaf_load(path); ++ if (result == 0) ++ iam_leaf_start(leaf); ++ } else ++ result = -ENOMEM; + } else if (result == 0) + /* end of container reached */ + result = +1; @@ -2694,7 +2694,7 @@ Index: iam/fs/ext3/iam_lvar.c =================================================================== --- iam.orig/fs/ext3/iam_lvar.c +++ iam/fs/ext3/iam_lvar.c -@@ -0,0 +1,976 @@ +@@ -0,0 +1,990 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * @@ -2843,6 +2843,11 @@ Index: iam/fs/ext3/iam_lvar.c + offsetof(struct lvar_leaf_entry, vle_key) + e_keysize(ent); +} + ++static void e_print(const struct lvar_leaf_entry *ent) ++{ ++ printk(" %p %8.8x \"%*.*s\"\n", ent, e_hash(ent), ++ e_keysize(ent), e_keysize(ent), e_char(ent)); ++} +#if 0 +static int e_check(const struct iam_leaf *leaf, + const struct lvar_leaf_entry *ent) @@ -2946,6 +2951,15 @@ Index: iam/fs/ext3/iam_lvar.c + return lentry_lvar(l->il_at); +} + ++static void n_print(const struct iam_leaf *l) ++{ ++ struct lvar_leaf_entry *scan; ++ ++ printk("used: %d\n", h_used(n_head(l))); ++ for (scan = n_start(l); scan < n_end(l); scan = e_next(l, scan)) ++ e_print(scan); ++} ++ +#if EXT3_CORRECTNESS_ON +static int n_at_rec(const struct iam_leaf *folio) +{ diff --git a/lustre/kernel_patches/patches/ext3-pdirops-2.6.9.patch b/lustre/kernel_patches/patches/ext3-pdirops-2.6.9.patch index 2d3f4f1..dbb57ec 100644 --- a/lustre/kernel_patches/patches/ext3-pdirops-2.6.9.patch +++ b/lustre/kernel_patches/patches/ext3-pdirops-2.6.9.patch @@ -45,7 +45,7 @@ Index: iam/fs/ext3/namei.c static inline void dx_set_limit(struct iam_entry *entries, unsigned value) { ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value); -@@ -241,12 +246,157 @@ struct stats dx_show_entries(struct dx_h +@@ -241,12 +246,182 @@ struct stats dx_show_entries(struct dx_h } #endif /* DX_DEBUG */ @@ -70,10 +70,26 @@ Index: iam/fs/ext3/namei.c + */ +#define BH_DXLock 25 + ++#define DX_DEBUG (1) ++ ++#if DX_DEBUG ++static struct dx_lock_stats { ++ unsigned dls_bh_lock; ++ unsigned dls_bh_busy; ++ unsigned dls_bh_again; ++ unsigned dls_bh_full_again; ++} dx_lock_stats = { 0, }; ++#define DX_DEVAL(x) x ++#else ++#define DX_DEVAL(x) ++#endif ++ +static inline void dx_lock_bh(struct buffer_head volatile *bh) +{ ++ DX_DEVAL(dx_lock_stats.dls_bh_lock++); +#ifdef CONFIG_SMP + while (test_and_set_bit(BH_DXLock, &bh->b_state)) { ++ DX_DEVAL(dx_lock_stats.dls_bh_busy++); + while (test_bit(BH_DXLock, &bh->b_state)) + cpu_relax(); + } @@ -98,7 +114,7 @@ Index: iam/fs/ext3/namei.c + /* + * XXX handle allocation failures. + */ -+ return dynlock_lock(&EXT3_I(dir)->i_htree_lock, value, lt, GFP_KERNEL); ++ return dynlock_lock(&EXT3_I(dir)->i_htree_lock, value, lt, GFP_NOFS); +} + +void dx_unlock_htree(struct inode *dir, struct dynlock_handle *lh) @@ -107,6 +123,18 @@ Index: iam/fs/ext3/namei.c + dynlock_unlock(&EXT3_I(dir)->i_htree_lock, lh); +} + ++static void dx_unlock_array(struct inode *dir, struct dynlock_handle **lh) ++{ ++ int i; ++ ++ for (i = 0; i < DX_MAX_TREE_HEIGHT; ++i, ++lh) { ++ if (*lh != NULL) { ++ dx_unlock_htree(dir, *lh); ++ *lh = NULL; ++ } ++ } ++} ++ +/* + * dx_find_position + * @@ -143,12 +171,11 @@ Index: iam/fs/ext3/namei.c + */ +static int dx_check_path(struct iam_path *path, struct iam_frame *frame) +{ -+ struct iam_entry *e; + int equal; + + dx_lock_bh(frame->bh); -+ e = dx_find_position(path, frame); -+ equal = frame->leaf == dx_get_block(path, e); ++ equal = frame->leaf == dx_get_block(path, frame->at); ++ DX_DEVAL(dx_lock_stats.dls_bh_again += !equal); + dx_unlock_bh(frame->bh); + + return equal ? 0 : -EAGAIN; @@ -179,10 +206,7 @@ Index: iam/fs/ext3/namei.c + */ + result = 0; + for (scan = path->ip_frames; scan < bottom; ++scan) { -+ struct iam_entry *e; -+ -+ e = dx_find_position(path, scan); -+ if (scan->leaf != dx_get_block(path, e)) { ++ if (scan->leaf != dx_get_block(path, scan->at)) { + result = -EAGAIN; + break; + } @@ -193,6 +217,7 @@ Index: iam/fs/ext3/namei.c + */ + for (scan = path->ip_frames; scan < bottom; ++scan) + dx_unlock_bh(scan->bh); ++ DX_DEVAL(dx_lock_stats.dls_bh_full_again += !!result); + return result; +} + @@ -205,7 +230,7 @@ Index: iam/fs/ext3/namei.c struct iam_descr *param; struct iam_frame *frame; -@@ -255,20 +405,17 @@ int dx_lookup(struct iam_path *path) +@@ -255,20 +430,17 @@ int dx_lookup(struct iam_path *path) param = iam_path_descr(path); c = path->ip_container; @@ -234,7 +259,7 @@ Index: iam/fs/ext3/namei.c if (err != 0) break; -@@ -283,53 +430,73 @@ int dx_lookup(struct iam_path *path) +@@ -283,53 +455,77 @@ int dx_lookup(struct iam_path *path) break; assert_inv(dx_node_check(path, frame)); @@ -331,6 +356,11 @@ Index: iam/fs/ext3/namei.c + dir = iam_path_obj(path); + while ((result = dx_lookup(path)) == 0) { + *dl = dx_lock_htree(dir, path->ip_frame->leaf, lt); ++ if (*dl == NULL) { ++ iam_path_fini(path); ++ result = -ENOMEM; ++ break; ++ } + /* + * while locking leaf we just found may get split so we need + * to check this -bzzz @@ -339,7 +369,6 @@ Index: iam/fs/ext3/namei.c + break; + dx_unlock_htree(dir, *dl); + iam_path_fini(path); -+ BREAKPOINT(); + } + return result; +} @@ -347,7 +376,7 @@ Index: iam/fs/ext3/namei.c /* * Probe for a directory leaf block to search. * -@@ -339,7 +506,7 @@ int dx_lookup(struct iam_path *path) +@@ -339,7 +535,7 @@ int dx_lookup(struct iam_path *path) * check for this error code, and make sure it never gets reflected * back to userspace. */ @@ -356,7 +385,7 @@ Index: iam/fs/ext3/namei.c struct dx_hash_info *hinfo, struct iam_path *path) { int err; -@@ -347,7 +514,7 @@ static int dx_probe(struct dentry *dentr +@@ -347,7 +543,7 @@ static int dx_probe(struct dentry *dentr assert_corr(path->ip_data != NULL); ipc = container_of(path->ip_data, struct iam_path_compat, ipc_descr); @@ -365,7 +394,15 @@ Index: iam/fs/ext3/namei.c ipc->ipc_hinfo = hinfo; assert_corr(dx_index_is_compat(path)); -@@ -393,8 +560,10 @@ static int ext3_htree_advance(struct ino +@@ -356,6 +552,7 @@ static int dx_probe(struct dentry *dentr + return err; + } + ++ + /* + * This function increments the frame pointer to search the next leaf + * block, and reads in the necessary intervening nodes if the search +@@ -393,8 +590,10 @@ static int ext3_htree_advance(struct ino while (1) { p->at = iam_entry_shift(path, p->at, +1); if (p->at < iam_entry_shift(path, p->entries, @@ -377,7 +414,7 @@ Index: iam/fs/ext3/namei.c if (p == path->ip_frames) return 0; num_frames++; -@@ -409,7 +578,7 @@ static int ext3_htree_advance(struct ino +@@ -409,7 +608,7 @@ static int ext3_htree_advance(struct ino * If the hash is 1, then continue only if the next page has a * continuation hash of any value. This is used for readdir * handling. Otherwise, check to see if the hash matches the @@ -386,7 +423,7 @@ Index: iam/fs/ext3/namei.c * there's no point to read in the successive index pages. */ iam_get_ikey(path, p->at, (struct iam_ikey *)&bhash); -@@ -425,17 +594,24 @@ static int ext3_htree_advance(struct ino +@@ -425,25 +624,89 @@ static int ext3_htree_advance(struct ino * block so no check is necessary */ while (num_frames--) { @@ -415,17 +452,73 @@ Index: iam/fs/ext3/namei.c assert_inv(dx_node_check(path, p)); } return 1; -@@ -443,6 +619,9 @@ static int ext3_htree_advance(struct ino + } ++int iam_index_lock(struct iam_path *path, struct dynlock_handle **lh) ++{ ++ struct iam_frame *f; ++ ++ for (f = path->ip_frame; f >= path->ip_frames; --f) { ++ *lh = dx_lock_htree(iam_path_obj(path), f->curidx, DLT_WRITE); ++ if (*lh == NULL) ++ return -ENOMEM; ++ lh++; ++ if (f->at < iam_entry_shift(path, f->entries, ++ dx_get_count(f->entries) - 1)) ++ return 1; ++ } ++ return 0; /* end of index... */ ++} ++ ++static int iam_index_advance(struct iam_path *path) ++{ ++ return ext3_htree_advance(iam_path_obj(path), 0, path, NULL, 0); ++} ++ ++/* ++ * Advance index part of @path to point to the next leaf. Returns 1 on ++ * success, 0, when end of container was reached. No locks can be held by ++ * caller. ++ */ int iam_index_next(struct iam_container *c, struct iam_path *path) { +- return ext3_htree_advance(c->ic_object, 0, path, NULL, 0); ++ iam_ptr_t cursor; ++ struct dynlock_handle *lh[DX_MAX_TREE_HEIGHT] = { 0, }; ++ int result; ++ struct inode *object; ++ + /* -+ * XXX pdirops locking is amiss for this case. ++ * Locking for iam_index_next()... is to be described. + */ - return ext3_htree_advance(c->ic_object, 0, path, NULL, 0); ++ ++ object = c->ic_object; ++ cursor = path->ip_frame->leaf; ++ ++ while (1) { ++ result = iam_index_lock(path, lh); ++ if (result <= 0) /* error, or end of index... */ ++ break; ++ ++ result = dx_check_full_path(path); ++ if (result == 0 && cursor == path->ip_frame->leaf) { ++ result = iam_index_advance(path); ++ break; ++ } ++ dx_unlock_array(object, lh); ++ result = dx_lookup(path); ++ while (path->ip_frame->leaf != cursor) { ++ result = iam_index_advance(path); ++ if (result <= 0) ++ break; ++ } ++ } ++ dx_unlock_array(object, lh); ++ return result; } -@@ -882,7 +1061,7 @@ static struct buffer_head * ext3_dx_find + int ext3_htree_next_block(struct inode *dir, __u32 hash, +@@ -882,7 +1145,7 @@ static struct buffer_head * ext3_dx_find sb = dir->i_sb; /* NFS may look up ".." - look at dx_root directory block */ if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ @@ -434,7 +527,7 @@ Index: iam/fs/ext3/namei.c if (*err != 0) return NULL; } else { -@@ -1114,7 +1293,7 @@ struct ext3_dir_entry_2 *move_entries(st +@@ -1114,7 +1377,7 @@ struct ext3_dir_entry_2 *move_entries(st hash2 = map[split].hash; continued = hash2 == map[split - 1].hash; dxtrace(printk("Split block %i at %x, %i/%i\n", @@ -443,7 +536,7 @@ Index: iam/fs/ext3/namei.c /* Fancy dance to stay within two buffers */ de2 = dx_move_dirents(data1, data2, map + split, count - split); -@@ -1484,16 +1663,40 @@ static int shift_entries(struct iam_path +@@ -1484,16 +1747,40 @@ static int shift_entries(struct iam_path (char *) iam_entry_shift(path, entries, count1), count2 * iam_entry_size(path)); @@ -486,7 +579,7 @@ Index: iam/fs/ext3/namei.c { struct iam_entry *entries; /* old block contents */ -@@ -1501,6 +1704,8 @@ int split_index_node(handle_t *handle, s +@@ -1501,6 +1788,8 @@ int split_index_node(handle_t *handle, s struct iam_frame *frame, *safe; struct buffer_head *bh_new[DX_MAX_TREE_HEIGHT] = {0}; u32 newblock[DX_MAX_TREE_HEIGHT] = {0}; @@ -495,7 +588,7 @@ Index: iam/fs/ext3/namei.c struct inode *dir = iam_path_obj(path); struct iam_descr *descr; int nr_splet; -@@ -1523,12 +1728,14 @@ int split_index_node(handle_t *handle, s +@@ -1523,12 +1812,14 @@ int split_index_node(handle_t *handle, s * - first allocate all necessary blocks * * - insert pointers into them atomically. @@ -514,7 +607,7 @@ Index: iam/fs/ext3/namei.c dxtrace(printk("using %u of %u node entries\n", dx_get_count(entries), dx_get_limit(entries))); -@@ -1545,7 +1752,20 @@ int split_index_node(handle_t *handle, s +@@ -1545,7 +1836,25 @@ int split_index_node(handle_t *handle, s } safe = frame; @@ -523,8 +616,13 @@ Index: iam/fs/ext3/namei.c + /* + * Lock all nodes, bottom to top. + */ -+ for (frame = safe, i = 0; i <= nr_splet; ++i, ++frame) ++ for (frame = safe, i = 0; i <= nr_splet; ++i, ++frame) { + lock[i] = dx_lock_htree(dir, frame->curidx, DLT_WRITE); ++ if (lock[i] == NULL) { ++ err = -ENOMEM; ++ goto cleanup; ++ } ++ } + /* + * Check for concurrent index modification. + */ @@ -536,15 +634,19 @@ Index: iam/fs/ext3/namei.c * transaction... */ for (frame = safe + 1, i = 0; i < nr_splet; ++i, ++frame) { bh_new[i] = ext3_append (handle, dir, &newblock[i], &err); -@@ -1553,6 +1773,7 @@ int split_index_node(handle_t *handle, s +@@ -1553,6 +1862,11 @@ int split_index_node(handle_t *handle, s descr->id_ops->id_node_init(path->ip_container, bh_new[i], 0) != 0) goto cleanup; + new_lock[i] = dx_lock_htree(dir, newblock[i], DLT_WRITE); ++ if (new_lock[i] == NULL) { ++ err = -ENOMEM; ++ goto cleanup; ++ } BUFFER_TRACE(frame->bh, "get_write_access"); err = ext3_journal_get_write_access(handle, frame->bh); if (err) -@@ -1602,9 +1823,11 @@ int split_index_node(handle_t *handle, s +@@ -1602,9 +1916,11 @@ int split_index_node(handle_t *handle, s dx_set_limit(entries2, dx_node_limit(path)); /* Set up root */ @@ -556,7 +658,7 @@ Index: iam/fs/ext3/namei.c /* Shift frames in the path */ memmove(frames + 2, frames + 1, -@@ -1635,6 +1858,7 @@ int split_index_node(handle_t *handle, s +@@ -1635,6 +1951,7 @@ int split_index_node(handle_t *handle, s idx - count + d); frame->entries = entries = entries2; swap(frame->bh, bh2); @@ -564,7 +666,7 @@ Index: iam/fs/ext3/namei.c bh_new[i] = bh2; parent->at = iam_entry_shift(path, parent->at, +1); -@@ -1662,6 +1886,8 @@ int split_index_node(handle_t *handle, s +@@ -1662,6 +1979,8 @@ int split_index_node(handle_t *handle, s dx_get_limit(path->ip_frame->entries)); } if (nr_splet > 0) { @@ -573,24 +675,17 @@ Index: iam/fs/ext3/namei.c /* * Log ->i_size modification. */ -@@ -1674,6 +1900,16 @@ journal_error: +@@ -1674,6 +1993,9 @@ journal_error: ext3_std_error(dir->i_sb, err); cleanup: -+ for (i = 0; i < ARRAY_SIZE(lock); ++ i) { -+ if (lock[i] != NULL) -+ dx_unlock_htree(dir, lock[i]); -+ } -+ -+ for (i = 0; i < ARRAY_SIZE(new_lock); ++ i) { -+ if (new_lock[i] != NULL) -+ dx_unlock_htree(dir, new_lock[i]); -+ } ++ dx_unlock_array(dir, lock); ++ dx_unlock_array(dir, new_lock); + for (i = 0; i < ARRAY_SIZE(bh_new); ++i) { if (bh_new[i] != NULL) brelse(bh_new[i]); -@@ -1695,18 +1931,18 @@ static int ext3_dx_add_entry(handle_t *h +@@ -1695,18 +2017,18 @@ static int ext3_dx_add_entry(handle_t *h struct buffer_head * bh = NULL; struct inode *dir = dentry->d_parent->d_inode; struct ext3_dir_entry_2 *de; @@ -611,7 +706,7 @@ Index: iam/fs/ext3/namei.c isize = dir->i_size; err = param->id_ops->id_node_read(path->ip_container, -@@ -1726,7 +1962,7 @@ static int ext3_dx_add_entry(handle_t *h +@@ -1726,7 +2048,7 @@ static int ext3_dx_add_entry(handle_t *h goto cleanup; } @@ -620,7 +715,7 @@ Index: iam/fs/ext3/namei.c if (err) goto cleanup; -@@ -1742,6 +1978,7 @@ static int ext3_dx_add_entry(handle_t *h +@@ -1742,6 +2064,7 @@ static int ext3_dx_add_entry(handle_t *h journal_error: ext3_std_error(dir->i_sb, err); cleanup: -- 1.8.3.1