From: phil Date: Sun, 8 Feb 2004 20:12:10 +0000 (+0000) Subject: merging HEAD (including b_orphan, b_recovery) into b_size X-Git-Tag: v1_7_100~1^248~42 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=9af1c09b85495ca151d2a008d4d0fb3cbac55789;p=fs%2Flustre-release.git merging HEAD (including b_orphan, b_recovery) into b_size --- diff --git a/lustre/include/linux/lvfs_linux.h b/lustre/include/linux/lvfs_linux.h index b38d6f0..71fc431 100644 --- a/lustre/include/linux/lvfs_linux.h +++ b/lustre/include/linux/lvfs_linux.h @@ -1,3 +1,6 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ #ifndef __LVFS_LINUX_H__ #define __LVFS_LINUX_H__ @@ -22,17 +25,14 @@ struct l_file *l_dentry_open(struct obd_run_ctxt *, struct l_dentry *, int flags); struct l_linux_dirent { - ino_t d_ino; - unsigned long d_off; - unsigned short d_reclen; - char d_name[1]; + struct list_head lld_list; + ino_t lld_ino; + unsigned long lld_off; + char lld_name[LL_FID_NAMELEN]; }; - struct l_readdir_callback { - struct l_linux_dirent *current_dir; - struct l_linux_dirent *previous; - int count; - int error; + struct l_linux_dirent *lrc_dirent; + struct list_head *lrc_list; }; #endif diff --git a/lustre/kernel_patches/patches/ext3-extents-2.4.20.patch b/lustre/kernel_patches/patches/ext3-extents-2.4.20.patch index 1da5f7c..6422982 100644 --- a/lustre/kernel_patches/patches/ext3-extents-2.4.20.patch +++ b/lustre/kernel_patches/patches/ext3-extents-2.4.20.patch @@ -1,8 +1,8 @@ -Index: linux-2.4.20/fs/ext3/extents.c +Index: linux-2.4.24/fs/ext3/extents.c =================================================================== ---- linux-2.4.20.orig/fs/ext3/extents.c 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.20/fs/ext3/extents.c 2004-01-24 14:19:29.000000000 +0300 -@@ -0,0 +1,2224 @@ +--- linux-2.4.24.orig/fs/ext3/extents.c 2003-01-30 13:24:37.000000000 +0300 ++++ linux-2.4.24/fs/ext3/extents.c 2004-02-06 10:18:42.000000000 +0300 +@@ -0,0 +1,2347 @@ +/* + * Copyright (C) 2003 Alex Tomas + * @@ -88,13 +88,16 @@ Index: linux-2.4.20/fs/ext3/extents.c + struct ext3_extents_tree *tree, + struct ext3_ext_path *path) +{ ++ int err; ++ + if (path->p_bh) { + /* path points to block */ -+ return ext3_journal_get_write_access(handle, path->p_bh); ++ err = ext3_journal_get_write_access(handle, path->p_bh); ++ } else { ++ /* path points to leaf/index in inode body */ ++ err = ext3_ext_get_access_for_root(handle, tree); + } -+ -+ /* path points to leaf/index in inode body */ -+ return ext3_ext_get_access_for_root(handle, tree); ++ return err; +} + +/* @@ -106,13 +109,15 @@ Index: linux-2.4.20/fs/ext3/extents.c +static int ext3_ext_dirty(handle_t *handle, struct ext3_extents_tree *tree, + struct ext3_ext_path *path) +{ ++ int err; + if (path->p_bh) { + /* path points to block */ -+ return ext3_journal_dirty_metadata(handle, path->p_bh); ++ err =ext3_journal_dirty_metadata(handle, path->p_bh); ++ } else { ++ /* path points to leaf/index in inode body */ ++ err = ext3_ext_mark_root_dirty(handle, tree); + } -+ -+ /* path points to leaf/index in inode body */ -+ return ext3_ext_mark_root_dirty(handle, tree); ++ return err; +} + +static int inline @@ -148,6 +153,13 @@ Index: linux-2.4.20/fs/ext3/extents.c + return newblock; +} + ++static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree) ++{ ++ struct ext3_extent_header *neh; ++ neh = EXT_ROOT_HDR(tree); ++ neh->e_generation++; ++} ++ +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) +{ + int size; @@ -268,6 +280,7 @@ Index: linux-2.4.20/fs/ext3/extents.c + struct ext3_extent_idx *ix; + int l = 0, k, r; + ++ EXT_ASSERT(eh->e_magic == EXT3_EXT_MAGIC); + EXT_ASSERT(eh->e_num <= eh->e_max); + EXT_ASSERT(eh->e_num > 0); + @@ -303,6 +316,12 @@ Index: linux-2.4.20/fs/ext3/extents.c + + chix = ix = EXT_FIRST_INDEX(eh); + for (k = 0; k < eh->e_num; k++, ix++) { ++ if (k != 0 && ix->e_block <= ix[-1].e_block) { ++ printk("k=%d, ix=0x%p, first=0x%p\n", k, ++ ix, EXT_FIRST_INDEX(eh)); ++ printk("%u <= %u\n", ++ ix->e_block,ix[-1].e_block); ++ } + EXT_ASSERT(k == 0 || ix->e_block > ix[-1].e_block); + if (block < ix->e_block) + break; @@ -325,6 +344,7 @@ Index: linux-2.4.20/fs/ext3/extents.c + struct ext3_extent *ex; + int l = 0, k, r; + ++ EXT_ASSERT(eh->e_magic == EXT3_EXT_MAGIC); + EXT_ASSERT(eh->e_num <= eh->e_max); + + if (eh->e_num == 0) { @@ -388,6 +408,7 @@ Index: linux-2.4.20/fs/ext3/extents.c + eh = EXT_ROOT_HDR(tree); + eh->e_depth = 0; + eh->e_num = 0; ++ eh->e_magic = EXT3_EXT_MAGIC; + eh->e_max = ext3_ext_space_root(tree); + ext3_ext_mark_root_dirty(handle, tree); + return 0; @@ -406,8 +427,10 @@ Index: linux-2.4.20/fs/ext3/extents.c + EXT_ASSERT(tree->root); + + eh = EXT_ROOT_HDR(tree); ++ EXT_ASSERT(eh); + i = depth = EXT_DEPTH(tree); + EXT_ASSERT(eh->e_max); ++ EXT_ASSERT(eh->e_magic == EXT3_EXT_MAGIC); + EXT_ASSERT(i == 0 || eh->e_num > 0); + + /* account possible depth increase */ @@ -500,6 +523,9 @@ Index: linux-2.4.20/fs/ext3/extents.c + ix->e_leaf = ptr; + curp->p_hdr->e_num++; + ++ EXT_ASSERT(curp->p_hdr->e_num <= curp->p_hdr->e_max); ++ EXT_ASSERT(ix <= EXT_LAST_INDEX(curp->p_hdr)); ++ + err = ext3_ext_dirty(handle, tree, curp); + ext3_std_error(tree->inode->i_sb, err); + @@ -534,7 +560,7 @@ Index: linux-2.4.20/fs/ext3/extents.c + + /* if current leaf will be splitted, then we should use + * border from split point */ -+ ++ EXT_ASSERT(path[depth].p_ext <= EXT_MAX_EXTENT(path[depth].p_hdr)); + if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { + border = path[depth].p_ext[1].e_block; + ext_debug(tree, "leaf will be splitted." @@ -589,6 +615,8 @@ Index: linux-2.4.20/fs/ext3/extents.c + neh = EXT_BLOCK_HDR(bh); + neh->e_num = 0; + neh->e_max = ext3_ext_space_block(tree); ++ neh->e_magic = EXT3_EXT_MAGIC; ++ neh->e_depth = 0; + ex = EXT_FIRST_EXTENT(neh); + + /* move remain of path[depth] to the new leaf */ @@ -599,10 +627,11 @@ Index: linux-2.4.20/fs/ext3/extents.c + path[depth].p_ext++; + while (path[depth].p_ext <= + EXT_MAX_EXTENT(path[depth].p_hdr)) { -+ ext_debug(tree, "move %d:%d:%d in new leaf\n", ++ ext_debug(tree, "move %d:%d:%d in new leaf %lu\n", + path[depth].p_ext->e_block, + path[depth].p_ext->e_start, -+ path[depth].p_ext->e_num); ++ path[depth].p_ext->e_num, ++ newblock); + memmove(ex++, path[depth].p_ext++, + sizeof(struct ext3_extent)); + neh->e_num++; @@ -618,10 +647,10 @@ Index: linux-2.4.20/fs/ext3/extents.c + + /* correct old leaf */ + if (m) { -+ if ((err = ext3_ext_get_access(handle, tree, path))) ++ if ((err = ext3_ext_get_access(handle, tree, path + depth))) + goto cleanup; + path[depth].p_hdr->e_num -= m; -+ if ((err = ext3_ext_dirty(handle, tree, path))) ++ if ((err = ext3_ext_dirty(handle, tree, path + depth))) + goto cleanup; + + } @@ -649,33 +678,33 @@ Index: linux-2.4.20/fs/ext3/extents.c + + neh = EXT_BLOCK_HDR(bh); + neh->e_num = 1; ++ neh->e_magic = EXT3_EXT_MAGIC; + neh->e_max = ext3_ext_space_block_idx(tree); ++ neh->e_depth = depth - i; + fidx = EXT_FIRST_INDEX(neh); + fidx->e_block = border; + fidx->e_leaf = oldblock; + -+ ext_debug(tree, "int.index at %d (block %u): %d -> %d\n", -+ i, (unsigned) newblock, -+ (int) border, -+ (int) oldblock); ++ ext_debug(tree, "int.index at %d (block %lu): %lu -> %lu\n", ++ i, newblock, border, oldblock); + /* copy indexes */ + m = 0; + path[i].p_idx++; ++ + ext_debug(tree, "cur 0x%p, last 0x%p\n", path[i].p_idx, + EXT_MAX_INDEX(path[i].p_hdr)); + EXT_ASSERT(EXT_MAX_INDEX(path[i].p_hdr) == + EXT_LAST_INDEX(path[i].p_hdr)); -+ while (path[i].p_idx <= -+ EXT_MAX_INDEX(path[i].p_hdr)) { -+ ext_debug(tree, "%d: move %d:%d in new index\n", ++ while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { ++ ext_debug(tree, "%d: move %d:%d in new index %lu\n", + i, path[i].p_idx->e_block, -+ path[i].p_idx->e_leaf); ++ path[i].p_idx->e_leaf, newblock); + memmove(++fidx, path[i].p_idx++, + sizeof(struct ext3_extent_idx)); + neh->e_num++; ++ EXT_ASSERT(neh->e_num <= neh->e_max); + m++; + } -+ + mark_buffer_uptodate(bh, 1); + unlock_buffer(bh); + @@ -734,12 +763,12 @@ Index: linux-2.4.20/fs/ext3/extents.c + struct ext3_ext_path *path, + struct ext3_extent *newext) +{ -+ struct buffer_head *bh; + struct ext3_ext_path *curp = path; + struct ext3_extent_header *neh; + struct ext3_extent_idx *fidx; -+ int len, err = 0; ++ struct buffer_head *bh; + unsigned long newblock; ++ int err = 0; + + newblock = ext3_ext_new_block(handle, tree, path, newext, &err); + if (newblock == 0) @@ -759,14 +788,17 @@ Index: linux-2.4.20/fs/ext3/extents.c + } + + /* move top-level index/leaf into new block */ -+ len = sizeof(struct ext3_extent_header) + -+ sizeof(struct ext3_extent) * curp->p_hdr->e_max; -+ EXT_ASSERT(len >= 0 && len < 4096); -+ memmove(bh->b_data, curp->p_hdr, len); ++ memmove(bh->b_data, curp->p_hdr, tree->buffer_len); + + /* set size of new block */ + neh = EXT_BLOCK_HDR(bh); -+ neh->e_max = ext3_ext_space_block(tree); ++ /* old root could have indexes or leaves ++ * so calculate e_max right way */ ++ if (EXT_DEPTH(tree)) ++ neh->e_max = ext3_ext_space_block_idx(tree); ++ else ++ neh->e_max = ext3_ext_space_block(tree); ++ neh->e_magic = EXT3_EXT_MAGIC; + mark_buffer_uptodate(bh, 1); + unlock_buffer(bh); + @@ -777,9 +809,11 @@ Index: linux-2.4.20/fs/ext3/extents.c + if ((err = ext3_ext_get_access(handle, tree, curp))) + goto out; + ++ curp->p_hdr->e_magic = EXT3_EXT_MAGIC; + curp->p_hdr->e_max = ext3_ext_space_root_idx(tree); + curp->p_hdr->e_num = 1; + curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); ++ /* FIXME: it works, but actually path[0] can be index */ + curp->p_idx->e_block = EXT_FIRST_EXTENT(path[0].p_hdr)->e_block; + curp->p_idx->e_leaf = newblock; + @@ -839,7 +873,7 @@ Index: linux-2.4.20/fs/ext3/extents.c + path = ext3_ext_find_extent(tree, newext->e_block, path); + if (IS_ERR(path)) + err = PTR_ERR(path); -+ ++ + /* + * only first (depth 0 -> 1) produces free space + * in all other cases we have to split growed tree @@ -1003,15 +1037,15 @@ Index: linux-2.4.20/fs/ext3/extents.c + struct ext3_ext_path *path, + struct ext3_extent *newext) +{ -+ int depth, len; + struct ext3_extent_header * eh; -+ struct ext3_extent *ex; ++ struct ext3_extent *ex, *fex; + struct ext3_extent *nearex; /* nearest extent */ + struct ext3_ext_path *npath = NULL; -+ int err; ++ int depth, len, err, next; + + depth = EXT_DEPTH(tree); + ex = path[depth].p_ext; ++ EXT_ASSERT(path[depth].p_hdr); + + /* try to insert block into found extent and return */ + if (ex && ext3_can_extents_be_merged(tree, ex, newext)) { @@ -1021,43 +1055,49 @@ Index: linux-2.4.20/fs/ext3/extents.c + if ((err = ext3_ext_get_access(handle, tree, path + depth))) + return err; + ex->e_num += newext->e_num; -+ err = ext3_ext_dirty(handle, tree, path + depth); -+ return err; ++ eh = path[depth].p_hdr; ++ nearex = ex; ++ goto merge; + } + +repeat: + depth = EXT_DEPTH(tree); + eh = path[depth].p_hdr; -+ if (eh->e_num == eh->e_max) { -+ /* probably next leaf has space for us? */ -+ int next = ext3_ext_next_leaf_block(tree, path); -+ if (next != 0xffffffff) { -+ ext_debug(tree, "next leaf block - %d\n", next); -+ EXT_ASSERT(!npath); -+ npath = ext3_ext_find_extent(tree, next, NULL); -+ if (IS_ERR(npath)) -+ return PTR_ERR(npath); -+ EXT_ASSERT(npath->p_depth == path->p_depth); -+ eh = npath[depth].p_hdr; -+ if (eh->e_num < eh->e_max) { -+ ext_debug(tree, "next leaf isnt full(%d)\n", -+ eh->e_num); -+ path = npath; -+ goto repeat; -+ } -+ ext_debug(tree, "next leaf hasno free space(%d,%d)\n", -+ eh->e_num, eh->e_max); ++ if (eh->e_num < eh->e_max) ++ goto has_space; ++ ++ /* probably next leaf has space for us? */ ++ fex = EXT_LAST_EXTENT(eh); ++ next = ext3_ext_next_leaf_block(tree, path); ++ if (newext->e_block > fex->e_block && next != 0xffffffff) { ++ ext_debug(tree, "next leaf block - %d\n", next); ++ EXT_ASSERT(!npath); ++ npath = ext3_ext_find_extent(tree, next, NULL); ++ if (IS_ERR(npath)) ++ return PTR_ERR(npath); ++ EXT_ASSERT(npath->p_depth == path->p_depth); ++ eh = npath[depth].p_hdr; ++ if (eh->e_num < eh->e_max) { ++ ext_debug(tree, "next leaf isnt full(%d)\n", ++ eh->e_num); ++ path = npath; ++ goto repeat; + } -+ /* -+ * there is no free space in found leaf -+ * we're gonna add new leaf in the tree -+ */ -+ err = ext3_ext_create_new_leaf(handle, tree, path, newext); -+ if (err) -+ goto cleanup; -+ goto repeat; ++ ext_debug(tree, "next leaf hasno free space(%d,%d)\n", ++ eh->e_num, eh->e_max); + } + ++ /* ++ * there is no free space in found leaf ++ * we're gonna add new leaf in the tree ++ */ ++ err = ext3_ext_create_new_leaf(handle, tree, path, newext); ++ if (err) ++ goto cleanup; ++ depth = EXT_DEPTH(tree); ++ eh = path[depth].p_hdr; ++ ++has_space: + nearex = path[depth].p_ext; + + if ((err = ext3_ext_get_access(handle, tree, path + depth))) @@ -1091,22 +1131,39 @@ Index: linux-2.4.20/fs/ext3/extents.c + "move %d from 0x%p to 0x%p\n", + newext->e_block, newext->e_start, newext->e_num, + nearex, len, nearex + 1, nearex + 2); -+ + memmove(nearex + 1, nearex, len); + path[depth].p_ext = nearex; + } + -+ if (!err) { -+ eh->e_num++; -+ nearex = path[depth].p_ext; -+ nearex->e_block = newext->e_block; -+ nearex->e_start = newext->e_start; -+ nearex->e_num = newext->e_num; -+ -+ /* time to correct all indexes above */ -+ err = ext3_ext_correct_indexes(handle, tree, path); ++ eh->e_num++; ++ nearex = path[depth].p_ext; ++ nearex->e_block = newext->e_block; ++ nearex->e_start = newext->e_start; ++ nearex->e_num = newext->e_num; ++ ++merge: ++ /* try to merge extents to the right */ ++ while (nearex < EXT_LAST_EXTENT(eh)) { ++ if (!ext3_can_extents_be_merged(tree, nearex, nearex + 1)) ++ break; ++ /* merge with next extent! */ ++ nearex->e_num += nearex[1].e_num; ++ if (nearex + 1 < EXT_LAST_EXTENT(eh)) { ++ len = (EXT_LAST_EXTENT(eh) - nearex - 1) ++ * sizeof(struct ext3_extent); ++ memmove(nearex + 1, nearex + 2, len); ++ } ++ eh->e_num--; ++ EXT_ASSERT(eh->e_num > 0); + } + ++ /* try to merge extents to the left */ ++ ++ /* time to correct all indexes above */ ++ err = ext3_ext_correct_indexes(handle, tree, path); ++ if (err) ++ goto cleanup; ++ + err = ext3_ext_dirty(handle, tree, path + depth); + +cleanup: @@ -1114,7 +1171,7 @@ Index: linux-2.4.20/fs/ext3/extents.c + ext3_ext_drop_refs(npath); + kfree(npath); + } -+ ++ ext3_ext_tree_changed(tree); + return err; +} + @@ -1124,6 +1181,7 @@ Index: linux-2.4.20/fs/ext3/extents.c + struct ext3_ext_path *path = NULL; + struct ext3_extent *ex, cbex; + unsigned long next, start = 0, end = 0; ++ unsigned long last = block + num; + int depth, exists, err = 0; + + EXT_ASSERT(tree); @@ -1131,15 +1189,18 @@ Index: linux-2.4.20/fs/ext3/extents.c + EXT_ASSERT(tree->inode); + EXT_ASSERT(tree->root); + -+ while (num > 0 && block != 0xfffffffff) { ++ while (block < last && block != 0xfffffffff) { ++ num = last - block; + /* find extent for this block */ + path = ext3_ext_find_extent(tree, block, path); + if (IS_ERR(path)) { + err = PTR_ERR(path); ++ path = NULL; + break; + } + + depth = EXT_DEPTH(tree); ++ EXT_ASSERT(path[depth].p_hdr); + ex = path[depth].p_ext; + next = ext3_ext_next_allocated_block(path); + @@ -1148,58 +1209,61 @@ Index: linux-2.4.20/fs/ext3/extents.c + /* there is no extent yet, so try to allocate + * all requested space */ + start = block; -+ end = block + num - 1; ++ end = block + num; + } else if (ex->e_block > block) { + /* need to allocate space before found extent */ + start = block; -+ end = ex->e_block - 1; -+ if (block + num - 1 < end) -+ end = block + num - 1; ++ end = ex->e_block; ++ if (block + num < end) ++ end = block + num; + } else if (block >= ex->e_block + ex->e_num) { + /* need to allocate space after found extent */ + start = block; -+ end = block + num - 1; ++ end = block + num; + if (end >= next) -+ end = next - 1; ++ end = next; + } else if (block >= ex->e_block) { + /* + * some part of requested space is covered + * by found extent + */ + start = block; -+ end = ex->e_block + ex->e_num - 1; -+ if (block + num - 1 < end) -+ end = block + num - 1; ++ end = ex->e_block + ex->e_num; ++ if (block + num < end) ++ end = block + num; + exists = 1; + } else { + BUG(); + } ++ EXT_ASSERT(end > start); + + if (!exists) { + cbex.e_block = start; -+ cbex.e_num = end - start + 1; ++ cbex.e_num = end - start; + cbex.e_start = 0; + } else + cbex = *ex; + ++ EXT_ASSERT(path[depth].p_hdr); + err = func(tree, path, &cbex, exists); ++ ext3_ext_drop_refs(path); ++ + if (err < 0) + break; -+ -+ if (err == EXT_BREAK) { ++ if (err == EXT_REPEAT) ++ continue; ++ else if (err == EXT_BREAK) { + err = 0; + break; + } + + if (EXT_DEPTH(tree) != depth) { + /* depth was changed. we have to realloc path */ -+ ext3_ext_drop_refs(path); + kfree(path); + path = NULL; + } + -+ block += cbex.e_num; -+ num -= cbex.e_num; ++ block = cbex.e_block + cbex.e_num; + } + + if (path) { @@ -1241,6 +1305,9 @@ Index: linux-2.4.20/fs/ext3/extents.c + int depth = EXT_DEPTH(tree); + struct ext3_extent *ex, gex; + ++ if (!tree->cex) ++ return; ++ + ex = path[depth].p_ext; + if (ex == NULL) { + /* there is no extent yet, so gap is [0;-] */ @@ -1291,7 +1358,7 @@ Index: linux-2.4.20/fs/ext3/extents.c + ex->e_block = cex->e_block; + ex->e_start = cex->e_start; + ex->e_num = cex->e_num; -+ ext_debug(tree, "%lu cached by %lu:%lu:%lu(gap)\n", ++ ext_debug(tree, "%lu cached by %lu:%lu:%lu\n", + (unsigned long) block, + (unsigned long) ex->e_block, + (unsigned long) ex->e_num, @@ -1325,7 +1392,7 @@ Index: linux-2.4.20/fs/ext3/extents.c + ext_debug(tree, "index is empty, remove it, free block %d\n", + path->p_idx->e_leaf); + bh = sb_get_hash_table(tree->inode->i_sb, path->p_idx->e_leaf); -+ ext3_forget(handle, 0, tree->inode, bh, path->p_idx->e_leaf); ++ ext3_forget(handle, 1, tree->inode, bh, path->p_idx->e_leaf); + ext3_free_blocks(handle, tree->inode, path->p_idx->e_leaf, 1); + return err; +} @@ -1437,6 +1504,7 @@ Index: linux-2.4.20/fs/ext3/extents.c + eh = path[depth].p_hdr; + EXT_ASSERT(eh); + EXT_ASSERT(eh->e_num <= eh->e_max); ++ EXT_ASSERT(eh->e_magic == EXT3_EXT_MAGIC); + + /* find where to start removing */ + le = ex = EXT_LAST_EXTENT(eh); @@ -1638,6 +1706,7 @@ Index: linux-2.4.20/fs/ext3/extents.c + } + + EXT_ASSERT(path[i].p_hdr->e_num <= path[i].p_hdr->e_max); ++ EXT_ASSERT(path[i].p_hdr->e_magic == EXT3_EXT_MAGIC); + + if (!path[i].p_idx) { + /* this level hasn't touched yet */ @@ -1696,6 +1765,7 @@ Index: linux-2.4.20/fs/ext3/extents.c + err = ext3_ext_dirty(handle, tree, path); + } + } ++ ext3_ext_tree_changed(tree); + + kfree(path); + ext3_journal_stop(handle, inode); @@ -1712,8 +1782,16 @@ Index: linux-2.4.20/fs/ext3/extents.c + * possible initialization would be here + */ + -+ if (test_opt(sb, EXTENTS)) -+ printk("EXT3-fs: file extents enabled\n"); ++ if (test_opt(sb, EXTENTS)) { ++ printk("EXT3-fs: file extents enabled"); ++#ifdef AGRESSIVE_TEST ++ printk(", agressive tests"); ++#endif ++#ifdef CHECK_BINSEARCH ++ printk(", check binsearch"); ++#endif ++ printk("\n"); ++ } +} + +/* @@ -1736,8 +1814,7 @@ Index: linux-2.4.20/fs/ext3/extents.c +static int ext3_mark_buffer_dirty(handle_t *handle, void *buffer) +{ + struct inode *inode = buffer; -+ ext3_mark_inode_dirty(handle, inode); -+ return 0; ++ return ext3_mark_inode_dirty(handle, inode); +} + +static int ext3_ext_mergable(struct ext3_extent *ex1, @@ -1771,6 +1848,8 @@ Index: linux-2.4.20/fs/ext3/extents.c +{ + int needed = ext3_remove_blocks_credits(tree, ex, from, to); + handle_t *handle = ext3_journal_start(tree->inode, needed); ++ struct buffer_head *bh; ++ int i; + + if (IS_ERR(handle)) + return PTR_ERR(handle); @@ -1781,6 +1860,10 @@ Index: linux-2.4.20/fs/ext3/extents.c + start = ex->e_start + ex->e_num - num; + ext_debug(tree, "free last %lu blocks starting %lu\n", + num, start); ++ for (i = 0; i < num; i++) { ++ bh = sb_get_hash_table(tree->inode->i_sb, start + i); ++ ext3_forget(handle, 0, tree->inode, bh, start + i); ++ } + ext3_free_blocks(handle, tree->inode, start, num); + } else if (from == ex->e_block && to <= ex->e_block + ex->e_num - 1) { + printk("strange request: removal %lu-%lu from %u:%u\n", @@ -1793,8 +1876,8 @@ Index: linux-2.4.20/fs/ext3/extents.c + return 0; +} + -+static int ext3_ext_find_goal(struct inode *inode, -+ struct ext3_ext_path *path) ++static int ext3_ext_find_goal(struct inode *inode, struct ext3_ext_path *path, ++ unsigned long block) +{ + struct ext3_inode_info *ei = EXT3_I(inode); + unsigned long bg_start; @@ -1802,12 +1885,13 @@ Index: linux-2.4.20/fs/ext3/extents.c + int depth; + + if (path) { ++ struct ext3_extent *ex; + depth = path->p_depth; -+ /* try to find previous block */ -+ if (path[depth].p_ext) -+ return path[depth].p_ext->e_start + -+ path[depth].p_ext->e_num - 1; + ++ /* try to predict block placement */ ++ if ((ex = path[depth].p_ext)) ++ return ex->e_start + (block - ex->e_block); ++ + /* it looks index is empty + * try to find starting from index itself */ + if (path[depth].p_bh) @@ -1819,7 +1903,7 @@ Index: linux-2.4.20/fs/ext3/extents.c + le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); + colour = (current->pid % 16) * + (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); -+ return bg_start + colour; ++ return bg_start + colour + block; +} + +static int ext3_new_block_cb(handle_t *handle, struct ext3_extents_tree *tree, @@ -1840,7 +1924,7 @@ Index: linux-2.4.20/fs/ext3/extents.c + if (ex->e_num == 0) { + ex->e_num = 1; + /* allocate new block for the extent */ -+ goal = ext3_ext_find_goal(inode, path); ++ goal = ext3_ext_find_goal(inode, path, ex->e_block); + ex->e_start = ext3_new_block(handle, inode, goal, 0, 0, err); + if (ex->e_start == 0) { + /* error occured: restore old extent */ @@ -1864,39 +1948,51 @@ Index: linux-2.4.20/fs/ext3/extents.c + tree->remove_extent_credits = ext3_remove_blocks_credits; + tree->buffer = (void *) inode; + tree->buffer_len = sizeof(EXT3_I(inode)->i_data); -+ tree->cex = NULL; /* FIXME: add cache store later */ ++ tree->cex = (struct ext3_extent *) &EXT3_I(inode)->i_cached_extent; +} + -+#if 0 ++#if EXT3_MULTIBLOCK_ALLOCATOR +static int +ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, + struct ext3_ext_path *path, + struct ext3_extent *newex, int exist) +{ + struct inode *inode = tree->inode; ++ struct buffer_head *bh; + int count, err, goal; ++ unsigned long pblock; ++ unsigned long tgen; + loff_t new_i_size; + handle_t *handle; -+ unsigned long pblock; ++ int i; + + if (exist) + return EXT_CONTINUE; + ++ tgen = EXT_GENERATION(tree); + count = ext3_ext_calc_credits_for_insert(tree, path); ++ up_write(&EXT3_I(inode)->truncate_sem); ++ + handle = ext3_journal_start(inode, count + EXT3_ALLOC_NEEDED + 1); -+ if (IS_ERR(handle)) ++ if (IS_ERR(handle)) { ++ down_write(&EXT3_I(inode)->truncate_sem); + return PTR_ERR(handle); ++ } + -+ goal = ext3_ext_find_goal(inode, path); ++ if (tgen != EXT_GENERATION(tree)) { ++ /* the tree has changed. so path can be invalid at moment */ ++ ext3_journal_stop(handle, inode); ++ down_write(&EXT3_I(inode)->truncate_sem); ++ return EXT_REPEAT; ++ } ++ ++ down_write(&EXT3_I(inode)->truncate_sem); ++ goal = ext3_ext_find_goal(inode, path, newex->e_block); + count = newex->e_num; -+#ifdef EXT3_MULTIBLOCK_ALLOCATOR -+ pblock = ext3_new_block(handle, inode, goal, &count, NULL, &err); -+ EXT_ASSERT(count <= num); -+ /* FIXME: error handling here */ -+ EXT_ASSERT(err == 0); -+#else -+ pblock = 0; -+#endif ++ pblock = ext3_new_blocks(handle, inode, &count, goal, &err); ++ if (!pblock) ++ goto out; ++ EXT_ASSERT(count <= newex->e_num); + + /* insert new extent */ + newex->e_start = pblock; @@ -1905,12 +2001,22 @@ Index: linux-2.4.20/fs/ext3/extents.c + if (err) + goto out; + ++ /* block have been allocated for data, so time to drop dirty ++ * in correspondend buffer_heads to prevent corruptions */ ++ for (i = 0; i < newex->e_num; i++) { ++ bh = sb_get_hash_table(inode->i_sb, newex->e_start + i); ++ if (bh) { ++ mark_buffer_clean(bh); ++ wait_on_buffer(bh); ++ clear_bit(BH_Req, &bh->b_state); ++ __brelse(bh); ++ } ++ } ++ + /* correct on-disk inode size */ + if (newex->e_num > 0) { + new_i_size = (loff_t) newex->e_block + newex->e_num; + new_i_size = new_i_size << inode->i_blkbits; -+ if (new_i_size > i_size_read(inode)) -+ new_i_size = i_size_read(inode); + if (new_i_size > EXT3_I(inode)->i_disksize) { + EXT3_I(inode)->i_disksize = new_i_size; + err = ext3_mark_inode_dirty(handle, inode); @@ -1929,14 +2035,13 @@ Index: linux-2.4.20/fs/ext3/extents.c + struct ext3_extents_tree tree; + int err; + ++ ext3_init_tree_desc(&tree, inode); + ext_debug(&tree, "blocks %lu-%lu requested for inode %u\n", + block, block + num,(unsigned) inode->i_ino); -+ -+ ext3_init_tree_desc(&tree, inode); -+ down(&EXT3_I(inode)->truncate_sem); ++ down_write(&EXT3_I(inode)->truncate_sem); + err = ext3_ext_walk_space(&tree, block, num, ext3_ext_new_extent_cb); + ext3_ext_invalidate_cache(&tree); -+ up(&EXT3_I(inode)->truncate_sem); ++ up_write(&EXT3_I(inode)->truncate_sem); + + return err; +} @@ -1974,6 +2079,7 @@ Index: linux-2.4.20/fs/ext3/extents.c + path = ext3_ext_find_extent(&tree, iblock, NULL); + if (IS_ERR(path)) { + err = PTR_ERR(path); ++ path = NULL; + goto out2; + } + @@ -2009,7 +2115,7 @@ Index: linux-2.4.20/fs/ext3/extents.c + } + + /* allocate new block */ -+ goal = ext3_ext_find_goal(inode, path); ++ goal = ext3_ext_find_goal(inode, path, iblock); + newblock = ext3_new_block(handle, inode, goal, 0, 0, &err); + if (!newblock) + goto out2; @@ -2190,6 +2296,9 @@ Index: linux-2.4.20/fs/ext3/extents.c +{ + int err = 0; + ++ if (!(EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)) ++ return -EINVAL; ++ + if (cmd == EXT3_IOC_GET_EXTENTS) { + struct ext3_extent_buf buf; + struct ext3_extents_tree tree; @@ -2201,8 +2310,10 @@ Index: linux-2.4.20/fs/ext3/extents.c + buf.cur = buf.buffer; + buf.err = 0; + tree.private = &buf; ++ down_write(&EXT3_I(inode)->truncate_sem); + err = ext3_ext_walk_space(&tree, buf.start, 0xffffffff, + ext3_ext_store_extent_cb); ++ up_write(&EXT3_I(inode)->truncate_sem); + if (err == 0) + err = buf.err; + } else if (cmd == EXT3_IOC_GET_TREE_STATS) { @@ -2210,28 +2321,40 @@ Index: linux-2.4.20/fs/ext3/extents.c + struct ext3_extents_tree tree; + + ext3_init_tree_desc(&tree, inode); ++ down_write(&EXT3_I(inode)->truncate_sem); + buf.depth = EXT_DEPTH(&tree); + buf.extents_num = 0; + buf.leaf_num = 0; + tree.private = &buf; + err = ext3_ext_walk_space(&tree, 0, 0xffffffff, + ext3_ext_collect_stats_cb); ++ up_write(&EXT3_I(inode)->truncate_sem); + if (!err) + err = copy_to_user((void *) arg, &buf, sizeof(buf)); + } else if (cmd == EXT3_IOC_GET_TREE_DEPTH) { + struct ext3_extents_tree tree; + ext3_init_tree_desc(&tree, inode); ++ down_write(&EXT3_I(inode)->truncate_sem); + err = EXT_DEPTH(&tree); ++ up_write(&EXT3_I(inode)->truncate_sem); + } + + return err; +} + -Index: linux-2.4.20/fs/ext3/ialloc.c ++EXPORT_SYMBOL(ext3_init_tree_desc); ++EXPORT_SYMBOL(ext3_mark_inode_dirty); ++EXPORT_SYMBOL(ext3_ext_invalidate_cache); ++EXPORT_SYMBOL(ext3_ext_insert_extent); ++EXPORT_SYMBOL(ext3_ext_walk_space); ++EXPORT_SYMBOL(ext3_ext_find_goal); ++EXPORT_SYMBOL(ext3_ext_calc_credits_for_insert); ++ +Index: linux-2.4.24/fs/ext3/ialloc.c =================================================================== ---- linux-2.4.20.orig/fs/ext3/ialloc.c 2004-01-23 19:00:25.000000000 +0300 -+++ linux-2.4.20/fs/ext3/ialloc.c 2004-01-24 00:45:20.000000000 +0300 -@@ -593,11 +593,13 @@ +--- linux-2.4.24.orig/fs/ext3/ialloc.c 2004-01-14 02:58:45.000000000 +0300 ++++ linux-2.4.24/fs/ext3/ialloc.c 2004-01-26 23:17:19.000000000 +0300 +@@ -592,11 +592,13 @@ iloc.bh = NULL; goto fail; } @@ -2247,10 +2370,10 @@ Index: linux-2.4.20/fs/ext3/ialloc.c unlock_super (sb); if(DQUOT_ALLOC_INODE(inode)) { DQUOT_DROP(inode); -Index: linux-2.4.20/fs/ext3/inode.c +Index: linux-2.4.24/fs/ext3/inode.c =================================================================== ---- linux-2.4.20.orig/fs/ext3/inode.c 2004-01-23 19:00:25.000000000 +0300 -+++ linux-2.4.20/fs/ext3/inode.c 2004-01-24 04:34:04.000000000 +0300 +--- linux-2.4.24.orig/fs/ext3/inode.c 2004-01-14 02:58:45.000000000 +0300 ++++ linux-2.4.24/fs/ext3/inode.c 2004-01-26 23:17:19.000000000 +0300 @@ -848,6 +848,15 @@ goto reread; } @@ -2304,7 +2427,7 @@ Index: linux-2.4.20/fs/ext3/inode.c handle = start_transaction(inode); if (IS_ERR(handle)) return; /* AKPM: return what? */ -@@ -2537,6 +2549,9 @@ +@@ -2536,6 +2548,9 @@ int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3; int ret; @@ -2314,7 +2437,7 @@ Index: linux-2.4.20/fs/ext3/inode.c if (ext3_should_journal_data(inode)) ret = 3 * (bpp + indirects) + 2; else -@@ -2973,7 +2988,7 @@ +@@ -2972,7 +2987,7 @@ /* alloc blocks one by one */ for (i = 0; i < nblocks; i++) { @@ -2323,7 +2446,7 @@ Index: linux-2.4.20/fs/ext3/inode.c &bh_tmp, 1); if (ret) break; -@@ -3049,7 +3064,7 @@ +@@ -3048,7 +3063,7 @@ if (blocks[i] != 0) continue; @@ -2332,24 +2455,26 @@ Index: linux-2.4.20/fs/ext3/inode.c if (rc) { printk(KERN_INFO "ext3_map_inode_page: error %d " "allocating block %ld\n", rc, iblock); -Index: linux-2.4.20/fs/ext3/Makefile +Index: linux-2.4.24/fs/ext3/Makefile =================================================================== ---- linux-2.4.20.orig/fs/ext3/Makefile 2004-01-23 19:00:42.000000000 +0300 -+++ linux-2.4.20/fs/ext3/Makefile 2004-01-24 00:45:20.000000000 +0300 -@@ -13,7 +13,7 @@ +--- linux-2.4.24.orig/fs/ext3/Makefile 2004-01-14 02:58:45.000000000 +0300 ++++ linux-2.4.24/fs/ext3/Makefile 2004-02-05 18:44:25.000000000 +0300 +@@ -13,7 +13,9 @@ obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o \ - xattr_trusted.o + xattr_trusted.o extents.o ++export-objs += extents.o ++ obj-m := $(O_TARGET) export-objs += xattr.o -Index: linux-2.4.20/fs/ext3/super.c +Index: linux-2.4.24/fs/ext3/super.c =================================================================== ---- linux-2.4.20.orig/fs/ext3/super.c 2004-01-23 19:00:25.000000000 +0300 -+++ linux-2.4.20/fs/ext3/super.c 2004-01-24 04:30:14.000000000 +0300 -@@ -623,6 +623,7 @@ +--- linux-2.4.24.orig/fs/ext3/super.c 2004-01-14 02:58:45.000000000 +0300 ++++ linux-2.4.24/fs/ext3/super.c 2004-01-26 23:17:19.000000000 +0300 +@@ -530,6 +530,7 @@ int i; J_ASSERT(sbi->s_delete_inodes == 0); @@ -2357,7 +2482,7 @@ Index: linux-2.4.20/fs/ext3/super.c ext3_xattr_put_super(sb); journal_destroy(sbi->s_journal); if (!(sb->s_flags & MS_RDONLY)) { -@@ -796,6 +797,10 @@ +@@ -702,6 +703,10 @@ return 0; } } @@ -2368,7 +2493,7 @@ Index: linux-2.4.20/fs/ext3/super.c else if (!strcmp (this_char, "grpid") || !strcmp (this_char, "bsdgroups")) set_opt (*mount_options, GRPID); -@@ -1485,6 +1490,8 @@ +@@ -1392,6 +1397,8 @@ test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": "writeback"); @@ -2377,11 +2502,11 @@ Index: linux-2.4.20/fs/ext3/super.c return sb; failed_mount3: -Index: linux-2.4.20/fs/ext3/ioctl.c +Index: linux-2.4.24/fs/ext3/ioctl.c =================================================================== ---- linux-2.4.20.orig/fs/ext3/ioctl.c 2004-01-13 17:00:09.000000000 +0300 -+++ linux-2.4.20/fs/ext3/ioctl.c 2004-01-24 14:54:31.000000000 +0300 -@@ -189,6 +189,10 @@ +--- linux-2.4.24.orig/fs/ext3/ioctl.c 2004-01-14 02:58:42.000000000 +0300 ++++ linux-2.4.24/fs/ext3/ioctl.c 2004-01-26 23:17:19.000000000 +0300 +@@ -174,6 +174,10 @@ return ret; } #endif @@ -2392,10 +2517,10 @@ Index: linux-2.4.20/fs/ext3/ioctl.c default: return -ENOTTY; } -Index: linux-2.4.20/include/linux/ext3_fs.h +Index: linux-2.4.24/include/linux/ext3_fs.h =================================================================== ---- linux-2.4.20.orig/include/linux/ext3_fs.h 2004-01-23 19:00:25.000000000 +0300 -+++ linux-2.4.20/include/linux/ext3_fs.h 2004-01-24 01:28:06.000000000 +0300 +--- linux-2.4.24.orig/include/linux/ext3_fs.h 2004-01-14 02:58:45.000000000 +0300 ++++ linux-2.4.24/include/linux/ext3_fs.h 2004-01-30 00:09:37.000000000 +0300 @@ -184,6 +184,7 @@ #define EXT3_IMAGIC_FL 0x00002000 /* AFS directory */ #define EXT3_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ @@ -2423,7 +2548,7 @@ Index: linux-2.4.20/include/linux/ext3_fs.h /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H -@@ -687,6 +693,7 @@ +@@ -688,6 +694,7 @@ extern unsigned long ext3_count_free (struct buffer_head *, unsigned); /* inode.c */ @@ -2431,7 +2556,7 @@ Index: linux-2.4.20/include/linux/ext3_fs.h extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); -@@ -767,6 +774,14 @@ +@@ -769,6 +776,14 @@ extern struct inode_operations ext3_symlink_inode_operations; extern struct inode_operations ext3_fast_symlink_inode_operations; @@ -2446,11 +2571,11 @@ Index: linux-2.4.20/include/linux/ext3_fs.h #endif /* __KERNEL__ */ -Index: linux-2.4.20/include/linux/ext3_extents.h +Index: linux-2.4.24/include/linux/ext3_extents.h =================================================================== ---- linux-2.4.20.orig/include/linux/ext3_extents.h 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.20/include/linux/ext3_extents.h 2004-01-24 15:15:11.000000000 +0300 -@@ -0,0 +1,207 @@ +--- linux-2.4.24.orig/include/linux/ext3_extents.h 2003-01-30 13:24:37.000000000 +0300 ++++ linux-2.4.24/include/linux/ext3_extents.h 2004-02-05 20:31:08.000000000 +0300 +@@ -0,0 +1,216 @@ +/* + * Copyright (C) 2003 Alex Tomas + * @@ -2468,6 +2593,8 @@ Index: linux-2.4.20/include/linux/ext3_extents.h + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + */ + ++#ifndef _LINUX_EXT3_EXTENTS ++#define _LINUX_EXT3_EXTENTS + +/* + * with AGRESSIVE_TEST defined capacity of index/leaf blocks @@ -2505,7 +2632,7 @@ Index: linux-2.4.20/include/linux/ext3_extents.h +#define EXT_STATS_ + + -+#define EXT3_ALLOC_NEEDED 2 /* block bitmap + group descriptor */ ++#define EXT3_ALLOC_NEEDED 3 /* block bitmap + group desc. + sb */ + +/* + * ext3_inode has i_block array (total 60 bytes) @@ -2542,8 +2669,11 @@ Index: linux-2.4.20/include/linux/ext3_extents.h + __u16 e_num; /* number of valid entries */ + __u16 e_max; /* capacity of store in entries */ + __u16 e_depth; /* has tree real underlaying blocks? */ ++ __u32 e_generation; /* generation of the tree */ +}; + ++#define EXT3_EXT_MAGIC 0xf301 ++ +/* + * array of ext3_ext_path contains path to some extent + * creation/lookup routines use it for traversal/splitting/etc @@ -2600,6 +2730,7 @@ Index: linux-2.4.20/include/linux/ext3_extents.h + +#define EXT_CONTINUE 0 +#define EXT_BREAK 1 ++#define EXT_REPEAT 2 + + +#define EXT_FIRST_EXTENT(__hdr__) \ @@ -2625,6 +2756,8 @@ Index: linux-2.4.20/include/linux/ext3_extents.h + ((struct ext3_extent_header *) (bh)->b_data) +#define EXT_DEPTH(_t_) \ + (((struct ext3_extent_header *)((_t_)->root))->e_depth) ++#define EXT_GENERATION(_t_) \ ++ (((struct ext3_extent_header *)((_t_)->root))->e_generation) + + +#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); @@ -2657,4 +2790,18 @@ Index: linux-2.4.20/include/linux/ext3_extents.h +extern int ext3_ext_remove_space(struct ext3_extents_tree *, unsigned long, unsigned long); +extern struct ext3_ext_path * ext3_ext_find_extent(struct ext3_extents_tree *, int, struct ext3_ext_path *); + ++#endif /* _LINUX_EXT3_EXTENTS */ + +Index: linux-2.4.24/include/linux/ext3_fs_i.h +=================================================================== +--- linux-2.4.24.orig/include/linux/ext3_fs_i.h 2004-01-24 19:30:22.000000000 +0300 ++++ linux-2.4.24/include/linux/ext3_fs_i.h 2004-01-26 23:17:19.000000000 +0300 +@@ -76,6 +76,8 @@ + * by other means, so we have truncate_sem. + */ + struct rw_semaphore truncate_sem; ++ ++ __u32 i_cached_extent[3]; + }; + + #endif /* _LINUX_EXT3_FS_I */ diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.19-pre1.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.19-pre1.patch index 43abf92..98fd550 100644 --- a/lustre/kernel_patches/patches/vfs_intent-2.4.19-pre1.patch +++ b/lustre/kernel_patches/patches/vfs_intent-2.4.19-pre1.patch @@ -1621,9 +1621,9 @@ Index: linux-2.4.19-pre1/include/linux/dcache.h + void (*d_unpin)(struct dentry *, struct vfsmount *, int); }; -+#define PIN(de,mnt,flag) if (de->d_op && de->d_op->d_pin) \ ++#define PIN(de,mnt,flag) if (de && de->d_op && de->d_op->d_pin) \ + de->d_op->d_pin(de, mnt, flag); -+#define UNPIN(de,mnt,flag) if (de->d_op && de->d_op->d_unpin) \ ++#define UNPIN(de,mnt,flag) if (de && de->d_op && de->d_op->d_unpin) \ + de->d_op->d_unpin(de, mnt, flag); + + diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.19-suse.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.19-suse.patch index 4e37bb8..b49babd 100644 --- a/lustre/kernel_patches/patches/vfs_intent-2.4.19-suse.patch +++ b/lustre/kernel_patches/patches/vfs_intent-2.4.19-suse.patch @@ -1601,9 +1601,9 @@ Index: linux-2.4.19.SuSE/include/linux/dcache.h + void (*d_unpin)(struct dentry *, struct vfsmount *, int); }; -+#define PIN(de,mnt,flag) if (de->d_op && de->d_op->d_pin) \ ++#define PIN(de,mnt,flag) if (de && de->d_op && de->d_op->d_pin) \ + de->d_op->d_pin(de, mnt, flag); -+#define UNPIN(de,mnt,flag) if (de->d_op && de->d_op->d_unpin) \ ++#define UNPIN(de,mnt,flag) if (de && de->d_op && de->d_op->d_unpin) \ + de->d_op->d_unpin(de, mnt, flag); + + diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.22-rh.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.22-rh.patch index 5f266a8..ace8619 100644 --- a/lustre/kernel_patches/patches/vfs_intent-2.4.22-rh.patch +++ b/lustre/kernel_patches/patches/vfs_intent-2.4.22-rh.patch @@ -1583,9 +1583,9 @@ + void (*d_unpin)(struct dentry *, struct vfsmount *, int); }; -+#define PIN(de,mnt,flag) if (de->d_op && de->d_op->d_pin) \ ++#define PIN(de,mnt,flag) if (de && de->d_op && de->d_op->d_pin) \ + de->d_op->d_pin(de, mnt, flag); -+#define UNPIN(de,mnt,flag) if (de->d_op && de->d_op->d_unpin) \ ++#define UNPIN(de,mnt,flag) if (de && de->d_op && de->d_op->d_unpin) \ + de->d_op->d_unpin(de, mnt, flag); + + diff --git a/lustre/kernel_patches/targets/rh-2.4.target b/lustre/kernel_patches/targets/rh-2.4.target index 47585d9..275cfff 100644 --- a/lustre/kernel_patches/targets/rh-2.4.target +++ b/lustre/kernel_patches/targets/rh-2.4.target @@ -1,11 +1,12 @@ -KERNEL=linux-2.4.20-20.9.tar.gz +KERNEL=linux-2.4.20-28.9.tar.gz SERIES=rh-2.4.20 VERSION=2.4.20 -EXTRA_VERSION=20.9 +EXTRA_VERSION=28.9_lustre -BASE_ARCHS="i386" +BASE_ARCHS="i586" BIGMEM_ARCHS="" BOOT_ARCHS="" JENSEN_ARCHS="" -SMP_ARCHS="i686" +SMP_ARCHS="i586" UP_ARCHS="" +SRC_ARCHS="i586" diff --git a/lustre/lov/lov_log.c b/lustre/lov/lov_log.c index 0b9f6f3..59dc29e 100644 --- a/lustre/lov/lov_log.c +++ b/lustre/lov/lov_log.c @@ -51,26 +51,6 @@ #include "lov_internal.h" -#if 0 -static int lov_logop_cleanup(struct llog_ctxt *ctxt) -{ - struct lov_obd *lov = &ctxt->loc_obd->u.lov; - int i, rc = 0; - - ENTRY; - for (i = 0; i < lov->desc.ld_tgt_count; i++) { - struct obd_device *child = lov->tgts[i].ltd_exp->exp_obd; - struct llog_ctxt *cctxt = llog_get_context(child, ctxt->loc_idx); - rc = llog_cleanup(cctxt); - if (rc) { - CERROR("error lov_llog_open %d\n", i); - break; - } - } - RETURN(rc); -} -#endif - /* Add log records for each OSC that this object is striped over, and return * cookies for each one. We _would_ have nice abstraction here, except that * we need to keep cookies in stripe order, even if some are NULL, so that @@ -101,7 +81,8 @@ static int lov_llog_origin_add(struct llog_ctxt *ctxt, lur->lur_oid = loi->loi_id; lur->lur_ogen = loi->loi_gr; - rc += llog_add(cctxt, &lur->lur_hdr, NULL, logcookies + rc, numcookies - rc); + rc += llog_add(cctxt, &lur->lur_hdr, NULL, logcookies + rc, + numcookies - rc); } OBD_FREE(lur, sizeof(*lur)); @@ -110,8 +91,8 @@ static int lov_llog_origin_add(struct llog_ctxt *ctxt, } static int lov_llog_origin_connect(struct llog_ctxt *ctxt, int count, - struct llog_logid *logid, - struct llog_ctxt_gen *gen) + struct llog_logid *logid, + struct llog_gen *gen) { struct obd_device *obd = ctxt->loc_obd; struct lov_obd *lov = &obd->u.lov; diff --git a/lustre/lvfs/lvfs_linux.c b/lustre/lvfs/lvfs_linux.c index 61cd57c..935548e 100644 --- a/lustre/lvfs/lvfs_linux.c +++ b/lustre/lvfs/lvfs_linux.c @@ -50,6 +50,10 @@ #include #include +atomic_t obd_memory; +int obd_memmax; + + /* Debugging check only needed during development */ #ifdef OBD_CTXT_DEBUG # define ASSERT_CTXT_MAGIC(magic) LASSERT((magic) == OBD_RUN_CTXT_MAGIC) @@ -308,49 +312,45 @@ static int l_filldir(void *__buf, const char *name, int namlen, loff_t offset, { struct l_linux_dirent *dirent; struct l_readdir_callback *buf = (struct l_readdir_callback *)__buf; - int reclen = size_round(offsetof(struct l_linux_dirent, d_name) + namlen + 1); - buf->error = -EINVAL; - if (reclen > buf->count) - return -EINVAL; - dirent = buf->previous; + dirent = buf->lrc_dirent; if (dirent) - dirent->d_off = offset; - dirent = buf->current_dir; - buf->previous = dirent; - dirent->d_ino = ino; - dirent->d_reclen = reclen; - memcpy(dirent->d_name, name, namlen); - ((char *)dirent) += reclen; - buf->current_dir = dirent; - buf->count -= reclen; + dirent->lld_off = offset; + + OBD_ALLOC(dirent, sizeof(*dirent)); + + list_add_tail(&dirent->lld_list, buf->lrc_list); + + buf->lrc_dirent = dirent; + dirent->lld_ino = ino; + LASSERT(sizeof(dirent->lld_name) >= namlen + 1); + memcpy(dirent->lld_name, name, namlen); + return 0; } -long l_readdir(struct file * file, void * dirent, unsigned int count) +long l_readdir(struct file *file, struct list_head *dentry_list) { - struct l_linux_dirent * lastdirent; + struct l_linux_dirent *lastdirent; struct l_readdir_callback buf; int error; - buf.current_dir = (struct l_linux_dirent *)dirent; - buf.previous = NULL; - buf.count = count; - buf.error = 0; + buf.lrc_dirent = NULL; + buf.lrc_list = dentry_list; error = vfs_readdir(file, l_filldir, &buf); if (error < 0) return error; - error = buf.error; - lastdirent = buf.previous; - if (lastdirent) { - lastdirent->d_off = file->f_pos; - error = count - buf.count; - } - return error; + lastdirent = buf.lrc_dirent; + if (lastdirent) + lastdirent->lld_off = file->f_pos; + + return 0; } EXPORT_SYMBOL(l_readdir); +EXPORT_SYMBOL(obd_memory); +EXPORT_SYMBOL(obd_memmax); #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) @@ -361,6 +361,12 @@ static int __init lvfs_linux_init(void) static void __exit lvfs_linux_exit(void) { + int leaked; + ENTRY; + + leaked = atomic_read(&obd_memory); + CDEBUG(leaked ? D_ERROR : D_INFO, + "obd mem max: %d leaked: %d\n", obd_memmax, leaked); return; } diff --git a/lustre/mds/mds_log.c b/lustre/mds/mds_log.c index a9b02ee..549c760 100644 --- a/lustre/mds/mds_log.c +++ b/lustre/mds/mds_log.c @@ -54,7 +54,7 @@ static int mds_llog_origin_add(struct llog_ctxt *ctxt, static int mds_llog_origin_connect(struct llog_ctxt *ctxt, int count, struct llog_logid *logid, - struct llog_ctxt_gen *gen) + struct llog_gen *gen) { struct obd_device *obd = ctxt->loc_obd; struct obd_device *lov_obd = obd->u.mds.mds_osc_obd; @@ -86,9 +86,7 @@ int mds_log_op_unlink(struct obd_device *obd, struct inode *inode, { struct mds_obd *mds = &obd->u.mds; struct lov_stripe_md *lsm = NULL; -#ifdef ENABLE_ORPHANS struct llog_ctxt *ctxt; -#endif int rc; ENTRY; @@ -101,11 +99,9 @@ int mds_log_op_unlink(struct obd_device *obd, struct inode *inode, if (rc < 0) RETURN(rc); -#ifdef ENABLE_ORPHANS ctxt = llog_get_context(obd, LLOG_UNLINK_ORIG_CTXT); rc = llog_add(ctxt, NULL, lsm, lustre_msg_buf(repmsg, offset + 1, 0), repmsg->buflens[offset + 1] / sizeof(struct llog_cookie)); -#endif obd_free_memmd(mds->mds_osc_exp, &lsm); diff --git a/lustre/obdclass/llog.c b/lustre/obdclass/llog.c index ad7ddcd..82ceab4 100644 --- a/lustre/obdclass/llog.c +++ b/lustre/obdclass/llog.c @@ -77,7 +77,7 @@ void llog_free_handle(struct llog_handle *loghandle) } EXPORT_SYMBOL(llog_free_handle); -/* returns negative on error; 0 if success; 1 if success & log destroyed */ +/* returns negative on error; 0 if success; 1 if success & log destroyed */ int llog_cancel_rec(struct llog_handle *loghandle, int index) { struct llog_log_hdr *llh = loghandle->lgh_hdr; @@ -101,7 +101,7 @@ int llog_cancel_rec(struct llog_handle *loghandle, int index) if ((le32_to_cpu(llh->llh_flags) & LLOG_F_ZAP_WHEN_EMPTY) && (le32_to_cpu(llh->llh_count) == 1) && - (loghandle->lgh_last_idx == (LLOG_BITMAP_BYTES * 8) - 1)) { + (loghandle->lgh_last_idx == (LLOG_BITMAP_BYTES * 8) - 1)) { rc = llog_destroy(loghandle); if (rc) CERROR("failure destroying log after last cancel: %d\n", @@ -111,7 +111,7 @@ int llog_cancel_rec(struct llog_handle *loghandle, int index) } rc = llog_write_rec(loghandle, &llh->llh_hdr, NULL, 0, NULL, 0); - if (rc) + if (rc) CERROR("failure re-writing header %d\n", rc); LASSERT(rc == 0); RETURN(rc); @@ -144,16 +144,17 @@ int llog_init_handle(struct llog_handle *handle, int flags, GOTO(out, rc); } rc = 0; - + handle->lgh_last_idx = 0; /* header is record with index 0 */ llh->llh_count = cpu_to_le32(1); /* for the header record */ llh->llh_hdr.lrh_type = cpu_to_le32(LLOG_HDR_MAGIC); - llh->llh_hdr.lrh_len = llh->llh_tail.lrt_len = cpu_to_le32(LLOG_CHUNK_SIZE); + llh->llh_hdr.lrh_len = llh->llh_tail.lrt_len = + cpu_to_le32(LLOG_CHUNK_SIZE); llh->llh_hdr.lrh_index = llh->llh_tail.lrt_index = 0; llh->llh_timestamp = cpu_to_le64(LTIME_S(CURRENT_TIME)); if (uuid) memcpy(&llh->llh_tgtuuid, uuid, sizeof(llh->llh_tgtuuid)); - llh->llh_bitmap_offset = cpu_to_le32(offsetof(typeof(*llh), llh_bitmap)); + llh->llh_bitmap_offset = cpu_to_le32(offsetof(typeof(*llh),llh_bitmap)); ext2_set_bit(0, llh->llh_bitmap); out: @@ -165,7 +166,7 @@ out: INIT_LIST_HEAD(&handle->u.phd.phd_entry); else LBUG(); - + if (rc) { OBD_FREE(llh, sizeof(*llh)); handle->lgh_hdr = NULL; @@ -192,12 +193,14 @@ int llog_close(struct llog_handle *loghandle) } EXPORT_SYMBOL(llog_close); -int llog_process(struct llog_handle *loghandle, llog_cb_t cb, void *data) +int llog_process(struct llog_handle *loghandle, llog_cb_t cb, + void *data, void *catdata) { struct llog_log_hdr *llh = loghandle->lgh_hdr; + struct llog_process_cat_data *cd = catdata; void *buf; __u64 cur_offset = LLOG_CHUNK_SIZE; - int rc = 0, index = 1; + int rc = 0, index = 1, last_index, idx; int saved_index = 0; ENTRY; @@ -205,27 +208,41 @@ int llog_process(struct llog_handle *loghandle, llog_cb_t cb, void *data) if (!buf) RETURN(-ENOMEM); + if (cd != NULL) + index = cd->first_idx + 1; + if (cd != NULL && cd->last_idx) + last_index = cd->last_idx; + else + last_index = LLOG_BITMAP_BYTES * 8 - 1; + + while (rc == 0) { struct llog_rec_hdr *rec; - + /* skip records not set in bitmap */ - while (index < (LLOG_BITMAP_BYTES * 8) && + while (index <= last_index && !ext2_test_bit(index, llh->llh_bitmap)) ++index; - LASSERT(index <= LLOG_BITMAP_BYTES * 8); - if (index == LLOG_BITMAP_BYTES * 8) + LASSERT(index <= last_index + 1); + if (index == last_index + 1) break; /* get the buf with our target record; avoid old garbage */ memset(buf, 0, LLOG_CHUNK_SIZE); - rc = llog_next_block(loghandle, &saved_index, index, + rc = llog_next_block(loghandle, &saved_index, index, &cur_offset, buf, LLOG_CHUNK_SIZE); if (rc) GOTO(out, rc); rec = buf; - index = le32_to_cpu(rec->lrh_index); + idx = le32_to_cpu(rec->lrh_index); + if (idx < index) + CDEBUG(D_HA, "index %u : idx %u\n", index, idx); + while (idx < index) { + rec = ((void *)rec + le32_to_cpu(rec->lrh_len)); + idx ++; + } /* process records in buffer, starting where we found one */ while ((void *)rec < buf + LLOG_CHUNK_SIZE) { @@ -235,13 +252,20 @@ int llog_process(struct llog_handle *loghandle, llog_cb_t cb, void *data) /* if set, process the callback on this record */ if (ext2_test_bit(index, llh->llh_bitmap)) { rc = cb(loghandle, rec, data); - if (rc) + if (rc == LLOG_PROC_BREAK) { + CWARN("recovery from log: "LPX64":%x" + " stopped\n", + loghandle->lgh_id.lgl_oid, + loghandle->lgh_id.lgl_ogen); + GOTO(out, rc); + } + if (rc) GOTO(out, rc); } /* next record, still in buffer? */ ++index; - if (index > LLOG_BITMAP_BYTES * 8 - 1) + if (index > last_index) GOTO(out, rc = 0); rec = ((void *)rec + le32_to_cpu(rec->lrh_len)); } diff --git a/lustre/obdclass/llog_cat.c b/lustre/obdclass/llog_cat.c index 4c49a10..b0e82fe 100644 --- a/lustre/obdclass/llog_cat.c +++ b/lustre/obdclass/llog_cat.c @@ -52,41 +52,44 @@ static struct llog_handle *llog_cat_new_log(struct llog_handle *cathandle) struct llog_handle *loghandle; struct llog_log_hdr *llh; struct llog_logid_rec rec; - int rc, index, bitmap_size, i; + int rc, index, bitmap_size; ENTRY; + llh = cathandle->lgh_hdr; + bitmap_size = sizeof(llh->llh_bitmap) * 8; + + index = (cathandle->lgh_last_idx + 1) % bitmap_size; + + /* maximum number of available slots in catlog is bitmap_size - 2 */ + if (llh->llh_cat_idx == cpu_to_le32(index)) { + CERROR("no free catalog slots for log...\n"); + RETURN(ERR_PTR(-ENOSPC)); + } else { + if (index == 0) + index = 1; + if (ext2_set_bit(index, llh->llh_bitmap)) { + CERROR("argh, index %u already set in log bitmap?\n", + index); + LBUG(); /* should never happen */ + } + cathandle->lgh_last_idx = index; + llh->llh_count = cpu_to_le32(le32_to_cpu(llh->llh_count) + 1); + llh->llh_tail.lrt_index = cpu_to_le32(index); + } + rc = llog_create(cathandle->lgh_ctxt, &loghandle, NULL, NULL); if (rc) RETURN(ERR_PTR(rc)); - rc = llog_init_handle(loghandle, - LLOG_F_IS_PLAIN | LLOG_F_ZAP_WHEN_EMPTY, + rc = llog_init_handle(loghandle, + LLOG_F_IS_PLAIN | LLOG_F_ZAP_WHEN_EMPTY, &cathandle->lgh_hdr->llh_tgtuuid); if (rc) GOTO(out_destroy, rc); - /* Find first free entry */ - llh = cathandle->lgh_hdr; - bitmap_size = sizeof(llh->llh_bitmap) * 8; - for (i = 0, index = le32_to_cpu(llh->llh_count); i < bitmap_size; - i++, index++) { - index %= bitmap_size; - if (ext2_set_bit(index, llh->llh_bitmap)) { - /* XXX This should trigger log clean up or similar */ - CERROR("catalog index %d is still in use\n", index); - } else { - cathandle->lgh_last_idx = index; - llh->llh_count = cpu_to_le32(le32_to_cpu(llh->llh_count) + 1); - break; - } - } - if (i == bitmap_size) { - CERROR("no free catalog slots for log...\n"); - GOTO(out_destroy, rc = -ENOSPC); - } - CWARN("new recovery log "LPX64":%x for index %u of catalog "LPX64"\n", - loghandle->lgh_id.lgl_oid, loghandle->lgh_id.lgl_ogen, index, - cathandle->lgh_id.lgl_oid); + CDEBUG(D_HA, "new recovery log "LPX64":%x for index %u of catalog " + LPX64"\n", loghandle->lgh_id.lgl_oid, loghandle->lgh_id.lgl_ogen, + index, cathandle->lgh_id.lgl_oid); /* build the record for this log in the catalog */ rec.lid_hdr.lrh_len = cpu_to_le32(sizeof(rec)); rec.lid_hdr.lrh_index = cpu_to_le32(index); @@ -96,7 +99,7 @@ static struct llog_handle *llog_cat_new_log(struct llog_handle *cathandle) rec.lid_tail.lrt_index = cpu_to_le32(index); /* update the catalog: header and record */ - rc = llog_write_rec(cathandle, &rec.lid_hdr, + rc = llog_write_rec(cathandle, &rec.lid_hdr, &loghandle->u.phd.phd_cookie, 1, NULL, index); if (rc < 0) { GOTO(out_destroy, rc); @@ -115,7 +118,10 @@ static struct llog_handle *llog_cat_new_log(struct llog_handle *cathandle) } EXPORT_SYMBOL(llog_cat_new_log); -/* Assumes caller has already pushed us into the kernel context and is locking. +/* Open an existent log handle and add it to the open list. + * This log handle will be closed when all of the records in it are removed. + * + * Assumes caller has already pushed us into the kernel context and is locking. * We return a lock on the handle to ensure nobody yanks it from us. */ int llog_cat_id2handle(struct llog_handle *cathandle, struct llog_handle **res, @@ -128,7 +134,7 @@ int llog_cat_id2handle(struct llog_handle *cathandle, struct llog_handle **res, if (cathandle == NULL) RETURN(-EBADF); - list_for_each_entry(loghandle, &cathandle->u.chd.chd_head, + list_for_each_entry(loghandle, &cathandle->u.chd.chd_head, u.phd.phd_entry) { struct llog_logid *cgl = &loghandle->lgh_id; if (cgl->lgl_oid == logid->lgl_oid) { @@ -139,7 +145,6 @@ int llog_cat_id2handle(struct llog_handle *cathandle, struct llog_handle **res, continue; } loghandle->u.phd.phd_cat_handle = cathandle; - cathandle->u.chd.chd_current_log = loghandle; GOTO(out, rc = 0); } } @@ -151,15 +156,14 @@ int llog_cat_id2handle(struct llog_handle *cathandle, struct llog_handle **res, } else { rc = llog_init_handle(loghandle, LLOG_F_IS_PLAIN, NULL); if (!rc) { - list_add(&loghandle->u.phd.phd_entry, + list_add(&loghandle->u.phd.phd_entry, &cathandle->u.chd.chd_head); - cathandle->u.chd.chd_current_log = loghandle; } } if (!rc) { loghandle->u.phd.phd_cat_handle = cathandle; loghandle->u.phd.phd_cookie.lgc_lgl = cathandle->lgh_id; - loghandle->u.phd.phd_cookie.lgc_index = + loghandle->u.phd.phd_cookie.lgc_index = le32_to_cpu(loghandle->lgh_hdr->llh_cat_idx); } @@ -174,7 +178,7 @@ int llog_cat_put(struct llog_handle *cathandle) int rc; ENTRY; - list_for_each_entry_safe(loghandle, n, &cathandle->u.chd.chd_head, + list_for_each_entry_safe(loghandle, n, &cathandle->u.chd.chd_head, u.phd.phd_entry) { int err = llog_close(loghandle); if (err) @@ -195,7 +199,7 @@ EXPORT_SYMBOL(llog_cat_put); * * NOTE: loghandle is write-locked upon successful return */ -static struct llog_handle *llog_cat_current_log(struct llog_handle *cathandle, +static struct llog_handle *llog_cat_current_log(struct llog_handle *cathandle, int create) { struct llog_handle *loghandle = NULL; @@ -205,7 +209,7 @@ static struct llog_handle *llog_cat_current_log(struct llog_handle *cathandle, loghandle = cathandle->u.chd.chd_current_log; if (loghandle) { struct llog_log_hdr *llh = loghandle->lgh_hdr; - if (loghandle->lgh_last_idx < (sizeof(llh->llh_bitmap) * 8) - 1) { + if (loghandle->lgh_last_idx < (sizeof(llh->llh_bitmap)*8) - 1) { down_write(&loghandle->lgh_lock); up_read(&cathandle->lgh_lock); RETURN(loghandle); @@ -226,7 +230,7 @@ static struct llog_handle *llog_cat_current_log(struct llog_handle *cathandle, loghandle = cathandle->u.chd.chd_current_log; if (loghandle) { struct llog_log_hdr *llh = loghandle->lgh_hdr; - if (loghandle->lgh_last_idx < (sizeof(llh->llh_bitmap) * 8) - 1) { + if (loghandle->lgh_last_idx < (sizeof(llh->llh_bitmap)*8) - 1) { down_write(&loghandle->lgh_lock); up_write(&cathandle->lgh_lock); RETURN(loghandle); @@ -235,7 +239,7 @@ static struct llog_handle *llog_cat_current_log(struct llog_handle *cathandle, CDEBUG(D_INODE, "creating new log\n"); loghandle = llog_cat_new_log(cathandle); - if (loghandle) + if (!IS_ERR(loghandle)) down_write(&loghandle->lgh_lock); up_write(&cathandle->lgh_lock); RETURN(loghandle); @@ -247,7 +251,7 @@ static struct llog_handle *llog_cat_current_log(struct llog_handle *cathandle, * Assumes caller has already pushed us into the kernel context. */ int llog_cat_add_rec(struct llog_handle *cathandle, struct llog_rec_hdr *rec, - struct llog_cookie *reccookie, void *buf) + struct llog_cookie *reccookie, void *buf) { struct llog_handle *loghandle; int rc; @@ -260,6 +264,7 @@ int llog_cat_add_rec(struct llog_handle *cathandle, struct llog_rec_hdr *rec, /* loghandle is already locked by llog_cat_current_log() for us */ rc = llog_write_rec(loghandle, rec, reccookie, 1, buf, -1); up_write(&loghandle->lgh_lock); + RETURN(rc); } EXPORT_SYMBOL(llog_cat_add_rec); @@ -293,15 +298,20 @@ int llog_cat_cancel_records(struct llog_handle *cathandle, int count, down_write(&loghandle->lgh_lock); rc = llog_cancel_rec(loghandle, cookies->lgc_index); up_write(&loghandle->lgh_lock); - + if (rc == 1) { /* log has been destroyed */ index = loghandle->u.phd.phd_cookie.lgc_index; if (cathandle->u.chd.chd_current_log == loghandle) cathandle->u.chd.chd_current_log = NULL; llog_free_handle(loghandle); - + LASSERT(index); + llog_cat_set_first_idx(cathandle, index); rc = llog_cancel_rec(cathandle, index); + if (rc == 0) + CDEBUG(D_HA, "cancel plain log at index %u " + "of catalog "LPX64"\n", + index, cathandle->lgh_id.lgl_oid); } } up_write(&cathandle->lgh_lock); @@ -310,7 +320,8 @@ int llog_cat_cancel_records(struct llog_handle *cathandle, int count, } EXPORT_SYMBOL(llog_cat_cancel_records); -int llog_cat_process_cb(struct llog_handle *cat_llh, struct llog_rec_hdr *rec, void *data) +int llog_cat_process_cb(struct llog_handle *cat_llh, struct llog_rec_hdr *rec, + void *data) { struct llog_process_data *d = data; struct llog_logid_rec *lir = (struct llog_logid_rec *)rec; @@ -321,33 +332,85 @@ int llog_cat_process_cb(struct llog_handle *cat_llh, struct llog_rec_hdr *rec, v CERROR("invalid record in catalog\n"); RETURN(-EINVAL); } - CWARN("processing log "LPX64":%x at index %u of catalog "LPX64"\n", + CWARN("processing log "LPX64":%x at index %u of catalog "LPX64"\n", lir->lid_id.lgl_oid, lir->lid_id.lgl_ogen, le32_to_cpu(rec->lrh_index), cat_llh->lgh_id.lgl_oid); rc = llog_cat_id2handle(cat_llh, &llh, &lir->lid_id); if (rc) { - CERROR("Cannot find handle for log "LPX64"\n", lir->lid_id.lgl_oid); + CERROR("Cannot find handle for log "LPX64"\n", + lir->lid_id.lgl_oid); RETURN(rc); - } + } - rc = llog_process(llh, d->lpd_cb, d->lpd_data); + rc = llog_process(llh, d->lpd_cb, d->lpd_data, NULL); RETURN(rc); } int llog_cat_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data) { struct llog_process_data d; + struct llog_process_cat_data cd; + struct llog_log_hdr *llh = cat_llh->lgh_hdr; int rc; ENTRY; + + LASSERT(llh->llh_flags &cpu_to_le32(LLOG_F_IS_CAT)); d.lpd_data = data; d.lpd_cb = cb; - rc = llog_process(cat_llh, llog_cat_process_cb, &d); + if (llh->llh_cat_idx > cat_llh->lgh_last_idx) { + CWARN("catlog "LPX64" crosses index zero\n", + cat_llh->lgh_id.lgl_oid); + + cd.first_idx = le32_to_cpu(llh->llh_cat_idx); + cd.last_idx = 0; + rc = llog_process(cat_llh, llog_cat_process_cb, &d, &cd); + if (rc != 0) + RETURN(rc); + + cd.first_idx = 0; + cd.last_idx = cat_llh->lgh_last_idx; + rc = llog_process(cat_llh, llog_cat_process_cb, &d, &cd); + } else { + rc = llog_process(cat_llh, llog_cat_process_cb, &d, NULL); + } + RETURN(rc); } EXPORT_SYMBOL(llog_cat_process); +int llog_cat_set_first_idx(struct llog_handle *cathandle, int index) +{ + struct llog_log_hdr *llh = cathandle->lgh_hdr; + int i, bitmap_size, idx; + ENTRY; + + bitmap_size = sizeof(llh->llh_bitmap) * 8; + if (llh->llh_cat_idx == cpu_to_le32(index - 1)) { + idx = le32_to_cpu(llh->llh_cat_idx) + 1; + llh->llh_cat_idx = cpu_to_le32(idx); + if (idx == cathandle->lgh_last_idx) + goto out; + for (i = (index + 1) % bitmap_size; + i != cathandle->lgh_last_idx; + i = (i + 1) % bitmap_size) { + if (!ext2_test_bit(i, llh->llh_bitmap)) { + idx = le32_to_cpu(llh->llh_cat_idx) + 1; + llh->llh_cat_idx = cpu_to_le32(idx); + } else if (i == 0) { + llh->llh_cat_idx = 0; + } else { + break; + } + } +out: + CDEBUG(D_HA, "set catlog "LPX64" first idx %u\n", + cathandle->lgh_id.lgl_oid,le32_to_cpu(llh->llh_cat_idx)); + } + + RETURN(0); +} #if 0 /* Assumes caller has already pushed us into the kernel context. */ @@ -366,7 +429,7 @@ int llog_cat_init(struct llog_handle *cathandle, struct obd_uuid *tgtuuid) if (cathandle->lgh_file->f_dentry->d_inode->i_size == 0) { llog_write_rec(cathandle, &llh->llh_hdr, NULL, 0, NULL, 0); -write_hdr: +write_hdr: rc = lustre_fwrite(cathandle->lgh_file, llh, LLOG_CHUNK_SIZE, &offset); if (rc != LLOG_CHUNK_SIZE) { diff --git a/lustre/obdclass/llog_obd.c b/lustre/obdclass/llog_obd.c index 9c9abb7..d01441a 100644 --- a/lustre/obdclass/llog_obd.c +++ b/lustre/obdclass/llog_obd.c @@ -26,7 +26,7 @@ /* helper functions for calling the llog obd methods */ -int llog_setup(struct obd_device *obd, int index, struct obd_device *disk_obd, +int llog_setup(struct obd_device *obd, int index, struct obd_device *disk_obd, int count, struct llog_logid *logid, struct llog_operations *op) { int rc = 0; @@ -49,7 +49,7 @@ int llog_setup(struct obd_device *obd, int index, struct obd_device *disk_obd, if (op->lop_setup) rc = op->lop_setup(obd, index, disk_obd, count, logid); - if (ctxt && rc) + if (ctxt && rc) OBD_FREE(ctxt, sizeof(*ctxt)); RETURN(rc); @@ -61,7 +61,6 @@ int llog_cleanup(struct llog_ctxt *ctxt) int rc = 0; ENTRY; - down(&ctxt->loc_sem); LASSERT(ctxt); if (CTXTP(ctxt, cleanup)) @@ -70,7 +69,6 @@ int llog_cleanup(struct llog_ctxt *ctxt) ctxt->loc_obd->obd_llog_ctxt[ctxt->loc_idx] = NULL; class_export_put(ctxt->loc_exp); ctxt->loc_exp = NULL; - up(&ctxt->loc_sem); OBD_FREE(ctxt, sizeof(*ctxt)); RETURN(rc); @@ -84,29 +82,25 @@ int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp) if (!ctxt) RETURN(0); - down(&ctxt->loc_sem); - if (ctxt->loc_llcd && CTXTP(ctxt, sync)) + + if (CTXTP(ctxt, sync)) rc = CTXTP(ctxt, sync)(ctxt, exp); - else - up(&ctxt->loc_sem); RETURN(rc); } EXPORT_SYMBOL(llog_sync); -int llog_add(struct llog_ctxt *ctxt, - struct llog_rec_hdr *rec, struct lov_stripe_md *lsm, - struct llog_cookie *logcookies, int numcookies) +int llog_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, + struct lov_stripe_md *lsm, struct llog_cookie *logcookies, + int numcookies) { int rc; ENTRY; LASSERT(ctxt); - down(&ctxt->loc_sem); CTXT_CHECK_OP(ctxt, add, -EOPNOTSUPP); rc = CTXTP(ctxt, add)(ctxt, rec, lsm, logcookies, numcookies); - up(&ctxt->loc_sem); RETURN(rc); } EXPORT_SYMBOL(llog_add); @@ -125,7 +119,7 @@ int llog_cancel(struct llog_ctxt *ctxt, struct lov_stripe_md *lsm, EXPORT_SYMBOL(llog_cancel); /* callback func for llog_process in llog_obd_origin_setup */ -static int cat_cancel_cb(struct llog_handle *cathandle, +static int cat_cancel_cb(struct llog_handle *cathandle, struct llog_rec_hdr *rec, void *data) { struct llog_logid_rec *lir = (struct llog_logid_rec *)rec; @@ -138,35 +132,36 @@ static int cat_cancel_cb(struct llog_handle *cathandle, CERROR("invalid record in catalog\n"); RETURN(-EINVAL); } - CWARN("processing log "LPX64":%x at index %u of catalog "LPX64"\n", + CWARN("processing log "LPX64":%x at index %u of catalog "LPX64"\n", lir->lid_id.lgl_oid, lir->lid_id.lgl_ogen, le32_to_cpu(rec->lrh_index), cathandle->lgh_id.lgl_oid); rc = llog_cat_id2handle(cathandle, &loghandle, &lir->lid_id); if (rc) { - CERROR("Cannot find handle for log "LPX64"\n", lir->lid_id.lgl_oid); + CERROR("Cannot find handle for log "LPX64"\n", + lir->lid_id.lgl_oid); RETURN(rc); - } - + } + llh = loghandle->lgh_hdr; if ((le32_to_cpu(llh->llh_flags) & LLOG_F_ZAP_WHEN_EMPTY) && (le32_to_cpu(llh->llh_count) == 1)) { rc = llog_destroy(loghandle); if (rc) - CERROR("failure destroying log during postsetup: %d\n", rc); + CERROR("failure destroying log in postsetup: %d\n", rc); LASSERT(rc == 0); index = loghandle->u.phd.phd_cookie.lgc_index; - if (cathandle->u.chd.chd_current_log == loghandle) - cathandle->u.chd.chd_current_log = NULL; llog_free_handle(loghandle); - + LASSERT(index); + llog_cat_set_first_idx(cathandle, index); rc = llog_cancel_rec(cathandle, index); if (rc == 0) - CWARN("cancel log "LPX64":%x at index %u of catalog "LPX64"\n", - lir->lid_id.lgl_oid, lir->lid_id.lgl_ogen, - le32_to_cpu(rec->lrh_index), cathandle->lgh_id.lgl_oid); + CWARN("cancel log "LPX64":%x at index %u of catalog " + LPX64"\n", lir->lid_id.lgl_oid, + lir->lid_id.lgl_ogen, le32_to_cpu(rec->lrh_index), + cathandle->lgh_id.lgl_oid); } RETURN(rc); @@ -174,8 +169,9 @@ static int cat_cancel_cb(struct llog_handle *cathandle, /* lop_setup method for filter/osc */ // XXX how to set exports -int llog_obd_origin_setup(struct obd_device *obd, int index, struct obd_device *disk_obd, - int count, struct llog_logid *logid) +int llog_obd_origin_setup(struct obd_device *obd, int index, + struct obd_device *disk_obd, int count, + struct llog_logid *logid) { struct llog_ctxt *ctxt; struct llog_handle *handle; @@ -187,20 +183,19 @@ int llog_obd_origin_setup(struct obd_device *obd, int index, struct obd_device * RETURN(0); LASSERT(count == 1); - + ctxt = llog_get_context(obd, index); LASSERT(ctxt); - log_gen_init(ctxt); + llog_gen_init(ctxt); - down(&ctxt->loc_sem); if (logid->lgl_oid) rc = llog_create(ctxt, &handle, logid, NULL); else { rc = llog_create(ctxt, &handle, NULL, NULL); - if (!rc) + if (!rc) *logid = handle->lgh_id; } - if (rc) + if (rc) GOTO(out, rc); ctxt->loc_handle = handle; @@ -210,11 +205,10 @@ int llog_obd_origin_setup(struct obd_device *obd, int index, struct obd_device * if (rc) GOTO(out, rc); - rc = llog_process(handle, (llog_cb_t)cat_cancel_cb, NULL); - if (rc) + rc = llog_process(handle, (llog_cb_t)cat_cancel_cb, NULL, NULL); + if (rc) CERROR("llog_process with cat_cancel_cb failed: %d\n", rc); out: - up(&ctxt->loc_sem); if (ctxt && rc) { obd->obd_llog_ctxt[index] = NULL; OBD_FREE(ctxt, sizeof(*ctxt)); @@ -229,33 +223,35 @@ int llog_obd_origin_cleanup(struct llog_ctxt *ctxt) struct llog_log_hdr *llh; int rc, index; ENTRY; - + if (!ctxt) return 0; cathandle = ctxt->loc_handle; if (cathandle) { - list_for_each_entry_safe(loghandle, n, &cathandle->u.chd.chd_head, - u.phd.phd_entry) { + list_for_each_entry_safe(loghandle, n, + &cathandle->u.chd.chd_head, + u.phd.phd_entry) { llh = loghandle->lgh_hdr; - if ((le32_to_cpu(llh->llh_flags) & LLOG_F_ZAP_WHEN_EMPTY) && + if ((le32_to_cpu(llh->llh_flags) & + LLOG_F_ZAP_WHEN_EMPTY) && (le32_to_cpu(llh->llh_count) == 1)) { rc = llog_destroy(loghandle); if (rc) - CERROR("failure destroying log during cleanup: %d\n", - rc); + CERROR("failure destroying log during " + "cleanup: %d\n", rc); LASSERT(rc == 0); index = loghandle->u.phd.phd_cookie.lgc_index; - if (cathandle->u.chd.chd_current_log == loghandle) - cathandle->u.chd.chd_current_log = NULL; llog_free_handle(loghandle); - + LASSERT(index); + llog_cat_set_first_idx(cathandle, index); rc = llog_cancel_rec(cathandle, index); if (rc == 0) - CWARN("cancel plain log at index %u of catalog "LPX64"\n", - index, cathandle->lgh_id.lgl_oid); + CDEBUG(D_HA, "cancel plain log at index" + " %u of catalog "LPX64"\n", + index,cathandle->lgh_id.lgl_oid); } } llog_cat_put(ctxt->loc_handle); @@ -264,7 +260,6 @@ int llog_obd_origin_cleanup(struct llog_ctxt *ctxt) } EXPORT_SYMBOL(llog_obd_origin_cleanup); - /* add for obdfilter/sz and mds/unlink */ int llog_obd_origin_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, struct lov_stripe_md *lsm, @@ -314,7 +309,7 @@ int llog_cat_initialize(struct obd_device *obd, int count) CERROR("rc: %d\n", rc); GOTO(out, rc); } - + out: OBD_FREE(idarray, size); RETURN(rc); diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index 80742c8..63fd22b 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -39,66 +39,240 @@ #include "ptlrpc_internal.h" -/* should this take an imp_sem to ensure connect is single threaded? */ -int ptlrpc_connect_import(struct obd_import *imp) +struct ptlrpc_connect_async_args { + __u64 pcaa_peer_committed; + int pcaa_initial_connect; + int pcaa_was_invalid; +}; + +/* A CLOSED import should remain so. */ +#define IMPORT_SET_STATE_NOLOCK(imp, state) \ +do { \ + if (imp->imp_state != LUSTRE_IMP_CLOSED) { \ + CDEBUG(D_HA, "%p %s: changing import state from %s to %s\n", \ + imp, imp->imp_target_uuid.uuid, \ + ptlrpc_import_state_name(imp->imp_state), \ + ptlrpc_import_state_name(state)); \ + imp->imp_state = state; \ + } \ +} while(0) + +#define IMPORT_SET_STATE(imp, state) \ +do { \ + unsigned long flags; \ + \ + spin_lock_irqsave(&imp->imp_lock, flags); \ + IMPORT_SET_STATE_NOLOCK(imp, state); \ + spin_unlock_irqrestore(&imp->imp_lock, flags); \ +} while(0) + + +static int ptlrpc_connect_interpret(struct ptlrpc_request *request, + void * data, int rc); +int ptlrpc_import_recovery_state_machine(struct obd_import *imp); + +/* Only this function is allowed to change the import state when it is + * CLOSED. I would rather refcount the import and free it after + * disconnection like we do with exports. To do that, the client_obd + * will need to save the peer info somewhere other than in the import, + * though. */ +int ptlrpc_init_import(struct obd_import *imp) +{ + unsigned long flags; + + spin_lock_irqsave(&imp->imp_lock, flags); + + imp->imp_generation++; + imp->imp_state = LUSTRE_IMP_NEW; + + spin_unlock_irqrestore(&imp->imp_lock, flags); + + return 0; +} + +/* Returns true if import was FULL, false if import was already not + * connected. + */ +int ptlrpc_set_import_discon(struct obd_import *imp) +{ + unsigned long flags; + int rc = 0; + + spin_lock_irqsave(&imp->imp_lock, flags); + + if (imp->imp_state == LUSTRE_IMP_FULL) { + IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON); + rc = 1; + } else { + CDEBUG(D_HA, "%p %s: import already not connected: %s\n", + imp,imp->imp_client->cli_name, + ptlrpc_import_state_name(imp->imp_state)); + } + spin_unlock_irqrestore(&imp->imp_lock, flags); + + return rc; +} + +void ptlrpc_fail_import(struct obd_import *imp, int generation) +{ + ENTRY; + + LASSERT (!imp->imp_dlm_fake); + + if (ptlrpc_set_import_discon(imp)) + ptlrpc_handle_failed_import(imp); + + EXIT; +} + +int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid) { struct obd_device *obd = imp->imp_obd; - int msg_flags; int initial_connect = 0; int rc; __u64 committed_before_reconnect = 0; + int was_invalid = 0; struct ptlrpc_request *request; - struct lustre_handle old_hdl; int size[] = {sizeof(imp->imp_target_uuid), sizeof(obd->obd_uuid), sizeof(imp->imp_dlm_handle)}; char *tmp[] = {imp->imp_target_uuid.uuid, obd->obd_uuid.uuid, (char *)&imp->imp_dlm_handle}; + struct ptlrpc_connect_async_args *aa; unsigned long flags; spin_lock_irqsave(&imp->imp_lock, flags); - if (imp->imp_state == LUSTRE_IMP_CONNECTING) { + if (imp->imp_state == LUSTRE_IMP_CLOSED) { spin_unlock_irqrestore(&imp->imp_lock, flags); + CERROR("can't connect to a closed import\n"); + RETURN(-EINVAL); + } else if (imp->imp_state == LUSTRE_IMP_FULL) { + spin_unlock_irqrestore(&imp->imp_lock, flags); + CERROR("already connected\n"); + RETURN(0); + } else if (imp->imp_state == LUSTRE_IMP_CONNECTING) { + spin_unlock_irqrestore(&imp->imp_lock, flags); + CERROR("already connecting\n"); RETURN(-EALREADY); - } else { - LASSERT(imp->imp_state == LUSTRE_IMP_DISCON); } - CDEBUG(D_HA, "%s: new state: CONNECTING\n", - imp->imp_client->cli_name); - imp->imp_state = LUSTRE_IMP_CONNECTING; + + IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CONNECTING); + imp->imp_conn_cnt++; + imp->imp_last_replay_transno = 0; + if (imp->imp_remote_handle.cookie == 0) { initial_connect = 1; } else { - committed_before_reconnect = imp->imp_peer_committed_transno; + committed_before_reconnect = imp->imp_peer_committed_transno;; + + } + + if (imp->imp_invalid) { + imp->imp_invalid = 0; + was_invalid = 1; } + spin_unlock_irqrestore(&imp->imp_lock, flags); + if (new_uuid) { + struct ptlrpc_connection *conn; + struct obd_uuid uuid; + struct obd_export *dlmexp; + + obd_str2uuid(&uuid, new_uuid); + + conn = ptlrpc_uuid_to_connection(&uuid); + if (!conn) + GOTO(out, rc = -ENOENT); + + CDEBUG(D_HA, "switching import %s/%s from %s to %s\n", + imp->imp_target_uuid.uuid, imp->imp_obd->obd_name, + imp->imp_connection->c_remote_uuid.uuid, + conn->c_remote_uuid.uuid); + + /* Switch the import's connection and the DLM export's + * connection (which are almost certainly the same, but we + * keep distinct refs just to make things clearer. I think. */ + if (imp->imp_connection) + ptlrpc_put_connection(imp->imp_connection); + /* We hand off the ref from ptlrpc_get_connection. */ + imp->imp_connection = conn; + + dlmexp = class_conn2export(&imp->imp_dlm_handle); + + LASSERT(dlmexp != NULL); + + if (dlmexp->exp_connection) + ptlrpc_put_connection(dlmexp->exp_connection); + dlmexp->exp_connection = ptlrpc_connection_addref(conn); + class_export_put(dlmexp); + + } + request = ptlrpc_prep_req(imp, imp->imp_connect_op, 3, size, tmp); if (!request) GOTO(out, rc = -ENOMEM); request->rq_send_state = LUSTRE_IMP_CONNECTING; request->rq_replen = lustre_msg_size(0, NULL); + request->rq_interpret_reply = ptlrpc_connect_interpret; + + LASSERT (sizeof (*aa) <= sizeof (request->rq_async_args)); + aa = (struct ptlrpc_connect_async_args *)&request->rq_async_args; + memset(aa, 0, sizeof *aa); - // lustre_msg_add_op_flags(request->rq_reqmsg, MSG_CONNECT_PEER); + aa->pcaa_peer_committed = committed_before_reconnect; + aa->pcaa_initial_connect = initial_connect; + aa->pcaa_was_invalid = was_invalid; - rc = ptlrpc_queue_wait(request); - if (rc) { - GOTO(free_req, rc); + if (aa->pcaa_initial_connect) + imp->imp_replayable = 1; + ptlrpcd_add_req(request); + rc = 0; +out: + if (rc != 0) { + IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON); + } + + RETURN(rc); +} + +static int ptlrpc_connect_interpret(struct ptlrpc_request *request, + void * data, int rc) +{ + struct ptlrpc_connect_async_args *aa = data; + struct obd_import *imp = request->rq_import; + struct lustre_handle old_hdl; + unsigned long flags; + int msg_flags; + ENTRY; + + spin_lock_irqsave(&imp->imp_lock, flags); + if (imp->imp_state == LUSTRE_IMP_CLOSED) { + spin_unlock_irqrestore(&imp->imp_lock, flags); + RETURN(0); } + spin_unlock_irqrestore(&imp->imp_lock, flags); + + if (rc) + GOTO(out, rc); msg_flags = lustre_msg_get_op_flags(request->rq_repmsg); - if (initial_connect) { - CDEBUG(D_HA, "%s: new state: FULL\n", - imp->imp_client->cli_name); - if (msg_flags & MSG_CONNECT_REPLAYABLE) + if (aa->pcaa_initial_connect) { + if (msg_flags & MSG_CONNECT_REPLAYABLE) { + CDEBUG(D_HA, "connected to replayable target: %s\n", + imp->imp_target_uuid.uuid); imp->imp_replayable = 1; + ptlrpc_pinger_add_import(imp); + } else { + imp->imp_replayable = 0; + } imp->imp_remote_handle = request->rq_repmsg->handle; - imp->imp_state = LUSTRE_IMP_FULL; - GOTO(free_req, rc = 0); + IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL); + GOTO(finish, rc = 0); } /* Determine what recovery state to move the import to. */ @@ -110,7 +284,7 @@ int ptlrpc_connect_import(struct obd_import *imp) ", failed\n", imp->imp_target_uuid.uuid, imp->imp_connection->c_remote_uuid.uuid, imp->imp_dlm_handle.cookie); - GOTO(free_req, rc = -ENOTCONN); + GOTO(out, rc = -ENOTCONN); } if (memcmp(&imp->imp_remote_handle, &request->rq_repmsg->handle, @@ -127,23 +301,17 @@ int ptlrpc_connect_import(struct obd_import *imp) imp->imp_target_uuid.uuid, imp->imp_connection->c_remote_uuid.uuid); } - CDEBUG(D_HA, "%s: new state: RECOVER\n", - imp->imp_client->cli_name); - imp->imp_state = LUSTRE_IMP_RECOVER; + IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER); } else if (MSG_CONNECT_RECOVERING & msg_flags) { - CDEBUG(D_HA, "%s: new state: REPLAY\n", - imp->imp_client->cli_name); LASSERT(imp->imp_replayable); imp->imp_state = LUSTRE_IMP_RECOVER; imp->imp_remote_handle = request->rq_repmsg->handle; - imp->imp_state = LUSTRE_IMP_REPLAY; + IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY); } else { - CDEBUG(D_HA, "%s: new state: EVICTED\n", - imp->imp_client->cli_name); imp->imp_remote_handle = request->rq_repmsg->handle; - imp->imp_state = LUSTRE_IMP_EVICTED; + IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED); } /* Sanity checks for a reconnected import. */ @@ -153,31 +321,150 @@ int ptlrpc_connect_import(struct obd_import *imp) "after reconnect. We should LBUG right here.\n"); } - if (request->rq_repmsg->last_committed < committed_before_reconnect) { + if (request->rq_repmsg->last_committed < aa->pcaa_peer_committed) { CERROR("%s went back in time (transno "LPD64 " was previously committed, server now claims "LPD64 ")! is shared storage not coherent?\n", imp->imp_target_uuid.uuid, - committed_before_reconnect, + aa->pcaa_peer_committed, request->rq_repmsg->last_committed); } - free_req: - ptlrpc_req_finished(request); +finish: + rc = ptlrpc_import_recovery_state_machine(imp); + if (rc != 0) { + if (aa->pcaa_was_invalid) { + ptlrpc_set_import_active(imp, 0); + } + if (rc == -ENOTCONN) { + CDEBUG(D_HA, "evicted/aborted by %s@%s during recovery;" + "invalidating and reconnecting\n", + imp->imp_target_uuid.uuid, + imp->imp_connection->c_remote_uuid.uuid); + ptlrpc_connect_import(imp, NULL); + RETURN(0); + } + } out: - if (rc != 0) - imp->imp_state = LUSTRE_IMP_DISCON; + if (rc != 0) { + IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON); + if (aa->pcaa_initial_connect && !imp->imp_initial_recov) + GOTO(norecov, rc); + CDEBUG(D_ERROR, + "recovery of %s on %s failed (%d); restarting\n", + imp->imp_target_uuid.uuid, + (char *)imp->imp_connection->c_remote_uuid.uuid, rc); + ptlrpc_handle_failed_import(imp); + } + +norecov: + wake_up(&imp->imp_recovery_waitq); RETURN(rc); } +static int completed_replay_interpret(struct ptlrpc_request *req, + void * data, int rc) +{ + atomic_dec(&req->rq_import->imp_replay_inflight); + ptlrpc_import_recovery_state_machine(req->rq_import); + RETURN(0); +} + +static int signal_completed_replay(struct obd_import *imp) + { + struct ptlrpc_request *req; + ENTRY; + + LASSERT(atomic_read(&imp->imp_replay_inflight) == 0); + atomic_inc(&imp->imp_replay_inflight); + + req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL, NULL); + if (!req) + RETURN(-ENOMEM); + + req->rq_replen = lustre_msg_size(0, NULL); + req->rq_send_state = LUSTRE_IMP_REPLAY_WAIT; + req->rq_reqmsg->flags |= MSG_LAST_REPLAY; + req->rq_timeout *= 3; + req->rq_interpret_reply = completed_replay_interpret; + + ptlrpcd_add_req(req); + RETURN(0); +} + + +int ptlrpc_import_recovery_state_machine(struct obd_import *imp) +{ + int rc = 0; + + if (imp->imp_state == LUSTRE_IMP_EVICTED) { + CDEBUG(D_HA, "evicted from %s@%s; invalidating\n", + imp->imp_target_uuid.uuid, + imp->imp_connection->c_remote_uuid.uuid); + ptlrpc_set_import_active(imp, 0); + IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER); + } + + if (imp->imp_state == LUSTRE_IMP_REPLAY) { + CDEBUG(D_HA, "replay requested by %s\n", + imp->imp_target_uuid.uuid); + rc = ptlrpc_replay_next(imp); + if (rc == 0 && atomic_read(&imp->imp_replay_inflight) == 0) { + IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY_LOCKS); + rc = ldlm_replay_locks(imp); + if (rc) + GOTO(out, rc); + } + rc = 0; + } + + if (imp->imp_state == LUSTRE_IMP_REPLAY_LOCKS) { + if (atomic_read(&imp->imp_replay_inflight) == 0) { + IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY_WAIT); + rc = signal_completed_replay(imp); + if (rc) + GOTO(out, rc); + } + } + + if (imp->imp_state == LUSTRE_IMP_REPLAY_WAIT) { + if (atomic_read(&imp->imp_replay_inflight) == 0) { + IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER); + } + } + + if (imp->imp_state == LUSTRE_IMP_RECOVER) { + CDEBUG(D_HA, "reconnected to %s@%s\n", + imp->imp_target_uuid.uuid, + imp->imp_connection->c_remote_uuid.uuid); + + ptlrpc_set_import_active(imp, 1); + ptlrpc_resend(imp); + IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL); + } + + if (imp->imp_state == LUSTRE_IMP_FULL) { + wake_up(&imp->imp_recovery_waitq); + ptlrpc_wake_delayed(imp); + } + + out: + RETURN(rc); +} + +static int back_to_sleep(void *unused) +{ + return 0; +} int ptlrpc_disconnect_import(struct obd_import *imp) { struct ptlrpc_request *request; int rq_opc; int rc = 0; + unsigned long flags; ENTRY; switch (imp->imp_connect_op) { @@ -190,12 +477,28 @@ int ptlrpc_disconnect_import(struct obd_import *imp) RETURN(-EINVAL); } + + if (ptlrpc_import_in_recovery(imp)) { + struct l_wait_info lwi; + lwi = LWI_TIMEOUT_INTR(MAX(obd_timeout * HZ, 1), back_to_sleep, + NULL, NULL); + rc = l_wait_event(imp->imp_recovery_waitq, + !ptlrpc_import_in_recovery(imp), &lwi); + + } + + spin_lock_irqsave(&imp->imp_lock, flags); + if (imp->imp_state != LUSTRE_IMP_FULL) { + GOTO(out, 0); + } + spin_unlock_irqrestore(&imp->imp_lock, flags); + request = ptlrpc_prep_req(imp, rq_opc, 0, NULL, NULL); if (request) { /* For non-replayable connections, don't attempt reconnect if this fails */ - if (!imp->imp_obd->obd_replayable) { - imp->imp_state = LUSTRE_IMP_DISCON; + if (!imp->imp_replayable) { + IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON); request->rq_send_state = LUSTRE_IMP_DISCON; } request->rq_replen = lustre_msg_size(0, NULL); @@ -203,8 +506,12 @@ int ptlrpc_disconnect_import(struct obd_import *imp) ptlrpc_req_finished(request); } - imp->imp_state = LUSTRE_IMP_DISCON; + spin_lock_irqsave(&imp->imp_lock, flags); +out: + IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CLOSED); memset(&imp->imp_remote_handle, 0, sizeof(imp->imp_remote_handle)); + spin_unlock_irqrestore(&imp->imp_lock, flags); + RETURN(rc); } diff --git a/lustre/ptlrpc/llog_client.c b/lustre/ptlrpc/llog_client.c index 5524843..8accba6 100644 --- a/lustre/ptlrpc/llog_client.c +++ b/lustre/ptlrpc/llog_client.c @@ -194,6 +194,7 @@ static int llog_client_read_header(struct llog_handle *handle) GOTO(out, rc =-EFAULT); } memcpy(handle->lgh_hdr, hdr, sizeof (*hdr)); + handle->lgh_last_idx = le32_to_cpu(handle->lgh_hdr->llh_tail.lrt_index); out: if (req) diff --git a/lustre/ptlrpc/llog_net.c b/lustre/ptlrpc/llog_net.c index 1dd2f9a..0694bd1 100644 --- a/lustre/ptlrpc/llog_net.c +++ b/lustre/ptlrpc/llog_net.c @@ -45,9 +45,9 @@ #ifdef __KERNEL__ int llog_origin_connect(struct llog_ctxt *ctxt, int count, - struct llog_logid *logid, - struct llog_ctxt_gen *gen) + struct llog_logid *logid, struct llog_gen *gen) { + struct llog_gen_rec *lgr; struct obd_import *imp; struct ptlrpc_request *request; struct llogd_conn_body *req_body; @@ -55,11 +55,31 @@ int llog_origin_connect(struct llog_ctxt *ctxt, int count, int rc; ENTRY; + if (list_empty(&ctxt->loc_handle->u.chd.chd_head)) { + CDEBUG(D_HA, "there is no record related to ctxt %p", ctxt); + RETURN(0); + } + + /* FIXME what value for gen->conn_cnt */ + LLOG_GEN_INC(ctxt->loc_gen); + + /* first add llog_gen_rec */ + OBD_ALLOC(lgr, sizeof(*lgr)); + if (!lgr) + RETURN(-ENOMEM); + lgr->lgr_hdr.lrh_len = lgr->lgr_tail.lrt_len = sizeof(*lgr); + lgr->lgr_hdr.lrh_type = LLOG_GEN_REC; + lgr->lgr_gen = ctxt->loc_gen; + rc = llog_add(ctxt, &lgr->lgr_hdr, NULL, NULL, 1); + OBD_FREE(lgr, sizeof(*lgr)); + if (rc != 1) + RETURN(rc); + LASSERT(ctxt->loc_imp); imp = ctxt->loc_imp; request = ptlrpc_prep_req(imp, LLOG_ORIGIN_CONNECT, 1, &size, NULL); - if (!request) + if (!request) RETURN(-ENOMEM); req_body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof(*req_body)); @@ -87,9 +107,9 @@ int llog_handle_connect(struct ptlrpc_request *req) req_body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*req_body)); ctxt = llog_get_context(obd, req_body->lgdc_ctxt_idx); - rc = llog_connect(ctxt, 1, &req_body->lgdc_logid, + rc = llog_connect(ctxt, 1, &req_body->lgdc_logid, &req_body->lgdc_gen); - if (rc != 0) + if (rc != 0) CERROR("failed at llog_relp_connect\n"); RETURN(rc); diff --git a/lustre/scripts/lbuild b/lustre/scripts/lbuild index 327ae91..0e682c5 100755 --- a/lustre/scripts/lbuild +++ b/lustre/scripts/lbuild @@ -136,7 +136,7 @@ check_options() [ -d "$KERNELDIR" ] || \ usage 1 "$KERNELDIR is not a directory." - if [ "$RELEASE" = "no" ] ; then + if ! (( $RELEASE )) ; then [ "$TAG" ] || \ usage 1 "When building a snapshot, a tag name must be used." fi @@ -186,10 +186,10 @@ load_target() if [ "$EXTRA_VERSION_save" ] ; then EXTRA_VERSION="$EXTRA_VERSION_save" - else - EXTRA_VERSION="${EXTRA_VERSION}_${TAG//_/}.${TIMESTAMP}" + elif ! (( $RELEASE )) ; then + EXTRA_VERSION="${EXTRA_VERSION}-${TAG//_/}.${TIMESTAMP}" fi - EXTRA_VERSION=${EXTRA_VERSION//-/_/} + # EXTRA_VERSION=${EXTRA_VERSION//-/_} ALL_ARCHS="$BASE_ARCHS $BIGMEM_ARCHS $BOOT_ARCHS $JENSEN_ARCHS $SMP_ARCHS $UP_ARCHS" @@ -253,7 +253,7 @@ unpack_linux() patch_linux() { - FULL_PATCH="$PWD/lustre-kernel-${target}-${EXTRA_VERSION}.patch" + FULL_PATCH="$PWD/lustre-kernel-${TARGET}-${EXTRA_VERSION}.patch" [ -f "$FULL_PATCH" ] && rm -f "$FULL_PATCH" pushd linux >/dev/null echo -n "Applying patches:" @@ -295,24 +295,26 @@ clean_linux() prep_build() { # make .spec file - sed -e s/@KERNEL_VERSION@/$VERSION/g \ - -e s/@KERNEL_RELEASE@/$EXTRA_VERSION/g \ - -e s/@KERNEL_SOURCE@/$KERNEL/g \ - -e s/@LUSTRE_SOURCE@/${LUSTRE##*/}/g \ - -e s/@LUSTRE_TARGET@/$TARGET/g \ - -e s/@CONFIGURE_FLAGS@/$CONFIGURE_FLAGS/g \ - -e s/@BASE_ARCHS@/$BASE_ARCHS/g \ - -e s/@BIGMEM_ARCHS@/$BIGMEM_ARCHS/g \ - -e s/@BOOT_ARCHS@/$BOOT_ARCHS/g \ - -e s/@JENSEN_ARCHS@/$BOOT_ARCHS/g \ - -e s/@SMP_ARCHS@/$SMP_ARCHS/g \ - -e s/@UP_ARCHS@/$UP_ARCHS/g \ + sed -e "s/@KERNEL_VERSION@/$VERSION/g" \ + -e "s/@KERNEL_EXTRA_VERSION@/$EXTRA_VERSION/g" \ + -e "s^@KERNEL_RELEASE@^${EXTRA_VERSION//-/_}^g" \ + -e "s/@KERNEL_SOURCE@/$KERNEL/g" \ + -e "s/@LUSTRE_SOURCE@/${LUSTRE##*/}/g" \ + -e "s/@LUSTRE_TARGET@/$TARGET/g" \ + -e "s/@CONFIGURE_FLAGS@/$CONFIGURE_FLAGS/g" \ + -e "s/@BASE_ARCHS@/$BASE_ARCHS/g" \ + -e "s/@BIGMEM_ARCHS@/$BIGMEM_ARCHS/g" \ + -e "s/@BOOT_ARCHS@/$BOOT_ARCHS/g" \ + -e "s/@JENSEN_ARCHS@/$BOOT_ARCHS/g" \ + -e "s/@SMP_ARCHS@/$SMP_ARCHS/g" \ + -e "s/@UP_ARCHS@/$UP_ARCHS/g" \ < $TOPDIR/lustre/scripts/lustre-kernel-2.4.spec.in \ > lustre-kernel-2.4.spec [ -d SRPMS ] || mkdir SRPMS [ -d RPMS ] || mkdir RPMS [ -d BUILD ] || mkdir BUILD [ -d SOURCES ] || mkdir SOURCES + cp $TOPDIR/lustre/scripts/linux-rhconfig.h SOURCES cp $TOPDIR/lustre/scripts/linux-merge-config.awk SOURCES cp $TOPDIR/lustre/scripts/linux-merge-modules.awk SOURCES cp "$LUSTRE" "$KERNEL_FILE" SOURCES diff --git a/lustre/scripts/lmake b/lustre/scripts/lmake index addbe4f..3a851a8 100755 --- a/lustre/scripts/lmake +++ b/lustre/scripts/lmake @@ -6,7 +6,7 @@ KERNELDIR= TARGET= # Not sure what to put here # TARGET_ARCH=$(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) -TARGET_ARCH="i386" +TARGET_ARCH= TARGET_CONFIG= JOBS=1 CONFIGURE_FLAGS= @@ -208,10 +208,11 @@ load_target() [ -r "$SERIES_FILE" ] || \ fatal 1 "Target $TARGET's series $SERIES missing from $TOPDIR/kernel_patches/series." - CONFIG_TARGET="$TARGET${TARGET_ARCH:+-$TARGET_ARCH}${TARGET_CONFIG:+-$TARGET_CONFIG}" + TARGET_ARCH=${TARGET_ARCH:-$BASE_ARCHS} + CONFIG_TARGET="$TARGET-${TARGET_ARCH}${TARGET_CONFIG:+-$TARGET_CONFIG}" CONFIG_FILE="$TOPDIR/kernel_patches/kernel_configs/kernel-$VERSION-$CONFIG_TARGET.config" - [ -r "$CONFIG_FILE" ] || \ - fatal 1 "Target $TARGET's config file $CONFIG missing from $TOPDIR/kernel_patches/configs." + [ -r "$CONFIG_FILE" ] || + fatal 1 "Target $TARGET's config file $CONFIG_FILE missing from $TOPDIR/kernel_patches/configs." if [ "$EXTRA_VERSION_save" ] ; then EXTRA_VERSION="$EXTRA_VERSION_save" diff --git a/lustre/scripts/lustre-kernel-2.4.spec.in b/lustre/scripts/lustre-kernel-2.4.spec.in index c30bb54..0999212 100644 --- a/lustre/scripts/lustre-kernel-2.4.spec.in +++ b/lustre/scripts/lustre-kernel-2.4.spec.in @@ -9,6 +9,7 @@ Summary: The Linux kernel (the core of the Linux operating system) # adding some text to the end of the version number. # %define kversion @KERNEL_VERSION@ +%define kextraver @KERNEL_EXTRA_VERSION@ %define release @KERNEL_RELEASE@ # /usr/src/%{kslnk} -> /usr/src/linux-%{KVERREL} %define kslnk linux-2.4 @@ -47,6 +48,7 @@ Summary: The Linux kernel (the core of the Linux operating system) %define buildjensen 0 %define buildsmp 0 %define buildup 0 +%define buildsrc 0 %ifarch @BASE_ARCHS@ %define buildbase 1 @@ -124,7 +126,7 @@ Second, per-architecture exclusions (ifarch) Name: kernel Version: %{kversion} Release: %{release}%{?targetboard:%{targetboard}}%{?debuglevel_1:.dbg} -%define KVERREL %{PACKAGE_VERSION}-%{PACKAGE_RELEASE} +%define KVERREL %{PACKAGE_VERSION}-%{kextraver}%{?targetboard:%{targetboard}}%{?debuglevel_1:.dbg} License: GPL Group: System Environment/Kernel ExclusiveArch: %{all_x86} x86_64 @@ -149,6 +151,7 @@ Buildroot: /var/tmp/%{name}-%{PACKAGE_VERSION}-root Source0: @LUSTRE_SOURCE@ Source1: @KERNEL_SOURCE@ +Source15: linux-rhconfig.h Source16: linux-merge-config.awk Source17: linux-merge-modules.awk @@ -318,7 +321,7 @@ DependKernel() --target @LUSTRE_TARGET@ \ --target-arch %{_target_cpu} \ ${target_config} \ - --extraversion %{release} \ + --extraversion %{kextraver} \ -j $RPM_BUILD_NCPUS } @@ -331,7 +334,23 @@ BuildKernel() --target @LUSTRE_TARGET@ \ --target-arch %{_target_cpu} \ ${target_config} \ - --extraversion %{release} \ + --extraversion %{kextraver} \ + --kerneldir $RPM_SOURCE_DIR \ + -j $RPM_BUILD_NCPUS \ + --destdir $RPM_BUILD_ROOT \ + -- @CONFIGURE_FLAGS@ +} + +BuildLustre() +{ + target_config=${1:+--target-config $1} + sh -x ./scripts/lmake \ + --build-lustre \ + --install-lustre \ + --target @LUSTRE_TARGET@ \ + --target-arch %{_target_cpu} \ + ${target_config} \ + --extraversion %{kextraver} \ --kerneldir $RPM_SOURCE_DIR \ -j $RPM_BUILD_NCPUS \ --destdir $RPM_BUILD_ROOT \ @@ -343,7 +362,7 @@ SaveHeaders() sh -x ./scripts/lmake \ --save-headers \ --target @LUSTRE_TARGET@ \ - --extraversion %{release} \ + --extraversion %{kextraver} \ --destdir $RPM_BUILD_ROOT } @@ -366,8 +385,10 @@ BuildKernel smp %endif # we want this one last, so that it is the one populating /usr/bin -%if %{buildup} || %{buildbase} +%if %{buildup} && %{buildbase} BuildKernel +%elseif %{buildbase} +BuildLustre %endif %if %{buildbase} @@ -401,7 +422,7 @@ pushd linux >/dev/null mkdir -p $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL} rm -f drivers/net/hamradio/soundmodem/gentbl scripts/mkdep tar cf - . | tar xf - -C $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL} -perl -p -i -e "s/^EXTRAVERSION.*/EXTRAVERSION = -%{release}custom/" $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}/Makefile +perl -p -i -e "s/^EXTRAVERSION.*/EXTRAVERSION = -%{kextraver}custom/" $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}/Makefile ln -sf linux-%{KVERREL} $RPM_BUILD_ROOT/usr/src/linux # install -m 644 %{SOURCE10} $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL} @@ -443,7 +464,7 @@ for l in $list; do done echo '#endif' >> modversions.h sed 's,$,autoconf.h,' ../../savedheaders/list | awk -f %{SOURCE16} >> autoconf.h -# install -m 644 %{SOURCE15} rhconfig.h +install -m 644 %{SOURCE15} rhconfig.h echo "#include " >> version.h keyword=if for i in smp BOOT BOOTsmp bigmem up ; do @@ -753,6 +774,7 @@ exit 0 %files -n lustre-lite-utils %defattr(-, root, root) %doc lustre/COPYING lustre/BUGS lustre/ChangeLog lustre/README lustre/doc/lustre.pdf +/sbin/* %{_sbindir}/* %{_bindir}/* %{_libdir}/lustre/python diff --git a/lustre/tests/cfg/insanity-mdev.sh b/lustre/tests/cfg/insanity-mdev.sh index 5e69356..ff34d6d 100644 --- a/lustre/tests/cfg/insanity-mdev.sh +++ b/lustre/tests/cfg/insanity-mdev.sh @@ -2,16 +2,18 @@ mds_HOST=${mds_HOST:-mdev4} mdsfailover_HOST=${mdsfailover_HOST:-mdev5} ost1_HOST=${ost1_HOST:-mdev2} ost2_HOST=${ost2_HOST:-mdev3} +EXTRA_OSTS=${EXTRA_OSTS:-mdev7} client_HOST=client LIVE_CLIENT=${LIVE_CLIENT:-mdev6} # This should always be a list, not a regexp -FAIL_CLIENTS=${FAIL_CLIENTS:-mdev7} +#FAIL_CLIENTS=${FAIL_CLIENTS:-mdev7} +FAIL_CLIENTS=${FAIL_CLIENTS:-""} NETTYPE=${NETTYPE:-tcp} TIMEOUT=${TIMEOUT:-30} -#PTLDEBUG=${PTLDEBUG:-'"ha|info|ioctl|malloc"'} PTLDEBUG=${PTLDEBUG:-0} +SUBSYSTEM=${SUBSYSTEM:-0} MOUNT=${MOUNT:-"/mnt/lustre"} UPCALL=${CLIENT_UPCALL:-`pwd`/replay-single-upcall.sh} diff --git a/lustre/tests/cfg/mdev.sh b/lustre/tests/cfg/mdev.sh index ec8edf2..dd373ba 100644 --- a/lustre/tests/cfg/mdev.sh +++ b/lustre/tests/cfg/mdev.sh @@ -11,7 +11,8 @@ MOUNT1=${MOUNT1:-$MOUNT} MOUNT2=${MOUNT2:-"/mnt/lustre2"} DIR=${DIR:-$MOUNT} DIR2=${DIR2:-$MOUNT1} -PTLDEBUG=${PTLDEBUG:-0} +PTLDEBUG=${PTLDEBUG:-0x3f0400} +SUBSYSTEM=${SUBSYSTEM:- 0xffb7e3ff} PDSH=${PDSH:-pdsh -S -w} MDSDEV=${MDSDEV:-/dev/sda1} diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index f6f77db..0f58491 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -2,6 +2,11 @@ # requirement: # add uml1 uml2 uml3 in your /etc/hosts +# FIXME - there is no reason to use all of these different +# return codes, espcially when most of them are mapped to something +# else anyway. The combination of test number and return code +# figure out what failed. + set -e SRCDIR=`dirname $0` @@ -18,6 +23,12 @@ init_test_env $@ FORCE=${FORCE:-" --force"} +if [ "$VERBOSE" == "true" ]; then + CMDVERBOSE="" +else + CMDVERBOSE=" > /dev/null" +fi + gen_config() { rm -f $XMLCONFIG @@ -40,33 +51,33 @@ gen_second_config() { start_mds() { echo "start mds service on `facet_active_host mds`" - start mds --reformat $MDSLCONFARGS > /dev/null || return 94 + start mds --reformat $MDSLCONFARGS $CMDVERBOSE || return 94 } stop_mds() { echo "stop mds service on `facet_active_host mds`" - stop mds $@ > /dev/null || return 97 + stop mds $@ $CMDVERBOSE || return 97 } start_ost() { echo "start ost service on `facet_active_host ost`" - start ost --reformat $OSTLCONFARGS > /dev/null || return 95 + start ost --reformat $OSTLCONFARGS $CMDVERBOSE || return 95 } stop_ost() { echo "stop ost service on `facet_active_host ost`" - stop ost $@ > /dev/null || return 98 + stop ost $@ $CMDVERBOSE || return 98 } mount_client() { local MOUNTPATH=$1 echo "mount lustre on ${MOUNTPATH}....." - zconf_mount $MOUNTPATH > /dev/null || return 96 + zconf_mount `hostname` $MOUNTPATH $CMDVERBOSE || return 96 } umount_client() { local MOUNTPATH=$1 echo "umount lustre on ${MOUNTPATH}....." - zconf_umount $MOUNTPATH > /dev/null || return 97 + zconf_umount $MOUNTPATH $CMDVERBOSE || return 97 } manual_umount_client(){ @@ -81,9 +92,15 @@ setup() { } cleanup() { - umount_client $MOUNT || return -200 - stop_mds || return -201 - stop_ost || return -202 + umount_client $MOUNT || return 200 + stop_mds || return 201 + stop_ost || return 202 + # catch case where these return just fine, but modules are still not unloaded + /sbin/lsmod | grep -q portals + if [ 1 -ne $? ]; then + echo "modules still loaded..." + return 203 + fi } check_mount() { @@ -112,18 +129,18 @@ test_0() { start_mds mount_client $MOUNT check_mount || return 41 - cleanup + cleanup || return $? } run_test 0 "single mount setup" test_1() { start_ost echo "start ost second time..." - start ost --reformat $OSTLCONFARGS > /dev/null + start ost --reformat $OSTLCONFARGS $CMDVERBOSE start_mds mount_client $MOUNT check_mount || return 42 - cleanup + cleanup || return $? } run_test 1 "start up ost twice" @@ -131,11 +148,11 @@ test_2() { start_ost start_mds echo "start mds second time.." - start mds --reformat $MDSLCONFARGS > /dev/null + start mds --reformat $MDSLCONFARGS $CMDVERBOSE mount_client $MOUNT check_mount || return 43 - cleanup + cleanup || return $? } run_test 2 "start up mds twice" @@ -146,7 +163,7 @@ test_3() { check_mount || return 44 umount_client $MOUNT - cleanup + cleanup || return $? } run_test 3 "mount client twice" @@ -154,28 +171,39 @@ test_4() { setup touch $DIR/$tfile || return 85 stop_ost ${FORCE} - - # cleanup may return an error from the failed - # disconnects; for now I'll consider this successful - # if all the modules have unloaded. - if ! cleanup ; then - lsmod | grep -q portals && return 1 - fi + cleanup + eno=$? + # ok for ost to fail shutdown + if [ 202 -ne $eno ]; then + return $eno; + fi return 0 } run_test 4 "force cleanup ost, then cleanup" test_5() { setup - touch $DIR/$tfile || return 86 - stop_mds ${FORCE} || return 98 + touch $DIR/$tfile || return 1 + stop_mds ${FORCE} || return 2 # cleanup may return an error from the failed # disconnects; for now I'll consider this successful # if all the modules have unloaded. - if ! cleanup ; then - lsmod | grep -q portals && return 1 - fi + umount $MOUNT & + UMOUNT_PID=$! + sleep $TIMEOUT + echo "killing umount" + kill -TERM $UMOUNT_PID + wait $UMOUNT_PID + + # cleanup client modules + $LCONF --cleanup --nosetup --node client_facet $XMLCONFIG > /dev/null + + # stop_mds is a no-op here, and should not fail + stop_mds || return 4 + stop_ost || return 5 + + lsmod | grep -q portals && return 6 return 0 } run_test 5 "force cleanup mds, then cleanup" @@ -185,14 +213,14 @@ test_6() { manual_umount_client mount_client ${MOUNT} || return 87 touch $DIR/a || return 86 - cleanup + cleanup || return $? } run_test 6 "manual umount, then mount again" test_7() { setup manual_umount_client - cleanup + cleanup || return $? } run_test 7 "manual umount, then cleanup" @@ -226,34 +254,55 @@ test_9() { start_ost start_mds mount_client $MOUNT - [ "`cat /proc/sys/portals/debug`" = "1" ] && \ - echo "lmc --debug success" || return 1 - [ "`cat /proc/sys/portals/subsystem_debug`" = "16777216" ] && \ - echo "lmc --subsystem success" || return 1 + CHECK_PTLDEBUG="`cat /proc/sys/portals/debug`" + if [ $CHECK_PTLDEBUG = "1" ]; then + echo "lmc --debug success" + else + echo "lmc --debug: want 1, have $CHECK_PTLDEBUG" + return 1 + fi + CHECK_SUBSYSTEM="`cat /proc/sys/portals/subsystem_debug`" + if [ $CHECK_SUBSYSTEM = "2" ]; then + echo "lmc --subsystem success" + else + echo "lmc --subsystem: want 2, have $CHECK_SUBSYSTEM" + return 1 + fi check_mount || return 41 - cleanup + cleanup || return $? # the new PTLDEBUG/SUBSYSTEM used for lconf --ptldebug/subsystem - PTLDEBUG="inode" - SUBSYSTEM="mds" + PTLDEBUG="inode+trace" + SUBSYSTEM="mds+ost" # check lconf --ptldebug/subsystem overriding lmc --ptldebug/subsystem start_ost start_mds + CHECK_PTLDEBUG="`do_facet mds cat /proc/sys/portals/debug`" + if [ $CHECK_PTLDEBUG = "3" ]; then + echo "lconf --debug success" + else + echo "lconf --debug: want 3, have $CHECK_PTLDEBUG" + return 1 + fi + CHECK_SUBSYSTEM="`do_facet mds cat /proc/sys/portals/subsystem_debug`" + if [ $CHECK_SUBSYSTEM = "20" ]; then + echo "lconf --subsystem success" + else + echo "lconf --subsystem: want 20, have $CHECK_SUBSYSTEM" + return 1 + fi mount_client $MOUNT - [ "`cat /proc/sys/portals/debug`" = "2" ] && \ - echo "lconf --debug overriding success" || return 1 - [ "`cat /proc/sys/portals/subsystem_debug`" = "33554432" ] && \ - echo "lconf --subsystem overriding success" || return 1 check_mount || return 41 - cleanup + cleanup || return $? # resume the old configuration PTLDEBUG=$OLDPTLDEBUG SUBSYSTEM=$OLDSUBSYSTEM gen_config } -run_test 9 "test --ptldebug and --subsystem for lmc" + +run_test 9 "test --ptldebug and --subsystem for lmc and lconf" test_10() { OLDXMLCONFIG=$XMLCONFIG @@ -293,4 +342,157 @@ test_11() { } run_test 11 "use default lov configuration (should return error)" +test_12() { + OLDXMLCONFIG=$XMLCONFIG + XMLCONFIG="batch.xml" + BATCHFILE="batchfile" + + # test double quote + [ -f "$XMLCONFIG" ] && rm -f $XMLCONFIG + [ -f "$BATCHFILE" ] && rm -f $BATCHFILE + echo "--add net --node localhost --nid localhost.localdomain --nettype tcp" > $BATCHFILE + echo "--add mds --node localhost --mds mds1 --mkfsoptions \"-I 128\"" >> $BATCHFILE + # --mkfsoptions "-I 128" + do_lmc -m $XMLCONFIG --batch $BATCHFILE || return $? + if [ `sed -n '/>-I 128 $BATCHFILE + echo "--add mds --node localhost --mds mds1 --mkfsoptions \"-I 128" >> $BATCHFILE + # --mkfsoptions "-I 128 + do_lmc -m $XMLCONFIG --batch $BATCHFILE && return $? + echo "unmatched double quote should return error" + + # test single quote + rm -f $BATCHFILE + echo "--add net --node localhost --nid localhost.localdomain --nettype tcp" > $BATCHFILE + echo "--add mds --node localhost --mds mds1 --mkfsoptions '-I 128'" >> $BATCHFILE + # --mkfsoptions '-I 128' + do_lmc -m $XMLCONFIG --batch $BATCHFILE || return $? + if [ `sed -n '/>-I 128 $BATCHFILE + echo "--add mds --node localhost --mds mds1 --mkfsoptions '-I 128" >> $BATCHFILE + # --mkfsoptions '-I 128 + do_lmc -m $XMLCONFIG --batch $BATCHFILE && return $? + echo "unmatched single quote should return error" + + # test backslash + rm -f $BATCHFILE + echo "--add net --node localhost --nid localhost.localdomain --nettype tcp" > $BATCHFILE + echo "--add mds --node localhost --mds mds1 --mkfsoptions \-\I\ \128" >> $BATCHFILE + # --mkfsoptions \-\I\ \128 + do_lmc -m $XMLCONFIG --batch $BATCHFILE || return $? + if [ `sed -n '/>-I 128 $BATCHFILE + echo "--add mds --node localhost --mds mds1 --mkfsoptions -I\ 128\\" >> $BATCHFILE + # --mkfsoptions -I\ 128\ + do_lmc -m $XMLCONFIG --batch $BATCHFILE && return $? + echo "backslash followed by nothing should return error" + + rm -f $BATCHFILE + XMLCONFIG=$OLDXMLCONFIG +} +run_test 12 "lmc --batch, with single/double quote, backslash in batchfile" + +test_13() { + OLDXMLCONFIG=$XMLCONFIG + XMLCONFIG="conf13-1.xml" + SECONDXMLCONFIG="conf13-2.xml" + + # check long uuid will be truncated properly and uniquely + echo "To generate XML configuration file(with long ost name): $XMLCONFIG" + [ -f "$XMLCONFIG" ] && rm -f $XMLCONFIG + do_lmc --add net --node localhost --nid localhost.localdomain --nettype tcp + do_lmc --add mds --node localhost --mds mds1_name_longer_than_31characters + do_lmc --add mds --node localhost --mds mds2_name_longer_than_31characters + if [ ! -f "$XMLCONFIG" ]; then + echo "Error:no file $XMLCONFIG created!" + return 1 + fi + EXPECTEDMDS1UUID="e_longer_than_31characters_UUID" + EXPECTEDMDS2UUID="longer_than_31characters_UUID_2" + FOUNDMDS1UUID=`awk -F"'" '//{print $2}' $XMLCONFIG` + EXPECTEDSTRING="mkfsoptions>-V" + if [ $EXPECTEDSTRING != $FOUNDSTRING ]; then + echo "Error:expected string: $EXPECTEDSTRING; found: $FOUNDSTRING" + return 1 + fi + echo "Success:mkfsoptions for ost written to xml file correctly." + + # mount lustre to test lconf mkfsoptions-parsing + echo "mount lustre" + start_ost + start_mds + mount_client $MOUNT || return $? + cleanup + echo "lconf mkfsoptions-parsing for ost success" + + gen_config +} +run_test 14 "test mkfsoptions of ost for lmc and lconf" + equals_msg "Done" diff --git a/lustre/tests/replay-single-upcall.sh b/lustre/tests/replay-single-upcall.sh index 17e04c9..59c1371 100755 --- a/lustre/tests/replay-single-upcall.sh +++ b/lustre/tests/replay-single-upcall.sh @@ -9,6 +9,10 @@ mkdir -p $TESTDIR/logs exec >> $TESTDIR/logs/recovery-`hostname`.log exec 2>&1 +echo ========================================== +echo "start upcall: `date`" +echo "command line: $0 $*" + set -xv failed_import() { diff --git a/lustre/tests/run-llog.sh b/lustre/tests/run-llog.sh index 6a4ffc1..5d46e2b 100644 --- a/lustre/tests/run-llog.sh +++ b/lustre/tests/run-llog.sh @@ -1,6 +1,10 @@ #!/bin/bash PATH=`dirname $0`:`dirname $0`/../utils:$PATH TMP=${TMP:-/tmp} + +MDS=`ls /proc/fs/lustre/mds | grep -v num_refs | head -1` +[ -z "$MDS" ] && echo "no MDS available, skipping llog test" && exit 0 + insmod ../obdclass/llog_test.o || exit 1 lctl modules > $TMP/ogdb-`hostname` echo "NOW reload debugging syms.." @@ -9,7 +13,7 @@ RC=0 lctl <