merging HEAD (including b_orphan, b_recovery) into b_size

author phil <phil>
Sun, 8 Feb 2004 20:12:10 +0000 (20:12 +0000)
committer phil <phil>
Sun, 8 Feb 2004 20:12:10 +0000 (20:12 +0000)
diff --git a/lustre/include/linux/lvfs_linux.h b/lustre/include/linux/lvfs_linux.h
index b38d6f0..71fc431 100644
--- a/lustre/include/linux/lvfs_linux.h
+++ b/lustre/include/linux/lvfs_linux.h
@@ -1,3 +1,6 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */ 
  #ifndef __LVFS_LINUX_H__
  #define __LVFS_LINUX_H__
  
@@ -22,17 +25,14 @@ struct l_file *l_dentry_open(struct obd_run_ctxt *, struct l_dentry *,
                               int flags);
  
  struct l_linux_dirent {
-        ino_t           d_ino;
-        unsigned long   d_off;
-        unsigned short  d_reclen;
-        char            d_name[1]; 
+        struct list_head lld_list;
+        ino_t           lld_ino;
+        unsigned long   lld_off;
+        char            lld_name[LL_FID_NAMELEN];
  };
-
  struct l_readdir_callback {
-        struct l_linux_dirent *current_dir;
-        struct l_linux_dirent *previous;
-        int count;
-        int error;
+        struct l_linux_dirent *lrc_dirent;
+        struct list_head      *lrc_list;
  };
  
  #endif
diff --git a/lustre/kernel_patches/patches/ext3-extents-2.4.20.patch b/lustre/kernel_patches/patches/ext3-extents-2.4.20.patch
index 1da5f7c..6422982 100644
--- a/lustre/kernel_patches/patches/ext3-extents-2.4.20.patch
+++ b/lustre/kernel_patches/patches/ext3-extents-2.4.20.patch
@@ -1,8 +1,8 @@
-Index: linux-2.4.20/fs/ext3/extents.c
+Index: linux-2.4.24/fs/ext3/extents.c
  ===================================================================
---- linux-2.4.20.orig/fs/ext3/extents.c        2003-01-30 13:24:37.000000000 +0300
-+++ linux-2.4.20/fs/ext3/extents.c     2004-01-24 14:19:29.000000000 +0300
-@@ -0,0 +1,2224 @@
+--- linux-2.4.24.orig/fs/ext3/extents.c        2003-01-30 13:24:37.000000000 +0300
++++ linux-2.4.24/fs/ext3/extents.c     2004-02-06 10:18:42.000000000 +0300
+@@ -0,0 +1,2347 @@
  +/*
  + * Copyright (C) 2003 Alex Tomas <alex@clusterfs.com>
  + *
@@ -88,13 +88,16 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +                              struct ext3_extents_tree *tree,
  +                              struct ext3_ext_path *path)
  +{
++      int err;
++
  +      if (path->p_bh) {
  +              /* path points to block */
-+              return ext3_journal_get_write_access(handle, path->p_bh);
++              err = ext3_journal_get_write_access(handle, path->p_bh);
++      } else {
++              /* path points to leaf/index in inode body */
++              err = ext3_ext_get_access_for_root(handle, tree);
  +      }
-+
-+      /* path points to leaf/index in inode body */
-+      return ext3_ext_get_access_for_root(handle, tree);
++      return err;
  +}
  +
  +/*
@@ -106,13 +109,15 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +static int ext3_ext_dirty(handle_t *handle, struct ext3_extents_tree *tree,
  +                              struct ext3_ext_path *path)
  +{
++      int err;
  +      if (path->p_bh) {
  +              /* path points to block */
-+              return ext3_journal_dirty_metadata(handle, path->p_bh);
++              err =ext3_journal_dirty_metadata(handle, path->p_bh);
++      } else {
++              /* path points to leaf/index in inode body */
++              err = ext3_ext_mark_root_dirty(handle, tree);
  +      }
-+
-+      /* path points to leaf/index in inode body */
-+      return ext3_ext_mark_root_dirty(handle, tree);
++      return err;
  +}
  +
  +static int inline
@@ -148,6 +153,13 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +      return newblock;
  +}
  +
++static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree)
++{
++      struct ext3_extent_header *neh;
++      neh = EXT_ROOT_HDR(tree);
++      neh->e_generation++;
++}
++
  +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree)
  +{
  +      int size;
@@ -268,6 +280,7 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +      struct ext3_extent_idx *ix;
  +      int l = 0, k, r;
  +
++      EXT_ASSERT(eh->e_magic == EXT3_EXT_MAGIC);
  +      EXT_ASSERT(eh->e_num <= eh->e_max);
  +      EXT_ASSERT(eh->e_num > 0);
  +
@@ -303,6 +316,12 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +
  +              chix = ix = EXT_FIRST_INDEX(eh);
  +              for (k = 0; k < eh->e_num; k++, ix++) {
++                      if (k != 0 && ix->e_block <= ix[-1].e_block) {
++                              printk("k=%d, ix=0x%p, first=0x%p\n", k,
++                                      ix, EXT_FIRST_INDEX(eh));
++                              printk("%u <= %u\n",
++                                      ix->e_block,ix[-1].e_block);
++                      }
  +                      EXT_ASSERT(k == 0 || ix->e_block > ix[-1].e_block);
  +                      if (block < ix->e_block) 
  +                              break;
@@ -325,6 +344,7 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +      struct ext3_extent *ex;
  +      int l = 0, k, r;
  +
++      EXT_ASSERT(eh->e_magic == EXT3_EXT_MAGIC);
  +      EXT_ASSERT(eh->e_num <= eh->e_max);
  +
  +      if (eh->e_num == 0) {
@@ -388,6 +408,7 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +      eh = EXT_ROOT_HDR(tree);
  +      eh->e_depth = 0;
  +      eh->e_num = 0;
++      eh->e_magic = EXT3_EXT_MAGIC;
  +      eh->e_max = ext3_ext_space_root(tree);
  +      ext3_ext_mark_root_dirty(handle, tree);
  +      return 0;
@@ -406,8 +427,10 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +      EXT_ASSERT(tree->root);
  +
  +      eh = EXT_ROOT_HDR(tree);
++      EXT_ASSERT(eh);
  +      i = depth = EXT_DEPTH(tree);
  +      EXT_ASSERT(eh->e_max);
++      EXT_ASSERT(eh->e_magic == EXT3_EXT_MAGIC);
  +      EXT_ASSERT(i == 0 || eh->e_num > 0);
  +      
  +      /* account possible depth increase */
@@ -500,6 +523,9 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +      ix->e_leaf = ptr;
  +      curp->p_hdr->e_num++;
  +
++      EXT_ASSERT(curp->p_hdr->e_num <= curp->p_hdr->e_max);
++      EXT_ASSERT(ix <= EXT_LAST_INDEX(curp->p_hdr));
++
  +      err = ext3_ext_dirty(handle, tree, curp);
  +      ext3_std_error(tree->inode->i_sb, err);
  +
@@ -534,7 +560,7 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +
  +      /* if current leaf will be splitted, then we should use 
  +       * border from split point */
-+      
++      EXT_ASSERT(path[depth].p_ext <= EXT_MAX_EXTENT(path[depth].p_hdr));
  +      if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
  +              border = path[depth].p_ext[1].e_block;
  +              ext_debug(tree, "leaf will be splitted."
@@ -589,6 +615,8 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +      neh = EXT_BLOCK_HDR(bh);
  +      neh->e_num = 0;
  +      neh->e_max = ext3_ext_space_block(tree);
++      neh->e_magic = EXT3_EXT_MAGIC;
++      neh->e_depth = 0;
  +      ex = EXT_FIRST_EXTENT(neh);
  +
  +      /* move remain of path[depth] to the new leaf */
@@ -599,10 +627,11 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +      path[depth].p_ext++;
  +      while (path[depth].p_ext <=
  +                      EXT_MAX_EXTENT(path[depth].p_hdr)) {
-+              ext_debug(tree, "move %d:%d:%d in new leaf\n",
++              ext_debug(tree, "move %d:%d:%d in new leaf %lu\n",
  +                              path[depth].p_ext->e_block,
  +                              path[depth].p_ext->e_start,
-+                              path[depth].p_ext->e_num);
++                              path[depth].p_ext->e_num,
++                              newblock);
  +              memmove(ex++, path[depth].p_ext++,
  +                              sizeof(struct ext3_extent));
  +              neh->e_num++;
@@ -618,10 +647,10 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +
  +      /* correct old leaf */
  +      if (m) {
-+              if ((err = ext3_ext_get_access(handle, tree, path)))
++              if ((err = ext3_ext_get_access(handle, tree, path + depth)))
  +                      goto cleanup;
  +              path[depth].p_hdr->e_num -= m;
-+              if ((err = ext3_ext_dirty(handle, tree, path)))
++              if ((err = ext3_ext_dirty(handle, tree, path + depth)))
  +                      goto cleanup;
  +              
  +      }
@@ -649,33 +678,33 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +
  +              neh = EXT_BLOCK_HDR(bh);
  +              neh->e_num = 1;
++              neh->e_magic = EXT3_EXT_MAGIC;
  +              neh->e_max = ext3_ext_space_block_idx(tree);
++              neh->e_depth = depth - i; 
  +              fidx = EXT_FIRST_INDEX(neh);
  +              fidx->e_block = border;
  +              fidx->e_leaf = oldblock;
  +
-+              ext_debug(tree, "int.index at %d (block %u): %d -> %d\n",
-+                              i, (unsigned) newblock,
-+                              (int) border,
-+                              (int) oldblock);
++              ext_debug(tree, "int.index at %d (block %lu): %lu -> %lu\n",
++                              i, newblock, border, oldblock);
  +              /* copy indexes */
  +              m = 0;
  +              path[i].p_idx++;
++
  +              ext_debug(tree, "cur 0x%p, last 0x%p\n", path[i].p_idx,
  +                              EXT_MAX_INDEX(path[i].p_hdr));
  +              EXT_ASSERT(EXT_MAX_INDEX(path[i].p_hdr) ==
  +                              EXT_LAST_INDEX(path[i].p_hdr));
-+              while (path[i].p_idx <=
-+                              EXT_MAX_INDEX(path[i].p_hdr)) {
-+                      ext_debug(tree, "%d: move %d:%d in new index\n",
++              while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
++                      ext_debug(tree, "%d: move %d:%d in new index %lu\n",
  +                                      i, path[i].p_idx->e_block,
-+                                      path[i].p_idx->e_leaf);
++                                      path[i].p_idx->e_leaf, newblock);
  +                      memmove(++fidx, path[i].p_idx++,
  +                                      sizeof(struct ext3_extent_idx));
  +                      neh->e_num++;
++                      EXT_ASSERT(neh->e_num <= neh->e_max);
  +                      m++;
  +              }
-+
  +              mark_buffer_uptodate(bh, 1);
  +              unlock_buffer(bh);
  +
@@ -734,12 +763,12 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +                                      struct ext3_ext_path *path,
  +                                      struct ext3_extent *newext)
  +{
-+      struct buffer_head *bh;
  +      struct ext3_ext_path *curp = path;
  +      struct ext3_extent_header *neh;
  +      struct ext3_extent_idx *fidx;
-+      int len, err = 0;
++      struct buffer_head *bh;
  +      unsigned long newblock;
++      int err = 0;
  +
  +      newblock = ext3_ext_new_block(handle, tree, path, newext, &err);
  +      if (newblock == 0)
@@ -759,14 +788,17 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +      }
  +
  +      /* move top-level index/leaf into new block */
-+      len = sizeof(struct ext3_extent_header) +
-+              sizeof(struct ext3_extent) * curp->p_hdr->e_max;
-+      EXT_ASSERT(len >= 0 && len < 4096);
-+      memmove(bh->b_data, curp->p_hdr, len);
++      memmove(bh->b_data, curp->p_hdr, tree->buffer_len);
  +
  +      /* set size of new block */
  +      neh = EXT_BLOCK_HDR(bh);
-+      neh->e_max = ext3_ext_space_block(tree);
++      /* old root could have indexes or leaves
++       * so calculate e_max right way */
++      if (EXT_DEPTH(tree))
++              neh->e_max = ext3_ext_space_block_idx(tree);
++      else
++              neh->e_max = ext3_ext_space_block(tree);
++      neh->e_magic = EXT3_EXT_MAGIC;
  +      mark_buffer_uptodate(bh, 1);
  +      unlock_buffer(bh);
  +
@@ -777,9 +809,11 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +      if ((err = ext3_ext_get_access(handle, tree, curp)))
  +              goto out;
  +
++      curp->p_hdr->e_magic = EXT3_EXT_MAGIC;
  +      curp->p_hdr->e_max = ext3_ext_space_root_idx(tree);
  +      curp->p_hdr->e_num = 1;
  +      curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr);
++      /* FIXME: it works, but actually path[0] can be index */
  +      curp->p_idx->e_block = EXT_FIRST_EXTENT(path[0].p_hdr)->e_block;
  +      curp->p_idx->e_leaf = newblock;
  +
@@ -839,7 +873,7 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +              path = ext3_ext_find_extent(tree, newext->e_block, path);
  +              if (IS_ERR(path))
  +                      err = PTR_ERR(path);
-+      
++
  +              /*
  +               * only first (depth 0 -> 1) produces free space
  +               * in all other cases we have to split growed tree
@@ -1003,15 +1037,15 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +                              struct ext3_ext_path *path,
  +                              struct ext3_extent *newext)
  +{
-+      int depth, len;
  +      struct ext3_extent_header * eh;
-+      struct ext3_extent *ex;
++      struct ext3_extent *ex, *fex;
  +      struct ext3_extent *nearex; /* nearest extent */
  +      struct ext3_ext_path *npath = NULL;
-+      int err;
++      int depth, len, err, next;
  +
  +      depth = EXT_DEPTH(tree);
  +      ex = path[depth].p_ext;
++      EXT_ASSERT(path[depth].p_hdr);
  +
  +      /* try to insert block into found extent and return */
  +      if (ex && ext3_can_extents_be_merged(tree, ex, newext)) {
@@ -1021,43 +1055,49 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +              if ((err = ext3_ext_get_access(handle, tree, path + depth)))
  +                      return err;
  +              ex->e_num += newext->e_num;
-+              err = ext3_ext_dirty(handle, tree, path + depth);
-+              return err;
++              eh = path[depth].p_hdr;
++              nearex = ex;
++              goto merge;
  +      }
  +
  +repeat:
  +      depth = EXT_DEPTH(tree);
  +      eh = path[depth].p_hdr;
-+      if (eh->e_num == eh->e_max) {
-+              /* probably next leaf has space for us? */
-+              int next = ext3_ext_next_leaf_block(tree, path);
-+              if (next != 0xffffffff) {
-+                      ext_debug(tree, "next leaf block - %d\n", next);
-+                      EXT_ASSERT(!npath);
-+                      npath = ext3_ext_find_extent(tree, next, NULL);
-+                      if (IS_ERR(npath))
-+                              return PTR_ERR(npath);
-+                      EXT_ASSERT(npath->p_depth == path->p_depth);
-+                      eh = npath[depth].p_hdr;
-+                      if (eh->e_num < eh->e_max) {
-+                              ext_debug(tree, "next leaf isnt full(%d)\n",
-+                                              eh->e_num);
-+                              path = npath;
-+                              goto repeat;
-+                      }
-+                      ext_debug(tree, "next leaf hasno free space(%d,%d)\n",
-+                                      eh->e_num, eh->e_max);
++      if (eh->e_num < eh->e_max)
++              goto has_space;
++
++      /* probably next leaf has space for us? */
++      fex = EXT_LAST_EXTENT(eh);
++      next = ext3_ext_next_leaf_block(tree, path);
++      if (newext->e_block > fex->e_block && next != 0xffffffff) {
++              ext_debug(tree, "next leaf block - %d\n", next);
++              EXT_ASSERT(!npath);
++              npath = ext3_ext_find_extent(tree, next, NULL);
++              if (IS_ERR(npath))
++                      return PTR_ERR(npath);
++              EXT_ASSERT(npath->p_depth == path->p_depth);
++              eh = npath[depth].p_hdr;
++              if (eh->e_num < eh->e_max) {
++                      ext_debug(tree, "next leaf isnt full(%d)\n",
++                                      eh->e_num);
++                      path = npath;
++                      goto repeat;
  +              }
-+              /*
-+               * there is no free space in found leaf
-+               * we're gonna add new leaf in the tree
-+               */
-+              err = ext3_ext_create_new_leaf(handle, tree, path, newext);
-+              if (err)
-+                      goto cleanup;
-+              goto repeat;
++              ext_debug(tree, "next leaf hasno free space(%d,%d)\n",
++                              eh->e_num, eh->e_max);
  +      }
  +
++      /*
++       * there is no free space in found leaf
++       * we're gonna add new leaf in the tree
++       */
++      err = ext3_ext_create_new_leaf(handle, tree, path, newext);
++      if (err)
++              goto cleanup;
++      depth = EXT_DEPTH(tree);
++      eh = path[depth].p_hdr;
++
++has_space:
  +      nearex = path[depth].p_ext;
  +
  +      if ((err = ext3_ext_get_access(handle, tree, path + depth)))
@@ -1091,22 +1131,39 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +                              "move %d from 0x%p to 0x%p\n",
  +                              newext->e_block, newext->e_start, newext->e_num,
  +                              nearex, len, nearex + 1, nearex + 2);
-+              
  +              memmove(nearex + 1, nearex, len);
  +              path[depth].p_ext = nearex;
  +      }
  +
-+      if (!err) {
-+              eh->e_num++;
-+              nearex = path[depth].p_ext;
-+              nearex->e_block = newext->e_block;
-+              nearex->e_start = newext->e_start;
-+              nearex->e_num = newext->e_num;
-+
-+              /* time to correct all indexes above */
-+              err = ext3_ext_correct_indexes(handle, tree, path);
++      eh->e_num++;
++      nearex = path[depth].p_ext;
++      nearex->e_block = newext->e_block;
++      nearex->e_start = newext->e_start;
++      nearex->e_num = newext->e_num;
++
++merge:
++      /* try to merge extents to the right */
++      while (nearex < EXT_LAST_EXTENT(eh)) {
++              if (!ext3_can_extents_be_merged(tree, nearex, nearex + 1))
++                      break;
++              /* merge with next extent! */
++              nearex->e_num += nearex[1].e_num;
++              if (nearex + 1 < EXT_LAST_EXTENT(eh)) {
++                      len = (EXT_LAST_EXTENT(eh) - nearex - 1)
++                                      * sizeof(struct ext3_extent);
++                      memmove(nearex + 1, nearex + 2, len);
++              }
++              eh->e_num--;
++              EXT_ASSERT(eh->e_num > 0);
  +      }
  +
++      /* try to merge extents to the left */
++
++      /* time to correct all indexes above */
++      err = ext3_ext_correct_indexes(handle, tree, path);
++      if (err)
++              goto cleanup;
++
  +      err = ext3_ext_dirty(handle, tree, path + depth);
  +
  +cleanup:
@@ -1114,7 +1171,7 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +              ext3_ext_drop_refs(npath);
  +              kfree(npath);
  +      }
-+              
++      ext3_ext_tree_changed(tree);
  +      return err;
  +}
  +
@@ -1124,6 +1181,7 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +      struct ext3_ext_path *path = NULL;
  +      struct ext3_extent *ex, cbex;
  +      unsigned long next, start = 0, end = 0;
++      unsigned long last = block + num;
  +      int depth, exists, err = 0;
  +
  +      EXT_ASSERT(tree);
@@ -1131,15 +1189,18 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +      EXT_ASSERT(tree->inode);
  +      EXT_ASSERT(tree->root);
  +
-+      while (num > 0 && block != 0xfffffffff) {
++      while (block < last && block != 0xfffffffff) {
++              num = last - block;
  +              /* find extent for this block */
  +              path = ext3_ext_find_extent(tree, block, path);
  +              if (IS_ERR(path)) {
  +                      err = PTR_ERR(path);
++                      path = NULL;
  +                      break;
  +              }
  +
  +              depth = EXT_DEPTH(tree);
++              EXT_ASSERT(path[depth].p_hdr);
  +              ex = path[depth].p_ext;
  +              next = ext3_ext_next_allocated_block(path);
  +
@@ -1148,58 +1209,61 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +                      /* there is no extent yet, so try to allocate
  +                       * all requested space */
  +                      start = block;
-+                      end = block + num - 1;
++                      end = block + num;
  +              } else if (ex->e_block > block) {
  +                      /* need to allocate space before found extent */
  +                      start = block;
-+                      end = ex->e_block - 1;
-+                      if (block + num - 1 < end)
-+                              end = block + num - 1;
++                      end = ex->e_block;
++                      if (block + num < end)
++                              end = block + num;
  +              } else if (block >= ex->e_block + ex->e_num) {
  +                      /* need to allocate space after found extent */
  +                      start = block;
-+                      end = block + num - 1;
++                      end = block + num;
  +                      if (end >= next)
-+                              end = next - 1;
++                              end = next;
  +              } else if (block >= ex->e_block) {
  +                      /* 
  +                       * some part of requested space is covered
  +                       * by found extent
  +                       */
  +                      start = block;
-+                      end = ex->e_block + ex->e_num - 1;
-+                      if (block + num - 1 < end)
-+                              end = block + num - 1;
++                      end = ex->e_block + ex->e_num;
++                      if (block + num < end)
++                              end = block + num;
  +                      exists = 1;
  +              } else {
  +                      BUG();
  +              }
++              EXT_ASSERT(end > start);
  +
  +              if (!exists) {
  +                      cbex.e_block = start;
-+                      cbex.e_num = end - start + 1;
++                      cbex.e_num = end - start;
  +                      cbex.e_start = 0;
  +              } else
  +                      cbex = *ex;
  +
++              EXT_ASSERT(path[depth].p_hdr);
  +              err = func(tree, path, &cbex, exists);
++              ext3_ext_drop_refs(path);
++
  +              if (err < 0)
  +                      break;
-+
-+              if (err == EXT_BREAK) {
++              if (err == EXT_REPEAT)
++                      continue;
++              else if (err == EXT_BREAK) {
  +                      err = 0;
  +                      break;
  +              }
  +
  +              if (EXT_DEPTH(tree) != depth) {
  +                      /* depth was changed. we have to realloc path */
-+                      ext3_ext_drop_refs(path);
  +                      kfree(path);
  +                      path = NULL;
  +              }
  +
-+              block += cbex.e_num;
-+              num -= cbex.e_num;
++              block = cbex.e_block + cbex.e_num;
  +      }
  +
  +      if (path) {
@@ -1241,6 +1305,9 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +      int depth = EXT_DEPTH(tree);
  +      struct ext3_extent *ex, gex;
  +
++      if (!tree->cex)
++              return;
++
  +      ex = path[depth].p_ext;
  +      if (ex == NULL) {
  +              /* there is no extent yet, so gap is [0;-] */
@@ -1291,7 +1358,7 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +              ex->e_block = cex->e_block;
  +              ex->e_start = cex->e_start;
  +              ex->e_num = cex->e_num;
-+              ext_debug(tree, "%lu cached by %lu:%lu:%lu(gap)\n",
++              ext_debug(tree, "%lu cached by %lu:%lu:%lu\n",
  +                              (unsigned long) block,
  +                              (unsigned long) ex->e_block,
  +                              (unsigned long) ex->e_num,
@@ -1325,7 +1392,7 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +      ext_debug(tree, "index is empty, remove it, free block %d\n",
  +                      path->p_idx->e_leaf);
  +      bh = sb_get_hash_table(tree->inode->i_sb, path->p_idx->e_leaf);
-+      ext3_forget(handle, 0, tree->inode, bh, path->p_idx->e_leaf);
++      ext3_forget(handle, 1, tree->inode, bh, path->p_idx->e_leaf);
  +      ext3_free_blocks(handle, tree->inode, path->p_idx->e_leaf, 1);
  +      return err;
  +}
@@ -1437,6 +1504,7 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +      eh = path[depth].p_hdr;
  +      EXT_ASSERT(eh);
  +      EXT_ASSERT(eh->e_num <= eh->e_max);
++      EXT_ASSERT(eh->e_magic == EXT3_EXT_MAGIC);
  +      
  +      /* find where to start removing */
  +      le = ex = EXT_LAST_EXTENT(eh);
@@ -1638,6 +1706,7 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +              }
  +
  +              EXT_ASSERT(path[i].p_hdr->e_num <= path[i].p_hdr->e_max);
++              EXT_ASSERT(path[i].p_hdr->e_magic == EXT3_EXT_MAGIC);
  +              
  +              if (!path[i].p_idx) {
  +                      /* this level hasn't touched yet */
@@ -1696,6 +1765,7 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +                      err = ext3_ext_dirty(handle, tree, path);
  +              }
  +      }
++      ext3_ext_tree_changed(tree);
  +
  +      kfree(path);
  +      ext3_journal_stop(handle, inode);
@@ -1712,8 +1782,16 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +       * possible initialization would be here
  +       */
  +
-+      if (test_opt(sb, EXTENTS))
-+              printk("EXT3-fs: file extents enabled\n");
++      if (test_opt(sb, EXTENTS)) {
++              printk("EXT3-fs: file extents enabled");
++#ifdef AGRESSIVE_TEST
++              printk(", agressive tests");
++#endif
++#ifdef CHECK_BINSEARCH
++              printk(", check binsearch");
++#endif
++              printk("\n");
++      }
  +}
  +
  +/*
@@ -1736,8 +1814,7 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +static int ext3_mark_buffer_dirty(handle_t *handle, void *buffer)
  +{
  +      struct inode *inode = buffer;
-+      ext3_mark_inode_dirty(handle, inode);
-+      return 0;
++      return ext3_mark_inode_dirty(handle, inode);
  +}
  +
  +static int ext3_ext_mergable(struct ext3_extent *ex1,
@@ -1771,6 +1848,8 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +{
  +      int needed = ext3_remove_blocks_credits(tree, ex, from, to);
  +      handle_t *handle = ext3_journal_start(tree->inode, needed);
++      struct buffer_head *bh;
++      int i;
  +
  +      if (IS_ERR(handle))
  +              return PTR_ERR(handle);
@@ -1781,6 +1860,10 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +              start = ex->e_start + ex->e_num - num;
  +              ext_debug(tree, "free last %lu blocks starting %lu\n",
  +                              num, start);
++              for (i = 0; i < num; i++) {
++                      bh = sb_get_hash_table(tree->inode->i_sb, start + i);
++                      ext3_forget(handle, 0, tree->inode, bh, start + i);
++              }
  +              ext3_free_blocks(handle, tree->inode, start, num);
  +      } else if (from == ex->e_block && to <= ex->e_block + ex->e_num - 1) {
  +              printk("strange request: removal %lu-%lu from %u:%u\n",
@@ -1793,8 +1876,8 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +      return 0;
  +}
  +
-+static int ext3_ext_find_goal(struct inode *inode,
-+                              struct ext3_ext_path *path)
++static int ext3_ext_find_goal(struct inode *inode, struct ext3_ext_path *path,
++                              unsigned long block)
  +{
  +      struct ext3_inode_info *ei = EXT3_I(inode);
  +      unsigned long bg_start;
@@ -1802,12 +1885,13 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +      int depth;
  +      
  +      if (path) {
++              struct ext3_extent *ex;
  +              depth = path->p_depth;
-+              /* try to find previous block */
-+              if (path[depth].p_ext)
-+                      return path[depth].p_ext->e_start +
-+                              path[depth].p_ext->e_num - 1;
  +              
++              /* try to predict block placement */
++              if ((ex = path[depth].p_ext))
++                      return ex->e_start + (block - ex->e_block);
++
  +              /* it looks index is empty
  +               * try to find starting from index itself */
  +              if (path[depth].p_bh)
@@ -1819,7 +1903,7 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +              le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block);
  +      colour = (current->pid % 16) *
  +                      (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16);
-+      return bg_start + colour;
++      return bg_start + colour + block;
  +}
  +
  +static int ext3_new_block_cb(handle_t *handle, struct ext3_extents_tree *tree,
@@ -1840,7 +1924,7 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +      if (ex->e_num == 0) {
  +              ex->e_num = 1;
  +              /* allocate new block for the extent */
-+              goal = ext3_ext_find_goal(inode, path);
++              goal = ext3_ext_find_goal(inode, path, ex->e_block);
  +              ex->e_start = ext3_new_block(handle, inode, goal, 0, 0, err);
  +              if (ex->e_start == 0) {
  +                      /* error occured: restore old extent */
@@ -1864,39 +1948,51 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +      tree->remove_extent_credits = ext3_remove_blocks_credits;
  +      tree->buffer = (void *) inode;
  +      tree->buffer_len = sizeof(EXT3_I(inode)->i_data);
-+      tree->cex = NULL;       /* FIXME: add cache store later */
++      tree->cex = (struct ext3_extent *) &EXT3_I(inode)->i_cached_extent;
  +}
  +
-+#if 0
++#if EXT3_MULTIBLOCK_ALLOCATOR
  +static int
  +ext3_ext_new_extent_cb(struct ext3_extents_tree *tree,
  +                      struct ext3_ext_path *path,
  +                      struct ext3_extent *newex, int exist)
  +{
  +      struct inode *inode = tree->inode;
++      struct buffer_head *bh;
  +      int count, err, goal;
++      unsigned long pblock;
++      unsigned long tgen;
  +      loff_t new_i_size;
  +      handle_t *handle;
-+      unsigned long pblock;
++      int i;
  +
  +      if (exist)
  +              return EXT_CONTINUE;
  +
++      tgen = EXT_GENERATION(tree);
  +      count = ext3_ext_calc_credits_for_insert(tree, path);
++      up_write(&EXT3_I(inode)->truncate_sem);
++
  +      handle = ext3_journal_start(inode, count + EXT3_ALLOC_NEEDED + 1);
-+      if (IS_ERR(handle))
++      if (IS_ERR(handle)) {
++              down_write(&EXT3_I(inode)->truncate_sem);
  +              return PTR_ERR(handle);
++      }
  +
-+      goal = ext3_ext_find_goal(inode, path);
++      if (tgen != EXT_GENERATION(tree)) {
++              /* the tree has changed. so path can be invalid at moment */
++              ext3_journal_stop(handle, inode);
++              down_write(&EXT3_I(inode)->truncate_sem);
++              return EXT_REPEAT;
++      }
++
++      down_write(&EXT3_I(inode)->truncate_sem);
++      goal = ext3_ext_find_goal(inode, path, newex->e_block);
  +      count = newex->e_num;
-+#ifdef EXT3_MULTIBLOCK_ALLOCATOR
-+      pblock = ext3_new_block(handle, inode, goal, &count, NULL, &err);
-+      EXT_ASSERT(count <= num);
-+      /* FIXME: error handling here */
-+      EXT_ASSERT(err == 0);
-+#else
-+      pblock = 0;
-+#endif
++      pblock = ext3_new_blocks(handle, inode, &count, goal, &err);
++      if (!pblock)
++              goto out;
++      EXT_ASSERT(count <= newex->e_num);
  +
  +      /* insert new extent */
  +      newex->e_start = pblock;
@@ -1905,12 +2001,22 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +      if (err)
  +              goto out;
  +
++      /* blocks have been allocated for data, so it is time to clear the
++       * dirty state of the corresponding buffer_heads to avoid corruption */
++      for (i = 0; i < newex->e_num; i++) {
++              bh = sb_get_hash_table(inode->i_sb, newex->e_start + i);
++              if (bh) {
++                      mark_buffer_clean(bh);
++                      wait_on_buffer(bh);
++                      clear_bit(BH_Req, &bh->b_state);
++                      __brelse(bh);
++              }
++      }
++
  +      /* correct on-disk inode size */
  +      if (newex->e_num > 0) {
  +              new_i_size = (loff_t) newex->e_block + newex->e_num;
  +              new_i_size = new_i_size << inode->i_blkbits;
-+              if (new_i_size > i_size_read(inode))
-+                      new_i_size = i_size_read(inode);
  +              if (new_i_size > EXT3_I(inode)->i_disksize) {
  +                      EXT3_I(inode)->i_disksize = new_i_size;
  +                      err = ext3_mark_inode_dirty(handle, inode);
@@ -1929,14 +2035,13 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +      struct ext3_extents_tree tree;
  +      int err;
  +
++      ext3_init_tree_desc(&tree, inode);
  +      ext_debug(&tree, "blocks %lu-%lu requested for inode %u\n",
  +                      block, block + num,(unsigned) inode->i_ino);
-+
-+      ext3_init_tree_desc(&tree, inode);
-+      down(&EXT3_I(inode)->truncate_sem);
++      down_write(&EXT3_I(inode)->truncate_sem);
  +      err = ext3_ext_walk_space(&tree, block, num, ext3_ext_new_extent_cb);
  +      ext3_ext_invalidate_cache(&tree);
-+      up(&EXT3_I(inode)->truncate_sem);
++      up_write(&EXT3_I(inode)->truncate_sem);
  +
  +      return err;
  +}
@@ -1974,6 +2079,7 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +      path = ext3_ext_find_extent(&tree, iblock, NULL);
  +      if (IS_ERR(path)) {
  +              err = PTR_ERR(path);
++              path = NULL;
  +              goto out2;
  +      }
  +
@@ -2009,7 +2115,7 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +      }
  +
  +      /* allocate new block */
-+      goal = ext3_ext_find_goal(inode, path);
++      goal = ext3_ext_find_goal(inode, path, iblock);
  +      newblock = ext3_new_block(handle, inode, goal, 0, 0, &err);
  +      if (!newblock)
  +              goto out2;
@@ -2190,6 +2296,9 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +{
  +      int err = 0;
  +
++      if (!(EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL))
++              return -EINVAL;
++
  +      if (cmd == EXT3_IOC_GET_EXTENTS) {
  +              struct ext3_extent_buf buf;
  +              struct ext3_extents_tree tree;
@@ -2201,8 +2310,10 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +              buf.cur = buf.buffer;
  +              buf.err = 0;
  +              tree.private = &buf;
++              down_write(&EXT3_I(inode)->truncate_sem);
  +              err = ext3_ext_walk_space(&tree, buf.start, 0xffffffff,
  +                                              ext3_ext_store_extent_cb);
++              up_write(&EXT3_I(inode)->truncate_sem);
  +              if (err == 0)
  +                      err = buf.err;
  +      } else if (cmd == EXT3_IOC_GET_TREE_STATS) {
@@ -2210,28 +2321,40 @@ Index: linux-2.4.20/fs/ext3/extents.c
  +              struct ext3_extents_tree tree;
  +
  +              ext3_init_tree_desc(&tree, inode);
++              down_write(&EXT3_I(inode)->truncate_sem);
  +              buf.depth = EXT_DEPTH(&tree);
  +              buf.extents_num = 0;
  +              buf.leaf_num = 0;
  +              tree.private = &buf;
  +              err = ext3_ext_walk_space(&tree, 0, 0xffffffff,
  +                                              ext3_ext_collect_stats_cb);
++              up_write(&EXT3_I(inode)->truncate_sem);
  +              if (!err)
  +                      err = copy_to_user((void *) arg, &buf, sizeof(buf));
  +      } else if (cmd == EXT3_IOC_GET_TREE_DEPTH) {
  +              struct ext3_extents_tree tree;
  +              ext3_init_tree_desc(&tree, inode);
++              down_write(&EXT3_I(inode)->truncate_sem);
  +              err = EXT_DEPTH(&tree);
++              up_write(&EXT3_I(inode)->truncate_sem);
  +      }
  +
  +      return err;
  +}
  +
-Index: linux-2.4.20/fs/ext3/ialloc.c
++EXPORT_SYMBOL(ext3_init_tree_desc);
++EXPORT_SYMBOL(ext3_mark_inode_dirty);
++EXPORT_SYMBOL(ext3_ext_invalidate_cache);
++EXPORT_SYMBOL(ext3_ext_insert_extent);
++EXPORT_SYMBOL(ext3_ext_walk_space);
++EXPORT_SYMBOL(ext3_ext_find_goal);
++EXPORT_SYMBOL(ext3_ext_calc_credits_for_insert);
++
+Index: linux-2.4.24/fs/ext3/ialloc.c
  ===================================================================
---- linux-2.4.20.orig/fs/ext3/ialloc.c 2004-01-23 19:00:25.000000000 +0300
-+++ linux-2.4.20/fs/ext3/ialloc.c      2004-01-24 00:45:20.000000000 +0300
-@@ -593,11 +593,13 @@
+--- linux-2.4.24.orig/fs/ext3/ialloc.c 2004-01-14 02:58:45.000000000 +0300
++++ linux-2.4.24/fs/ext3/ialloc.c      2004-01-26 23:17:19.000000000 +0300
+@@ -592,11 +592,13 @@
                 iloc.bh = NULL;
                 goto fail;
         }
@@ -2247,10 +2370,10 @@ Index: linux-2.4.20/fs/ext3/ialloc.c
         unlock_super (sb);
         if(DQUOT_ALLOC_INODE(inode)) {
                 DQUOT_DROP(inode);
-Index: linux-2.4.20/fs/ext3/inode.c
+Index: linux-2.4.24/fs/ext3/inode.c
  ===================================================================
---- linux-2.4.20.orig/fs/ext3/inode.c  2004-01-23 19:00:25.000000000 +0300
-+++ linux-2.4.20/fs/ext3/inode.c       2004-01-24 04:34:04.000000000 +0300
+--- linux-2.4.24.orig/fs/ext3/inode.c  2004-01-14 02:58:45.000000000 +0300
++++ linux-2.4.24/fs/ext3/inode.c       2004-01-26 23:17:19.000000000 +0300
  @@ -848,6 +848,15 @@
         goto reread;
   }
@@ -2304,7 +2427,7 @@ Index: linux-2.4.20/fs/ext3/inode.c
         handle = start_transaction(inode);
         if (IS_ERR(handle))
                 return;         /* AKPM: return what? */
-@@ -2537,6 +2549,9 @@
+@@ -2536,6 +2548,9 @@
         int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3;
         int ret;
         
@@ -2314,7 +2437,7 @@ Index: linux-2.4.20/fs/ext3/inode.c
         if (ext3_should_journal_data(inode))
                 ret = 3 * (bpp + indirects) + 2;
         else
-@@ -2973,7 +2988,7 @@
+@@ -2972,7 +2987,7 @@
   
         /* alloc blocks one by one */
         for (i = 0; i < nblocks; i++) {
@@ -2323,7 +2446,7 @@ Index: linux-2.4.20/fs/ext3/inode.c
                                                 &bh_tmp, 1);
                 if (ret)
                         break;
-@@ -3049,7 +3064,7 @@
+@@ -3048,7 +3063,7 @@
                   if (blocks[i] != 0)
                           continue;
   
@@ -2332,24 +2455,26 @@ Index: linux-2.4.20/fs/ext3/inode.c
                   if (rc) {
                           printk(KERN_INFO "ext3_map_inode_page: error %d "
                                  "allocating block %ld\n", rc, iblock);
-Index: linux-2.4.20/fs/ext3/Makefile
+Index: linux-2.4.24/fs/ext3/Makefile
  ===================================================================
---- linux-2.4.20.orig/fs/ext3/Makefile 2004-01-23 19:00:42.000000000 +0300
-+++ linux-2.4.20/fs/ext3/Makefile      2004-01-24 00:45:20.000000000 +0300
-@@ -13,7 +13,7 @@
+--- linux-2.4.24.orig/fs/ext3/Makefile 2004-01-14 02:58:45.000000000 +0300
++++ linux-2.4.24/fs/ext3/Makefile      2004-02-05 18:44:25.000000000 +0300
+@@ -13,7 +13,9 @@
   
   obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
                 ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o \
  -              xattr_trusted.o
  +              xattr_trusted.o extents.o
++export-objs += extents.o
++
   obj-m    := $(O_TARGET)
   
   export-objs += xattr.o
-Index: linux-2.4.20/fs/ext3/super.c
+Index: linux-2.4.24/fs/ext3/super.c
  ===================================================================
---- linux-2.4.20.orig/fs/ext3/super.c  2004-01-23 19:00:25.000000000 +0300
-+++ linux-2.4.20/fs/ext3/super.c       2004-01-24 04:30:14.000000000 +0300
-@@ -623,6 +623,7 @@
+--- linux-2.4.24.orig/fs/ext3/super.c  2004-01-14 02:58:45.000000000 +0300
++++ linux-2.4.24/fs/ext3/super.c       2004-01-26 23:17:19.000000000 +0300
+@@ -530,6 +530,7 @@
         int i;
   
         J_ASSERT(sbi->s_delete_inodes == 0);
@@ -2357,7 +2482,7 @@ Index: linux-2.4.20/fs/ext3/super.c
         ext3_xattr_put_super(sb);
         journal_destroy(sbi->s_journal);
         if (!(sb->s_flags & MS_RDONLY)) {
-@@ -796,6 +797,10 @@
+@@ -702,6 +703,10 @@
                                 return 0;
                         }
                 }
@@ -2368,7 +2493,7 @@ Index: linux-2.4.20/fs/ext3/super.c
                 else if (!strcmp (this_char, "grpid") ||
                          !strcmp (this_char, "bsdgroups"))
                         set_opt (*mount_options, GRPID);
-@@ -1485,6 +1490,8 @@
+@@ -1392,6 +1397,8 @@
                 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
                 "writeback");
   
@@ -2377,11 +2502,11 @@ Index: linux-2.4.20/fs/ext3/super.c
         return sb;
   
   failed_mount3:
-Index: linux-2.4.20/fs/ext3/ioctl.c
+Index: linux-2.4.24/fs/ext3/ioctl.c
  ===================================================================
---- linux-2.4.20.orig/fs/ext3/ioctl.c  2004-01-13 17:00:09.000000000 +0300
-+++ linux-2.4.20/fs/ext3/ioctl.c       2004-01-24 14:54:31.000000000 +0300
-@@ -189,6 +189,10 @@
+--- linux-2.4.24.orig/fs/ext3/ioctl.c  2004-01-14 02:58:42.000000000 +0300
++++ linux-2.4.24/fs/ext3/ioctl.c       2004-01-26 23:17:19.000000000 +0300
+@@ -174,6 +174,10 @@
                         return ret;
                 }
   #endif
@@ -2392,10 +2517,10 @@ Index: linux-2.4.20/fs/ext3/ioctl.c
         default:
                 return -ENOTTY;
         }
-Index: linux-2.4.20/include/linux/ext3_fs.h
+Index: linux-2.4.24/include/linux/ext3_fs.h
  ===================================================================
---- linux-2.4.20.orig/include/linux/ext3_fs.h  2004-01-23 19:00:25.000000000 +0300
-+++ linux-2.4.20/include/linux/ext3_fs.h       2004-01-24 01:28:06.000000000 +0300
+--- linux-2.4.24.orig/include/linux/ext3_fs.h  2004-01-14 02:58:45.000000000 +0300
++++ linux-2.4.24/include/linux/ext3_fs.h       2004-01-30 00:09:37.000000000 +0300
  @@ -184,6 +184,7 @@
   #define EXT3_IMAGIC_FL                        0x00002000 /* AFS directory */
   #define EXT3_JOURNAL_DATA_FL          0x00004000 /* file data should be journaled */
@@ -2423,7 +2548,7 @@ Index: linux-2.4.20/include/linux/ext3_fs.h
   
   /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
   #ifndef _LINUX_EXT2_FS_H
-@@ -687,6 +693,7 @@
+@@ -688,6 +694,7 @@
   extern unsigned long ext3_count_free (struct buffer_head *, unsigned);
   
   /* inode.c */
@@ -2431,7 +2556,7 @@ Index: linux-2.4.20/include/linux/ext3_fs.h
   extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int);
   extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
   extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
-@@ -767,6 +774,14 @@
+@@ -769,6 +776,14 @@
   extern struct inode_operations ext3_symlink_inode_operations;
   extern struct inode_operations ext3_fast_symlink_inode_operations;
   
@@ -2446,11 +2571,11 @@ Index: linux-2.4.20/include/linux/ext3_fs.h
   
   #endif        /* __KERNEL__ */
   
-Index: linux-2.4.20/include/linux/ext3_extents.h
+Index: linux-2.4.24/include/linux/ext3_extents.h
  ===================================================================
---- linux-2.4.20.orig/include/linux/ext3_extents.h     2003-01-30 13:24:37.000000000 +0300
-+++ linux-2.4.20/include/linux/ext3_extents.h  2004-01-24 15:15:11.000000000 +0300
-@@ -0,0 +1,207 @@
+--- linux-2.4.24.orig/include/linux/ext3_extents.h     2003-01-30 13:24:37.000000000 +0300
++++ linux-2.4.24/include/linux/ext3_extents.h  2004-02-05 20:31:08.000000000 +0300
+@@ -0,0 +1,216 @@
  +/*
  + * Copyright (C) 2003 Alex Tomas <alex@clusterfs.com>
  + *
@@ -2468,6 +2593,8 @@ Index: linux-2.4.20/include/linux/ext3_extents.h
  + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-
  + */
  +
++#ifndef _LINUX_EXT3_EXTENTS
++#define _LINUX_EXT3_EXTENTS
  +
  +/*
  + * with AGRESSIVE_TEST defined capacity of index/leaf blocks
@@ -2505,7 +2632,7 @@ Index: linux-2.4.20/include/linux/ext3_extents.h
  +#define EXT_STATS_
  +
  +
-+#define EXT3_ALLOC_NEEDED     2       /* block bitmap + group descriptor */
++#define EXT3_ALLOC_NEEDED     3       /* block bitmap + group desc. + sb */
  +
  +/*
  + * ext3_inode has i_block array (total 60 bytes)
@@ -2542,8 +2669,11 @@ Index: linux-2.4.20/include/linux/ext3_extents.h
  +      __u16   e_num;          /* number of valid entries */
  +      __u16   e_max;          /* capacity of store in entries */
  +      __u16   e_depth;        /* has tree real underlaying blocks? */
++      __u32   e_generation;   /* generation of the tree */
  +};
  +
++#define EXT3_EXT_MAGIC                0xf301
++
  +/*
  + * array of ext3_ext_path contains path to some extent
  + * creation/lookup routines use it for traversal/splitting/etc
@@ -2600,6 +2730,7 @@ Index: linux-2.4.20/include/linux/ext3_extents.h
  +
  +#define EXT_CONTINUE  0
  +#define EXT_BREAK     1
++#define EXT_REPEAT    2
  +
  +
  +#define EXT_FIRST_EXTENT(__hdr__) \
@@ -2625,6 +2756,8 @@ Index: linux-2.4.20/include/linux/ext3_extents.h
  +      ((struct ext3_extent_header *) (bh)->b_data)
  +#define EXT_DEPTH(_t_)        \
  +      (((struct ext3_extent_header *)((_t_)->root))->e_depth)
++#define EXT_GENERATION(_t_)   \
++      (((struct ext3_extent_header *)((_t_)->root))->e_generation)
  +
  +
  +#define EXT_ASSERT(__x__) if (!(__x__)) BUG();
@@ -2657,4 +2790,18 @@ Index: linux-2.4.20/include/linux/ext3_extents.h
  +extern int ext3_ext_remove_space(struct ext3_extents_tree *, unsigned long, unsigned long);
  +extern struct ext3_ext_path * ext3_ext_find_extent(struct ext3_extents_tree *, int, struct ext3_ext_path *);
  +
++#endif /* _LINUX_EXT3_EXTENTS */
  +
+Index: linux-2.4.24/include/linux/ext3_fs_i.h
+===================================================================
+--- linux-2.4.24.orig/include/linux/ext3_fs_i.h        2004-01-24 19:30:22.000000000 +0300
++++ linux-2.4.24/include/linux/ext3_fs_i.h     2004-01-26 23:17:19.000000000 +0300
+@@ -76,6 +76,8 @@
+        * by other means, so we have truncate_sem.
+        */
+       struct rw_semaphore truncate_sem;
++
++      __u32 i_cached_extent[3];
+ };
+ 
+ #endif        /* _LINUX_EXT3_FS_I */
diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.19-pre1.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.19-pre1.patch

index 43abf92..98fd550 100644 (file)
--- a/lustre/kernel_patches/patches/vfs_intent-2.4.19-pre1.patch
+++ b/lustre/kernel_patches/patches/vfs_intent-2.4.19-pre1.patch
@@ -1621,9 +1621,9 @@ Index: linux-2.4.19-pre1/include/linux/dcache.h
  +      void (*d_unpin)(struct dentry *, struct vfsmount *, int);
   };
   
-+#define PIN(de,mnt,flag)  if (de->d_op && de->d_op->d_pin) \
++#define PIN(de,mnt,flag)  if (de && de->d_op && de->d_op->d_pin) \
  +                              de->d_op->d_pin(de, mnt, flag);
-+#define UNPIN(de,mnt,flag)  if (de->d_op && de->d_op->d_unpin) \
++#define UNPIN(de,mnt,flag)  if (de && de->d_op && de->d_op->d_unpin) \
  +                              de->d_op->d_unpin(de, mnt, flag);
  +
  +
diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.19-suse.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.19-suse.patch

index 4e37bb8..b49babd 100644 (file)
--- a/lustre/kernel_patches/patches/vfs_intent-2.4.19-suse.patch
+++ b/lustre/kernel_patches/patches/vfs_intent-2.4.19-suse.patch
@@ -1601,9 +1601,9 @@ Index: linux-2.4.19.SuSE/include/linux/dcache.h
  +      void (*d_unpin)(struct dentry *, struct vfsmount *, int);
   };
   
-+#define PIN(de,mnt,flag)  if (de->d_op && de->d_op->d_pin) \
++#define PIN(de,mnt,flag)  if (de && de->d_op && de->d_op->d_pin) \
  +                              de->d_op->d_pin(de, mnt, flag);
-+#define UNPIN(de,mnt,flag)  if (de->d_op && de->d_op->d_unpin) \
++#define UNPIN(de,mnt,flag)  if (de && de->d_op && de->d_op->d_unpin) \
  +                              de->d_op->d_unpin(de, mnt, flag);
  +
  +
diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.22-rh.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.22-rh.patch

index 5f266a8..ace8619 100644 (file)
--- a/lustre/kernel_patches/patches/vfs_intent-2.4.22-rh.patch
+++ b/lustre/kernel_patches/patches/vfs_intent-2.4.22-rh.patch
@@ -1583,9 +1583,9 @@
  +      void (*d_unpin)(struct dentry *, struct vfsmount *, int);
   };
   
-+#define PIN(de,mnt,flag)  if (de->d_op && de->d_op->d_pin) \
++#define PIN(de,mnt,flag)  if (de && de->d_op && de->d_op->d_pin) \
  +                              de->d_op->d_pin(de, mnt, flag);
-+#define UNPIN(de,mnt,flag)  if (de->d_op && de->d_op->d_unpin) \
++#define UNPIN(de,mnt,flag)  if (de && de->d_op && de->d_op->d_unpin) \
  +                              de->d_op->d_unpin(de, mnt, flag);
  +
  +
diff --git a/lustre/kernel_patches/targets/rh-2.4.target b/lustre/kernel_patches/targets/rh-2.4.target

index 47585d9..275cfff 100644 (file)
--- a/lustre/kernel_patches/targets/rh-2.4.target
+++ b/lustre/kernel_patches/targets/rh-2.4.target
@@ -1,11 +1,12 @@
-KERNEL=linux-2.4.20-20.9.tar.gz
+KERNEL=linux-2.4.20-28.9.tar.gz
  SERIES=rh-2.4.20
  VERSION=2.4.20
-EXTRA_VERSION=20.9
+EXTRA_VERSION=28.9_lustre
  
-BASE_ARCHS="i386"
+BASE_ARCHS="i586"
  BIGMEM_ARCHS=""
  BOOT_ARCHS=""
  JENSEN_ARCHS=""
-SMP_ARCHS="i686"
+SMP_ARCHS="i586"
  UP_ARCHS=""
+SRC_ARCHS="i586"
diff --git a/lustre/lov/lov_log.c b/lustre/lov/lov_log.c

index 0b9f6f3..59dc29e 100644 (file)
--- a/lustre/lov/lov_log.c
+++ b/lustre/lov/lov_log.c
@@ -51,26 +51,6 @@
  
  #include "lov_internal.h"
  
-#if 0
-static int lov_logop_cleanup(struct llog_ctxt *ctxt)
-{
-        struct lov_obd *lov = &ctxt->loc_obd->u.lov;
-        int i, rc = 0;
-
-        ENTRY;
-        for (i = 0; i < lov->desc.ld_tgt_count; i++) {
-                struct obd_device *child = lov->tgts[i].ltd_exp->exp_obd;
-                struct llog_ctxt *cctxt = llog_get_context(child, ctxt->loc_idx);
-                rc = llog_cleanup(cctxt);
-                if (rc) {
-                        CERROR("error lov_llog_open %d\n", i);
-                        break;
-                }
-        }
-        RETURN(rc);
-}
-#endif
-
  /* Add log records for each OSC that this object is striped over, and return
   * cookies for each one.  We _would_ have nice abstraction here, except that
   * we need to keep cookies in stripe order, even if some are NULL, so that
@@ -101,7 +81,8 @@ static int lov_llog_origin_add(struct llog_ctxt *ctxt,
  
                  lur->lur_oid = loi->loi_id;
                  lur->lur_ogen = loi->loi_gr;
-                rc += llog_add(cctxt, &lur->lur_hdr, NULL, logcookies + rc, numcookies - rc);
+                rc += llog_add(cctxt, &lur->lur_hdr, NULL, logcookies + rc,
+                                numcookies - rc);
  
          }
          OBD_FREE(lur, sizeof(*lur));
@@ -110,8 +91,8 @@ static int lov_llog_origin_add(struct llog_ctxt *ctxt,
  }
  
  static int lov_llog_origin_connect(struct llog_ctxt *ctxt, int count,
-                                   struct llog_logid *logid,
-                                   struct llog_ctxt_gen *gen)
+                                   struct llog_logid *logid, 
+                                   struct llog_gen *gen)
  {
          struct obd_device *obd = ctxt->loc_obd;
          struct lov_obd *lov = &obd->u.lov;
diff --git a/lustre/lvfs/lvfs_linux.c b/lustre/lvfs/lvfs_linux.c

index 61cd57c..935548e 100644 (file)
--- a/lustre/lvfs/lvfs_linux.c
+++ b/lustre/lvfs/lvfs_linux.c
@@ -50,6 +50,10 @@
  #include <linux/obd.h>
  #include <linux/lustre_lib.h>
  
+atomic_t obd_memory;
+int obd_memmax;
+
+
  /* Debugging check only needed during development */
  #ifdef OBD_CTXT_DEBUG
  # define ASSERT_CTXT_MAGIC(magic) LASSERT((magic) == OBD_RUN_CTXT_MAGIC)
@@ -308,49 +312,45 @@ static int l_filldir(void *__buf, const char *name, int namlen, loff_t offset,
  {
          struct l_linux_dirent *dirent;
          struct l_readdir_callback *buf = (struct l_readdir_callback *)__buf;
-        int reclen = size_round(offsetof(struct l_linux_dirent, d_name) + namlen + 1);
          
-        buf->error = -EINVAL;
-        if (reclen > buf->count)
-                return -EINVAL;
-        dirent = buf->previous;
+        dirent = buf->lrc_dirent;
          if (dirent)
-               dirent->d_off = offset; 
-        dirent = buf->current_dir;
-        buf->previous = dirent;
-        dirent->d_ino = ino;
-        dirent->d_reclen = reclen;
-        memcpy(dirent->d_name, name, namlen);
-        ((char *)dirent) += reclen;
-        buf->current_dir = dirent;
-        buf->count -= reclen; 
+               dirent->lld_off = offset; 
+
+        OBD_ALLOC(dirent, sizeof(*dirent));
+
+        list_add_tail(&dirent->lld_list, buf->lrc_list);
+
+        buf->lrc_dirent = dirent;
+        dirent->lld_ino = ino;
+        LASSERT(sizeof(dirent->lld_name) >= namlen + 1);
+        memcpy(dirent->lld_name, name, namlen);
+
          return 0;
  }
  
-long l_readdir(struct file * file, void * dirent, unsigned int count)
+long l_readdir(struct file *file, struct list_head *dentry_list)
  {
-        struct l_linux_dirent * lastdirent;
+        struct l_linux_dirent *lastdirent;
          struct l_readdir_callback buf;
          int error;
  
-        buf.current_dir = (struct l_linux_dirent *)dirent;
-        buf.previous = NULL;
-        buf.count = count;
-        buf.error = 0;
+        buf.lrc_dirent = NULL;
+        buf.lrc_list = dentry_list; 
  
          error = vfs_readdir(file, l_filldir, &buf);
          if (error < 0)
                  return error;
-        error = buf.error;
-        lastdirent = buf.previous;
  
-        if (lastdirent) {
-                lastdirent->d_off = file->f_pos;
-                error = count - buf.count;        
-        }
-        return error; 
+        lastdirent = buf.lrc_dirent;
+        if (lastdirent)
+                lastdirent->lld_off = file->f_pos;
+
+        return 0; 
  }
  EXPORT_SYMBOL(l_readdir);
+EXPORT_SYMBOL(obd_memory);
+EXPORT_SYMBOL(obd_memmax);
  
  #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
  
@@ -361,6 +361,12 @@ static int __init lvfs_linux_init(void)
  
  static void __exit lvfs_linux_exit(void)
  {
+        int leaked;
+        ENTRY;
+
+        leaked = atomic_read(&obd_memory);
+        CDEBUG(leaked ? D_ERROR : D_INFO,
+               "obd mem max: %d leaked: %d\n", obd_memmax, leaked);
  
          return;
  }
diff --git a/lustre/mds/mds_log.c b/lustre/mds/mds_log.c

index a9b02ee..549c760 100644 (file)
--- a/lustre/mds/mds_log.c
+++ b/lustre/mds/mds_log.c
@@ -54,7 +54,7 @@ static int mds_llog_origin_add(struct llog_ctxt *ctxt,
  
  static int mds_llog_origin_connect(struct llog_ctxt *ctxt, int count,
                                     struct llog_logid *logid,
-                                   struct llog_ctxt_gen *gen) 
+                                   struct llog_gen *gen)
  {
          struct obd_device *obd = ctxt->loc_obd;
          struct obd_device *lov_obd = obd->u.mds.mds_osc_obd;
@@ -86,9 +86,7 @@ int mds_log_op_unlink(struct obd_device *obd, struct inode *inode,
  {
          struct mds_obd *mds = &obd->u.mds;
          struct lov_stripe_md *lsm = NULL;
-#ifdef ENABLE_ORPHANS
          struct llog_ctxt *ctxt;
-#endif
          int rc;
          ENTRY;
  
@@ -101,11 +99,9 @@ int mds_log_op_unlink(struct obd_device *obd, struct inode *inode,
          if (rc < 0)
                  RETURN(rc);
  
-#ifdef ENABLE_ORPHANS
          ctxt = llog_get_context(obd, LLOG_UNLINK_ORIG_CTXT);
          rc = llog_add(ctxt, NULL, lsm, lustre_msg_buf(repmsg, offset + 1, 0),
                        repmsg->buflens[offset + 1] / sizeof(struct llog_cookie));
-#endif
  
          obd_free_memmd(mds->mds_osc_exp, &lsm);
  
diff --git a/lustre/obdclass/llog.c b/lustre/obdclass/llog.c

index ad7ddcd..82ceab4 100644 (file)
--- a/lustre/obdclass/llog.c
+++ b/lustre/obdclass/llog.c
@@ -77,7 +77,7 @@ void llog_free_handle(struct llog_handle *loghandle)
  }
  EXPORT_SYMBOL(llog_free_handle);
  
-/* returns negative on error; 0 if success; 1 if success & log destroyed */ 
+/* returns negative on error; 0 if success; 1 if success & log destroyed */
  int llog_cancel_rec(struct llog_handle *loghandle, int index)
  {
          struct llog_log_hdr *llh = loghandle->lgh_hdr;
@@ -101,7 +101,7 @@ int llog_cancel_rec(struct llog_handle *loghandle, int index)
  
          if ((le32_to_cpu(llh->llh_flags) & LLOG_F_ZAP_WHEN_EMPTY) &&
              (le32_to_cpu(llh->llh_count) == 1) &&
-            (loghandle->lgh_last_idx == (LLOG_BITMAP_BYTES * 8) - 1)) { 
+            (loghandle->lgh_last_idx == (LLOG_BITMAP_BYTES * 8) - 1)) {
                  rc = llog_destroy(loghandle);
                  if (rc)
                          CERROR("failure destroying log after last cancel: %d\n",
@@ -111,7 +111,7 @@ int llog_cancel_rec(struct llog_handle *loghandle, int index)
          }
  
          rc = llog_write_rec(loghandle, &llh->llh_hdr, NULL, 0, NULL, 0);
-        if (rc) 
+        if (rc)
                  CERROR("failure re-writing header %d\n", rc);
          LASSERT(rc == 0);
          RETURN(rc);
@@ -144,16 +144,17 @@ int llog_init_handle(struct llog_handle *handle, int flags,
                  GOTO(out, rc);
          }
          rc = 0;
-        
+
          handle->lgh_last_idx = 0; /* header is record with index 0 */
          llh->llh_count = cpu_to_le32(1);         /* for the header record */
          llh->llh_hdr.lrh_type = cpu_to_le32(LLOG_HDR_MAGIC);
-        llh->llh_hdr.lrh_len = llh->llh_tail.lrt_len = cpu_to_le32(LLOG_CHUNK_SIZE);
+        llh->llh_hdr.lrh_len = llh->llh_tail.lrt_len =
+                cpu_to_le32(LLOG_CHUNK_SIZE);
          llh->llh_hdr.lrh_index = llh->llh_tail.lrt_index = 0;
          llh->llh_timestamp = cpu_to_le64(LTIME_S(CURRENT_TIME));
          if (uuid)
                  memcpy(&llh->llh_tgtuuid, uuid, sizeof(llh->llh_tgtuuid));
-        llh->llh_bitmap_offset = cpu_to_le32(offsetof(typeof(*llh), llh_bitmap));
+        llh->llh_bitmap_offset = cpu_to_le32(offsetof(typeof(*llh),llh_bitmap));
          ext2_set_bit(0, llh->llh_bitmap);
  
  out:
@@ -165,7 +166,7 @@ out:
                  INIT_LIST_HEAD(&handle->u.phd.phd_entry);
          else
                  LBUG();
-        
+
          if (rc) {
                  OBD_FREE(llh, sizeof(*llh));
                  handle->lgh_hdr = NULL;
@@ -192,12 +193,14 @@ int llog_close(struct llog_handle *loghandle)
  }
  EXPORT_SYMBOL(llog_close);
  
-int llog_process(struct llog_handle *loghandle, llog_cb_t cb, void *data)
+int llog_process(struct llog_handle *loghandle, llog_cb_t cb,
+                 void *data, void *catdata)
  {
          struct llog_log_hdr *llh = loghandle->lgh_hdr;
+        struct llog_process_cat_data *cd = catdata;
          void *buf;
          __u64 cur_offset = LLOG_CHUNK_SIZE;
-        int rc = 0, index = 1;
+        int rc = 0, index = 1, last_index, idx;
          int saved_index = 0;
          ENTRY;
  
@@ -205,27 +208,41 @@ int llog_process(struct llog_handle *loghandle, llog_cb_t cb, void *data)
          if (!buf)
                  RETURN(-ENOMEM);
  
+        if (cd != NULL)
+                index = cd->first_idx + 1;
+        if (cd != NULL && cd->last_idx)
+                last_index = cd->last_idx;
+        else
+                last_index = LLOG_BITMAP_BYTES * 8 - 1;
+
+
          while (rc == 0) {
                  struct llog_rec_hdr *rec;
-                
+
                  /* skip records not set in bitmap */
-                while (index < (LLOG_BITMAP_BYTES * 8) &&
+                while (index <= last_index &&
                         !ext2_test_bit(index, llh->llh_bitmap))
                          ++index;
  
-                LASSERT(index <= LLOG_BITMAP_BYTES * 8);
-                if (index == LLOG_BITMAP_BYTES * 8)
+                LASSERT(index <= last_index + 1);
+                if (index == last_index + 1)
                          break;
  
                  /* get the buf with our target record; avoid old garbage */
                  memset(buf, 0, LLOG_CHUNK_SIZE);
-                rc = llog_next_block(loghandle, &saved_index, index, 
+                rc = llog_next_block(loghandle, &saved_index, index,
                                       &cur_offset, buf, LLOG_CHUNK_SIZE);
                  if (rc)
                          GOTO(out, rc);
  
                  rec = buf;
-                index = le32_to_cpu(rec->lrh_index);
+                idx = le32_to_cpu(rec->lrh_index);
+                if (idx < index)
+                        CDEBUG(D_HA, "index %u : idx %u\n", index, idx);
+                while (idx < index) {
+                        rec = ((void *)rec + le32_to_cpu(rec->lrh_len));
+                        idx ++;
+                }
  
                  /* process records in buffer, starting where we found one */
                  while ((void *)rec < buf + LLOG_CHUNK_SIZE) {
@@ -235,13 +252,20 @@ int llog_process(struct llog_handle *loghandle, llog_cb_t cb, void *data)
                          /* if set, process the callback on this record */
                          if (ext2_test_bit(index, llh->llh_bitmap)) {
                                  rc = cb(loghandle, rec, data);
-                                if (rc) 
+                                if (rc == LLOG_PROC_BREAK) {
+                                        CWARN("recovery from log: "LPX64":%x"
+                                              " stopped\n",
+                                              loghandle->lgh_id.lgl_oid,
+                                              loghandle->lgh_id.lgl_ogen);
+                                        GOTO(out, rc);
+                                }
+                                if (rc)
                                          GOTO(out, rc);
                          }
  
                          /* next record, still in buffer? */
                          ++index;
-                        if (index > LLOG_BITMAP_BYTES * 8 - 1)
+                        if (index > last_index)
                                  GOTO(out, rc = 0);
                          rec = ((void *)rec + le32_to_cpu(rec->lrh_len));
                  }
diff --git a/lustre/obdclass/llog_cat.c b/lustre/obdclass/llog_cat.c

index 4c49a10..b0e82fe 100644 (file)
--- a/lustre/obdclass/llog_cat.c
+++ b/lustre/obdclass/llog_cat.c
@@ -52,41 +52,44 @@ static struct llog_handle *llog_cat_new_log(struct llog_handle *cathandle)
          struct llog_handle *loghandle;
          struct llog_log_hdr *llh;
          struct llog_logid_rec rec;
-        int rc, index, bitmap_size, i;
+        int rc, index, bitmap_size;
          ENTRY;
  
+        llh = cathandle->lgh_hdr;
+        bitmap_size = sizeof(llh->llh_bitmap) * 8;
+
+        index = (cathandle->lgh_last_idx + 1) % bitmap_size;
+
+        /* maximum number of available slots in catlog is bitmap_size - 2 */
+        if (llh->llh_cat_idx == cpu_to_le32(index)) {
+                CERROR("no free catalog slots for log...\n");
+                RETURN(ERR_PTR(-ENOSPC));
+        } else {
+                if (index == 0)
+                        index = 1;
+                if (ext2_set_bit(index, llh->llh_bitmap)) {
+                        CERROR("argh, index %u already set in log bitmap?\n",
+                               index);
+                        LBUG(); /* should never happen */
+                }
+                cathandle->lgh_last_idx = index;
+                llh->llh_count = cpu_to_le32(le32_to_cpu(llh->llh_count) + 1);
+                llh->llh_tail.lrt_index = cpu_to_le32(index);
+        }
+
          rc = llog_create(cathandle->lgh_ctxt, &loghandle, NULL, NULL);
          if (rc)
                  RETURN(ERR_PTR(rc));
  
-        rc = llog_init_handle(loghandle, 
-                              LLOG_F_IS_PLAIN | LLOG_F_ZAP_WHEN_EMPTY, 
+        rc = llog_init_handle(loghandle,
+                              LLOG_F_IS_PLAIN | LLOG_F_ZAP_WHEN_EMPTY,
                                &cathandle->lgh_hdr->llh_tgtuuid);
          if (rc)
                  GOTO(out_destroy, rc);
  
-        /* Find first free entry */
-        llh = cathandle->lgh_hdr;
-        bitmap_size = sizeof(llh->llh_bitmap) * 8;
-        for (i = 0, index = le32_to_cpu(llh->llh_count); i < bitmap_size; 
-             i++, index++) {
-                index %= bitmap_size;
-                if (ext2_set_bit(index, llh->llh_bitmap)) {
-                        /* XXX This should trigger log clean up or similar */
-                        CERROR("catalog index %d is still in use\n", index);
-                } else {
-                        cathandle->lgh_last_idx = index;
-                        llh->llh_count = cpu_to_le32(le32_to_cpu(llh->llh_count) + 1);
-                        break;
-                }
-        }
-        if (i == bitmap_size) {
-                CERROR("no free catalog slots for log...\n");
-                GOTO(out_destroy, rc = -ENOSPC);
-        }
-        CWARN("new recovery log "LPX64":%x for index %u of catalog "LPX64"\n",
-               loghandle->lgh_id.lgl_oid, loghandle->lgh_id.lgl_ogen, index,
-               cathandle->lgh_id.lgl_oid);
+        CDEBUG(D_HA, "new recovery log "LPX64":%x for index %u of catalog "
+               LPX64"\n", loghandle->lgh_id.lgl_oid, loghandle->lgh_id.lgl_ogen,
+               index, cathandle->lgh_id.lgl_oid);
          /* build the record for this log in the catalog */
          rec.lid_hdr.lrh_len = cpu_to_le32(sizeof(rec));
          rec.lid_hdr.lrh_index = cpu_to_le32(index);
@@ -96,7 +99,7 @@ static struct llog_handle *llog_cat_new_log(struct llog_handle *cathandle)
          rec.lid_tail.lrt_index = cpu_to_le32(index);
  
          /* update the catalog: header and record */
-        rc = llog_write_rec(cathandle, &rec.lid_hdr, 
+        rc = llog_write_rec(cathandle, &rec.lid_hdr,
                              &loghandle->u.phd.phd_cookie, 1, NULL, index);
          if (rc < 0) {
                  GOTO(out_destroy, rc);
@@ -115,7 +118,10 @@ static struct llog_handle *llog_cat_new_log(struct llog_handle *cathandle)
  }
  EXPORT_SYMBOL(llog_cat_new_log);
  
-/* Assumes caller has already pushed us into the kernel context and is locking.
+/* Open an existing log handle and add it to the open list.
+ * This log handle will be closed when all of the records in it are removed.
+ *
+ * Assumes caller has already pushed us into the kernel context and is locking.
   * We return a lock on the handle to ensure nobody yanks it from us.
   */
  int llog_cat_id2handle(struct llog_handle *cathandle, struct llog_handle **res,
@@ -128,7 +134,7 @@ int llog_cat_id2handle(struct llog_handle *cathandle, struct llog_handle **res,
          if (cathandle == NULL)
                  RETURN(-EBADF);
  
-        list_for_each_entry(loghandle, &cathandle->u.chd.chd_head, 
+        list_for_each_entry(loghandle, &cathandle->u.chd.chd_head,
                              u.phd.phd_entry) {
                  struct llog_logid *cgl = &loghandle->lgh_id;
                  if (cgl->lgl_oid == logid->lgl_oid) {
@@ -139,7 +145,6 @@ int llog_cat_id2handle(struct llog_handle *cathandle, struct llog_handle **res,
                                  continue;
                          }
                          loghandle->u.phd.phd_cat_handle = cathandle;
-                        cathandle->u.chd.chd_current_log = loghandle;
                          GOTO(out, rc = 0);
                  }
          }
@@ -151,15 +156,14 @@ int llog_cat_id2handle(struct llog_handle *cathandle, struct llog_handle **res,
          } else {
                  rc = llog_init_handle(loghandle, LLOG_F_IS_PLAIN, NULL);
                  if (!rc) {
-                        list_add(&loghandle->u.phd.phd_entry, 
+                        list_add(&loghandle->u.phd.phd_entry,
                                   &cathandle->u.chd.chd_head);
-                        cathandle->u.chd.chd_current_log = loghandle;
                  }
          }
          if (!rc) {
                  loghandle->u.phd.phd_cat_handle = cathandle;
                  loghandle->u.phd.phd_cookie.lgc_lgl = cathandle->lgh_id;
-                loghandle->u.phd.phd_cookie.lgc_index = 
+                loghandle->u.phd.phd_cookie.lgc_index =
                          le32_to_cpu(loghandle->lgh_hdr->llh_cat_idx);
          }
  
@@ -174,7 +178,7 @@ int llog_cat_put(struct llog_handle *cathandle)
          int rc;
          ENTRY;
  
-        list_for_each_entry_safe(loghandle, n, &cathandle->u.chd.chd_head, 
+        list_for_each_entry_safe(loghandle, n, &cathandle->u.chd.chd_head,
                                   u.phd.phd_entry) {
                  int err = llog_close(loghandle);
                  if (err)
@@ -195,7 +199,7 @@ EXPORT_SYMBOL(llog_cat_put);
   *
   * NOTE: loghandle is write-locked upon successful return
   */
-static struct llog_handle *llog_cat_current_log(struct llog_handle *cathandle, 
+static struct llog_handle *llog_cat_current_log(struct llog_handle *cathandle,
                                                  int create)
  {
          struct llog_handle *loghandle = NULL;
@@ -205,7 +209,7 @@ static struct llog_handle *llog_cat_current_log(struct llog_handle *cathandle,
          loghandle = cathandle->u.chd.chd_current_log;
          if (loghandle) {
                  struct llog_log_hdr *llh = loghandle->lgh_hdr;
-                if (loghandle->lgh_last_idx < (sizeof(llh->llh_bitmap) * 8) - 1) {
+                if (loghandle->lgh_last_idx < (sizeof(llh->llh_bitmap)*8) - 1) {
                          down_write(&loghandle->lgh_lock);
                          up_read(&cathandle->lgh_lock);
                          RETURN(loghandle);
@@ -226,7 +230,7 @@ static struct llog_handle *llog_cat_current_log(struct llog_handle *cathandle,
          loghandle = cathandle->u.chd.chd_current_log;
          if (loghandle) {
                  struct llog_log_hdr *llh = loghandle->lgh_hdr;
-                if (loghandle->lgh_last_idx < (sizeof(llh->llh_bitmap) * 8) - 1) {
+                if (loghandle->lgh_last_idx < (sizeof(llh->llh_bitmap)*8) - 1) {
                          down_write(&loghandle->lgh_lock);
                          up_write(&cathandle->lgh_lock);
                          RETURN(loghandle);
@@ -235,7 +239,7 @@ static struct llog_handle *llog_cat_current_log(struct llog_handle *cathandle,
  
          CDEBUG(D_INODE, "creating new log\n");
          loghandle = llog_cat_new_log(cathandle);
-        if (loghandle)
+        if (!IS_ERR(loghandle))
                  down_write(&loghandle->lgh_lock);
          up_write(&cathandle->lgh_lock);
          RETURN(loghandle);
@@ -247,7 +251,7 @@ static struct llog_handle *llog_cat_current_log(struct llog_handle *cathandle,
   * Assumes caller has already pushed us into the kernel context.
   */
  int llog_cat_add_rec(struct llog_handle *cathandle, struct llog_rec_hdr *rec,
-                    struct llog_cookie *reccookie, void *buf)
+                     struct llog_cookie *reccookie, void *buf)
  {
          struct llog_handle *loghandle;
          int rc;
@@ -260,6 +264,7 @@ int llog_cat_add_rec(struct llog_handle *cathandle, struct llog_rec_hdr *rec,
          /* loghandle is already locked by llog_cat_current_log() for us */
          rc = llog_write_rec(loghandle, rec, reccookie, 1, buf, -1);
          up_write(&loghandle->lgh_lock);
+
          RETURN(rc);
  }
  EXPORT_SYMBOL(llog_cat_add_rec);
@@ -293,15 +298,20 @@ int llog_cat_cancel_records(struct llog_handle *cathandle, int count,
                  down_write(&loghandle->lgh_lock);
                  rc = llog_cancel_rec(loghandle, cookies->lgc_index);
                  up_write(&loghandle->lgh_lock);
-                
+
                  if (rc == 1) {          /* log has been destroyed */
                          index = loghandle->u.phd.phd_cookie.lgc_index;
                          if (cathandle->u.chd.chd_current_log == loghandle)
                                  cathandle->u.chd.chd_current_log = NULL;
                          llog_free_handle(loghandle);
-                        
+
                          LASSERT(index);
+                        llog_cat_set_first_idx(cathandle, index);
                          rc = llog_cancel_rec(cathandle, index);
+                        if (rc == 0)
+                                CDEBUG(D_HA, "cancel plain log at index %u "
+                                       "of catalog "LPX64"\n",
+                                       index, cathandle->lgh_id.lgl_oid);
                  }
          }
          up_write(&cathandle->lgh_lock);
@@ -310,7 +320,8 @@ int llog_cat_cancel_records(struct llog_handle *cathandle, int count,
  }
  EXPORT_SYMBOL(llog_cat_cancel_records);
  
-int llog_cat_process_cb(struct llog_handle *cat_llh, struct llog_rec_hdr *rec, void *data)
+int llog_cat_process_cb(struct llog_handle *cat_llh, struct llog_rec_hdr *rec,
+                        void *data)
  {
          struct llog_process_data *d = data;
          struct llog_logid_rec *lir = (struct llog_logid_rec *)rec;
@@ -321,33 +332,85 @@ int llog_cat_process_cb(struct llog_handle *cat_llh, struct llog_rec_hdr *rec, v
                  CERROR("invalid record in catalog\n");
                  RETURN(-EINVAL);
          }
-        CWARN("processing log "LPX64":%x at index %u of catalog "LPX64"\n", 
+        CWARN("processing log "LPX64":%x at index %u of catalog "LPX64"\n",
                 lir->lid_id.lgl_oid, lir->lid_id.lgl_ogen,
                 le32_to_cpu(rec->lrh_index), cat_llh->lgh_id.lgl_oid);
  
          rc = llog_cat_id2handle(cat_llh, &llh, &lir->lid_id);
          if (rc) {
-                CERROR("Cannot find handle for log "LPX64"\n", lir->lid_id.lgl_oid);
+                CERROR("Cannot find handle for log "LPX64"\n",
+                       lir->lid_id.lgl_oid);
                  RETURN(rc);
-        }        
+        }
  
-        rc = llog_process(llh, d->lpd_cb, d->lpd_data);
+        rc = llog_process(llh, d->lpd_cb, d->lpd_data, NULL);
          RETURN(rc);
  }
  
  int llog_cat_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data)
  {
          struct llog_process_data d;
+        struct llog_process_cat_data cd;
+        struct llog_log_hdr *llh = cat_llh->lgh_hdr;
          int rc;
          ENTRY;
+
+        LASSERT(llh->llh_flags &cpu_to_le32(LLOG_F_IS_CAT));
          d.lpd_data = data;
          d.lpd_cb = cb;
  
-        rc = llog_process(cat_llh, llog_cat_process_cb, &d);
+        if (llh->llh_cat_idx > cat_llh->lgh_last_idx) {
+                CWARN("catlog "LPX64" crosses index zero\n",
+                      cat_llh->lgh_id.lgl_oid);
+
+                cd.first_idx = le32_to_cpu(llh->llh_cat_idx);
+                cd.last_idx = 0;
+                rc = llog_process(cat_llh, llog_cat_process_cb, &d, &cd);
+                if (rc != 0)
+                        RETURN(rc);
+
+                cd.first_idx = 0;
+                cd.last_idx = cat_llh->lgh_last_idx;
+                rc = llog_process(cat_llh, llog_cat_process_cb, &d, &cd);
+        } else {
+                rc = llog_process(cat_llh, llog_cat_process_cb, &d, NULL);
+        }
+
          RETURN(rc);
  }
  EXPORT_SYMBOL(llog_cat_process);
  
+int llog_cat_set_first_idx(struct llog_handle *cathandle, int index)
+{
+        struct llog_log_hdr *llh = cathandle->lgh_hdr;
+        int i, bitmap_size, idx;
+        ENTRY;
+
+        bitmap_size = sizeof(llh->llh_bitmap) * 8;
+        if (llh->llh_cat_idx == cpu_to_le32(index - 1)) {
+                idx = le32_to_cpu(llh->llh_cat_idx) + 1;
+                llh->llh_cat_idx = cpu_to_le32(idx);
+                if (idx == cathandle->lgh_last_idx)
+                        goto out;
+                for (i = (index + 1) % bitmap_size;
+                     i != cathandle->lgh_last_idx;
+                     i = (i + 1) % bitmap_size) {
+                        if (!ext2_test_bit(i, llh->llh_bitmap)) {
+                                idx = le32_to_cpu(llh->llh_cat_idx) + 1;
+                                llh->llh_cat_idx = cpu_to_le32(idx);
+                        } else if (i == 0) {
+                                llh->llh_cat_idx = 0;
+                        } else {
+                                break;
+                        }
+                }
+out:
+                CDEBUG(D_HA, "set catlog "LPX64" first idx %u\n",
+                       cathandle->lgh_id.lgl_oid,le32_to_cpu(llh->llh_cat_idx));
+        }
+
+        RETURN(0);
+}
  
  #if 0
  /* Assumes caller has already pushed us into the kernel context. */
@@ -366,7 +429,7 @@ int llog_cat_init(struct llog_handle *cathandle, struct obd_uuid *tgtuuid)
          if (cathandle->lgh_file->f_dentry->d_inode->i_size == 0) {
                  llog_write_rec(cathandle, &llh->llh_hdr, NULL, 0, NULL, 0);
  
-write_hdr:    
+write_hdr:
                  rc = lustre_fwrite(cathandle->lgh_file, llh, LLOG_CHUNK_SIZE,
                                     &offset);
                  if (rc != LLOG_CHUNK_SIZE) {
diff --git a/lustre/obdclass/llog_obd.c b/lustre/obdclass/llog_obd.c

index 9c9abb7..d01441a 100644 (file)
--- a/lustre/obdclass/llog_obd.c
+++ b/lustre/obdclass/llog_obd.c
@@ -26,7 +26,7 @@
  
  /* helper functions for calling the llog obd methods */
  
-int llog_setup(struct obd_device *obd, int index, struct obd_device *disk_obd, 
+int llog_setup(struct obd_device *obd, int index, struct obd_device *disk_obd,
                 int count, struct llog_logid *logid, struct llog_operations *op)
  {
          int rc = 0;
@@ -49,7 +49,7 @@ int llog_setup(struct obd_device *obd, int index, struct obd_device *disk_obd,
  
          if (op->lop_setup)
                  rc = op->lop_setup(obd, index, disk_obd, count, logid);
-        if (ctxt && rc) 
+        if (ctxt && rc)
                  OBD_FREE(ctxt, sizeof(*ctxt));
  
          RETURN(rc);
@@ -61,7 +61,6 @@ int llog_cleanup(struct llog_ctxt *ctxt)
          int rc = 0;
          ENTRY;
  
-        down(&ctxt->loc_sem);
          LASSERT(ctxt);
  
          if (CTXTP(ctxt, cleanup))
@@ -70,7 +69,6 @@ int llog_cleanup(struct llog_ctxt *ctxt)
          ctxt->loc_obd->obd_llog_ctxt[ctxt->loc_idx] = NULL;
          class_export_put(ctxt->loc_exp);
          ctxt->loc_exp = NULL;
-        up(&ctxt->loc_sem);
          OBD_FREE(ctxt, sizeof(*ctxt));
  
          RETURN(rc);
@@ -84,29 +82,25 @@ int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp)
  
          if (!ctxt)
                  RETURN(0);
-        down(&ctxt->loc_sem);
-        if (ctxt->loc_llcd && CTXTP(ctxt, sync))
+
+        if (CTXTP(ctxt, sync))
                  rc = CTXTP(ctxt, sync)(ctxt, exp);
-        else
-                up(&ctxt->loc_sem);
  
          RETURN(rc);
  }
  EXPORT_SYMBOL(llog_sync);
  
-int llog_add(struct llog_ctxt *ctxt,
-                 struct llog_rec_hdr *rec, struct lov_stripe_md *lsm,
-                 struct llog_cookie *logcookies, int numcookies)
+int llog_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec,
+                struct lov_stripe_md *lsm, struct llog_cookie *logcookies,
+                int numcookies)
  {
          int rc;
          ENTRY;
  
          LASSERT(ctxt);
-        down(&ctxt->loc_sem);
          CTXT_CHECK_OP(ctxt, add, -EOPNOTSUPP);
  
          rc = CTXTP(ctxt, add)(ctxt, rec, lsm, logcookies, numcookies);
-        up(&ctxt->loc_sem);
          RETURN(rc);
  }
  EXPORT_SYMBOL(llog_add);
@@ -125,7 +119,7 @@ int llog_cancel(struct llog_ctxt *ctxt, struct lov_stripe_md *lsm,
  EXPORT_SYMBOL(llog_cancel);
  
  /* callback func for llog_process in llog_obd_origin_setup */
-static int cat_cancel_cb(struct llog_handle *cathandle, 
+static int cat_cancel_cb(struct llog_handle *cathandle,
                            struct llog_rec_hdr *rec, void *data)
  {
          struct llog_logid_rec *lir = (struct llog_logid_rec *)rec;
@@ -138,35 +132,36 @@ static int cat_cancel_cb(struct llog_handle *cathandle,
                  CERROR("invalid record in catalog\n");
                  RETURN(-EINVAL);
          }
-        CWARN("processing log "LPX64":%x at index %u of catalog "LPX64"\n", 
+        CWARN("processing log "LPX64":%x at index %u of catalog "LPX64"\n",
                 lir->lid_id.lgl_oid, lir->lid_id.lgl_ogen,
                 le32_to_cpu(rec->lrh_index), cathandle->lgh_id.lgl_oid);
  
          rc = llog_cat_id2handle(cathandle, &loghandle, &lir->lid_id);
          if (rc) {
-                CERROR("Cannot find handle for log "LPX64"\n", lir->lid_id.lgl_oid);
+                CERROR("Cannot find handle for log "LPX64"\n",
+                       lir->lid_id.lgl_oid);
                  RETURN(rc);
-        }        
-        
+        }
+
          llh = loghandle->lgh_hdr;
          if ((le32_to_cpu(llh->llh_flags) & LLOG_F_ZAP_WHEN_EMPTY) &&
              (le32_to_cpu(llh->llh_count) == 1)) {
                  rc = llog_destroy(loghandle);
                  if (rc)
-                        CERROR("failure destroying log during postsetup: %d\n", rc);
+                        CERROR("failure destroying log in postsetup: %d\n", rc);
                  LASSERT(rc == 0);
  
                  index = loghandle->u.phd.phd_cookie.lgc_index;
-                if (cathandle->u.chd.chd_current_log == loghandle)
-                        cathandle->u.chd.chd_current_log = NULL;
                  llog_free_handle(loghandle);
-                                                                                                                
+
                  LASSERT(index);
+                llog_cat_set_first_idx(cathandle, index);
                  rc = llog_cancel_rec(cathandle, index);
                  if (rc == 0)
-                        CWARN("cancel log "LPX64":%x at index %u of catalog "LPX64"\n", 
-                               lir->lid_id.lgl_oid, lir->lid_id.lgl_ogen,
-                               le32_to_cpu(rec->lrh_index), cathandle->lgh_id.lgl_oid);
+                        CWARN("cancel log "LPX64":%x at index %u of catalog "
+                              LPX64"\n", lir->lid_id.lgl_oid,
+                              lir->lid_id.lgl_ogen, le32_to_cpu(rec->lrh_index),
+                              cathandle->lgh_id.lgl_oid);
          }
  
          RETURN(rc);
@@ -174,8 +169,9 @@ static int cat_cancel_cb(struct llog_handle *cathandle,
  
  /* lop_setup method for filter/osc */
  // XXX how to set exports
-int llog_obd_origin_setup(struct obd_device *obd, int index, struct obd_device *disk_obd,
-                          int count, struct llog_logid *logid)
+int llog_obd_origin_setup(struct obd_device *obd, int index,
+                          struct obd_device *disk_obd, int count,
+                          struct llog_logid *logid)
  {
          struct llog_ctxt *ctxt;
          struct llog_handle *handle;
@@ -187,20 +183,19 @@ int llog_obd_origin_setup(struct obd_device *obd, int index, struct obd_device *
                  RETURN(0);
  
          LASSERT(count == 1);
-        
+
          ctxt = llog_get_context(obd, index);
          LASSERT(ctxt);
-        log_gen_init(ctxt);
+        llog_gen_init(ctxt);
  
-        down(&ctxt->loc_sem);
          if (logid->lgl_oid)
                  rc = llog_create(ctxt, &handle, logid, NULL);
          else {
                  rc = llog_create(ctxt, &handle, NULL, NULL);
-                if (!rc) 
+                if (!rc)
                          *logid = handle->lgh_id;
          }
-        if (rc) 
+        if (rc)
                  GOTO(out, rc);
  
          ctxt->loc_handle = handle;
@@ -210,11 +205,10 @@ int llog_obd_origin_setup(struct obd_device *obd, int index, struct obd_device *
          if (rc)
                  GOTO(out, rc);
  
-        rc = llog_process(handle, (llog_cb_t)cat_cancel_cb, NULL);
-        if (rc) 
+        rc = llog_process(handle, (llog_cb_t)cat_cancel_cb, NULL, NULL);
+        if (rc)
                  CERROR("llog_process with cat_cancel_cb failed: %d\n", rc);
   out:
-        up(&ctxt->loc_sem);
          if (ctxt && rc) {
                  obd->obd_llog_ctxt[index] = NULL;
                  OBD_FREE(ctxt, sizeof(*ctxt));
@@ -229,33 +223,35 @@ int llog_obd_origin_cleanup(struct llog_ctxt *ctxt)
          struct llog_log_hdr *llh;
          int rc, index;
          ENTRY;
-        
+
          if (!ctxt)
                  return 0;
  
          cathandle = ctxt->loc_handle;
          if (cathandle) {
-                list_for_each_entry_safe(loghandle, n, &cathandle->u.chd.chd_head, 
-                                 u.phd.phd_entry) {
+                list_for_each_entry_safe(loghandle, n,
+                                         &cathandle->u.chd.chd_head,
+                                         u.phd.phd_entry) {
                          llh = loghandle->lgh_hdr;
-                        if ((le32_to_cpu(llh->llh_flags) & LLOG_F_ZAP_WHEN_EMPTY) &&
+                        if ((le32_to_cpu(llh->llh_flags) &
+                                LLOG_F_ZAP_WHEN_EMPTY) &&
                              (le32_to_cpu(llh->llh_count) == 1)) {
                                  rc = llog_destroy(loghandle);
                                  if (rc)
-                                        CERROR("failure destroying log during cleanup: %d\n",
-                                               rc);
+                                        CERROR("failure destroying log during "
+                                               "cleanup: %d\n", rc);
                                  LASSERT(rc == 0);
  
                                  index = loghandle->u.phd.phd_cookie.lgc_index;
-                                if (cathandle->u.chd.chd_current_log == loghandle)
-                                        cathandle->u.chd.chd_current_log = NULL;
                                  llog_free_handle(loghandle);
-                                                                                                                             
+
                                  LASSERT(index);
+                                llog_cat_set_first_idx(cathandle, index);
                                  rc = llog_cancel_rec(cathandle, index);
                                  if (rc == 0)
-                                        CWARN("cancel plain log at index %u of catalog "LPX64"\n", 
-                                              index, cathandle->lgh_id.lgl_oid);
+                                        CDEBUG(D_HA, "cancel plain log at index"
+                                               " %u of catalog "LPX64"\n",
+                                               index,cathandle->lgh_id.lgl_oid);
                          }
                  }
                  llog_cat_put(ctxt->loc_handle);
@@ -264,7 +260,6 @@ int llog_obd_origin_cleanup(struct llog_ctxt *ctxt)
  }
  EXPORT_SYMBOL(llog_obd_origin_cleanup);
  
-
  /* add for obdfilter/sz and mds/unlink */
  int llog_obd_origin_add(struct llog_ctxt *ctxt,
                          struct llog_rec_hdr *rec, struct lov_stripe_md *lsm,
@@ -314,7 +309,7 @@ int llog_cat_initialize(struct obd_device *obd, int count)
                  CERROR("rc: %d\n", rc);
                  GOTO(out, rc);
          }
-                        
+
   out:
          OBD_FREE(idarray, size);
          RETURN(rc);
diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c

index 80742c8..63fd22b 100644 (file)
--- a/lustre/ptlrpc/import.c
+++ b/lustre/ptlrpc/import.c
@@ -39,66 +39,240 @@
  
  #include "ptlrpc_internal.h"
  
-/* should this take an imp_sem to ensure connect is single threaded? */
-int ptlrpc_connect_import(struct obd_import *imp)
+struct ptlrpc_connect_async_args {
+         __u64 pcaa_peer_committed;
+        int pcaa_initial_connect;
+        int pcaa_was_invalid;
+};
+
+/* A CLOSED import should remain so. */
+#define IMPORT_SET_STATE_NOLOCK(imp, state)                                    \
+do {                                                                           \
+        if (imp->imp_state != LUSTRE_IMP_CLOSED) {                             \
+               CDEBUG(D_HA, "%p %s: changing import state from %s to %s\n",    \
+                      imp, imp->imp_target_uuid.uuid,                          \
+                      ptlrpc_import_state_name(imp->imp_state),                \
+                      ptlrpc_import_state_name(state));                        \
+               imp->imp_state = state;                                         \
+        }                                                                      \
+} while(0)
+
+#define IMPORT_SET_STATE(imp, state)                    \
+do {                                                    \
+        unsigned long flags;                            \
+                                                        \
+        spin_lock_irqsave(&imp->imp_lock, flags);       \
+        IMPORT_SET_STATE_NOLOCK(imp, state);            \
+        spin_unlock_irqrestore(&imp->imp_lock, flags);  \
+} while(0)
+
+
+static int ptlrpc_connect_interpret(struct ptlrpc_request *request,
+                                    void * data, int rc);
+int ptlrpc_import_recovery_state_machine(struct obd_import *imp);
+
+/* Only this function is allowed to change the import state when it is
+ * CLOSED. I would rather refcount the import and free it after
+ * disconnection like we do with exports. To do that, the client_obd
+ * will need to save the peer info somewhere other than in the import,
+ * though. */
+int ptlrpc_init_import(struct obd_import *imp)
+{
+        unsigned long flags;
+        
+        spin_lock_irqsave(&imp->imp_lock, flags);
+
+        imp->imp_generation++;
+        imp->imp_state =  LUSTRE_IMP_NEW;
+
+        spin_unlock_irqrestore(&imp->imp_lock, flags);
+
+        return 0;
+}
+
+/* Returns true if import was FULL, false if import was already not
+ * connected.
+ */
+int ptlrpc_set_import_discon(struct obd_import *imp)
+{
+        unsigned long flags;
+        int rc = 0;
+        
+        spin_lock_irqsave(&imp->imp_lock, flags);
+
+        if (imp->imp_state == LUSTRE_IMP_FULL) {
+                IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON);
+                rc = 1;
+        } else {
+                CDEBUG(D_HA, "%p %s: import already not connected: %s\n",
+                       imp,imp->imp_client->cli_name, 
+                       ptlrpc_import_state_name(imp->imp_state));
+        }
+        spin_unlock_irqrestore(&imp->imp_lock, flags);
+
+        return rc;
+}
+
+void ptlrpc_fail_import(struct obd_import *imp, int generation)
+{
+        ENTRY;
+
+        LASSERT (!imp->imp_dlm_fake);
+
+        if (ptlrpc_set_import_discon(imp))
+                ptlrpc_handle_failed_import(imp);
+
+        EXIT;
+}
+
+int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid)
  {
          struct obd_device *obd = imp->imp_obd;
-        int msg_flags;
          int initial_connect = 0;
          int rc;
          __u64 committed_before_reconnect = 0;
+        int was_invalid = 0;
          struct ptlrpc_request *request;
-        struct lustre_handle old_hdl;
          int size[] = {sizeof(imp->imp_target_uuid),
                                   sizeof(obd->obd_uuid),
                                   sizeof(imp->imp_dlm_handle)};
          char *tmp[] = {imp->imp_target_uuid.uuid,
                         obd->obd_uuid.uuid,
                         (char *)&imp->imp_dlm_handle};
+        struct ptlrpc_connect_async_args *aa;
          unsigned long flags;
  
          spin_lock_irqsave(&imp->imp_lock, flags);
-        if (imp->imp_state == LUSTRE_IMP_CONNECTING) {
+        if (imp->imp_state == LUSTRE_IMP_CLOSED) {
                  spin_unlock_irqrestore(&imp->imp_lock, flags);
+                CERROR("can't connect to a closed import\n");
+                RETURN(-EINVAL);
+        } else if (imp->imp_state == LUSTRE_IMP_FULL) {
+                spin_unlock_irqrestore(&imp->imp_lock, flags);
+                CERROR("already connected\n");
+                RETURN(0);
+        } else if (imp->imp_state == LUSTRE_IMP_CONNECTING) {
+                spin_unlock_irqrestore(&imp->imp_lock, flags);
+                CERROR("already connecting\n");
                  RETURN(-EALREADY);
-        } else {
-                LASSERT(imp->imp_state == LUSTRE_IMP_DISCON);
          }
-        CDEBUG(D_HA, "%s: new state: CONNECTING\n", 
-               imp->imp_client->cli_name);
-        imp->imp_state = LUSTRE_IMP_CONNECTING;
+
+        IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CONNECTING);
+
          imp->imp_conn_cnt++; 
+        imp->imp_last_replay_transno = 0;
+
          if (imp->imp_remote_handle.cookie == 0) {
                  initial_connect = 1;
          } else {
-                committed_before_reconnect = imp->imp_peer_committed_transno;
+                committed_before_reconnect = imp->imp_peer_committed_transno;;
+
+        }
+
+        if (imp->imp_invalid) {
+                imp->imp_invalid = 0;
+                was_invalid = 1;
          }
+
          spin_unlock_irqrestore(&imp->imp_lock, flags);
  
+        if (new_uuid) {
+                struct ptlrpc_connection *conn;
+                struct obd_uuid uuid;
+                struct obd_export *dlmexp;
+
+                obd_str2uuid(&uuid, new_uuid);
+
+                conn = ptlrpc_uuid_to_connection(&uuid);
+                if (!conn)
+                        GOTO(out, rc = -ENOENT);
+
+                CDEBUG(D_HA, "switching import %s/%s from %s to %s\n",
+                       imp->imp_target_uuid.uuid, imp->imp_obd->obd_name,
+                       imp->imp_connection->c_remote_uuid.uuid,
+                       conn->c_remote_uuid.uuid);
+
+                /* Switch the import's connection and the DLM export's
+                 * connection (which are almost certainly the same, but we
+                 * keep distinct refs just to make things clearer, I think). */
+                if (imp->imp_connection)
+                        ptlrpc_put_connection(imp->imp_connection);
+                /* We hand off the ref from ptlrpc_get_connection. */
+                imp->imp_connection = conn;
+
+                dlmexp = class_conn2export(&imp->imp_dlm_handle);
+                
+                LASSERT(dlmexp != NULL);
+
+                if (dlmexp->exp_connection)
+                        ptlrpc_put_connection(dlmexp->exp_connection);
+                dlmexp->exp_connection = ptlrpc_connection_addref(conn);
+                class_export_put(dlmexp);
+
+        }
+
          request = ptlrpc_prep_req(imp, imp->imp_connect_op, 3, size, tmp);
          if (!request)
                  GOTO(out, rc = -ENOMEM);
  
          request->rq_send_state = LUSTRE_IMP_CONNECTING;
          request->rq_replen = lustre_msg_size(0, NULL);
+        request->rq_interpret_reply = ptlrpc_connect_interpret;
+
+        LASSERT (sizeof (*aa) <= sizeof (request->rq_async_args));
+        aa = (struct ptlrpc_connect_async_args *)&request->rq_async_args;
+        memset(aa, 0, sizeof *aa);
  
-        // lustre_msg_add_op_flags(request->rq_reqmsg, MSG_CONNECT_PEER);
+        aa->pcaa_peer_committed = committed_before_reconnect;
+        aa->pcaa_initial_connect = initial_connect;
+        aa->pcaa_was_invalid = was_invalid;
  
-        rc = ptlrpc_queue_wait(request);
-        if (rc) {
-                GOTO(free_req, rc);
+        if (aa->pcaa_initial_connect)
+                imp->imp_replayable = 1;
+        ptlrpcd_add_req(request);
+        rc = 0;
+out:
+        if (rc != 0) {
+                IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON);
+        }
+
+        RETURN(rc);
+}
+
+static int ptlrpc_connect_interpret(struct ptlrpc_request *request,
+                                    void * data, int rc)
+{
+        struct ptlrpc_connect_async_args *aa = data;
+        struct obd_import *imp = request->rq_import;
+        struct lustre_handle old_hdl;
+        unsigned long flags;
+        int msg_flags;
+        ENTRY;
+        
+        spin_lock_irqsave(&imp->imp_lock, flags);
+        if (imp->imp_state == LUSTRE_IMP_CLOSED) {
+                spin_unlock_irqrestore(&imp->imp_lock, flags);
+                RETURN(0);
          }
+        spin_unlock_irqrestore(&imp->imp_lock, flags);
+
+        if (rc)
+                GOTO(out, rc);
  
          msg_flags = lustre_msg_get_op_flags(request->rq_repmsg);
  
-        if (initial_connect) {
-                CDEBUG(D_HA, "%s: new state: FULL\n", 
-                       imp->imp_client->cli_name);
-                if (msg_flags & MSG_CONNECT_REPLAYABLE)
+        if (aa->pcaa_initial_connect) {
+                if (msg_flags & MSG_CONNECT_REPLAYABLE) {
+                        CDEBUG(D_HA, "connected to replayable target: %s\n",
+                               imp->imp_target_uuid.uuid);
                          imp->imp_replayable = 1;
+                        ptlrpc_pinger_add_import(imp);
+                } else {
+                        imp->imp_replayable = 0;
+                }
                  imp->imp_remote_handle = request->rq_repmsg->handle;
-                imp->imp_state = LUSTRE_IMP_FULL;
-                GOTO(free_req, rc = 0);
+                IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL);
+                GOTO(finish, rc = 0);
          }
  
          /* Determine what recovery state to move the import to. */
@@ -110,7 +284,7 @@ int ptlrpc_connect_import(struct obd_import *imp)
                                 ", failed\n", imp->imp_target_uuid.uuid,
                                 imp->imp_connection->c_remote_uuid.uuid,
                                 imp->imp_dlm_handle.cookie);
-                        GOTO(free_req, rc = -ENOTCONN);
+                        GOTO(out, rc = -ENOTCONN);
                  }
  
                  if (memcmp(&imp->imp_remote_handle, &request->rq_repmsg->handle,
@@ -127,23 +301,17 @@ int ptlrpc_connect_import(struct obd_import *imp)
                                 imp->imp_target_uuid.uuid, 
                                 imp->imp_connection->c_remote_uuid.uuid);
                  }
-                CDEBUG(D_HA, "%s: new state: RECOVER\n", 
-                       imp->imp_client->cli_name);
-                imp->imp_state = LUSTRE_IMP_RECOVER;
+                IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
          } 
          else if (MSG_CONNECT_RECOVERING & msg_flags) {
-                CDEBUG(D_HA, "%s: new state: REPLAY\n", 
-                       imp->imp_client->cli_name);
                  LASSERT(imp->imp_replayable);
                  imp->imp_state = LUSTRE_IMP_RECOVER;
                  imp->imp_remote_handle = request->rq_repmsg->handle;
-                imp->imp_state = LUSTRE_IMP_REPLAY;
+                IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY);
          } 
          else {
-                CDEBUG(D_HA, "%s: new state: EVICTED\n", 
-                       imp->imp_client->cli_name);
                  imp->imp_remote_handle = request->rq_repmsg->handle;
-                imp->imp_state = LUSTRE_IMP_EVICTED;
+                IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED);
          }
          
          /* Sanity checks for a reconnected import. */
@@ -153,31 +321,150 @@ int ptlrpc_connect_import(struct obd_import *imp)
                         "after reconnect. We should LBUG right here.\n");
          }
  
-        if (request->rq_repmsg->last_committed < committed_before_reconnect) {
+        if (request->rq_repmsg->last_committed < aa->pcaa_peer_committed) {
                  CERROR("%s went back in time (transno "LPD64
                         " was previously committed, server now claims "LPD64
                         ")! is shared storage not coherent?\n",
                         imp->imp_target_uuid.uuid,
-                       committed_before_reconnect,
+                       aa->pcaa_peer_committed,
                         request->rq_repmsg->last_committed);
          }
  
- free_req:
-        ptlrpc_req_finished(request);
+finish:
+        rc = ptlrpc_import_recovery_state_machine(imp);
+        if (rc != 0) {
+                if (aa->pcaa_was_invalid) {
+                        ptlrpc_set_import_active(imp, 0);
+                }                
  
+                if (rc == -ENOTCONN) {
+                        CDEBUG(D_HA, "evicted/aborted by %s@%s during recovery;"
+                               "invalidating and reconnecting\n",
+                               imp->imp_target_uuid.uuid,
+                               imp->imp_connection->c_remote_uuid.uuid);
+                        ptlrpc_connect_import(imp, NULL);
+                        RETURN(0);
+                } 
+        }
   out:
-        if (rc != 0)
-                imp->imp_state = LUSTRE_IMP_DISCON;
+        if (rc != 0) {
+                IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON);
+                if (aa->pcaa_initial_connect && !imp->imp_initial_recov)
+                        GOTO(norecov, rc);
+                CDEBUG(D_ERROR, 
+                       "recovery of %s on %s failed (%d); restarting\n",
+                       imp->imp_target_uuid.uuid,
+                       (char *)imp->imp_connection->c_remote_uuid.uuid, rc);
+                ptlrpc_handle_failed_import(imp);
+        }
+
+norecov:
+        wake_up(&imp->imp_recovery_waitq);
          RETURN(rc);
  }
  
+static int completed_replay_interpret(struct ptlrpc_request *req,
+                                    void * data, int rc)
+{
+        atomic_dec(&req->rq_import->imp_replay_inflight);
+        ptlrpc_import_recovery_state_machine(req->rq_import);
+        RETURN(0);
+}
+
+static int signal_completed_replay(struct obd_import *imp)
+ {
+        struct ptlrpc_request *req;
+        ENTRY;
+
+        LASSERT(atomic_read(&imp->imp_replay_inflight) == 0);
+        atomic_inc(&imp->imp_replay_inflight);
+
+        req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL, NULL);
+        if (!req)
+                RETURN(-ENOMEM);
+
+        req->rq_replen = lustre_msg_size(0, NULL);
+        req->rq_send_state = LUSTRE_IMP_REPLAY_WAIT;
+        req->rq_reqmsg->flags |= MSG_LAST_REPLAY;
+        req->rq_timeout *= 3; 
+        req->rq_interpret_reply = completed_replay_interpret;
+
+        ptlrpcd_add_req(req);
+        RETURN(0);
+}
+
+
+int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
+{
+        int rc = 0;
+
+        if (imp->imp_state == LUSTRE_IMP_EVICTED) {
+                CDEBUG(D_HA, "evicted from %s@%s; invalidating\n",
+                       imp->imp_target_uuid.uuid,
+                       imp->imp_connection->c_remote_uuid.uuid);
+                ptlrpc_set_import_active(imp, 0);
+                IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
+        } 
+        
+        if (imp->imp_state == LUSTRE_IMP_REPLAY) {
+                CDEBUG(D_HA, "replay requested by %s\n",
+                       imp->imp_target_uuid.uuid);
+                rc = ptlrpc_replay_next(imp);
+                if (rc == 0 && atomic_read(&imp->imp_replay_inflight) == 0) {
+                        IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY_LOCKS);
+                        rc = ldlm_replay_locks(imp);
+                        if (rc)
+                                GOTO(out, rc);
+                }
+                rc = 0;
+        }
+
+        if (imp->imp_state == LUSTRE_IMP_REPLAY_LOCKS) {
+                if (atomic_read(&imp->imp_replay_inflight) == 0) {
+                        IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY_WAIT);
+                        rc = signal_completed_replay(imp);
+                        if (rc)
+                                GOTO(out, rc);
+                }
  
+        }
+
+        if (imp->imp_state == LUSTRE_IMP_REPLAY_WAIT) {
+                if (atomic_read(&imp->imp_replay_inflight) == 0) {
+                        IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
+                }
+        }
+
+        if (imp->imp_state == LUSTRE_IMP_RECOVER) {
+                CDEBUG(D_HA, "reconnected to %s@%s\n",
+                       imp->imp_target_uuid.uuid,
+                       imp->imp_connection->c_remote_uuid.uuid);
+
+                ptlrpc_set_import_active(imp, 1);
+                ptlrpc_resend(imp);
+                IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL);
+        } 
+
+        if (imp->imp_state == LUSTRE_IMP_FULL) {
+                wake_up(&imp->imp_recovery_waitq);
+                ptlrpc_wake_delayed(imp);
+        }
+
+ out:
+        RETURN(rc);
+}
+
+static int back_to_sleep(void *unused) 
+{
+       return 0;
+}
  
  int ptlrpc_disconnect_import(struct obd_import *imp)
  {
          struct ptlrpc_request *request;
          int rq_opc;
          int rc = 0;
+        unsigned long flags;
          ENTRY;
  
          switch (imp->imp_connect_op) {
@@ -190,12 +477,28 @@ int ptlrpc_disconnect_import(struct obd_import *imp)
                  RETURN(-EINVAL);
          }
  
+
+        if (ptlrpc_import_in_recovery(imp)) {
+                struct l_wait_info lwi;
+                lwi = LWI_TIMEOUT_INTR(MAX(obd_timeout * HZ, 1), back_to_sleep, 
+                                       NULL, NULL);
+                rc = l_wait_event(imp->imp_recovery_waitq, 
+                                  !ptlrpc_import_in_recovery(imp), &lwi);
+
+        }
+
+        spin_lock_irqsave(&imp->imp_lock, flags);
+        if (imp->imp_state != LUSTRE_IMP_FULL) {
+                GOTO(out, 0);
+        }
+        spin_unlock_irqrestore(&imp->imp_lock, flags);
+
          request = ptlrpc_prep_req(imp, rq_opc, 0, NULL, NULL);
          if (request) {
                  /* For non-replayable connections, don't attempt
                     reconnect if this fails */
-                if (!imp->imp_obd->obd_replayable) {
-                        imp->imp_state = LUSTRE_IMP_DISCON;
+                if (!imp->imp_replayable) {
+                        IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON);
                          request->rq_send_state =  LUSTRE_IMP_DISCON;
                  }
                  request->rq_replen = lustre_msg_size(0, NULL);
@@ -203,8 +506,12 @@ int ptlrpc_disconnect_import(struct obd_import *imp)
                  ptlrpc_req_finished(request);
          }
  
-        imp->imp_state = LUSTRE_IMP_DISCON;
+        spin_lock_irqsave(&imp->imp_lock, flags);
+out:
+        IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CLOSED);
          memset(&imp->imp_remote_handle, 0, sizeof(imp->imp_remote_handle));
+        spin_unlock_irqrestore(&imp->imp_lock, flags);
+
          RETURN(rc);
  }
  
diff --git a/lustre/ptlrpc/llog_client.c b/lustre/ptlrpc/llog_client.c

index 5524843..8accba6 100644 (file)
--- a/lustre/ptlrpc/llog_client.c
+++ b/lustre/ptlrpc/llog_client.c
@@ -194,6 +194,7 @@ static int llog_client_read_header(struct llog_handle *handle)
                  GOTO(out, rc =-EFAULT);
         }
          memcpy(handle->lgh_hdr, hdr, sizeof (*hdr));
+        handle->lgh_last_idx = le32_to_cpu(handle->lgh_hdr->llh_tail.lrt_index);
  
  out:
          if (req)
diff --git a/lustre/ptlrpc/llog_net.c b/lustre/ptlrpc/llog_net.c

index 1dd2f9a..0694bd1 100644 (file)
--- a/lustre/ptlrpc/llog_net.c
+++ b/lustre/ptlrpc/llog_net.c
@@ -45,9 +45,9 @@
  
  #ifdef __KERNEL__
  int llog_origin_connect(struct llog_ctxt *ctxt, int count,
-                        struct llog_logid *logid,
-                        struct llog_ctxt_gen *gen)
+                        struct llog_logid *logid, struct llog_gen *gen)
  {
+        struct llog_gen_rec *lgr;
          struct obd_import *imp;
          struct ptlrpc_request *request;
          struct llogd_conn_body *req_body;
@@ -55,11 +55,31 @@ int llog_origin_connect(struct llog_ctxt *ctxt, int count,
          int rc;
          ENTRY;
  
+        if (list_empty(&ctxt->loc_handle->u.chd.chd_head)) {
+                CDEBUG(D_HA, "there is no record related to ctxt %p", ctxt);
+                RETURN(0);
+        }
+
+        /* FIXME what value for gen->conn_cnt */
+        LLOG_GEN_INC(ctxt->loc_gen);
+
+        /* first add llog_gen_rec */
+        OBD_ALLOC(lgr, sizeof(*lgr));
+        if (!lgr)
+                RETURN(-ENOMEM);
+        lgr->lgr_hdr.lrh_len = lgr->lgr_tail.lrt_len = sizeof(*lgr);
+        lgr->lgr_hdr.lrh_type = LLOG_GEN_REC;
+        lgr->lgr_gen = ctxt->loc_gen;
+        rc = llog_add(ctxt, &lgr->lgr_hdr, NULL, NULL, 1);
+        OBD_FREE(lgr, sizeof(*lgr));
+        if (rc != 1)
+                RETURN(rc);
+
          LASSERT(ctxt->loc_imp);
          imp = ctxt->loc_imp;
  
          request = ptlrpc_prep_req(imp, LLOG_ORIGIN_CONNECT, 1, &size, NULL);
-        if (!request) 
+        if (!request)
                  RETURN(-ENOMEM);
  
          req_body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof(*req_body));
@@ -87,9 +107,9 @@ int llog_handle_connect(struct ptlrpc_request *req)
          req_body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*req_body));
  
          ctxt = llog_get_context(obd, req_body->lgdc_ctxt_idx);
-        rc = llog_connect(ctxt, 1, &req_body->lgdc_logid, 
+        rc = llog_connect(ctxt, 1, &req_body->lgdc_logid,
                            &req_body->lgdc_gen);
-        if (rc != 0) 
+        if (rc != 0)
                  CERROR("failed at llog_relp_connect\n");
  
          RETURN(rc);
diff --git a/lustre/scripts/lbuild b/lustre/scripts/lbuild

index 327ae91..0e682c5 100755 (executable)
--- a/lustre/scripts/lbuild
+++ b/lustre/scripts/lbuild
@@ -136,7 +136,7 @@ check_options()
      [ -d "$KERNELDIR" ] || \
         usage 1 "$KERNELDIR is not a directory."
  
-    if [ "$RELEASE" = "no" ] ; then
+    if ! (( $RELEASE )) ; then
         [ "$TAG" ] || \
             usage 1 "When building a snapshot, a tag name must be used."
      fi
@@ -186,10 +186,10 @@ load_target()
  
      if [ "$EXTRA_VERSION_save" ] ; then
         EXTRA_VERSION="$EXTRA_VERSION_save"
-    else
-       EXTRA_VERSION="${EXTRA_VERSION}_${TAG//_/}.${TIMESTAMP}"
+    elif ! (( $RELEASE )) ; then
+       EXTRA_VERSION="${EXTRA_VERSION}-${TAG//_/}.${TIMESTAMP}"
      fi
-    EXTRA_VERSION=${EXTRA_VERSION//-/_/}
+    # EXTRA_VERSION=${EXTRA_VERSION//-/_}
  
      ALL_ARCHS="$BASE_ARCHS $BIGMEM_ARCHS $BOOT_ARCHS $JENSEN_ARCHS $SMP_ARCHS $UP_ARCHS"
  
@@ -253,7 +253,7 @@ unpack_linux()
  
  patch_linux()
  {
-    FULL_PATCH="$PWD/lustre-kernel-${target}-${EXTRA_VERSION}.patch"
+    FULL_PATCH="$PWD/lustre-kernel-${TARGET}-${EXTRA_VERSION}.patch"
      [ -f "$FULL_PATCH" ] && rm -f "$FULL_PATCH"
      pushd linux >/dev/null
      echo -n "Applying patches:"
@@ -295,24 +295,26 @@ clean_linux()
  prep_build()
  {
      # make .spec file
-    sed -e s/@KERNEL_VERSION@/$VERSION/g \
-       -e s/@KERNEL_RELEASE@/$EXTRA_VERSION/g \
-       -e s/@KERNEL_SOURCE@/$KERNEL/g \
-       -e s/@LUSTRE_SOURCE@/${LUSTRE##*/}/g \
-       -e s/@LUSTRE_TARGET@/$TARGET/g \
-       -e s/@CONFIGURE_FLAGS@/$CONFIGURE_FLAGS/g \
-       -e s/@BASE_ARCHS@/$BASE_ARCHS/g \
-       -e s/@BIGMEM_ARCHS@/$BIGMEM_ARCHS/g \
-       -e s/@BOOT_ARCHS@/$BOOT_ARCHS/g \
-       -e s/@JENSEN_ARCHS@/$BOOT_ARCHS/g \
-       -e s/@SMP_ARCHS@/$SMP_ARCHS/g \
-       -e s/@UP_ARCHS@/$UP_ARCHS/g \
+    sed -e "s/@KERNEL_VERSION@/$VERSION/g" \
+       -e "s/@KERNEL_EXTRA_VERSION@/$EXTRA_VERSION/g" \
+       -e "s^@KERNEL_RELEASE@^${EXTRA_VERSION//-/_}^g" \
+       -e "s/@KERNEL_SOURCE@/$KERNEL/g" \
+       -e "s/@LUSTRE_SOURCE@/${LUSTRE##*/}/g" \
+       -e "s/@LUSTRE_TARGET@/$TARGET/g" \
+       -e "s/@CONFIGURE_FLAGS@/$CONFIGURE_FLAGS/g" \
+       -e "s/@BASE_ARCHS@/$BASE_ARCHS/g" \
+       -e "s/@BIGMEM_ARCHS@/$BIGMEM_ARCHS/g" \
+       -e "s/@BOOT_ARCHS@/$BOOT_ARCHS/g" \
+       -e "s/@JENSEN_ARCHS@/$BOOT_ARCHS/g" \
+       -e "s/@SMP_ARCHS@/$SMP_ARCHS/g" \
+       -e "s/@UP_ARCHS@/$UP_ARCHS/g" \
         < $TOPDIR/lustre/scripts/lustre-kernel-2.4.spec.in \
         > lustre-kernel-2.4.spec
      [ -d SRPMS ] || mkdir SRPMS
      [ -d RPMS ] || mkdir RPMS
      [ -d BUILD ] || mkdir BUILD
      [ -d SOURCES ] || mkdir SOURCES
+    cp $TOPDIR/lustre/scripts/linux-rhconfig.h SOURCES
      cp $TOPDIR/lustre/scripts/linux-merge-config.awk SOURCES
      cp $TOPDIR/lustre/scripts/linux-merge-modules.awk SOURCES
      cp "$LUSTRE" "$KERNEL_FILE" SOURCES
diff --git a/lustre/scripts/lmake b/lustre/scripts/lmake

index addbe4f..3a851a8 100755 (executable)
--- a/lustre/scripts/lmake
+++ b/lustre/scripts/lmake
@@ -6,7 +6,7 @@ KERNELDIR=
  TARGET=
  # Not sure what to put here
  # TARGET_ARCH=$(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/)
-TARGET_ARCH="i386"
+TARGET_ARCH=
  TARGET_CONFIG=
  JOBS=1
  CONFIGURE_FLAGS=
@@ -208,10 +208,11 @@ load_target()
      [ -r "$SERIES_FILE" ] || \
         fatal 1 "Target $TARGET's series $SERIES missing from $TOPDIR/kernel_patches/series."
  
-    CONFIG_TARGET="$TARGET${TARGET_ARCH:+-$TARGET_ARCH}${TARGET_CONFIG:+-$TARGET_CONFIG}"
+    TARGET_ARCH=${TARGET_ARCH:-$BASE_ARCHS}
+    CONFIG_TARGET="$TARGET-${TARGET_ARCH}${TARGET_CONFIG:+-$TARGET_CONFIG}"
      CONFIG_FILE="$TOPDIR/kernel_patches/kernel_configs/kernel-$VERSION-$CONFIG_TARGET.config"
-    [ -r "$CONFIG_FILE" ] || \
-       fatal 1 "Target $TARGET's config file $CONFIG missing from $TOPDIR/kernel_patches/configs."
+    [ -r "$CONFIG_FILE" ] ||
+       fatal 1 "Target $TARGET's config file $CONFIG_FILE missing from $TOPDIR/kernel_patches/configs."
  
      if [ "$EXTRA_VERSION_save" ] ; then
         EXTRA_VERSION="$EXTRA_VERSION_save"
diff --git a/lustre/scripts/lustre-kernel-2.4.spec.in b/lustre/scripts/lustre-kernel-2.4.spec.in

index c30bb54..0999212 100644 (file)
--- a/lustre/scripts/lustre-kernel-2.4.spec.in
+++ b/lustre/scripts/lustre-kernel-2.4.spec.in
@@ -9,6 +9,7 @@ Summary: The Linux kernel (the core of the Linux operating system)
  # adding some text to the end of the version number.
  #
  %define kversion @KERNEL_VERSION@
+%define kextraver @KERNEL_EXTRA_VERSION@
  %define release @KERNEL_RELEASE@
  # /usr/src/%{kslnk} -> /usr/src/linux-%{KVERREL}
  %define kslnk linux-2.4
@@ -47,6 +48,7 @@ Summary: The Linux kernel (the core of the Linux operating system)
  %define buildjensen 0
  %define buildsmp 0
  %define buildup 0
+%define buildsrc 0
  
  %ifarch @BASE_ARCHS@
  %define buildbase 1
@@ -124,7 +126,7 @@ Second, per-architecture exclusions (ifarch)
  Name: kernel
  Version: %{kversion}
  Release: %{release}%{?targetboard:%{targetboard}}%{?debuglevel_1:.dbg}
-%define KVERREL %{PACKAGE_VERSION}-%{PACKAGE_RELEASE}
+%define KVERREL %{PACKAGE_VERSION}-%{kextraver}%{?targetboard:%{targetboard}}%{?debuglevel_1:.dbg}
  License: GPL
  Group: System Environment/Kernel
  ExclusiveArch: %{all_x86} x86_64
@@ -149,6 +151,7 @@ Buildroot: /var/tmp/%{name}-%{PACKAGE_VERSION}-root
  Source0: @LUSTRE_SOURCE@
  Source1: @KERNEL_SOURCE@
  
+Source15: linux-rhconfig.h
  Source16: linux-merge-config.awk
  Source17: linux-merge-modules.awk
  
@@ -318,7 +321,7 @@ DependKernel()
         --target @LUSTRE_TARGET@ \
         --target-arch %{_target_cpu} \
         ${target_config} \
-       --extraversion %{release} \
+       --extraversion %{kextraver} \
         -j $RPM_BUILD_NCPUS
  }
  
@@ -331,7 +334,23 @@ BuildKernel()
         --target @LUSTRE_TARGET@ \
         --target-arch %{_target_cpu} \
         ${target_config} \
-       --extraversion %{release} \
+       --extraversion %{kextraver} \
+       --kerneldir $RPM_SOURCE_DIR \
+       -j $RPM_BUILD_NCPUS \
+       --destdir $RPM_BUILD_ROOT \
+       -- @CONFIGURE_FLAGS@
+}
+
+BuildLustre()
+{
+  target_config=${1:+--target-config $1}
+  sh -x ./scripts/lmake \
+       --build-lustre \
+       --install-lustre \
+       --target @LUSTRE_TARGET@ \
+       --target-arch %{_target_cpu} \
+       ${target_config} \
+       --extraversion %{kextraver} \
         --kerneldir $RPM_SOURCE_DIR \
         -j $RPM_BUILD_NCPUS \
         --destdir $RPM_BUILD_ROOT \
@@ -343,7 +362,7 @@ SaveHeaders()
    sh -x ./scripts/lmake \
         --save-headers \
         --target @LUSTRE_TARGET@ \
-       --extraversion %{release} \
+       --extraversion %{kextraver} \
         --destdir $RPM_BUILD_ROOT
  }
  
@@ -366,8 +385,10 @@ BuildKernel smp
  %endif
  
  # we want this one last, so that it is the one populating /usr/bin
-%if %{buildup} || %{buildbase}
+%if %{buildup} && %{buildbase}
  BuildKernel
+%elseif %{buildbase}
+BuildLustre
  %endif
  
  %if %{buildbase}
@@ -401,7 +422,7 @@ pushd linux >/dev/null
  mkdir -p $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}
  rm -f drivers/net/hamradio/soundmodem/gentbl scripts/mkdep
  tar cf - . | tar xf - -C $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}
-perl -p -i -e "s/^EXTRAVERSION.*/EXTRAVERSION = -%{release}custom/" $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}/Makefile
+perl -p -i -e "s/^EXTRAVERSION.*/EXTRAVERSION = -%{kextraver}custom/" $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}/Makefile
  ln -sf linux-%{KVERREL} $RPM_BUILD_ROOT/usr/src/linux
  # install -m 644 %{SOURCE10}  $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}
  
@@ -443,7 +464,7 @@ for l in $list; do
  done
  echo '#endif' >> modversions.h
  sed 's,$,autoconf.h,' ../../savedheaders/list | awk -f %{SOURCE16} >> autoconf.h
-# install -m 644 %{SOURCE15} rhconfig.h
+install -m 644 %{SOURCE15} rhconfig.h
  echo "#include <linux/rhconfig.h>" >> version.h
  keyword=if
  for i in smp BOOT BOOTsmp bigmem  up ; do
@@ -753,6 +774,7 @@ exit 0
  %files -n lustre-lite-utils
  %defattr(-, root, root)
  %doc lustre/COPYING lustre/BUGS lustre/ChangeLog lustre/README lustre/doc/lustre.pdf
+/sbin/*
  %{_sbindir}/*
  %{_bindir}/*
  %{_libdir}/lustre/python
diff --git a/lustre/tests/cfg/insanity-mdev.sh b/lustre/tests/cfg/insanity-mdev.sh

index 5e69356..ff34d6d 100644 (file)
--- a/lustre/tests/cfg/insanity-mdev.sh
+++ b/lustre/tests/cfg/insanity-mdev.sh
@@ -2,16 +2,18 @@ mds_HOST=${mds_HOST:-mdev4}
  mdsfailover_HOST=${mdsfailover_HOST:-mdev5}
  ost1_HOST=${ost1_HOST:-mdev2}
  ost2_HOST=${ost2_HOST:-mdev3}
+EXTRA_OSTS=${EXTRA_OSTS:-mdev7}
  client_HOST=client
  LIVE_CLIENT=${LIVE_CLIENT:-mdev6}
  # This should always be a list, not a regexp
-FAIL_CLIENTS=${FAIL_CLIENTS:-mdev7}
+#FAIL_CLIENTS=${FAIL_CLIENTS:-mdev7}
+FAIL_CLIENTS=${FAIL_CLIENTS:-""}
  
  NETTYPE=${NETTYPE:-tcp}
  
  TIMEOUT=${TIMEOUT:-30}
-#PTLDEBUG=${PTLDEBUG:-'"ha|info|ioctl|malloc"'}
  PTLDEBUG=${PTLDEBUG:-0}
+SUBSYSTEM=${SUBSYSTEM:-0}
  MOUNT=${MOUNT:-"/mnt/lustre"}
  UPCALL=${CLIENT_UPCALL:-`pwd`/replay-single-upcall.sh}
  
diff --git a/lustre/tests/cfg/mdev.sh b/lustre/tests/cfg/mdev.sh

index ec8edf2..dd373ba 100644 (file)
--- a/lustre/tests/cfg/mdev.sh
+++ b/lustre/tests/cfg/mdev.sh
@@ -11,7 +11,8 @@ MOUNT1=${MOUNT1:-$MOUNT}
  MOUNT2=${MOUNT2:-"/mnt/lustre2"}
  DIR=${DIR:-$MOUNT}
  DIR2=${DIR2:-$MOUNT1}
-PTLDEBUG=${PTLDEBUG:-0}
+PTLDEBUG=${PTLDEBUG:-0x3f0400}
+SUBSYSTEM=${SUBSYSTEM:- 0xffb7e3ff}
  PDSH=${PDSH:-pdsh -S -w}
  
  MDSDEV=${MDSDEV:-/dev/sda1}
diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh

index f6f77db..0f58491 100644 (file)
--- a/lustre/tests/conf-sanity.sh
+++ b/lustre/tests/conf-sanity.sh
@@ -2,6 +2,11 @@
  # requirement:
  #      add uml1 uml2 uml3 in your /etc/hosts
  
+# FIXME - there is no reason to use all of these different
+#   return codes, especially when most of them are mapped to something
+#   else anyway.  The combination of test number and return code
+#   is enough to figure out what failed.
+
  set -e
  
  SRCDIR=`dirname $0`
@@ -18,6 +23,12 @@ init_test_env $@
  
  FORCE=${FORCE:-" --force"}
  
+if [ "$VERBOSE" == "true" ]; then
+       CMDVERBOSE=""
+else
+       CMDVERBOSE=" > /dev/null"
+fi
+
  gen_config() {
         rm -f $XMLCONFIG
  
@@ -40,33 +51,33 @@ gen_second_config() {
  
  start_mds() {
         echo "start mds service on `facet_active_host mds`"
-       start mds --reformat $MDSLCONFARGS > /dev/null || return 94
+       start mds --reformat $MDSLCONFARGS $CMDVERBOSE || return 94
  }
  stop_mds() {
         echo "stop mds service on `facet_active_host mds`"
-       stop mds $@ > /dev/null || return 97 
+       stop mds $@ $CMDVERBOSE || return 97 
  }
  
  start_ost() {
         echo "start ost service on `facet_active_host ost`"
-       start ost --reformat $OSTLCONFARGS > /dev/null || return 95
+       start ost --reformat $OSTLCONFARGS $CMDVERBOSE || return 95
  }
  
  stop_ost() {
         echo "stop ost service on `facet_active_host ost`"
-       stop ost $@ > /dev/null || return 98 
+       stop ost $@ $CMDVERBOSE || return 98 
  }
  
  mount_client() {
         local MOUNTPATH=$1
         echo "mount lustre on ${MOUNTPATH}....."
-       zconf_mount $MOUNTPATH > /dev/null || return 96
+       zconf_mount `hostname`  $MOUNTPATH $CMDVERBOSE || return 96
  }
  
  umount_client() {
         local MOUNTPATH=$1
         echo "umount lustre on ${MOUNTPATH}....."
-       zconf_umount $MOUNTPATH > /dev/null || return 97
+       zconf_umount $MOUNTPATH $CMDVERBOSE || return 97
  }
  
  manual_umount_client(){
@@ -81,9 +92,15 @@ setup() {
  }
  
  cleanup() {
-       umount_client $MOUNT || return -200
-       stop_mds  || return -201
-       stop_ost || return -202
+       umount_client $MOUNT || return 200
+       stop_mds  || return 201
+       stop_ost || return 202
+       # catch case where these return just fine, but modules are still not unloaded
+       /sbin/lsmod | grep -q portals 
+       if [ 1 -ne $? ]; then
+               echo "modules still loaded..."
+               return 203
+       fi
  }
  
  check_mount() {
@@ -112,18 +129,18 @@ test_0() {
         start_mds       
         mount_client $MOUNT  
         check_mount || return 41
-       cleanup  
+       cleanup || return $?
  }
  run_test 0 "single mount setup"
  
  test_1() {
         start_ost
         echo "start ost second time..."
-       start ost --reformat $OSTLCONFARGS > /dev/null 
+       start ost --reformat $OSTLCONFARGS $CMDVERBOSE 
         start_mds       
         mount_client $MOUNT
         check_mount || return 42
-       cleanup 
+       cleanup || return $?
  }
  run_test 1 "start up ost twice"
  
@@ -131,11 +148,11 @@ test_2() {
         start_ost
         start_mds       
         echo "start mds second time.."
-       start mds --reformat $MDSLCONFARGS > /dev/null 
+       start mds --reformat $MDSLCONFARGS $CMDVERBOSE 
         
         mount_client $MOUNT  
         check_mount || return 43
-       cleanup 
+       cleanup || return $?
  }
  run_test 2 "start up mds twice"
  
@@ -146,7 +163,7 @@ test_3() {
         check_mount || return 44
         
         umount_client $MOUNT    
-       cleanup  
+       cleanup  || return $?
  }
  run_test 3 "mount client twice"
  
@@ -154,28 +171,39 @@ test_4() {
         setup
         touch $DIR/$tfile || return 85
         stop_ost ${FORCE}
-
-       # cleanup may return an error from the failed 
-       # disconnects; for now I'll consider this successful 
-       # if all the modules have unloaded.
-       if ! cleanup ; then
-           lsmod | grep -q portals && return 1
-        fi
+       cleanup 
+       eno=$?
+       # ok for ost to fail shutdown
+       if [ 202 -ne $eno ]; then
+               return $eno;
+       fi
         return 0
  }
  run_test 4 "force cleanup ost, then cleanup"
  
  test_5() {
         setup
-       touch $DIR/$tfile || return 86
-       stop_mds ${FORCE} || return 98
+       touch $DIR/$tfile || return 1
+       stop_mds ${FORCE} || return 2
  
         # cleanup may return an error from the failed 
         # disconnects; for now I'll consider this successful 
         # if all the modules have unloaded.
-       if ! cleanup ; then
-           lsmod | grep -q portals && return 1
-        fi
+       umount $MOUNT &
+       UMOUNT_PID=$!
+       sleep $TIMEOUT
+       echo "killing umount"
+       kill -TERM $UMOUNT_PID
+       wait $UMOUNT_PID 
+
+       # cleanup client modules
+       $LCONF --cleanup --nosetup --node client_facet $XMLCONFIG > /dev/null 
+       
+       # stop_mds is a no-op here, and should not fail
+       stop_mds  || return 4
+       stop_ost || return 5
+
+       lsmod | grep -q portals && return 6
         return 0
  }
  run_test 5 "force cleanup mds, then cleanup"
@@ -185,14 +213,14 @@ test_6() {
         manual_umount_client
         mount_client ${MOUNT} || return 87
         touch $DIR/a || return 86
-       cleanup 
+       cleanup  || return $?
  }
  run_test 6 "manual umount, then mount again"
  
  test_7() {
         setup
         manual_umount_client
-       cleanup 
+       cleanup || return $?
  }
  run_test 7 "manual umount, then cleanup"
  
@@ -226,34 +254,55 @@ test_9() {
          start_ost
          start_mds
          mount_client $MOUNT
-        [ "`cat /proc/sys/portals/debug`" = "1" ] && \
-           echo "lmc --debug success" || return 1
-        [ "`cat /proc/sys/portals/subsystem_debug`" = "16777216" ] && \
-           echo "lmc --subsystem success" || return 1
+        CHECK_PTLDEBUG="`cat /proc/sys/portals/debug`"
+        if [ $CHECK_PTLDEBUG = "1" ]; then
+           echo "lmc --debug success"
+        else
+           echo "lmc --debug: want 1, have $CHECK_PTLDEBUG"
+           return 1
+        fi
+        CHECK_SUBSYSTEM="`cat /proc/sys/portals/subsystem_debug`"
+        if [ $CHECK_SUBSYSTEM = "2" ]; then
+           echo "lmc --subsystem success"
+        else
+           echo "lmc --subsystem: want 2, have $CHECK_SUBSYSTEM"
+           return 1
+        fi
          check_mount || return 41
-        cleanup
+        cleanup || return $?
  
          # the new PTLDEBUG/SUBSYSTEM used for lconf --ptldebug/subsystem
-        PTLDEBUG="inode"
-        SUBSYSTEM="mds"
+        PTLDEBUG="inode+trace"
+        SUBSYSTEM="mds+ost"
  
          # check lconf --ptldebug/subsystem overriding lmc --ptldebug/subsystem
          start_ost
          start_mds
+        CHECK_PTLDEBUG="`do_facet mds cat /proc/sys/portals/debug`"
+        if [ $CHECK_PTLDEBUG = "3" ]; then
+           echo "lconf --debug success"
+        else
+           echo "lconf --debug: want 3, have $CHECK_PTLDEBUG"
+           return 1
+        fi
+        CHECK_SUBSYSTEM="`do_facet mds cat /proc/sys/portals/subsystem_debug`"
+        if [ $CHECK_SUBSYSTEM = "20" ]; then
+           echo "lconf --subsystem success"
+        else
+           echo "lconf --subsystem: want 20, have $CHECK_SUBSYSTEM"
+           return 1
+        fi
          mount_client $MOUNT
-        [ "`cat /proc/sys/portals/debug`" = "2" ] && \
-           echo "lconf --debug overriding success" || return 1
-        [ "`cat /proc/sys/portals/subsystem_debug`" = "33554432" ] && \
-           echo "lconf --subsystem overriding success" || return 1
          check_mount || return 41
-        cleanup
+        cleanup || return $?
  
          # resume the old configuration
          PTLDEBUG=$OLDPTLDEBUG
          SUBSYSTEM=$OLDSUBSYSTEM
          gen_config
  }
-run_test 9 "test --ptldebug and --subsystem for lmc"
+
+run_test 9 "test --ptldebug and --subsystem for lmc and lconf"
  
  test_10() {
          OLDXMLCONFIG=$XMLCONFIG
@@ -293,4 +342,157 @@ test_11() {
  }
  run_test 11 "use default lov configuration (should return error)"
  
+test_12() {
+        OLDXMLCONFIG=$XMLCONFIG
+        XMLCONFIG="batch.xml"
+        BATCHFILE="batchfile"
+        # Each well-formed batch file must yield exactly one ">-I 128<" line in $XMLCONFIG; each malformed one must make lmc fail.
+        # test double quote
+        [ -f "$XMLCONFIG" ] && rm -f $XMLCONFIG
+        [ -f "$BATCHFILE" ] && rm -f $BATCHFILE
+        echo "--add net --node  localhost --nid localhost.localdomain --nettype tcp" > $BATCHFILE
+        echo "--add mds --node localhost --mds mds1 --mkfsoptions \"-I 128\"" >> $BATCHFILE
+        # batch line ends with: --mkfsoptions "-I 128"
+        do_lmc -m $XMLCONFIG --batch $BATCHFILE || return $?
+        if [ `sed -n '/>-I 128</p' $XMLCONFIG | wc -l` -eq 1 ]; then
+                echo "matched double quote success"
+        else
+                echo "matched double quote fail"
+                return 1
+        fi
+        rm -f $XMLCONFIG
+        rm -f $BATCHFILE
+        echo "--add net --node  localhost --nid localhost.localdomain --nettype tcp" > $BATCHFILE
+        echo "--add mds --node localhost --mds mds1 --mkfsoptions \"-I 128" >> $BATCHFILE
+        # batch line ends with: --mkfsoptions "-I 128   (closing quote missing)
+        do_lmc -m $XMLCONFIG --batch $BATCHFILE && return 1  # lmc succeeding is the failure; $? would be 0 here
+        echo "unmatched double quote should return error"
+
+        # test single quote
+        rm -f $BATCHFILE
+        echo "--add net --node  localhost --nid localhost.localdomain --nettype tcp" > $BATCHFILE
+        echo "--add mds --node localhost --mds mds1 --mkfsoptions '-I 128'" >> $BATCHFILE
+        # batch line ends with: --mkfsoptions '-I 128'
+        do_lmc -m $XMLCONFIG --batch $BATCHFILE || return $?
+        if [ `sed -n '/>-I 128</p' $XMLCONFIG | wc -l` -eq 1 ]; then
+                echo "matched single quote success"
+        else
+                echo "matched single quote fail"
+                return 1
+        fi
+        rm -f $XMLCONFIG
+        rm -f $BATCHFILE
+        echo "--add net --node  localhost --nid localhost.localdomain --nettype tcp" > $BATCHFILE
+        echo "--add mds --node localhost --mds mds1 --mkfsoptions '-I 128" >> $BATCHFILE
+        # batch line ends with: --mkfsoptions '-I 128   (closing quote missing)
+        do_lmc -m $XMLCONFIG --batch $BATCHFILE && return 1  # unexpected success must fail the test
+        echo "unmatched single quote should return error"
+
+        # test backslash
+        rm -f $BATCHFILE
+        echo "--add net --node  localhost --nid localhost.localdomain --nettype tcp" > $BATCHFILE
+        echo "--add mds --node localhost --mds mds1 --mkfsoptions \-\I\ \128" >> $BATCHFILE
+        # batch line ends with: --mkfsoptions \-\I\ \128
+        do_lmc -m $XMLCONFIG --batch $BATCHFILE || return $?
+        if [ `sed -n '/>-I 128</p' $XMLCONFIG | wc -l` -eq 1 ]; then
+                echo "backslash followed by a whitespace/letter success"
+        else
+                echo "backslash followed by a whitespace/letter fail"
+                return 1
+        fi
+        rm -f $XMLCONFIG
+        rm -f $BATCHFILE
+        echo "--add net --node  localhost --nid localhost.localdomain --nettype tcp" > $BATCHFILE
+        echo "--add mds --node localhost --mds mds1 --mkfsoptions -I\ 128\\" >> $BATCHFILE
+        # batch line ends with: --mkfsoptions -I\ 128\   (dangling backslash)
+        do_lmc -m $XMLCONFIG --batch $BATCHFILE && return 1  # unexpected success must fail the test
+        echo "backslash followed by nothing should return error"
+
+        rm -f $BATCHFILE
+        XMLCONFIG=$OLDXMLCONFIG
+}
+run_test 12 "lmc --batch, with single/double quote, backslash in batchfile"
+
+test_13() {
+        OLDXMLCONFIG=$XMLCONFIG
+        XMLCONFIG="conf13-1.xml"
+        SECONDXMLCONFIG="conf13-2.xml"
+        # Two checks: long mds names get the expected distinct uuids, and two gen_config runs produce identical XML.
+        # check long uuid will be truncated properly and uniquely
+        echo "To generate XML configuration file(with long ost name): $XMLCONFIG"
+        [ -f "$XMLCONFIG" ] && rm -f $XMLCONFIG
+        do_lmc --add net --node localhost --nid localhost.localdomain --nettype tcp
+        do_lmc --add mds --node localhost --mds mds1_name_longer_than_31characters
+        do_lmc --add mds --node localhost --mds mds2_name_longer_than_31characters
+        if [ ! -f "$XMLCONFIG" ]; then
+                echo "Error:no file $XMLCONFIG created!"
+                return 1
+        fi
+        EXPECTEDMDS1UUID="e_longer_than_31characters_UUID"
+        EXPECTEDMDS2UUID="longer_than_31characters_UUID_2"
+        FOUNDMDS1UUID=`awk -F"'" '/<mds uuid=/{print $2}' $XMLCONFIG | sed -n '1p'`
+        FOUNDMDS2UUID=`awk -F"'" '/<mds uuid=/{print $2}' $XMLCONFIG | sed -n '2p'`
+        if [ "$EXPECTEDMDS1UUID" != "$FOUNDMDS1UUID" ]; then  # quoted: an empty result must count as a mismatch
+                echo "Error:expected uuid for mds1: $EXPECTEDMDS1UUID; found: $FOUNDMDS1UUID"
+                return 1
+        fi
+        if [ "$EXPECTEDMDS2UUID" != "$FOUNDMDS2UUID" ]; then  # quoted for the same reason
+                echo "Error:expected uuid for mds2: $EXPECTEDMDS2UUID; found: $FOUNDMDS2UUID"
+                return 1
+        fi
+        echo "Success:long uuid truncated successfully and being unique."
+
+        # check multiple invocations for lmc generate same XML configuration file
+        rm -f $XMLCONFIG
+        echo "Generate the first XML configuration file"
+        gen_config
+        echo "mv $XMLCONFIG to $SECONDXMLCONFIG"
+        mv $XMLCONFIG $SECONDXMLCONFIG || return $?
+        echo "Generate the second XML configuration file"
+        gen_config
+        if [ `diff $XMLCONFIG $SECONDXMLCONFIG | wc -l` -eq 0 ]; then
+                echo "Success:multiple invocations for lmc generate same XML file"
+        else
+                echo "Error: multiple invocations for lmc generate different XML file"
+                return 1
+        fi
+
+        rm -f $XMLCONFIG
+        rm -f $SECONDXMLCONFIG
+        XMLCONFIG=$OLDXMLCONFIG
+}
+run_test 13 "check new_uuid of lmc operating correctly"
+
+test_14() {
+        rm -f $XMLCONFIG
+        # Write an ost --mkfsoptions value into the XML, verify it, then start and mount with that config.
+        # create xml file with --mkfsoptions for ost
+        echo "create xml file with --mkfsoptions for ost"
+        add_mds mds --dev $MDSDEV --size $MDSSIZE
+        add_lov lov1 mds --stripe_sz $STRIPE_BYTES\
+            --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0
+        add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE \
+            --mkfsoptions -V
+        add_client client mds --lov lov1 --path $MOUNT
+
+        FOUNDSTRING=`awk -F"<" '/<mkfsoptions>/{print $2}' $XMLCONFIG`
+        EXPECTEDSTRING="mkfsoptions>-V"
+        if [ "$EXPECTEDSTRING" != "$FOUNDSTRING" ]; then  # quoted: an empty or multi-word result must count as a mismatch
+                echo "Error:expected string: $EXPECTEDSTRING; found: $FOUNDSTRING"
+                return 1
+        fi
+        echo "Success:mkfsoptions for ost written to xml file correctly."
+
+        # mount lustre to test lconf mkfsoptions-parsing
+        echo "mount lustre"
+        start_ost
+        start_mds
+        mount_client $MOUNT || return $?
+        cleanup || return $?  # propagate a cleanup failure instead of reporting success
+        echo "lconf mkfsoptions-parsing for ost success"
+
+        gen_config
+}
+run_test 14 "test mkfsoptions of ost for lmc and lconf"
+
  equals_msg "Done"
diff --git a/lustre/tests/replay-single-upcall.sh b/lustre/tests/replay-single-upcall.sh

index 17e04c9..59c1371 100755 (executable)
--- a/lustre/tests/replay-single-upcall.sh
+++ b/lustre/tests/replay-single-upcall.sh
@@ -9,6 +9,10 @@ mkdir -p $TESTDIR/logs
  exec >> $TESTDIR/logs/recovery-`hostname`.log
  exec 2>&1
  
+echo ==========================================
+echo "start upcall: `date`"
+echo "command line: $0 $*"
+
  set -xv
  
  failed_import() {
diff --git a/lustre/tests/run-llog.sh b/lustre/tests/run-llog.sh

index 6a4ffc1..5d46e2b 100644 (file)
--- a/lustre/tests/run-llog.sh
+++ b/lustre/tests/run-llog.sh
@@ -1,6 +1,10 @@
  #!/bin/bash
  PATH=`dirname $0`:`dirname $0`/../utils:$PATH
  TMP=${TMP:-/tmp}
+
+MDS=`ls /proc/fs/lustre/mds | grep -v num_refs | head -1`
+[ -z "$MDS" ] && echo "no MDS available, skipping llog test" && exit 0
+
  insmod ../obdclass/llog_test.o || exit 1
  lctl modules > $TMP/ogdb-`hostname`
  echo "NOW reload debugging syms.."
@@ -9,7 +13,7 @@ RC=0
  lctl <<EOT || RC=2
  newdev
  attach llog_test llt_name llt_uuid
-setup mds1
+setup $MDS
  EOT
  
  # Using ignore_errors will allow lctl to cleanup even if the test fails.
author	phil <phil>
	Sun, 8 Feb 2004 20:12:10 +0000 (20:12 +0000)
committer	phil <phil>
	Sun, 8 Feb 2004 20:12:10 +0000 (20:12 +0000)
lustre/include/linux/lvfs_linux.h		patch \| blob \| history
lustre/kernel_patches/patches/ext3-extents-2.4.20.patch		patch \| blob \| history
lustre/kernel_patches/patches/vfs_intent-2.4.19-pre1.patch		patch \| blob \| history
lustre/kernel_patches/patches/vfs_intent-2.4.19-suse.patch		patch \| blob \| history
lustre/kernel_patches/patches/vfs_intent-2.4.22-rh.patch		patch \| blob \| history
lustre/kernel_patches/targets/rh-2.4.target		patch \| blob \| history
lustre/lov/lov_log.c		patch \| blob \| history
lustre/lvfs/lvfs_linux.c		patch \| blob \| history
lustre/mds/mds_log.c		patch \| blob \| history
lustre/obdclass/llog.c		patch \| blob \| history
lustre/obdclass/llog_cat.c		patch \| blob \| history
lustre/obdclass/llog_obd.c		patch \| blob \| history
lustre/ptlrpc/import.c		patch \| blob \| history
lustre/ptlrpc/llog_client.c		patch \| blob \| history
lustre/ptlrpc/llog_net.c		patch \| blob \| history
lustre/scripts/lbuild		patch \| blob \| history
lustre/scripts/lmake		patch \| blob \| history
lustre/scripts/lustre-kernel-2.4.spec.in		patch \| blob \| history
lustre/tests/cfg/insanity-mdev.sh		patch \| blob \| history
lustre/tests/cfg/mdev.sh		patch \| blob \| history
lustre/tests/conf-sanity.sh		patch \| blob \| history
lustre/tests/replay-single-upcall.sh		patch \| blob \| history
lustre/tests/run-llog.sh		patch \| blob \| history