--- /dev/null
+%patch
+Index: linux-2.6.5-sles9/fs/ext3/extents.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/extents.c 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.6.5-sles9/fs/ext3/extents.c 2004-11-09 02:25:56.143726112 +0300
+@@ -0,0 +1,2313 @@
++/*
++ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com
++ * Written by Alex Tomas <alex@clusterfs.com>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
++ */
++
++/*
++ * Extents support for EXT3
++ *
++ * TODO:
++ * - ext3_ext_walk_space() should not use ext3_ext_find_extent()
++ * - ext3_ext_calc_credits() could take 'mergable' into account
++ * - ext3*_error() should be used in some situations
++ * - find_goal() [to be tested and improved]
++ * - smart tree reduction
++ * - arch-independence
++ * common on-disk format for big/little-endian arch
++ */
++
++#include <linux/module.h>
++#include <linux/fs.h>
++#include <linux/time.h>
++#include <linux/ext3_jbd.h>
++#include <linux/jbd.h>
++#include <linux/smp_lock.h>
++#include <linux/highuid.h>
++#include <linux/pagemap.h>
++#include <linux/quotaops.h>
++#include <linux/string.h>
++#include <linux/slab.h>
++#include <linux/ext3_extents.h>
++#include <asm/uaccess.h>
++
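++/*
++ * make sure the handle has at least 'needed' credits: try to extend
++ * the running transaction and, failing that, restart it
++ */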
++static handle_t *ext3_ext_journal_restart(handle_t *handle, int needed)
++{
++ int err;
++
++ if (handle->h_buffer_credits > needed)
++ return handle;
++ if (!ext3_journal_extend(handle, needed))
++ return handle;
++ err = ext3_journal_restart(handle, needed);
++ if (err)
++ return ERR_PTR(err);
++
++ return handle;
++}
++
++static inline int
++ext3_ext_get_access_for_root(handle_t *h, struct ext3_extents_tree *tree)
++{
++ if (tree->ops->get_write_access)
++ return tree->ops->get_write_access(h, tree->buffer);
++ else
++ return 0;
++}
++
++static inline int
++ext3_ext_mark_root_dirty(handle_t *h, struct ext3_extents_tree *tree)
++{
++ if (tree->ops->mark_buffer_dirty)
++ return tree->ops->mark_buffer_dirty(h, tree->buffer);
++ else
++ return 0;
++}
++
++/*
++ * could return:
++ * - EROFS
++ * - ENOMEM
++ */
++static int ext3_ext_get_access(handle_t *handle,
++ struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path)
++{
++ int err;
++
++ if (path->p_bh) {
++ /* path points to block */
++ err = ext3_journal_get_write_access(handle, path->p_bh);
++ } else {
++ /* path points to leaf/index in inode body */
++ err = ext3_ext_get_access_for_root(handle, tree);
++ }
++ return err;
++}
++
++/*
++ * could return:
++ * - EROFS
++ * - ENOMEM
++ * - EIO
++ */
++static int ext3_ext_dirty(handle_t *handle, struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path)
++{
++ int err;
++ if (path->p_bh) {
++ /* path points to block */
++ err = ext3_journal_dirty_metadata(handle, path->p_bh);
++ } else {
++ /* path points to leaf/index in inode body */
++ err = ext3_ext_mark_root_dirty(handle, tree);
++ }
++ return err;
++}
++
++static inline int
++ext3_ext_new_block(handle_t *handle, struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path, struct ext3_extent *ex,
++ int *err)
++{
++ int goal, depth, newblock;
++ struct inode *inode;
++
++ EXT_ASSERT(tree);
++ if (tree->ops->new_block)
++ return tree->ops->new_block(handle, tree, path, ex, err);
++
++ inode = tree->inode;
++ depth = EXT_DEPTH(tree);
++ if (path && depth > 0) {
++ goal = path[depth-1].p_block;
++ } else {
++ struct ext3_inode_info *ei = EXT3_I(inode);
++ unsigned long bg_start;
++ unsigned long colour;
++
++ bg_start = (ei->i_block_group *
++ EXT3_BLOCKS_PER_GROUP(inode->i_sb)) +
++ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block);
++ colour = (current->pid % 16) *
++ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16);
++ goal = bg_start + colour;
++ }
++
++ newblock = ext3_new_block(handle, inode, goal, err);
++ return newblock;
++}
++
++static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree)
++{
++ struct ext3_extent_header *neh;
++ neh = EXT_ROOT_HDR(tree);
++ neh->eh_generation++;
++}
++
++static inline int ext3_ext_space_block(struct ext3_extents_tree *tree)
++{
++ int size;
++
++ size = (tree->inode->i_sb->s_blocksize -
++ sizeof(struct ext3_extent_header))
++ / sizeof(struct ext3_extent);
++#ifdef AGRESSIVE_TEST
++ size = 6;
++#endif
++ return size;
++}
++
++static inline int ext3_ext_space_block_idx(struct ext3_extents_tree *tree)
++{
++ int size;
++
++ size = (tree->inode->i_sb->s_blocksize -
++ sizeof(struct ext3_extent_header))
++ / sizeof(struct ext3_extent_idx);
++#ifdef AGRESSIVE_TEST
++ size = 5;
++#endif
++ return size;
++}
++
++static inline int ext3_ext_space_root(struct ext3_extents_tree *tree)
++{
++ int size;
++
++ size = (tree->buffer_len - sizeof(struct ext3_extent_header))
++ / sizeof(struct ext3_extent);
++#ifdef AGRESSIVE_TEST
++ size = 3;
++#endif
++ return size;
++}
++
++static inline int ext3_ext_space_root_idx(struct ext3_extents_tree *tree)
++{
++ int size;
++
++ size = (tree->buffer_len -
++ sizeof(struct ext3_extent_header))
++ / sizeof(struct ext3_extent_idx);
++#ifdef AGRESSIVE_TEST
++ size = 4;
++#endif
++ return size;
++}
++
++static void ext3_ext_show_path(struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path)
++{
++#ifdef EXT_DEBUG
++ int k, l = path->p_depth;
++
++ ext_debug(tree, "path:");
++ for (k = 0; k <= l; k++, path++) {
++ if (path->p_idx) {
++ ext_debug(tree, " %d->%d", path->p_idx->ei_block,
++ path->p_idx->ei_leaf);
++ } else if (path->p_ext) {
++ ext_debug(tree, " %d:%d:%d",
++ path->p_ext->ee_block,
++ path->p_ext->ee_len,
++ path->p_ext->ee_start);
++ } else
++ ext_debug(tree, " []");
++ }
++ ext_debug(tree, "\n");
++#endif
++}
++
++static void ext3_ext_show_leaf(struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path)
++{
++#ifdef EXT_DEBUG
++ int depth = EXT_DEPTH(tree);
++ struct ext3_extent_header *eh;
++ struct ext3_extent *ex;
++ int i;
++
++ if (!path)
++ return;
++
++ eh = path[depth].p_hdr;
++ ex = EXT_FIRST_EXTENT(eh);
++
++ for (i = 0; i < eh->eh_entries; i++, ex++) {
++ ext_debug(tree, "%d:%d:%d ",
++ ex->ee_block, ex->ee_len, ex->ee_start);
++ }
++ ext_debug(tree, "\n");
++#endif
++}
++
++static void ext3_ext_drop_refs(struct ext3_ext_path *path)
++{
++ int depth = path->p_depth;
++ int i;
++
++ for (i = 0; i <= depth; i++, path++)
++ if (path->p_bh) {
++ brelse(path->p_bh);
++ path->p_bh = NULL;
++ }
++}
++
++/*
++ * binary search for closest index by given block
++ */
++static inline void
++ext3_ext_binsearch_idx(struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path, int block)
++{
++ struct ext3_extent_header *eh = path->p_hdr;
++ struct ext3_extent_idx *ix;
++ int l = 0, k, r;
++
++ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC);
++ EXT_ASSERT(eh->eh_entries <= eh->eh_max);
++ EXT_ASSERT(eh->eh_entries > 0);
++
++ ext_debug(tree, "binsearch for %d(idx): ", block);
++
++ path->p_idx = ix = EXT_FIRST_INDEX(eh);
++
++ r = k = eh->eh_entries;
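++ /* halve the window [l,r) each step; the linear scan below then
++ * settles on the last index with ei_block <= block */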
++ while (k > 1) {
++ k = (r - l) / 2;
++ if (block < ix[l + k].ei_block)
++ r -= k;
++ else
++ l += k;
++ ext_debug(tree, "%d:%d:%d ", k, l, r);
++ }
++
++ ix += l;
++ path->p_idx = ix;
++ ext_debug(tree, " -> %d->%d ", path->p_idx->ei_block, path->p_idx->ei_leaf);
++
++ while (l++ < r) {
++ if (block < ix->ei_block)
++ break;
++ path->p_idx = ix++;
++ }
++ ext_debug(tree, " -> %d->%d\n", path->p_idx->ei_block,
++ path->p_idx->ei_leaf);
++
++#ifdef CHECK_BINSEARCH
++ {
++ struct ext3_extent_idx *chix;
++
++ chix = ix = EXT_FIRST_INDEX(eh);
++ for (k = 0; k < eh->eh_entries; k++, ix++) {
++ if (k != 0 && ix->ei_block <= ix[-1].ei_block) {
++ printk("k=%d, ix=0x%p, first=0x%p\n", k,
++ ix, EXT_FIRST_INDEX(eh));
++ printk("%u <= %u\n",
++ ix->ei_block, ix[-1].ei_block);
++ }
++ EXT_ASSERT(k == 0 || ix->ei_block > ix[-1].ei_block);
++ if (block < ix->ei_block)
++ break;
++ chix = ix;
++ }
++ EXT_ASSERT(chix == path->p_idx);
++ }
++#endif
++
++}
++
++/*
++ * binary search for closest extent by given block
++ */
++static inline void
++ext3_ext_binsearch(struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path, int block)
++{
++ struct ext3_extent_header *eh = path->p_hdr;
++ struct ext3_extent *ex;
++ int l = 0, k, r;
++
++ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC);
++ EXT_ASSERT(eh->eh_entries <= eh->eh_max);
++
++ if (eh->eh_entries == 0) {
++ /*
++ * this leaf is still empty:
++ * we get such a leaf in the split/add case
++ */
++ return;
++ }
++
++ ext_debug(tree, "binsearch for %d: ", block);
++
++ path->p_ext = ex = EXT_FIRST_EXTENT(eh);
++
++ r = k = eh->eh_entries;
++ while (k > 1) {
++ k = (r - l) / 2;
++ if (block < ex[l + k].ee_block)
++ r -= k;
++ else
++ l += k;
++ ext_debug(tree, "%d:%d:%d ", k, l, r);
++ }
++
++ ex += l;
++ path->p_ext = ex;
++ ext_debug(tree, " -> %d:%d:%d ", path->p_ext->ee_block,
++ path->p_ext->ee_start, path->p_ext->ee_len);
++
++ while (l++ < r) {
++ if (block < ex->ee_block)
++ break;
++ path->p_ext = ex++;
++ }
++ ext_debug(tree, " -> %d:%d:%d\n", path->p_ext->ee_block,
++ path->p_ext->ee_start, path->p_ext->ee_len);
++
++#ifdef CHECK_BINSEARCH
++ {
++ struct ext3_extent *chex;
++
++ chex = ex = EXT_FIRST_EXTENT(eh);
++ for (k = 0; k < eh->eh_entries; k++, ex++) {
++ EXT_ASSERT(k == 0 || ex->ee_block > ex[-1].ee_block);
++ if (block < ex->ee_block)
++ break;
++ chex = ex;
++ }
++ EXT_ASSERT(chex == path->p_ext);
++ }
++#endif
++
++}
++
++int ext3_extent_tree_init(handle_t *handle, struct ext3_extents_tree *tree)
++{
++ struct ext3_extent_header *eh;
++
++ BUG_ON(tree->buffer_len == 0);
++ ext3_ext_get_access_for_root(handle, tree);
++ eh = EXT_ROOT_HDR(tree);
++ eh->eh_depth = 0;
++ eh->eh_entries = 0;
++ eh->eh_magic = EXT3_EXT_MAGIC;
++ eh->eh_max = ext3_ext_space_root(tree);
++ ext3_ext_mark_root_dirty(handle, tree);
++ ext3_ext_invalidate_cache(tree);
++ return 0;
++}
++
++struct ext3_ext_path *
++ext3_ext_find_extent(struct ext3_extents_tree *tree, int block,
++ struct ext3_ext_path *path)
++{
++ struct ext3_extent_header *eh;
++ struct buffer_head *bh;
++ int depth, i, ppos = 0;
++
++ EXT_ASSERT(tree);
++ EXT_ASSERT(tree->inode);
++ EXT_ASSERT(tree->root);
++
++ eh = EXT_ROOT_HDR(tree);
++ EXT_ASSERT(eh);
++ i = depth = EXT_DEPTH(tree);
++ EXT_ASSERT(eh->eh_max);
++ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC);
++ EXT_ASSERT(i == 0 || eh->eh_entries > 0);
++
++ /* account possible depth increase */
++ if (!path) {
++ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 2),
++ GFP_NOFS);
++ if (!path)
++ return ERR_PTR(-ENOMEM);
++ }
++ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1));
++ path[0].p_hdr = eh;
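++ /* path[0] describes the root; path[depth] will describe the leaf */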
++
++ /* walk through the tree */
++ while (i) {
++ ext_debug(tree, "depth %d: num %d, max %d\n",
++ ppos, eh->eh_entries, eh->eh_max);
++ ext3_ext_binsearch_idx(tree, path + ppos, block);
++ path[ppos].p_block = path[ppos].p_idx->ei_leaf;
++ path[ppos].p_depth = i;
++ path[ppos].p_ext = NULL;
++
++ bh = sb_bread(tree->inode->i_sb, path[ppos].p_block);
++ if (!bh) {
++ ext3_ext_drop_refs(path);
++ kfree(path);
++ return ERR_PTR(-EIO);
++ }
++ eh = EXT_BLOCK_HDR(bh);
++ ppos++;
++ EXT_ASSERT(ppos <= depth);
++ path[ppos].p_bh = bh;
++ path[ppos].p_hdr = eh;
++ i--;
++ }
++
++ path[ppos].p_depth = i;
++ path[ppos].p_hdr = eh;
++ path[ppos].p_ext = NULL;
++
++ /* find extent */
++ ext3_ext_binsearch(tree, path + ppos, block);
++
++ ext3_ext_show_path(tree, path);
++
++ return path;
++}
++
++/*
++ * insert new index [logical;ptr] into the block at curp;
++ * it checks where to insert: before curp or after curp
++ */
++static int ext3_ext_insert_index(handle_t *handle,
++ struct ext3_extents_tree *tree,
++ struct ext3_ext_path *curp,
++ int logical, int ptr)
++{
++ struct ext3_extent_idx *ix;
++ int len, err;
++
++ if ((err = ext3_ext_get_access(handle, tree, curp)))
++ return err;
++
++ EXT_ASSERT(logical != curp->p_idx->ei_block);
++ len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
++ if (logical > curp->p_idx->ei_block) {
++ /* insert after */
++ if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) {
++ len = (len - 1) * sizeof(struct ext3_extent_idx);
++ len = len < 0 ? 0 : len;
++ ext_debug(tree, "insert new index %d after: %d. "
++ "move %d from 0x%p to 0x%p\n",
++ logical, ptr, len,
++ (curp->p_idx + 1), (curp->p_idx + 2));
++ memmove(curp->p_idx + 2, curp->p_idx + 1, len);
++ }
++ ix = curp->p_idx + 1;
++ } else {
++ /* insert before */
++ len = len * sizeof(struct ext3_extent_idx);
++ len = len < 0 ? 0 : len;
++ ext_debug(tree, "insert new index %d before: %d. "
++ "move %d from 0x%p to 0x%p\n",
++ logical, ptr, len,
++ curp->p_idx, (curp->p_idx + 1));
++ memmove(curp->p_idx + 1, curp->p_idx, len);
++ ix = curp->p_idx;
++ }
++
++ ix->ei_block = logical;
++ ix->ei_leaf = ptr;
++ curp->p_hdr->eh_entries++;
++
++ EXT_ASSERT(curp->p_hdr->eh_entries <= curp->p_hdr->eh_max);
++ EXT_ASSERT(ix <= EXT_LAST_INDEX(curp->p_hdr));
++
++ err = ext3_ext_dirty(handle, tree, curp);
++ ext3_std_error(tree->inode->i_sb, err);
++
++ return err;
++}
++
++/*
++ * routine inserts new subtree into the path, using free index entry
++ * at depth 'at':
++ * - allocates all needed blocks (new leaf and all intermediate index blocks)
++ * - makes decision where to split
++ * - moves remaining extents and index entries (right to the split point)
++ * into the newly allocated blocks
++ * - initializes subtree
++ */
++static int ext3_ext_split(handle_t *handle, struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path,
++ struct ext3_extent *newext, int at)
++{
++ struct buffer_head *bh = NULL;
++ int depth = EXT_DEPTH(tree);
++ struct ext3_extent_header *neh;
++ struct ext3_extent_idx *fidx;
++ struct ext3_extent *ex;
++ int i = at, k, m, a;
++ unsigned long newblock, oldblock, border;
++ int *ablocks = NULL; /* array of allocated blocks */
++ int err = 0;
++
++ /* make decision: where to split? */
++ /* FIXME: now decision is the simplest: at current extent */
++
++ /* if current leaf will be split, then we should use
++ * border from split point */
++ EXT_ASSERT(path[depth].p_ext <= EXT_MAX_EXTENT(path[depth].p_hdr));
++ if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
++ border = path[depth].p_ext[1].ee_block;
++ ext_debug(tree, "leaf will be split."
++ " next leaf starts at %d\n",
++ (int)border);
++ } else {
++ border = newext->ee_block;
++ ext_debug(tree, "leaf will be added."
++ " next leaf starts at %d\n",
++ (int)border);
++ }
++
++ /*
++ * if error occurs, then we break processing
++ * and turn filesystem read-only. so, index won't
++ * be inserted and tree will be in consistent
++ * state. next mount will repair buffers too
++ */
++
++ /*
++ * get array to track all allocated blocks;
++ * we need this to handle errors and free these
++ * blocks upon failure
++ */
++ ablocks = kmalloc(sizeof(unsigned long) * depth, GFP_NOFS);
++ if (!ablocks)
++ return -ENOMEM;
++ memset(ablocks, 0, sizeof(unsigned long) * depth);
++
++ /* allocate all needed blocks */
++ ext_debug(tree, "allocate %d blocks for indexes/leaf\n", depth - at);
++ for (a = 0; a < depth - at; a++) {
++ newblock = ext3_ext_new_block(handle, tree, path, newext, &err);
++ if (newblock == 0)
++ goto cleanup;
++ ablocks[a] = newblock;
++ }
++
++ /* initialize new leaf */
++ newblock = ablocks[--a];
++ EXT_ASSERT(newblock);
++ bh = sb_getblk(tree->inode->i_sb, newblock);
++ if (!bh) {
++ err = -EIO;
++ goto cleanup;
++ }
++ lock_buffer(bh);
++
++ if ((err = ext3_journal_get_create_access(handle, bh)))
++ goto cleanup;
++
++ neh = EXT_BLOCK_HDR(bh);
++ neh->eh_entries = 0;
++ neh->eh_max = ext3_ext_space_block(tree);
++ neh->eh_magic = EXT3_EXT_MAGIC;
++ neh->eh_depth = 0;
++ ex = EXT_FIRST_EXTENT(neh);
++
++ /* move remainder of path[depth] to the new leaf */
++ EXT_ASSERT(path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max);
++ /* start copy from next extent */
++ /* TODO: we could do it by single memmove */
++ m = 0;
++ path[depth].p_ext++;
++ while (path[depth].p_ext <=
++ EXT_MAX_EXTENT(path[depth].p_hdr)) {
++ ext_debug(tree, "move %d:%d:%d in new leaf %lu\n",
++ path[depth].p_ext->ee_block,
++ path[depth].p_ext->ee_start,
++ path[depth].p_ext->ee_len,
++ newblock);
++ memmove(ex++, path[depth].p_ext++,
++ sizeof(struct ext3_extent));
++ neh->eh_entries++;
++ m++;
++ }
++ set_buffer_uptodate(bh);
++ unlock_buffer(bh);
++
++ if ((err = ext3_journal_dirty_metadata(handle, bh)))
++ goto cleanup;
++ brelse(bh);
++ bh = NULL;
++
++ /* correct old leaf */
++ if (m) {
++ if ((err = ext3_ext_get_access(handle, tree, path + depth)))
++ goto cleanup;
++ path[depth].p_hdr->eh_entries -= m;
++ if ((err = ext3_ext_dirty(handle, tree, path + depth)))
++ goto cleanup;
++
++ }
++
++ /* create intermediate indexes */
++ k = depth - at - 1;
++ EXT_ASSERT(k >= 0);
++ if (k)
++ ext_debug(tree, "create %d intermediate indices\n", k);
++ /* insert new index into current index block */
++ /* current depth stored in i var */
++ i = depth - 1;
++ while (k--) {
++ oldblock = newblock;
++ newblock = ablocks[--a];
++ bh = sb_getblk(tree->inode->i_sb, newblock);
++ if (!bh) {
++ err = -EIO;
++ goto cleanup;
++ }
++ lock_buffer(bh);
++
++ if ((err = ext3_journal_get_create_access(handle, bh)))
++ goto cleanup;
++
++ neh = EXT_BLOCK_HDR(bh);
++ neh->eh_entries = 1;
++ neh->eh_magic = EXT3_EXT_MAGIC;
++ neh->eh_max = ext3_ext_space_block_idx(tree);
++ neh->eh_depth = depth - i;
++ fidx = EXT_FIRST_INDEX(neh);
++ fidx->ei_block = border;
++ fidx->ei_leaf = oldblock;
++
++ ext_debug(tree, "int.index at %d (block %lu): %lu -> %lu\n",
++ i, newblock, border, oldblock);
++ /* copy indexes */
++ m = 0;
++ path[i].p_idx++;
++
++ ext_debug(tree, "cur 0x%p, last 0x%p\n", path[i].p_idx,
++ EXT_MAX_INDEX(path[i].p_hdr));
++ EXT_ASSERT(EXT_MAX_INDEX(path[i].p_hdr) ==
++ EXT_LAST_INDEX(path[i].p_hdr));
++ while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
++ ext_debug(tree, "%d: move %d:%d in new index %lu\n",
++ i, path[i].p_idx->ei_block,
++ path[i].p_idx->ei_leaf, newblock);
++ memmove(++fidx, path[i].p_idx++,
++ sizeof(struct ext3_extent_idx));
++ neh->eh_entries++;
++ EXT_ASSERT(neh->eh_entries <= neh->eh_max);
++ m++;
++ }
++ set_buffer_uptodate(bh);
++ unlock_buffer(bh);
++
++ if ((err = ext3_journal_dirty_metadata(handle, bh)))
++ goto cleanup;
++ brelse(bh);
++ bh = NULL;
++
++ /* correct old index */
++ if (m) {
++ err = ext3_ext_get_access(handle, tree, path + i);
++ if (err)
++ goto cleanup;
++ path[i].p_hdr->eh_entries -= m;
++ err = ext3_ext_dirty(handle, tree, path + i);
++ if (err)
++ goto cleanup;
++ }
++
++ i--;
++ }
++
++ /* insert new index */
++ if (!err)
++ err = ext3_ext_insert_index(handle, tree, path + at,
++ border, newblock);
++
++cleanup:
++ if (bh) {
++ if (buffer_locked(bh))
++ unlock_buffer(bh);
++ brelse(bh);
++ }
++
++ if (err) {
++ /* free all allocated blocks in error case */
++ for (i = 0; i < depth; i++) {
++ if (!ablocks[i])
++ continue;
++ ext3_free_blocks(handle, tree->inode, ablocks[i], 1);
++ }
++ }
++ kfree(ablocks);
++
++ return err;
++}
++
++/*
++ * routine implements tree growing procedure:
++ * - allocates new block
++ * - moves top-level data (index block or leaf) into the new block
++ * - initializes new top-level, creating index that points to the
++ * just created block
++ */
++static int ext3_ext_grow_indepth(handle_t *handle,
++ struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path,
++ struct ext3_extent *newext)
++{
++ struct ext3_ext_path *curp = path;
++ struct ext3_extent_header *neh;
++ struct ext3_extent_idx *fidx;
++ struct buffer_head *bh;
++ unsigned long newblock;
++ int err = 0;
++
++ newblock = ext3_ext_new_block(handle, tree, path, newext, &err);
++ if (newblock == 0)
++ return err;
++
++ bh = sb_getblk(tree->inode->i_sb, newblock);
++ if (!bh) {
++ err = -EIO;
++ ext3_std_error(tree->inode->i_sb, err);
++ return err;
++ }
++ lock_buffer(bh);
++
++ if ((err = ext3_journal_get_create_access(handle, bh))) {
++ unlock_buffer(bh);
++ goto out;
++ }
++
++ /* move top-level index/leaf into new block */
++ memmove(bh->b_data, curp->p_hdr, tree->buffer_len);
++
++ /* set size of new block */
++ neh = EXT_BLOCK_HDR(bh);
++ /* old root could have indexes or leaves,
++ * so calculate eh_max the right way */
++ if (EXT_DEPTH(tree))
++ neh->eh_max = ext3_ext_space_block_idx(tree);
++ else
++ neh->eh_max = ext3_ext_space_block(tree);
++ neh->eh_magic = EXT3_EXT_MAGIC;
++ set_buffer_uptodate(bh);
++ unlock_buffer(bh);
++
++ if ((err = ext3_journal_dirty_metadata(handle, bh)))
++ goto out;
++
++ /* create index in new top-level index: num,max,pointer */
++ if ((err = ext3_ext_get_access(handle, tree, curp)))
++ goto out;
++
++ curp->p_hdr->eh_magic = EXT3_EXT_MAGIC;
++ curp->p_hdr->eh_max = ext3_ext_space_root_idx(tree);
++ curp->p_hdr->eh_entries = 1;
++ curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr);
++ /* FIXME: it works, but actually path[0] can be index */
++ curp->p_idx->ei_block = EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block;
++ curp->p_idx->ei_leaf = newblock;
++
++ neh = EXT_ROOT_HDR(tree);
++ fidx = EXT_FIRST_INDEX(neh);
++ ext_debug(tree, "new root: num %d(%d), lblock %d, ptr %d\n",
++ neh->eh_entries, neh->eh_max, fidx->ei_block, fidx->ei_leaf);
++
++ neh->eh_depth = path->p_depth + 1;
++ err = ext3_ext_dirty(handle, tree, curp);
++out:
++ brelse(bh);
++
++ return err;
++}
++
++/*
++ * routine finds empty index and adds new leaf. if no free index is
++ * found, then it requests in-depth growing
++ */
++static int ext3_ext_create_new_leaf(handle_t *handle,
++ struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path,
++ struct ext3_extent *newext)
++{
++ struct ext3_ext_path *curp;
++ int depth, i, err = 0;
++
++repeat:
++ i = depth = EXT_DEPTH(tree);
++
++ /* walk up the tree and look for free index entry */
++ curp = path + depth;
++ while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
++ i--;
++ curp--;
++ }
++
++ /* we use already allocated block for index block,
++ * so subsequent data blocks should be contiguous */
++ if (EXT_HAS_FREE_INDEX(curp)) {
++ /* if we found index with free entry, then use that
++ * entry: create all needed subtree and add new leaf */
++ err = ext3_ext_split(handle, tree, path, newext, i);
++
++ /* refill path */
++ ext3_ext_drop_refs(path);
++ path = ext3_ext_find_extent(tree, newext->ee_block, path);
++ if (IS_ERR(path))
++ err = PTR_ERR(path);
++ } else {
++ /* tree is full, time to grow in depth */
++ err = ext3_ext_grow_indepth(handle, tree, path, newext);
++
++ /* refill path */
++ ext3_ext_drop_refs(path);
++ path = ext3_ext_find_extent(tree, newext->ee_block, path);
++ if (IS_ERR(path))
++ err = PTR_ERR(path);
++
++ /*
++ * only the first grow (depth 0 -> 1) produces free space;
++ * in all other cases we have to split the grown tree
++ */
++ depth = EXT_DEPTH(tree);
++ if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
++ /* now we need split */
++ goto repeat;
++ }
++ }
++
++ if (err)
++ return err;
++
++ return 0;
++}
++
++/*
++ * returns allocated block in subsequent extent or EXT_MAX_BLOCK
++ * NOTE: it considers block number from index entry as
++ * allocated block. thus, index entries have to be consistent
++ * with leaves
++ */
++static unsigned long
++ext3_ext_next_allocated_block(struct ext3_ext_path *path)
++{
++ int depth;
++
++ EXT_ASSERT(path != NULL);
++ depth = path->p_depth;
++
++ if (depth == 0 && path->p_ext == NULL)
++ return EXT_MAX_BLOCK;
++
++ /* FIXME: what if index isn't full ?! */
++ while (depth >= 0) {
++ if (depth == path->p_depth) {
++ /* leaf */
++ if (path[depth].p_ext !=
++ EXT_LAST_EXTENT(path[depth].p_hdr))
++ return path[depth].p_ext[1].ee_block;
++ } else {
++ /* index */
++ if (path[depth].p_idx !=
++ EXT_LAST_INDEX(path[depth].p_hdr))
++ return path[depth].p_idx[1].ei_block;
++ }
++ depth--;
++ }
++
++ return EXT_MAX_BLOCK;
++}
++
++/*
++ * returns first allocated block from next leaf or EXT_MAX_BLOCK
++ */
++static unsigned ext3_ext_next_leaf_block(struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path)
++{
++ int depth;
++
++ EXT_ASSERT(path != NULL);
++ depth = path->p_depth;
++
++ /* zero-depth tree has no leaf blocks at all */
++ if (depth == 0)
++ return EXT_MAX_BLOCK;
++
++ /* go to index block */
++ depth--;
++
++ while (depth >= 0) {
++ if (path[depth].p_idx !=
++ EXT_LAST_INDEX(path[depth].p_hdr))
++ return path[depth].p_idx[1].ei_block;
++ depth--;
++ }
++
++ return EXT_MAX_BLOCK;
++}
++
++/*
++ * if leaf gets modified and modified extent is first in the leaf
++ * then we have to correct all indexes above
++ * TODO: do we need to correct tree in all cases?
++ */
++int ext3_ext_correct_indexes(handle_t *handle, struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path)
++{
++ struct ext3_extent_header *eh;
++ int depth = EXT_DEPTH(tree);
++ struct ext3_extent *ex;
++ unsigned long border;
++ int k, err = 0;
++
++ eh = path[depth].p_hdr;
++ ex = path[depth].p_ext;
++ EXT_ASSERT(ex);
++ EXT_ASSERT(eh);
++
++ if (depth == 0) {
++ /* there is no tree at all */
++ return 0;
++ }
++
++ if (ex != EXT_FIRST_EXTENT(eh)) {
++ /* we correct the tree only if the first extent in the leaf got modified */
++ return 0;
++ }
++
++ /*
++ * TODO: we need correction if border is smaller than current one
++ */
++ k = depth - 1;
++ border = path[depth].p_ext->ee_block;
++ if ((err = ext3_ext_get_access(handle, tree, path + k)))
++ return err;
++ path[k].p_idx->ei_block = border;
++ if ((err = ext3_ext_dirty(handle, tree, path + k)))
++ return err;
++
++ while (k--) {
++ /* change all left-side indexes */
++ if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr))
++ break;
++ if ((err = ext3_ext_get_access(handle, tree, path + k)))
++ break;
++ path[k].p_idx->ei_block = border;
++ if ((err = ext3_ext_dirty(handle, tree, path + k)))
++ break;
++ }
++
++ return err;
++}
++
++static inline int
++ext3_can_extents_be_merged(struct ext3_extents_tree *tree,
++ struct ext3_extent *ex1,
++ struct ext3_extent *ex2)
++{
++ if (ex1->ee_block + ex1->ee_len != ex2->ee_block)
++ return 0;
++
++#ifdef AGRESSIVE_TEST
++ if (ex1->ee_len >= 4)
++ return 0;
++#endif
++
++ if (!tree->ops->mergable)
++ return 1;
++
++ return tree->ops->mergable(ex1, ex2);
++}
++
++/*
++ * this routine tries to merge requested extent into the existing
++ * extent or inserts requested extent as new one into the tree,
++ * creating new leaf in no-space case
++ */
++int ext3_ext_insert_extent(handle_t *handle, struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path,
++ struct ext3_extent *newext)
++{
++ struct ext3_extent_header *eh;
++ struct ext3_extent *ex, *fex;
++ struct ext3_extent *nearex; /* nearest extent */
++ struct ext3_ext_path *npath = NULL;
++ int depth, len, err, next;
++
++ EXT_ASSERT(newext->ee_len > 0);
++ EXT_ASSERT(newext->ee_len < EXT_CACHE_MARK);
++ depth = EXT_DEPTH(tree);
++ ex = path[depth].p_ext;
++ EXT_ASSERT(path[depth].p_hdr);
++
++ /* try to insert block into found extent and return */
++ if (ex && ext3_can_extents_be_merged(tree, ex, newext)) {
++ ext_debug(tree, "append %d block to %d:%d (from %d)\n",
++ newext->ee_len, ex->ee_block, ex->ee_len,
++ ex->ee_start);
++ if ((err = ext3_ext_get_access(handle, tree, path + depth)))
++ return err;
++ ex->ee_len += newext->ee_len;
++ eh = path[depth].p_hdr;
++ nearex = ex;
++ goto merge;
++ }
++
++repeat:
++ depth = EXT_DEPTH(tree);
++ eh = path[depth].p_hdr;
++ if (eh->eh_entries < eh->eh_max)
++ goto has_space;
++
++ /* probably next leaf has space for us? */
++ fex = EXT_LAST_EXTENT(eh);
++ next = ext3_ext_next_leaf_block(tree, path);
++ if (newext->ee_block > fex->ee_block && next != EXT_MAX_BLOCK) {
++ ext_debug(tree, "next leaf block - %d\n", next);
++ EXT_ASSERT(!npath);
++ npath = ext3_ext_find_extent(tree, next, NULL);
++ if (IS_ERR(npath))
++ return PTR_ERR(npath);
++ EXT_ASSERT(npath->p_depth == path->p_depth);
++ eh = npath[depth].p_hdr;
++ if (eh->eh_entries < eh->eh_max) {
++ ext_debug(tree, "next leaf isn't full (%d)\n",
++ eh->eh_entries);
++ path = npath;
++ goto repeat;
++ }
++ ext_debug(tree, "next leaf has no free space (%d,%d)\n",
++ eh->eh_entries, eh->eh_max);
++ }
++
++ /*
++ * there is no free space in found leaf
++ * we're gonna add new leaf in the tree
++ */
++ err = ext3_ext_create_new_leaf(handle, tree, path, newext);
++ if (err)
++ goto cleanup;
++ depth = EXT_DEPTH(tree);
++ eh = path[depth].p_hdr;
++
++has_space:
++ nearex = path[depth].p_ext;
++
++ if ((err = ext3_ext_get_access(handle, tree, path + depth)))
++ goto cleanup;
++
++ if (!nearex) {
++ /* there is no extent in this leaf, create first one */
++ ext_debug(tree, "first extent in the leaf: %d:%d:%d\n",
++ newext->ee_block, newext->ee_start,
++ newext->ee_len);
++ path[depth].p_ext = EXT_FIRST_EXTENT(eh);
++ } else if (newext->ee_block > nearex->ee_block) {
++ EXT_ASSERT(newext->ee_block != nearex->ee_block);
++ if (nearex != EXT_LAST_EXTENT(eh)) {
++ len = EXT_MAX_EXTENT(eh) - nearex;
++ len = (len - 1) * sizeof(struct ext3_extent);
++ len = len < 0 ? 0 : len;
++ ext_debug(tree, "insert %d:%d:%d after: nearest 0x%p, "
++ "move %d from 0x%p to 0x%p\n",
++ newext->ee_block, newext->ee_start,
++ newext->ee_len,
++ nearex, len, nearex + 1, nearex + 2);
++ memmove(nearex + 2, nearex + 1, len);
++ }
++ path[depth].p_ext = nearex + 1;
++ } else {
++ EXT_ASSERT(newext->ee_block != nearex->ee_block);
++ len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext3_extent);
++ len = len < 0 ? 0 : len;
++ ext_debug(tree, "insert %d:%d:%d before: nearest 0x%p, "
++ "move %d from 0x%p to 0x%p\n",
++ newext->ee_block, newext->ee_start, newext->ee_len,
++ nearex, len, nearex + 1, nearex + 2);
++ memmove(nearex + 1, nearex, len);
++ path[depth].p_ext = nearex;
++ }
++
++ eh->eh_entries++;
++ nearex = path[depth].p_ext;
++ nearex->ee_block = newext->ee_block;
++ nearex->ee_start = newext->ee_start;
++ nearex->ee_len = newext->ee_len;
++ /* FIXME: support for large fs */
++ nearex->ee_start_hi = 0;
++
++merge:
++ /* try to merge extents to the right */
++ while (nearex < EXT_LAST_EXTENT(eh)) {
++ if (!ext3_can_extents_be_merged(tree, nearex, nearex + 1))
++ break;
++ /* merge with next extent! */
++ nearex->ee_len += nearex[1].ee_len;
++ if (nearex + 1 < EXT_LAST_EXTENT(eh)) {
++ len = (EXT_LAST_EXTENT(eh) - nearex - 1)
++ * sizeof(struct ext3_extent);
++ memmove(nearex + 1, nearex + 2, len);
++ }
++ eh->eh_entries--;
++ EXT_ASSERT(eh->eh_entries > 0);
++ }
++
++ /* try to merge extents to the left */
++
++ /* time to correct all indexes above */
++ err = ext3_ext_correct_indexes(handle, tree, path);
++ if (err)
++ goto cleanup;
++
++ err = ext3_ext_dirty(handle, tree, path + depth);
++
++cleanup:
++ if (npath) {
++ ext3_ext_drop_refs(npath);
++ kfree(npath);
++ }
++ ext3_ext_tree_changed(tree);
++ ext3_ext_invalidate_cache(tree);
++ return err;
++}
++
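++/*
++ * walk the range [block, block+num): for every extent or gap found,
++ * call 'func'; the callback returns EXT_CONTINUE, EXT_BREAK or
++ * EXT_REPEAT to drive the scan
++ */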
++int ext3_ext_walk_space(struct ext3_extents_tree *tree, unsigned long block,
++ unsigned long num, ext_prepare_callback func)
++{
++ struct ext3_ext_path *path = NULL;
++ struct ext3_extent *ex, cbex;
++ unsigned long next, start = 0, end = 0;
++ unsigned long last = block + num;
++ int depth, exists, err = 0;
++
++ EXT_ASSERT(tree);
++ EXT_ASSERT(func);
++ EXT_ASSERT(tree->inode);
++ EXT_ASSERT(tree->root);
++
++ while (block < last && block != EXT_MAX_BLOCK) {
++ num = last - block;
++ /* find extent for this block */
++ path = ext3_ext_find_extent(tree, block, path);
++ if (IS_ERR(path)) {
++ err = PTR_ERR(path);
++ path = NULL;
++ break;
++ }
++
++ depth = EXT_DEPTH(tree);
++ EXT_ASSERT(path[depth].p_hdr);
++ ex = path[depth].p_ext;
++ next = ext3_ext_next_allocated_block(path);
++
++ exists = 0;
++ if (!ex) {
++ /* there is no extent yet, so try to allocate
++ * all requested space */
++ start = block;
++ end = block + num;
++ } else if (ex->ee_block > block) {
++ /* need to allocate space before found extent */
++ start = block;
++ end = ex->ee_block;
++ if (block + num < end)
++ end = block + num;
++ } else if (block >= ex->ee_block + ex->ee_len) {
++ /* need to allocate space after found extent */
++ start = block;
++ end = block + num;
++ if (end >= next)
++ end = next;
++ } else if (block >= ex->ee_block) {
++ /*
++ * some part of requested space is covered
++ * by found extent
++ */
++ start = block;
++ end = ex->ee_block + ex->ee_len;
++ if (block + num < end)
++ end = block + num;
++ exists = 1;
++ } else {
++ BUG();
++ }
++ EXT_ASSERT(end > start);
++
++ if (!exists) {
++ cbex.ee_block = start;
++ cbex.ee_len = end - start;
++ cbex.ee_start = 0;
++ } else
++ cbex = *ex;
++
++ EXT_ASSERT(path[depth].p_hdr);
++ err = func(tree, path, &cbex, exists);
++ ext3_ext_drop_refs(path);
++
++ if (err < 0)
++ break;
++ if (err == EXT_REPEAT)
++ continue;
++ else if (err == EXT_BREAK) {
++ err = 0;
++ break;
++ }
++
++ if (EXT_DEPTH(tree) != depth) {
++ /* depth was changed. we have to realloc path */
++ kfree(path);
++ path = NULL;
++ }
++
++ block = cbex.ee_block + cbex.ee_len;
++ }
++
++ if (path) {
++ ext3_ext_drop_refs(path);
++ kfree(path);
++ }
++
++ return err;
++}
++
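++/*
++ * single-slot cache: remembers the last mapped extent or gap so that
++ * repeated lookups in the same area can skip the tree walk
++ */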
++static inline void
++ext3_ext_put_in_cache(struct ext3_extents_tree *tree, __u32 block,
++ __u32 len, __u32 start, int type)
++{
++ EXT_ASSERT(len > 0);
++ if (tree->cex) {
++ tree->cex->ec_type = type;
++ tree->cex->ec_block = block;
++ tree->cex->ec_len = len;
++ tree->cex->ec_start = start;
++ }
++}
++
++/*
++ * this routine calculates boundaries of the gap the requested block
++ * fits into, and caches this gap
++ */
++static inline void
++ext3_ext_put_gap_in_cache(struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path,
++ unsigned long block)
++{
++ int depth = EXT_DEPTH(tree);
++ unsigned long lblock, len;
++ struct ext3_extent *ex;
++
++ if (!tree->cex)
++ return;
++
++ ex = path[depth].p_ext;
++ if (ex == NULL) {
++ /* there is no extent yet, so gap is [0;-] */
++ lblock = 0;
++ len = EXT_MAX_BLOCK;
++ ext_debug(tree, "cache gap(whole file):");
++ } else if (block < ex->ee_block) {
++ lblock = block;
++ len = ex->ee_block - block;
++ ext_debug(tree, "cache gap(before): %lu [%lu:%lu]",
++ (unsigned long) block,
++ (unsigned long) ex->ee_block,
++ (unsigned long) ex->ee_len);
++ } else if (block >= ex->ee_block + ex->ee_len) {
++ lblock = ex->ee_block + ex->ee_len;
++ len = ext3_ext_next_allocated_block(path);
++ ext_debug(tree, "cache gap(after): [%lu:%lu] %lu",
++ (unsigned long) ex->ee_block,
++ (unsigned long) ex->ee_len,
++ (unsigned long) block);
++ EXT_ASSERT(len > lblock);
++ len = len - lblock;
++ } else {
++ lblock = len = 0;
++ BUG();
++ }
++
++ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) lblock, len);
++ ext3_ext_put_in_cache(tree, lblock, len, 0, EXT3_EXT_CACHE_GAP);
++}
++
++static inline int
++ext3_ext_in_cache(struct ext3_extents_tree *tree, unsigned long block,
++ struct ext3_extent *ex)
++{
++ struct ext3_ext_cache *cex = tree->cex;
++
++ /* is there cache storage at all? */
++ if (!cex)
++ return EXT3_EXT_CACHE_NO;
++
++ /* does the cache have valid data? */
++ if (cex->ec_type == EXT3_EXT_CACHE_NO)
++ return EXT3_EXT_CACHE_NO;
++
++ EXT_ASSERT(cex->ec_type == EXT3_EXT_CACHE_GAP ||
++ cex->ec_type == EXT3_EXT_CACHE_EXTENT);
++ if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) {
++ ex->ee_block = cex->ec_block;
++ ex->ee_start = cex->ec_start;
++ ex->ee_len = cex->ec_len;
++ ext_debug(tree, "%lu cached by %lu:%lu:%lu\n",
++ (unsigned long) block,
++ (unsigned long) ex->ee_block,
++ (unsigned long) ex->ee_len,
++ (unsigned long) ex->ee_start);
++ return cex->ec_type;
++ }
++
++ /* not in cache */
++ return EXT3_EXT_CACHE_NO;
++}
++
++/*
++ * routine removes index from the index block.
++ * it's used in truncate case only; thus all requests are for the
++ * last index in the block only
++ */
++int ext3_ext_rm_idx(handle_t *handle, struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path)
++{
++ struct buffer_head *bh;
++ int err;
++
++ /* free index block */
++ path--;
++ EXT_ASSERT(path->p_hdr->eh_entries);
++ if ((err = ext3_ext_get_access(handle, tree, path)))
++ return err;
++ path->p_hdr->eh_entries--;
++ if ((err = ext3_ext_dirty(handle, tree, path)))
++ return err;
++ ext_debug(tree, "index is empty, remove it, free block %d\n",
++ path->p_idx->ei_leaf);
++ bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
++ ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
++ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1);
++ return err;
++}
++
++int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path)
++{
++ int depth = EXT_DEPTH(tree);
++ int needed;
++
++ if (path) {
++ /* probably there is space in leaf? */
++ if (path[depth].p_hdr->eh_entries < path[depth].p_hdr->eh_max)
++ return 1;
++ }
++
++ /*
++ * the worst case we're expecting is creation of the
++ * new root (growing in depth) with index splitting.
++ * for splitting we have to consider depth + 1, because
++ * previous growing could increase it
++ */
++ depth = depth + 1;
++
++ /*
++ * growing in depth:
++ * block allocation + new root + old root
++ */
++ needed = EXT3_ALLOC_NEEDED + 2;
++
++ /* index split. we may need to:
++ * - allocate intermediate indexes and new leaf,
++ * - change two blocks at each level (but root),
++ * - modify root block (inode)
++ */
++ needed += (depth * EXT3_ALLOC_NEEDED) + (2 * depth) + 1;
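++ /* e.g. for a depth-1 tree this yields 3 + 2 + 2*3 + 2*2 + 1 = 16
++ * credits, assuming EXT3_ALLOC_NEEDED is 3 (bitmap + group
++ * descriptor + superblock); a rough upper bound, not an exact count */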
++
++ return needed;
++}
++
++static int
++ext3_ext_split_for_rm(handle_t *handle, struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path, unsigned long start,
++ unsigned long end)
++{
++ struct ext3_extent *ex, tex;
++ struct ext3_ext_path *npath;
++ int depth, creds, err;
++
++ depth = EXT_DEPTH(tree);
++ ex = path[depth].p_ext;
++ EXT_ASSERT(ex);
++ EXT_ASSERT(end < ex->ee_block + ex->ee_len - 1);
++ EXT_ASSERT(ex->ee_block < start);
++
++ /* calculate tail extent */
++ tex.ee_block = end + 1;
++ EXT_ASSERT(tex.ee_block < ex->ee_block + ex->ee_len);
++ tex.ee_len = ex->ee_block + ex->ee_len - tex.ee_block;
++
++ creds = ext3_ext_calc_credits_for_insert(tree, path);
++ handle = ext3_ext_journal_restart(handle, creds);
++ if (IS_ERR(handle))
++ return PTR_ERR(handle);
++
++ /* calculate head extent. use primary extent */
++ err = ext3_ext_get_access(handle, tree, path + depth);
++ if (err)
++ return err;
++ ex->ee_len = start - ex->ee_block;
++ err = ext3_ext_dirty(handle, tree, path + depth);
++ if (err)
++ return err;
++
++ /* FIXME: some callback to free underlying resource
++ * and correct ee_start? */
++ ext_debug(tree, "split extent: head %u:%u, tail %u:%u\n",
++ ex->ee_block, ex->ee_len, tex.ee_block, tex.ee_len);
++
++ npath = ext3_ext_find_extent(tree, ex->ee_block, NULL);
++ if (IS_ERR(npath))
++ return PTR_ERR(npath);
++ depth = EXT_DEPTH(tree);
++ EXT_ASSERT(npath[depth].p_ext->ee_block == ex->ee_block);
++ EXT_ASSERT(npath[depth].p_ext->ee_len == ex->ee_len);
++
++ err = ext3_ext_insert_extent(handle, tree, npath, &tex);
++ ext3_ext_drop_refs(npath);
++ kfree(npath);
++
++ return err;
++}
++
++static int
++ext3_ext_rm_leaf(handle_t *handle, struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path, unsigned long start,
++ unsigned long end)
++{
++ struct ext3_extent *ex, *fu = NULL, *lu, *le;
++ int err = 0, correct_index = 0;
++ int depth = EXT_DEPTH(tree), credits;
++ struct ext3_extent_header *eh;
++ unsigned a, b, block, num;
++
++ ext_debug(tree, "remove [%lu:%lu] in leaf\n", start, end);
++ if (!path[depth].p_hdr)
++ path[depth].p_hdr = EXT_BLOCK_HDR(path[depth].p_bh);
++ eh = path[depth].p_hdr;
++ EXT_ASSERT(eh);
++ EXT_ASSERT(eh->eh_entries <= eh->eh_max);
++ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC);
++
++ /* find where to start removing */
++ le = ex = EXT_LAST_EXTENT(eh);
++ while (ex != EXT_FIRST_EXTENT(eh)) {
++ if (ex->ee_block <= end)
++ break;
++ ex--;
++ }
++
++ if (start > ex->ee_block && end < ex->ee_block + ex->ee_len - 1) {
++ /* removal of an internal part of the extent was requested;
++ * tail and head must be placed in different extents,
++ * so we have to insert one more extent */
++ path[depth].p_ext = ex;
++ return ext3_ext_split_for_rm(handle, tree, path, start, end);
++ }
++
++ lu = ex;
++ while (ex >= EXT_FIRST_EXTENT(eh) &&
++ ex->ee_block + ex->ee_len > start) {
++ ext_debug(tree, "remove ext %u:%u\n", ex->ee_block, ex->ee_len);
++ path[depth].p_ext = ex;
++
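++ /* [a, b] is the part of this extent that overlaps [start, end] */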
++ a = ex->ee_block > start ? ex->ee_block : start;
++ b = ex->ee_block + ex->ee_len - 1 < end ?
++ ex->ee_block + ex->ee_len - 1 : end;
++
++ ext_debug(tree, " border %u:%u\n", a, b);
++
++ if (a != ex->ee_block && b != ex->ee_block + ex->ee_len - 1) {
++ block = 0;
++ num = 0;
++ BUG();
++ } else if (a != ex->ee_block) {
++ /* remove tail of the extent */
++ block = ex->ee_block;
++ num = a - block;
++ } else if (b != ex->ee_block + ex->ee_len - 1) {
++ /* remove head of the extent */
++ block = a;
++ num = b - a;
++ } else {
++ /* remove whole extent: excellent! */
++ block = ex->ee_block;
++ num = 0;
++ EXT_ASSERT(a == ex->ee_block &&
++ b == ex->ee_block + ex->ee_len - 1);
++ }
++
++ if (ex == EXT_FIRST_EXTENT(eh))
++ correct_index = 1;
++
++ credits = 1;
++ if (correct_index)
++ credits += (EXT_DEPTH(tree) * EXT3_ALLOC_NEEDED) + 1;
++ if (tree->ops->remove_extent_credits)
++ credits += tree->ops->remove_extent_credits(tree, ex, a, b);
++
++ handle = ext3_ext_journal_restart(handle, credits);
++ if (IS_ERR(handle)) {
++ err = PTR_ERR(handle);
++ goto out;
++ }
++
++ err = ext3_ext_get_access(handle, tree, path + depth);
++ if (err)
++ goto out;
++
++ if (tree->ops->remove_extent)
++ err = tree->ops->remove_extent(tree, ex, a, b);
++ if (err)
++ goto out;
++
++ if (num == 0) {
++ /* this extent is removed entirely; mark slot unused */
++ ex->ee_start = 0;
++ eh->eh_entries--;
++ fu = ex;
++ }
++
++ ex->ee_block = block;
++ ex->ee_len = num;
++
++ err = ext3_ext_dirty(handle, tree, path + depth);
++ if (err)
++ goto out;
++
++ ext_debug(tree, "new extent: %u:%u:%u\n",
++ ex->ee_block, ex->ee_len, ex->ee_start);
++ ex--;
++ }
++
++ if (fu) {
++ /* reuse unused slots */
++ while (lu < le) {
++ if (lu->ee_start) {
++ *fu = *lu;
++ lu->ee_start = 0;
++ fu++;
++ }
++ lu++;
++ }
++ }
++
++ if (correct_index && eh->eh_entries)
++ err = ext3_ext_correct_indexes(handle, tree, path);
++
++ /* if this leaf is free, then we should
++ * remove it from index block above */
++ if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
++ err = ext3_ext_rm_idx(handle, tree, path + depth);
++
++out:
++ return err;
++}
++
++
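++/* find the last index in 'hdr' whose subtree may cover 'block' */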
++static struct ext3_extent_idx *
++ext3_ext_last_covered(struct ext3_extent_header *hdr, unsigned long block)
++{
++ struct ext3_extent_idx *ix;
++
++ ix = EXT_LAST_INDEX(hdr);
++ while (ix != EXT_FIRST_INDEX(hdr)) {
++ if (ix->ei_block <= block)
++ break;
++ ix--;
++ }
++ return ix;
++}
++
++/*
++ * returns 1 if current index has to be freed (even partially)
++ */
++static inline int
++ext3_ext_more_to_rm(struct ext3_ext_path *path)
++{
++ EXT_ASSERT(path->p_idx);
++
++ if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr))
++ return 0;
++
++ /*
++ * if truncate on a deeper level happened, it wasn't partial,
++ * so we have to consider the current index for truncation
++ */
++ if (path->p_hdr->eh_entries == path->p_block)
++ return 0;
++ return 1;
++}
++
++int ext3_ext_remove_space(struct ext3_extents_tree *tree,
++ unsigned long start, unsigned long end)
++{
++ struct inode *inode = tree->inode;
++ struct super_block *sb = inode->i_sb;
++ int depth = EXT_DEPTH(tree);
++ struct ext3_ext_path *path;
++ handle_t *handle;
++ int i = 0, err = 0;
++
++ ext_debug(tree, "space to be removed: %lu:%lu\n", start, end);
++
++ /* probably first extent we're gonna free will be last in block */
++ handle = ext3_journal_start(inode, depth + 1);
++ if (IS_ERR(handle))
++ return PTR_ERR(handle);
++
++ ext3_ext_invalidate_cache(tree);
++
++ /*
++ * we start scanning from right side freeing all the blocks
++ * after i_size and walking into the deep
++ */
++ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 1), GFP_KERNEL);
++ if (path == NULL) {
++ ext3_error(sb, "ext3_ext_remove_space",
++ "Can't allocate path array");
++ ext3_journal_stop(handle);
++ return -ENOMEM;
++ }
++ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1));
++ path[i].p_hdr = EXT_ROOT_HDR(tree);
++
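++ /*
++ * iterative depth-first walk: i is the current level, p_idx the
++ * cursor within it; leaves are emptied first, then indexes that
++ * became empty are removed on the way back up
++ */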
++ while (i >= 0 && err == 0) {
++ if (i == depth) {
++ /* this is leaf block */
++ err = ext3_ext_rm_leaf(handle, tree, path, start, end);
++ /* root level has p_bh == NULL, brelse() eats this */
++ brelse(path[i].p_bh);
++ i--;
++ continue;
++ }
++
++ /* this is index block */
++ if (!path[i].p_hdr) {
++ ext_debug(tree, "initialize header\n");
++ path[i].p_hdr = EXT_BLOCK_HDR(path[i].p_bh);
++ }
++
++ EXT_ASSERT(path[i].p_hdr->eh_entries <= path[i].p_hdr->eh_max);
++ EXT_ASSERT(path[i].p_hdr->eh_magic == EXT3_EXT_MAGIC);
++
++ if (!path[i].p_idx) {
++ /* this level hasn't been touched yet */
++ path[i].p_idx =
++ ext3_ext_last_covered(path[i].p_hdr, end);
++ path[i].p_block = path[i].p_hdr->eh_entries + 1;
++ ext_debug(tree, "init index ptr: hdr 0x%p, num %d\n",
++ path[i].p_hdr, path[i].p_hdr->eh_entries);
++ } else {
++ /* we've already been here; look at the next index */
++ path[i].p_idx--;
++ }
++
++ ext_debug(tree, "level %d - index, first 0x%p, cur 0x%p\n",
++ i, EXT_FIRST_INDEX(path[i].p_hdr),
++ path[i].p_idx);
++ if (ext3_ext_more_to_rm(path + i)) {
++ /* go to the next level */
++ ext_debug(tree, "move to level %d (block %d)\n",
++ i + 1, path[i].p_idx->ei_leaf);
++ memset(path + i + 1, 0, sizeof(*path));
++ path[i+1].p_bh = sb_bread(sb, path[i].p_idx->ei_leaf);
++ if (!path[i+1].p_bh) {
++ /* should we reset i_size? */
++ err = -EIO;
++ break;
++ }
++ /* record the actual number of indexes so we can tell
++ * at the next iteration whether it changed */
++ path[i].p_block = path[i].p_hdr->eh_entries;
++ i++;
++ } else {
++ /* we finish processing this index, go up */
++ if (path[i].p_hdr->eh_entries == 0 && i > 0) {
++ /* index is empty, remove it;
++ * the handle must already be prepared by the
++ * leaf removal above */
++ err = ext3_ext_rm_idx(handle, tree, path + i);
++ }
++ /* root level has p_bh == NULL, brelse() eats this */
++ brelse(path[i].p_bh);
++ i--;
++ ext_debug(tree, "return to level %d\n", i);
++ }
++ }
++
++ /* TODO: flexible tree reduction should be here */
++ if (path->p_hdr->eh_entries == 0) {
++ /*
++ * truncate to zero freed the whole tree,
++ * so we need to correct eh_depth
++ */
++ err = ext3_ext_get_access(handle, tree, path);
++ if (err == 0) {
++ EXT_ROOT_HDR(tree)->eh_depth = 0;
++ EXT_ROOT_HDR(tree)->eh_max = ext3_ext_space_root(tree);
++ err = ext3_ext_dirty(handle, tree, path);
++ }
++ }
++ ext3_ext_tree_changed(tree);
++
++ kfree(path);
++ ext3_journal_stop(handle);
++
++ return err;
++}
++
++int ext3_ext_calc_metadata_amount(struct ext3_extents_tree *tree, int blocks)
++{
++ int lcap, icap, rcap, leafs, idxs, num;
++
++ rcap = ext3_ext_space_root(tree);
++ if (blocks <= rcap) {
++ /* all extents fit to the root */
++ return 0;
++ }
++
++ rcap = ext3_ext_space_root_idx(tree);
++ lcap = ext3_ext_space_block(tree);
++ icap = ext3_ext_space_block_idx(tree);
++
++ num = leafs = (blocks + lcap - 1) / lcap;
++ if (leafs <= rcap) {
++ /* all pointers to leafs fit to the root */
++ return leafs;
++ }
++
++ /* ok. we need separate index block(s) to link all leaf blocks */
++ idxs = (leafs + icap - 1) / icap;
++ do {
++ num += idxs;
++ idxs = (idxs + icap - 1) / icap;
++ } while (idxs > rcap);
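++ /* e.g. with 4k blocks (~340 records per block, assuming 12-byte
++ * extent/index records and a 60-byte root in i_data), mapping
++ * 100000 blocks takes 295 leafs plus one index block: num = 296 */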
++
++ return num;
++}
++
++/*
++ * called at mount time
++ */
++void ext3_ext_init(struct super_block *sb)
++{
++ /*
++ * possible initialization would be here
++ */
++
++ if (test_opt(sb, EXTENTS)) {
++ printk("EXT3-fs: file extents enabled");
++#ifdef AGRESSIVE_TEST
++ printk(", agressive tests");
++#endif
++#ifdef CHECK_BINSEARCH
++ printk(", check binsearch");
++#endif
++ printk("\n");
++ }
++}
++
++/*
++ * called at umount time
++ */
++void ext3_ext_release(struct super_block *sb)
++{
++}
++
++/************************************************************************
++ * VFS related routines
++ ************************************************************************/
++
++static int ext3_get_inode_write_access(handle_t *handle, void *buffer)
++{
++ /* we use in-core data, not bh */
++ return 0;
++}
++
++static int ext3_mark_buffer_dirty(handle_t *handle, void *buffer)
++{
++ struct inode *inode = buffer;
++ return ext3_mark_inode_dirty(handle, inode);
++}
++
++static int ext3_ext_mergable(struct ext3_extent *ex1,
++ struct ext3_extent *ex2)
++{
++ /* FIXME: support for large fs */
++ if (ex1->ee_start + ex1->ee_len == ex2->ee_start)
++ return 1;
++ return 0;
++}
++
++static int
++ext3_remove_blocks_credits(struct ext3_extents_tree *tree,
++ struct ext3_extent *ex,
++ unsigned long from, unsigned long to)
++{
++ int needed;
++
++ /* at present, extent can't cross block group */
++ needed = 4; /* bitmap + group desc + sb + inode */
++
++#ifdef CONFIG_QUOTA
++ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS;
++#endif
++ return needed;
++}
++
++static int
++ext3_remove_blocks(struct ext3_extents_tree *tree,
++ struct ext3_extent *ex,
++ unsigned long from, unsigned long to)
++{
++ int needed = ext3_remove_blocks_credits(tree, ex, from, to);
++ handle_t *handle = ext3_journal_start(tree->inode, needed);
++ struct buffer_head *bh;
++ int i;
++
++ if (IS_ERR(handle))
++ return PTR_ERR(handle);
++ if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
++ /* tail removal */
++ unsigned long num, start;
++ num = ex->ee_block + ex->ee_len - from;
++ start = ex->ee_start + ex->ee_len - num;
++ ext_debug(tree, "free last %lu blocks starting %lu\n",
++ num, start);
++ for (i = 0; i < num; i++) {
++ bh = sb_find_get_block(tree->inode->i_sb, start + i);
++ ext3_forget(handle, 0, tree->inode, bh, start + i);
++ }
++ ext3_free_blocks(handle, tree->inode, start, num);
++ } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
++ printk("strange request: removal %lu-%lu from %u:%u\n",
++ from, to, ex->ee_block, ex->ee_len);
++ } else {
++ printk("strange request: removal(2) %lu-%lu from %u:%u\n",
++ from, to, ex->ee_block, ex->ee_len);
++ }
++ ext3_journal_stop(handle);
++ return 0;
++}
++
++static int ext3_ext_find_goal(struct inode *inode,
++ struct ext3_ext_path *path, unsigned long block)
++{
++ struct ext3_inode_info *ei = EXT3_I(inode);
++ unsigned long bg_start;
++ unsigned long colour;
++ int depth;
++
++ if (path) {
++ struct ext3_extent *ex;
++ depth = path->p_depth;
++
++ /* try to predict block placement */
++ if ((ex = path[depth].p_ext))
++ return ex->ee_start + (block - ex->ee_block);
++
++ /* it looks like the index is empty;
++ * try to find starting from the index itself */
++ if (path[depth].p_bh)
++ return path[depth].p_bh->b_blocknr;
++ }
++
++ /* OK. use inode's group */
++ bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) +
++ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block);
++ colour = (current->pid % 16) *
++ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16);
++ return bg_start + colour + block;
++}
++
++static int ext3_new_block_cb(handle_t *handle, struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path,
++ struct ext3_extent *ex, int *err)
++{
++ struct inode *inode = tree->inode;
++ int newblock, goal;
++
++ EXT_ASSERT(path);
++ EXT_ASSERT(ex);
++ EXT_ASSERT(ex->ee_start);
++ EXT_ASSERT(ex->ee_len);
++
++ /* reuse block from the extent to order data/metadata */
++ newblock = ex->ee_start++;
++ ex->ee_len--;
++ if (ex->ee_len == 0) {
++ ex->ee_len = 1;
++ /* allocate new block for the extent */
++ goal = ext3_ext_find_goal(inode, path, ex->ee_block);
++ ex->ee_start = ext3_new_block(handle, inode, goal, err);
++ if (ex->ee_start == 0) {
++ /* error occurred: restore old extent */
++ ex->ee_start = newblock;
++ return 0;
++ }
++ }
++ return newblock;
++}
++
++static struct ext3_extents_helpers ext3_blockmap_helpers = {
++ .get_write_access = ext3_get_inode_write_access,
++ .mark_buffer_dirty = ext3_mark_buffer_dirty,
++ .mergable = ext3_ext_mergable,
++ .new_block = ext3_new_block_cb,
++ .remove_extent = ext3_remove_blocks,
++ .remove_extent_credits = ext3_remove_blocks_credits,
++};
++
++void ext3_init_tree_desc(struct ext3_extents_tree *tree,
++ struct inode *inode)
++{
++ tree->inode = inode;
++ tree->root = (void *) EXT3_I(inode)->i_data;
++ tree->buffer = (void *) inode;
++ tree->buffer_len = sizeof(EXT3_I(inode)->i_data);
++ tree->cex = (struct ext3_ext_cache *) &EXT3_I(inode)->i_cached_extent;
++ tree->ops = &ext3_blockmap_helpers;
++}
++
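++/*
++ * main mapping entry: consult the extent cache first, then the tree;
++ * on a miss with 'create' set, allocate a block, insert the new extent
++ * and cache it. truncate_sem serializes us against truncate
++ */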
++int ext3_ext_get_block(handle_t *handle, struct inode *inode,
++ long iblock, struct buffer_head *bh_result,
++ int create, int extend_disksize)
++{
++ struct ext3_ext_path *path = NULL;
++ struct ext3_extent newex;
++ struct ext3_extent *ex;
++ int goal, newblock, err = 0, depth;
++ struct ext3_extents_tree tree;
++
++ clear_buffer_new(bh_result);
++ ext3_init_tree_desc(&tree, inode);
++ ext_debug(&tree, "block %d requested for inode %u\n",
++ (int) iblock, (unsigned) inode->i_ino);
++ down(&EXT3_I(inode)->truncate_sem);
++
++ /* check in cache */
++ if ((goal = ext3_ext_in_cache(&tree, iblock, &newex))) {
++ if (goal == EXT3_EXT_CACHE_GAP) {
++ if (!create) {
++ /* block isn't allocated yet and
++ * user doesn't want to allocate it */
++ goto out2;
++ }
++ /* we should allocate requested block */
++ } else if (goal == EXT3_EXT_CACHE_EXTENT) {
++ /* block is already allocated */
++ newblock = iblock - newex.ee_block + newex.ee_start;
++ goto out;
++ } else {
++ EXT_ASSERT(0);
++ }
++ }
++
++ /* find extent for this block */
++ path = ext3_ext_find_extent(&tree, iblock, NULL);
++ if (IS_ERR(path)) {
++ err = PTR_ERR(path);
++ path = NULL;
++ goto out2;
++ }
++
++ depth = EXT_DEPTH(&tree);
++
++ /*
++ * consistent leaf must not be empty;
++ * this situation is possible, though, _during_ tree modification;
++ * this is why assert can't be put in ext3_ext_find_extent()
++ */
++ EXT_ASSERT(path[depth].p_ext != NULL || depth == 0);
++
++ if ((ex = path[depth].p_ext)) {
++ /* if found extent covers block, simply return it */
++ if (iblock >= ex->ee_block && iblock < ex->ee_block + ex->ee_len) {
++ newblock = iblock - ex->ee_block + ex->ee_start;
++ ext_debug(&tree, "%d fit into %d:%d -> %d\n",
++ (int) iblock, ex->ee_block, ex->ee_len,
++ newblock);
++ ext3_ext_put_in_cache(&tree, ex->ee_block,
++ ex->ee_len, ex->ee_start,
++ EXT3_EXT_CACHE_EXTENT);
++ goto out;
++ }
++ }
++
++ /*
++ * requested block isn't allocated yet;
++ * we can't try to create the block if the create flag is zero
++ if (!create) {
++ /* put just found gap into cache to speed up subsequent requests */
++ ext3_ext_put_gap_in_cache(&tree, path, iblock);
++ goto out2;
++ }
++
++ /* allocate new block */
++ goal = ext3_ext_find_goal(inode, path, iblock);
++ newblock = ext3_new_block(handle, inode, goal, &err);
++ if (!newblock)
++ goto out2;
++ ext_debug(&tree, "allocate new block: goal %d, found %d\n",
++ goal, newblock);
++
++ /* try to insert new extent into found leaf and return */
++ newex.ee_block = iblock;
++ newex.ee_start = newblock;
++ newex.ee_len = 1;
++ err = ext3_ext_insert_extent(handle, &tree, path, &newex);
++ if (err)
++ goto out2;
++
++ if (extend_disksize && inode->i_size > EXT3_I(inode)->i_disksize)
++ EXT3_I(inode)->i_disksize = inode->i_size;
++
++ /* previous routine could have used the block we allocated */
++ newblock = newex.ee_start;
++ set_buffer_new(bh_result);
++
++ ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len,
++ newex.ee_start, EXT3_EXT_CACHE_EXTENT);
++out:
++ ext3_ext_show_leaf(&tree, path);
++ map_bh(bh_result, inode->i_sb, newblock);
++out2:
++ if (path) {
++ ext3_ext_drop_refs(path);
++ kfree(path);
++ }
++ up(&EXT3_I(inode)->truncate_sem);
++
++ return err;
++}
++
++void ext3_ext_truncate(struct inode *inode, struct page *page)
++{
++ struct address_space *mapping = inode->i_mapping;
++ struct super_block *sb = inode->i_sb;
++ struct ext3_extents_tree tree;
++ unsigned long last_block;
++ handle_t *handle;
++ int err = 0;
++
++ ext3_init_tree_desc(&tree, inode);
++
++ /*
++ * probably first extent we're gonna free will be last in block
++ */
++ err = ext3_writepage_trans_blocks(inode) + 3;
++ handle = ext3_journal_start(inode, err);
++ if (IS_ERR(handle)) {
++ if (page) {
++ clear_highpage(page);
++ flush_dcache_page(page);
++ unlock_page(page);
++ page_cache_release(page);
++ }
++ return;
++ }
++
++ if (page)
++ ext3_block_truncate_page(handle, page, mapping, inode->i_size);
++
++ down(&EXT3_I(inode)->truncate_sem);
++ ext3_ext_invalidate_cache(&tree);
++
++ /*
++ * TODO: optimization is possible here;
++ * we may not need scanning at all,
++ * because page truncation is enough
++ */
++ if (ext3_orphan_add(handle, inode))
++ goto out_stop;
++
++ /* we have to know where to truncate from in the crash case */
++ EXT3_I(inode)->i_disksize = inode->i_size;
++ ext3_mark_inode_dirty(handle, inode);
++
++ last_block = (inode->i_size + sb->s_blocksize - 1)
++ >> EXT3_BLOCK_SIZE_BITS(sb);
++ err = ext3_ext_remove_space(&tree, last_block, EXT_MAX_BLOCK);
++
++ /* In a multi-transaction truncate, we only make the final
++ * transaction synchronous */
++ if (IS_SYNC(inode))
++ handle->h_sync = 1;
++
++out_stop:
++ /*
++ * If this was a simple ftruncate(), and the file will remain alive
++ * then we need to clear up the orphan record which we created above.
++ * However, if this was a real unlink then we were called by
++ * ext3_delete_inode(), and we allow that function to clean up the
++ * orphan info for us.
++ */
++ if (inode->i_nlink)
++ ext3_orphan_del(handle, inode);
++
++ up(&EXT3_I(inode)->truncate_sem);
++ ext3_journal_stop(handle);
++}
++
++/*
++ * this routine calculates the max number of blocks we could modify
++ * in order to allocate a new block for an inode
++ */
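++/*
++ * rough sketch of the math (illustrative, not normative): with a
++ * depth-1 tree and 4KB blocks, one insert may touch the leaf, the
++ * root index, the block bitmap, the group descriptor and the
++ * superblock, i.e. about 5 credits, so a request for num = 16
++ * blocks would reserve around 5 * 16 = 80 credits (plus the quota
++ * blocks below when CONFIG_QUOTA is set).
++ */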
++int ext3_ext_writepage_trans_blocks(struct inode *inode, int num)
++{
++ struct ext3_extents_tree tree;
++ int needed;
++
++ ext3_init_tree_desc(&tree, inode);
++
++ needed = ext3_ext_calc_credits_for_insert(&tree, NULL);
++
++ /* the caller wants to allocate num blocks */
++ needed *= num;
++
++#ifdef CONFIG_QUOTA
++ /*
++ * FIXME: the real calculation should be here;
++ * it depends on the blockmap format of the quota file
++ */
++ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS;
++#endif
++
++ return needed;
++}
++
++void ext3_extents_initialize_blockmap(handle_t *handle, struct inode *inode)
++{
++ struct ext3_extents_tree tree;
++
++ ext3_init_tree_desc(&tree, inode);
++ ext3_extent_tree_init(handle, &tree);
++}
++
++int ext3_ext_calc_blockmap_metadata(struct inode *inode, int blocks)
++{
++ struct ext3_extents_tree tree;
++
++ ext3_init_tree_desc(&tree, inode);
++ return ext3_ext_calc_metadata_amount(&tree, blocks);
++}
++
++static int
++ext3_ext_store_extent_cb(struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path,
++ struct ext3_extent *newex, int exist)
++{
++ struct ext3_extent_buf *buf = (struct ext3_extent_buf *) tree->private;
++
++ if (!exist)
++ return EXT_CONTINUE;
++ if (buf->err < 0)
++ return EXT_BREAK;
++ if (buf->cur - buf->buffer + sizeof(*newex) > buf->buflen)
++ return EXT_BREAK;
++
++ if (!copy_to_user(buf->cur, newex, sizeof(*newex))) {
++ buf->err++;
++ buf->cur += sizeof(*newex);
++ } else {
++ buf->err = -EFAULT;
++ return EXT_BREAK;
++ }
++ return EXT_CONTINUE;
++}
++
++static int
++ext3_ext_collect_stats_cb(struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path,
++ struct ext3_extent *ex, int exist)
++{
++ struct ext3_extent_tree_stats *buf =
++ (struct ext3_extent_tree_stats *) tree->private;
++ int depth;
++
++ if (!exist)
++ return EXT_CONTINUE;
++
++ depth = EXT_DEPTH(tree);
++ buf->extents_num++;
++ if (path[depth].p_ext == EXT_FIRST_EXTENT(path[depth].p_hdr))
++ buf->leaf_num++;
++ return EXT_CONTINUE;
++}
++
++int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
++ unsigned long arg)
++{
++ int err = 0;
++
++ if (!(EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL))
++ return -EINVAL;
++
++ if (cmd == EXT3_IOC_GET_EXTENTS) {
++ struct ext3_extent_buf buf;
++ struct ext3_extents_tree tree;
++
++ if (copy_from_user(&buf, (void *) arg, sizeof(buf)))
++ return -EFAULT;
++
++ ext3_init_tree_desc(&tree, inode);
++ buf.cur = buf.buffer;
++ buf.err = 0;
++ tree.private = &buf;
++ down(&EXT3_I(inode)->truncate_sem);
++ err = ext3_ext_walk_space(&tree, buf.start, EXT_MAX_BLOCK,
++ ext3_ext_store_extent_cb);
++ up(&EXT3_I(inode)->truncate_sem);
++ if (err == 0)
++ err = buf.err;
++ } else if (cmd == EXT3_IOC_GET_TREE_STATS) {
++ struct ext3_extent_tree_stats buf;
++ struct ext3_extents_tree tree;
++
++ ext3_init_tree_desc(&tree, inode);
++ down(&EXT3_I(inode)->truncate_sem);
++ buf.depth = EXT_DEPTH(&tree);
++ buf.extents_num = 0;
++ buf.leaf_num = 0;
++ tree.private = &buf;
++ err = ext3_ext_walk_space(&tree, 0, EXT_MAX_BLOCK,
++ ext3_ext_collect_stats_cb);
++ up(&EXT3_I(inode)->truncate_sem);
++ if (!err && copy_to_user((void *) arg, &buf, sizeof(buf)))
++ err = -EFAULT;
++ } else if (cmd == EXT3_IOC_GET_TREE_DEPTH) {
++ struct ext3_extents_tree tree;
++ ext3_init_tree_desc(&tree, inode);
++ down(&EXT3_I(inode)->truncate_sem);
++ err = EXT_DEPTH(&tree);
++ up(&EXT3_I(inode)->truncate_sem);
++ }
++
++ return err;
++}
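++/*
++ * userspace usage sketch (hypothetical, for illustration): fill a
++ * buffer with the extents of a file starting at logical block 0.
++ *
++ * struct ext3_extent extents[32];
++ * struct ext3_extent_buf buf = {
++ * .start = 0, .buflen = sizeof(extents),
++ * .buffer = extents, .err = 0,
++ * };
++ * ioctl(fd, EXT3_IOC_GET_EXTENTS, &buf);
++ */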
++
++EXPORT_SYMBOL(ext3_init_tree_desc);
++EXPORT_SYMBOL(ext3_mark_inode_dirty);
++EXPORT_SYMBOL(ext3_ext_invalidate_cache);
++EXPORT_SYMBOL(ext3_ext_insert_extent);
++EXPORT_SYMBOL(ext3_ext_walk_space);
++EXPORT_SYMBOL(ext3_ext_find_goal);
++EXPORT_SYMBOL(ext3_ext_calc_credits_for_insert);
++
+Index: linux-2.6.5-sles9/fs/ext3/ialloc.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/ialloc.c 2004-11-09 02:22:55.763148128 +0300
++++ linux-2.6.5-sles9/fs/ext3/ialloc.c 2004-11-09 02:23:21.587222272 +0300
+@@ -647,6 +647,10 @@
+ DQUOT_FREE_INODE(inode);
+ goto fail2;
+ }
++ if (test_opt(sb, EXTENTS)) {
++ EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL;
++ ext3_extents_initialize_blockmap(handle, inode);
++ }
+ err = ext3_mark_inode_dirty(handle, inode);
+ if (err) {
+ ext3_std_error(sb, err);
+Index: linux-2.6.5-sles9/fs/ext3/inode.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/inode.c 2004-11-09 02:22:55.767147520 +0300
++++ linux-2.6.5-sles9/fs/ext3/inode.c 2004-11-09 02:23:21.592221512 +0300
+@@ -796,6 +796,17 @@
+ goto reread;
+ }
+
++static inline int
++ext3_get_block_wrap(handle_t *handle, struct inode *inode, long block,
++ struct buffer_head *bh, int create, int extend_disksize)
++{
++ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)
++ return ext3_ext_get_block(handle, inode, block, bh, create,
++ extend_disksize);
++ return ext3_get_block_handle(handle, inode, block, bh, create,
++ extend_disksize);
++}
++
+ static int ext3_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+ {
+@@ -806,8 +817,8 @@
+ handle = ext3_journal_current_handle();
+ J_ASSERT(handle != 0);
+ }
+- ret = ext3_get_block_handle(handle, inode, iblock,
+- bh_result, create, 1);
++ ret = ext3_get_block_wrap(handle, inode, iblock,
++ bh_result, create, 1);
+ return ret;
+ }
+
+@@ -833,8 +844,8 @@
+ }
+ }
+ if (ret == 0)
+- ret = ext3_get_block_handle(handle, inode, iblock,
+- bh_result, create, 0);
++ ret = ext3_get_block_wrap(handle, inode, iblock,
++ bh_result, create, 0);
+ if (ret == 0)
+ bh_result->b_size = (1 << inode->i_blkbits);
+ return ret;
+@@ -855,7 +866,7 @@
+ dummy.b_state = 0;
+ dummy.b_blocknr = -1000;
+ buffer_trace_init(&dummy.b_history);
+- *errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1);
++ *errp = ext3_get_block_wrap(handle, inode, block, &dummy, create, 1);
+ if (!*errp && buffer_mapped(&dummy)) {
+ struct buffer_head *bh;
+ bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
+@@ -1587,7 +1598,7 @@
+ * This required during truncate. We need to physically zero the tail end
+ * of that block so it doesn't yield old data if the file is later grown.
+ */
+-static int ext3_block_truncate_page(handle_t *handle, struct page *page,
++int ext3_block_truncate_page(handle_t *handle, struct page *page,
+ struct address_space *mapping, loff_t from)
+ {
+ unsigned long index = from >> PAGE_CACHE_SHIFT;
+@@ -2083,6 +2094,9 @@
+ return;
+ }
+
++ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)
++ return ext3_ext_truncate(inode, page);
++
+ handle = start_transaction(inode);
+ if (IS_ERR(handle)) {
+ if (page) {
+@@ -2789,6 +2803,9 @@
+ int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3;
+ int ret;
+
++ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)
++ return ext3_ext_writepage_trans_blocks(inode, bpp);
++
+ if (ext3_should_journal_data(inode))
+ ret = 3 * (bpp + indirects) + 2;
+ else
+Index: linux-2.6.5-sles9/fs/ext3/Makefile
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/Makefile 2004-11-09 02:18:27.604914376 +0300
++++ linux-2.6.5-sles9/fs/ext3/Makefile 2004-11-09 02:23:21.593221360 +0300
+@@ -5,7 +5,7 @@
+ obj-$(CONFIG_EXT3_FS) += ext3.o
+
+ ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+- ioctl.o namei.o super.o symlink.o hash.o
++ ioctl.o namei.o super.o symlink.o hash.o extents.o
+
+ ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
+ ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
+Index: linux-2.6.5-sles9/fs/ext3/super.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/super.c 2004-11-09 02:22:56.450043704 +0300
++++ linux-2.6.5-sles9/fs/ext3/super.c 2004-11-09 02:23:21.597220752 +0300
+@@ -389,6 +389,7 @@
+ struct ext3_super_block *es = sbi->s_es;
+ int i;
+
++ ext3_ext_release(sb);
+ ext3_xattr_put_super(sb);
+ journal_destroy(sbi->s_journal);
+ if (!(sb->s_flags & MS_RDONLY)) {
+@@ -447,6 +448,10 @@
+ #endif
+ ei->i_rsv_window.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
+ ei->vfs_inode.i_version = 1;
++ ei->i_cached_extent[0] = 0;
++ ei->i_cached_extent[1] = 0;
++ ei->i_cached_extent[2] = 0;
++ ei->i_cached_extent[3] = 0;
+ return &ei->vfs_inode;
+ }
+
+@@ -537,7 +542,7 @@
+ Opt_commit, Opt_journal_update, Opt_journal_inum,
+ Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
+ Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
+- Opt_err,
++ Opt_err, Opt_extents, Opt_extdebug
+ };
+
+ static match_table_t tokens = {
+@@ -582,6 +587,8 @@
+ {Opt_iopen, "iopen"},
+ {Opt_noiopen, "noiopen"},
+ {Opt_iopen_nopriv, "iopen_nopriv"},
++ {Opt_extents, "extents"},
++ {Opt_extdebug, "extdebug"},
+ {Opt_err, NULL}
+ };
+
+@@ -797,6 +804,12 @@
+ break;
+ case Opt_ignore:
+ break;
++ case Opt_extents:
++ set_opt (sbi->s_mount_opt, EXTENTS);
++ break;
++ case Opt_extdebug:
++ set_opt (sbi->s_mount_opt, EXTDEBUG);
++ break;
+ default:
+ printk (KERN_ERR
+ "EXT3-fs: Unrecognized mount option \"%s\" "
+@@ -1449,6 +1462,8 @@
+ percpu_counter_mod(&sbi->s_dirs_counter,
+ ext3_count_dirs(sb));
+
++ ext3_ext_init(sb);
++
+ return 0;
+
+ failed_mount3:
+Index: linux-2.6.5-sles9/fs/ext3/ioctl.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/ioctl.c 2004-11-09 02:15:44.610693264 +0300
++++ linux-2.6.5-sles9/fs/ext3/ioctl.c 2004-11-09 02:23:52.991448104 +0300
+@@ -124,6 +124,10 @@
+ err = ext3_change_inode_journal_flag(inode, jflag);
+ return err;
+ }
++ case EXT3_IOC_GET_EXTENTS:
++ case EXT3_IOC_GET_TREE_STATS:
++ case EXT3_IOC_GET_TREE_DEPTH:
++ return ext3_ext_ioctl(inode, filp, cmd, arg);
+ case EXT3_IOC_GETVERSION:
+ case EXT3_IOC_GETVERSION_OLD:
+ return put_user(inode->i_generation, (int *) arg);
+Index: linux-2.6.5-sles9/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.6.5-sles9.orig/include/linux/ext3_fs.h 2004-11-09 02:22:58.767691368 +0300
++++ linux-2.6.5-sles9/include/linux/ext3_fs.h 2004-11-09 02:25:17.238640584 +0300
+@@ -186,6 +186,7 @@
+ #define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
+ #define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
+ #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */
++#define EXT3_EXTENTS_FL 0x00080000 /* Inode uses extents */
+
+ #define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
+ #define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
+@@ -211,6 +212,9 @@
+ #endif
+ #define EXT3_IOC_GETRSVSZ _IOR('f', 5, long)
+ #define EXT3_IOC_SETRSVSZ _IOW('f', 6, long)
++#define EXT3_IOC_GET_EXTENTS _IOR('f', 7, long)
++#define EXT3_IOC_GET_TREE_DEPTH _IOR('f', 8, long)
++#define EXT3_IOC_GET_TREE_STATS _IOR('f', 9, long)
+
+ /*
+ * Structure of an inode on the disk
+@@ -333,6 +337,8 @@
+ #define EXT3_MOUNT_BARRIER 0x20000 /* Use block barriers */
+ #define EXT3_MOUNT_IOPEN 0x40000 /* Allow access via iopen */
+ #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */
++#define EXT3_MOUNT_EXTENTS 0x100000/* Extents support */
++#define EXT3_MOUNT_EXTDEBUG 0x200000/* Extents debug */
+
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef clear_opt
+@@ -729,6 +735,7 @@
+
+
+ /* inode.c */
++extern int ext3_block_truncate_page(handle_t *, struct page *, struct address_space *, loff_t);
+ extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int);
+ extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
+ extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
+@@ -802,6 +809,14 @@
+ extern struct inode_operations ext3_symlink_inode_operations;
+ extern struct inode_operations ext3_fast_symlink_inode_operations;
+
++/* extents.c */
++extern int ext3_ext_writepage_trans_blocks(struct inode *, int);
++extern int ext3_ext_get_block(handle_t *, struct inode *, long,
++ struct buffer_head *, int, int);
++extern void ext3_ext_truncate(struct inode *, struct page *);
++extern void ext3_ext_init(struct super_block *);
++extern void ext3_ext_release(struct super_block *);
++extern void ext3_extents_initialize_blockmap(handle_t *, struct inode *);
+
+ #endif /* __KERNEL__ */
+
+Index: linux-2.6.5-sles9/include/linux/ext3_extents.h
+===================================================================
+--- linux-2.6.5-sles9.orig/include/linux/ext3_extents.h 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.6.5-sles9/include/linux/ext3_extents.h 2004-11-09 02:23:21.606219384 +0300
+@@ -0,0 +1,252 @@
++/*
++ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com
++ * Written by Alex Tomas <alex@clusterfs.com>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public Licens
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
++ */
++
++#ifndef _LINUX_EXT3_EXTENTS
++#define _LINUX_EXT3_EXTENTS
++
++/*
++ * with AGRESSIVE_TEST defined, the capacity of index/leaf blocks
++ * becomes very small, so index splits, in-depth growing and
++ * other hard changes happen much more often;
++ * this is for debug purposes only
++ */
++#define AGRESSIVE_TEST_
++
++/*
++ * if CHECK_BINSEARCH defined, then results of binary search
++ * will be checked by linear search
++ */
++#define CHECK_BINSEARCH_
++
++/*
++ * if EXT_DEBUG is defined you can use 'extdebug' mount option
++ * to get lots of info about what's going on
++ */
++#define EXT_DEBUG_
++#ifdef EXT_DEBUG
++#define ext_debug(tree,fmt,a...) \
++do { \
++ if (test_opt((tree)->inode->i_sb, EXTDEBUG)) \
++ printk(fmt, ##a); \
++} while (0)
++#else
++#define ext_debug(tree,fmt,a...)
++#endif
++
++/*
++ * if EXT_STATS is defined then stats numbers are collected
++ * these numbers will be displayed at umount time
++ */
++#define EXT_STATS_
++
++
++#define EXT3_ALLOC_NEEDED 3 /* block bitmap + group desc. + sb */
++
++/*
++ * ext3_inode has i_block array (total 60 bytes)
++ * the first 12 bytes hold a struct ext3_extent_header, which stores:
++ * - the tree depth (0 means there is no tree yet; all extents are in the inode)
++ * - the number of live extents in the inode
++ */
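++/*
++ * illustrative layout: the header takes 12 of the 60 bytes, so a
++ * depth-0 root stored in i_block can hold up to four 12-byte
++ * struct ext3_extent entries (48 / 12), or four ext3_extent_idx
++ * entries once the tree has grown a level.
++ */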
++
++/*
++ * this is extent on-disk structure
++ * it's used at the bottom of the tree
++ */
++struct ext3_extent {
++ __u32 ee_block; /* first logical block extent covers */
++ __u16 ee_len; /* number of blocks covered by extent */
++ __u16 ee_start_hi; /* high 16 bits of physical block */
++ __u32 ee_start; /* low 32 bits of physical block */
++};
++
++/*
++ * this is index on-disk structure
++ * it's used at all the levels, but the bottom
++ */
++struct ext3_extent_idx {
++ __u32 ei_block; /* index covers logical blocks from 'block' */
++ __u32 ei_leaf; /* pointer to the physical block of the next *
++ * level; a leaf or the next index could be here */
++ __u16 ei_leaf_hi; /* high 16 bits of physical block */
++ __u16 ei_unused;
++};
++
++/*
++ * each block (leaf and index), even the inode-stored one, has a header
++ */
++struct ext3_extent_header {
++ __u16 eh_magic; /* probably will support different formats */
++ __u16 eh_entries; /* number of valid entries */
++ __u16 eh_max; /* capacity of the store, in entries */
++ __u16 eh_depth; /* has the tree real underlying blocks? */
++ __u32 eh_generation; /* generation of the tree */
++};
++
++#define EXT3_EXT_MAGIC 0xf30a
++
++/*
++ * array of ext3_ext_path contains path to some extent
++ * creation/lookup routines use it for traversal/splitting/etc
++ * truncate uses it to simulate recursive walking
++ */
++struct ext3_ext_path {
++ __u32 p_block;
++ __u16 p_depth;
++ struct ext3_extent *p_ext;
++ struct ext3_extent_idx *p_idx;
++ struct ext3_extent_header *p_hdr;
++ struct buffer_head *p_bh;
++};
++
++/*
++ * structure for external API
++ */
++
++/*
++ * storage for cached extent
++ */
++struct ext3_ext_cache {
++ __u32 ec_start;
++ __u32 ec_block;
++ __u32 ec_len;
++ __u32 ec_type;
++};
++
++#define EXT3_EXT_CACHE_NO 0
++#define EXT3_EXT_CACHE_GAP 1
++#define EXT3_EXT_CACHE_EXTENT 2
++
++/*
++ * ext3_extents_tree is used to pass initial information
++ * to top-level extents API
++ */
++struct ext3_extents_helpers;
++struct ext3_extents_tree {
++ struct inode *inode; /* inode which tree belongs to */
++ void *root; /* ptr to the data the top of the tree resides at */
++ void *buffer; /* will be passed as arg to ^^ routines */
++ int buffer_len;
++ void *private;
++ struct ext3_ext_cache *cex;/* last found extent */
++ struct ext3_extents_helpers *ops;
++};
++
++struct ext3_extents_helpers {
++ int (*get_write_access)(handle_t *h, void *buffer);
++ int (*mark_buffer_dirty)(handle_t *h, void *buffer);
++ int (*mergable)(struct ext3_extent *ex1, struct ext3_extent *ex2);
++ int (*remove_extent_credits)(struct ext3_extents_tree *,
++ struct ext3_extent *, unsigned long,
++ unsigned long);
++ int (*remove_extent)(struct ext3_extents_tree *,
++ struct ext3_extent *, unsigned long,
++ unsigned long);
++ int (*new_block)(handle_t *, struct ext3_extents_tree *,
++ struct ext3_ext_path *, struct ext3_extent *,
++ int *);
++};
++
++/*
++ * to be called by ext3_ext_walk_space()
++ * negative retcode - error
++ * positive retcode - signal for ext3_ext_walk_space(), see below
++ * callback must return valid extent (passed or newly created)
++ */
++typedef int (*ext_prepare_callback)(struct ext3_extents_tree *,
++ struct ext3_ext_path *,
++ struct ext3_extent *, int);
++
++#define EXT_CONTINUE 0
++#define EXT_BREAK 1
++#define EXT_REPEAT 2
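++/*
++ * minimal callback sketch (hypothetical, for illustration): count
++ * the real extents in a range via tree->private.
++ *
++ * static int count_cb(struct ext3_extents_tree *tree,
++ * struct ext3_ext_path *path,
++ * struct ext3_extent *ex, int exist)
++ * {
++ * if (exist)
++ * (*(int *) tree->private)++;
++ * return EXT_CONTINUE;
++ * }
++ */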
++
++
++#define EXT_MAX_BLOCK 0xffffffff
++#define EXT_CACHE_MARK 0xffff
++
++
++#define EXT_FIRST_EXTENT(__hdr__) \
++ ((struct ext3_extent *) (((char *) (__hdr__)) + \
++ sizeof(struct ext3_extent_header)))
++#define EXT_FIRST_INDEX(__hdr__) \
++ ((struct ext3_extent_idx *) (((char *) (__hdr__)) + \
++ sizeof(struct ext3_extent_header)))
++#define EXT_HAS_FREE_INDEX(__path__) \
++ ((__path__)->p_hdr->eh_entries < (__path__)->p_hdr->eh_max)
++#define EXT_LAST_EXTENT(__hdr__) \
++ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_entries - 1)
++#define EXT_LAST_INDEX(__hdr__) \
++ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_entries - 1)
++#define EXT_MAX_EXTENT(__hdr__) \
++ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1)
++#define EXT_MAX_INDEX(__hdr__) \
++ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1)
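++/*
++ * typical traversal sketch (assumes a valid header pointer 'eh'):
++ *
++ * struct ext3_extent *ex;
++ * for (ex = EXT_FIRST_EXTENT(eh); ex <= EXT_LAST_EXTENT(eh); ex++)
++ * process blocks ex->ee_block .. ex->ee_block + ex->ee_len - 1;
++ */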
++
++#define EXT_ROOT_HDR(tree) \
++ ((struct ext3_extent_header *) (tree)->root)
++#define EXT_BLOCK_HDR(bh) \
++ ((struct ext3_extent_header *) (bh)->b_data)
++#define EXT_DEPTH(_t_) \
++ (((struct ext3_extent_header *)((_t_)->root))->eh_depth)
++#define EXT_GENERATION(_t_) \
++ (((struct ext3_extent_header *)((_t_)->root))->eh_generation)
++
++
++#define EXT_ASSERT(__x__) do { if (!(__x__)) BUG(); } while (0)
++
++
++/*
++ * this structure is used to gather extents from the tree via ioctl
++ */
++struct ext3_extent_buf {
++ unsigned long start;
++ int buflen;
++ void *buffer;
++ void *cur;
++ int err;
++};
++
++/*
++ * this structure is used to collect stats info about the tree
++ */
++struct ext3_extent_tree_stats {
++ int depth;
++ int extents_num;
++ int leaf_num;
++};
++
++extern int ext3_extent_tree_init(handle_t *, struct ext3_extents_tree *);
++extern int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *, struct ext3_ext_path *);
++extern int ext3_ext_insert_extent(handle_t *, struct ext3_extents_tree *, struct ext3_ext_path *, struct ext3_extent *);
++extern int ext3_ext_walk_space(struct ext3_extents_tree *, unsigned long, unsigned long, ext_prepare_callback);
++extern int ext3_ext_remove_space(struct ext3_extents_tree *, unsigned long, unsigned long);
++extern struct ext3_ext_path * ext3_ext_find_extent(struct ext3_extents_tree *, int, struct ext3_ext_path *);
++extern void ext3_init_tree_desc(struct ext3_extents_tree *, struct inode *);
++extern int ext3_ext_calc_blockmap_metadata(struct inode *, int);
++
++static inline void
++ext3_ext_invalidate_cache(struct ext3_extents_tree *tree)
++{
++ if (tree->cex)
++ tree->cex->ec_type = EXT3_EXT_CACHE_NO;
++}
++
++
++#endif /* _LINUX_EXT3_EXTENTS */
++
+Index: linux-2.6.5-sles9/include/linux/ext3_fs_i.h
+===================================================================
+--- linux-2.6.5-sles9.orig/include/linux/ext3_fs_i.h 2004-11-09 02:22:55.780145544 +0300
++++ linux-2.6.5-sles9/include/linux/ext3_fs_i.h 2004-11-09 02:23:21.606219384 +0300
+@@ -128,6 +128,8 @@
+ */
+ struct semaphore truncate_sem;
+ struct inode vfs_inode;
++
++ __u32 i_cached_extent[4];
+ };
+
+ #endif /* _LINUX_EXT3_FS_I */
+
+%diffstat
+ fs/ext3/Makefile | 2
+ fs/ext3/extents.c | 2313 +++++++++++++++++++++++++++++++++++++++++++
+ fs/ext3/ialloc.c | 4
+ fs/ext3/inode.c | 29
+ fs/ext3/ioctl.c | 4
+ fs/ext3/super.c | 17
+ include/linux/ext3_extents.h | 252 ++++
+ include/linux/ext3_fs.h | 15
+ include/linux/ext3_fs_i.h | 2
+ 9 files changed, 2630 insertions(+), 8 deletions(-)
+
--- /dev/null
+Index: linux-2.6.5-sles9/fs/ext3/mballoc.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/mballoc.c 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.6.5-sles9/fs/ext3/mballoc.c 2004-11-09 02:34:25.181340632 +0300
+@@ -0,0 +1,1428 @@
++/*
++ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com
++ * Written by Alex Tomas <alex@clusterfs.com>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public Licens
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
++ */
++
++
++/*
++ * mballoc.c contains the multiblocks allocation routines
++ */
++
++#include <linux/config.h>
++#include <linux/time.h>
++#include <linux/fs.h>
++#include <linux/namei.h>
++#include <linux/jbd.h>
++#include <linux/ext3_fs.h>
++#include <linux/ext3_jbd.h>
++#include <linux/quotaops.h>
++#include <linux/buffer_head.h>
++#include <linux/module.h>
++
++/*
++ * TODO:
++ * - do not scan from the beginning, try to remember first free block
++ * - mb_mark_used_* may allocate chunk right after splitting buddy
++ * - special flag to advise the allocator to look for requested + N blocks
++ * this may improve interaction between extents and mballoc
++ */
++
++/*
++ * with AGGRESSIVE_CHECK the allocator runs consistency checks over
++ * structures. these checks slow things down a lot
++ */
++#define AGGRESSIVE_CHECK__
++
++/*
++ */
++#define MB_DEBUG__
++#ifdef MB_DEBUG
++#define mb_debug(fmt,a...) printk(fmt, ##a)
++#else
++#define mb_debug(fmt,a...)
++#endif
++
++/*
++ * where to save buddy structures between umount/mount (clean case only)
++ */
++#define EXT3_BUDDY_FILE ".buddy"
++
++/*
++ * max. number of chunks to be tracked in ext3_free_extent struct
++ */
++#define MB_ARR_SIZE 32
++
++struct ext3_allocation_context {
++ struct super_block *ac_sb;
++
++ /* search goals */
++ int ac_g_group;
++ int ac_g_start;
++ int ac_g_len;
++ int ac_g_flags;
++
++ /* the best found extent */
++ int ac_b_group;
++ int ac_b_start;
++ int ac_b_len;
++
++ /* number of iterations done. we have to track it to limit searching */
++ int ac_repeats;
++ int ac_groups_scanned;
++ int ac_status;
++};
++
++#define AC_STATUS_CONTINUE 1
++#define AC_STATUS_FOUND 2
++
++
++struct ext3_buddy {
++ void *bd_bitmap;
++ void *bd_buddy;
++ int bd_blkbits;
++ struct buffer_head *bd_bh;
++ struct buffer_head *bd_bh2;
++ struct ext3_buddy_group_blocks *bd_bd;
++ struct super_block *bd_sb;
++};
++
++struct ext3_free_extent {
++ int fe_start;
++ int fe_len;
++ unsigned char fe_orders[MB_ARR_SIZE];
++ unsigned char fe_nums;
++ unsigned char fe_back;
++};
++
++#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
++
++
++int ext3_create (struct inode *, struct dentry *, int, struct nameidata *);
++struct buffer_head * read_block_bitmap(struct super_block *, unsigned int);
++void ext3_free_blocks_old(handle_t *, struct inode *, unsigned long, unsigned long);
++int ext3_new_block_old(handle_t *, struct inode *, unsigned long, int *);
++int ext3_mb_reserve_blocks(struct super_block *, int);
++void ext3_mb_release_blocks(struct super_block *, int);
++void ext3_mb_poll_new_transaction(struct super_block *, handle_t *);
++void ext3_mb_free_committed_blocks(struct super_block *);
++
++#define mb_correct_addr_and_bit(bit,addr) \
++{ \
++ if ((unsigned) addr & 1) { \
++ bit += 8; \
++ addr--; \
++ } \
++ if ((unsigned) addr & 2) { \
++ bit += 16; \
++ addr--; \
++ addr--; \
++ } \
++}
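++/*
++ * the atomic bitops expect a 32-bit-aligned address; the helper
++ * above rounds 'addr' down and moves the bit offset forward to
++ * compensate. e.g. addr = base + 3, bit = 0 becomes addr = base,
++ * bit = 24, naming the very same bit of memory.
++ */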
++
++static inline int mb_test_bit(int bit, void *addr)
++{
++ mb_correct_addr_and_bit(bit,addr);
++ return test_bit(bit, addr);
++}
++
++static inline void mb_set_bit(int bit, void *addr)
++{
++ mb_correct_addr_and_bit(bit,addr);
++ set_bit(bit, addr);
++}
++
++static inline void mb_clear_bit(int bit, void *addr)
++{
++ mb_correct_addr_and_bit(bit,addr);
++ clear_bit(bit, addr);
++}
++
++static inline void *mb_find_buddy(struct ext3_buddy *e3b, int order, int *max)
++{
++ int i = 1;
++ void *bb;
++
++ J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy);
++ J_ASSERT(max != NULL);
++
++ if (order > e3b->bd_blkbits + 1)
++ return NULL;
++
++ /* at order 0 we see each particular block */
++ *max = 1 << (e3b->bd_blkbits + 3);
++ if (order == 0)
++ return e3b->bd_bitmap;
++
++ bb = e3b->bd_buddy;
++ *max = *max >> 1;
++ while (i < order) {
++ bb += 1 << (e3b->bd_blkbits - i);
++ i++;
++ *max = *max >> 1;
++ }
++ return bb;
++}
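++/*
++ * worked example (assuming 4KB blocks, bd_blkbits = 12): order 0
++ * is the block bitmap itself with 32768 bits; order 1 starts at
++ * bd_buddy with 16384 bits; order 2 starts 2048 bytes further in
++ * with 8192 bits, each level half the size of the one below.
++ */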
++
++static int ext3_mb_load_desc(struct super_block *sb, int group,
++ struct ext3_buddy *e3b)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++
++ J_ASSERT(sbi->s_buddy_blocks[group].bb_bitmap);
++ J_ASSERT(sbi->s_buddy_blocks[group].bb_buddy);
++
++ /* load bitmap */
++ e3b->bd_bh = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_bitmap);
++ if (e3b->bd_bh == NULL) {
++ ext3_error(sb, "ext3_mb_load_desc",
++ "can't get block for buddy bitmap\n");
++ goto out;
++ }
++ if (!buffer_uptodate(e3b->bd_bh)) {
++ ll_rw_block(READ, 1, &e3b->bd_bh);
++ wait_on_buffer(e3b->bd_bh);
++ }
++ J_ASSERT(buffer_uptodate(e3b->bd_bh));
++
++ /* load buddy */
++ e3b->bd_bh2 = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_buddy);
++ if (e3b->bd_bh2 == NULL) {
++ ext3_error(sb, "ext3_mb_load_desc",
++ "can't get block for buddy bitmap\n");
++ goto out;
++ }
++ if (!buffer_uptodate(e3b->bd_bh2)) {
++ ll_rw_block(READ, 1, &e3b->bd_bh2);
++ wait_on_buffer(e3b->bd_bh2);
++ }
++ J_ASSERT(buffer_uptodate(e3b->bd_bh2));
++
++ e3b->bd_bitmap = e3b->bd_bh->b_data;
++ e3b->bd_buddy = e3b->bd_bh2->b_data;
++ e3b->bd_blkbits = sb->s_blocksize_bits;
++ e3b->bd_bd = sbi->s_buddy_blocks + group;
++ e3b->bd_sb = sb;
++
++ return 0;
++out:
++ brelse(e3b->bd_bh);
++ brelse(e3b->bd_bh2);
++ e3b->bd_bh = NULL;
++ e3b->bd_bh2 = NULL;
++ return -EIO;
++}
++
++static void ext3_mb_dirty_buddy(struct ext3_buddy *e3b)
++{
++ mark_buffer_dirty(e3b->bd_bh);
++ mark_buffer_dirty(e3b->bd_bh2);
++}
++
++static void ext3_mb_release_desc(struct ext3_buddy *e3b)
++{
++ brelse(e3b->bd_bh);
++ brelse(e3b->bd_bh2);
++}
++
++#ifdef AGGRESSIVE_CHECK
++static void mb_check_buddy(struct ext3_buddy *e3b)
++{
++ int order = e3b->bd_blkbits + 1;
++ int max, max2, i, j, k, count;
++ void *buddy, *buddy2;
++
++ if (!test_opt(e3b->bd_sb, MBALLOC))
++ return;
++
++ while (order > 1) {
++ buddy = mb_find_buddy(e3b, order, &max);
++ J_ASSERT(buddy);
++ buddy2 = mb_find_buddy(e3b, order - 1, &max2);
++ J_ASSERT(buddy2);
++ J_ASSERT(buddy != buddy2);
++ J_ASSERT(max * 2 == max2);
++
++ count = 0;
++ for (i = 0; i < max; i++) {
++
++ if (!mb_test_bit(i, buddy)) {
++ /* only single bit in buddy2 may be 1 */
++ if (mb_test_bit(i << 1, buddy2))
++ J_ASSERT(!mb_test_bit((i<<1)+1, buddy2));
++ else if (mb_test_bit((i << 1) + 1, buddy2))
++ J_ASSERT(!mb_test_bit(i << 1, buddy2));
++ continue;
++ }
++
++ /* both bits in buddy2 must be 0 */
++ J_ASSERT(!mb_test_bit(i << 1, buddy2));
++ J_ASSERT(!mb_test_bit((i << 1) + 1, buddy2));
++
++ for (j = 0; j < (1 << order); j++) {
++ k = (i * (1 << order)) + j;
++ J_ASSERT(mb_test_bit(k, e3b->bd_bitmap));
++ }
++ count++;
++ }
++ J_ASSERT(e3b->bd_bd->bb_counters[order] == count);
++ order--;
++ }
++
++ buddy = mb_find_buddy(e3b, 0, &max);
++ for (i = 0; i < max; i++) {
++ if (mb_test_bit(i, buddy))
++ continue;
++ /* check used bits only */
++ for (j = 0; j < e3b->bd_blkbits + 1; j++) {
++ buddy2 = mb_find_buddy(e3b, j, &max2);
++ k = i >> j;
++ J_ASSERT(k < max2);
++ J_ASSERT(!mb_test_bit(k, buddy2));
++ }
++ }
++}
++#else
++#define mb_check_buddy(e3b)
++#endif
++
++static inline void
++ext3_lock_group(struct super_block *sb, int group)
++{
++ spin_lock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock);
++}
++
++static inline void
++ext3_unlock_group(struct super_block *sb, int group)
++{
++ spin_unlock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock);
++}
++
++static int mb_find_order_for_block(struct ext3_buddy *e3b, int block)
++{
++ int order = 1;
++ void *bb;
++
++ J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy);
++ J_ASSERT(block < (1 << (e3b->bd_blkbits + 3)));
++
++ bb = e3b->bd_buddy;
++ while (order <= e3b->bd_blkbits + 1) {
++ block = block >> 1;
++ if (mb_test_bit(block, bb)) {
++ /* this block is part of buddy of order 'order' */
++ return order;
++ }
++ bb += 1 << (e3b->bd_blkbits - order);
++ order++;
++ }
++ return 0;
++}
++
++static inline void mb_clear_bits(void *bm, int cur, int len)
++{
++ __u32 *addr;
++
++ len = cur + len;
++ while (cur < len) {
++ if ((cur & 31) == 0 && (len - cur) >= 32) {
++ /* fast path: clear whole word at once */
++ addr = bm + (cur >> 3);
++ *addr = 0;
++ cur += 32;
++ continue;
++ }
++ mb_clear_bit(cur, bm);
++ cur++;
++ }
++}
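++/*
++ * e.g. mb_clear_bits(bm, 5, 100): bits 5..31 take the per-bit
++ * path, bits 32..95 are cleared as two whole 32-bit words, and
++ * the tail bits 96..104 fall back to the per-bit path again.
++ */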
++
++static inline void mb_set_bits(void *bm, int cur, int len)
++{
++ __u32 *addr;
++
++ len = cur + len;
++ while (cur < len) {
++ if ((cur & 31) == 0 && (len - cur) >= 32) {
++ /* fast path: set whole word at once */
++ addr = bm + (cur >> 3);
++ *addr = 0xffffffff;
++ cur += 32;
++ continue;
++ }
++ mb_set_bit(cur, bm);
++ cur++;
++ }
++}
++
++static int mb_free_blocks(struct ext3_buddy *e3b, int first, int count)
++{
++ int block, max, order;
++ void *buddy, *buddy2;
++
++ mb_check_buddy(e3b);
++ while (count-- > 0) {
++ block = first++;
++ order = 0;
++
++ J_ASSERT(!mb_test_bit(block, e3b->bd_bitmap));
++ mb_set_bit(block, e3b->bd_bitmap);
++ e3b->bd_bd->bb_counters[order]++;
++
++ /* start of the buddy */
++ buddy = mb_find_buddy(e3b, order, &max);
++
++ do {
++ block &= ~1UL;
++ if (!mb_test_bit(block, buddy) ||
++ !mb_test_bit(block + 1, buddy))
++ break;
++
++ /* both the buddies are free, try to coalesce them */
++ buddy2 = mb_find_buddy(e3b, order + 1, &max);
++
++ if (!buddy2)
++ break;
++
++ if (order > 0) {
++ /* the order-0 "buddy" is the bitmap itself;
++ * we keep its free bits set on purpose */
++ mb_clear_bit(block, buddy);
++ mb_clear_bit(block + 1, buddy);
++ }
++ e3b->bd_bd->bb_counters[order]--;
++ e3b->bd_bd->bb_counters[order]--;
++
++ block = block >> 1;
++ order++;
++ e3b->bd_bd->bb_counters[order]++;
++
++ mb_set_bit(block, buddy2);
++ buddy = buddy2;
++ } while (1);
++ }
++ mb_check_buddy(e3b);
++
++ return 0;
++}
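++/*
++ * coalescing sketch: freeing block 5 while block 4 is already free
++ * sets bit 5 at order 0, sees the 4/5 pair free, and sets bit 2 in
++ * the order-1 buddy; if blocks 6-7 are free too, the pass repeats
++ * and bit 1 of the order-2 buddy ends up covering blocks 4-7.
++ */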
++
++/*
++ * returns 1 if the out extent is enough to fill the needed space
++ */
++int mb_make_backward_extent(struct ext3_free_extent *in,
++ struct ext3_free_extent *out, int needed)
++{
++ int i;
++
++ J_ASSERT(in);
++ J_ASSERT(out);
++ J_ASSERT(in->fe_nums < MB_ARR_SIZE);
++
++ out->fe_len = 0;
++ out->fe_start = in->fe_start + in->fe_len;
++ out->fe_nums = 0;
++
++ /* for a single-chunk extent we don't need the back order;
++ * also, if an extent doesn't fill the needed space
++ * then it makes no sense to try the back order, because
++ * if we select this extent it'll be used as is */
++ if (in->fe_nums < 2 || in->fe_len < needed)
++ return 0;
++
++ i = in->fe_nums - 1;
++ while (i >= 0 && out->fe_len < needed) {
++ out->fe_len += (1 << in->fe_orders[i]);
++ out->fe_start -= (1 << in->fe_orders[i]);
++ i--;
++ }
++ /* FIXME: in some situations fe_orders may be too small to hold
++ * all the buddies */
++ J_ASSERT(out->fe_len >= needed);
++
++ for (i++; i < in->fe_nums; i++)
++ out->fe_orders[out->fe_nums++] = in->fe_orders[i];
++ J_ASSERT(out->fe_nums < MB_ARR_SIZE);
++ out->fe_back = 1;
++
++ return 1;
++}
++
++int mb_find_extent(struct ext3_buddy *e3b, int order, int block,
++ int needed, struct ext3_free_extent *ex)
++{
++ int space = needed;
++ int next, max, ord;
++ void *buddy;
++
++ J_ASSERT(ex != NULL);
++
++ ex->fe_nums = 0;
++ ex->fe_len = 0;
++
++ buddy = mb_find_buddy(e3b, order, &max);
++ J_ASSERT(buddy);
++ J_ASSERT(block < max);
++ if (!mb_test_bit(block, buddy))
++ goto nofree;
++
++ if (order == 0) {
++ /* find actual order */
++ order = mb_find_order_for_block(e3b, block);
++ block = block >> order;
++ }
++
++ ex->fe_orders[ex->fe_nums++] = order;
++ ex->fe_len = 1 << order;
++ ex->fe_start = block << order;
++ ex->fe_back = 0;
++
++ while ((space = space - (1 << order)) > 0) {
++
++ buddy = mb_find_buddy(e3b, order, &max);
++ J_ASSERT(buddy);
++
++ if (block + 1 >= max)
++ break;
++
++ next = (block + 1) * (1 << order);
++ if (!mb_test_bit(next, e3b->bd_bitmap))
++ break;
++
++ ord = mb_find_order_for_block(e3b, next);
++
++ if ((1 << ord) >= needed) {
++ /* we don't want to coalesce with self-sufficient buddies */
++ break;
++ }
++ order = ord;
++ block = next >> order;
++ ex->fe_len += 1 << order;
++
++ if (ex->fe_nums < MB_ARR_SIZE)
++ ex->fe_orders[ex->fe_nums++] = order;
++ }
++
++nofree:
++ J_ASSERT(ex->fe_start + ex->fe_len <= (1 << (e3b->bd_blkbits + 3)));
++ return ex->fe_len;
++}
++
++static int mb_mark_used_backward(struct ext3_buddy *e3b,
++ struct ext3_free_extent *ex, int len)
++{
++ int start = ex->fe_start, len0 = len;
++ int ord, mlen, max, cur;
++ void *buddy;
++
++ start = ex->fe_start + ex->fe_len - 1;
++ while (len) {
++ ord = mb_find_order_for_block(e3b, start);
++ if (((start >> ord) << ord) == (start - (1 << ord) + 1) &&
++ len >= (1 << ord)) {
++ /* the whole chunk may be allocated at once! */
++ mlen = 1 << ord;
++ buddy = mb_find_buddy(e3b, ord, &max);
++ J_ASSERT((start >> ord) < max);
++ mb_clear_bit(start >> ord, buddy);
++ e3b->bd_bd->bb_counters[ord]--;
++ start -= mlen;
++ len -= mlen;
++ J_ASSERT(len >= 0);
++ J_ASSERT(start >= 0);
++ continue;
++ }
++
++ /* we have to split large buddy */
++ J_ASSERT(ord > 0);
++ buddy = mb_find_buddy(e3b, ord, &max);
++ mb_clear_bit(start >> ord, buddy);
++ e3b->bd_bd->bb_counters[ord]--;
++
++ ord--;
++ cur = (start >> ord) & ~1U;
++ buddy = mb_find_buddy(e3b, ord, &max);
++ mb_set_bit(cur, buddy);
++ mb_set_bit(cur + 1, buddy);
++ e3b->bd_bd->bb_counters[ord]++;
++ e3b->bd_bd->bb_counters[ord]++;
++ }
++
++ /* now drop all the bits in bitmap */
++ mb_clear_bits(e3b->bd_bitmap, ex->fe_start + ex->fe_len - len0, len0);
++
++ mb_check_buddy(e3b);
++
++ return 0;
++}
++
++static int mb_mark_used_forward(struct ext3_buddy *e3b,
++ struct ext3_free_extent *ex, int len)
++{
++ int start = ex->fe_start, len0 = len;
++ int ord, mlen, max, cur;
++ void *buddy;
++
++ while (len) {
++ ord = mb_find_order_for_block(e3b, start);
++
++ if (((start >> ord) << ord) == start && len >= (1 << ord)) {
++ /* the whole chunk may be allocated at once! */
++ mlen = 1 << ord;
++ buddy = mb_find_buddy(e3b, ord, &max);
++ J_ASSERT((start >> ord) < max);
++ mb_clear_bit(start >> ord, buddy);
++ e3b->bd_bd->bb_counters[ord]--;
++ start += mlen;
++ len -= mlen;
++ J_ASSERT(len >= 0);
++ continue;
++ }
++
++ /* we have to split large buddy */
++ J_ASSERT(ord > 0);
++ buddy = mb_find_buddy(e3b, ord, &max);
++ mb_clear_bit(start >> ord, buddy);
++ e3b->bd_bd->bb_counters[ord]--;
++
++ ord--;
++ cur = (start >> ord) & ~1U;
++ buddy = mb_find_buddy(e3b, ord, &max);
++ mb_set_bit(cur, buddy);
++ mb_set_bit(cur + 1, buddy);
++ e3b->bd_bd->bb_counters[ord]++;
++ e3b->bd_bd->bb_counters[ord]++;
++ }
++
++ /* now drop all the bits in bitmap */
++ mb_clear_bits(e3b->bd_bitmap, ex->fe_start, len0);
++
++ mb_check_buddy(e3b);
++
++ return 0;
++}
++
++int inline mb_mark_used(struct ext3_buddy *e3b,
++ struct ext3_free_extent *ex, int len)
++{
++ int err;
++
++ J_ASSERT(ex);
++ if (ex->fe_back == 0)
++ err = mb_mark_used_forward(e3b, ex, len);
++ else
++ err = mb_mark_used_backward(e3b, ex, len);
++ return err;
++}
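++/*
++ * splitting sketch: marking blocks 8-10 used inside a free order-2
++ * chunk (blocks 8-11) clears the order-2 bit, sets both order-1
++ * halves, consumes 8-9 at order 1, then splits 10-11 down to
++ * order 0 and takes block 10, leaving block 11 free.
++ */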
++
++int ext3_mb_new_in_group(struct ext3_allocation_context *ac,
++ struct ext3_buddy *e3b, int group)
++{
++ struct super_block *sb = ac->ac_sb;
++ int err, gorder, max, i;
++ struct ext3_free_extent curex;
++
++ /* determine the order of the allocation */
++ gorder = 0;
++ while (ac->ac_g_len > (1 << gorder))
++ gorder++;
++
++ if ((ac->ac_g_flags & 1) && ac->ac_g_group == group) {
++ /* someone asks for space at this specific block;
++ * probably they want to merge it into an existing extent */
++ if (mb_test_bit(ac->ac_g_start, e3b->bd_bitmap)) {
++ /* good. at least one block is free */
++ max = mb_find_extent(e3b, 0, ac->ac_g_start,
++ ac->ac_g_len, &curex);
++ max = min(curex.fe_len, ac->ac_g_len);
++ mb_mark_used(e3b, &curex, max);
++
++ ac->ac_b_group = group;
++ ac->ac_b_start = curex.fe_start;
++ ac->ac_b_len = max;
++ ac->ac_status = AC_STATUS_FOUND;
++ err = 0;
++ goto out;
++ }
++ /* don't try to find goal anymore */
++ ac->ac_g_flags &= ~1;
++ }
++
++ i = 0;
++ while (1) {
++ i = find_next_bit(e3b->bd_bitmap, sb->s_blocksize * 8, i);
++ if (i >= sb->s_blocksize * 8)
++ break;
++
++ max = mb_find_extent(e3b, 0, i, ac->ac_g_len, &curex);
++ if (max >= ac->ac_g_len) {
++ max = min(curex.fe_len, ac->ac_g_len);
++ mb_mark_used(e3b, &curex, max);
++
++ ac->ac_b_group = group;
++ ac->ac_b_start = curex.fe_start;
++ ac->ac_b_len = max;
++ ac->ac_status = AC_STATUS_FOUND;
++ break;
++ }
++ i += max;
++ }
++
++ return 0;
++
++out:
++ return err;
++}
++
++int mb_good_group(struct ext3_allocation_context *ac, int group, int cr)
++{
++ struct ext3_group_desc *gdp;
++ int free_blocks;
++
++ gdp = ext3_get_group_desc(ac->ac_sb, group, NULL);
++ if (!gdp)
++ return 0;
++ free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
++ if (free_blocks == 0)
++ return 0;
++
++ /* someone wants this block very much */
++ if ((ac->ac_g_flags & 1) && ac->ac_g_group == group)
++ return 1;
++
++ /* FIXME: I'd like to take fragmentation into account here */
++ if (cr == 0) {
++ if (free_blocks >= ac->ac_g_len >> 1)
++ return 1;
++ } else if (cr == 1) {
++ if (free_blocks >= ac->ac_g_len >> 2)
++ return 1;
++ } else if (cr == 2) {
++ return 1;
++ } else {
++ BUG();
++ }
++ return 0;
++}
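++/*
++ * e.g. for a request of 64 blocks: on the cr=0 pass a group
++ * qualifies with >= 32 free blocks, on cr=1 with >= 16, and on
++ * the final cr=2 pass any group with free blocks is accepted.
++ */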
++
++int ext3_mb_new_blocks(handle_t *handle, struct inode *inode,
++ unsigned long goal, int *len, int flags, int *errp)
++{
++ struct buffer_head *bitmap_bh = NULL;
++ struct ext3_allocation_context ac;
++ int i, group, block, cr, err = 0;
++ struct ext3_group_desc *gdp;
++ struct ext3_super_block *es;
++ struct buffer_head *gdp_bh;
++ struct ext3_sb_info *sbi;
++ struct super_block *sb;
++ struct ext3_buddy e3b;
++
++ J_ASSERT(len != NULL);
++ J_ASSERT(*len > 0);
++
++ sb = inode->i_sb;
++ if (!sb) {
++ printk("ext3_mb_new_nblocks: nonexistent device");
++ return 0;
++ }
++
++ if (!test_opt(sb, MBALLOC)) {
++ static int ext3_mballoc_warning = 0;
++ if (ext3_mballoc_warning == 0) {
++ printk(KERN_ERR "EXT3-fs: multiblock request with "
++ "mballoc disabled!\n");
++ ext3_mballoc_warning++;
++ }
++ *len = 1;
++ err = ext3_new_block_old(handle, inode, goal, errp);
++ return err;
++ }
++
++ ext3_mb_poll_new_transaction(sb, handle);
++
++ sbi = EXT3_SB(sb);
++ es = EXT3_SB(sb)->s_es;
++
++ if (!(flags & 2)) {
++ /* someone asks for non-reserved blocks */
++ BUG_ON(*len > 1);
++ err = ext3_mb_reserve_blocks(sb, 1);
++ if (err) {
++ *errp = err;
++ return 0;
++ }
++ }
++
++ /*
++ * Check quota for the allocation of these blocks.
++ */
++ while (*len && DQUOT_ALLOC_BLOCK(inode, *len))
++ *len -= 1;
++ if (*len == 0) {
++ *errp = -EDQUOT;
++ block = 0;
++ goto out;
++ }
++
++ /* start searching from the goal */
++ if (goal < le32_to_cpu(es->s_first_data_block) ||
++ goal >= le32_to_cpu(es->s_blocks_count))
++ goal = le32_to_cpu(es->s_first_data_block);
++ group = (goal - le32_to_cpu(es->s_first_data_block)) /
++ EXT3_BLOCKS_PER_GROUP(sb);
++ block = ((goal - le32_to_cpu(es->s_first_data_block)) %
++ EXT3_BLOCKS_PER_GROUP(sb));
++
++ /* set up allocation goals */
++ ac.ac_b_group = ac.ac_b_start = ac.ac_b_len = 0;
++ ac.ac_status = 0;
++ ac.ac_groups_scanned = 0;
++ ac.ac_sb = inode->i_sb;
++ ac.ac_g_group = group;
++ ac.ac_g_start = block;
++ ac.ac_g_len = *len;
++ ac.ac_g_flags = flags;
++
++ /* loop over the groups */
++ for (cr = 0; cr < 3 && ac.ac_status != AC_STATUS_FOUND; cr++) {
++ for (i = 0; i < EXT3_SB(sb)->s_groups_count; group++, i++) {
++ if (group == EXT3_SB(sb)->s_groups_count)
++ group = 0;
++
++ /* check whether the group is good for our criteria */
++ if (!mb_good_group(&ac, group, cr))
++ continue;
++
++ err = ext3_mb_load_desc(ac.ac_sb, group, &e3b);
++ if (err)
++ goto out_err;
++
++ ext3_lock_group(sb, group);
++ if (!mb_good_group(&ac, group, cr)) {
++ /* someone did allocation from this group */
++ ext3_unlock_group(sb, group);
++ ext3_mb_release_desc(&e3b);
++ continue;
++ }
++
++ err = ext3_mb_new_in_group(&ac, &e3b, group);
++ ext3_unlock_group(sb, group);
++ if (ac.ac_status == AC_STATUS_FOUND)
++ ext3_mb_dirty_buddy(&e3b);
++ ext3_mb_release_desc(&e3b);
++ if (err)
++ goto out_err;
++ if (ac.ac_status == AC_STATUS_FOUND)
++ break;
++ }
++ }
++
++ if (ac.ac_status != AC_STATUS_FOUND) {
++ /* unfortunately, we can't satisfy this request */
++ J_ASSERT(ac.ac_b_len == 0);
++ DQUOT_FREE_BLOCK(inode, *len);
++ *errp = -ENOSPC;
++ block = 0;
++ goto out;
++ }
++
++ /* good news - free block(s) have been found. now it's time
++ * to mark the block(s) in the good old journaled bitmap */
++ block = ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb)
++ + ac.ac_b_start + le32_to_cpu(es->s_first_data_block);
++
++ ext3_debug("using block group %d\n", ac.ac_b_group);
++
++ bitmap_bh = read_block_bitmap(sb, ac.ac_b_group);
++ if (!bitmap_bh) {
++ *errp = -EIO;
++ goto out_err;
++ }
++
++ err = ext3_journal_get_write_access(handle, bitmap_bh);
++ if (err) {
++ *errp = err;
++ goto out_err;
++ }
++
++ gdp = ext3_get_group_desc(sb, ac.ac_b_group, &gdp_bh);
++ if (!gdp) {
++ *errp = -EIO;
++ goto out_err;
++ }
++
++ err = ext3_journal_get_write_access(handle, gdp_bh);
++ if (err)
++ goto out_err;
++
++ block = ac.ac_b_start + ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb)
++ + le32_to_cpu(es->s_first_data_block);
++
++ if (block == le32_to_cpu(gdp->bg_block_bitmap) ||
++ block == le32_to_cpu(gdp->bg_inode_bitmap) ||
++ in_range(block, le32_to_cpu(gdp->bg_inode_table),
++ EXT3_SB(sb)->s_itb_per_group))
++ ext3_error(sb, "ext3_new_block",
++ "Allocating block in system zone - "
++ "block = %u", block);
++#if 0
++ for (i = 0; i < ac.ac_b_len; i++)
++ J_ASSERT(!mb_test_bit(ac.ac_b_start + i, bitmap_bh->b_data));
++#endif
++ mb_set_bits(bitmap_bh->b_data, ac.ac_b_start, ac.ac_b_len);
++
++ ext3_lock_group(sb, ac.ac_b_group);
++ gdp->bg_free_blocks_count =
++ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) -
++ ac.ac_b_len);
++ ext3_unlock_group(sb, ac.ac_b_group);
++ percpu_counter_mod(&sbi->s_freeblocks_counter, -ac.ac_b_len);
++
++ err = ext3_journal_dirty_metadata(handle, bitmap_bh);
++ if (err)
++ goto out_err;
++ err = ext3_journal_dirty_metadata(handle, gdp_bh);
++ if (err)
++ goto out_err;
++
++ sb->s_dirt = 1;
++ *errp = 0;
++ brelse(bitmap_bh);
++
++ /* drop non-allocated, but DQUOT-charged blocks */
++ J_ASSERT(*len >= ac.ac_b_len);
++ DQUOT_FREE_BLOCK(inode, *len - ac.ac_b_len);
++
++ *len = ac.ac_b_len;
++ J_ASSERT(block != 0);
++ goto out;
++
++out_err:
++ /* if we've already allocated something, roll it back */
++ if (ac.ac_status == AC_STATUS_FOUND) {
++ /* FIXME: free blocks here */
++ }
++
++ DQUOT_FREE_BLOCK(inode, *len);
++ brelse(bitmap_bh);
++ *errp = err;
++ block = 0;
++out:
++ if (!(flags & 2)) {
++ /* the block wasn't reserved before and we reserved it
++ * at the beginning of the allocation. it doesn't matter
++ * whether we allocated anything or we failed: it's time
++ * to release the reservation. NOTE: since I expect any
++ * multiblock request to come from the delayed allocation
++ * path only, this is always a single block */
++ ext3_mb_release_blocks(sb, 1);
++ }
++ return block;
++}
++
++int ext3_mb_generate_buddy(struct super_block *sb, int group)
++{
++ struct buffer_head *bh;
++ int i, err, count = 0;
++ struct ext3_buddy e3b;
++
++ err = ext3_mb_load_desc(sb, group, &e3b);
++ if (err)
++ goto out;
++ memset(e3b.bd_bh->b_data, 0, sb->s_blocksize);
++ memset(e3b.bd_bh2->b_data, 0, sb->s_blocksize);
++
++ bh = read_block_bitmap(sb, group);
++ if (bh == NULL) {
++ err = -EIO;
++ goto out2;
++ }
++
++ /* loop over the blocks, and create buddies for the free ones */
++ for (i = 0; i < sb->s_blocksize * 8; i++) {
++ if (!mb_test_bit(i, (void *) bh->b_data)) {
++ mb_free_blocks(&e3b, i, 1);
++ count++;
++ }
++ }
++ brelse(bh);
++ mb_check_buddy(&e3b);
++ ext3_mb_dirty_buddy(&e3b);
++
++out2:
++ ext3_mb_release_desc(&e3b);
++out:
++ return err;
++}
++
++EXPORT_SYMBOL(ext3_mb_new_blocks);
++
++#define MB_CREDITS \
++ (EXT3_DATA_TRANS_BLOCKS + 3 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + \
++ + 2 * EXT3_SINGLEDATA_TRANS_BLOCKS)
++
++int ext3_mb_init_backend(struct super_block *sb)
++{
++ struct inode *root = sb->s_root->d_inode;
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ struct dentry *db;
++ tid_t target;
++ int err, i;
++
++ sbi->s_buddy_blocks = kmalloc(sizeof(struct ext3_buddy_group_blocks) *
++ sbi->s_groups_count, GFP_KERNEL);
++ if (sbi->s_buddy_blocks == NULL) {
++ printk("can't allocate mem for buddy maps\n");
++ return -ENOMEM;
++ }
++ memset(sbi->s_buddy_blocks, 0,
++ sizeof(struct ext3_buddy_group_blocks) * sbi->s_groups_count);
++ sbi->s_buddy = NULL;
++
++ down(&root->i_sem);
++ db = lookup_one_len(EXT3_BUDDY_FILE, sb->s_root,
++ strlen(EXT3_BUDDY_FILE));
++ if (IS_ERR(db)) {
++ err = PTR_ERR(db);
++ printk("can't lookup buddy file: %d\n", err);
++ goto out;
++ }
++
++ if (db->d_inode != NULL) {
++ sbi->s_buddy = igrab(db->d_inode);
++ goto map;
++ }
++
++ err = ext3_create(root, db, S_IFREG, NULL);
++ if (err) {
++ printk("error while creation buddy file: %d\n", err);
++ } else {
++ sbi->s_buddy = igrab(db->d_inode);
++ }
++
++map:
++ for (i = 0; i < sbi->s_groups_count; i++) {
++ struct buffer_head *bh = NULL;
++ handle_t *handle;
++
++ handle = ext3_journal_start(sbi->s_buddy, MB_CREDITS);
++ if (IS_ERR(handle)) {
++ err = PTR_ERR(handle);
++ goto out2;
++ }
++
++ /* allocate block for bitmap */
++ bh = ext3_getblk(handle, sbi->s_buddy, i * 2, 1, &err);
++ if (bh == NULL) {
++ printk("can't get block for buddy bitmap: %d\n", err);
++ goto out2;
++ }
++ sbi->s_buddy_blocks[i].bb_bitmap = bh->b_blocknr;
++ brelse(bh);
++
++ /* allocate block for buddy */
++ bh = ext3_getblk(handle, sbi->s_buddy, i * 2 + 1, 1, &err);
++ if (bh == NULL) {
++ printk("can't get block for buddy: %d\n", err);
++ goto out2;
++ }
++ sbi->s_buddy_blocks[i].bb_buddy = bh->b_blocknr;
++ brelse(bh);
++ ext3_journal_stop(handle);
++ spin_lock_init(&sbi->s_buddy_blocks[i].bb_lock);
++ sbi->s_buddy_blocks[i].bb_md_cur = NULL;
++ sbi->s_buddy_blocks[i].bb_tid = 0;
++ }
++
++ if (journal_start_commit(sbi->s_journal, &target))
++ log_wait_commit(sbi->s_journal, target);
++
++out2:
++ dput(db);
++out:
++ up(&root->i_sem);
++ return err;
++}
++
++int ext3_mb_release(struct super_block *sb)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++
++ if (!test_opt(sb, MBALLOC))
++ return 0;
++
++ /* release freed, non-committed blocks */
++ spin_lock(&sbi->s_md_lock);
++ list_splice_init(&sbi->s_closed_transaction,
++ &sbi->s_committed_transaction);
++ list_splice_init(&sbi->s_active_transaction,
++ &sbi->s_committed_transaction);
++ spin_unlock(&sbi->s_md_lock);
++ ext3_mb_free_committed_blocks(sb);
++
++ if (sbi->s_buddy_blocks)
++ kfree(sbi->s_buddy_blocks);
++ if (sbi->s_buddy)
++ iput(sbi->s_buddy);
++ if (sbi->s_blocks_reserved)
++ printk("ext3-fs: %ld blocks being reserved at umount!\n",
++ sbi->s_blocks_reserved);
++ return 0;
++}
++
++int ext3_mb_init(struct super_block *sb)
++{
++ struct ext3_super_block *es;
++ int i;
++
++ if (!test_opt(sb, MBALLOC))
++ return 0;
++
++ /* init file for buddy data */
++ clear_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC);
++ ext3_mb_init_backend(sb);
++
++ es = EXT3_SB(sb)->s_es;
++ for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++)
++ ext3_mb_generate_buddy(sb, i);
++ spin_lock_init(&EXT3_SB(sb)->s_reserve_lock);
++ spin_lock_init(&EXT3_SB(sb)->s_md_lock);
++ INIT_LIST_HEAD(&EXT3_SB(sb)->s_active_transaction);
++ INIT_LIST_HEAD(&EXT3_SB(sb)->s_closed_transaction);
++ INIT_LIST_HEAD(&EXT3_SB(sb)->s_committed_transaction);
++ set_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC);
++ printk("EXT3-fs: mballoc enabled\n");
++ return 0;
++}
++
++void ext3_mb_free_committed_blocks(struct super_block *sb)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ int err, i, count = 0, count2 = 0;
++ struct ext3_free_metadata *md;
++ struct ext3_buddy e3b;
++
++ if (list_empty(&sbi->s_committed_transaction))
++ return;
++
++ /* there are committed blocks still to be freed */
++ do {
++ /* get next array of blocks */
++ md = NULL;
++ spin_lock(&sbi->s_md_lock);
++ if (!list_empty(&sbi->s_committed_transaction)) {
++ md = list_entry(sbi->s_committed_transaction.next,
++ struct ext3_free_metadata, list);
++ list_del(&md->list);
++ }
++ spin_unlock(&sbi->s_md_lock);
++
++ if (md == NULL)
++ break;
++
++ mb_debug("gonna free %u blocks in group %u (0x%p):",
++ md->num, md->group, md);
++
++ err = ext3_mb_load_desc(sb, md->group, &e3b);
++ BUG_ON(err != 0);
++
++ /* there are blocks to put in buddy to make them really free */
++ count += md->num;
++ count2++;
++ ext3_lock_group(sb, md->group);
++ for (i = 0; i < md->num; i++) {
++ mb_debug(" %u", md->blocks[i]);
++ mb_free_blocks(&e3b, md->blocks[i], 1);
++ }
++ mb_debug("\n");
++ ext3_unlock_group(sb, md->group);
++
++ kfree(md);
++ ext3_mb_dirty_buddy(&e3b);
++ ext3_mb_release_desc(&e3b);
++
++ } while (md);
++ mb_debug("freed %u blocks in %u structures\n", count, count2);
++}
++
++void ext3_mb_poll_new_transaction(struct super_block *sb, handle_t *handle)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++
++ if (sbi->s_last_transaction == handle->h_transaction->t_tid)
++ return;
++
++ /* new transaction! time to close the last one and free blocks
++ * for the committed transaction. we know that only one
++ * transaction can be active, so the previous transaction may
++ * still be being logged, while the transaction before the
++ * previous one is known to be already logged. this means we may
++ * now free blocks freed in all transactions before the previous
++ * one. hope I'm clear enough ... */
++
++ spin_lock(&sbi->s_md_lock);
++ if (sbi->s_last_transaction != handle->h_transaction->t_tid) {
++ mb_debug("new transaction %lu, old %lu\n",
++ (unsigned long) handle->h_transaction->t_tid,
++ (unsigned long) sbi->s_last_transaction);
++ list_splice_init(&sbi->s_closed_transaction,
++ &sbi->s_committed_transaction);
++ list_splice_init(&sbi->s_active_transaction,
++ &sbi->s_closed_transaction);
++ sbi->s_last_transaction = handle->h_transaction->t_tid;
++ }
++ spin_unlock(&sbi->s_md_lock);
++
++ ext3_mb_free_committed_blocks(sb);
++}
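++/*
++ * timeline sketch: with transactions T1, T2, T3 in sequence, while
++ * T3 is active the lists hold: active = blocks freed under T3,
++ * closed = blocks freed under T2 (possibly still committing), and
++ * committed = blocks freed under T1, now safe to hand out again.
++ */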
++
++int ext3_mb_free_metadata(handle_t *handle, struct ext3_buddy *e3b,
++ int group, int block, int count)
++{
++ struct ext3_buddy_group_blocks *db = e3b->bd_bd;
++ struct super_block *sb = e3b->bd_sb;
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ struct ext3_free_metadata *md;
++ int i;
++
++ ext3_lock_group(sb, group);
++ for (i = 0; i < count; i++) {
++ md = db->bb_md_cur;
++ if (md && db->bb_tid != handle->h_transaction->t_tid) {
++ db->bb_md_cur = NULL;
++ md = NULL;
++ }
++
++ if (md == NULL) {
++ ext3_unlock_group(sb, group);
++ md = kmalloc(sizeof(*md), GFP_KERNEL);
++ if (md == NULL)
++ return -ENOMEM;
++ md->num = 0;
++ md->group = group;
++
++ ext3_lock_group(sb, group);
++ if (db->bb_md_cur == NULL) {
++ spin_lock(&sbi->s_md_lock);
++ list_add(&md->list, &sbi->s_active_transaction);
++ spin_unlock(&sbi->s_md_lock);
++ db->bb_md_cur = md;
++ db->bb_tid = handle->h_transaction->t_tid;
++ mb_debug("new md 0x%p for group %u\n",
++ md, md->group);
++ } else {
++ kfree(md);
++ md = db->bb_md_cur;
++ }
++ }
++
++ BUG_ON(md->num >= EXT3_BB_MAX_BLOCKS);
++ md->blocks[md->num] = block + i;
++ md->num++;
++ if (md->num == EXT3_BB_MAX_BLOCKS) {
++ /* no more space, put full container on a sb's list */
++ db->bb_md_cur = NULL;
++ }
++ }
++ ext3_unlock_group(sb, group);
++ return 0;
++}
++
++void ext3_mb_free_blocks(handle_t *handle, struct inode *inode,
++ unsigned long block, unsigned long count, int metadata)
++{
++ struct buffer_head *bitmap_bh = NULL;
++ struct ext3_group_desc *gdp;
++ struct ext3_super_block *es;
++ unsigned long bit, overflow;
++ struct buffer_head *gd_bh;
++ unsigned long block_group;
++ struct ext3_sb_info *sbi;
++ struct super_block *sb;
++ struct ext3_buddy e3b;
++ int err = 0, ret;
++
++ sb = inode->i_sb;
++ if (!sb) {
++ printk ("ext3_free_blocks: nonexistent device");
++ return;
++ }
++
++ ext3_mb_poll_new_transaction(sb, handle);
++
++ sbi = EXT3_SB(sb);
++ es = EXT3_SB(sb)->s_es;
++ if (block < le32_to_cpu(es->s_first_data_block) ||
++ block + count < block ||
++ block + count > le32_to_cpu(es->s_blocks_count)) {
++ ext3_error (sb, "ext3_free_blocks",
++ "Freeing blocks not in datazone - "
++ "block = %lu, count = %lu", block, count);
++ goto error_return;
++ }
++
++ ext3_debug("freeing block %lu\n", block);
++
++do_more:
++ overflow = 0;
++ block_group = (block - le32_to_cpu(es->s_first_data_block)) /
++ EXT3_BLOCKS_PER_GROUP(sb);
++ bit = (block - le32_to_cpu(es->s_first_data_block)) %
++ EXT3_BLOCKS_PER_GROUP(sb);
++ /*
++ * Check to see if we are freeing blocks across a group
++ * boundary.
++ */
++ if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) {
++ overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
++ count -= overflow;
++ }
++ brelse(bitmap_bh);
++ bitmap_bh = read_block_bitmap(sb, block_group);
++ if (!bitmap_bh)
++ goto error_return;
++ gdp = ext3_get_group_desc (sb, block_group, &gd_bh);
++ if (!gdp)
++ goto error_return;
++
++ if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) ||
++ in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) ||
++ in_range (block, le32_to_cpu(gdp->bg_inode_table),
++ EXT3_SB(sb)->s_itb_per_group) ||
++ in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table),
++ EXT3_SB(sb)->s_itb_per_group))
++ ext3_error (sb, "ext3_free_blocks",
++ "Freeing blocks in system zones - "
++ "Block = %lu, count = %lu",
++ block, count);
++
++ BUFFER_TRACE(bitmap_bh, "getting write access");
++ err = ext3_journal_get_write_access(handle, bitmap_bh);
++ if (err)
++ goto error_return;
++
++ /*
++ * We are about to modify some metadata. Call the journal APIs
++ * to unshare ->b_data if a currently-committing transaction is
++ * using it
++ */
++ BUFFER_TRACE(gd_bh, "get_write_access");
++ err = ext3_journal_get_write_access(handle, gd_bh);
++ if (err)
++ goto error_return;
++
++ err = ext3_mb_load_desc(sb, block_group, &e3b);
++ if (err)
++ goto error_return;
++
++ if (metadata) {
++ /* the blocks being freed are metadata; they must not be
++  * reused until this transaction is committed */
++ ext3_mb_free_metadata(handle, &e3b, block_group, bit, count);
++ } else {
++ ext3_lock_group(sb, block_group);
++ mb_free_blocks(&e3b, bit, count);
++ gdp->bg_free_blocks_count =
++ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
++ ext3_unlock_group(sb, block_group);
++ percpu_counter_mod(&sbi->s_freeblocks_counter, count);
++ }
++
++ ext3_mb_dirty_buddy(&e3b);
++ ext3_mb_release_desc(&e3b);
++
++ /* FIXME: undo logic will be implemented later and another way */
++ mb_clear_bits(bitmap_bh->b_data, bit, count);
++ DQUOT_FREE_BLOCK(inode, count);
++
++ /* We dirtied the bitmap block */
++ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
++ err = ext3_journal_dirty_metadata(handle, bitmap_bh);
++
++ /* And the group descriptor block */
++ BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
++ ret = ext3_journal_dirty_metadata(handle, gd_bh);
++ if (!err) err = ret;
++
++ if (overflow && !err) {
++ block += count;
++ count = overflow;
++ goto do_more;
++ }
++ sb->s_dirt = 1;
++error_return:
++ brelse(bitmap_bh);
++ ext3_std_error(sb, err);
++ return;
++}
++
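++/*
++ * Set aside @blocks from the free-block count so a later allocation
++ * cannot run the fs out of space; paired with ext3_mb_release_blocks().
++ */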
++int ext3_mb_reserve_blocks(struct super_block *sb, int blocks)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ int free, ret = -ENOSPC;
++
++ BUG_ON(blocks < 0);
++ spin_lock(&sbi->s_reserve_lock);
++ free = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
++ if (blocks <= free - sbi->s_blocks_reserved) {
++ sbi->s_blocks_reserved += blocks;
++ ret = 0;
++ }
++ spin_unlock(&sbi->s_reserve_lock);
++ return ret;
++}
++
++void ext3_mb_release_blocks(struct super_block *sb, int blocks)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++
++ BUG_ON(blocks < 0);
++ spin_lock(&sbi->s_reserve_lock);
++ sbi->s_blocks_reserved -= blocks;
++ WARN_ON(sbi->s_blocks_reserved < 0);
++ if (sbi->s_blocks_reserved < 0)
++ sbi->s_blocks_reserved = 0;
++ spin_unlock(&sbi->s_reserve_lock);
++}
++
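++/* the entry points below dispatch on the "mballoc" mount option: with it
++ * set, block allocation and freeing go through the buddy allocator */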
++int ext3_new_block(handle_t *handle, struct inode *inode,
++ unsigned long goal, int *errp)
++{
++ int ret, len;
++
++ if (!test_opt(inode->i_sb, MBALLOC)) {
++ ret = ext3_new_block_old(handle, inode, goal, errp);
++ goto out;
++ }
++ len = 1;
++ ret = ext3_mb_new_blocks(handle, inode, goal, &len, 0, errp);
++out:
++ return ret;
++}
++
++
++void ext3_free_blocks(handle_t *handle, struct inode * inode,
++ unsigned long block, unsigned long count, int metadata)
++{
++ if (!test_opt(inode->i_sb, MBALLOC))
++ ext3_free_blocks_old(handle, inode, block, count);
++ else
++ ext3_mb_free_blocks(handle, inode, block, count, metadata);
++ return;
++}
++
+Index: linux-2.6.5-sles9/fs/ext3/super.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/super.c 2004-11-09 02:23:21.597220752 +0300
++++ linux-2.6.5-sles9/fs/ext3/super.c 2004-11-09 02:26:12.572228600 +0300
+@@ -389,6 +389,7 @@
+ struct ext3_super_block *es = sbi->s_es;
+ int i;
+
++ ext3_mb_release(sb);
+ ext3_ext_release(sb);
+ ext3_xattr_put_super(sb);
+ journal_destroy(sbi->s_journal);
+@@ -542,7 +543,7 @@
+ Opt_commit, Opt_journal_update, Opt_journal_inum,
+ Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
+ Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
+- Opt_err, Opt_extents, Opt_extdebug
++ Opt_err, Opt_extents, Opt_extdebug, Opt_mballoc,
+ };
+
+ static match_table_t tokens = {
+@@ -589,6 +590,7 @@
+ {Opt_iopen_nopriv, "iopen_nopriv"},
+ {Opt_extents, "extents"},
+ {Opt_extdebug, "extdebug"},
++ {Opt_mballoc, "mballoc"},
+ {Opt_err, NULL}
+ };
+
+@@ -810,6 +812,9 @@
+ case Opt_extdebug:
+ set_opt (sbi->s_mount_opt, EXTDEBUG);
+ break;
++ case Opt_mballoc:
++ set_opt (sbi->s_mount_opt, MBALLOC);
++ break;
+ default:
+ printk (KERN_ERR
+ "EXT3-fs: Unrecognized mount option \"%s\" "
+@@ -1463,7 +1468,8 @@
+ ext3_count_dirs(sb));
+
+ ext3_ext_init(sb);
+-
++ ext3_mb_init(sb);
++
+ return 0;
+
+ failed_mount3:
+Index: linux-2.6.5-sles9/fs/ext3/Makefile
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/Makefile 2004-11-09 02:23:21.593221360 +0300
++++ linux-2.6.5-sles9/fs/ext3/Makefile 2004-11-09 02:26:12.572228600 +0300
+@@ -5,7 +5,7 @@
+ obj-$(CONFIG_EXT3_FS) += ext3.o
+
+ ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+- ioctl.o namei.o super.o symlink.o hash.o extents.o
++ ioctl.o namei.o super.o symlink.o hash.o extents.o mballoc.o
+
+ ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
+ ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
+Index: linux-2.6.5-sles9/fs/ext3/balloc.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/balloc.c 2004-11-03 08:36:51.000000000 +0300
++++ linux-2.6.5-sles9/fs/ext3/balloc.c 2004-11-09 02:26:53.078070776 +0300
+@@ -78,7 +78,7 @@
+ *
+ * Return buffer_head on success or NULL in case of failure.
+ */
+-static struct buffer_head *
++struct buffer_head *
+ read_block_bitmap(struct super_block *sb, unsigned int block_group)
+ {
+ struct ext3_group_desc * desc;
+@@ -274,7 +274,7 @@
+ }
+
+ /* Free given blocks, update quota and i_blocks field */
+-void ext3_free_blocks(handle_t *handle, struct inode *inode,
++void ext3_free_blocks_old(handle_t *handle, struct inode *inode,
+ unsigned long block, unsigned long count)
+ {
+ struct buffer_head *bitmap_bh = NULL;
+@@ -1142,7 +1142,7 @@
+ * bitmap, and then for any free bit if that fails.
+ * This function also updates quota and i_blocks field.
+ */
+-int ext3_new_block(handle_t *handle, struct inode *inode,
++int ext3_new_block_old(handle_t *handle, struct inode *inode,
+ unsigned long goal, int *errp)
+ {
+ struct buffer_head *bitmap_bh = NULL;
+Index: linux-2.6.5-sles9/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/namei.c 2004-11-09 02:18:27.616912552 +0300
++++ linux-2.6.5-sles9/fs/ext3/namei.c 2004-11-09 02:26:12.580227384 +0300
+@@ -1640,7 +1640,7 @@
+ * If the create succeeds, we fill in the inode information
+ * with d_instantiate().
+ */
+-static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
++int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
+ struct nameidata *nd)
+ {
+ handle_t *handle;
+Index: linux-2.6.5-sles9/fs/ext3/inode.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/inode.c 2004-11-09 02:23:21.592221512 +0300
++++ linux-2.6.5-sles9/fs/ext3/inode.c 2004-11-09 02:26:12.587226320 +0300
+@@ -572,7 +572,7 @@
+ ext3_journal_forget(handle, branch[i].bh);
+ }
+ for (i = 0; i < keys; i++)
+- ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1);
++ ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1);
+ return err;
+ }
+
+@@ -673,7 +673,7 @@
+ if (err == -EAGAIN)
+ for (i = 0; i < num; i++)
+ ext3_free_blocks(handle, inode,
+- le32_to_cpu(where[i].key), 1);
++ le32_to_cpu(where[i].key), 1, 1);
+ return err;
+ }
+
+@@ -1829,7 +1829,7 @@
+ }
+ }
+
+- ext3_free_blocks(handle, inode, block_to_free, count);
++ ext3_free_blocks(handle, inode, block_to_free, count, 1);
+ }
+
+ /**
+@@ -2000,7 +2000,7 @@
+ ext3_journal_test_restart(handle, inode);
+ }
+
+- ext3_free_blocks(handle, inode, nr, 1);
++ ext3_free_blocks(handle, inode, nr, 1, 1);
+
+ if (parent_bh) {
+ /*
+Index: linux-2.6.5-sles9/fs/ext3/extents.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/extents.c 2004-11-09 02:25:56.143726112 +0300
++++ linux-2.6.5-sles9/fs/ext3/extents.c 2004-11-09 02:26:12.591225712 +0300
+@@ -740,7 +740,7 @@
+ for (i = 0; i < depth; i++) {
+ if (!ablocks[i])
+ continue;
+- ext3_free_blocks(handle, tree->inode, ablocks[i], 1);
++ ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1);
+ }
+ }
+ kfree(ablocks);
+@@ -1391,7 +1391,7 @@
+ path->p_idx->ei_leaf);
+ bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
+ ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
+- ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1);
++ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1);
+ return err;
+ }
+
+@@ -1879,10 +1879,12 @@
+ int needed = ext3_remove_blocks_credits(tree, ex, from, to);
+ handle_t *handle = ext3_journal_start(tree->inode, needed);
+ struct buffer_head *bh;
+- int i;
++ int i, metadata = 0;
+
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
++ if (S_ISDIR(tree->inode->i_mode))
++ metadata = 1;
+ if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
+ /* tail removal */
+ unsigned long num, start;
+@@ -1894,7 +1896,7 @@
+ bh = sb_find_get_block(tree->inode->i_sb, start + i);
+ ext3_forget(handle, 0, tree->inode, bh, start + i);
+ }
+- ext3_free_blocks(handle, tree->inode, start, num);
++ ext3_free_blocks(handle, tree->inode, start, num, metadata);
+ } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
+ printk("strange request: removal %lu-%lu from %u:%u\n",
+ from, to, ex->ee_block, ex->ee_len);
+Index: linux-2.6.5-sles9/fs/ext3/xattr.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/xattr.c 2004-11-09 02:22:55.777146000 +0300
++++ linux-2.6.5-sles9/fs/ext3/xattr.c 2004-11-09 02:26:12.593225408 +0300
+@@ -1366,7 +1366,7 @@
+ new_bh = sb_getblk(sb, block);
+ if (!new_bh) {
+ getblk_failed:
+- ext3_free_blocks(handle, inode, block, 1);
++ ext3_free_blocks(handle, inode, block, 1, 1);
+ error = -EIO;
+ goto cleanup;
+ }
+@@ -1408,7 +1408,7 @@
+ if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
+ /* Free the old block. */
+ ea_bdebug(old_bh, "freeing");
+- ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1);
++ ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1, 1);
+
+ /* ext3_forget() calls bforget() for us, but we
+ let our caller release old_bh, so we need to
+@@ -1504,7 +1504,7 @@
+ lock_buffer(bh);
+ if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
+ ext3_xattr_cache_remove(bh);
+- ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1);
++ ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1, 1);
+ get_bh(bh);
+ ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
+ } else {
+Index: linux-2.6.5-sles9/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.6.5-sles9.orig/include/linux/ext3_fs.h 2004-11-09 02:25:17.238640584 +0300
++++ linux-2.6.5-sles9/include/linux/ext3_fs.h 2004-11-09 02:26:12.596224952 +0300
+@@ -57,6 +57,8 @@
+ #define ext3_debug(f, a...) do {} while (0)
+ #endif
+
++#define EXT3_MULTIBLOCK_ALLOCATOR 1
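++/* feature-test macro: lets dependent code detect that the multiblock
++ * allocator patch is applied to these headers */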
++
+ /*
+ * Special inodes numbers
+ */
+@@ -339,6 +341,7 @@
+ #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */
+ #define EXT3_MOUNT_EXTENTS 0x100000/* Extents support */
+ #define EXT3_MOUNT_EXTDEBUG 0x200000/* Extents debug */
++#define EXT3_MOUNT_MBALLOC 0x400000/* Buddy allocation support */
+
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef clear_opt
+@@ -698,7 +701,7 @@
+ extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
+ extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
+ extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
+- unsigned long);
++ unsigned long, int);
+ extern unsigned long ext3_count_free_blocks (struct super_block *);
+ extern void ext3_check_blocks_bitmap (struct super_block *);
+ extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
+Index: linux-2.6.5-sles9/include/linux/ext3_fs_sb.h
+===================================================================
+--- linux-2.6.5-sles9.orig/include/linux/ext3_fs_sb.h 2004-11-09 02:20:51.598024096 +0300
++++ linux-2.6.5-sles9/include/linux/ext3_fs_sb.h 2004-11-09 02:28:18.753046200 +0300
+@@ -23,10 +23,30 @@
+ #define EXT_INCLUDE
+ #include <linux/blockgroup_lock.h>
+ #include <linux/percpu_counter.h>
++#include <linux/list.h>
+ #endif
+ #endif
+ #include <linux/rbtree.h>
+
++#define EXT3_BB_MAX_BLOCKS 30
++struct ext3_free_metadata {
++ unsigned short group;
++ unsigned short num;
++ unsigned short blocks[EXT3_BB_MAX_BLOCKS];
++ struct list_head list;
++};
++
++#define EXT3_BB_MAX_ORDER 14
++
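++/* per-group buddy state: disk locations of the block and buddy bitmaps,
++ * plus free-chunk counters for orders 0..EXT3_BB_MAX_ORDER-1 */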
++struct ext3_buddy_group_blocks {
++ sector_t bb_bitmap;
++ sector_t bb_buddy;
++ spinlock_t bb_lock;
++ unsigned bb_counters[EXT3_BB_MAX_ORDER];
++ struct ext3_free_metadata *bb_md_cur;
++ unsigned long bb_tid;
++};
++
+ /*
+ * third extended-fs super-block data in memory
+ */
+@@ -78,6 +98,17 @@
+ struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
+ wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
+ #endif
++
++ /* for buddy allocator */
++ struct ext3_buddy_group_blocks *s_buddy_blocks;
++ struct inode *s_buddy;
++ long s_blocks_reserved;
++ spinlock_t s_reserve_lock;
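++ /* freed-metadata containers move between these lists as their
++  * owning journal transaction goes active -> closed -> committed */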
++ struct list_head s_active_transaction;
++ struct list_head s_closed_transaction;
++ struct list_head s_committed_transaction;
++ spinlock_t s_md_lock;
++ tid_t s_last_transaction;
+ };
+
+ #endif /* _LINUX_EXT3_FS_SB */
--- /dev/null
+Index: linux-2.6.7/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.7.orig/fs/ext3/namei.c 2004-06-15 23:19:36.000000000 -0600
++++ linux-2.6.7/fs/ext3/namei.c 2004-08-20 17:48:54.000000000 -0600
+@@ -1596,11 +1596,17 @@ static int ext3_delete_entry (handle_t *
+ static inline void ext3_inc_count(handle_t *handle, struct inode *inode)
+ {
+ inode->i_nlink++;
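++ /* on an indexed (htree) directory, i_nlink == 1 means "link count
++  * unknown"; this is what allows more than EXT3_LINK_MAX subdirs */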
++ if (is_dx(inode) && inode->i_nlink > 1) {
++ /* limit is 16-bit i_links_count */
++ if (inode->i_nlink >= EXT3_LINK_MAX || inode->i_nlink == 2)
++ inode->i_nlink = 1;
++ }
+ }
+
+ static inline void ext3_dec_count(handle_t *handle, struct inode *inode)
+ {
+- inode->i_nlink--;
++ if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
++ inode->i_nlink--;
+ }
+
+ static int ext3_add_nondir(handle_t *handle,
+@@ -1693,7 +1698,7 @@ static int ext3_mkdir(struct inode * dir
+ struct ext3_dir_entry_2 * de;
+ int err;
+
+- if (dir->i_nlink >= EXT3_LINK_MAX)
++ if (EXT3_DIR_LINK_MAXED(dir))
+ return -EMLINK;
+
+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+@@ -1715,7 +1720,7 @@ static int ext3_mkdir(struct inode * dir
+ inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
+ dir_block = ext3_bread (handle, inode, 0, 1, &err);
+ if (!dir_block) {
+- inode->i_nlink--; /* is this nlink == 0? */
++ ext3_dec_count(handle, inode); /* is this nlink == 0? */
+ ext3_mark_inode_dirty(handle, inode);
+ iput (inode);
+ goto out_stop;
+@@ -1747,7 +1752,7 @@ static int ext3_mkdir(struct inode * dir
+ iput (inode);
+ goto out_stop;
+ }
+- dir->i_nlink++;
++ ext3_inc_count(handle, dir);
+ ext3_update_dx_flag(dir);
+ ext3_mark_inode_dirty(handle, dir);
+ d_instantiate(dentry, inode);
+@@ -2010,10 +2015,10 @@ static int ext3_rmdir (struct inode * di
+ retval = ext3_delete_entry(handle, dir, de, bh);
+ if (retval)
+ goto end_rmdir;
+- if (inode->i_nlink != 2)
+- ext3_warning (inode->i_sb, "ext3_rmdir",
+- "empty directory has nlink!=2 (%d)",
+- inode->i_nlink);
++ if (!EXT3_DIR_LINK_EMPTY(inode))
++ ext3_warning(inode->i_sb, "ext3_rmdir",
++ "empty directory has too many links (%d)",
++ inode->i_nlink);
+ inode->i_version++;
+ inode->i_nlink = 0;
+ /* There's no need to set i_disksize: the fact that i_nlink is
+@@ -2023,7 +2028,7 @@ static int ext3_rmdir (struct inode * di
+ ext3_orphan_add(handle, inode);
+ inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+ ext3_mark_inode_dirty(handle, inode);
+- dir->i_nlink--;
++ ext3_dec_count(handle, dir);
+ ext3_update_dx_flag(dir);
+ ext3_mark_inode_dirty(handle, dir);
+
+@@ -2074,7 +2079,7 @@ static int ext3_unlink(struct inode * di
+ dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+ ext3_update_dx_flag(dir);
+ ext3_mark_inode_dirty(handle, dir);
+- inode->i_nlink--;
++ ext3_dec_count(handle, inode);
+ if (!inode->i_nlink)
+ ext3_orphan_add(handle, inode);
+ inode->i_ctime = dir->i_ctime;
+@@ -2146,7 +2151,7 @@ static int ext3_link (struct dentry * ol
+ struct inode *inode = old_dentry->d_inode;
+ int err;
+
+- if (inode->i_nlink >= EXT3_LINK_MAX)
++ if (EXT3_DIR_LINK_MAXED(inode))
+ return -EMLINK;
+
+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+@@ -2230,8 +2235,8 @@ static int ext3_rename (struct inode * o
+ if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino)
+ goto end_rename;
+ retval = -EMLINK;
+- if (!new_inode && new_dir!=old_dir &&
+- new_dir->i_nlink >= EXT3_LINK_MAX)
++ if (!new_inode && new_dir != old_dir &&
++ EXT3_DIR_LINK_MAXED(new_dir))
+ goto end_rename;
+ }
+ if (!new_bh) {
+@@ -2288,7 +2293,7 @@ static int ext3_rename (struct inode * o
+ }
+
+ if (new_inode) {
+- new_inode->i_nlink--;
++ ext3_dec_count(handle, new_inode);
+ new_inode->i_ctime = CURRENT_TIME;
+ }
+ old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
+@@ -2299,11 +2304,11 @@ static int ext3_rename (struct inode * o
+ PARENT_INO(dir_bh->b_data) = le32_to_cpu(new_dir->i_ino);
+ BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata");
+ ext3_journal_dirty_metadata(handle, dir_bh);
+- old_dir->i_nlink--;
++ ext3_dec_count(handle, old_dir);
+ if (new_inode) {
+- new_inode->i_nlink--;
++ ext3_dec_count(handle, new_inode);
+ } else {
+- new_dir->i_nlink++;
++ ext3_inc_count(handle, new_dir);
+ ext3_update_dx_flag(new_dir);
+ ext3_mark_inode_dirty(handle, new_dir);
+ }
+Index: linux-2.6.7/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.6.7.orig/include/linux/ext3_fs.h 2004-06-15 23:19:36.000000000 -0600
++++ linux-2.6.7/include/linux/ext3_fs.h 2004-08-20 17:41:27.000000000 -0600
+@@ -41,7 +41,7 @@ struct statfs;
+ /*
+ * Always enable hashed directories
+ */
+-#define CONFIG_EXT3_INDEX
++#define CONFIG_EXT3_INDEX 1
+
+ /*
+ * Debug code
+@@ -79,7 +81,7 @@
+ /*
+ * Maximal count of links to a file
+ */
+-#define EXT3_LINK_MAX 32000
++#define EXT3_LINK_MAX 65000
+
+ /*
+ * Macro-instructions used to manage several block sizes
+@@ -595,14 +595,15 @@ struct ext3_dir_entry_2 {
+ */
+
+ #ifdef CONFIG_EXT3_INDEX
+- #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
+- EXT3_FEATURE_COMPAT_DIR_INDEX) && \
++#define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
++ EXT3_FEATURE_COMPAT_DIR_INDEX) && \
+ (EXT3_I(dir)->i_flags & EXT3_INDEX_FL))
+-#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX)
+-#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
++#define EXT3_DIR_LINK_MAXED(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX)
++#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || \
++ (is_dx(dir) && (dir)->i_nlink == 1))
+ #else
+ #define is_dx(dir) 0
+-#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
++#define EXT3_DIR_LINK_MAXED(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
+ #define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2)
+ #endif
+
Index: linux-stage/fs/ext3/Makefile
===================================================================
---- linux-stage.orig/fs/ext3/Makefile 2004-05-11 17:21:20.000000000 -0400
-+++ linux-stage/fs/ext3/Makefile 2004-05-11 17:21:21.000000000 -0400
+--- linux-stage.orig/fs/ext3/Makefile 2004-11-03 14:41:24.747805262 -0500
++++ linux-stage/fs/ext3/Makefile 2004-11-03 14:41:25.123696274 -0500
@@ -4,7 +4,7 @@
obj-$(CONFIG_EXT3_FS) += ext3.o
ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
Index: linux-stage/fs/ext3/inode.c
===================================================================
---- linux-stage.orig/fs/ext3/inode.c 2004-05-11 17:21:21.000000000 -0400
-+++ linux-stage/fs/ext3/inode.c 2004-05-11 17:21:21.000000000 -0400
+--- linux-stage.orig/fs/ext3/inode.c 2004-11-03 14:41:25.040720333 -0500
++++ linux-stage/fs/ext3/inode.c 2004-11-03 14:46:08.458515670 -0500
@@ -37,6 +37,7 @@
#include <linux/mpage.h>
#include <linux/uio.h>
#include "acl.h"
/*
-@@ -2472,6 +2473,9 @@
- ei->i_acl = EXT3_ACL_NOT_CACHED;
+@@ -2401,6 +2402,9 @@
ei->i_default_acl = EXT3_ACL_NOT_CACHED;
#endif
-+ if (ext3_iopen_get_inode(inode))
-+ return;
-+
+ ei->i_rsv_window.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
++
++ if (ext3_iopen_get_inode(inode))
++ return;
+
if (ext3_get_inode_loc(inode, &iloc, 0))
goto bad_inode;
- bh = iloc.bh;
Index: linux-stage/fs/ext3/iopen.c
===================================================================
--- linux-stage.orig/fs/ext3/iopen.c 1969-12-31 19:00:00.000000000 -0500
-+++ linux-stage/fs/ext3/iopen.c 2004-05-11 17:21:21.000000000 -0400
++++ linux-stage/fs/ext3/iopen.c 2004-11-03 14:41:25.125695694 -0500
@@ -0,0 +1,272 @@
+/*
+ * linux/fs/ext3/iopen.c
Index: linux-stage/fs/ext3/iopen.h
===================================================================
--- linux-stage.orig/fs/ext3/iopen.h 1969-12-31 19:00:00.000000000 -0500
-+++ linux-stage/fs/ext3/iopen.h 2004-05-11 17:21:21.000000000 -0400
++++ linux-stage/fs/ext3/iopen.h 2004-11-03 14:41:25.126695404 -0500
@@ -0,0 +1,15 @@
+/*
+ * iopen.h
+ struct inode *inode, int rehash);
Index: linux-stage/fs/ext3/namei.c
===================================================================
---- linux-stage.orig/fs/ext3/namei.c 2004-05-11 17:21:20.000000000 -0400
-+++ linux-stage/fs/ext3/namei.c 2004-05-11 17:21:21.000000000 -0400
+--- linux-stage.orig/fs/ext3/namei.c 2004-11-03 14:41:24.957744391 -0500
++++ linux-stage/fs/ext3/namei.c 2004-11-03 14:41:25.127695114 -0500
@@ -37,6 +37,7 @@
#include <linux/buffer_head.h>
#include <linux/smp_lock.h>
}
-@@ -2019,10 +2021,6 @@
+@@ -2029,10 +2031,6 @@
inode->i_nlink);
inode->i_version++;
inode->i_nlink = 0;
ext3_orphan_add(handle, inode);
inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
ext3_mark_inode_dirty(handle, inode);
-@@ -2139,6 +2137,23 @@
+@@ -2152,6 +2150,23 @@
return err;
}
static int ext3_link (struct dentry * old_dentry,
struct inode * dir, struct dentry *dentry)
{
-@@ -2161,7 +2176,8 @@
+@@ -2175,7 +2190,8 @@
ext3_inc_count(handle, inode);
atomic_inc(&inode->i_count);
+ err = ext3_add_link(handle, dentry, inode);
+ ext3_orphan_del(handle, inode);
ext3_journal_stop(handle);
- return err;
- }
+ if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
+ goto retry;
Index: linux-stage/fs/ext3/super.c
===================================================================
---- linux-stage.orig/fs/ext3/super.c 2004-05-11 17:21:21.000000000 -0400
-+++ linux-stage/fs/ext3/super.c 2004-05-11 17:44:53.000000000 -0400
-@@ -536,7 +536,7 @@
- Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_noload,
+--- linux-stage.orig/fs/ext3/super.c 2004-11-03 14:41:25.043719463 -0500
++++ linux-stage/fs/ext3/super.c 2004-11-03 14:41:25.129694535 -0500
+@@ -534,7 +534,7 @@
+ Opt_reservation, Opt_noreservation, Opt_noload,
Opt_commit, Opt_journal_update, Opt_journal_inum,
Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
- Opt_ignore, Opt_barrier,
{Opt_err, NULL}
};
-@@ -772,6 +775,18 @@
+@@ -778,6 +781,18 @@
else
clear_opt(sbi->s_mount_opt, BARRIER);
break;
-tbd Cluster File Systems, Inc. <info@clusterfs.com>
- * version 1.2.x
+tbd Cluster File Systems, Inc. <info@clusterfs.com>
+ * version 1.3.4
+ * bug fixes
+ - fixes from lustre 1.2.8
+ - print NAL number in %x format (4645)
+ - the watchdog thread now runs as interruptible (5246)
+ - drop import inflight refcount on signal_completed_replay error (5255)
+ * miscellania
+ - add pid to ldlm debugging output (4922)
+
+2004-10-08 Cluster File Systems, Inc. <info@clusterfs.com>
+ * version 1.3.3
+ * bug fixes
+ - properly handle portals process identifiers in messages (4165)
+ - finish default directory EA handling (3048)
+ - fixes from lustre 1.2.7
+ - removed PTL_MD_KIOV usage under CRAY_PORTALS (4420)
+ - allow EADDRNOTAVAIL as retry for connect in liblustre tcpnal (4822)
+
+2004-09-16 Cluster File Systems, Inc. <info@clusterfs.com>
+ * version 1.3.2
+ * bug fixes
+ - many liblustre fixes
+ - fixes from lustre 1.2.6
+ * miscellania
+ - update to new libsysio-head-0806
+ - reorganization of lov code
+
+2004-08-30 Cluster File Systems, Inc. <info@clusterfs.com>
+ * version 1.3.1
+ * bug fixes
+ - add locking for mmapped files (2828)
+ - lmc/lconf changes to support multiple interfaces (3376)
+ - fixes from lustre 1.2.5
+
+2004-08-14 Cluster File Systems, Inc. <info@clusterfs.com>
+ * version 1.3.0
* bug fixes
- don't dereference NULL peer_ni in ldlm_handle_ast_error (3258)
- don't allow unlinking open directory if it isn't empty (2904)
 - choose better nal ids in liblustre (3292)
- initialize liblustre with uid/group membership (2862)
- let lconf resolve symlinked-to devices (4629)
+ - balance journal closure when 2.6 filter write fails (3401)
+ - add second rpc_lock and last_rcvd info for close reqs (3462)
+ - don't hold llog sem during network request (3652)
+ - update server last transno after client disconnects (2525)
+ - replace config semaphore with spinlock (3306)
+ - ext3 exents and multi-block allocation (3024)
+ - service time statistics in /proc
+ - minor fixes to liblustre build (3317)
+ - client recovery without upcall (3262)
+ - use transno after validating reply (3892)
+ - use different name for 2nd ptlrpcd thread (3887)
+ - get a client lock in ll_inode_revalidate_it (3597)
+ - direct IO reads on OST (4048)
+ - process timed out requests if import state changes (3754)
+ - ignore -ENOENT errors in osc_destroy (3639)
+ - fixes from lustre 1.2.0-1.2.4
+ * miscellania
+ - use "CATALOGS" for the llog catalogs, not "CATLIST" (old) (b=2841)
+ - added kernel patch for /dev/sd I/O stats (4385)
+
+2004-11-16 Cluster File Systems, Inc. <info@clusterfs.com>
+ * version 1.2.8
+ * bug fixes
+ - fix TCP_NODELAY bug, which caused extreme perf regression (5134)
+ - allocate qswnal tx descriptors singly to avoid fragmentation (4504)
+ - don't LBUG on obdo_alloc() failure, use OBD_SLAB_ALLOC() (4800)
+ - fix NULL dereference in /proc/sys/portals/routes (4827)
+ - allow failed mdc_close() operations to be interrupted (4561)
+ - stop precreate on OST before MDS would time out on it (4778)
+ - don't free dentries not owned by NFS code, check generation (4806)
+ - fix lsm leak if mds_create_objects() fails (4801)
+ - limit debug_daemon file size, always print CERROR messages (4789)
+ - use transno after validating reply (3892)
+ - process timed out requests if import state changes (3754)
+ - update mtime on OST during writes, return in glimpse (4829)
+ - add mkfsoptions to LDAP (4679)
+ - use ->max_readahead method instead of zapping global ra (5039)
+ - don't interrupt __l_wait_event() during strace
+ * miscellania
+ - add software watchdogs to catch hung threads quickly (4941)
+ - make lustrefs init script start after nfs is mounted
+ - fix CWARN/ERROR duplication (4930)
+ - return async write errors to application if possible (2248)
+ - update barely-supported suse-2.4.21-171 series (4842)
+ - support for sles 9 %post scripts
+ - support for building 2.6 kernel-source packages
+ - support for sles km_* packages
+
+2004-10-07 Cluster File Systems, Inc. <info@clusterfs.com>
+ * version 1.2.7
+ * bug fixes
+ - ignore -ENOENT errors in osc_destroy (3639)
+ - notify osc create thread that OSC is being cleaned up (4600)
+ - add nettype argument for llmount in #5d in conf-sanity.sh (3936)
+ - reconstruct ost_handle() like mds_handle() (4657)
+ - create a new thread to do import eviction to avoid deadlock (3969)
+ - let lconf resolve symlinked-to devices (4629)
+ - don't unlink "objects" from directory with default EA (4554)
+ - hold socknal file ref over connect in case target is down (4394)
+ - allow more than 32000 subdirectories in a single directory (3244)
+ - OST returns ENOSPC from object create when no space left (4539)
+ - don't send truncate RPC if file size isn't changing (4410)
+ - limit OSC precreate to 1/2 of value OST considers bogus (4778)
+ - bind to privileged port in socknal and tcpnal (3689)
+ * miscellania
+ - rate limit CERROR/CWARN console message to avoid overload (4519)
+ - basic mmap support (3918)
+ - kernel patch series update from b1_4 (4711)
+
+2004-09-16 Cluster File Systems, Inc. <info@clusterfs.com>
+ * version 1.2.6
+ * bug fixes
+ - avoid crash during MDS cleanup with OST shut down (2775)
+ - fix loi_list_lock/oig_lock inversion on interrupted IO (4136)
+ - don't use bad inodes on the MDS (3744)
+ - dynamic object preallocation to improve recovery speed (4236)
+ - don't hold spinlock over lock dumping or change debug flags (4401)
+ - don't zero obd_dev when it is force cleaned (3651)
+ - "lctl deactivate" will stop automatic recovery attempts (3406)
+ - look for existing replayed locks to avoid duplicates (3764)
+ - don't resolve lock handle twice in recovery avoiding race (4401)
+ - revalidate should check working dir is a directory (4134)
+ * miscellania
+ - don't always mark "slow" obdfilter messages as errors (4418)
+
+2004-08-24 Cluster File Systems, Inc. <info@clusterfs.com>
+ * version 1.2.5
+ * bug fixes
+ - don't close LustreDB during write_conf until it is done (3860)
+ - fix typo in lconf for_each_profile (3821)
+ - allow dumping logs from multiple threads at one time (3820)
+ - don't allow multiple threads in OSC recovery (3812)
+ - fix debug_size parameters (3864)
+ - fix mds_postrecov to initialize import for llog ctxt (3121)
+ - replace config semaphore with spinlock (3306)
+ - be sure to send a reply for a CANCEL rpc with bad export (3863)
+ - don't allow enqueue to complete on a destroyed export (3822)
+ - down write_lock before checking llog header bitmap (3825)
+ - recover from lock replay timeout (3764)
+ - up llog sem before sending rpc (3652)
+ - reduce ns lock hold times when setting kms (3267)
+ - change a dlm LBUG to LASSERTF, to maybe learn something (4228)
+ - fix NULL deref and obd_dev leak on setup error (3312)
+ - replace some LBUG about llog ops with error handling (3841)
+ - don't match INVALID dentries from d_lookup and spin (3784)
+ - hold dcache_lock while marking dentries INVALID and hashing (4255)
+ - fix invalid assertion in ptlrpc_set_wait (3880)
+ * miscellania
+ - add libwrap support for the TCP acceptor (3996)
+ - add /proc/sys/portals/routes for non-root route listing (3994)
+ - allow setting MDS UUID in .xml (2580)
+ - print the stack of a process that LBUGs (4228)
+
+2004-07-14 Cluster File Systems, Inc. <info@clusterfs.com>
+ * version 1.2.4
+ * bug fixes
+ - don't cleanup request in ll_file_open() on failed MDS open (3430)
+ - make sure to unset replay flag from failed open requests (3440)
+ - if default stripe count is 0, use OST count for inode size (3636)
+ - update parent mtime/ctime on client for create/unlink (2611)
+ - drop dentry ref in ext3_add_link from open_connect_dentry (3266)
+ - free recovery state on server during a forced cleanup (3571)
+ - unregister_reply for resent reqs (3063)
+ - loop back devices mounting and status check on 2.6 (3563)
+ - fix resource-creation race that can provoke i_size == 0 (3513)
+ - don't try to use bad inodes returned from MDS/OST fs lookup (3688)
+ - more debugging for page-accounting assertion (3746)
+ - return -ENOENT instead of asserting if ost getattr+unlink race (3558)
+ - avoid deadlock after precreation failure (3758)
+ - fix race and lock order deadlock in orphan handling (3450, 3750)
+ - add validity checks when grabbing inodes from l_ast_data (3599)
* miscellania
- drop scimac NAL (unmaintained)
AM_CONDITIONAL(SMFS, test x$enable_smfs = xyes)
AM_CONDITIONAL(GSS, test x$enable_gss = xyes)
AM_CONDITIONAL(LIBLUSTRE, test x$enable_liblustre = xyes)
+AM_CONDITIONAL(LIBLUSTRE_TESTS, test x$enable_liblustre_tests = xyes)
AM_CONDITIONAL(MPITESTS, test x$enable_mpitests = xyes, Build MPI Tests)
])
<!ATTLIST ptlrpc %object.attr;>
<!ELEMENT osd (fstype | devpath | devsize | autoformat |
- target_ref | node_ref | journalsize )*>
+ target_ref | node_ref | journalsize | mkfsoptions)*>
<!ATTLIST osd %object.attr;
osdtype (obdfilter | obdecho) 'obdfilter'>
failover ( 1 | 0 ) #IMPLIED>
<!ELEMENT mdsdev (fstype | devpath | devsize | autoformat |
- target_ref | node_ref | journalsize )*>
+ target_ref | node_ref | journalsize | mkfsoptions)*>
<!ATTLIST mdsdev %object.attr;>
<!ELEMENT lov (mds_ref |(obd_ref)+)*>
<!ELEMENT ptldebug %object.content;>
<!ELEMENT subsystem %object.content;>
<!ELEMENT journalsize %object.content;>
+<!ELEMENT mkfsoptions %object.content;>
<!ELEMENT fstype %object.content;>
<!ELEMENT nid %object.content;>
<!ELEMENT port %object.content;>
<if test="journalsize">
journalsize: <value-of select="journalsize"/>
</if>
+<if test="mkfsoptions">
+mkfsoptions: <value-of select="mkfsoptions"/>
+</if>
nodeRef: <value-of select="node_ref/@uuidref"/>
targetRef: <value-of select="target_ref/@uuidref"/>
<text>
<if test="journalsize">
journalsize: <value-of select="journalsize"/>
</if>
+<if test="mkfsoptions">
+mkfsoptions: <value-of select="mkfsoptions"/>
+</if>
<text>
</text>
</template>
#define LIBLUSTRE_H__
#include <sys/mman.h>
-#ifndef __CYGWIN__
-#include <stdint.h>
-#include <asm/page.h>
-#else
-#include <sys/types.h>
-#include "ioctl.h"
+#ifdef HAVE_STDINT_H
+# include <stdint.h>
+#endif
+#ifdef HAVE_ASM_PAGE_H
+# include <asm/page.h>
+#endif
+#ifdef HAVE_SYS_USER_H
+# include <sys/user.h>
#endif
+
+#include "ioctl.h"
+
#include <stdio.h>
#include <sys/ioctl.h>
#include <stdlib.h>
}
#define lock_kernel() do {} while (0)
+#define unlock_kernel() do {} while (0)
#define daemonize(l) do {} while (0)
#define sigfillset(l) do {} while (0)
#define recalc_sigpending(l) do {} while (0)
#include <linux/lustre_export.h>
#include <linux/lustre_net.h>
-
#endif
const char *name,
struct lprocfs_stats *stats);
-#define LPROCFS_INIT_MULTI_VARS(array, size) \
-void lprocfs_init_multi_vars(unsigned int idx, \
- struct lprocfs_static_vars *x) \
-{ \
- struct lprocfs_static_vars *glob = (struct lprocfs_static_vars*)array; \
- LASSERT(glob != 0); \
- LASSERT(idx < (unsigned int)(size)); \
- x->module_vars = glob[idx].module_vars; \
- x->obd_vars = glob[idx].obd_vars; \
-} \
-
-#define LPROCFS_INIT_VARS(name, vclass, vinstance) \
-void lprocfs_##name##_init_vars(struct lprocfs_static_vars *x) \
-{ \
- x->module_vars = vclass; \
- x->obd_vars = vinstance; \
-} \
-
-#define lprocfs_init_vars(NAME, VAR) \
-do { \
+#define LPROCFS_INIT_MULTI_VARS(array, size) \
+void lprocfs_init_multi_vars(unsigned int idx, \
+ struct lprocfs_static_vars *x) \
+{ \
+ struct lprocfs_static_vars *glob = (struct lprocfs_static_vars*)array; \
+ LASSERT(glob != 0); \
+ LASSERT(idx < (unsigned int)(size)); \
+ x->module_vars = glob[idx].module_vars; \
+ x->obd_vars = glob[idx].obd_vars; \
+} \
+
+#define LPROCFS_INIT_VARS(name, vclass, vinstance) \
+void lprocfs_##name##_init_vars(struct lprocfs_static_vars *x) \
+{ \
+ x->module_vars = vclass; \
+ x->obd_vars = vinstance; \
+} \
+
+#define lprocfs_init_vars(NAME, VAR) \
+do { \
extern void lprocfs_##NAME##_init_vars(struct lprocfs_static_vars *); \
lprocfs_##NAME##_init_vars(VAR); \
} while (0)
uint32_t lmd_nal;
uint32_t lmd_server_ipaddr;
uint32_t lmd_port;
+ uint32_t lmd_async;
uint32_t lmd_nllu;
uint32_t lmd_nllg;
char lmd_security[16];
/* ptlrpc/recov_thread.c */
int llog_start_commit_thread(void);
+int llog_cleanup_commit_master(int force);
struct llog_canceld_ctxt *llcd_grab(void);
void llcd_send(struct llog_canceld_ctxt *llcd);
} while (0)
#define groups_sort(gi) do {} while (0)
-
#define GROUP_AT(gi, i) ((gi)->small_block[(i)])
static inline int cleanup_group_info(void)
page->private = 0; \
} while(0)
+#ifndef smp_num_cpus
+#define smp_num_cpus num_online_cpus()
+#endif
+
#define kiobuf bio
#include <linux/proc_fs.h>
static inline int mapping_mapped(struct address_space *mapping)
{
- return mapping->i_mmap_shared ? 1 : 0;
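+	/* a file may be on either the shared or the private mmap list;
+	 * either one means it is mapped */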
+ if (mapping->i_mmap_shared)
+ return 1;
+ if (mapping->i_mmap)
+ return 1;
+ return 0;
}
/* to find proc_dir_entry from inode. 2.6 has native one -bzzz */
#define ll_vfs_symlink(dir, dentry, path, mode) vfs_symlink(dir, dentry, path, mode)
#endif
+#ifndef container_of
+#define container_of(ptr, type, member) ({ \
+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
+ (type *)( (char *)__mptr - offsetof(type,member) );})
+#endif
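+/* usage: given a pointer to a member, container_of() recovers the
+ * enclosing object, e.g. container_of(&obj->field, struct obj_type, field) */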
+
#ifdef HAVE_I_ALLOC_SEM
#define UP_WRITE_I_ALLOC_SEM(i) do { up_write(&(i)->i_alloc_sem); } while (0)
#define DOWN_WRITE_I_ALLOC_SEM(i) do { down_write(&(i)->i_alloc_sem); } while(0)
OP; \
}} while(0)
+#define LL_CDEBUG_PAGE(mask, page, fmt, arg...) \
+ CDEBUG(mask, "page %p map %p ind %lu priv %0lx: " fmt, \
+ page, page->mapping, page->index, page->private, ## arg)
+
/* lib/debug.c */
int dump_lniobuf(struct niobuf_local *lnb);
int dump_rniobuf(struct niobuf_remote *rnb);
struct list_head ns_unused_list; /* all root resources in ns */
int ns_nr_unused;
unsigned int ns_max_unused;
+ unsigned long ns_next_dump; /* next dump time */
spinlock_t ns_counter_lock;
__u64 ns_locks;
/* Server-side-only members */
struct list_head l_pending_chain; /* callbacks pending */
unsigned long l_callback_timeout;
+
+ __u32 l_pid; /* pid which created this lock */
};
#define LDLM_PLAIN 10
CDEBUG(level, "### " format \
" ns: \?\? lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "\
"res: \?\? rrc=\?\? type: \?\?\? flags: %x remote: " \
- LPX64" expref: %d\n" , ## a, lock, \
+ LPX64" expref: %d pid: %u\n" , ## a, lock, \
lock->l_handle.h_cookie, atomic_read(&lock->l_refc), \
lock->l_readers, lock->l_writers, \
ldlm_lockname[lock->l_granted_mode], \
ldlm_lockname[lock->l_req_mode], \
lock->l_flags, lock->l_remote_handle.cookie, \
lock->l_export ? \
- atomic_read(&lock->l_export->exp_refcount) : -99); \
+ atomic_read(&lock->l_export->exp_refcount) : -99, \
+ lock->l_pid); \
break; \
} \
if (lock->l_resource->lr_type == LDLM_EXTENT) { \
CDEBUG(level, "### " format \
" ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \
"res: "LPU64"/"LPU64"/"LPU64" rrc: %d type: %s ["LPU64 \
- "->"LPU64"] (req "LPU64"->"LPU64") flags: %x remote: " \
- LPX64" expref: %d\n" , ## a, \
+ "->"LPU64"] (req "LPU64"->"LPU64") flags: %x remote: " \
+ LPX64" expref: %d pid: %u\n" , ## a, \
lock->l_resource->lr_namespace->ns_name, lock, \
lock->l_handle.h_cookie, atomic_read(&lock->l_refc), \
lock->l_readers, lock->l_writers, \
lock->l_req_extent.start, lock->l_req_extent.end, \
lock->l_flags, lock->l_remote_handle.cookie, \
lock->l_export ? \
- atomic_read(&lock->l_export->exp_refcount) : -99); \
+ atomic_read(&lock->l_export->exp_refcount) : -99, \
+ lock->l_pid); \
break; \
} \
if (lock->l_resource->lr_type == LDLM_FLOCK) { \
CDEBUG(level, "### " format \
" ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \
"res: "LPU64"/"LPU64"/"LPU64" rrc: %d type: %s pid: " \
- LPU64" ["LPU64"->"LPU64"] flags: %x remote: "LPX64 \
- " expref: %d\n" , ## a, \
+ LPU64" " "["LPU64"->"LPU64"] flags: %x remote: "LPX64 \
+ " expref: %d pid: %u\n" , ## a, \
lock->l_resource->lr_namespace->ns_name, lock, \
lock->l_handle.h_cookie, atomic_read(&lock->l_refc), \
lock->l_readers, lock->l_writers, \
lock->l_policy_data.l_flock.end, \
lock->l_flags, lock->l_remote_handle.cookie, \
lock->l_export ? \
- atomic_read(&lock->l_export->exp_refcount) : -99); \
+ atomic_read(&lock->l_export->exp_refcount) : -99, \
+ lock->l_pid); \
break; \
} \
if (lock->l_resource->lr_type == LDLM_IBITS) { \
CDEBUG(level, "### " format \
" ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \
"res: "LPU64"/"LPU64"/"LPU64" bits "LPX64" rrc: %d " \
- "type: %s flags: %x remote: "LPX64" expref: %d\n" , ## a,\
+ "type: %s flags: %x remote: "LPX64" expref: %d " \
+ "pid %u\n" , ## a, \
lock->l_resource->lr_namespace->ns_name, \
lock, lock->l_handle.h_cookie, \
atomic_read (&lock->l_refc), \
ldlm_typename[lock->l_resource->lr_type], \
lock->l_flags, lock->l_remote_handle.cookie, \
lock->l_export ? \
- atomic_read(&lock->l_export->exp_refcount) : -99); \
+ atomic_read(&lock->l_export->exp_refcount) : -99, \
+ lock->l_pid); \
break; \
} \
{ \
CDEBUG(level, "### " format \
" ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \
- "res: "LPU64"/"LPU64"/"LPU64"/"LPU64" rrc: %d type: %s "\
- "flags: %x remote: "LPX64" expref: %d\n" , ## a, \
+ "res: "LPU64"/"LPU64"/"LPU64"/"LPU64" rrc: %d type: %s " \
+ "flags: %x remote: "LPX64" expref: %d " \
+ "pid: %u\n" , ## a, \
lock->l_resource->lr_namespace->ns_name, \
lock, lock->l_handle.h_cookie, \
atomic_read (&lock->l_refc), \
ldlm_typename[lock->l_resource->lr_type], \
lock->l_flags, lock->l_remote_handle.cookie, \
lock->l_export ? \
- atomic_read(&lock->l_export->exp_refcount) : -99); \
+ atomic_read(&lock->l_export->exp_refcount) : -99, \
+ lock->l_pid); \
} \
} while (0)
struct ldlm_lock *lock);
void ldlm_resource_unlink_lock(struct ldlm_lock *lock);
void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc);
-void ldlm_dump_all_namespaces(void);
-void ldlm_namespace_dump(struct ldlm_namespace *);
-void ldlm_resource_dump(struct ldlm_resource *);
+void ldlm_dump_all_namespaces(int level);
+void ldlm_namespace_dump(int level, struct ldlm_namespace *);
+void ldlm_resource_dump(int level, struct ldlm_resource *);
int ldlm_lock_change_resource(struct ldlm_namespace *, struct ldlm_lock *,
struct ldlm_res_id);
struct filter_client_data *fed_fcd;
loff_t fed_lr_off;
int fed_lr_idx;
- unsigned long fed_dirty; /* in bytes */
- unsigned long fed_grant; /* in bytes */
- unsigned long fed_pending; /* bytes just being written */
+ long fed_dirty; /* in bytes */
+ long fed_grant; /* in bytes */
+ long fed_pending; /* bytes just being written */
};
struct obd_export {
unsigned long exp_flags;
int exp_failed:1,
exp_replay_needed:1,
- exp_libclient:1; /* liblustre client? */
+ exp_libclient:1, /* liblustre client? */
+ exp_sync:1;
union {
struct mds_export_data eu_mds_data;
struct filter_export_data eu_filter_data;
int fso_bufcnt;
};
+/* lustre EA type (MEA, LOV, etc.) */
+enum ea_type {
+ EA_LOV = (1 << 0),
+ EA_MEA = (1 << 1),
+ EA_SID = (1 << 2),
+ EA_MID = (1 << 3)
+};
+
struct fsfilt_operations {
struct list_head fs_list;
struct module *fs_owner;
char *fs_type;
+
void *(* fs_start)(struct inode *inode, int op, void *desc_private,
int logs);
void *(* fs_brw_start)(int objcount, struct fsfilt_objinfo *fso,
struct iattr *iattr, int do_trunc);
int (* fs_iocontrol)(struct inode *inode, struct file *file,
unsigned int cmd, unsigned long arg);
-
- /* two methods for getting lov EA and setting it back to inode xattr. */
- int (* fs_set_md)(struct inode *inode, void *handle, void *md,
- int size);
- int (* fs_get_md)(struct inode *inode, void *md, int size);
-
- /* two methods for getting MID (master id) EA and setting it back to
- * inode xattr. */
- int (* fs_set_mid)(struct inode *inode, void *handle, void *fid,
- int size);
- int (* fs_get_mid)(struct inode *inode, void *fid, int size);
- /* two methods for getting self id EA and setting it back to inode
- * xattr. */
- int (* fs_set_sid)(struct inode *inode, void *handle, void *sid,
- int size);
- int (* fs_get_sid)(struct inode *inode, void *sid, int size);
+ /* two methods for setting getting diff. kind of EAs from inode. */
+ int (* fs_set_md)(struct inode *inode, void *handle, void *md,
+ int size, enum ea_type type);
+ int (* fs_get_md)(struct inode *inode, void *md, int size,
+ enum ea_type type);
int (* fs_send_bio)(int rw, struct inode *inode, void *bio);
ssize_t (* fs_readpage)(struct file *file, char *buf, size_t count,
loff_t *offset);
- int (* fs_add_journal_cb)(struct obd_device *obd,
+ int (* fs_add_journal_cb)(struct obd_device *obd,
struct super_block *sb,
- __u64 last_rcvd, void *handle,
- fsfilt_cb_t cb_func,
- void *cb_data);
+ __u64 last_rcvd, void *handle,
+ fsfilt_cb_t cb_func, void *cb_data);
int (* fs_statfs)(struct super_block *sb, struct obd_statfs *osfs);
int (* fs_sync)(struct super_block *sb);
int (* fs_map_inode_pages)(struct inode *inode, struct page **page,
#define LMV_EA 1
#define LOV_EA 0
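+/* silent for the first 15s, CWARN until half the timeout, CERROR beyond */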
+#define fsfilt_check_slow(start, timeout, msg) \
+do { \
+ if (time_before(jiffies, start + 15 * HZ)) \
+ break; \
+ else if (time_before(jiffies, start + timeout / 2 * HZ)) \
+ CWARN("slow %s %lus\n", msg, (jiffies - start) / HZ); \
+ else \
+ CERROR("slow %s %lus\n", msg, (jiffies - start) / HZ); \
+} while (0)
+
static inline void *
fsfilt_start_ops(struct fsfilt_operations *ops, struct inode *inode,
int op, struct obd_trans_info *oti, int logs)
LBUG();
}
}
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, 60, "journal start");
return handle;
}
int rc = ops->fs_commit(sb, inode, handle, force_sync);
CDEBUG(D_INFO, "committing handle %p\n", handle);
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, 60, "journal start");
return rc;
}
LBUG();
}
}
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "journal start");
return handle;
}
int rc = obd->obd_fsops->fs_commit_async(inode, handle, wait_handle);
CDEBUG(D_INFO, "committing handle %p (async)\n", *wait_handle);
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "journal start");
return rc;
}
unsigned long now = jiffies;
int rc = obd->obd_fsops->fs_commit_wait(inode, handle);
CDEBUG(D_INFO, "waiting for completion %p\n", handle);
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "journal start");
return rc;
}
unsigned long now = jiffies;
int rc;
rc = obd->obd_fsops->fs_setattr(dentry, handle, iattr, do_trunc);
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("long setattr time %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "setattr");
return rc;
}
static inline int
fsfilt_set_md(struct obd_device *obd, struct inode *inode,
- void *handle, void *md, int size)
+ void *handle, void *md, int size, enum ea_type type)
{
- return obd->obd_fsops->fs_set_md(inode, handle, md, size);
+ if (!obd->obd_fsops->fs_set_md)
+ return -ENOSYS;
+
+ return obd->obd_fsops->fs_set_md(inode, handle, md,
+ size, type);
}
static inline int
fsfilt_get_md(struct obd_device *obd, struct inode *inode,
- void *md, int size)
-{
- return obd->obd_fsops->fs_get_md(inode, md, size);
-}
-
-static inline int
-fsfilt_set_mid(struct obd_device *obd, struct inode *inode,
- void *handle, void *mid, int size)
+ void *md, int size, enum ea_type type)
{
- return obd->obd_fsops->fs_set_mid(inode, handle, mid, size);
-}
-
-static inline int
-fsfilt_get_mid(struct obd_device *obd, struct inode *inode,
- void *mid, int size)
-{
- return obd->obd_fsops->fs_get_mid(inode, mid, size);
-}
-
-static inline int
-fsfilt_set_sid(struct obd_device *obd, struct inode *inode,
- void *handle, void *sid, int size)
-{
- return obd->obd_fsops->fs_set_sid(inode, handle, sid, size);
-}
-
-static inline int
-fsfilt_get_sid(struct obd_device *obd, struct inode *inode,
- void *sid, int size)
-{
- return obd->obd_fsops->fs_get_sid(inode, sid, size);
+ if (!obd->obd_fsops->fs_get_md)
+ return -ENOSYS;
+
+ return obd->obd_fsops->fs_get_md(inode, md, size,
+ type);
}
static inline int fsfilt_send_bio(int rw, struct obd_device *obd,
rc = obd->obd_fsops->fs_putpage(inode, page);
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("long putpage time %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "putpage");
return rc;
}
page = obd->obd_fsops->fs_getpage(inode, index);
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("long getpage time %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "getpage");
return page;
}
#ifndef _LUSTRE_IDL_H_
#define _LUSTRE_IDL_H_
+#ifdef HAVE_ASM_TYPES_H
+#include <asm/types.h>
+#else
+#include "types.h"
+#endif
+
+
#ifdef __KERNEL__
# include <linux/ioctl.h>
-# include <asm/types.h>
# include <linux/types.h>
# include <linux/list.h>
# include <linux/string.h> /* for strncpy, below */
#ifdef __CYGWIN__
# include <sys/types.h>
#else
-# include <asm/types.h>
# include <stdint.h>
#endif
# include <libcfs/list.h>
#define MSG_CONNECT_RECOVERING 0x1
#define MSG_CONNECT_RECONNECT 0x2
#define MSG_CONNECT_REPLAYABLE 0x4
-//#define MSG_CONNECT_PEER 0x8
+#define MSG_CONNECT_PEER 0x8
#define MSG_CONNECT_LIBCLIENT 0x10
#define MSG_CONNECT_INITIAL 0x20
+#define MSG_CONNECT_ASYNC 0x40
/*
* OST requests: OBDO & OBD request records
#define OBD_MD_FLDIREA (0x0000000020000000LL) /* dir's extended attribute data */
#define OBD_MD_REINT (0x0000000040000000LL) /* reintegrate oa */
#define OBD_MD_FID (0x0000000080000000LL) /* lustre_id data */
-#define OBD_MD_FLEALIST (0x0000000100000000LL) /* list extended attributes */
-#define OBD_MD_FLACL_ACCESS (0x0000000200000000LL) /*access acl*/
+#define OBD_MD_MEA (0x0000000100000000LL) /* shows we are interested in MEA */
+#define OBD_MD_FLEALIST (0x0000000200000000LL) /* list extended attributes */
+#define OBD_MD_FLACL_ACCESS (0x0000000400000000LL) /*access acl*/
#define OBD_MD_FLNOTOBD (~(OBD_MD_FLBLOCKS | OBD_MD_LINKNAME | \
OBD_MD_FLEASIZE | OBD_MD_FLHANDLE | \
#define OBD_OBJECT_EOF 0xffffffffffffffffULL
+#define OST_MIN_PRECREATE 32
+#define OST_MAX_PRECREATE 20000
+
struct obd_ioobj {
obd_id ioo_id;
obd_gr ioo_gr;
/* INODE LOCK PARTS */
#define MDS_INODELOCK_LOOKUP 0x000001 /* dentry, mode, owner, group */
#define MDS_INODELOCK_UPDATE 0x000002 /* size, links, timestamps */
+#define MDS_INODELOCK_OPEN 0x000004 /* for opened files */
+
+/* do not forget to increase MDS_INODELOCK_MAXSHIFT when adding new bits */
+#define MDS_INODELOCK_MAXSHIFT 2
+
+/* this FULL lock is useful to take on unlink sort of operations */
+#define MDS_INODELOCK_FULL ((1 << (MDS_INODELOCK_MAXSHIFT + 1)) - 1)
/* lustre store cookie */
struct lustre_stc {
static inline char * ptlrpc_import_state_name(enum lustre_imp_state state)
{
-
static char* import_state_names[] = {
- "<UNKNOWN>", "CLOSED", "NEW", "DISCONN",
- "CONNECTING", "REPLAY", "REPLAY_LOCKS", "REPLAY_WAIT",
+ "<UNKNOWN>", "CLOSED", "NEW", "DISCONN",
+ "CONNECTING", "REPLAY", "REPLAY_LOCKS", "REPLAY_WAIT",
"RECOVER", "FULL", "EVICTED",
};
int imp_invalid:1, imp_replayable:1,
imp_dlm_fake:1, imp_server_timeout:1,
imp_initial_recov:1, imp_force_verify:1,
- imp_pingable:1, imp_resend_replay:1;
+ imp_pingable:1, imp_resend_replay:1,
+ imp_deactive:1;
__u32 imp_connect_op;
__u32 imp_connect_flags;
};
#define OBD_IOC_PROCESS_CFG _IOWR('f', 184, long)
#define OBD_IOC_DUMP_LOG _IOWR('f', 185, long)
#define OBD_IOC_CLEAR_LOG _IOWR('f', 186, long)
+#define OBD_IOC_START _IOWR('f', 187, long)
#define OBD_IOC_CATLOGLIST _IOWR('f', 190, long)
#define OBD_IOC_LLOG_INFO _IOWR('f', 191, long)
int __timed_out = 0; \
unsigned long irqflags; \
sigset_t blocked; \
+ signed long timeout_remaining; \
\
init_waitqueue_entry(&__wait, current); \
if (excl) \
else \
blocked = l_w_e_set_sigs(0); \
\
+ timeout_remaining = info->lwi_timeout; \
+ \
for (;;) { \
set_current_state(TASK_INTERRUPTIBLE); \
if (condition) \
break; \
- if (signal_pending(current)) { \
- if (info->lwi_on_signal) \
- info->lwi_on_signal(info->lwi_cb_data); \
- ret = -EINTR; \
- break; \
- } \
if (info->lwi_timeout && !__timed_out) { \
- if (schedule_timeout(info->lwi_timeout) == 0) { \
+ timeout_remaining = schedule_timeout(timeout_remaining); \
+ if (timeout_remaining == 0) { \
__timed_out = 1; \
if (!info->lwi_on_timeout || \
info->lwi_on_timeout(info->lwi_cb_data)) { \
} else { \
schedule(); \
} \
+ if (condition) \
+ break; \
+ if (signal_pending(current)) { \
+ if (__timed_out) { \
+ break; \
+ } else { \
+ /* We have to do this here because some signals */ \
+ /* are not blockable - i.e. from strace(1). */ \
+ /* In these cases we want to schedule_timeout() */ \
+ /* again, because we don't want that to return */ \
+ /* -EINTR when the RPC actually succeeded. */ \
+ /* the RECALC_SIGPENDING below will deliver the */ \
+ /* signal properly. */ \
+ SIGNAL_MASK_LOCK(current, irqflags); \
+ CLEAR_SIGPENDING; \
+ SIGNAL_MASK_UNLOCK(current, irqflags); \
+ } \
+ } \
} \
\
SIGNAL_MASK_LOCK(current, irqflags); \
RECALC_SIGPENDING; \
SIGNAL_MASK_UNLOCK(current, irqflags); \
\
+ if (__timed_out && signal_pending(current)) { \
+ if (info->lwi_on_signal) \
+ info->lwi_on_signal(info->lwi_cb_data); \
+ ret = -EINTR; \
+ } \
+ \
current->state = TASK_RUNNING; \
remove_wait_queue(&wq, &__wait); \
} while(0)
#else /* !__KERNEL__ */
#define __l_wait_event(wq, condition, info, ret, excl) \
do { \
- int timeout = info->lwi_timeout, elapse; \
+ long timeout = info->lwi_timeout, elapse, last = 0; \
int __timed_out = 0; \
- long last; \
\
- last = time(NULL); \
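+ /* lwi_timeout == 0 means wait forever: substitute a huge timeout */ \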
+ if (info->lwi_timeout == 0) \
+ timeout = 1000000000; \
+ else \
+ last = time(NULL); \
+ \
for (;;) { \
if (condition) \
break; \
if (liblustre_wait_event(timeout)) { \
- if (timeout == 0) \
+ if (timeout == 0 || info->lwi_timeout == 0) \
continue; \
- elapse = (int) (time(NULL) - last); \
+ elapse = time(NULL) - last; \
if (elapse) { \
last += elapse; \
timeout -= elapse; \
struct lustre_id lli_id; /* full lustre_id */
char *lli_symlink_name;
struct semaphore lli_open_sem;
+ struct semaphore lli_size_sem;
__u64 lli_maxbytes;
__u64 lli_io_epoch;
unsigned long lli_flags;
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
struct inode lli_vfs_inode;
#endif
+ struct semaphore lli_och_sem; /* Protects access to och pointers
+ and their usage counters */
+ /* We need all three because every inode may be opened in different
+ modes */
+ struct obd_client_handle *lli_mds_read_och;
+ __u64 lli_open_fd_read_count;
+ struct obd_client_handle *lli_mds_write_och;
+ __u64 lli_open_fd_write_count;
+ struct obd_client_handle *lli_mds_exec_och;
+ __u64 lli_open_fd_exec_count;
struct posix_acl *lli_acl_access;
};
#include <linux/lustre_idl.h>
#endif /* __KERNEL__ */
+#define LLAP_FROM_COOKIE(c) \
+ (LASSERT(((struct ll_async_page *)(c))->llap_magic == LLAP_MAGIC), \
+ (struct ll_async_page *)(c))
+
#include <lustre/lustre_user.h>
#endif
int llog_cat_id2handle(struct llog_handle *cathandle, struct llog_handle **res,
struct llog_logid *logid);
+int class_config_dump_handler(struct llog_handle * handle,
+ struct llog_rec_hdr *rec, void *data);
int llog_cat_put(struct llog_handle *cathandle);
int llog_cat_add_rec(struct llog_handle *cathandle, struct llog_rec_hdr *rec,
struct llog_cookie *reccookie, void *buf,
size = offset + sizeof(struct rw_semaphore *) * lcl->lcl_count;
OBD_FREE(lcl, size);
}
-
#endif
__u64 mcd_last_xid; /* xid for the last transaction */
__u32 mcd_last_result; /* result from last RPC */
__u32 mcd_last_data; /* per-op data (disposition for open &c.) */
- __u8 mcd_padding[MDS_LR_CLIENT_SIZE - 64];
+ /* for MDS_CLOSE requests */
+ __u64 mcd_last_close_transno; /* last completed transaction ID */
+ __u64 mcd_last_close_xid; /* xid for the last transaction */
+ __u32 mcd_last_close_result; /* result from last RPC */
+ __u32 mcd_last_close_data; /* per-op data (disposition for open &c.) */
+ __u8 mcd_padding[MDS_LR_CLIENT_SIZE - 88];
};
/* simple uid/gid mapping hash table */
#define IOC_REQUEST_CLOSE _IOWR('f', 35, long)
#define IOC_REQUEST_MAX_NR 35
-#define MDS_CHECK_RESENT(req, reconstruct) \
-{ \
- if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) { \
- struct mds_client_data *mcd = \
- req->rq_export->exp_mds_data.med_mcd; \
- if (mcd->mcd_last_xid == req->rq_xid) { \
- reconstruct; \
- RETURN(req->rq_repmsg->status); \
- } \
- DEBUG_REQ(D_HA, req, "no reply for RESENT req (have "LPD64")", \
- mcd->mcd_last_xid); \
- } \
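+/* a resent request may duplicate either the last regular or the last close
+ * transaction; check both xids (the on-disk fields are little-endian,
+ * hence the le64_to_cpu/le32_to_cpu conversions) */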
+#define MDS_CHECK_RESENT(req, reconstruct) \
+{ \
+ if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) { \
+ struct mds_client_data *mcd = \
+ req->rq_export->exp_mds_data.med_mcd; \
+ \
+ if (le64_to_cpu(mcd->mcd_last_xid) == req->rq_xid) { \
+ reconstruct; \
+ RETURN(le32_to_cpu(mcd->mcd_last_result)); \
+ } \
+ if (le64_to_cpu(mcd->mcd_last_close_xid) == req->rq_xid) { \
+ reconstruct; \
+ RETURN(le32_to_cpu(mcd->mcd_last_close_result));\
+ } \
+ DEBUG_REQ(D_HA, req, "no reply for RESENT req" \
+ "(have "LPD64", and "LPD64")", \
+ mcd->mcd_last_xid, mcd->mcd_last_close_xid); \
+ } \
}
#endif
#define MDT_MAX_THREADS 32UL
#define MDT_NUM_THREADS max(min_t(unsigned long, num_physpages / 8192, \
MDT_MAX_THREADS), 2UL)
-#define MDS_NBUFS (64 * smp_num_cpus)
+#define MDS_NBUFS (64 * smp_num_cpus)
#define MDS_BUFSIZE (8 * 1024)
/* Assume file name length = FNAME_MAX = 256 (true for extN).
* path name length = PATH_MAX = 4096
#define OST_MAX_THREADS 36UL
#define OST_NUM_THREADS max(min_t(unsigned long, num_physpages / 8192, \
OST_MAX_THREADS), 2UL)
-#define OST_NBUFS (64 * smp_num_cpus)
+#define OST_NBUFS (64 * smp_num_cpus)
#define OST_BUFSIZE (8 * 1024)
/* OST_MAXREQSIZE ~= 1640 bytes =
* lustre_msg + obdo + 16 * obd_ioobj + 64 * niobuf_remote
#define PTLBD_MAXREQSIZE 1024
struct ptlrpc_peer {
-/* bugfix #4615
- */
- ptl_process_id_t peer_id;
+ ptl_process_id_t peer_id;
struct ptlrpc_ni *peer_ni;
};
wait_queue_head_t *set_wakeup_ptr;
struct list_head set_requests;
set_interpreter_func set_interpret; /* completion callback */
- union ptlrpc_async_args set_args; /* completion context */
+ void *set_arg; /* completion context */
/* locked so that any old caller can communicate requests to
* the set holder who can then fold them into the lock-free set */
spinlock_t set_new_req_lock;
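
Replacing the fixed-size union with an opaque set_arg lets the completion callback receive arbitrary caller context. A minimal sketch of the pattern, assuming the usual set_interpreter_func signature (the context struct and names are illustrative):

struct my_ctx { int done; };

static int my_set_interpret(struct ptlrpc_request_set *set, void *arg, int rc)
{
        struct my_ctx *ctx = arg;
        ctx->done = 1;          /* record completion for the waiter */
        return rc;
}

/* registration in the caller:
 *      set->set_interpret = my_set_interpret;
 *      set->set_arg = &ctx;
 */
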
struct timeval rq_arrival_time; /* request arrival time */
struct ptlrpc_reply_state *rq_reply_state; /* separated reply state */
struct ptlrpc_request_buffer_desc *rq_rqbd; /* incoming request buffer */
+#if CRAY_PORTALS
+ ptl_uid_t rq_uid; /* peer uid, used in MDS only */
+#endif
/* client-only incoming reply */
ptl_handle_md_t rq_reply_md_h;
struct ptlrpc_cb_id rq_reply_cbid;
struct ptlrpc_peer rq_peer; /* XXX see service.c can this be factored away? */
+ char rq_peerstr[PTL_NALFMT_SIZE];
struct obd_export *rq_export;
struct obd_import *rq_import;
int srv_n_difficult_replies; /* # 'difficult' replies */
int srv_n_active_reqs; /* # reqs being served */
int srv_rqbd_timeout; /* timeout before re-posting reqs */
-
+ int srv_watchdog_timeout; /* soft watchdog timeout, in ms */
+
__u32 srv_req_portal;
__u32 srv_rep_portal;
struct proc_dir_entry *srv_procroot;
struct lprocfs_stats *srv_stats;
-
+
struct ptlrpc_srv_ni srv_interfaces[0];
};
return (portals_nid2str(p->peer_ni->pni_number, p->peer_id.nid, str));
}
-/* For bug #4615 */
static inline char *ptlrpc_id2str(struct ptlrpc_peer *p, char *str)
{
LASSERT(p->peer_ni != NULL);
void ptlrpc_commit_replies (struct obd_device *obd);
void ptlrpc_schedule_difficult_reply (struct ptlrpc_reply_state *rs);
struct ptlrpc_service *ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size,
- int req_portal, int rep_portal,
+ int req_portal, int rep_portal,
+ int watchdog_timeout, /* in ms */
svc_handler_t, char *name,
struct proc_dir_entry *proc_entry);
void ptlrpc_stop_all_threads(struct ptlrpc_service *svc);
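
Every ptlrpc_init_svc() caller must now supply the watchdog timeout in milliseconds. A hedged sketch of an updated call site, using the MDS constants above (the timeout constant and handler name are illustrative):

svc = ptlrpc_init_svc(MDS_NBUFS, MDS_BUFSIZE, MDS_MAXREQSIZE,
                      MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL,
                      MDS_SERVICE_WATCHDOG_TIMEOUT /* ms */,
                      mds_handle, "mds", obd->obd_proc_entry);
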
#ifndef __LUSTRE_SMFS_H
#define __LUSTRE_SMFS_H
+#include <linux/lustre_fsfilt.h>
#include <linux/namei.h>
+
struct snap_inode_info {
int sn_flags; /* flags indicating the inode type */
int sn_gen; /* the inode generation */
unsigned long from, unsigned long num);
extern int smfs_rec_setattr(struct inode *dir, struct dentry *dentry,
struct iattr *attr);
-extern int smfs_rec_precreate(struct dentry *dentry, int *num, struct obdo *oa);
-extern int smfs_rec_md(struct inode *inode, void * lmm, int lmm_size);
+extern int smfs_rec_precreate(struct dentry *dentry, int *num,
+ struct obdo *oa);
+extern int smfs_rec_md(struct inode *inode, void *lmm, int lmm_size,
+ enum ea_type type);
extern int smfs_rec_unpack(struct smfs_proc_args *args, char *record,
char **pbuf, int *opcode);
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
- */
+ *
+ * Copyright (C) 2001, 2002, 2003, 2004 Cluster File Systems, Inc.
+ *
+ * Author: Peter Braam <braam@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * lustre VFS/process permission interface
+ */
+
#ifndef __LVFS_H__
#define __LVFS_H__
#include <linux/namei.h>
#include <linux/lustre_compat25.h>
#include <linux/lvfs_linux.h>
-#endif
+#endif
#ifdef LIBLUSTRE
#include <lvfs_user_fs.h>
#endif
};
+struct lvfs_obd_ctxt {
+ struct vfsmount *loc_mnt;
+ atomic_t loc_refcount;
+ char *loc_name;
+ struct list_head loc_list;
+};
+
#ifdef OBD_CTXT_DEBUG
#define OBD_SET_CTXT_MAGIC(ctxt) (ctxt)->magic = OBD_RUN_CTXT_MAGIC
#else
int lustre_fwrite(struct file *file, const void *buf, int len, loff_t *off);
int lustre_fsync(struct file *file);
long l_readdir(struct file * file, struct list_head *dentry_list);
-
+int lvfs_mount_fs(char *name, char *fstype, char *options, int flags,
+ struct lvfs_obd_ctxt **lvfs_ctxt);
+void lvfs_umount_fs(struct lvfs_obd_ctxt *lvfs_ctxt);
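
lvfs_mount_fs()/lvfs_umount_fs() pair up around the refcounted lvfs_obd_ctxt above, so several OBDs naming the same device can share one vfsmount. A hedged usage sketch (device, fstype, and options are illustrative):

struct lvfs_obd_ctxt *ctxt = NULL;
int rc;

rc = lvfs_mount_fs("/dev/sda1", "ext3", NULL, 0, &ctxt);
if (rc == 0) {
        /* ... use ctxt->loc_mnt as the obd's vfsmount ... */
        lvfs_umount_fs(ctxt);   /* drops loc_refcount; unmounts on last put */
}
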
static inline void l_dput(struct dentry *de)
{
if (!de || IS_ERR(de))
#else
up(&dparent->d_inode->i_sem);
#endif
+
+ if (IS_ERR(dchild) || dchild->d_inode == NULL)
+ return dchild;
+
+ if (is_bad_inode(dchild->d_inode)) {
+ CERROR("bad inode returned %lu/%u\n",
+ dchild->d_inode->i_ino, dchild->d_inode->i_generation);
+ dput(dchild);
+ dchild = ERR_PTR(-ENOENT);
+ }
+
return dchild;
}
const char *fo_fstype;
struct super_block *fo_sb;
struct vfsmount *fo_vfsmnt;
+ struct lvfs_obd_ctxt *fo_lvfs_ctxt;
int fo_group_count;
struct dentry *fo_dentry_O; /* the "O"bject directory dentry */
struct obd_service_time cl_enter_stime;
struct mdc_rpc_lock *cl_rpc_lock;
- struct mdc_rpc_lock *cl_setattr_lock;
+ struct mdc_rpc_lock *cl_setattr_lock;
+ struct mdc_rpc_lock *cl_close_lock;
struct osc_creator cl_oscc;
+ unsigned int cl_async:1; /* 1-bit field must be unsigned to hold 1 */
};
/* Like a client, with some hangers-on. Keep mc_client_obd first so that we
struct super_block *mds_sb;
struct vfsmount *mds_vfsmnt;
struct dentry *mds_id_de;
+ struct lvfs_obd_ctxt *mds_lvfs_ctxt;
int mds_max_mdsize;
int mds_max_cookiesize;
struct file *mds_rcvd_filp;
gid_t mds_squash_gid;
ptl_nid_t mds_nosquash_nid;
atomic_t mds_real_clients;
+ atomic_t mds_open_count;
struct dentry *mds_id_dir;
int mds_obd_type;
struct dentry *mds_unnamed_dir; /* for mdt_obd_create only */
int master_group;
struct cmobd_write_service *write_srv;
};
-
+
+struct conf_obd {
+